Ryujinx/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs

using Ryujinx.Common.Logging;
using Ryujinx.Common.Pools;
using Ryujinx.Memory.Range;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Ryujinx.Graphics.Gpu.Memory
{
    /// <summary>
    /// Describes one region of a buffer that the GPU has written to.
    /// </summary>
    class BufferModifiedRange : IRange
    {
        /// <summary>
        /// Guest memory address where the range begins.
        /// </summary>
        public ulong Address { get; }

        /// <summary>
        /// Length of the range, in bytes.
        /// </summary>
        public ulong Size { get; }

        /// <summary>
        /// Guest memory address just past the last byte of the range (<see cref="Address"/> + <see cref="Size"/>).
        /// </summary>
        public ulong EndAddress => Address + Size;

        /// <summary>
        /// GPU sync number recorded when the range was last modified.
        /// </summary>
        public ulong SyncNumber { get; internal set; }

        /// <summary>
        /// Range list that originally owned this range.
        /// </summary>
        public BufferModifiedRangeList Parent { get; internal set; }

        /// <summary>
        /// Builds a modified range from its location, sync number and owning list.
        /// </summary>
        /// <param name="address">Start address of the range</param>
        /// <param name="size">Size of the range in bytes</param>
        /// <param name="syncNumber">The GPU sync number at the time of creation</param>
        /// <param name="parent">The range list that owns this range</param>
        public BufferModifiedRange(ulong address, ulong size, ulong syncNumber, BufferModifiedRangeList parent)
        {
            (Address, Size) = (address, size);
            (SyncNumber, Parent) = (syncNumber, parent);
        }

        /// <summary>
        /// Tests whether the given region intersects this modified range.
        /// </summary>
        /// <param name="address">Start address of the region to test</param>
        /// <param name="size">Size in bytes of the region to test</param>
        /// <returns>True if the two regions share at least one byte, false otherwise</returns>
        public bool OverlapsWith(ulong address, ulong size)
        {
            ulong queryEnd = address + size;

            // Both ends are exclusive, so regions that merely touch do not overlap.
            return address < EndAddress && Address < queryEnd;
        }
    }

    /// <summary>
    /// A structure used to track GPU modified ranges within a buffer.
    /// </summary>
    class BufferModifiedRangeList : RangeList<BufferModifiedRange>
    {
        // Initial size passed to the base range list constructor.
        private const int BackingInitialSize = 8;

        // Assigned once in the constructor and never replaced afterwards.
        private readonly GpuContext _context;
        private readonly Buffer _parent;
        private readonly Action<ulong, ulong> _flushAction;

        // Migrations that target this list. Created lazily by InheritRanges, so it may be null.
        private List<BufferMigration> _sources;

        // Set when another list inherits this list's ranges; flush requests are then forwarded to its destination.
        private BufferMigration _migrationTarget;

        // Guards the range list and the migration fields. Readonly so the monitor object can never be swapped out.
        private readonly object _lock = new object();

        /// <summary>
        /// Indicates whether at least one modified range is currently held by this list.
        /// </summary>
        public bool HasRanges
        {
            get
            {
                bool anyTracked;

                // Read the count while holding the list lock.
                lock (_lock)
                {
                    anyTracked = Count > 0;
                }

                return anyTracked;
            }
        }

        /// <summary>
        /// Builds an empty modified range list for a buffer.
        /// </summary>
        /// <param name="context">GPU context that the buffer range list belongs to</param>
        /// <param name="parent">The parent buffer that owns this range list</param>
        /// <param name="flushAction">The flush action for the parent buffer</param>
        public BufferModifiedRangeList(GpuContext context, Buffer parent, Action<ulong, ulong> flushAction) : base(BackingInitialSize)
        {
            (_context, _parent, _flushAction) = (context, parent, flushAction);
        }

        /// <summary>
        /// Walks the query range and invokes the action for every piece of it that is not covered by a modified region.
        /// </summary>
        /// <param name="address">Start address of the query range</param>
        /// <param name="size">Size of the query range in bytes</param>
        /// <param name="action">Action to perform for each remaining sub-range of the input range</param>
        public void ExcludeModifiedRegions(ulong address, ulong size, Action<ulong, ulong> action)
        {
            lock (_lock)
            {
                ref var modified = ref ThreadStaticArray<BufferModifiedRange>.Get();

                int modifiedCount = FindOverlapsNonOverlapping(address, size, ref modified);

                // Start of the part of the query range that has not been processed yet, and how many bytes of it remain.
                ulong cursor = address;
                ulong remaining = size;

                for (int index = 0; index < modifiedCount; index++)
                {
                    BufferModifiedRange current = modified[index];

                    if (cursor < current.Address)
                    {
                        // There is a gap between the cursor and this modified region; report it.
                        action(cursor, current.Address - cursor);
                    }

                    // Skip past the modified region.
                    ulong currentEnd = current.EndAddress;

                    remaining -= currentEnd - cursor;
                    cursor = currentEnd;
                }

                // The subtraction above can wrap when a modified region extends past the query range,
                // so the remainder is interpreted as signed before it is reported.
                if ((long)remaining > 0)
                {
                    action(cursor, remaining);
                }
            }
        }

        /// <summary>
        /// Records that a region of the buffer was modified and inserts it into the range list.
        /// Existing ranges that intersect the new region are removed, and any parts of them that stick out
        /// on either side are re-added as separate ranges.
        /// </summary>
        /// <param name="address">Start address of the modified region</param>
        /// <param name="size">Size of the modified region in bytes</param>
        public void SignalModified(ulong address, ulong size)
        {
            // The lock is required because flushes from the background thread can be affected by this.
            lock (_lock)
            {
                ref var existing = ref ThreadStaticArray<BufferModifiedRange>.Get();

                int existingCount = FindOverlapsNonOverlapping(address, size, ref existing);

                ulong endAddress = address + size;
                ulong syncNumber = _context.SyncNumber;

                for (int index = 0; index < existingCount; index++)
                {
                    BufferModifiedRange current = existing[index];

                    bool sameRegion = current.Address == address && current.Size == size;

                    if (sameRegion)
                    {
                        // An identical range is already tracked. Refresh it in place instead of replacing it.
                        current.SyncNumber = syncNumber;
                        current.Parent = this;

                        return;
                    }

                    // Anything else that intersects must be dropped, keeping only the parts outside the new region.
                    Remove(current);

                    ulong currentStart = current.Address;
                    ulong currentEnd = current.EndAddress;

                    if (currentStart < address && currentEnd > address)
                    {
                        // Keep the piece that lies before the new region.
                        BufferModifiedRange leading = new BufferModifiedRange(currentStart, address - currentStart, current.SyncNumber, current.Parent);

                        Add(leading);
                    }

                    if (currentStart < endAddress && currentEnd > endAddress)
                    {
                        // Keep the piece that lies after the new region.
                        BufferModifiedRange trailing = new BufferModifiedRange(endAddress, currentEnd - endAddress, current.SyncNumber, current.Parent);

                        Add(trailing);
                    }
                }

                Add(new BufferModifiedRange(address, size, syncNumber, this));
            }
        }

        /// <summary>
        /// Looks up the modified ranges that intersect the given region and invokes the action once per range.
        /// </summary>
        /// <param name="address">Start address to query</param>
        /// <param name="size">Size to query</param>
        /// <param name="rangeAction">The action to call for each modified range</param>
        public void GetRanges(ulong address, ulong size, Action<ulong, ulong> rangeAction)
        {
            ref var matches = ref ThreadStaticArray<BufferModifiedRange>.Get();

            int matchCount;

            // Only the lookup happens under the lock; the callbacks below run after it is released.
            lock (_lock)
            {
                matchCount = FindOverlapsNonOverlapping(address, size, ref matches);
            }

            int index = 0;

            while (index < matchCount)
            {
                BufferModifiedRange match = matches[index++];

                rangeAction(match.Address, match.Size);
            }
        }

        /// <summary>
        /// Checks whether any modified range intersects the specified region.
        /// </summary>
        /// <param name="address">Start address to query</param>
        /// <param name="size">Size to query</param>
        /// <returns>True if a range exists in the specified region, false otherwise</returns>
        public bool HasRange(ulong address, ulong size)
        {
            // The lookup runs under the lock so that the list cannot change while it is searched.
            lock (_lock)
            {
                ref var scratch = ref ThreadStaticArray<BufferModifiedRange>.Get();

                int found = FindOverlapsNonOverlapping(address, size, ref scratch);

                return found > 0;
            }
        }

        /// <summary>
        /// Runs the given range action, and/or the action of each source migration that overlaps the range
        /// according to <c>BufferMigration.Overlaps</c>.
        /// </summary>
        /// <param name="offset">The offset to pass to the action</param>
        /// <param name="size">The size to pass to the action</param>
        /// <param name="syncNumber">The sync number that has been reached</param>
        /// <param name="parent">The modified range list that originally owned this range</param>
        /// <param name="rangeAction">The action to perform</param>
        public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferModifiedRangeList parent, Action<ulong, ulong> rangeAction)
        {
            bool migrationRan = false;

            // Source migrations are only consulted for ranges that were originally owned by another list.
            if (parent != this)
            {
                lock (_lock)
                {
                    if (_sources != null)
                    {
                        foreach (BufferMigration migration in _sources)
                        {
                            if (!migration.Overlaps(offset, size, syncNumber))
                            {
                                continue;
                            }

                            if (!migrationRan && !migration.FullyMatches(offset, size))
                            {
                                // The first overlapping migration does not fully match the range,
                                // so this buffer's own action runs before any migration action.
                                rangeAction(offset, size);
                            }

                            migration.RangeActionWithMigration(offset, size, syncNumber, parent);

                            migrationRan = true;
                        }
                    }
                }
            }

            if (!migrationRan)
            {
                // No migration handled the range, so the given action is the only one to run.
                rangeAction(offset, size);
            }
        }

        /// <summary>
        /// Clears the parts of the given overlaps that fall inside the flush range and whose sync number is
        /// not newer than the one selected by <paramref name="highestDiff"/>, then flushes each of them.
        /// If this list has a migration target, the whole request is forwarded to the target's destination list instead.
        /// </summary>
        /// <param name="overlaps">Overlapping ranges to check</param>
        /// <param name="rangeCount">Number of overlapping ranges</param>
        /// <param name="highestDiff">The highest difference between an overlapping range's sync number and the current one</param>
        /// <param name="currentSync">The current sync number</param>
        /// <param name="address">The start address of the flush range</param>
        /// <param name="endAddress">The end address of the flush range</param>
        private void RemoveRangesAndFlush(
            BufferModifiedRange[] overlaps,
            int rangeCount,
            long highestDiff,
            ulong currentSync,
            ulong address,
            ulong endAddress)
        {
            lock (_lock)
            {
                if (_migrationTarget == null)
                {
                    // Absolute sync number that corresponds to highestDiff.
                    ulong waitSync = currentSync + (ulong)highestDiff;

                    for (int index = 0; index < rangeCount; index++)
                    {
                        BufferModifiedRange candidate = overlaps[index];

                        long candidateDiff = (long)(candidate.SyncNumber - currentSync);

                        if (candidateDiff > highestDiff)
                        {
                            // This range is newer than the selected sync number; leave it in the list.
                            continue;
                        }

                        // Restrict the work to the intersection of the range and the flush region.
                        ulong flushStart = Math.Max(address, candidate.Address);
                        ulong flushEnd = Math.Min(endAddress, candidate.EndAddress);

                        ClearPart(candidate, flushStart, flushEnd);

                        RangeActionWithMigration(flushStart, flushEnd - flushStart, waitSync, candidate.Parent, _flushAction);
                    }

                    return;
                }
            }

            // A migration target exists, so the destination list handles the request.
            // The target can't be changed after it is set, so reading it outside the lock is fine.

            _migrationTarget.Destination.RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress);
        }

        /// <summary>
        /// Finds the modified ranges inside the specified region, waits for the most recent sync number
        /// among those that precede the current one, and then flushes the affected ranges one by one.
        /// </summary>
        /// <remarks>
        /// This function assumes it is called from the background thread.
        /// Ranges modified at the current sync number are skipped because the guest should not expect them to be available yet.
        /// They stay reserved, so that any data sync prioritizes the data in the GPU.
        /// </remarks>
        /// <param name="address">Start address to query</param>
        /// <param name="size">Size to query</param>
        public void WaitForAndFlushRanges(ulong address, ulong size)
        {
            // Marks that the list has a migration target and the request has to be forwarded.
            const int MigratedSentinel = -1;

            ulong endAddress = address + size;
            ulong currentSync = _context.SyncNumber;

            ref var pending = ref ThreadStaticArray<BufferModifiedRange>.Get();

            int pendingCount;

            // The lookup and the migration check must see a consistent list.
            lock (_lock)
            {
                pendingCount = _migrationTarget != null
                    ? MigratedSentinel
                    : FindOverlapsNonOverlapping(address, size, ref pending);
            }

            if (pendingCount == MigratedSentinel)
            {
                _migrationTarget.Destination.WaitForAndFlushRanges(address, size);

                return;
            }

            if (pendingCount == 0)
            {
                return;
            }

            // Pick the syncpoint to wait on: the latest one that is strictly older than the current sync.

            long latestPastDiff = long.MinValue;

            for (int index = 0; index < pendingCount; index++)
            {
                long diff = (long)(pending[index].SyncNumber - currentSync);

                if (diff < 0)
                {
                    latestPastDiff = Math.Max(latestPastDiff, diff);
                }
            }

            if (latestPastDiff == long.MinValue)
            {
                // No range precedes the current sync number, so there is nothing to wait for.
                return;
            }

            _context.Renderer.WaitSync(currentSync + (ulong)latestPastDiff);

            RemoveRangesAndFlush(pending, pendingCount, latestPastDiff, currentSync, address, endAddress);
        }

        /// <summary>
        /// Takes over every range of another modified range list and registers a migration from it to this list.
        /// </summary>
        /// <param name="ranges">The range list to inherit from</param>
        /// <param name="registerRangeAction">The action to call for each inherited range whose sync number differs from the current one</param>
        public void InheritRanges(BufferModifiedRangeList ranges, Action<ulong, ulong> registerRangeAction)
        {
            BufferModifiedRange[] inherited;

            // The source list's lock is taken first, then this list's lock is nested inside it.
            lock (ranges._lock)
            {
                var migration = new BufferMigration(ranges._parent, ranges._flushAction, ranges, this, _context.SyncNumber);

                ranges._parent.IncrementReferenceCount();
                ranges._migrationTarget = migration;

                _context.RegisterBufferMigration(migration);

                inherited = ranges.ToArray();

                lock (_lock)
                {
                    if (_sources == null)
                    {
                        _sources = new List<BufferMigration>();
                    }

                    _sources.Add(migration);

                    for (int index = 0; index < inherited.Length; index++)
                    {
                        Add(inherited[index]);
                    }
                }
            }

            ulong currentSync = _context.SyncNumber;

            for (int index = 0; index < inherited.Length; index++)
            {
                BufferModifiedRange range = inherited[index];

                if (range.SyncNumber == currentSync)
                {
                    // Ranges from the current sync number are not reported.
                    continue;
                }

                registerRangeAction(range.Address, range.Size);
            }
        }

        /// <summary>
        /// Removes a source buffer migration, indicating its copy has completed.
        /// Does nothing if this list never had a source migration registered.
        /// </summary>
        /// <param name="migration">The migration to remove</param>
        public void RemoveMigration(BufferMigration migration)
        {
            lock (_lock)
            {
                // The source list is only created once a migration is registered, so it may still be null.
                _sources?.Remove(migration);
            }
        }

        /// <summary>
        /// Removes a range from the list, then re-adds the parts of it that lie outside the cleared region.
        /// </summary>
        /// <remarks>
        /// This method takes no lock itself; every caller in this class invokes it while holding the list lock.
        /// </remarks>
        /// <param name="overlap">The tracked range to remove</param>
        /// <param name="address">Start address of the region being cleared</param>
        /// <param name="endAddress">End address of the region being cleared</param>
        private void ClearPart(BufferModifiedRange overlap, ulong address, ulong endAddress)
        {
            ulong overlapStart = overlap.Address;
            ulong overlapEnd = overlap.EndAddress;

            Remove(overlap);

            if (address > overlapStart)
            {
                // Part of the range sits before the cleared region; keep it with its original sync number and owner.
                Add(new BufferModifiedRange(overlapStart, address - overlapStart, overlap.SyncNumber, overlap.Parent));
            }

            if (endAddress < overlapEnd)
            {
                // Part of the range sits after the cleared region; keep it as well.
                Add(new BufferModifiedRange(endAddress, overlapEnd - endAddress, overlap.SyncNumber, overlap.Parent));
            }
        }

        /// <summary>
        /// Removes the parts of any modified ranges that fall inside the specified area.
        /// </summary>
        /// <param name="address">Start address to clear</param>
        /// <param name="size">Size to clear</param>
        public void Clear(ulong address, ulong size)
        {
            lock (_lock)
            {
                ulong endAddress = address + size;

                // A private array is used because this function can be called from any thread,
                // so it cannot use the arrays for background or foreground.
                BufferModifiedRange[] hits = new BufferModifiedRange[1];

                int hitCount = FindOverlapsNonOverlapping(address, size, ref hits);

                for (int index = 0; index < hitCount; index++)
                {
                    ClearPart(hits[index], address, endAddress);
                }
            }
        }

        /// <summary>
        /// Clear all modified ranges.
        /// </summary>
        /// <remarks>
        /// The list is emptied by resetting the inherited <c>Count</c> to zero while holding the list lock.
        /// NOTE(review): presumably the base range list treats Count as the only marker of live entries - confirm
        /// that leaving stale references in its backing storage is acceptable.
        /// </remarks>
        public void Clear()
        {
            lock (_lock)
            {
                Count = 0;
            }
        }
    }
}
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								using Ryujinx.Common.Logging;
 								using Ryujinx.Common.Pools;
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Multithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								using Ryujinx.Memory.Range;
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								using System;
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								using System.Collections.Generic;
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								using System.Linq;
 								namespace Ryujinx.Graphics.Gpu.Memory
 								{
 								    /// <summary>
 								    /// A range within a buffer that has been modified by the GPU.
 								    /// </summary>
 								    class BufferModifiedRange : IRange
 								    {
 								        /// <summary>
 								        /// Start address of the range in guest memory.
 								        /// </summary>
 								        public ulong Address { get; }
 								        /// <summary>
 								        /// Size of the range in bytes.
 								        /// </summary>
 								        public ulong Size { get; }
 								        /// <summary>
 								        /// End address of the range in guest memory.
 								        /// </summary>
 								        public ulong EndAddress => Address + Size;
 								        /// <summary>
 								        /// The GPU sync number at the time of the last modification.
 								        /// </summary>
 								        public ulong SyncNumber { get; internal set; }
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								        /// <summary>
 								        /// The range list that originally owned this range.
 								        /// </summary>
 								        public BufferModifiedRangeList Parent { get; internal set; }
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        /// <summary>
 								        /// Creates a new instance of a modified range.
 								        /// </summary>
 								        /// <param name="address">Start address of the range</param>
 								        /// <param name="size">Size of the range in bytes</param>
 								        /// <param name="syncNumber">The GPU sync number at the time of creation</param>
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								        /// <param name="parent">The range list that owns this range</param>
 								        public BufferModifiedRange(ulong address, ulong size, ulong syncNumber, BufferModifiedRangeList parent)
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        {
 								            Address = address;
 								            Size = size;
 								            SyncNumber = syncNumber;
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								            Parent = parent;
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        }
 								        /// <summary>
 								        /// Tests whether a given address range intersects this modified range.
 								        /// Both ranges are compared as half-open intervals: [start, start + size).
 								        /// </summary>
 								        /// <param name="address">Start address of the range to test</param>
 								        /// <param name="size">Size in bytes of the range to test</param>
 								        /// <returns>True if the range overlaps, false otherwise</returns>
 								        public bool OverlapsWith(ulong address, ulong size)
 								        {
 								            // Exclusive end of the range being tested.
 								            ulong queryEnd = address + size;
 								            // Overlap requires each range to start before the other one ends.
 								            return address < EndAddress && Address < queryEnd;
 								        }
 								    }
 								    /// <summary>
 								    /// A structure used to track GPU modified ranges within a buffer.
 								    /// </summary>
 								    class BufferModifiedRangeList : RangeList<BufferModifiedRange>
 								    {
-												Smaller initial size for ModifiedRangeList & directly inherit range list (#2663)

This fixes a potential regression with the new range list changes, where the cost for creating new ones would be rather large due to creating a 1024 size array. Also reduces cost for range list inheritance by using the first existing range list as a base, rather than creating a new one then adding both lists to it.

The growth size for the RangeList is now identical to its initial size. Every 32 elements was probably a little too common - now it is 1024 for most things and 8 for the buffer modified range list.

The Unmapped and SyncMethod methods have been changed to ensure that they behave properly if the range list is set null. Cleaned up a few calls to use the null-conditional operator.
											
										
										
											2021-10-04 11:38:59 -07:00
+								        private const int BackingInitialSize = 8;
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        private GpuContext _context;
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								        private Buffer _parent;
 								        private Action<ulong, ulong> _flushAction;
 								        private List<BufferMigration> _sources;
 								        private BufferMigration _migrationTarget;
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
 								        private object _lock = new object();
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								        /// <summary>
 								        /// Whether the modified range list has any entries or not.
 								        /// </summary>
 								        public bool HasRanges
 								        {
 								            get
 								            {
 								                bool anyTracked;
 								                // Count is read while holding _lock, the same lock taken by the list's query and modify methods.
 								                lock (_lock)
 								                {
 								                    anyTracked = Count > 0;
 								                }
 								                return anyTracked;
 								            }
 								        }
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        /// <summary>
 								        /// Creates a new instance of a modified range list.
 								        /// </summary>
 								        /// <param name="context">GPU context that the buffer range list belongs to</param>
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								        /// <param name="parent">The parent buffer that owns this range list</param>
 								        /// <param name="flushAction">The flush action for the parent buffer</param>
 								        public BufferModifiedRangeList(GpuContext context, Buffer parent, Action<ulong, ulong> flushAction) : base(BackingInitialSize)
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        {
 								            _context = context;
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								            _parent = parent;
 								            _flushAction = flushAction;
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        }
 								        /// <summary>
 								        /// Given an input range, calls the given action with sub-ranges which exclude any of the modified regions.
 								        /// </summary>
 								        /// <param name="address">Start address of the query range</param>
 								        /// <param name="size">Size of the query range in bytes</param>
 								        /// <param name="action">Action to perform for each remaining sub-range of the input range</param>
 								        public void ExcludeModifiedRegions(ulong address, ulong size, Action<ulong, ulong> action)
 								        {
 								            lock (_lock)
 								            {
 								                // Slices a given region using the modified regions in the list. Calls the action for the new slices.
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Multithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								                ref var overlaps = ref ThreadStaticArray<BufferModifiedRange>.Get();
 								                int count = FindOverlapsNonOverlapping(address, size, ref overlaps);
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
 								                for (int i = 0; i < count; i++)
 								                {
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Multithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								                    BufferModifiedRange overlap = overlaps[i];
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
 								                    if (overlap.Address > address)
 								                    {
 								                        // The start of the remaining region is uncovered by this overlap. Call the action for it.
 								                        action(address, overlap.Address - address);
 								                    }
 								                    // Remaining region is after this overlap.
 								                    size -= overlap.EndAddress - address;
 								                    address = overlap.EndAddress;
 								                }
 								                if ((long)size > 0)
 								                {
 								                    // If there is any region left after removing the overlaps, signal it.
 								                    action(address, size);
 								                }
 								            }
 								        }
 								        /// <summary>
 								        /// Signal that a region of the buffer has been modified, and add the new region to the range list.
 								        /// Any overlapping ranges will be (partially) removed.
 								        /// </summary>
 								        /// <param name="address">Start address of the modified region</param>
 								        /// <param name="size">Size of the modified region in bytes</param>
 								        public void SignalModified(ulong address, ulong size)
 								        {
 								            // Must lock, as this can affect flushes from the background thread.
 								            lock (_lock)
 								            {
 								                // We may overlap with some existing modified regions. They must be cut into by the new entry.
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Multithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								                ref var overlaps = ref ThreadStaticArray<BufferModifiedRange>.Get();
 								                int count = FindOverlapsNonOverlapping(address, size, ref overlaps);
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
 								                ulong endAddress = address + size;
 								                ulong syncNumber = _context.SyncNumber;
 								                for (int i = 0; i < count; i++)
 								                {
 								                    // The overlaps must be removed or split.
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Muiltithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								                    BufferModifiedRange overlap = overlaps[i];
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
 								                    if (overlap.Address == address && overlap.Size == size)
 								                    {
 								                        // Region already exists. Just update the existing sync number.
 								                        overlap.SyncNumber = syncNumber;
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								                        overlap.Parent = this;
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
 								                        return;
 								                    }
 								                    Remove(overlap);
 								                    if (overlap.Address < address && overlap.EndAddress > address)
 								                    {
 								                        // A split item must be created behind this overlap.
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								                        Add(new BufferModifiedRange(overlap.Address, address - overlap.Address, overlap.SyncNumber, overlap.Parent));
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								                    }
 								                    if (overlap.Address < endAddress && overlap.EndAddress > endAddress)
 								                    {
 								                        // A split item must be created after this overlap.
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								                        Add(new BufferModifiedRange(endAddress, overlap.EndAddress - endAddress, overlap.SyncNumber, overlap.Parent));
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								                    }
 								                }
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								                Add(new BufferModifiedRange(address, size, syncNumber, this));
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								            }
 								        }
 								        /// <summary>
 								        /// Gets modified ranges within the specified region, and then fires the given action for each range individually.
 								        /// </summary>
 								        /// <param name="address">Start address to query</param>
 								        /// <param name="size">Size to query</param>
 								        /// <param name="rangeAction">The action to call for each modified range</param>
 								        public void GetRanges(ulong address, ulong size, Action<ulong, ulong> rangeAction)
 								        {
 								            int count = 0;
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Muiltithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								            ref var overlaps = ref ThreadStaticArray<BufferModifiedRange>.Get();
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								            // Range list must be consistent for this operation.
 								            lock (_lock)
 								            {
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Muiltithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								                count = FindOverlapsNonOverlapping(address, size, ref overlaps);
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								            }
 								            for (int i = 0; i < count; i++)
 								            {
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Muiltithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								                BufferModifiedRange overlap = overlaps[i];
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								                rangeAction(overlap.Address, overlap.Size);
 								            }
 								        }
 								        /// <summary>
 								        /// Queries if a range exists within the specified region.
 								        /// </summary>
 								        /// <param name="address">Start address to query</param>
 								        /// <param name="size">Size to query</param>
 								        /// <returns>True if a range exists in the specified region, false otherwise</returns>
 								        public bool HasRange(ulong address, ulong size)
 								        {
 								            // Range list must be consistent for this operation.
 								            lock (_lock)
 								            {
 								                // Only the number of overlaps matters here, so the thread-static scratch array is used as the output buffer.
 								                return FindOverlapsNonOverlapping(address, size, ref ThreadStaticArray<BufferModifiedRange>.Get()) > 0;
 								            }
 								        }
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								        /// <summary>
 								        /// Performs the given range action, or one from a migration that overlaps and has not synced yet.
 								        /// </summary>
 								        /// <param name="offset">The offset to pass to the action</param>
 								        /// <param name="size">The size to pass to the action</param>
 								        /// <param name="syncNumber">The sync number that has been reached</param>
 								        /// <param name="parent">The modified range list that originally owned this range</param>
 								        /// <param name="rangeAction">The action to perform</param>
 								        public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferModifiedRangeList parent, Action<ulong, ulong> rangeAction)
 								        {
 								            // Becomes true once at least one overlapping migration has been asked to run its own action.
 								            bool migrationHandled = false;

 								            // Migration sources are only consulted when the range was originally owned by another list.
 								            if (parent != this)
 								            {
 								                lock (_lock)
 								                {
 								                    if (_sources != null)
 								                    {
 								                        foreach (BufferMigration migration in _sources)
 								                        {
 								                            if (!migration.Overlaps(offset, size, syncNumber))
 								                            {
 								                                continue;
 								                            }

 								                            // Only before the first overlapping migration: if it does not fully match the range,
 								                            // this buffer's own action runs first and the migrations run after it.
 								                            if (!migrationHandled && !migration.FullyMatches(offset, size))
 								                            {
 								                                rangeAction(offset, size);
 								                            }

 								                            migration.RangeActionWithMigration(offset, size, syncNumber, parent);
 								                            migrationHandled = true;
 								                        }
 								                    }
 								                }
 								            }

 								            if (!migrationHandled)
 								            {
 								                // Nothing overlapped (or the sources were not consulted), so the supplied action handles the range directly.
 								                rangeAction(offset, size);
 								            }
 								        }
 								        /// <summary>
 								        /// Walks the given overlapping ranges and, for each one whose sync number is not newer than the awaited one,
 								        /// hands the part inside the flush window to <c>ClearPart</c> and then runs the flush action on that part.
 								        /// When this list has a migration target, the whole request is forwarded to the target's list instead.
 								        /// </summary>
 								        /// <param name="overlaps">Overlapping ranges to check</param>
 								        /// <param name="rangeCount">Number of entries in <paramref name="overlaps"/> to examine</param>
 								        /// <param name="highestDiff">The highest difference between an overlapping range's sync number and the current one</param>
 								        /// <param name="currentSync">The current sync number</param>
 								        /// <param name="address">The start address of the flush range</param>
 								        /// <param name="endAddress">The end address of the flush range</param>
 								        private void RemoveRangesAndFlush(
 								            BufferModifiedRange[] overlaps,
 								            int rangeCount,
 								            long highestDiff,
 								            ulong currentSync,
 								            ulong address,
 								            ulong endAddress)
 								        {
 								            bool forwardToTarget;

 								            lock (_lock)
 								            {
 								                forwardToTarget = _migrationTarget != null;

 								                if (!forwardToTarget)
 								                {
 								                    // Sync number the caller waited on, reconstructed from the current sync and the (signed) difference.
 								                    ulong awaitedSync = currentSync + (ulong)highestDiff;

 								                    for (int index = 0; index < rangeCount; index++)
 								                    {
 								                        BufferModifiedRange range = overlaps[index];
 								                        long syncDelta = (long)(range.SyncNumber - currentSync);

 								                        if (syncDelta > highestDiff)
 								                        {
 								                            // Newer than the awaited sync number: leave this range alone.
 								                            continue;
 								                        }

 								                        // Restrict the range to the requested flush window.
 								                        ulong rangeEnd = range.EndAddress;
 								                        ulong flushStart = range.Address > address ? range.Address : address;
 								                        ulong flushEnd = rangeEnd < endAddress ? rangeEnd : endAddress;

 								                        ClearPart(range, flushStart, flushEnd);
 								                        RangeActionWithMigration(flushStart, flushEnd - flushStart, awaitedSync, range.Parent, _flushAction);
 								                    }
 								                }
 								            }

 								            if (forwardToTarget)
 								            {
 								                // There is a migration target to call instead. This can't be changed after set so accessing it outside the lock is fine.
 								                _migrationTarget.Destination.RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress);
 								            }
 								        }
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        /// <summary>
 								        /// Gets modified ranges within the specified region, waits on ones from a previous sync number,
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								        /// and then fires the flush action for each range individually.
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        /// </summary>
 								        /// <remarks>
 								        /// This function assumes it is called from the background thread.
 								        /// Modifications from the current sync number are ignored because the guest should not expect them to be available yet.
 								        /// They will remain reserved, so that any data sync prioritizes the data in the GPU.
 								        /// </remarks>
 								        /// <param name="address">Start address to query</param>
 								        /// <param name="size">Size to query</param>
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								        public void WaitForAndFlushRanges(ulong address, ulong size)
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        {
 								            ulong endAddress = address + size;
 								            ulong currentSync = _context.SyncNumber;
 								            int rangeCount = 0;
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Muiltithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								            ref var overlaps = ref ThreadStaticArray<BufferModifiedRange>.Get();
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								            // Range list must be consistent for this operation
 								            lock (_lock)
 								            {
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								                if (_migrationTarget != null)
 								                {
 								                    rangeCount = -1;
 								                }
 								                else
 								                {
 								                    rangeCount = FindOverlapsNonOverlapping(address, size, ref overlaps);
 								                }
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								            }
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								            if (rangeCount == -1)
 								            {
 								                _migrationTarget.Destination.WaitForAndFlushRanges(address, size);
 								                return;
 								            }
 								            else if (rangeCount == 0)
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								            {
 								                return;
 								            }
 								            // First, determine which syncpoint to wait on.
 								            // This is the latest syncpoint that is not equal to the current sync.
 								            long highestDiff = long.MinValue;
 								            for (int i = 0; i < rangeCount; i++)
 								            {
-												Add a Multithreading layer for the GAL, multi-thread shader compilation at runtime (#2501)

* Initial Implementation

About as fast as nvidia GL multithreading, can be improved with faster command queuing.

* Struct based command list

Speeds up a bit. Still a lot of time lost to resource copy.

* Do shader init while the render thread is active.

* Introduce circular span pool V1

Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.

* Refactor SpanRef some more

Use a struct to represent SpanRef, rather than a reference.

* Flush buffers on background thread

* Use a span for UpdateRenderScale.

Much faster than copying the array.

* Calculate command size using reflection

* WIP parallel shaders

* Some minor optimisation

* Only 2 max refs per command now.

The command with 3 refs is gone. :relieved:

* Don't cast on the GPU side

* Remove redundant casts, force sync on window present

* Fix Shader Cache

* Fix host shader save.

* Fixup to work with new renderer stuff

* Make command Run static, use array of delegates as lookup

Profile says this takes less time than the previous way.

* Bring up to date

* Add settings toggle. Fix Muiltithreading Off mode.

* Fix warning.

* Release tracking lock for flushes

* Fix Conditional Render fast path with threaded gal

* Make handle iteration safe when releasing the lock

This is mostly temporary.

* Attempt to set backend threading on driver

Only really works on nvidia before launching a game.

* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions

* Update buffer set commands

* Some cleanup

* Only use stutter workaround when using opengl renderer non-threaded

* Add host-conditional reservation of counter events

There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.

* Address Feedback

* Make counter flush tracked again.

Hopefully does not cause any issues this time.

* Wait for FlushTo on the main queue thread.

Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)

* Add SDL2 headless integration

* Add HLE macro commands.

Co-authored-by: Mary <mary@mary.zone>
											
										
										
											2021-08-26 15:31:29 -07:00
+								                BufferModifiedRange overlap = overlaps[i];
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Sucessfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
 								                long diff = (long)(overlap.SyncNumber - currentSync);
 								                if (diff < 0 && diff > highestDiff)
 								                {
 								                    highestDiff = diff;
 								                }
 								            }
 								            if (highestDiff == long.MinValue)
 								            {
 								                return;
 								            }
 								            // Wait for the syncpoint.
 								            _context.Renderer.WaitSync(currentSync + (ulong)highestDiff);
-												GPU: Track buffer migrations and flush source on incomplete copy (#3952)

* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
											
										
										
											2022-12-01 07:30:13 -08:00
+								            RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress);
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        }
 								        /// <summary>
 								        /// Takes over the modified ranges tracked by another range list.
 								        /// </summary>
 								        /// <param name="ranges">The range list whose ranges are inherited</param>
 								        /// <param name="registerRangeAction">Callback invoked with the address and size of each inherited range whose sync number differs from the context's current sync number</param>
 								        public void InheritRanges(BufferModifiedRangeList ranges, Action<ulong, ulong> registerRangeAction)
 								        {
 								            BufferModifiedRange[] inherited;
 								            lock (ranges._lock)
 								            {
 								                // Describe the hand-over as a migration from the source list to this one,
 								                // then publish it on the source list and on the GPU context.
 								                var migration = new BufferMigration(ranges._parent, ranges._flushAction, ranges, this, _context.SyncNumber);
 								                ranges._parent.IncrementReferenceCount();
 								                ranges._migrationTarget = migration;
 								                _context.RegisterBufferMigration(migration);
 								                // Snapshot the source ranges while the source lock is still held.
 								                inherited = ranges.ToArray();
 								                lock (_lock)
 								                {
 								                    // The list of source migrations is created on first use.
 								                    _sources ??= new List<BufferMigration>();
 								                    _sources.Add(migration);
 								                    for (int i = 0; i < inherited.Length; i++)
 								                    {
 								                        Add(inherited[i]);
 								                    }
 								                }
 								            }
 								            // Both locks are released before the callback runs.
 								            ulong syncNow = _context.SyncNumber;
 								            foreach (BufferModifiedRange inheritedRange in inherited)
 								            {
 								                // Ranges stamped with the current sync number are not reported to the callback.
 								                if (inheritedRange.SyncNumber == syncNow)
 								                {
 								                    continue;
 								                }
 								                registerRangeAction(inheritedRange.Address, inheritedRange.Size);
 								            }
 								        }
-												Reregister flush actions when taking a buffer's modified range list.

Fixes a regression from #2663 where buffer flush would not happen after a resize. Specifically caused the world map in Yoshi's Crafted World to flash.

I have other planned changes to this class so this might change soon, but this regression could affect a lot so it couldn't wait.

											
										
										
											2021-10-06 16:00:56 -07:00
+								        /// <summary>
 								        /// Removes a source buffer migration, indicating its copy has completed.
 								        /// Does nothing if the migration is not tracked by this list.
 								        /// </summary>
 								        /// <param name="migration">The migration to remove</param>
 								        public void RemoveMigration(BufferMigration migration)
 								        {
 								            lock (_lock)
 								            {
 								                // The source list is allocated lazily when the first migration is added,
 								                // so it can still be null here; in that case there is nothing to remove.
 								                _sources?.Remove(migration);
 								            }
 								        }
-												Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790)

* Initial implementation of buffer flush (VERY WIP)

* Host shaders need to be rebuilt for the SSBO write flag.

* New approach with reserved regions and gl sync

* Fix a ton of buffer issues.

* Remove unused buffer unmapped behaviour

* Revert "Remove unused buffer unmapped behaviour"

This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece.

* Delete modified ranges on unmap

Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap.

* Cache some more delegates.

* Dispose Sync on Close

* Also create host sync for GPFifo syncpoint increment.

* Copy buffer optimization, add docs

* Fix race condition with OpenGL Sync

* Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle

* Performance: Only flush individual pages of SSBO at a time

This avoids flushing large amounts of data when only a small amount is actually used.

* Signal Modified rather than flushing after clear

* Fix some docs and code style.

* Introduce a new test for tracking memory protection.

Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master)

* Address Comments

* Add host sync for SetReference

This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise.

* Make PageAlign static

* Re-enable read tracking, for reads.
											
										
										
											2021-01-17 12:08:06 -08:00
+								        /// <summary>
 								        /// Removes a tracked range from the list, re-adding any portion of it that lies outside of the cleared region.
 								        /// </summary>
 								        /// <param name="overlap">The tracked range to remove</param>
 								        /// <param name="address">Start address of the cleared region</param>
 								        /// <param name="endAddress">End address of the cleared region</param>
 								        private void ClearPart(BufferModifiedRange overlap, ulong address, ulong endAddress)
 								        {
 								            ulong overlapStart = overlap.Address;
 								            ulong overlapEnd = overlap.EndAddress;
 								            Remove(overlap);
 								            // Keep the piece that starts before the cleared region, with the original sync number and parent.
 								            if (address > overlapStart)
 								            {
 								                Add(new BufferModifiedRange(overlapStart, address - overlapStart, overlap.SyncNumber, overlap.Parent));
 								            }
 								            // Keep the piece that extends past the end of the cleared region as well.
 								            if (endAddress < overlapEnd)
 								            {
 								                Add(new BufferModifiedRange(endAddress, overlapEnd - endAddress, overlap.SyncNumber, overlap.Parent));
 								            }
 								        }
 								        /// <summary>
 								        /// Clears the part of every modified range that falls inside the given region.
 								        /// </summary>
 								        /// <param name="address">Start address of the region to clear</param>
 								        /// <param name="size">Size of the region to clear</param>
 								        public void Clear(ulong address, ulong size)
 								        {
 								            ulong endAddress = address + size;
 								            lock (_lock)
 								            {
 								                // Callers may be on any thread, so a private scratch array is used instead of the background or foreground arrays.
 								                BufferModifiedRange[] overlaps = new BufferModifiedRange[1];
 								                int overlapCount = FindOverlapsNonOverlapping(address, size, ref overlaps);
 								                for (int index = 0; index != overlapCount; ++index)
 								                {
 								                    ClearPart(overlaps[index], address, endAddress);
 								                }
 								            }
 								        }
 								        /// <summary>
 								        /// Discards every modified range tracked by this list.
 								        /// </summary>
 								        public void Clear()
 								        {
 								            lock (_lock)
 								            {
 								                // Resetting the element count is what empties the list.
 								                Count = 0;
 								            }
 								        }
 								    }
 								}