Move solution and projects to src

This commit is contained in:
TSR Berry
2023-04-08 01:22:00 +02:00
committed by Mary
parent cd124bda58
commit cee7121058
3466 changed files with 55 additions and 55 deletions

View File

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.Shader
{
/// <summary>
/// Comparison operations selectable for the alpha test.
/// </summary>
/// <remarks>
/// Numbering starts at 1. The values are written out explicitly here, and they
/// are the same ones the compiler assigns implicitly after <c>Never = 1</c>.
/// </remarks>
public enum AlphaTestOp
{
    Never = 1,
    Less = 2,
    Equal = 3,
    LessOrEqual = 4,
    Greater = 5,
    NotEqual = 6,
    GreaterOrEqual = 7,
    Always = 8
}
}

View File

@ -0,0 +1,38 @@
using Ryujinx.Graphics.Shader.Translation;
using System;
namespace Ryujinx.Graphics.Shader
{
/// <summary>
/// Component type of a shader attribute, stored as a single byte.
/// </summary>
public enum AttributeType : byte
{
    // Generic types.
    Float = 0,
    Sint = 1,
    Uint = 2
}
/// <summary>
/// Conversions from <see cref="AttributeType"/> to other type representations.
/// </summary>
static class AttributeTypeExtensions
{
    /// <summary>
    /// Gets the name of the GLSL four component vector type for an attribute type.
    /// </summary>
    /// <param name="type">Attribute type to convert</param>
    /// <returns>"vec4", "ivec4" or "uvec4"</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="type"/> is not a known value</exception>
    public static string ToVec4Type(this AttributeType type)
    {
        switch (type)
        {
            case AttributeType.Float:
                return "vec4";
            case AttributeType.Sint:
                return "ivec4";
            case AttributeType.Uint:
                return "uvec4";
            default:
                throw new ArgumentException($"Invalid attribute type \"{type}\".");
        }
    }

    /// <summary>
    /// Gets the scalar <see cref="AggregateType"/> matching an attribute type.
    /// </summary>
    /// <param name="type">Attribute type to convert</param>
    /// <returns>FP32, S32 or U32 aggregate type</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="type"/> is not a known value</exception>
    public static AggregateType ToAggregateType(this AttributeType type)
    {
        switch (type)
        {
            case AttributeType.Float:
                return AggregateType.FP32;
            case AttributeType.Sint:
                return AggregateType.S32;
            case AttributeType.Uint:
                return AggregateType.U32;
            default:
                throw new ArgumentException($"Invalid attribute type \"{type}\".");
        }
    }
}
}

View File

@ -0,0 +1,26 @@
namespace Ryujinx.Graphics.Shader
{
/// <summary>
/// Describes a buffer used by a shader: its binding, its slot and its usage flags.
/// </summary>
public struct BufferDescriptor
{
    // New fields should be added to the end of the struct to keep disk shader cache compatibility.

    public readonly int Binding;
    public readonly int Slot;
    public BufferUsageFlags Flags;

    /// <summary>
    /// Creates a descriptor with no usage flags set.
    /// </summary>
    /// <param name="binding">Value stored in <see cref="Binding"/></param>
    /// <param name="slot">Value stored in <see cref="Slot"/></param>
    public BufferDescriptor(int binding, int slot)
    {
        (Binding, Slot, Flags) = (binding, slot, BufferUsageFlags.None);
    }

    /// <summary>
    /// Adds a usage flag to this descriptor.
    /// </summary>
    /// <param name="flag">Flag (or flags) to set</param>
    /// <returns>A copy of the descriptor taken after the flag was set</returns>
    public BufferDescriptor SetFlag(BufferUsageFlags flag)
    {
        Flags = Flags | flag;

        return this;
    }
}
}

View File

@ -0,0 +1,18 @@
using System;
namespace Ryujinx.Graphics.Shader
{
/// <summary>
/// Flags that indicate how a buffer will be used in a shader.
/// </summary>
[Flags]
public enum BufferUsageFlags
{
    /// <summary>
    /// No usage flag is set.
    /// </summary>
    None = 0x0,

    /// <summary>
    /// Buffer is written to.
    /// </summary>
    Write = 0x1
}
}

View File

@ -0,0 +1,95 @@
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System.Text;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
/// <summary>
/// Accumulates generated source text and tracks the current scope nesting,
/// prefixing every appended line with the indentation of the current level.
/// </summary>
class CodeGenContext
{
    public const string Tab = " ";

    public StructuredFunction CurrentFunction { get; set; }

    public StructuredProgramInfo Info { get; }

    public ShaderConfig Config { get; }

    public OperandManager OperandManager { get; }

    private readonly StringBuilder _sb;

    private int _level;

    private string _indentation;

    /// <summary>
    /// Creates a context with an empty output buffer at nesting level zero.
    /// </summary>
    /// <param name="info">Structured program information exposed through <see cref="Info"/></param>
    /// <param name="config">Shader configuration exposed through <see cref="Config"/></param>
    public CodeGenContext(StructuredProgramInfo info, ShaderConfig config)
    {
        Info = info;
        Config = config;

        OperandManager = new OperandManager();

        _sb = new StringBuilder();
    }

    /// <summary>
    /// Appends an empty line (no indentation is written).
    /// </summary>
    public void AppendLine()
    {
        _sb.AppendLine();
    }

    /// <summary>
    /// Appends a line of text, prefixed with the current indentation.
    /// </summary>
    /// <param name="str">Text of the line</param>
    public void AppendLine(string str)
    {
        // Appending a null indentation (before any scope was entered) writes nothing.
        _sb.Append(_indentation).AppendLine(str);
    }

    /// <summary>
    /// Gets all the text appended so far.
    /// </summary>
    /// <returns>The generated code</returns>
    public string GetCode()
    {
        return _sb.ToString();
    }

    /// <summary>
    /// Writes an opening brace at the current level, then increases the nesting level.
    /// </summary>
    public void EnterScope()
    {
        AppendLine("{");

        _level++;

        UpdateIndentation();
    }

    /// <summary>
    /// Decreases the nesting level, then writes a closing brace followed by <paramref name="suffix"/>.
    /// Does nothing when no scope is open.
    /// </summary>
    /// <param name="suffix">Text written right after the closing brace</param>
    public void LeaveScope(string suffix = "")
    {
        if (_level != 0)
        {
            _level--;

            UpdateIndentation();

            AppendLine("}" + suffix);
        }
    }

    /// <summary>
    /// Gets a function of the program by its index.
    /// </summary>
    /// <param name="id">Index into <see cref="StructuredProgramInfo.Functions"/></param>
    /// <returns>The function at that index</returns>
    public StructuredFunction GetFunction(int id)
    {
        return Info.Functions[id];
    }

    private void UpdateIndentation()
    {
        _indentation = GetIndentation(_level);
    }

    /// <summary>
    /// Builds the indentation string for a nesting level: <see cref="Tab"/> repeated <paramref name="level"/> times.
    /// </summary>
    private static string GetIndentation(int level)
    {
        StringBuilder builder = new StringBuilder();

        for (int remaining = level; remaining > 0; remaining--)
        {
            builder.Append(Tab);
        }

        return builder.ToString();
    }
}
}

View File

@ -0,0 +1,818 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
static class Declarations
{
/// <summary>
/// Writes the GLSL preamble and all global declarations of the shader to the context:
/// version and extension directives, local/shared memory arrays, uniform and storage
/// buffers, samplers, images, stage layout qualifiers, input/output attributes,
/// the support uniform block and the helper functions requested by the program.
/// </summary>
/// <param name="context">Code generation context that receives the text and supplies the shader configuration</param>
/// <param name="info">Structured program information (attribute declarations and helper function mask)</param>
public static void Declare(CodeGenContext context, StructuredProgramInfo info)
{
// The GLSL version depends on the target API.
context.AppendLine(context.Config.Options.TargetApi == TargetApi.Vulkan ? "#version 460 core" : "#version 450 core");
context.AppendLine("#extension GL_ARB_gpu_shader_int64 : enable");
// Use the ARB ballot extension when the host reports support, otherwise the KHR subgroup extensions.
if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
{
context.AppendLine("#extension GL_ARB_shader_ballot : enable");
}
else
{
context.AppendLine("#extension GL_KHR_shader_subgroup_basic : enable");
context.AppendLine("#extension GL_KHR_shader_subgroup_ballot : enable");
}
context.AppendLine("#extension GL_ARB_shader_group_vote : enable");
context.AppendLine("#extension GL_EXT_shader_image_load_formatted : enable");
context.AppendLine("#extension GL_EXT_texture_shadow_lod : enable");
// Stage specific extensions.
if (context.Config.Stage == ShaderStage.Compute)
{
context.AppendLine("#extension GL_ARB_compute_shader : enable");
}
else if (context.Config.Stage == ShaderStage.Fragment)
{
// Only one of the two extensions is enabled, the ARB one taking priority.
if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
{
context.AppendLine("#extension GL_ARB_fragment_shader_interlock : enable");
}
else if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel())
{
context.AppendLine("#extension GL_INTEL_fragment_shader_ordering : enable");
}
}
else
{
if (context.Config.Stage == ShaderStage.Vertex)
{
context.AppendLine("#extension GL_ARB_shader_draw_parameters : enable");
}
context.AppendLine("#extension GL_ARB_shader_viewport_layer_array : enable");
}
if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
{
context.AppendLine("#extension GL_NV_geometry_shader_passthrough : enable");
}
if (context.Config.GpuAccessor.QueryHostSupportsViewportMask())
{
context.AppendLine("#extension GL_NV_viewport_array2 : enable");
}
context.AppendLine("#pragma optionNV(fastmath off)");
context.AppendLine();
context.AppendLine($"const int {DefaultNames.UndefinedName} = 0;");
context.AppendLine();
// Local (and, for compute, shared) memory is declared as an array of uint.
// The element count is the queried size divided by 4, rounded up.
if (context.Config.Stage == ShaderStage.Compute)
{
int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4);
if (localMemorySize != 0)
{
string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
context.AppendLine();
}
int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4);
if (sharedMemorySize != 0)
{
string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize);
context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];");
context.AppendLine();
}
}
else if (context.Config.LocalMemorySize != 0)
{
int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);
string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
context.AppendLine();
}
// Resource declarations; each group is emitted only when it has at least one descriptor.
var cBufferDescriptors = context.Config.GetConstantBufferDescriptors();
if (cBufferDescriptors.Length != 0)
{
DeclareUniforms(context, cBufferDescriptors);
context.AppendLine();
}
var sBufferDescriptors = context.Config.GetStorageBufferDescriptors();
if (sBufferDescriptors.Length != 0)
{
DeclareStorages(context, sBufferDescriptors);
context.AppendLine();
}
var textureDescriptors = context.Config.GetTextureDescriptors();
if (textureDescriptors.Length != 0)
{
DeclareSamplers(context, textureDescriptors);
context.AppendLine();
}
var imageDescriptors = context.Config.GetImageDescriptors();
if (imageDescriptors.Length != 0)
{
DeclareImages(context, imageDescriptors);
context.AppendLine();
}
// Graphics stages declare their layout qualifiers and attributes; compute declares its local size.
if (context.Config.Stage != ShaderStage.Compute)
{
if (context.Config.Stage == ShaderStage.Geometry)
{
InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology();
string inPrimitive = inputTopology.ToGlslString();
context.AppendLine($"layout (invocations = {context.Config.ThreadsPerInputPrimitive}, {inPrimitive}) in;");
if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
{
// Host passthrough: redeclare gl_PerVertex as a passthrough input, no output layout is written.
context.AppendLine($"layout (passthrough) in gl_PerVertex");
context.EnterScope();
context.AppendLine("vec4 gl_Position;");
context.AppendLine("float gl_PointSize;");
context.AppendLine("float gl_ClipDistance[];");
context.LeaveScope(";");
}
else
{
string outPrimitive = context.Config.OutputTopology.ToGlslString();
// Passthrough without host support outputs as many vertices as the input primitive has.
int maxOutputVertices = context.Config.GpPassthrough
? inputTopology.ToInputVertices()
: context.Config.MaxOutputVertices;
context.AppendLine($"layout ({outPrimitive}, max_vertices = {maxOutputVertices}) out;");
}
context.AppendLine();
}
else if (context.Config.Stage == ShaderStage.TessellationControl)
{
int threadsPerInputPrimitive = context.Config.ThreadsPerInputPrimitive;
context.AppendLine($"layout (vertices = {threadsPerInputPrimitive}) out;");
context.AppendLine();
}
else if (context.Config.Stage == ShaderStage.TessellationEvaluation)
{
bool tessCw = context.Config.GpuAccessor.QueryTessCw();
if (context.Config.Options.TargetApi == TargetApi.Vulkan)
{
// We invert the front face on Vulkan backend, so we need to do that here as well.
tessCw = !tessCw;
}
string patchType = context.Config.GpuAccessor.QueryTessPatchType().ToGlsl();
string spacing = context.Config.GpuAccessor.QueryTessSpacing().ToGlsl();
string windingOrder = tessCw ? "cw" : "ccw";
context.AppendLine($"layout ({patchType}, {spacing}, {windingOrder}) in;");
context.AppendLine();
}
if (context.Config.UsedInputAttributes != 0 || context.Config.GpPassthrough)
{
DeclareInputAttributes(context, info);
context.AppendLine();
}
// Outputs are always declared for non-fragment stages, even when the used mask is zero.
if (context.Config.UsedOutputAttributes != 0 || context.Config.Stage != ShaderStage.Fragment)
{
DeclareOutputAttributes(context, info);
context.AppendLine();
}
if (context.Config.UsedInputAttributesPerPatch.Count != 0)
{
DeclareInputAttributesPerPatch(context, context.Config.UsedInputAttributesPerPatch);
context.AppendLine();
}
if (context.Config.UsedOutputAttributesPerPatch.Count != 0)
{
DeclareUsedOutputAttributesPerPatch(context, context.Config.UsedOutputAttributesPerPatch);
context.AppendLine();
}
// When the position is captured by transform feedback, gl_PerVertex is redeclared with xfb qualifiers.
if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline)
{
var tfOutput = context.Config.GetTransformFeedbackOutput(AttributeConsts.PositionX);
if (tfOutput.Valid)
{
context.AppendLine($"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) out gl_PerVertex");
context.EnterScope();
context.AppendLine("vec4 gl_Position;");
context.LeaveScope(context.Config.Stage == ShaderStage.TessellationControl ? " gl_out[];" : ";");
}
}
}
else
{
string localSizeX = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeX());
string localSizeY = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeY());
string localSizeZ = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeZ());
context.AppendLine(
"layout (" +
$"local_size_x = {localSizeX}, " +
$"local_size_y = {localSizeY}, " +
$"local_size_z = {localSizeZ}) in;");
context.AppendLine();
}
// Support uniform block (fragment, compute and vertex stages only).
bool isFragment = context.Config.Stage == ShaderStage.Fragment;
if (isFragment || context.Config.Stage == ShaderStage.Compute || context.Config.Stage == ShaderStage.Vertex)
{
if (isFragment && context.Config.GpuAccessor.QueryEarlyZForce())
{
context.AppendLine("layout(early_fragment_tests) in;");
context.AppendLine();
}
if ((context.Config.UsedFeatures & (FeatureFlags.FragCoordXY | FeatureFlags.IntegerSampling)) != 0)
{
string stage = OperandManager.GetShaderStagePrefix(context.Config.Stage);
// One scale element per texture and per image.
int scaleElements = context.Config.GetTextureDescriptors().Length + context.Config.GetImageDescriptors().Length;
if (isFragment)
{
scaleElements++; // Also includes render target scale, for gl_FragCoord.
}
DeclareSupportUniformBlock(context, context.Config.Stage, scaleElements);
if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IntegerSampling) && scaleElements != 0)
{
AppendHelperFunction(context, $"Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_{stage}.glsl");
context.AppendLine();
}
}
else if (isFragment || context.Config.Stage == ShaderStage.Vertex)
{
DeclareSupportUniformBlock(context, context.Config.Stage, 0);
}
}
// Helper functions, appended in a fixed order, one per flag set on the helper functions mask.
if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Storage) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighU32) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleDown) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleUp) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleXor) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreSharedSmallInt) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreStorageSmallInt) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");
}
}
/// <summary>
/// Builds the transform feedback layout qualifier for an output.
/// </summary>
/// <param name="tfOutput">Transform feedback output information</param>
/// <returns>The layout qualifier followed by a space, or an empty string when the output is not valid</returns>
private static string GetTfLayout(TransformFeedbackOutput tfOutput)
{
    return tfOutput.Valid
        ? $"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) "
        : string.Empty;
}
/// <summary>
/// Declares every local variable of a function, one declaration per line.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="function">Function whose locals are declared</param>
public static void DeclareLocals(CodeGenContext context, StructuredFunction function)
{
    foreach (AstOperand local in function.Locals)
    {
        // The local is registered on the operand manager first, which provides its name.
        string localName = context.OperandManager.DeclareLocal(local);
        string typeName = GetVarTypeName(context, local.VarType);

        context.AppendLine($"{typeName} {localName};");
    }
}
/// <summary>
/// Gets the GLSL type name used to declare a variable of the given type.
/// </summary>
/// <param name="context">Code generation context (used to query whether the host uses reduced precision)</param>
/// <param name="type">Variable type</param>
/// <param name="precise">True to add the "precise" qualifier to 32-bit float types; ignored when the host reports reduced precision</param>
/// <returns>The GLSL type name</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="type"/> is not one of the supported combinations</exception>
public static string GetVarTypeName(CodeGenContext context, AggregateType type, bool precise = true)
{
    bool reducedPrecision = context.Config.GpuAccessor.QueryHostReducedPrecision();
    bool emitPrecise = precise && !reducedPrecision;

    // Only 32-bit float types take the "precise" qualifier.
    string Fp32(string glslType) => emitPrecise ? "precise " + glslType : glslType;

    switch (type)
    {
        case AggregateType.Void:
            return "void";
        case AggregateType.Bool:
            return "bool";
        case AggregateType.FP32:
            return Fp32("float");
        case AggregateType.FP64:
            return "double";
        case AggregateType.S32:
            return "int";
        case AggregateType.U32:
            return "uint";

        case AggregateType.Vector2 | AggregateType.Bool:
            return "bvec2";
        case AggregateType.Vector2 | AggregateType.FP32:
            return Fp32("vec2");
        case AggregateType.Vector2 | AggregateType.FP64:
            return "dvec2";
        case AggregateType.Vector2 | AggregateType.S32:
            return "ivec2";
        case AggregateType.Vector2 | AggregateType.U32:
            return "uvec2";

        case AggregateType.Vector3 | AggregateType.Bool:
            return "bvec3";
        case AggregateType.Vector3 | AggregateType.FP32:
            return Fp32("vec3");
        case AggregateType.Vector3 | AggregateType.FP64:
            return "dvec3";
        case AggregateType.Vector3 | AggregateType.S32:
            return "ivec3";
        case AggregateType.Vector3 | AggregateType.U32:
            return "uvec3";

        case AggregateType.Vector4 | AggregateType.Bool:
            return "bvec4";
        case AggregateType.Vector4 | AggregateType.FP32:
            return Fp32("vec4");
        case AggregateType.Vector4 | AggregateType.FP64:
            return "dvec4";
        case AggregateType.Vector4 | AggregateType.S32:
            return "ivec4";
        case AggregateType.Vector4 | AggregateType.U32:
            return "uvec4";

        default:
            throw new ArgumentException($"Invalid variable type \"{type}\".");
    }
}
/// <summary>
/// Declares the uniform blocks backing the constant buffers.
/// With constant buffer indexing, a single block array is declared; otherwise one block per descriptor.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="descriptors">Constant buffer descriptors (must not be empty when indexing is used, as the highest slot is taken from it)</param>
private static void DeclareUniforms(CodeGenContext context, BufferDescriptor[] descriptors)
{
    // Each block holds an array of vec4, so the element count is the buffer size over 16.
    string arraySuffix = "[" + NumberFormatter.FormatInt(Constants.ConstantBufferSize / 16) + "]";

    if (!context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing))
    {
        foreach (BufferDescriptor descriptor in descriptors)
        {
            string stagePrefix = OperandManager.GetShaderStagePrefix(context.Config.Stage);
            string uniformName = stagePrefix + "_" + DefaultNames.UniformNamePrefix + descriptor.Slot;

            context.AppendLine($"layout (binding = {descriptor.Binding}, std140) uniform {uniformName}");
            context.EnterScope();
            context.AppendLine("vec4 " + OperandManager.GetUbName(context.Config.Stage, descriptor.Slot, false) + arraySuffix + ";");
            context.LeaveScope(";");
        }

        return;
    }

    string arrayName = OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.UniformNamePrefix;
    string blockName = $"{arrayName}_{DefaultNames.BlockSuffix}";

    context.AppendLine($"layout (binding = {context.Config.FirstConstantBufferBinding}, std140) uniform {blockName}");
    context.EnterScope();
    context.AppendLine("vec4 " + DefaultNames.DataName + arraySuffix + ";");

    // The block array must be large enough to be indexed by the highest used slot.
    int blockCount = descriptors.Max(x => x.Slot) + 1;

    context.LeaveScope($" {arrayName}[{NumberFormatter.FormatInt(blockCount)}];");
}
/// <summary>
/// Declares the storage buffers as a single array of buffer blocks, each holding an unsized uint array.
/// </summary>
/// <param name="context">Code generation context that receives the declaration</param>
/// <param name="descriptors">Storage buffer descriptors (must not be empty, as the highest slot is taken from it)</param>
private static void DeclareStorages(CodeGenContext context, BufferDescriptor[] descriptors)
{
    string arrayName = OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix;
    string blockName = $"{arrayName}_{DefaultNames.BlockSuffix}";

    // On Vulkan the descriptor set is part of the layout qualifier.
    string setQualifier = string.Empty;

    if (context.Config.Options.TargetApi == TargetApi.Vulkan)
    {
        setQualifier = ", set = 1";
    }

    context.AppendLine($"layout (binding = {context.Config.FirstStorageBufferBinding}{setQualifier}, std430) buffer {blockName}");
    context.EnterScope();
    context.AppendLine("uint " + DefaultNames.DataName + "[];");

    // The block array must be large enough to be indexed by the highest used slot.
    int blockCount = descriptors.Max(x => x.Slot) + 1;

    context.LeaveScope($" {arrayName}[{NumberFormatter.FormatInt(blockCount)}];");
}
/// <summary>
/// Declares one sampler uniform per texture descriptor.
/// For indexed samplers, a declaration is written for the first descriptor of a run and
/// the following descriptors are skipped while the array size counter counts down.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="descriptors">Texture descriptors to declare</param>
private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
{
    int arraySize = 0;

    foreach (var descriptor in descriptors)
    {
        bool isIndexed = descriptor.Type.HasFlag(SamplerType.Indexed);

        if (isIndexed)
        {
            if (arraySize != 0)
            {
                arraySize--;

                if (arraySize != 0)
                {
                    // Still inside an array that was already declared.
                    continue;
                }
            }
            else
            {
                arraySize = ShaderConfig.SamplerArraySize;
            }
        }

        string indexExpr = NumberFormatter.FormatInt(arraySize);

        string samplerName = OperandManager.GetSamplerName(
            context.Config.Stage,
            descriptor.CbufSlot,
            descriptor.HandleIndex,
            isIndexed,
            indexExpr);

        string samplerTypeName = descriptor.Type.ToGlslSamplerType();

        // On Vulkan the descriptor set is part of the layout qualifier.
        string setQualifier = context.Config.Options.TargetApi == TargetApi.Vulkan ? ", set = 2" : string.Empty;

        context.AppendLine($"layout (binding = {descriptor.Binding}{setQualifier}) uniform {samplerTypeName} {samplerName};");
    }
}
/// <summary>
/// Declares one image uniform per image descriptor.
/// For indexed images, a declaration is written for the first descriptor of a run and
/// the following descriptors are skipped while the array size counter counts down.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="descriptors">Image descriptors to declare</param>
private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors)
{
    int arraySize = 0;

    foreach (var descriptor in descriptors)
    {
        bool isIndexed = descriptor.Type.HasFlag(SamplerType.Indexed);

        if (isIndexed)
        {
            if (arraySize != 0)
            {
                arraySize--;

                if (arraySize != 0)
                {
                    // Still inside an array that was already declared.
                    continue;
                }
            }
            else
            {
                arraySize = ShaderConfig.SamplerArraySize;
            }
        }

        string indexExpr = NumberFormatter.FormatInt(arraySize);

        string imageName = OperandManager.GetImageName(
            context.Config.Stage,
            descriptor.CbufSlot,
            descriptor.HandleIndex,
            descriptor.Format,
            isIndexed,
            indexExpr);

        string imageTypeName = descriptor.Type.ToGlslImageType(descriptor.Format.GetComponentType());

        if (descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent))
        {
            imageTypeName = "coherent " + imageTypeName;
        }

        // Extra layout qualifiers: the descriptor set (Vulkan only) comes before the format (when there is one).
        string formatQualifier = descriptor.Format.ToGlslFormat();
        string qualifiers = string.IsNullOrEmpty(formatQualifier) ? formatQualifier : ", " + formatQualifier;

        if (context.Config.Options.TargetApi == TargetApi.Vulkan)
        {
            qualifiers = $", set = 3{qualifiers}";
        }

        context.AppendLine($"layout (binding = {descriptor.Binding}{qualifiers}) uniform {imageTypeName} {imageName};");
    }
}
/// <summary>
/// Declares the input attributes of the shader.
/// With input attribute indexing, a single vec4 array is declared; otherwise one
/// declaration is written per bit set on the used and passthrough attribute masks.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="info">Structured program information, forwarded to the per-attribute declaration</param>
private static void DeclareInputAttributes(CodeGenContext context, StructuredProgramInfo info)
{
    if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IaIndexing))
    {
        string suffix = context.Config.Stage == ShaderStage.Geometry ? "[]" : string.Empty;

        context.AppendLine($"layout (location = 0) in vec4 {DefaultNames.IAttributePrefix}{suffix}[{Constants.MaxAttributes}];");

        return;
    }

    // Visit the set bits from the lowest to the highest.
    for (int pending = context.Config.UsedInputAttributes | context.Config.PassthroughAttributes; pending != 0;)
    {
        int attr = BitOperations.TrailingZeroCount(pending);

        pending &= ~(1 << attr);

        DeclareInputAttribute(context, info, attr);
    }
}
/// <summary>
/// Declares the per-patch input attributes, in ascending attribute order.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="attrs">Set of per-patch attributes to declare</param>
private static void DeclareInputAttributesPerPatch(CodeGenContext context, HashSet<int> attrs)
{
    int[] sortedAttrs = attrs.ToArray();

    Array.Sort(sortedAttrs);

    for (int i = 0; i < sortedAttrs.Length; i++)
    {
        DeclareInputAttributePerPatch(context, sortedAttrs[i]);
    }
}
/// <summary>
/// Declares a single input attribute.
/// On fragment shaders with transform feedback enabled, the attribute is declared as an
/// optional vector followed by one float per remaining component; otherwise as one 4 component vector.
/// </summary>
/// <param name="context">Code generation context that receives the declaration</param>
/// <param name="info">Structured program information (not used by this method)</param>
/// <param name="attr">Attribute index, also used as location</param>
private static void DeclareInputAttribute(CodeGenContext context, StructuredProgramInfo info, int attr)
{
    bool isFragment = context.Config.Stage == ShaderStage.Fragment;

    string suffix = IsArrayAttributeGlsl(context.Config.Stage, isOutAttr: false) ? "[]" : string.Empty;

    // Interpolation qualifier, only present on fragment shader inputs.
    string iq = string.Empty;

    if (isFragment)
    {
        switch (context.Config.ImapTypes[attr].GetFirstUsedType())
        {
            case PixelImap.Constant:
                iq = "flat ";
                break;
            case PixelImap.ScreenLinear:
                iq = "noperspective ";
                break;
        }
    }

    string name = $"{DefaultNames.IAttributePrefix}{attr}";

    if (context.Config.TransformFeedbackEnabled && isFragment)
    {
        int components = context.Config.GetTransformFeedbackOutputComponents(attr, 0);
        int firstScalar = 0;

        if (components > 1)
        {
            string type;

            switch (components)
            {
                case 2:
                    type = "vec2";
                    break;
                case 3:
                    type = "vec3";
                    break;
                case 4:
                    type = "vec4";
                    break;
                default:
                    type = "float";
                    break;
            }

            context.AppendLine($"layout (location = {attr}) in {type} {name};");

            // The components covered by the vector are not declared again as scalars.
            firstScalar = components;
        }

        for (int c = firstScalar; c < 4; c++)
        {
            char swzMask = "xyzw"[c];

            context.AppendLine($"layout (location = {attr}, component = {c}) {iq}in float {name}_{swzMask}{suffix};");
        }

        return;
    }

    bool passthrough = (context.Config.PassthroughAttributes & (1 << attr)) != 0;
    string pass = string.Empty;

    if (passthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
    {
        pass = "passthrough, ";
    }

    // Only vertex shader inputs can have a non-float type.
    AttributeType attributeType = context.Config.Stage == ShaderStage.Vertex
        ? context.Config.GpuAccessor.QueryAttributeType(attr)
        : AttributeType.Float;

    string vectorType = attributeType.ToVec4Type();

    context.AppendLine($"layout ({pass}location = {attr}) {iq}in {vectorType} {name}{suffix};");
}
/// <summary>
/// Declares a single per-patch input attribute as a vec4.
/// </summary>
/// <param name="context">Code generation context that receives the declaration</param>
/// <param name="attr">Per-patch attribute index</param>
private static void DeclareInputAttributePerPatch(CodeGenContext context, int attr)
{
    string attributeName = $"{DefaultNames.PerPatchAttributePrefix}{attr}";
    int attributeLocation = context.Config.GetPerPatchAttributeLocation(attr);

    context.AppendLine($"layout (location = {attributeLocation}) patch in vec4 {attributeName};");
}
/// <summary>
/// Declares the output attributes of the shader.
/// With output attribute indexing, a single vec4 array is declared; otherwise one
/// declaration is written per bit set on the used output attributes mask.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="info">Structured program information (not used by this method)</param>
private static void DeclareOutputAttributes(CodeGenContext context, StructuredProgramInfo info)
{
    if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing))
    {
        context.AppendLine($"layout (location = 0) out vec4 {DefaultNames.OAttributePrefix}[{Constants.MaxAttributes}];");

        return;
    }

    int pending = context.Config.UsedOutputAttributes;

    if (context.Config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryDualSourceBlendEnable())
    {
        // With dual source blending, the first used output and the one right after it
        // (when both are used) are declared together as a pair.
        int firstOutput = BitOperations.TrailingZeroCount(pending);
        int pairMask = 3 << firstOutput;

        if ((pending & pairMask) == pairMask)
        {
            pending &= ~pairMask;

            DeclareOutputDualSourceBlendAttribute(context, firstOutput);
        }
    }

    // Visit the remaining set bits from the lowest to the highest.
    for (; pending != 0;)
    {
        int attr = BitOperations.TrailingZeroCount(pending);

        pending &= ~(1 << attr);

        DeclareOutputAttribute(context, attr);
    }
}
/// <summary>
/// Declares a single output attribute.
/// With transform feedback on the last vertex pipeline stage, the attribute is declared as an
/// optional vector followed by one float per remaining component, each with its own xfb qualifiers;
/// otherwise as one 4 component vector.
/// </summary>
/// <param name="context">Code generation context that receives the declaration</param>
/// <param name="attr">Attribute index, also used as location</param>
private static void DeclareOutputAttribute(CodeGenContext context, int attr)
{
    string suffix = IsArrayAttributeGlsl(context.Config.Stage, isOutAttr: true) ? "[]" : string.Empty;
    string name = $"{DefaultNames.OAttributePrefix}{attr}{suffix}";

    if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline)
    {
        int components = context.Config.GetTransformFeedbackOutputComponents(attr, 0);
        int firstScalar = 0;

        if (components > 1)
        {
            string type;

            switch (components)
            {
                case 2:
                    type = "vec2";
                    break;
                case 3:
                    type = "vec3";
                    break;
                case 4:
                    type = "vec4";
                    break;
                default:
                    type = "float";
                    break;
            }

            var vectorTfOutput = context.Config.GetTransformFeedbackOutput(attr, 0);

            string vectorXfb = vectorTfOutput.Valid
                ? $", xfb_buffer = {vectorTfOutput.Buffer}, xfb_offset = {vectorTfOutput.Offset}, xfb_stride = {vectorTfOutput.Stride}"
                : string.Empty;

            context.AppendLine($"layout (location = {attr}{vectorXfb}) out {type} {name};");

            // The components covered by the vector are not declared again as scalars.
            firstScalar = components;
        }

        for (int c = firstScalar; c < 4; c++)
        {
            char swzMask = "xyzw"[c];

            var scalarTfOutput = context.Config.GetTransformFeedbackOutput(attr, c);

            string scalarXfb = scalarTfOutput.Valid
                ? $", xfb_buffer = {scalarTfOutput.Buffer}, xfb_offset = {scalarTfOutput.Offset}, xfb_stride = {scalarTfOutput.Stride}"
                : string.Empty;

            context.AppendLine($"layout (location = {attr}, component = {c}{scalarXfb}) out float {name}_{swzMask};");
        }

        return;
    }

    // Only fragment shader outputs can have a non-float type.
    string vectorType = "vec4";

    if (context.Config.Stage == ShaderStage.Fragment)
    {
        switch (context.Config.GpuAccessor.QueryFragmentOutputType(attr))
        {
            case AttributeType.Sint:
                vectorType = "ivec4";
                break;
            case AttributeType.Uint:
                vectorType = "uvec4";
                break;
        }
    }

    // Attribute 0 of vertex shaders is marked invariant when the host uses reduced precision.
    bool isInvariant = context.Config.GpuAccessor.QueryHostReducedPrecision() &&
                       context.Config.Stage == ShaderStage.Vertex &&
                       attr == 0;

    string storage = isInvariant ? "invariant out" : "out";

    context.AppendLine($"layout (location = {attr}) {storage} {vectorType} {name};");
}
/// <summary>
/// Declares a pair of fragment outputs for dual source blending.
/// Both outputs share the location of the first one, and use index 0 and index 1 respectively.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="attr">Index of the first output of the pair; the second output is named after the next index</param>
private static void DeclareOutputDualSourceBlendAttribute(CodeGenContext context, int attr)
{
    string firstName = $"{DefaultNames.OAttributePrefix}{attr}";
    context.AppendLine($"layout (location = {attr}, index = 0) out vec4 {firstName};");

    string secondName = $"{DefaultNames.OAttributePrefix}{(attr + 1)}";
    context.AppendLine($"layout (location = {attr}, index = 1) out vec4 {secondName};");
}
/// <summary>
/// Checks if the attributes of a stage are declared as arrays.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="isOutAttr">True to check the output attributes, false to check the input attributes</param>
/// <returns>
/// For outputs, true on the tessellation control stage only.
/// For inputs, true on the tessellation control, tessellation evaluation and geometry stages.
/// </returns>
private static bool IsArrayAttributeGlsl(ShaderStage stage, bool isOutAttr)
{
    switch (stage)
    {
        case ShaderStage.TessellationControl:
            return true;
        case ShaderStage.TessellationEvaluation:
        case ShaderStage.Geometry:
            return !isOutAttr;
        default:
            return false;
    }
}
/// <summary>
/// Declares the per-patch output attributes, in ascending attribute order.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="attrs">Set of per-patch attributes to declare</param>
private static void DeclareUsedOutputAttributesPerPatch(CodeGenContext context, HashSet<int> attrs)
{
    int[] sortedAttrs = attrs.ToArray();

    Array.Sort(sortedAttrs);

    for (int i = 0; i < sortedAttrs.Length; i++)
    {
        DeclareOutputAttributePerPatch(context, sortedAttrs[i]);
    }
}
/// <summary>
/// Declares a single per-patch output attribute as a vec4.
/// </summary>
/// <param name="context">Code generation context that receives the declaration</param>
/// <param name="attr">Per-patch attribute index</param>
private static void DeclareOutputAttributePerPatch(CodeGenContext context, int attr)
{
    string attributeName = $"{DefaultNames.PerPatchAttributePrefix}{attr}";
    int attributeLocation = context.Config.GetPerPatchAttributeLocation(attr);

    context.AppendLine($"layout (location = {attributeLocation}) patch out vec4 {attributeName};");
}
/// <summary>
/// Declares the support uniform block at binding 0, followed by an empty line.
/// Nothing is written when the block is not needed: that is, when there are no scale elements
/// and the stage is not fragment, unless this is the last vertex pipeline stage with the
/// viewport transform disabled.
/// </summary>
/// <param name="context">Code generation context that receives the declaration</param>
/// <param name="stage">Shader stage, selects the fields placed before the render scale array</param>
/// <param name="scaleElements">Number of scale elements used by the shader; only compared against zero here</param>
private static void DeclareSupportUniformBlock(CodeGenContext context, ShaderStage stage, int scaleElements)
{
    bool needsSupportBlock = stage == ShaderStage.Fragment ||
        (context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable());

    if (needsSupportBlock || scaleElements != 0)
    {
        context.AppendLine($"layout (binding = 0, std140) uniform {DefaultNames.SupportBlockName}");
        context.EnterScope();

        if (stage == ShaderStage.Fragment || stage == ShaderStage.Vertex)
        {
            context.AppendLine($"uint {DefaultNames.SupportBlockAlphaTestName};");
            context.AppendLine($"bool {DefaultNames.SupportBlockIsBgraName}[{SupportBuffer.FragmentIsBgraCount}];");
            context.AppendLine($"vec4 {DefaultNames.SupportBlockViewportInverse};");
            context.AppendLine($"int {DefaultNames.SupportBlockFragmentScaleCount};");
        }
        else if (stage == ShaderStage.Compute)
        {
            // Compute declares a reserved uint array in place of the graphics fields.
            context.AppendLine($"uint s_reserved[{SupportBuffer.ComputeRenderScaleOffset / SupportBuffer.FieldSize}];");
        }

        context.AppendLine($"float {DefaultNames.SupportBlockRenderScaleName}[{SupportBuffer.RenderScaleMaxCount}];");

        context.LeaveScope(";");
        context.AppendLine();
    }
}
/// <summary>
/// Appends the code of a helper function read from an embedded resource, followed by an empty line.
/// Tabs and the <c>$...$</c> placeholders in the resource text are replaced before it is appended.
/// </summary>
/// <param name="context">Code generation context that receives the code</param>
/// <param name="filename">Name of the embedded resource holding the helper function code</param>
private static void AppendHelperFunction(CodeGenContext context, string filename)
{
    string storageName = OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix;

    string code = EmbeddedResources.ReadAllText(filename)
        .Replace("\t", CodeGenContext.Tab)
        .Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName)
        .Replace("$STORAGE_MEM$", storageName);

    // The subgroup built-ins come from the same extension family enabled on the shader header.
    bool hasShaderBallot = context.Config.GpuAccessor.QueryHostSupportsShaderBallot();

    string invocationName = hasShaderBallot ? "gl_SubGroupInvocationARB" : "gl_SubgroupInvocationID";
    string broadcastName = hasShaderBallot ? "readInvocationARB" : "subgroupBroadcast";

    code = code
        .Replace("$SUBGROUP_INVOCATION$", invocationName)
        .Replace("$SUBGROUP_BROADCAST$", broadcastName);

    context.AppendLine(code);
    context.AppendLine();
}
}
}

View File

@ -0,0 +1,37 @@
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
/// <summary>
/// Names and name prefixes of the identifiers written to the generated GLSL code.
/// </summary>
static class DefaultNames
{
    // Variables and function arguments.
    public const string LocalNamePrefix = "temp";
    public const string ArgumentNamePrefix = "a";
    public const string UndefinedName = "undef";

    // Textures and images.
    public const string SamplerNamePrefix = "tex";
    public const string ImageNamePrefix = "img";

    // Attributes.
    public const string PerPatchAttributePrefix = "patch_attr_";
    public const string IAttributePrefix = "in_attr";
    public const string OAttributePrefix = "out_attr";

    // Uniform and storage buffers.
    public const string StorageNamePrefix = "s";
    public const string UniformNamePrefix = "c";
    public const string UniformNameSuffix = "data";
    public const string DataName = "data";
    public const string BlockSuffix = "block";

    // Support uniform block and its fields.
    public const string SupportBlockName = "support_block";
    public const string SupportBlockAlphaTestName = "s_alpha_test";
    public const string SupportBlockIsBgraName = "s_is_bgra";
    public const string SupportBlockViewportInverse = "s_viewport_inverse";
    public const string SupportBlockFragmentScaleCount = "s_frag_scale_count";
    public const string SupportBlockRenderScaleName = "s_render_scale";

    // Memory arrays.
    public const string LocalMemoryName = "local_mem";
    public const string SharedMemoryName = "shared_mem";
}
}

View File

@ -0,0 +1,154 @@
using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.TypeConversion;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
static class GlslGenerator
{
private const string MainFunctionName = "main";

/// <summary>
/// Generates the GLSL source code of a shader program.
/// The output has the global declarations first, then the prototypes and the bodies of all
/// functions other than the first, and last the first function, which is emitted as "main".
/// </summary>
/// <param name="info">Structured program information; its first function is the entry point</param>
/// <param name="config">Shader configuration</param>
/// <returns>The generated GLSL code</returns>
public static string Generate(StructuredProgramInfo info, ShaderConfig config)
{
    var context = new CodeGenContext(info, config);

    Declarations.Declare(context, info);

    var functions = info.Functions;

    if (functions.Count != 0)
    {
        // Prototypes come first, so that functions can be called regardless of definition order.
        for (int index = 1; index < functions.Count; index++)
        {
            string signature = GetFunctionSignature(context, functions[index]);

            context.AppendLine($"{signature};");
        }

        context.AppendLine();

        for (int index = 1; index < functions.Count; index++)
        {
            PrintFunction(context, info, functions[index]);

            context.AppendLine();
        }
    }

    PrintFunction(context, info, functions[0], MainFunctionName);

    return context.GetCode();
}
/// <summary>
/// Writes a complete function: signature, local declarations and body, enclosed in a scope.
/// The function is also set as the current function of the context.
/// </summary>
/// <param name="context">Code generation context that receives the code</param>
/// <param name="info">Structured program information (not used by this method)</param>
/// <param name="function">Function to write</param>
/// <param name="funcName">Name to use instead of the function own name, or null to keep it</param>
private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null)
{
    context.CurrentFunction = function;

    string signature = GetFunctionSignature(context, function, funcName);

    context.AppendLine(signature);
    context.EnterScope();

    Declarations.DeclareLocals(context, function);
    PrintBlock(context, function.MainBlock);

    context.LeaveScope();
}
/// <summary>
/// Builds the signature of a function: return type, name and argument list.
/// Input arguments are listed first, followed by the output arguments with the "out" qualifier.
/// Argument names are numbered continuously across both groups.
/// </summary>
/// <param name="context">Code generation context (used to get the type names)</param>
/// <param name="function">Function to get the signature from</param>
/// <param name="funcName">Name to use instead of the function own name, or null to keep it</param>
/// <returns>The function signature, without a trailing semicolon</returns>
private static string GetFunctionSignature(CodeGenContext context, StructuredFunction function, string funcName = null)
{
    int inCount = function.InArguments.Length;
    int outCount = function.OutArguments.Length;

    string[] args = new string[inCount + outCount];

    int argIndex = 0;

    for (; argIndex < inCount; argIndex++)
    {
        string typeName = Declarations.GetVarTypeName(context, function.InArguments[argIndex]);

        args[argIndex] = $"{typeName} {OperandManager.GetArgumentName(argIndex)}";
    }

    for (int outIndex = 0; outIndex < outCount; outIndex++, argIndex++)
    {
        string typeName = Declarations.GetVarTypeName(context, function.OutArguments[outIndex]);

        args[argIndex] = $"out {typeName} {OperandManager.GetArgumentName(argIndex)}";
    }

    string returnTypeName = Declarations.GetVarTypeName(context, function.ReturnType);
    string name = funcName ?? function.Name;

    return $"{returnTypeName} {name}({string.Join(", ", args)})";
}
private static void PrintBlock(CodeGenContext context, AstBlock block)
{
AstBlockVisitor visitor = new AstBlockVisitor(block);
visitor.BlockEntered += (sender, e) =>
{
switch (e.Block.Type)
{
case AstBlockType.DoWhile:
context.AppendLine("do");
break;
case AstBlockType.Else:
context.AppendLine("else");
break;
case AstBlockType.ElseIf:
context.AppendLine($"else if ({GetCondExpr(context, e.Block.Condition)})");
break;
case AstBlockType.If:
context.AppendLine($"if ({GetCondExpr(context, e.Block.Condition)})");
break;
default: throw new InvalidOperationException($"Found unexpected block type \"{e.Block.Type}\".");
}
context.EnterScope();
};
visitor.BlockLeft += (sender, e) =>
{
context.LeaveScope();
if (e.Block.Type == AstBlockType.DoWhile)
{
context.AppendLine($"while ({GetCondExpr(context, e.Block.Condition)});");
}
};
foreach (IAstNode node in visitor.Visit())
{
if (node is AstOperation operation)
{
string expr = InstGen.GetExpression(context, operation);
if (expr != null)
{
context.AppendLine(expr + ";");
}
}
else if (node is AstAssignment assignment)
{
AggregateType dstType = OperandManager.GetNodeDestType(context, assignment.Destination);
AggregateType srcType = OperandManager.GetNodeDestType(context, assignment.Source);
string dest = InstGen.GetExpression(context, assignment.Destination);
string src = ReinterpretCast(context, assignment.Source, srcType, dstType);
context.AppendLine(dest + " = " + src + ";");
}
else if (node is AstComment comment)
{
context.AppendLine("// " + comment.Comment);
}
else
{
throw new InvalidOperationException($"Found unexpected node type \"{node?.GetType().Name ?? "null"}\".");
}
}
}
private static string GetCondExpr(CodeGenContext context, IAstNode cond)
{
AggregateType srcType = OperandManager.GetNodeDestType(context, cond);
return ReinterpretCast(context, cond, srcType, AggregateType.Bool);
}
}
}

View File

@ -0,0 +1,21 @@
// Signed atomic maximum on one word of the $SHARED_MEM$ array.
// Replaces the word at "offset" with max(current, value), compared as signed
// integers, and returns the value that was stored before the update.
// NOTE(review): $SHARED_MEM$ looks like a template placeholder substituted before compilation - confirm.
int Helper_AtomicMaxS32(int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[offset];
// The word is read as uint, so it is reinterpreted as int for the signed comparison.
newValue = uint(max(int(oldValue), value));
// Retry if the word changed between the read above and the swap.
} while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}
// Signed atomic minimum on one word of the $SHARED_MEM$ array.
// Replaces the word at "offset" with min(current, value), compared as signed
// integers, and returns the value that was stored before the update.
// NOTE(review): $SHARED_MEM$ looks like a template placeholder substituted before compilation - confirm.
int Helper_AtomicMinS32(int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[offset];
// The word is read as uint, so it is reinterpreted as int for the signed comparison.
newValue = uint(min(int(oldValue), value));
// Retry if the word changed between the read above and the swap.
} while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}

View File

@ -0,0 +1,21 @@
// Signed atomic maximum on one word of a $STORAGE_MEM$ buffer.
// "index" selects the buffer and "offset" the element of its "data" array.
// The element is replaced with max(current, value), compared as signed integers,
// and the value that was stored before the update is returned.
// NOTE(review): $STORAGE_MEM$ looks like a template placeholder substituted before compilation - confirm.
int Helper_AtomicMaxS32(int index, int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[offset];
// The word is read as uint, so it is reinterpreted as int for the signed comparison.
newValue = uint(max(int(oldValue), value));
// Retry if the word changed between the read above and the swap.
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}
// Signed atomic minimum on one word of a $STORAGE_MEM$ buffer.
// "index" selects the buffer and "offset" the element of its "data" array.
// The element is replaced with min(current, value), compared as signed integers,
// and the value that was stored before the update is returned.
// NOTE(review): $STORAGE_MEM$ looks like a template placeholder substituted before compilation - confirm.
int Helper_AtomicMinS32(int index, int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[offset];
// The word is read as uint, so it is reinterpreted as int for the signed comparison.
newValue = uint(min(int(oldValue), value));
// Retry if the word changed between the read above and the swap.
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}

View File

@ -0,0 +1,22 @@
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Names of the GLSL helper functions that generated code can call.
    /// </summary>
    /// <remarks>
    /// The fields are read-only: each value is a fixed identifier, and reassigning
    /// one at runtime would silently change the code that gets generated.
    /// </remarks>
    static class HelperFunctionNames
    {
        public static readonly string AtomicMaxS32 = "Helper_AtomicMaxS32";
        public static readonly string AtomicMinS32 = "Helper_AtomicMinS32";

        public static readonly string MultiplyHighS32 = "Helper_MultiplyHighS32";
        public static readonly string MultiplyHighU32 = "Helper_MultiplyHighU32";

        public static readonly string Shuffle = "Helper_Shuffle";
        public static readonly string ShuffleDown = "Helper_ShuffleDown";
        public static readonly string ShuffleUp = "Helper_ShuffleUp";
        public static readonly string ShuffleXor = "Helper_ShuffleXor";
        public static readonly string SwizzleAdd = "Helper_SwizzleAdd";

        public static readonly string StoreShared16 = "Helper_StoreShared16";
        public static readonly string StoreShared8 = "Helper_StoreShared8";
        public static readonly string StoreStorage16 = "Helper_StoreStorage16";
        public static readonly string StoreStorage8 = "Helper_StoreStorage8";
    }
}

View File

@ -0,0 +1,7 @@
// Returns the most significant 32 bits of the signed 64-bit product x * y.
int Helper_MultiplyHighS32(int x, int y)
{
    int highBits, lowBits;

    // Only the high half is needed; the low half is discarded.
    imulExtended(x, y, highBits, lowBits);

    return highBits;
}

View File

@ -0,0 +1,7 @@
// Returns the most significant 32 bits of the unsigned 64-bit product x * y.
uint Helper_MultiplyHighU32(uint x, uint y)
{
    uint highBits, lowBits;

    // Only the high half is needed; the low half is discarded.
    umulExtended(x, y, highBits, lowBits);

    return highBits;
}

View File

@ -0,0 +1,11 @@
// Reads "x" from the invocation selected by "index" inside the caller's segment.
// Bits 0-4 of "mask" hold the clamp value and bits 8-12 the segment mask.
// "valid" tells whether the source invocation is inside the allowed range;
// when it is not, the caller's own "x" is returned.
float Helper_Shuffle(float x, uint index, uint mask, out bool valid)
{
    uint segmentMask = (mask >> 8) & 0x1fu;
    uint laneClamp = mask & 0x1fu;

    uint firstLane = $SUBGROUP_INVOCATION$ & segmentMask;
    uint lastLane = firstLane | (laneClamp & ~segmentMask);
    uint sourceLane = (index & ~segmentMask) | firstLane;

    valid = sourceLane <= lastLane;

    float fetched = $SUBGROUP_BROADCAST$(x, sourceLane);

    if (valid)
    {
        return fetched;
    }

    return x;
}

View File

@ -0,0 +1,11 @@
// Reads "x" from the invocation that is "index" positions after the caller.
// Bits 0-4 of "mask" hold the clamp value and bits 8-12 the segment mask.
// "valid" tells whether the source invocation is inside the allowed range;
// when it is not, the caller's own "x" is returned.
float Helper_ShuffleDown(float x, uint index, uint mask, out bool valid)
{
    uint segmentMask = (mask >> 8) & 0x1fu;
    uint laneClamp = mask & 0x1fu;

    uint firstLane = $SUBGROUP_INVOCATION$ & segmentMask;
    uint lastLane = firstLane | (laneClamp & ~segmentMask);
    uint sourceLane = $SUBGROUP_INVOCATION$ + index;

    valid = sourceLane <= lastLane;

    float fetched = $SUBGROUP_BROADCAST$(x, sourceLane);

    if (valid)
    {
        return fetched;
    }

    return x;
}

View File

@ -0,0 +1,9 @@
// Reads "x" from the invocation that is "index" positions before the caller.
// Bits 8-12 of "mask" hold the segment mask. "valid" tells whether the source
// invocation is at or after the first invocation of the segment; when it is
// not, the caller's own "x" is returned.
float Helper_ShuffleUp(float x, uint index, uint mask, out bool valid)
{
    uint segmentMask = (mask >> 8) & 0x1fu;
    uint firstLane = $SUBGROUP_INVOCATION$ & segmentMask;
    uint sourceLane = $SUBGROUP_INVOCATION$ - index;

    // Compared as signed values so that a subtraction that wrapped around counts as out of range.
    valid = int(sourceLane) >= int(firstLane);

    float fetched = $SUBGROUP_BROADCAST$(x, sourceLane);

    if (valid)
    {
        return fetched;
    }

    return x;
}

View File

@ -0,0 +1,11 @@
// Reads "x" from the invocation whose id is the caller's id XOR "index".
// Bits 0-4 of "mask" hold the clamp value and bits 8-12 the segment mask.
// "valid" tells whether the source invocation is inside the allowed range;
// when it is not, the caller's own "x" is returned.
float Helper_ShuffleXor(float x, uint index, uint mask, out bool valid)
{
    uint segmentMask = (mask >> 8) & 0x1fu;
    uint laneClamp = mask & 0x1fu;

    uint firstLane = $SUBGROUP_INVOCATION$ & segmentMask;
    uint lastLane = firstLane | (laneClamp & ~segmentMask);
    uint sourceLane = $SUBGROUP_INVOCATION$ ^ index;

    valid = sourceLane <= lastLane;

    float fetched = $SUBGROUP_BROADCAST$(x, sourceLane);

    if (valid)
    {
        return fetched;
    }

    return x;
}

View File

@ -0,0 +1,23 @@
// Stores the low 16 bits of "value" into the $SHARED_MEM$ word array.
// "offset" is split into a word index (offset / 4) and a bit position inside
// that word ((offset % 4) * 8), so it is treated as a byte offset.
// NOTE(review): $SHARED_MEM$ looks like a template placeholder substituted before compilation - confirm.
void Helper_StoreShared16(int offset, uint value)
{
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[wordOffset];
// Only the 16 target bits are replaced; the rest of the word is kept.
newValue = bitfieldInsert(oldValue, value, bitOffset, 16);
// Retry if the word changed between the read above and the swap.
} while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue);
}
// Stores the low 8 bits of "value" into the $SHARED_MEM$ word array.
// "offset" is split into a word index (offset / 4) and a bit position inside
// that word ((offset % 4) * 8), so it is treated as a byte offset.
// NOTE(review): $SHARED_MEM$ looks like a template placeholder substituted before compilation - confirm.
void Helper_StoreShared8(int offset, uint value)
{
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[wordOffset];
// Only the 8 target bits are replaced; the rest of the word is kept.
newValue = bitfieldInsert(oldValue, value, bitOffset, 8);
// Retry if the word changed between the read above and the swap.
} while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue);
}

View File

@ -0,0 +1,23 @@
// Stores the low 16 bits of "value" into the "data" array of $STORAGE_MEM$[index].
// "offset" is split into a word index (offset / 4) and a bit position inside
// that word ((offset % 4) * 8), so it is treated as a byte offset.
// NOTE(review): $STORAGE_MEM$ looks like a template placeholder substituted before compilation - confirm.
void Helper_StoreStorage16(int index, int offset, uint value)
{
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[wordOffset];
// Only the 16 target bits are replaced; the rest of the word is kept.
newValue = bitfieldInsert(oldValue, value, bitOffset, 16);
// Retry if the word changed between the read above and the swap.
} while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue);
}
// Stores the low 8 bits of "value" into the "data" array of $STORAGE_MEM$[index].
// "offset" is split into a word index (offset / 4) and a bit position inside
// that word ((offset % 4) * 8), so it is treated as a byte offset.
// NOTE(review): $STORAGE_MEM$ looks like a template placeholder substituted before compilation - confirm.
void Helper_StoreStorage8(int index, int offset, uint value)
{
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[wordOffset];
// Only the 8 target bits are replaced; the rest of the word is kept.
newValue = bitfieldInsert(oldValue, value, bitOffset, 8);
// Retry if the word changed between the read above and the swap.
} while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue);
}

View File

@ -0,0 +1,7 @@
// Computes x * a + y * b, where the factor pair (a, b) is picked from two
// lookup tables. "mask" holds four 2-bit selectors, and the low two bits of
// the invocation id choose which selector applies to the caller.
float Helper_SwizzleAdd(float x, float y, int mask)
{
    vec4 xFactors = vec4(1.0, -1.0, 1.0, 0.0);
    vec4 yFactors = vec4(1.0, 1.0, -1.0, 1.0);

    int selectorShift = int($SUBGROUP_INVOCATION$ & 3u) * 2;
    int selector = (mask >> selectorShift) & 3;

    return x * xFactors[selector] + y * yFactors[selector];
}

View File

@ -0,0 +1,19 @@
// Multiplies integer texel coordinates by the scale stored in
// s_render_scale[samplerIndex]. A scale of exactly 1.0 returns the input untouched.
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
    float scale = s_render_scale[samplerIndex];

    return scale == 1.0 ? inputVec : ivec2(vec2(inputVec) * scale);
}
// Divides a texture size by the scale stored in s_render_scale[samplerIndex].
// A scale of exactly 1.0 returns the input untouched.
int Helper_TextureSizeUnscale(int size, int samplerIndex)
{
    float scale = s_render_scale[samplerIndex];

    return scale == 1.0 ? size : int(float(size) / scale);
}

View File

@ -0,0 +1,26 @@
// Multiplies integer texel coordinates by the scale stored in
// s_render_scale[1 + samplerIndex]. A scale of exactly 1.0 returns the input
// untouched. A negative scale uses its magnitude as the factor and adds an
// offset derived from the fragment position.
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
    float scale = s_render_scale[1 + samplerIndex];

    if (scale == 1.0)
    {
        return inputVec;
    }

    vec2 coords = vec2(inputVec);

    if (scale < 0.0)
    {
        // If less than 0, try interpolate between texels by using the screen position.
        return ivec2(coords * (-scale) + mod(gl_FragCoord.xy, 0.0 - scale));
    }

    return ivec2(coords * scale);
}
// Divides a texture size by the magnitude of the scale stored in
// s_render_scale[1 + samplerIndex]. A magnitude of exactly 1.0 returns the
// input untouched.
int Helper_TextureSizeUnscale(int size, int samplerIndex)
{
    float scale = abs(s_render_scale[1 + samplerIndex]);

    return scale == 1.0 ? size : int(float(size) / scale);
}

View File

@ -0,0 +1,20 @@
// Multiplies integer texel coordinates by the magnitude of the scale stored in
// s_render_scale[1 + samplerIndex + s_frag_scale_count]. A magnitude of
// exactly 1.0 returns the input untouched.
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
    float scale = abs(s_render_scale[1 + samplerIndex + s_frag_scale_count]);

    return scale == 1.0 ? inputVec : ivec2(vec2(inputVec) * scale);
}
// Divides a texture size by the magnitude of the scale stored in
// s_render_scale[1 + samplerIndex + s_frag_scale_count]. A magnitude of
// exactly 1.0 returns the input untouched.
int Helper_TextureSizeUnscale(int size, int samplerIndex)
{
    float scale = abs(s_render_scale[1 + samplerIndex + s_frag_scale_count]);

    return scale == 1.0 ? size : int(float(size) / scale);
}

View File

@ -0,0 +1,238 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenBallot;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenCall;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenFSI;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenMemory;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenPacking;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenVector;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Converts AST nodes into GLSL expressions.
    /// </summary>
    static class InstGen
    {
        /// <summary>
        /// Gets the GLSL expression for an operation or operand node.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="node">Node to generate the expression for</param>
        /// <returns>The expression, which can be null for operations that produce no code</returns>
        /// <exception cref="ArgumentException">The node is neither an operation nor an operand</exception>
        public static string GetExpression(CodeGenContext context, IAstNode node)
        {
            return node switch
            {
                AstOperation operation => GetExpression(context, operation),
                AstOperand operand => context.OperandManager.GetExpression(context, operand),
                _ => throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\".")
            };
        }

        /// <summary>
        /// Generates a negation as a subtraction from zero.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Negate operation</param>
        /// <param name="info">Information about the negate instruction</param>
        /// <returns>The negation expression</returns>
        public static string Negate(CodeGenContext context, AstOperation operation, InstInfo info)
        {
            IAstNode operandNode = operation.GetSource(0);

            AggregateType type = GetSrcVarType(operation.Inst, 0);

            string operandExpr = GetSoureExpr(context, operandNode, type);

            string zero = "0.0";

            if (type != AggregateType.FP64)
            {
                NumberFormatter.TryFormat(0, type, out zero);
            }

            // Starting in the 496.13 NVIDIA driver, there's an issue with assigning variables to negated expressions.
            // (-expr) does not work, but (0.0 - expr) does. This should be removed once the issue is resolved.
            string enclosed = Enclose(operandExpr, operandNode, operation.Inst, info, false);

            return $"{zero} - {enclosed}";
        }

        /// <summary>
        /// Gets the GLSL expression for an operation, using the kind of the instruction
        /// (function call, operator or special) to decide how it is printed.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Operation to generate the expression for</param>
        /// <returns>The expression, which can be null for operations that produce no code</returns>
        /// <exception cref="InvalidOperationException">The instruction cannot be printed</exception>
        private static string GetExpression(CodeGenContext context, AstOperation operation)
        {
            Instruction inst = operation.Inst;

            InstInfo info = GetInstructionInfo(inst);

            InstType kind = info.Type;

            if ((kind & InstType.Call) != 0)
            {
                bool isAtomic = (kind & InstType.Atomic) != 0;
                bool isShared = operation.StorageKind == StorageKind.SharedMemory;

                int argCount = (int)(kind & InstType.ArityMask);

                string argList = string.Empty;

                for (int argIndex = 0; argIndex < argCount; argIndex++)
                {
                    // For shared memory access, the second argument is unused and should be ignored.
                    // It is there to make both storage and shared access have the same number of arguments.
                    // For storage, both inputs are consumed when the argument index is 0, so we should skip it here.
                    if (argIndex == 1 && (isAtomic || isShared))
                    {
                        continue;
                    }

                    string argExpr;

                    if (argIndex == 0 && isAtomic)
                    {
                        // Atomics operate on the memory location itself.
                        argExpr = operation.StorageKind switch
                        {
                            StorageKind.SharedMemory => LoadShared(context, operation),
                            StorageKind.StorageBuffer => LoadStorage(context, operation),
                            _ => throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".")
                        };
                    }
                    else
                    {
                        AggregateType argType = GetSrcVarType(inst, argIndex);

                        argExpr = GetSoureExpr(context, operation.GetSource(argIndex), argType);
                    }

                    argList += argIndex != 0 ? ", " + argExpr : argExpr;
                }

                return $"{info.OpName}({argList})";
            }
            else if ((kind & InstType.Op) != 0)
            {
                string op = info.OpName;

                // Return may optionally have a return value (and in this case it is unary).
                if (inst == Instruction.Return && operation.SourcesCount != 0)
                {
                    string returnExpr = GetSoureExpr(context, operation.GetSource(0), context.CurrentFunction.ReturnType);

                    return $"{op} {returnExpr}";
                }

                int arity = (int)(kind & InstType.ArityMask);

                string[] operands = new string[arity];

                for (int srcIndex = 0; srcIndex < arity; srcIndex++)
                {
                    IAstNode srcNode = operation.GetSource(srcIndex);

                    string srcExpr = GetSoureExpr(context, srcNode, GetSrcVarType(inst, srcIndex));

                    // Only the first operand of a binary operator counts as the left hand side.
                    bool isLhs = arity == 2 && srcIndex == 0;

                    operands[srcIndex] = Enclose(srcExpr, srcNode, inst, info, isLhs);
                }

                if (arity == 0)
                {
                    return op;
                }
                else if (arity == 1)
                {
                    return op + operands[0];
                }
                else if (arity == 2)
                {
                    return $"{operands[0]} {op} {operands[1]}";
                }
                else if (arity == 3)
                {
                    // The operator name carries both symbols of the ternary operator.
                    return $"{operands[0]} {op[0]} {operands[1]} {op[1]} {operands[2]}";
                }
            }
            else if ((kind & InstType.Special) != 0)
            {
                return (inst & Instruction.Mask) switch
                {
                    Instruction.Ballot => Ballot(context, operation),
                    Instruction.Call => Call(context, operation),
                    Instruction.FSIBegin => FSIBegin(context),
                    Instruction.FSIEnd => FSIEnd(context),
                    Instruction.ImageLoad => ImageLoadOrStore(context, operation),
                    Instruction.ImageStore => ImageLoadOrStore(context, operation),
                    Instruction.ImageAtomic => ImageLoadOrStore(context, operation),
                    Instruction.Load => Load(context, operation),
                    Instruction.LoadConstant => LoadConstant(context, operation),
                    Instruction.LoadLocal => LoadLocal(context, operation),
                    Instruction.LoadShared => LoadShared(context, operation),
                    Instruction.LoadStorage => LoadStorage(context, operation),
                    Instruction.Lod => Lod(context, operation),
                    Instruction.Negate => Negate(context, operation, info),
                    Instruction.PackDouble2x32 => PackDouble2x32(context, operation),
                    Instruction.PackHalf2x16 => PackHalf2x16(context, operation),
                    Instruction.Store => Store(context, operation),
                    Instruction.StoreLocal => StoreLocal(context, operation),
                    Instruction.StoreShared => StoreShared(context, operation),
                    Instruction.StoreShared16 => StoreShared16(context, operation),
                    Instruction.StoreShared8 => StoreShared8(context, operation),
                    Instruction.StoreStorage => StoreStorage(context, operation),
                    Instruction.StoreStorage16 => StoreStorage16(context, operation),
                    Instruction.StoreStorage8 => StoreStorage8(context, operation),
                    Instruction.TextureSample => TextureSample(context, operation),
                    Instruction.TextureSize => TextureSize(context, operation),
                    Instruction.UnpackDouble2x32 => UnpackDouble2x32(context, operation),
                    Instruction.UnpackHalf2x16 => UnpackHalf2x16(context, operation),
                    Instruction.VectorExtract => VectorExtract(context, operation),
                    _ => throw new InvalidOperationException($"Unexpected instruction type \"{kind}\".")
                };
            }

            throw new InvalidOperationException($"Unexpected instruction type \"{kind}\".");
        }
    }
}

View File

@ -0,0 +1,27 @@
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Code generation for the ballot instruction.
    /// </summary>
    static class InstGenBallot
    {
        /// <summary>
        /// Generates the ballot expression. The ARB built-in is used when the host
        /// reports shader ballot support, and the subgroup built-in otherwise.
        /// Only the first component of the result is kept in both cases.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Ballot operation</param>
        /// <returns>The ballot expression</returns>
        public static string Ballot(CodeGenContext context, AstOperation operation)
        {
            AggregateType argType = GetSrcVarType(operation.Inst, 0);

            string predicate = GetSoureExpr(context, operation.GetSource(0), argType);

            bool hasArbBallot = context.Config.GpuAccessor.QueryHostSupportsShaderBallot();

            return hasArbBallot
                ? $"unpackUint2x32(ballotARB({predicate})).x"
                : $"subgroupBallot({predicate}).x";
        }
    }
}

View File

@ -0,0 +1,29 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System.Diagnostics;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Code generation for calls to other shader functions.
    /// </summary>
    static class InstGenCall
    {
        /// <summary>
        /// Generates a function call. The first source holds the constant function
        /// id, and the remaining sources are the call arguments.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Call operation</param>
        /// <returns>The call expression</returns>
        public static string Call(CodeGenContext context, AstOperation operation)
        {
            AstOperand funcId = (AstOperand)operation.GetSource(0);

            Debug.Assert(funcId.Type == OperandType.Constant);

            var callee = context.GetFunction(funcId.Value);

            int argCount = operation.SourcesCount - 1;

            string[] argExprs = new string[argCount];

            // Source 0 is the function id, so argument N lives in source N + 1.
            for (int srcIndex = 1; srcIndex <= argCount; srcIndex++)
            {
                int argIndex = srcIndex - 1;

                argExprs[argIndex] = GetSoureExpr(context, operation.GetSource(srcIndex), callee.GetArgumentType(argIndex));
            }

            return $"{callee.Name}({string.Join(", ", argExprs)})";
        }
    }
}

View File

@ -0,0 +1,29 @@
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Code generation for the fragment shader interlock instructions.
    /// </summary>
    static class InstGenFSI
    {
        /// <summary>
        /// Generates the call that begins the interlocked region.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <returns>The call expression, or null when the host supports neither extension</returns>
        public static string FSIBegin(CodeGenContext context)
        {
            var gpuAccessor = context.Config.GpuAccessor;

            // The ARB extension is preferred; the Intel one is only queried as a fallback.
            if (gpuAccessor.QueryHostSupportsFragmentShaderInterlock())
            {
                return "beginInvocationInterlockARB()";
            }

            return gpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel()
                ? "beginFragmentShaderOrderingINTEL()"
                : null;
        }

        /// <summary>
        /// Generates the call that ends the interlocked region.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <returns>The call expression, or null when the host lacks the ARB interlock extension</returns>
        public static string FSIEnd(CodeGenContext context)
        {
            bool hasInterlock = context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock();

            return hasInterlock ? "endInvocationInterlockARB()" : null;
        }
    }
}

View File

@ -0,0 +1,231 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.TypeConversion;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Instruction information table and expression helpers shared by the GLSL instruction generators.
    /// </summary>
    static class InstGenHelper
    {
        private static readonly InstInfo[] _infoTable;

        /// <summary>
        /// Fills the table that maps each instruction to its kind, GLSL name and precedence.
        /// </summary>
        static InstGenHelper()
        {
            _infoTable = new InstInfo[(int)Instruction.Count];

            Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd");
            Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd");
            Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap");
            Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32);
            Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax");
            Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32);
            Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin");
            Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr");
            Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange");
            Add(Instruction.AtomicXor, InstType.AtomicBinary, "atomicXor");
            Add(Instruction.Absolute, InstType.CallUnary, "abs");
            Add(Instruction.Add, InstType.OpBinaryCom, "+", 2);
            Add(Instruction.Ballot, InstType.Special);
            Add(Instruction.Barrier, InstType.CallNullary, "barrier");
            Add(Instruction.BitCount, InstType.CallUnary, "bitCount");
            Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract");
            Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract");
            Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "bitfieldInsert");
            Add(Instruction.BitfieldReverse, InstType.CallUnary, "bitfieldReverse");
            Add(Instruction.BitwiseAnd, InstType.OpBinaryCom, "&", 6);
            Add(Instruction.BitwiseExclusiveOr, InstType.OpBinaryCom, "^", 7);
            Add(Instruction.BitwiseNot, InstType.OpUnary, "~", 0);
            Add(Instruction.BitwiseOr, InstType.OpBinaryCom, "|", 8);
            Add(Instruction.Call, InstType.Special);
            Add(Instruction.Ceiling, InstType.CallUnary, "ceil");
            Add(Instruction.Clamp, InstType.CallTernary, "clamp");
            Add(Instruction.ClampU32, InstType.CallTernary, "clamp");
            Add(Instruction.CompareEqual, InstType.OpBinaryCom, "==", 5);
            Add(Instruction.CompareGreater, InstType.OpBinary, ">", 4);
            Add(Instruction.CompareGreaterOrEqual, InstType.OpBinary, ">=", 4);
            Add(Instruction.CompareGreaterOrEqualU32, InstType.OpBinary, ">=", 4);
            Add(Instruction.CompareGreaterU32, InstType.OpBinary, ">", 4);
            Add(Instruction.CompareLess, InstType.OpBinary, "<", 4);
            Add(Instruction.CompareLessOrEqual, InstType.OpBinary, "<=", 4);
            Add(Instruction.CompareLessOrEqualU32, InstType.OpBinary, "<=", 4);
            Add(Instruction.CompareLessU32, InstType.OpBinary, "<", 4);
            Add(Instruction.CompareNotEqual, InstType.OpBinaryCom, "!=", 5);
            Add(Instruction.ConditionalSelect, InstType.OpTernary, "?:", 12);
            Add(Instruction.ConvertFP32ToFP64, InstType.CallUnary, "double");
            Add(Instruction.ConvertFP64ToFP32, InstType.CallUnary, "float");
            Add(Instruction.ConvertFP32ToS32, InstType.CallUnary, "int");
            Add(Instruction.ConvertFP32ToU32, InstType.CallUnary, "uint");
            Add(Instruction.ConvertFP64ToS32, InstType.CallUnary, "int");
            Add(Instruction.ConvertFP64ToU32, InstType.CallUnary, "uint");
            Add(Instruction.ConvertS32ToFP32, InstType.CallUnary, "float");
            Add(Instruction.ConvertS32ToFP64, InstType.CallUnary, "double");
            Add(Instruction.ConvertU32ToFP32, InstType.CallUnary, "float");
            Add(Instruction.ConvertU32ToFP64, InstType.CallUnary, "double");
            Add(Instruction.Cosine, InstType.CallUnary, "cos");
            Add(Instruction.Ddx, InstType.CallUnary, "dFdx");
            Add(Instruction.Ddy, InstType.CallUnary, "dFdy");
            Add(Instruction.Discard, InstType.OpNullary, "discard");
            Add(Instruction.Divide, InstType.OpBinary, "/", 1);
            Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex");
            Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive");
            Add(Instruction.ExponentB2, InstType.CallUnary, "exp2");
            Add(Instruction.FSIBegin, InstType.Special);
            Add(Instruction.FSIEnd, InstType.Special);
            Add(Instruction.FindLSB, InstType.CallUnary, "findLSB");
            Add(Instruction.FindMSBS32, InstType.CallUnary, "findMSB");
            Add(Instruction.FindMSBU32, InstType.CallUnary, "findMSB");
            Add(Instruction.Floor, InstType.CallUnary, "floor");
            Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma");
            Add(Instruction.GroupMemoryBarrier, InstType.CallNullary, "groupMemoryBarrier");
            Add(Instruction.ImageLoad, InstType.Special);
            Add(Instruction.ImageStore, InstType.Special);
            Add(Instruction.ImageAtomic, InstType.Special);
            Add(Instruction.IsNan, InstType.CallUnary, "isnan");
            Add(Instruction.Load, InstType.Special);
            Add(Instruction.LoadConstant, InstType.Special);
            Add(Instruction.LoadLocal, InstType.Special);
            Add(Instruction.LoadShared, InstType.Special);
            Add(Instruction.LoadStorage, InstType.Special);
            Add(Instruction.Lod, InstType.Special);
            Add(Instruction.LogarithmB2, InstType.CallUnary, "log2");
            Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9);
            Add(Instruction.LogicalExclusiveOr, InstType.OpBinaryCom, "^^", 10);
            Add(Instruction.LogicalNot, InstType.OpUnary, "!", 0);
            Add(Instruction.LogicalOr, InstType.OpBinaryCom, "||", 11);
            Add(Instruction.LoopBreak, InstType.OpNullary, "break");
            Add(Instruction.LoopContinue, InstType.OpNullary, "continue");
            Add(Instruction.PackDouble2x32, InstType.Special);
            Add(Instruction.PackHalf2x16, InstType.Special);
            Add(Instruction.Maximum, InstType.CallBinary, "max");
            Add(Instruction.MaximumU32, InstType.CallBinary, "max");
            Add(Instruction.MemoryBarrier, InstType.CallNullary, "memoryBarrier");
            Add(Instruction.Minimum, InstType.CallBinary, "min");
            Add(Instruction.MinimumU32, InstType.CallBinary, "min");
            Add(Instruction.Multiply, InstType.OpBinaryCom, "*", 1);
            Add(Instruction.MultiplyHighS32, InstType.CallBinary, HelperFunctionNames.MultiplyHighS32);
            Add(Instruction.MultiplyHighU32, InstType.CallBinary, HelperFunctionNames.MultiplyHighU32);
            Add(Instruction.Negate, InstType.Special);
            Add(Instruction.ReciprocalSquareRoot, InstType.CallUnary, "inversesqrt");
            Add(Instruction.Return, InstType.OpNullary, "return");
            Add(Instruction.Round, InstType.CallUnary, "roundEven");
            Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3);
            Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3);
            Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3);
            Add(Instruction.Shuffle, InstType.CallQuaternary, HelperFunctionNames.Shuffle);
            Add(Instruction.ShuffleDown, InstType.CallQuaternary, HelperFunctionNames.ShuffleDown);
            Add(Instruction.ShuffleUp, InstType.CallQuaternary, HelperFunctionNames.ShuffleUp);
            Add(Instruction.ShuffleXor, InstType.CallQuaternary, HelperFunctionNames.ShuffleXor);
            Add(Instruction.Sine, InstType.CallUnary, "sin");
            Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt");
            Add(Instruction.Store, InstType.Special);
            Add(Instruction.StoreLocal, InstType.Special);
            Add(Instruction.StoreShared, InstType.Special);
            Add(Instruction.StoreShared16, InstType.Special);
            Add(Instruction.StoreShared8, InstType.Special);
            Add(Instruction.StoreStorage, InstType.Special);
            Add(Instruction.StoreStorage16, InstType.Special);
            Add(Instruction.StoreStorage8, InstType.Special);
            Add(Instruction.Subtract, InstType.OpBinary, "-", 2);
            Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd);
            Add(Instruction.TextureSample, InstType.Special);
            Add(Instruction.TextureSize, InstType.Special);
            Add(Instruction.Truncate, InstType.CallUnary, "trunc");
            Add(Instruction.UnpackDouble2x32, InstType.Special);
            Add(Instruction.UnpackHalf2x16, InstType.Special);
            Add(Instruction.VectorExtract, InstType.Special);
            Add(Instruction.VoteAll, InstType.CallUnary, "allInvocationsARB");
            Add(Instruction.VoteAllEqual, InstType.CallUnary, "allInvocationsEqualARB");
            Add(Instruction.VoteAny, InstType.CallUnary, "anyInvocationARB");
        }

        /// <summary>
        /// Registers the information of one instruction on the table.
        /// </summary>
        /// <param name="inst">Instruction being described</param>
        /// <param name="flags">Kind and arity of the instruction</param>
        /// <param name="opName">GLSL operator or function name, if any</param>
        /// <param name="precedence">Operator precedence; lower values bind tighter</param>
        private static void Add(Instruction inst, InstType flags, string opName = null, int precedence = 0)
        {
            InstInfo entry = new InstInfo(flags, opName, precedence);

            _infoTable[(int)inst] = entry;
        }

        /// <summary>
        /// Gets the table entry of an instruction, ignoring any bits outside of the instruction mask.
        /// </summary>
        /// <param name="inst">Instruction to look up</param>
        /// <returns>The instruction information</returns>
        public static InstInfo GetInstructionInfo(Instruction inst)
        {
            int tableIndex = (int)(inst & Instruction.Mask);

            return _infoTable[tableIndex];
        }

        /// <summary>
        /// Gets the expression of a node, reinterpreted from its own type to the requested type.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="node">Source node</param>
        /// <param name="dstType">Type the consumer expects</param>
        /// <returns>The source expression</returns>
        public static string GetSoureExpr(CodeGenContext context, IAstNode node, AggregateType dstType)
        {
            AggregateType srcType = OperandManager.GetNodeDestType(context, node);

            return ReinterpretCast(context, node, srcType, dstType);
        }

        /// <summary>
        /// Surrounds an expression with parenthesis when the parent instruction requires it.
        /// </summary>
        /// <param name="expr">Expression of the node</param>
        /// <param name="node">Node that produced the expression</param>
        /// <param name="pInst">Parent instruction that consumes the expression</param>
        /// <param name="isLhs">True if the expression is the left operand of the parent</param>
        /// <returns>The expression, enclosed if needed</returns>
        public static string Enclose(string expr, IAstNode node, Instruction pInst, bool isLhs)
        {
            return Enclose(expr, node, pInst, GetInstructionInfo(pInst), isLhs);
        }

        /// <summary>
        /// Surrounds an expression with parenthesis when the parent instruction requires it.
        /// </summary>
        /// <param name="expr">Expression of the node</param>
        /// <param name="node">Node that produced the expression</param>
        /// <param name="pInst">Parent instruction that consumes the expression</param>
        /// <param name="pInfo">Information about the parent instruction</param>
        /// <param name="isLhs">True if the expression is the left operand of the parent</param>
        /// <returns>The expression, enclosed if needed</returns>
        public static string Enclose(string expr, IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs = false)
        {
            return NeedsParenthesis(node, pInst, pInfo, isLhs) ? "(" + expr + ")" : expr;
        }

        /// <summary>
        /// Checks if the expression of a node must be enclosed in parenthesis to keep
        /// its meaning when used as an operand of the parent instruction.
        /// </summary>
        /// <param name="node">Node that produced the expression</param>
        /// <param name="pInst">Parent instruction that consumes the expression</param>
        /// <param name="pInfo">Information about the parent instruction</param>
        /// <param name="isLhs">True if the expression is the left operand of the parent</param>
        /// <returns>True if parenthesis are required, false otherwise</returns>
        public static bool NeedsParenthesis(IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs)
        {
            // If the node isn't a operation, then it can only be a operand,
            // and those never needs to be surrounded in parenthesis.
            if (!(node is AstOperation operation))
            {
                // This is sort of a special case, if this is a negative constant,
                // and it is consumed by a unary operation, we need to put on the parenthesis,
                // as in GLSL a sequence like --2 or ~-1 is not valid.
                return IsNegativeConst(node) && pInfo.Type == InstType.OpUnary;
            }

            // Calls and special instructions delimit their own operands.
            InstType selfDelimiting = InstType.Call | InstType.Special;

            if ((pInfo.Type & selfDelimiting) != 0)
            {
                return false;
            }

            InstInfo childInfo = _infoTable[(int)(operation.Inst & Instruction.Mask)];

            if ((childInfo.Type & selfDelimiting) != 0)
            {
                return false;
            }

            bool bindsTighter = childInfo.Precedence < pInfo.Precedence;
            bool samePrecedenceOnLeft = childInfo.Precedence == pInfo.Precedence && isLhs;
            bool sameCommutativeOp = pInst == operation.Inst && childInfo.Type == InstType.OpBinaryCom;

            return !(bindsTighter || samePrecedenceOnLeft || sameCommutativeOp);
        }

        /// <summary>
        /// Checks if a node is a constant operand with a negative value.
        /// </summary>
        /// <param name="node">Node to check</param>
        /// <returns>True for negative constants, false otherwise</returns>
        private static bool IsNegativeConst(IAstNode node)
        {
            return node is AstOperand operand &&
                   operand.Type == OperandType.Constant &&
                   operand.Value < 0;
        }
    }
}

View File

@ -0,0 +1,939 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Text;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
static class InstGenMemory
{
/// <summary>
/// Builds the GLSL expression for an image load, image store or image atomic operation.
/// </summary>
/// <remarks>
/// Sources are consumed strictly in order through the local <c>Src</c> function:
/// the optional array index expression (indexed images), the coordinates, and then
/// the store texel or the atomic operands. Reordering the calls changes which
/// source each part of the expression reads.
/// </remarks>
/// <param name="context">Code generation context</param>
/// <param name="operation">Texture operation; it is cast to <c>AstTextureOperation</c></param>
/// <returns>The image access expression, or a placeholder for bindless accesses</returns>
public static string ImageLoadOrStore(CodeGenContext context, AstOperation operation)
{
AstTextureOperation texOp = (AstTextureOperation)operation;
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
// TODO: Bindless texture support. For now we just return 0/do nothing.
if (isBindless)
{
switch (texOp.Inst)
{
case Instruction.ImageStore:
return "// imageStore(bindless)";
case Instruction.ImageLoad:
// A zero of the image component type stands in for the loaded value.
AggregateType componentType = texOp.Format.GetComponentType();
NumberFormatter.TryFormat(0, componentType, out string imageConst);
AggregateType outputType = texOp.GetVectorType(componentType);
// Vector results wrap the zero constant in a constructor of the vector type.
if ((outputType & AggregateType.ElementCountMask) != 0)
{
return $"{Declarations.GetVarTypeName(context, outputType, precise: false)}({imageConst})";
}
return imageConst;
default:
return NumberFormatter.FormatInt(0);
}
}
bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
var texCallBuilder = new StringBuilder();
if (texOp.Inst == Instruction.ImageAtomic)
{
// Pick the GLSL function matching the atomic operation stored on the flags.
texCallBuilder.Append((texOp.Flags & TextureFlags.AtomicMask) switch {
TextureFlags.Add => "imageAtomicAdd",
TextureFlags.Minimum => "imageAtomicMin",
TextureFlags.Maximum => "imageAtomicMax",
TextureFlags.Increment => "imageAtomicAdd", // TODO: Clamp value.
TextureFlags.Decrement => "imageAtomicAdd", // TODO: Clamp value.
TextureFlags.BitwiseAnd => "imageAtomicAnd",
TextureFlags.BitwiseOr => "imageAtomicOr",
TextureFlags.BitwiseXor => "imageAtomicXor",
TextureFlags.Swap => "imageAtomicExchange",
TextureFlags.CAS => "imageAtomicCompSwap",
_ => "imageAtomicAdd",
});
}
else
{
texCallBuilder.Append(texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore");
}
// Bindless accesses returned above, so isBindless is always false from here on
// and srcIndex starts at 0.
int srcIndex = isBindless ? 1 : 0;
// Returns the expression of the next unread source and advances srcIndex.
string Src(AggregateType type)
{
return GetSoureExpr(context, texOp.GetSource(srcIndex++), type);
}
string indexExpr = null;
if (isIndexed)
{
indexExpr = Src(AggregateType.S32);
}
string imageName = OperandManager.GetImageName(context.Config.Stage, texOp, indexExpr);
texCallBuilder.Append('(');
texCallBuilder.Append(imageName);
int coordsCount = texOp.Type.GetDimensions();
// Array images take one extra coordinate component.
int pCount = coordsCount + (isArray ? 1 : 0);
// Appends one more argument to the call being built.
void Append(string str)
{
texCallBuilder.Append(", ");
texCallBuilder.Append(str);
}
// Wraps the coordinate vector in the render scale helper. Only applies to
// non-indexed image loads on stages that support render scale, and only to
// 2D and 2D array coordinates; anything else is returned unchanged.
string ApplyScaling(string vector)
{
if (context.Config.Stage.SupportsRenderScale() &&
texOp.Inst == Instruction.ImageLoad &&
!isBindless &&
!isIndexed)
{
// Image scales start after texture ones.
int scaleIndex = context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp);
if (pCount == 3 && isArray)
{
// The array index is not scaled, just x and y.
vector = $"ivec3(Helper_TexelFetchScale(({vector}).xy, {scaleIndex}), ({vector}).z)";
}
else if (pCount == 2 && !isArray)
{
vector = $"Helper_TexelFetchScale({vector}, {scaleIndex})";
}
}
return vector;
}
if (pCount > 1)
{
// Multiple components are packed into an ivecN constructor.
string[] elems = new string[pCount];
for (int index = 0; index < pCount; index++)
{
elems[index] = Src(AggregateType.S32);
}
Append(ApplyScaling($"ivec{pCount}({string.Join(", ", elems)})"));
}
else
{
Append(Src(AggregateType.S32));
}
if (texOp.Inst == Instruction.ImageStore)
{
AggregateType type = texOp.Format.GetComponentType();
// The stored texel always has 4 components; the ones without a source are zero filled.
string[] cElems = new string[4];
for (int index = 0; index < 4; index++)
{
if (srcIndex < texOp.SourcesCount)
{
cElems[index] = Src(type);
}
else
{
cElems[index] = type switch
{
AggregateType.S32 => NumberFormatter.FormatInt(0),
AggregateType.U32 => NumberFormatter.FormatUint(0),
_ => NumberFormatter.FormatFloat(0)
};
}
}
// Selects ivec4, uvec4 or vec4 to match the component type.
string prefix = type switch
{
AggregateType.S32 => "i",
AggregateType.U32 => "u",
_ => string.Empty
};
Append($"{prefix}vec4({string.Join(", ", cElems)})");
}
if (texOp.Inst == Instruction.ImageAtomic)
{
AggregateType type = texOp.Format.GetComponentType();
if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS)
{
Append(Src(type)); // Compare value.
}
// Increment and decrement use a constant operand instead of reading a source.
string value = (texOp.Flags & TextureFlags.AtomicMask) switch
{
TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value
TextureFlags.Decrement => NumberFormatter.FormatInt(-1, type), // TODO: Clamp value
_ => Src(type)
};
Append(value);
texCallBuilder.Append(')');
// When the component type is not S32, the whole call is wrapped in an int(...) conversion.
if (type != AggregateType.S32)
{
texCallBuilder
.Insert(0, "int(")
.Append(')');
}
}
else
{
texCallBuilder.Append(')');
if (texOp.Inst == Instruction.ImageLoad)
{
// NOTE(review): presumably a component mask/swizzle derived from texOp.Index - confirm against GetMaskMultiDest.
texCallBuilder.Append(GetMaskMultiDest(texOp.Index));
}
}
return texCallBuilder.ToString();
}
/// <summary>
/// Generates the GLSL expression for a load instruction by delegating to
/// <c>GenerateLoadOrStore</c> in load mode.
/// </summary>
public static string Load(CodeGenContext context, AstOperation operation)
    => GenerateLoadOrStore(context, operation, isStore: false);
/// <summary>
/// Generates the GLSL expression that reads a constant buffer element.
/// The first source is the buffer slot (constant or dynamic), the second is the offset.
/// </summary>
public static string LoadConstant(CodeGenContext context, AstOperation operation)
{
    IAstNode slotNode = operation.GetSource(0);
    IAstNode offsetNode = operation.GetSource(1);

    // The offset is later used as the left operand of a shift, so it is enclosed accordingly.
    string offset = Enclose(
        GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 1)),
        offsetNode,
        Instruction.ShiftRightS32,
        isLhs: true);

    var shaderConfig = context.Config;
    bool canIndexElement = !shaderConfig.GpuAccessor.QueryHostHasVectorIndexingBug();

    if (!(slotNode is AstOperand constantSlot && constantSlot.Type == OperandType.Constant))
    {
        // Dynamic slot: the slot itself is an expression.
        string slot = GetSoureExpr(context, slotNode, GetSrcVarType(operation.Inst, 0));

        return OperandManager.GetConstantBufferName(slot, offset, shaderConfig.Stage, canIndexElement);
    }

    bool cbIndexable = shaderConfig.UsedFeatures.HasFlag(Translation.FeatureFlags.CbIndexing);

    return OperandManager.GetConstantBufferName(constantSlot.Value, offset, shaderConfig.Stage, cbIndexable, canIndexElement);
}
/// <summary>
/// Generates the GLSL expression that reads an element of the local memory array.
/// </summary>
public static string LoadLocal(CodeGenContext context, AstOperation operation)
    => LoadLocalOrShared(context, operation, DefaultNames.LocalMemoryName);
/// <summary>
/// Generates the GLSL expression that reads an element of the shared memory array.
/// </summary>
public static string LoadShared(CodeGenContext context, AstOperation operation)
    => LoadLocalOrShared(context, operation, DefaultNames.SharedMemoryName);
/// <summary>
/// Builds <c>arrayName[offset]</c>, where the offset comes from the operation's first source.
/// </summary>
private static string LoadLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName)
{
    IAstNode offsetNode = operation.GetSource(0);

    return arrayName + "[" + GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0)) + "]";
}
/// <summary>
/// Generates the GLSL expression that reads a storage buffer element.
/// The first source is the buffer index, the second is the element offset.
/// </summary>
public static string LoadStorage(CodeGenContext context, AstOperation operation)
{
    IAstNode slotNode = operation.GetSource(0);
    IAstNode offsetNode = operation.GetSource(1);

    string slot = GetSoureExpr(context, slotNode, GetSrcVarType(operation.Inst, 0));
    string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 1));

    return GetStorageBufferAccessor(slot, offset, context.Config.Stage);
}
/// <summary>
/// Generates a <c>textureQueryLod</c> call for the given texture operation,
/// selecting the component given by the operation index.
/// </summary>
public static string Lod(CodeGenContext context, AstOperation operation)
{
    AstTextureOperation texOp = (AstTextureOperation)operation;

    int dimensions = texOp.Type.GetDimensions();

    // TODO: Bindless texture support. For now we just return 0.
    if ((texOp.Flags & TextureFlags.Bindless) != 0)
    {
        return NumberFormatter.FormatFloat(0);
    }

    bool indexed = (texOp.Type & SamplerType.Indexed) != 0;

    string indexExpr = indexed
        ? GetSoureExpr(context, texOp.GetSource(0), AggregateType.S32)
        : null;

    string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);

    // Bindless was ruled out above, so only an indexed sampler shifts the coordinate sources.
    int firstCoord = indexed ? 1 : 0;

    string coords;

    if (dimensions <= 1)
    {
        coords = GetSoureExpr(context, texOp.GetSource(firstCoord), AggregateType.FP32);
    }
    else
    {
        string[] parts = new string[dimensions];

        for (int i = 0; i < parts.Length; i++)
        {
            parts[i] = GetSoureExpr(context, texOp.GetSource(firstCoord + i), AggregateType.FP32);
        }

        coords = $"vec{dimensions}({string.Join(", ", parts)})";
    }

    return $"textureQueryLod({samplerName}, {coords}){GetMask(texOp.Index)}";
}
/// <summary>
/// Generates the GLSL statement for a store instruction by delegating to
/// <c>GenerateLoadOrStore</c> in store mode.
/// </summary>
public static string Store(CodeGenContext context, AstOperation operation)
    => GenerateLoadOrStore(context, operation, isStore: true);
/// <summary>
/// Generates the GLSL assignment that writes an element of the local memory array.
/// </summary>
public static string StoreLocal(CodeGenContext context, AstOperation operation)
    => StoreLocalOrShared(context, operation, DefaultNames.LocalMemoryName);
/// <summary>
/// Generates the GLSL assignment that writes an element of the shared memory array.
/// </summary>
public static string StoreShared(CodeGenContext context, AstOperation operation)
    => StoreLocalOrShared(context, operation, DefaultNames.SharedMemoryName);
/// <summary>
/// Builds <c>arrayName[offset] = value</c>. The offset is the first source and the value is
/// the second source, reinterpreted as an unsigned 32-bit integer.
/// </summary>
private static string StoreLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName)
{
    IAstNode offsetNode = operation.GetSource(0);
    IAstNode valueNode = operation.GetSource(1);

    string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));

    string value = TypeConversion.ReinterpretCast(
        context,
        valueNode,
        OperandManager.GetNodeDestType(context, valueNode),
        AggregateType.U32);

    return arrayName + "[" + offset + "] = " + value;
}
/// <summary>
/// Generates a call to the 16-bit shared memory store helper function.
/// The first source is the offset; the second is the value, reinterpreted as U32.
/// </summary>
public static string StoreShared16(CodeGenContext context, AstOperation operation)
{
    IAstNode offsetNode = operation.GetSource(0);
    IAstNode valueNode = operation.GetSource(1);

    string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));

    string value = TypeConversion.ReinterpretCast(
        context,
        valueNode,
        OperandManager.GetNodeDestType(context, valueNode),
        AggregateType.U32);

    return $"{HelperFunctionNames.StoreShared16}({offset}, {value})";
}
/// <summary>
/// Generates a call to the 8-bit shared memory store helper function.
/// The first source is the offset; the second is the value, reinterpreted as U32.
/// </summary>
public static string StoreShared8(CodeGenContext context, AstOperation operation)
{
    IAstNode offsetNode = operation.GetSource(0);
    IAstNode valueNode = operation.GetSource(1);

    string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));

    string value = TypeConversion.ReinterpretCast(
        context,
        valueNode,
        OperandManager.GetNodeDestType(context, valueNode),
        AggregateType.U32);

    return $"{HelperFunctionNames.StoreShared8}({offset}, {value})";
}
/// <summary>
/// Generates the GLSL assignment that writes a storage buffer element.
/// Sources are, in order: buffer index, element offset, value (reinterpreted as U32).
/// </summary>
public static string StoreStorage(CodeGenContext context, AstOperation operation)
{
    IAstNode slotNode = operation.GetSource(0);
    IAstNode offsetNode = operation.GetSource(1);
    IAstNode valueNode = operation.GetSource(2);

    string slot = GetSoureExpr(context, slotNode, GetSrcVarType(operation.Inst, 0));
    string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 1));

    string value = TypeConversion.ReinterpretCast(
        context,
        valueNode,
        OperandManager.GetNodeDestType(context, valueNode),
        AggregateType.U32);

    string target = GetStorageBufferAccessor(slot, offset, context.Config.Stage);

    return target + " = " + value;
}
/// <summary>
/// Generates a call to the 16-bit storage buffer store helper function.
/// Sources are, in order: buffer index, element offset, value (reinterpreted as U32).
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="operation">Store operation with three sources</param>
/// <returns>The helper call expression</returns>
public static string StoreStorage16(CodeGenContext context, AstOperation operation)
{
    IAstNode src1 = operation.GetSource(0);
    IAstNode src2 = operation.GetSource(1);
    IAstNode src3 = operation.GetSource(2);

    string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
    string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));

    AggregateType srcType = OperandManager.GetNodeDestType(context, src3);
    string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);

    // The helper receives the index and offset directly, so no buffer accessor string is built here.
    return $"{HelperFunctionNames.StoreStorage16}({indexExpr}, {offsetExpr}, {src})";
}
/// <summary>
/// Generates a call to the 8-bit storage buffer store helper function.
/// Sources are, in order: buffer index, element offset, value (reinterpreted as U32).
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="operation">Store operation with three sources</param>
/// <returns>The helper call expression</returns>
public static string StoreStorage8(CodeGenContext context, AstOperation operation)
{
    IAstNode src1 = operation.GetSource(0);
    IAstNode src2 = operation.GetSource(1);
    IAstNode src3 = operation.GetSource(2);

    string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
    string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));

    AggregateType srcType = OperandManager.GetNodeDestType(context, src3);
    string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);

    // The helper receives the index and offset directly, so no buffer accessor string is built here.
    return $"{HelperFunctionNames.StoreStorage8}({indexExpr}, {offsetExpr}, {src})";
}
/// <summary>
/// Generates the GLSL call expression for a texture sampling operation.
/// The function name is built from "texture" or "texelFetch" plus the optional
/// "Gather", "Grad", "Lod", "Offset" and "Offsets" suffixes, and the arguments are
/// consumed from the operation sources in a fixed order.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="operation">Operation to generate code for; must be an <c>AstTextureOperation</c></param>
/// <returns>The GLSL expression, or a zero constant for bindless operations</returns>
public static string TextureSample(CodeGenContext context, AstOperation operation)
{
AstTextureOperation texOp = (AstTextureOperation)operation;
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
// The result is swizzled as a vector unless this is a non-gather shadow sample.
bool colorIsVector = isGather || !isShadow;
SamplerType type = texOp.Type & SamplerType.Mask;
bool is2D = type == SamplerType.Texture2D;
bool isCube = type == SamplerType.TextureCube;
// 2D Array and Cube shadow samplers with LOD level or bias requires an extension.
// If the extension is not supported, just remove the LOD parameter.
if (isArray && isShadow && (is2D || isCube) && !context.Config.GpuAccessor.QueryHostSupportsTextureShadowLod())
{
hasLodBias = false;
hasLodLevel = false;
}
// Cube shadow samplers with LOD level requires an extension.
// If the extension is not supported, just remove the LOD level parameter.
if (isShadow && isCube && !context.Config.GpuAccessor.QueryHostSupportsTextureShadowLod())
{
hasLodLevel = false;
}
// TODO: Bindless texture support. For now we just return 0.
if (isBindless)
{
string scalarValue = NumberFormatter.FormatFloat(0);
if (colorIsVector)
{
AggregateType outputType = texOp.GetVectorType(AggregateType.FP32);
if ((outputType & AggregateType.ElementCountMask) != 0)
{
return $"{Declarations.GetVarTypeName(context, outputType, precise: false)}({scalarValue})";
}
}
return scalarValue;
}
// Build the GLSL function name from the operation flags.
string texCall = intCoords ? "texelFetch" : "texture";
if (isGather)
{
texCall += "Gather";
}
else if (hasDerivatives)
{
texCall += "Grad";
}
else if (hasLodLevel && !intCoords)
{
texCall += "Lod";
}
if (hasOffset)
{
texCall += "Offset";
}
else if (hasOffsets)
{
texCall += "Offsets";
}
// Bindless operations have already returned above, so this always starts at 0 here.
int srcIndex = isBindless ? 1 : 0;
// Returns the next unread source as an expression of the given type and advances the source cursor.
string Src(AggregateType type)
{
return GetSoureExpr(context, texOp.GetSource(srcIndex++), type);
}
string indexExpr = null;
if (isIndexed)
{
indexExpr = Src(AggregateType.S32);
}
string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);
texCall += "(" + samplerName;
// pCount is the number of elements of the coordinate vector, which can also carry
// the array index, a dummy element and the shadow compare value.
int coordsCount = texOp.Type.GetDimensions();
int pCount = coordsCount;
int arrayIndexElem = -1;
if (isArray)
{
arrayIndexElem = pCount++;
}
// The sampler 1D shadow overload expects a
// dummy value on the middle of the vector, who knows why...
bool hasDummy1DShadowElem = texOp.Type == (SamplerType.Texture1D | SamplerType.Shadow);
if (hasDummy1DShadowElem)
{
pCount++;
}
if (isShadow && !isGather)
{
pCount++;
}
// On textureGather*, the comparison value is
// always specified as an extra argument.
bool hasExtraCompareArg = isShadow && isGather;
// The coordinate vector is capped at 4 elements; a fifth element is
// emitted as the extra compare argument instead.
if (pCount == 5)
{
pCount = 4;
hasExtraCompareArg = true;
}
// Appends one more argument to the call being built.
void Append(string str)
{
texCall += ", " + str;
}
AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32;
// Builds the coordinate vector (or scalar) from the next sources.
string AssemblePVector(int count)
{
if (count > 1)
{
string[] elems = new string[count];
for (int index = 0; index < count; index++)
{
if (arrayIndexElem == index)
{
// The array index is read as an integer and converted to float for non-integer coordinates.
elems[index] = Src(AggregateType.S32);
if (!intCoords)
{
elems[index] = "float(" + elems[index] + ")";
}
}
else if (index == 1 && hasDummy1DShadowElem)
{
elems[index] = NumberFormatter.FormatFloat(0);
}
else
{
elems[index] = Src(coordType);
}
}
string prefix = intCoords ? "i" : string.Empty;
return prefix + "vec" + count + "(" + string.Join(", ", elems) + ")";
}
else
{
return Src(coordType);
}
}
// Wraps integer coordinates in Helper_TexelFetchScale when the stage supports render scale
// and the sampler is not indexed. Only 2-element non-array and 3-element array vectors are wrapped.
string ApplyScaling(string vector)
{
if (intCoords)
{
if (context.Config.Stage.SupportsRenderScale() &&
!isBindless &&
!isIndexed)
{
int index = context.Config.FindTextureDescriptorIndex(texOp);
if (pCount == 3 && isArray)
{
// The array index is not scaled, just x and y.
vector = "ivec3(Helper_TexelFetchScale((" + vector + ").xy, " + index + "), (" + vector + ").z)";
}
else if (pCount == 2 && !isArray)
{
vector = "Helper_TexelFetchScale(" + vector + ", " + index + ")";
}
}
}
return vector;
}
// Adds a small offset to gather coordinates when the host reports a non-zero gather bias precision.
string ApplyBias(string vector)
{
int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();
if (isGather && gatherBiasPrecision != 0)
{
// GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels.
// Offset by the gather precision divided by 2 to correct for rounding.
if (pCount == 1)
{
vector = $"{vector} + (1.0 / (float(textureSize({samplerName}, 0)) * float({1 << (gatherBiasPrecision + 1)})))";
}
else
{
// NOTE(review): "xyz".Substring(0, pCount) throws when pCount is 4 - confirm that
// gather operations never reach this point with a 4-element coordinate vector.
vector = $"{vector} + (1.0 / (vec{pCount}(textureSize({samplerName}, 0).{"xyz".Substring(0, pCount)}) * float({1 << (gatherBiasPrecision + 1)})))";
}
}
return vector;
}
Append(ApplyBias(ApplyScaling(AssemblePVector(pCount))));
// The remaining arguments are consumed from the sources in this order: compare value,
// derivatives, sample index or LOD level, offset(s), LOD bias and gather component.
string AssembleDerivativesVector(int count)
{
if (count > 1)
{
string[] elems = new string[count];
for (int index = 0; index < count; index++)
{
elems[index] = Src(AggregateType.FP32);
}
return "vec" + count + "(" + string.Join(", ", elems) + ")";
}
else
{
return Src(AggregateType.FP32);
}
}
if (hasExtraCompareArg)
{
Append(Src(AggregateType.FP32));
}
if (hasDerivatives)
{
Append(AssembleDerivativesVector(coordsCount)); // dPdx
Append(AssembleDerivativesVector(coordsCount)); // dPdy
}
if (isMultisample)
{
Append(Src(AggregateType.S32));
}
else if (hasLodLevel)
{
Append(Src(coordType));
}
string AssembleOffsetVector(int count)
{
if (count > 1)
{
string[] elems = new string[count];
for (int index = 0; index < count; index++)
{
elems[index] = Src(AggregateType.S32);
}
return "ivec" + count + "(" + string.Join(", ", elems) + ")";
}
else
{
return Src(AggregateType.S32);
}
}
if (hasOffset)
{
Append(AssembleOffsetVector(coordsCount));
}
else if (hasOffsets)
{
// Four offset vectors are passed as a single array argument.
texCall += $", ivec{coordsCount}[4](";
texCall += AssembleOffsetVector(coordsCount) + ", ";
texCall += AssembleOffsetVector(coordsCount) + ", ";
texCall += AssembleOffsetVector(coordsCount) + ", ";
texCall += AssembleOffsetVector(coordsCount) + ")";
}
if (hasLodBias)
{
Append(Src(AggregateType.FP32));
}
// textureGather* optional extra component index,
// not needed for shadow samplers.
if (isGather && !isShadow)
{
Append(Src(AggregateType.S32));
}
// Close the call and select the requested components when the result is a vector.
texCall += ")" + (colorIsVector ? GetMaskMultiDest(texOp.Index) : "");
return texCall;
}
/// <summary>
/// Generates a texture size query. Component index 3 maps to <c>textureQueryLevels</c>;
/// any other index maps to a component of <c>textureSize</c>, wrapped in
/// <c>Helper_TextureSizeUnscale</c> when render scale applies.
/// </summary>
public static string TextureSize(CodeGenContext context, AstOperation operation)
{
    AstTextureOperation texOp = (AstTextureOperation)operation;

    // TODO: Bindless texture support. For now we just return 0.
    if ((texOp.Flags & TextureFlags.Bindless) != 0)
    {
        return NumberFormatter.FormatInt(0);
    }

    bool indexed = (texOp.Type & SamplerType.Indexed) != 0;

    string indexExpr = indexed
        ? GetSoureExpr(context, texOp.GetSource(0), AggregateType.S32)
        : null;

    string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);

    if (texOp.Index == 3)
    {
        return $"textureQueryLevels({samplerName})";
    }

    (TextureDescriptor descriptor, int descriptorIndex) = context.Config.FindTextureDescriptor(texOp);

    // Multisample and buffer textures take no LOD argument.
    bool lodless = descriptor.Type.HasFlag(SamplerType.Multisample) || descriptor.Type == SamplerType.TextureBuffer;

    string sizeCall;

    if (lodless)
    {
        sizeCall = $"textureSize({samplerName}){GetMask(texOp.Index)}";
    }
    else
    {
        // Bindless was ruled out above, so only an indexed sampler shifts the LOD source.
        int lodSrcIndex = indexed ? 1 : 0;

        string lodExpr = GetSoureExpr(context, operation.GetSource(lodSrcIndex), GetSrcVarType(operation.Inst, lodSrcIndex));

        sizeCall = $"textureSize({samplerName}, {lodExpr}){GetMask(texOp.Index)}";
    }

    bool unscale =
        context.Config.Stage.SupportsRenderScale() &&
        (texOp.Index < 2 || (texOp.Type & SamplerType.Mask) == SamplerType.Texture3D) &&
        !indexed;

    return unscale
        ? $"Helper_TextureSizeUnscale({sizeCall}, {descriptorIndex})"
        : sizeCall;
}
/// <summary>
/// Builds the GLSL expression for a load, or the assignment for a store, on an
/// input or output variable (including the per-patch variants).
/// Sources are consumed in order: the variable id, then optionally the location and
/// component, then optionally a per-vertex array index, then any remaining index sources.
/// For stores, the last source is the value being stored.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="operation">Load or store operation</param>
/// <param name="isStore">True to generate an assignment, false to generate a read expression</param>
/// <returns>The GLSL expression or assignment</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the storage kind is not supported, or when the variable id or location is not a constant operand.
/// </exception>
private static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
{
StorageKind storageKind = operation.StorageKind;
string varName;
AggregateType varType;
// Cursor over the operation sources; it is advanced as each source is consumed.
int srcIndex = 0;
switch (storageKind)
{
case StorageKind.Input:
case StorageKind.InputPerPatch:
case StorageKind.Output:
case StorageKind.OutputPerPatch:
if (!(operation.GetSource(srcIndex++) is AstOperand varId) || varId.Type != OperandType.Constant)
{
throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
}
IoVariable ioVariable = (IoVariable)varId.Value;
bool isOutput = storageKind.IsOutput();
bool isPerPatch = storageKind.IsPerPatch();
// Defaults used when the variable has no per-location input or output.
int location = -1;
int component = 0;
if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
{
if (!(operation.GetSource(srcIndex++) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant)
{
throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand.");
}
location = vecIndex.Value;
// The component source is only consumed here when the configuration reports
// a per-component variable for this location.
if (operation.SourcesCount > srcIndex &&
operation.GetSource(srcIndex) is AstOperand elemIndex &&
elemIndex.Type == OperandType.Constant &&
context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput))
{
component = elemIndex.Value;
srcIndex++;
}
}
(varName, varType) = IoMap.GetGlslVariable(context.Config, ioVariable, location, component, isOutput, isPerPatch);
if (IoMap.IsPerVertexBuiltIn(context.Config.Stage, ioVariable, isOutput))
{
// Since those exist both as input and output on geometry and tessellation shaders,
// we need the gl_in and gl_out prefixes to disambiguate.
if (storageKind == StorageKind.Input)
{
string expr = GetSoureExpr(context, operation.GetSource(srcIndex++), AggregateType.S32);
varName = $"gl_in[{expr}].{varName}";
}
else if (storageKind == StorageKind.Output)
{
string expr = GetSoureExpr(context, operation.GetSource(srcIndex++), AggregateType.S32);
varName = $"gl_out[{expr}].{varName}";
}
}
int firstSrcIndex = srcIndex;
// For stores the last source is the value, so it is excluded from the index sources.
int inputsCount = isStore ? operation.SourcesCount - 1 : operation.SourcesCount;
for (; srcIndex < inputsCount; srcIndex++)
{
IAstNode src = operation.GetSource(srcIndex);
// A constant last index on a vector variable becomes a swizzle; the mask keeps the index within 0..3.
if ((varType & AggregateType.ElementCountMask) != 0 &&
srcIndex == inputsCount - 1 &&
src is AstOperand elementIndex &&
elementIndex.Type == OperandType.Constant)
{
varName += "." + "xyzw"[elementIndex.Value & 3];
}
else if (srcIndex == firstSrcIndex && context.Config.Stage == ShaderStage.TessellationControl && storageKind == StorageKind.Output)
{
// GLSL requires that for tessellation control shader outputs,
// that the index expression must be *exactly* "gl_InvocationID",
// otherwise the compilation fails.
// TODO: Get rid of this and use expression propagation to make sure we generate the correct code from IR.
varName += "[gl_InvocationID]";
}
else
{
varName += $"[{GetSoureExpr(context, src, AggregateType.S32)}]";
}
}
break;
default:
throw new InvalidOperationException($"Invalid storage kind {storageKind}.");
}
if (isStore)
{
// After the loop srcIndex points at the value source; the value is formatted as the variable's element type.
varType &= AggregateType.ElementTypeMask;
varName = $"{varName} = {GetSoureExpr(context, operation.GetSource(srcIndex), varType)}";
}
return varName;
}
/// <summary>
/// Builds the accessor for a storage buffer element, in the form
/// <c>{stagePrefix}_{storagePrefix}[slot].{dataName}[offset]</c>.
/// </summary>
private static string GetStorageBufferAccessor(string slotExpr, string offsetExpr, ShaderStage stage)
{
    string bufferName = OperandManager.GetShaderStagePrefix(stage) + "_" + DefaultNames.StorageNamePrefix;

    return $"{bufferName}[{slotExpr}].{DefaultNames.DataName}[{offsetExpr}]";
}
/// <summary>
/// Returns the single-component swizzle (".r", ".g", ".b" or ".a") for the given component index.
/// </summary>
private static string GetMask(int index)
{
    return "." + "rgba".Substring(index, 1);
}
/// <summary>
/// Returns a swizzle made of a dot followed by the components ("x", "y", "z", "w")
/// whose bits are set in the low 4 bits of the mask.
/// </summary>
private static string GetMaskMultiDest(int mask)
{
    const string ComponentNames = "xyzw";

    char[] swizzle = new char[ComponentNames.Length + 1];
    int length = 0;

    swizzle[length++] = '.';

    for (int bit = 0; bit < ComponentNames.Length; bit++)
    {
        bool selected = ((mask >> bit) & 1) != 0;

        if (selected)
        {
            swizzle[length++] = ComponentNames[bit];
        }
    }

    return new string(swizzle, 0, length);
}
}
}

View File

@ -0,0 +1,56 @@
using Ryujinx.Graphics.Shader.StructuredIr;
using System;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
/// <summary>
/// GLSL code generation for the pack and unpack instructions.
/// </summary>
static class InstGenPacking
{
    /// <summary>
    /// Generates <c>packDouble2x32(uvec2(a, b))</c> from the two operation sources.
    /// </summary>
    public static string PackDouble2x32(CodeGenContext context, AstOperation operation)
    {
        IAstNode lowNode = operation.GetSource(0);
        IAstNode highNode = operation.GetSource(1);

        string low = GetSoureExpr(context, lowNode, GetSrcVarType(operation.Inst, 0));
        string high = GetSoureExpr(context, highNode, GetSrcVarType(operation.Inst, 1));

        return "packDouble2x32(uvec2(" + low + ", " + high + "))";
    }

    /// <summary>
    /// Generates <c>packHalf2x16(vec2(a, b))</c> from the two operation sources.
    /// </summary>
    public static string PackHalf2x16(CodeGenContext context, AstOperation operation)
    {
        IAstNode firstNode = operation.GetSource(0);
        IAstNode secondNode = operation.GetSource(1);

        string first = GetSoureExpr(context, firstNode, GetSrcVarType(operation.Inst, 0));
        string second = GetSoureExpr(context, secondNode, GetSrcVarType(operation.Inst, 1));

        return "packHalf2x16(vec2(" + first + ", " + second + "))";
    }

    /// <summary>
    /// Generates <c>unpackDouble2x32(a)</c> followed by the component selected by the operation index.
    /// </summary>
    public static string UnpackDouble2x32(CodeGenContext context, AstOperation operation)
    {
        IAstNode packedNode = operation.GetSource(0);

        string packed = GetSoureExpr(context, packedNode, GetSrcVarType(operation.Inst, 0));

        return "unpackDouble2x32(" + packed + ")" + GetMask(operation.Index);
    }

    /// <summary>
    /// Generates <c>unpackHalf2x16(a)</c> followed by the component selected by the operation index.
    /// </summary>
    public static string UnpackHalf2x16(CodeGenContext context, AstOperation operation)
    {
        IAstNode packedNode = operation.GetSource(0);

        string packed = GetSoureExpr(context, packedNode, GetSrcVarType(operation.Inst, 0));

        return "unpackHalf2x16(" + packed + ")" + GetMask(operation.Index);
    }

    /// <summary>
    /// Returns ".x" for index 0 and ".y" for index 1.
    /// </summary>
    private static string GetMask(int index)
    {
        return "." + "xy".Substring(index, 1);
    }
}
}

View File

@ -0,0 +1,32 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
/// <summary>
/// GLSL code generation for vector instructions.
/// </summary>
static class InstGenVector
{
    /// <summary>
    /// Generates the expression that extracts one element of a vector.
    /// A constant index becomes a swizzle; any other index becomes an array-style subscript.
    /// </summary>
    public static string VectorExtract(CodeGenContext context, AstOperation operation)
    {
        IAstNode vectorNode = operation.GetSource(0);
        IAstNode indexNode = operation.GetSource(1);

        string vectorExpr = GetSoureExpr(context, vectorNode, OperandManager.GetNodeDestType(context, vectorNode));

        if (!(indexNode is AstOperand constantIndex && constantIndex.Type == OperandType.Constant))
        {
            string dynamicIndex = GetSoureExpr(context, indexNode, GetSrcVarType(operation.Inst, 1));

            return vectorExpr + "[" + dynamicIndex + "]";
        }

        char component = "xyzw"[constantIndex.Value];

        return vectorExpr + "." + component;
    }
}
}

View File

@ -0,0 +1,18 @@
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
/// <summary>
/// Immutable description of an instruction: its type, its operator or function name, and its precedence.
/// </summary>
readonly struct InstInfo
{
    /// <summary>Instruction type.</summary>
    public InstType Type { get; }

    /// <summary>Operator or function name.</summary>
    public string OpName { get; }

    /// <summary>Precedence value.</summary>
    public int Precedence { get; }

    /// <summary>
    /// Creates a new instruction description.
    /// </summary>
    public InstInfo(InstType type, string opName, int precedence)
    {
        (Type, OpName, Precedence) = (type, opName, precedence);
    }
}
}

View File

@ -0,0 +1,33 @@
using System;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
// Instruction type: a category flag (bit 8 and above) combined with a small number
// stored in the low 8 bits, which ArityMask extracts.
// Note that OpNullary has the same value as Op, and CallNullary the same value as Call.
[Flags]
enum InstType
{
// Operator forms, combined with the values 0 to 3.
OpNullary = Op | 0,
OpUnary = Op | 1,
OpBinary = Op | 2,
OpBinaryCom = Op | 2 | Commutative,
OpTernary = Op | 3,
// Call forms, combined with the values 0 to 4.
CallNullary = Call | 0,
CallUnary = Call | 1,
CallBinary = Call | 2,
CallTernary = Call | 3,
CallQuaternary = Call | 4,
// The atomic instructions have one extra operand,
// for the storage slot and offset pair.
AtomicBinary = Call | Atomic | 3,
AtomicTernary = Call | Atomic | 4,
// Category flags.
Commutative = 1 << 8,
Op = 1 << 9,
Call = 1 << 10,
Atomic = 1 << 11,
Special = 1 << 12,
// Mask for the low 8 bits.
ArityMask = 0xff
}
}

View File

@ -0,0 +1,145 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Globalization;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
/// <summary>
/// Maps IO variables to GLSL variable names and types.
/// </summary>
static class IoMap
{
    /// <summary>
    /// Gets the GLSL variable name and type for an IO variable.
    /// Returns <c>(null, AggregateType.Invalid)</c> for variables that have no mapping.
    /// </summary>
    public static (string, AggregateType) GetGlslVariable(
        ShaderConfig config,
        IoVariable ioVariable,
        int location,
        int component,
        bool isOutput,
        bool isPerPatch) => ioVariable switch
    {
        IoVariable.BackColorDiffuse => ("gl_BackColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
        IoVariable.BackColorSpecular => ("gl_BackSecondaryColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
        IoVariable.BaseInstance => ("gl_BaseInstanceARB", AggregateType.S32),
        IoVariable.BaseVertex => ("gl_BaseVertexARB", AggregateType.S32),
        IoVariable.ClipDistance => ("gl_ClipDistance", AggregateType.Array | AggregateType.FP32),
        IoVariable.CtaId => ("gl_WorkGroupID", AggregateType.Vector3 | AggregateType.U32),
        IoVariable.DrawIndex => ("gl_DrawIDARB", AggregateType.S32),
        IoVariable.FogCoord => ("gl_FogFragCoord", AggregateType.FP32), // Deprecated.
        IoVariable.FragmentCoord => ("gl_FragCoord", AggregateType.Vector4 | AggregateType.FP32),
        IoVariable.FragmentOutputColor => GetFragmentOutputColorVariableName(config, location),
        IoVariable.FragmentOutputDepth => ("gl_FragDepth", AggregateType.FP32),
        IoVariable.FragmentOutputIsBgra => (DefaultNames.SupportBlockIsBgraName, AggregateType.Array | AggregateType.Bool),
        IoVariable.FrontColorDiffuse => ("gl_FrontColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
        IoVariable.FrontColorSpecular => ("gl_FrontSecondaryColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
        IoVariable.FrontFacing => ("gl_FrontFacing", AggregateType.Bool),
        IoVariable.InstanceId => ("gl_InstanceID", AggregateType.S32),
        IoVariable.InstanceIndex => ("gl_InstanceIndex", AggregateType.S32),
        IoVariable.InvocationId => ("gl_InvocationID", AggregateType.S32),
        IoVariable.Layer => ("gl_Layer", AggregateType.S32),
        IoVariable.PatchVertices => ("gl_PatchVerticesIn", AggregateType.S32),
        IoVariable.PointCoord => ("gl_PointCoord", AggregateType.Vector2 | AggregateType.FP32),
        IoVariable.PointSize => ("gl_PointSize", AggregateType.FP32),
        IoVariable.Position => ("gl_Position", AggregateType.Vector4 | AggregateType.FP32),
        IoVariable.PrimitiveId => GetPrimitiveIdVariableName(config.Stage, isOutput),
        IoVariable.SubgroupEqMask => GetSubgroupMaskVariableName(config, "Eq"),
        IoVariable.SubgroupGeMask => GetSubgroupMaskVariableName(config, "Ge"),
        IoVariable.SubgroupGtMask => GetSubgroupMaskVariableName(config, "Gt"),
        IoVariable.SubgroupLaneId => GetSubgroupInvocationIdVariableName(config),
        IoVariable.SubgroupLeMask => GetSubgroupMaskVariableName(config, "Le"),
        IoVariable.SubgroupLtMask => GetSubgroupMaskVariableName(config, "Lt"),
        IoVariable.SupportBlockRenderScale => (DefaultNames.SupportBlockRenderScaleName, AggregateType.Array | AggregateType.FP32),
        IoVariable.SupportBlockViewInverse => (DefaultNames.SupportBlockViewportInverse, AggregateType.Vector2 | AggregateType.FP32),
        IoVariable.TessellationCoord => ("gl_TessCoord", AggregateType.Vector3 | AggregateType.FP32),
        IoVariable.TessellationLevelInner => ("gl_TessLevelInner", AggregateType.Array | AggregateType.FP32),
        IoVariable.TessellationLevelOuter => ("gl_TessLevelOuter", AggregateType.Array | AggregateType.FP32),
        IoVariable.TextureCoord => ("gl_TexCoord", AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
        IoVariable.ThreadId => ("gl_LocalInvocationID", AggregateType.Vector3 | AggregateType.U32),
        IoVariable.ThreadKill => ("gl_HelperInvocation", AggregateType.Bool),
        IoVariable.UserDefined => GetUserDefinedVariableName(config, location, component, isOutput, isPerPatch),
        IoVariable.VertexId => ("gl_VertexID", AggregateType.S32),
        IoVariable.VertexIndex => ("gl_VertexIndex", AggregateType.S32),
        IoVariable.ViewportIndex => ("gl_ViewportIndex", AggregateType.S32),
        IoVariable.ViewportMask => ("gl_ViewportMask", AggregateType.Array | AggregateType.S32),
        _ => (null, AggregateType.Invalid)
    };

    /// <summary>
    /// Checks whether a variable is one of the listed built-ins for the given stage.
    /// For outputs this is only true on tessellation control; for inputs it is true on
    /// tessellation control, tessellation evaluation and geometry.
    /// </summary>
    public static bool IsPerVertexBuiltIn(ShaderStage stage, IoVariable ioVariable, bool isOutput)
    {
        bool isCandidate = ioVariable is
            IoVariable.Layer or
            IoVariable.ViewportIndex or
            IoVariable.PointSize or
            IoVariable.Position or
            IoVariable.ClipDistance or
            IoVariable.PointCoord or
            IoVariable.ViewportMask;

        if (!isCandidate)
        {
            return false;
        }

        if (isOutput)
        {
            return stage == ShaderStage.TessellationControl;
        }

        return stage is
            ShaderStage.TessellationControl or
            ShaderStage.TessellationEvaluation or
            ShaderStage.Geometry;
    }

    /// <summary>
    /// Gets the fragment output color variable. A negative location yields the bare prefix
    /// and the type of location 0.
    /// </summary>
    private static (string, AggregateType) GetFragmentOutputColorVariableName(ShaderConfig config, int location)
    {
        bool hasLocation = location >= 0;

        string name = hasLocation
            ? DefaultNames.OAttributePrefix + location.ToString(CultureInfo.InvariantCulture)
            : DefaultNames.OAttributePrefix;

        return (name, config.GetFragmentOutputColorType(hasLocation ? location : 0));
    }

    /// <summary>
    /// Gets the primitive id variable for the stage.
    /// </summary>
    private static (string, AggregateType) GetPrimitiveIdVariableName(ShaderStage stage, bool isOutput)
    {
        // The geometry stage has an additional gl_PrimitiveIDIn variable.
        bool isGeometryInput = !isOutput && stage == ShaderStage.Geometry;

        return (isGeometryInput ? "gl_PrimitiveIDIn" : "gl_PrimitiveID", AggregateType.S32);
    }

    /// <summary>
    /// Gets the subgroup mask variable for the given comparison suffix, using the ARB
    /// form when the host supports shader ballot.
    /// </summary>
    private static (string, AggregateType) GetSubgroupMaskVariableName(ShaderConfig config, string cc)
    {
        if (config.GpuAccessor.QueryHostSupportsShaderBallot())
        {
            return ($"unpackUint2x32(gl_SubGroup{cc}MaskARB)", AggregateType.Vector2 | AggregateType.U32);
        }

        return ($"gl_Subgroup{cc}Mask", AggregateType.Vector4 | AggregateType.U32);
    }

    /// <summary>
    /// Gets the subgroup invocation id variable, using the ARB form when the host
    /// supports shader ballot.
    /// </summary>
    private static (string, AggregateType) GetSubgroupInvocationIdVariableName(ShaderConfig config)
    {
        if (config.GpuAccessor.QueryHostSupportsShaderBallot())
        {
            return ("gl_SubGroupInvocationARB", AggregateType.U32);
        }

        return ("gl_SubgroupInvocationID", AggregateType.U32);
    }

    /// <summary>
    /// Gets the name and type of a user defined variable. The name is a prefix, followed by
    /// the location and, for per-component variables, an underscore and the component letter.
    /// </summary>
    private static (string, AggregateType) GetUserDefinedVariableName(ShaderConfig config, int location, int component, bool isOutput, bool isPerPatch)
    {
        string name;

        if (isPerPatch)
        {
            name = DefaultNames.PerPatchAttributePrefix;
        }
        else if (isOutput)
        {
            name = DefaultNames.OAttributePrefix;
        }
        else
        {
            name = DefaultNames.IAttributePrefix;
        }

        if (location >= 0)
        {
            name += location.ToString(CultureInfo.InvariantCulture);

            if (config.HasPerLocationInputOrOutputComponent(IoVariable.UserDefined, location, component, isOutput))
            {
                name += "_" + "xyzw"[component & 3];
            }

            return (name, config.GetUserDefinedType(location, isOutput));
        }

        return (name, config.GetUserDefinedType(0, isOutput));
    }
}
}

View File

@ -0,0 +1,104 @@
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Globalization;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
/// <summary>
/// Formats numeric constants as GLSL literals.
/// </summary>
static class NumberFormatter
{
    // Values with a magnitude above this are written in hexadecimal.
    private const int MaxDecimal = 256;

    /// <summary>
    /// Formats the raw 32-bit value as a literal of the given type.
    /// Returns false only when the value is a float that cannot be written (NaN or infinity).
    /// </summary>
    /// <exception cref="ArgumentException">The destination type is not FP32, S32, U32 or Bool.</exception>
    public static bool TryFormat(int value, AggregateType dstType, out string formatted)
    {
        switch (dstType)
        {
            case AggregateType.FP32:
                return TryFormatFloat(BitConverter.Int32BitsToSingle(value), out formatted);

            case AggregateType.S32:
                formatted = FormatInt(value);
                return true;

            case AggregateType.U32:
                formatted = FormatUint((uint)value);
                return true;

            case AggregateType.Bool:
                formatted = value != 0 ? "true" : "false";
                return true;

            default:
                throw new ArgumentException($"Invalid variable type \"{dstType}\".");
        }
    }

    /// <summary>
    /// Formats a float literal, throwing when the value is NaN or infinity.
    /// </summary>
    public static string FormatFloat(float value)
    {
        if (TryFormatFloat(value, out string formatted))
        {
            return formatted;
        }

        throw new ArgumentException("Failed to convert float value to string.");
    }

    /// <summary>
    /// Tries to format a float literal. NaN and infinity are rejected.
    /// ".0" is appended when the text has neither a decimal point nor an exponent.
    /// </summary>
    public static bool TryFormatFloat(float value, out string formatted)
    {
        if (float.IsNaN(value) || float.IsInfinity(value))
        {
            formatted = null;
            return false;
        }

        string text = value.ToString("G9", CultureInfo.InvariantCulture);

        bool looksLikeFloat = text.IndexOfAny(new[] { '.', 'e', 'E' }) >= 0;

        formatted = looksLikeFloat ? text : text + ".0";
        return true;
    }

    /// <summary>
    /// Formats an integer literal as signed or unsigned, depending on the destination type.
    /// </summary>
    /// <exception cref="ArgumentException">The destination type is not S32 or U32.</exception>
    public static string FormatInt(int value, AggregateType dstType)
    {
        switch (dstType)
        {
            case AggregateType.S32:
                return FormatInt(value);

            case AggregateType.U32:
                return FormatUint((uint)value);

            default:
                throw new ArgumentException($"Invalid variable type \"{dstType}\".");
        }
    }

    /// <summary>
    /// Formats a signed integer literal: decimal within [-256, 256], hexadecimal otherwise.
    /// </summary>
    public static string FormatInt(int value)
    {
        if (value < -MaxDecimal || value > MaxDecimal)
        {
            return "0x" + value.ToString("X", CultureInfo.InvariantCulture);
        }

        return value.ToString(CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Formats an unsigned integer literal with the "u" suffix: decimal up to 256, hexadecimal otherwise.
    /// </summary>
    public static string FormatUint(uint value)
    {
        string digits = value > MaxDecimal
            ? "0x" + value.ToString("X", CultureInfo.InvariantCulture)
            : value.ToString(CultureInfo.InvariantCulture);

        return digits + "u";
    }
}
}

View File

@ -0,0 +1,254 @@
using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
class OperandManager
{
private static readonly string[] _stagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" };
private Dictionary<AstOperand, string> _locals;
/// <summary>
/// Creates an operand manager with no declared locals.
/// </summary>
public OperandManager() => _locals = new Dictionary<AstOperand, string>();
/// <summary>
/// Registers a local variable operand and assigns it a name.
/// The name is built from the local name prefix and the number of locals declared before it.
/// </summary>
/// <param name="operand">Local variable operand</param>
/// <returns>The name assigned to the local</returns>
public string DeclareLocal(AstOperand operand)
{
    int ordinal = _locals.Count;
    string localName = $"{DefaultNames.LocalNamePrefix}_{ordinal}";

    _locals.Add(operand, localName);

    return localName;
}
/// <summary>
/// Gets the expression text used to read an operand.
/// </summary>
/// <param name="context">Code generation context, provides the shader configuration</param>
/// <param name="operand">Operand to get the expression for</param>
/// <returns>The expression text</returns>
/// <exception cref="ArgumentException">Thrown when the operand type is not handled</exception>
public string GetExpression(CodeGenContext context, AstOperand operand)
{
    switch (operand.Type)
    {
        case OperandType.Argument:
            return GetArgumentName(operand.Value);

        case OperandType.Constant:
            return NumberFormatter.FormatInt(operand.Value);

        case OperandType.ConstantBuffer:
            return GetConstantBufferName(operand, context.Config);

        case OperandType.LocalVariable:
            return _locals[operand];

        case OperandType.Undefined:
            return DefaultNames.UndefinedName;

        default:
            throw new ArgumentException($"Invalid operand type \"{operand.Type}\".");
    }
}
/// <summary>
/// Gets the constant buffer access expression for a constant buffer operand.
/// </summary>
/// <param name="operand">Operand holding the constant buffer slot and offset</param>
/// <param name="config">Shader configuration, provides the stage and the used features</param>
/// <returns>The access expression</returns>
private static string GetConstantBufferName(AstOperand operand, ShaderConfig config)
{
    var slot = operand.CbufSlot;
    var offset = operand.CbufOffset;
    var stage = config.Stage;
    bool cbIndexable = config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing);

    return GetConstantBufferName(slot, offset, stage, cbIndexable);
}
/// <summary>
/// Gets the access expression for a constant buffer element at a constant offset.
/// The upper bits of the offset (offset >> 2) index the vector, the lower two bits select the component.
/// </summary>
/// <param name="slot">Constant buffer slot</param>
/// <param name="offset">Offset of the element</param>
/// <param name="stage">Shader stage</param>
/// <param name="cbIndexable">True to use the indexable uniform buffer name</param>
/// <returns>The access expression</returns>
public static string GetConstantBufferName(int slot, int offset, ShaderStage stage, bool cbIndexable)
{
    string bufferName = GetUbName(stage, slot, cbIndexable);
    int vectorIndex = offset >> 2;
    char component = GetSwizzleMask(offset & 3);

    return $"{bufferName}[{vectorIndex}].{component}";
}
/// <summary>
/// Builds an expression that selects one component of a 4-component vector using an index expression.
/// </summary>
/// <param name="vectorName">Expression of the vector</param>
/// <param name="indexExpr">Expression of the component index</param>
/// <param name="indexElement">True to index the vector directly, false to emit a chain of conditional selections</param>
/// <returns>The component selection expression</returns>
private static string GetVec4Indexed(string vectorName, string indexExpr, bool indexElement)
{
    if (!indexElement)
    {
        // Start from the X component, then wrap it with one conditional per remaining component.
        string selection = $"{vectorName}.x";
        int component = 1;

        while (component < 4)
        {
            selection = $"(({indexExpr}) == {component}) ? ({vectorName}.{GetSwizzleMask(component)}) : ({selection})";
            component++;
        }

        return $"({selection})";
    }

    return $"{vectorName}[{indexExpr}]";
}
/// <summary>
/// Gets the access expression for a constant buffer element at a dynamic offset, on a constant slot.
/// </summary>
/// <param name="slot">Constant buffer slot</param>
/// <param name="offsetExpr">Expression of the element offset</param>
/// <param name="stage">Shader stage</param>
/// <param name="cbIndexable">True to use the indexable uniform buffer name</param>
/// <param name="indexElement">True to index the vector component directly</param>
/// <returns>The access expression</returns>
public static string GetConstantBufferName(int slot, string offsetExpr, ShaderStage stage, bool cbIndexable, bool indexElement)
{
    string vectorExpr = GetUbName(stage, slot, cbIndexable) + $"[{offsetExpr} >> 2]";
    string componentExpr = offsetExpr + " & 3";

    return GetVec4Indexed(vectorExpr, componentExpr, indexElement);
}
/// <summary>
/// Gets the access expression for a constant buffer element at a dynamic offset, on a dynamic slot.
/// </summary>
/// <param name="slotExpr">Expression of the constant buffer slot</param>
/// <param name="offsetExpr">Expression of the element offset</param>
/// <param name="stage">Shader stage</param>
/// <param name="indexElement">True to index the vector component directly</param>
/// <returns>The access expression</returns>
public static string GetConstantBufferName(string slotExpr, string offsetExpr, ShaderStage stage, bool indexElement)
{
    string vectorExpr = GetUbName(stage, slotExpr) + $"[{offsetExpr} >> 2]";
    string componentExpr = offsetExpr + " & 3";

    return GetVec4Indexed(vectorExpr, componentExpr, indexElement);
}
/// <summary>
/// Gets the name of the uniform buffer for a given slot.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="slot">Constant buffer slot</param>
/// <param name="cbIndexable">True to produce an element of the uniform buffer array, false for a per-slot name</param>
/// <returns>The uniform buffer name</returns>
public static string GetUbName(ShaderStage stage, int slot, bool cbIndexable)
{
    return cbIndexable
        ? GetUbName(stage, NumberFormatter.FormatInt(slot, AggregateType.S32))
        : $"{GetShaderStagePrefix(stage)}_{DefaultNames.UniformNamePrefix}{slot}_{DefaultNames.UniformNameSuffix}";
}
/// <summary>
/// Gets the expression that accesses the data member of a uniform buffer array element.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="slotExpr">Expression of the constant buffer slot</param>
/// <returns>The uniform buffer data expression</returns>
private static string GetUbName(ShaderStage stage, string slotExpr)
{
    string stagePrefix = GetShaderStagePrefix(stage);

    return $"{stagePrefix}_{DefaultNames.UniformNamePrefix}[{slotExpr}].{DefaultNames.DataName}";
}
/// <summary>
/// Gets the sampler name for a texture operation.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="texOp">Texture operation that uses the sampler</param>
/// <param name="indexExpr">Expression of the array index, used when the sampler type is indexed</param>
/// <returns>The sampler name</returns>
public static string GetSamplerName(ShaderStage stage, AstTextureOperation texOp, string indexExpr)
{
    bool indexed = texOp.Type.HasFlag(SamplerType.Indexed);

    return GetSamplerName(stage, texOp.CbufSlot, texOp.Handle, indexed, indexExpr);
}
/// <summary>
/// Gets the sampler name for a texture handle.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="cbufSlot">Constant buffer slot of the handle, negative selects the "_tcb" naming</param>
/// <param name="handle">Texture handle, written in hexadecimal</param>
/// <param name="indexed">True to append an array access to the name</param>
/// <param name="indexExpr">Expression of the array index</param>
/// <returns>The sampler name</returns>
public static string GetSamplerName(ShaderStage stage, int cbufSlot, int handle, bool indexed, string indexExpr)
{
    string suffix;

    if (cbufSlot < 0)
    {
        suffix = $"_tcb_{handle:X}";
    }
    else
    {
        suffix = $"_cb{cbufSlot}_{handle:X}";
    }

    string arrayAccess = indexed ? $"a[{indexExpr}]" : string.Empty;

    return GetShaderStagePrefix(stage) + "_" + DefaultNames.SamplerNamePrefix + suffix + arrayAccess;
}
/// <summary>
/// Gets the image name for a texture operation.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="texOp">Texture operation that uses the image</param>
/// <param name="indexExpr">Expression of the array index, used when the sampler type is indexed</param>
/// <returns>The image name</returns>
public static string GetImageName(ShaderStage stage, AstTextureOperation texOp, string indexExpr)
{
    bool indexed = texOp.Type.HasFlag(SamplerType.Indexed);

    return GetImageName(stage, texOp.CbufSlot, texOp.Handle, texOp.Format, indexed, indexExpr);
}
/// <summary>
/// Gets the image name for a texture handle and format.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="cbufSlot">Constant buffer slot of the handle, negative selects the "_tcb" naming</param>
/// <param name="handle">Texture handle, written in hexadecimal</param>
/// <param name="format">Image format, its GLSL format name is part of the image name</param>
/// <param name="indexed">True to append an array access to the name</param>
/// <param name="indexExpr">Expression of the array index</param>
/// <returns>The image name</returns>
public static string GetImageName(
    ShaderStage stage,
    int cbufSlot,
    int handle,
    TextureFormat format,
    bool indexed,
    string indexExpr)
{
    string suffix;

    if (cbufSlot < 0)
    {
        suffix = $"_tcb_{handle:X}_{format.ToGlslFormat()}";
    }
    else
    {
        suffix = $"_cb{cbufSlot}_{handle:X}_{format.ToGlslFormat()}";
    }

    string arrayAccess = indexed ? $"a[{indexExpr}]" : string.Empty;

    return GetShaderStagePrefix(stage) + "_" + DefaultNames.ImageNamePrefix + suffix + arrayAccess;
}
/// <summary>
/// Gets the name prefix used for a shader stage.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <returns>The stage prefix, or "invalid" when the stage value has no entry in the prefix table</returns>
public static string GetShaderStagePrefix(ShaderStage stage)
{
    int index = (int)stage;
    bool hasPrefix = index >= 0 && index < _stagePrefixes.Length;

    return hasPrefix ? _stagePrefixes[index] : "invalid";
}
/// <summary>
/// Gets the component letter ('x', 'y', 'z' or 'w') for a component index.
/// </summary>
/// <param name="value">Component index, from 0 to 3</param>
/// <returns>The component letter</returns>
private static char GetSwizzleMask(int value)
{
    const string ComponentNames = "xyzw";

    return ComponentNames[value];
}
/// <summary>
/// Gets the name of a function argument from its index.
/// </summary>
/// <param name="argIndex">Index of the argument</param>
/// <returns>The argument name prefix followed by the index</returns>
public static string GetArgumentName(int argIndex)
{
    string prefix = DefaultNames.ArgumentNamePrefix;

    return $"{prefix}{argIndex}";
}
/// <summary>
/// Gets the type of the value produced by an AST node.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="node">Operation or operand node</param>
/// <returns>The type of the node result</returns>
/// <exception cref="InvalidOperationException">Thrown when an input/output load does not have the required constant operands</exception>
/// <exception cref="ArgumentException">Thrown when the node is neither an operation nor an operand</exception>
public static AggregateType GetNodeDestType(CodeGenContext context, IAstNode node)
{
    // TODO: Get rid of that function entirely and return the type from the operation generation
    // functions directly, like SPIR-V does.
    if (node is AstOperation operation)
    {
        if (operation.Inst == Instruction.Load)
        {
            // Only loads from input/output storage are resolved here,
            // loads with any other storage kind fall through to GetDestVarType below.
            switch (operation.StorageKind)
            {
                case StorageKind.Input:
                case StorageKind.InputPerPatch:
                case StorageKind.Output:
                case StorageKind.OutputPerPatch:
                    // The first source identifies the IO variable and must be a constant.
                    if (!(operation.GetSource(0) is AstOperand varId) || varId.Type != OperandType.Constant)
                    {
                        throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
                    }
                    IoVariable ioVariable = (IoVariable)varId.Value;
                    bool isOutput = operation.StorageKind == StorageKind.Output || operation.StorageKind == StorageKind.OutputPerPatch;
                    bool isPerPatch = operation.StorageKind == StorageKind.InputPerPatch || operation.StorageKind == StorageKind.OutputPerPatch;
                    int location = 0;
                    int component = 0;
                    if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
                    {
                        // For per-location variables, the second source is the (constant) location.
                        if (!(operation.GetSource(1) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant)
                        {
                            throw new InvalidOperationException($"Second input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
                        }
                        location = vecIndex.Value;
                        // The component is only taken from the third source when it is a constant
                        // and the configuration reports a per-component variable at that location.
                        if (operation.SourcesCount > 2 &&
                            operation.GetSource(2) is AstOperand elemIndex &&
                            elemIndex.Type == OperandType.Constant &&
                            context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput))
                        {
                            component = elemIndex.Value;
                        }
                    }
                    (_, AggregateType varType) = IoMap.GetGlslVariable(context.Config, ioVariable, location, component, isOutput, isPerPatch);
                    // Only the element type bits of the variable type are returned.
                    return varType & AggregateType.ElementTypeMask;
            }
        }
        else if (operation.Inst == Instruction.Call)
        {
            // The first source of a call is the constant index of the called function.
            AstOperand funcId = (AstOperand)operation.GetSource(0);
            Debug.Assert(funcId.Type == OperandType.Constant);
            return context.GetFunction(funcId.Value).ReturnType;
        }
        else if (operation.Inst == Instruction.VectorExtract)
        {
            // Type of the source vector with the element count bits cleared.
            return GetNodeDestType(context, operation.GetSource(0)) & ~AggregateType.ElementCountMask;
        }
        else if (operation is AstTextureOperation texOp)
        {
            if (texOp.Inst == Instruction.ImageLoad ||
                texOp.Inst == Instruction.ImageStore ||
                texOp.Inst == Instruction.ImageAtomic)
            {
                // Image operations take their component type from the image format.
                return texOp.GetVectorType(texOp.Format.GetComponentType());
            }
            else if (texOp.Inst == Instruction.TextureSample)
            {
                return texOp.GetVectorType(GetDestVarType(operation.Inst));
            }
        }
        // Everything else uses the destination type associated with the instruction.
        return GetDestVarType(operation.Inst);
    }
    else if (node is AstOperand operand)
    {
        if (operand.Type == OperandType.Argument)
        {
            // Argument types come from the function currently being generated.
            int argIndex = operand.Value;
            return context.CurrentFunction.GetArgumentType(argIndex);
        }
        return OperandInfo.GetVarType(operand);
    }
    else
    {
        throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\".");
    }
}
}
}

View File

@ -0,0 +1,87 @@
using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
static class TypeConversion
{
/// <summary>
/// Gets the expression of a node, reinterpreted from the source type as the destination type.
/// Constant operands are written directly as a literal of the destination type when they can be formatted.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="node">Node to get the expression from</param>
/// <param name="srcType">Type of the node expression</param>
/// <param name="dstType">Type the expression should be reinterpreted as</param>
/// <returns>The reinterpreted expression</returns>
public static string ReinterpretCast(
    CodeGenContext context,
    IAstNode node,
    AggregateType srcType,
    AggregateType dstType)
{
    if (node is AstOperand constant &&
        constant.Type == OperandType.Constant &&
        NumberFormatter.TryFormat(constant.Value, dstType, out string literal))
    {
        return literal;
    }

    string nodeExpr = InstGen.GetExpression(context, node);

    return ReinterpretCast(nodeExpr, node, srcType, dstType);
}
/// <summary>
/// Wraps an expression with the conversion needed to reinterpret it from the source type as the destination type.
/// </summary>
/// <param name="expr">Expression to convert</param>
/// <param name="node">Node the expression was generated from, used to decide if it needs to be enclosed</param>
/// <param name="srcType">Type of the expression</param>
/// <param name="dstType">Type the expression should be reinterpreted as</param>
/// <returns>The converted expression, or the expression itself when both types match</returns>
/// <exception cref="ArgumentException">Thrown when there is no conversion between the two types</exception>
private static string ReinterpretCast(string expr, IAstNode node, AggregateType srcType, AggregateType dstType)
{
    if (srcType == dstType)
    {
        return expr;
    }

    ArgumentException InvalidCast()
    {
        return new ArgumentException($"Invalid reinterpret cast from \"{srcType}\" to \"{dstType}\".");
    }

    switch ((srcType, dstType))
    {
        // Conversions from float.
        case (AggregateType.FP32, AggregateType.Bool):
            return $"(floatBitsToInt({expr}) != 0)";
        case (AggregateType.FP32, AggregateType.S32):
            return $"floatBitsToInt({expr})";
        case (AggregateType.FP32, AggregateType.U32):
            return $"floatBitsToUint({expr})";
        case (AggregateType.FP32, _):
            throw InvalidCast();

        // Conversions to float.
        case (AggregateType.Bool, AggregateType.FP32):
            return $"intBitsToFloat({ReinterpretBoolToInt(expr, node, AggregateType.S32)})";
        case (AggregateType.S32, AggregateType.FP32):
            return $"intBitsToFloat({expr})";
        case (AggregateType.U32, AggregateType.FP32):
            return $"uintBitsToFloat({expr})";
        case (_, AggregateType.FP32):
            throw InvalidCast();

        // Conversions between bool and integer types.
        case (AggregateType.Bool, _):
            return ReinterpretBoolToInt(expr, node, dstType);
        case (_, AggregateType.Bool):
            string comparand = InstGenHelper.Enclose(expr, node, Instruction.CompareNotEqual, isLhs: true);
            return $"({comparand} != 0)";

        // Conversions between integer types.
        case (_, AggregateType.S32):
            return $"int({expr})";
        case (_, AggregateType.U32):
            return $"uint({expr})";

        default:
            throw InvalidCast();
    }
}
/// <summary>
/// Converts a boolean expression into a selection between the integer constants used for true and false.
/// </summary>
/// <param name="expr">Boolean expression</param>
/// <param name="node">Node the expression was generated from, used to decide if it needs to be enclosed</param>
/// <param name="dstType">Integer type of the two constants</param>
/// <returns>The conditional selection expression</returns>
private static string ReinterpretBoolToInt(string expr, IAstNode node, AggregateType dstType)
{
    string whenTrue = NumberFormatter.FormatInt(IrConsts.True, dstType);
    string whenFalse = NumberFormatter.FormatInt(IrConsts.False, dstType);

    string condition = InstGenHelper.Enclose(expr, node, Instruction.ConditionalSelect, isLhs: false);

    return $"({condition} ? {whenTrue} : {whenFalse})";
}
}
}

View File

@ -0,0 +1,409 @@
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using Spv.Generator;
using System;
using System.Collections.Generic;
using static Spv.Specification;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
using IrConsts = IntermediateRepresentation.IrConsts;
using IrOperandType = IntermediateRepresentation.OperandType;
partial class CodeGenContext : Module
{
// SPIR-V version passed to the base Module constructor, packed as (major << 16) | (minor << 8) | revision.
private const uint SpirvVersionMajor = 1;
private const uint SpirvVersionMinor = 3;
private const uint SpirvVersionRevision = 0;
private const uint SpirvVersionPacked = (SpirvVersionMajor << 16) | (SpirvVersionMinor << 8) | SpirvVersionRevision;
// Program information and shader configuration, as given to the constructor.
public StructuredProgramInfo Info { get; }
public ShaderConfig Config { get; }
// Vertex count of the input primitive; only assigned by the constructor for the geometry stage.
public int InputVertices { get; }
// Uniform buffer variables, keyed by constant buffer slot.
public Dictionary<int, Instruction> UniformBuffers { get; } = new Dictionary<int, Instruction>();
// Variables for the support buffer, the uniform/storage buffer arrays and the local/shared memory.
// They are expected to be assigned while declaring the shader resources (see Declarations).
public Instruction SupportBuffer { get; set; }
public Instruction UniformBuffersArray { get; set; }
public Instruction StorageBuffersArray { get; set; }
public Instruction LocalMemory { get; set; }
public Instruction SharedMemory { get; set; }
// Sampler type of each declared sampler.
public Dictionary<TextureMeta, SamplerType> SamplersTypes { get; } = new Dictionary<TextureMeta, SamplerType>();
// Declared samplers, as (image type, sampled image type, variable).
public Dictionary<TextureMeta, (Instruction, Instruction, Instruction)> Samplers { get; } = new Dictionary<TextureMeta, (Instruction, Instruction, Instruction)>();
// Declared images, as (image type, variable).
public Dictionary<TextureMeta, (Instruction, Instruction)> Images { get; } = new Dictionary<TextureMeta, (Instruction, Instruction)>();
// Input and output variables; GetMainInterface returns the values of all four dictionaries.
public Dictionary<IoDefinition, Instruction> Inputs { get; } = new Dictionary<IoDefinition, Instruction>();
public Dictionary<IoDefinition, Instruction> Outputs { get; } = new Dictionary<IoDefinition, Instruction>();
public Dictionary<IoDefinition, Instruction> InputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>();
public Dictionary<IoDefinition, Instruction> OutputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>();
// Function-local temporary variable for coordinates.
public Instruction CoordTemp { get; set; }
// Per-function state, cleared by StartFunction.
private readonly Dictionary<AstOperand, Instruction> _locals = new Dictionary<AstOperand, Instruction>();
private readonly Dictionary<int, Instruction[]> _localForArgs = new Dictionary<int, Instruction[]>();
private readonly Dictionary<int, Instruction> _funcArgs = new Dictionary<int, Instruction>();
// Declared functions, keyed by function index. Not cleared by StartFunction.
private readonly Dictionary<int, (StructuredFunction, Instruction)> _functions = new Dictionary<int, (StructuredFunction, Instruction)>();
/// <summary>
/// Labels created for a block, along with the number of times the block was entered.
/// </summary>
private class BlockState
{
    private readonly List<Instruction> _labels = new List<Instruction>();
    private int _entryCount;

    /// <summary>
    /// Gets the label for the next entry of the block, without counting a new entry.
    /// </summary>
    public Instruction GetNextLabel(CodeGenContext context)
    {
        return GetLabel(context, _entryCount);
    }

    /// <summary>
    /// Gets the label for the next entry of the block, and counts a new entry.
    /// </summary>
    public Instruction GetNextLabelAutoIncrement(CodeGenContext context)
    {
        int entryIndex = _entryCount;
        _entryCount = entryIndex + 1;

        return GetLabel(context, entryIndex);
    }

    /// <summary>
    /// Gets the label at a given index, creating all the labels up to that index if they don't exist yet.
    /// </summary>
    public Instruction GetLabel(CodeGenContext context, int index)
    {
        for (int count = _labels.Count; count <= index; count++)
        {
            _labels.Add(context.Label());
        }

        return _labels[index];
    }
}
// Label state of each block, created on first use by GetBlockStateLazy.
private readonly Dictionary<AstBlock, BlockState> _labels = new Dictionary<AstBlock, BlockState>();
// Pair of labels associated with each loop block; assigned from outside of this class.
public Dictionary<AstBlock, (Instruction, Instruction)> LoopTargets { get; set; }
// Last block passed to EnterBlock.
public AstBlock CurrentBlock { get; private set; }
// Delegates created for this context by the constructor.
public SpirvDelegates Delegates { get; }
/// <summary>
/// Creates a new SPIR-V code generation context.
/// </summary>
/// <param name="info">Structured program information</param>
/// <param name="config">Shader configuration</param>
/// <param name="instPool">Instruction pool, forwarded to the base module</param>
/// <param name="integerPool">Literal integer pool, forwarded to the base module</param>
/// <exception cref="InvalidOperationException">Thrown for a geometry shader with an unknown input topology</exception>
public CodeGenContext(
    StructuredProgramInfo info,
    ShaderConfig config,
    GeneratorPool<Instruction> instPool,
    GeneratorPool<LiteralInteger> integerPool) : base(SpirvVersionPacked, instPool, integerPool)
{
    Info = info;
    Config = config;

    if (config.Stage == ShaderStage.Geometry)
    {
        InputTopology inPrimitive = config.GpuAccessor.QueryPrimitiveTopology();

        // Primitives with adjacency also carry the adjacent vertices:
        // a line with adjacency has 4 vertices, and a triangle with adjacency has 6.
        InputVertices = inPrimitive switch
        {
            InputTopology.Points => 1,
            InputTopology.Lines => 2,
            InputTopology.LinesAdjacency => 4,
            InputTopology.Triangles => 3,
            InputTopology.TrianglesAdjacency => 6,
            _ => throw new InvalidOperationException($"Invalid input topology \"{inPrimitive}\".")
        };
    }

    AddCapability(Capability.Shader);
    AddCapability(Capability.Float64);

    SetMemoryModel(AddressingModel.Logical, MemoryModel.GLSL450);

    Delegates = new SpirvDelegates(this);
}
/// <summary>
/// Discards all the per-function state (arguments, locals for arguments and local variables).
/// </summary>
public void StartFunction()
{
    _funcArgs.Clear();
    _localForArgs.Clear();
    _locals.Clear();
}
/// <summary>
/// Makes a block the current block, and adds the label for this entry of the block.
/// </summary>
/// <param name="block">Block being entered</param>
public void EnterBlock(AstBlock block)
{
    CurrentBlock = block;

    BlockState state = GetBlockStateLazy(block);
    var entryLabel = state.GetNextLabelAutoIncrement(this);

    AddLabel(entryLabel);
}
/// <summary>
/// Gets the first label of a block.
/// </summary>
/// <param name="block">Block to get the label from</param>
/// <returns>The label at index 0 of the block</returns>
public Instruction GetFirstLabel(AstBlock block)
{
    BlockState state = GetBlockStateLazy(block);

    return state.GetLabel(this, 0);
}
/// <summary>
/// Gets the label that will be used the next time a block is entered.
/// </summary>
/// <param name="block">Block to get the label from</param>
/// <returns>The label for the next entry of the block</returns>
public Instruction GetNextLabel(AstBlock block)
{
    BlockState state = GetBlockStateLazy(block);

    return state.GetNextLabel(this);
}
/// <summary>
/// Gets the label state of a block, creating it the first time the block is seen.
/// </summary>
/// <param name="block">Block to get the state for</param>
/// <returns>The label state of the block</returns>
private BlockState GetBlockStateLazy(AstBlock block)
{
    if (_labels.TryGetValue(block, out BlockState existing))
    {
        return existing;
    }

    var created = new BlockState();
    _labels.Add(block, created);

    return created;
}
/// <summary>
/// Creates a new label, branches to it and then adds it.
/// </summary>
/// <returns>The new label</returns>
public Instruction NewBlock()
{
    Instruction target = Label();

    Branch(target);
    AddLabel(target);

    return target;
}
/// <summary>
/// Gets all the input and output variables, in this order:
/// inputs, outputs, per-patch inputs and per-patch outputs.
/// </summary>
/// <returns>Array with all the input and output variables</returns>
public Instruction[] GetMainInterface()
{
    int total = Inputs.Count + Outputs.Count + InputsPerPatch.Count + OutputsPerPatch.Count;
    var mainInterface = new Instruction[total];
    int offset = 0;

    Inputs.Values.CopyTo(mainInterface, offset);
    offset += Inputs.Count;

    Outputs.Values.CopyTo(mainInterface, offset);
    offset += Outputs.Count;

    InputsPerPatch.Values.CopyTo(mainInterface, offset);
    offset += InputsPerPatch.Count;

    OutputsPerPatch.Values.CopyTo(mainInterface, offset);

    return mainInterface;
}
/// <summary>
/// Associates a local variable operand with its SPIR-V variable.
/// </summary>
public void DeclareLocal(AstOperand local, Instruction spvLocal) => _locals.Add(local, spvLocal);

/// <summary>
/// Associates a function index with the local variables declared for its arguments.
/// </summary>
public void DeclareLocalForArgs(int funcIndex, Instruction[] spvLocals) => _localForArgs.Add(funcIndex, spvLocals);

/// <summary>
/// Associates an argument index with its SPIR-V variable.
/// </summary>
public void DeclareArgument(int argIndex, Instruction spvLocal) => _funcArgs.Add(argIndex, spvLocal);

/// <summary>
/// Associates a function index with the function and its SPIR-V function.
/// </summary>
public void DeclareFunction(int funcIndex, StructuredFunction function, Instruction spvFunc) => _functions.Add(funcIndex, (function, spvFunc));
/// <summary>
/// Gets the value of a node as a 32-bit float.
/// </summary>
public Instruction GetFP32(IAstNode node) => Get(AggregateType.FP32, node);

/// <summary>
/// Gets the value of a node as a 64-bit float.
/// </summary>
public Instruction GetFP64(IAstNode node) => Get(AggregateType.FP64, node);

/// <summary>
/// Gets the value of a node as a signed 32-bit integer.
/// </summary>
public Instruction GetS32(IAstNode node) => Get(AggregateType.S32, node);

/// <summary>
/// Gets the value of a node as an unsigned 32-bit integer.
/// </summary>
public Instruction GetU32(IAstNode node) => Get(AggregateType.U32, node);
/// <summary>
/// Gets the value of a node, converted to the requested type.
/// Operations are generated, and operands are read according to their operand type.
/// </summary>
/// <param name="type">Type of the returned value</param>
/// <param name="node">Operation or operand node</param>
/// <returns>The value of the node</returns>
/// <exception cref="ArgumentException">Thrown when the operand type is not handled</exception>
/// <exception cref="NotImplementedException">Thrown when the node is neither an operation nor an operand</exception>
public Instruction Get(AggregateType type, IAstNode node)
{
    switch (node)
    {
        case AstOperation operation:
        {
            var generated = Instructions.Generate(this, operation);

            return BitcastIfNeeded(type, generated.Type, generated.Value);
        }

        case AstOperand operand:
            switch (operand.Type)
            {
                case IrOperandType.Argument:
                    return GetArgument(type, operand);
                case IrOperandType.Constant:
                    return GetConstant(type, operand);
                case IrOperandType.ConstantBuffer:
                    return GetConstantBuffer(type, operand);
                case IrOperandType.LocalVariable:
                    return GetLocal(type, operand);
                case IrOperandType.Undefined:
                    return GetUndefined(type);
                default:
                    throw new ArgumentException($"Invalid operand type \"{operand.Type}\".");
            }

        default:
            throw new NotImplementedException(node.GetType().Name);
    }
}
/// <summary>
/// Gets the value of a node without any conversion, along with its type.
/// Only operations and local variable operands are supported.
/// </summary>
/// <param name="node">Operation or local variable operand node</param>
/// <param name="type">Type of the returned value</param>
/// <returns>The value of the node</returns>
/// <exception cref="ArgumentException">Thrown when the operand is not a local variable</exception>
/// <exception cref="NotImplementedException">Thrown when the node is neither an operation nor an operand</exception>
public Instruction GetWithType(IAstNode node, out AggregateType type)
{
    switch (node)
    {
        case AstOperation operation:
        {
            var generated = Instructions.Generate(this, operation);

            type = generated.Type;
            return generated.Value;
        }

        case AstOperand operand:
            if (operand.Type != IrOperandType.LocalVariable)
            {
                throw new ArgumentException($"Invalid operand type \"{operand.Type}\".");
            }

            type = operand.VarType;
            return GetLocal(type, operand);

        default:
            throw new NotImplementedException(node.GetType().Name);
    }
}
/// <summary>
/// Gets the value used for undefined operands: false for booleans, and zero for all other types.
/// </summary>
/// <param name="type">Type of the returned value</param>
/// <returns>The constant used for undefined values of the given type</returns>
private Instruction GetUndefined(AggregateType type)
{
    switch (type)
    {
        case AggregateType.Bool:
            return ConstantFalse(TypeBool());
        case AggregateType.FP32:
            return Constant(TypeFP32(), 0f);
        case AggregateType.FP64:
            return Constant(TypeFP64(), 0d);
        default:
            return Constant(GetType(type), 0);
    }
}
/// <summary>
/// Gets a constant of the requested type from the 32-bit value of a constant operand.
/// Float types reinterpret the bits of the value as a 32-bit float, and booleans are true for any non-zero value.
/// </summary>
/// <param name="type">Type of the constant</param>
/// <param name="operand">Constant operand</param>
/// <returns>The constant</returns>
/// <exception cref="ArgumentException">Thrown when the type is not handled</exception>
public Instruction GetConstant(AggregateType type, AstOperand operand)
{
    switch (type)
    {
        case AggregateType.Bool:
            if (operand.Value != 0)
            {
                return ConstantTrue(TypeBool());
            }

            return ConstantFalse(TypeBool());

        case AggregateType.FP32:
            return Constant(TypeFP32(), BitConverter.Int32BitsToSingle(operand.Value));

        case AggregateType.FP64:
            return Constant(TypeFP64(), (double)BitConverter.Int32BitsToSingle(operand.Value));

        case AggregateType.S32:
            return Constant(TypeS32(), operand.Value);

        case AggregateType.U32:
            return Constant(TypeU32(), (uint)operand.Value);

        default:
            throw new ArgumentException($"Invalid type \"{type}\".");
    }
}
/// <summary>
/// Loads a constant buffer element, and converts it to the requested type.
/// The element is always loaded as a 32-bit float.
/// </summary>
/// <param name="type">Type of the returned value</param>
/// <param name="operand">Constant buffer operand, holds the slot and offset of the element</param>
/// <returns>The loaded value</returns>
public Instruction GetConstantBuffer(AggregateType type, AstOperand operand)
{
    // Indices of the struct member, of the vector inside the member, and of the vector component.
    var memberIndex = Constant(TypeS32(), 0);
    var vectorIndex = Constant(TypeS32(), operand.CbufOffset >> 2);
    var componentIndex = Constant(TypeU32(), operand.CbufOffset & 3);

    Instruction pointer;

    if (UniformBuffersArray != null)
    {
        // All the buffers are in a single array, the slot is the first index.
        var bufferArray = UniformBuffersArray;
        var slotIndex = Constant(TypeS32(), operand.CbufSlot);

        pointer = AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), bufferArray, slotIndex, memberIndex, vectorIndex, componentIndex);
    }
    else
    {
        // Each buffer has its own variable.
        var buffer = UniformBuffers[operand.CbufSlot];

        pointer = AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), buffer, memberIndex, vectorIndex, componentIndex);
    }

    var loaded = Load(TypeFP32(), pointer);

    return BitcastIfNeeded(type, AggregateType.FP32, loaded);
}
/// <summary>
/// Gets the SPIR-V variable declared for a local variable operand.
/// </summary>
public Instruction GetLocalPointer(AstOperand local) => _locals[local];

/// <summary>
/// Gets the local variables declared for the arguments of a function.
/// </summary>
public Instruction[] GetLocalForArgsPointers(int funcIndex) => _localForArgs[funcIndex];

/// <summary>
/// Gets the SPIR-V variable declared for a function argument operand, using the operand value as argument index.
/// </summary>
public Instruction GetArgumentPointer(AstOperand funcArg) => _funcArgs[funcArg.Value];
/// <summary>
/// Loads a local variable, and converts the value from the variable type to the requested type.
/// </summary>
/// <param name="dstType">Type of the returned value</param>
/// <param name="local">Local variable operand</param>
/// <returns>The loaded value</returns>
public Instruction GetLocal(AggregateType dstType, AstOperand local)
{
    AggregateType variableType = local.VarType;

    var spvType = GetType(variableType);
    var pointer = GetLocalPointer(local);
    var loaded = Load(spvType, pointer);

    return BitcastIfNeeded(dstType, variableType, loaded);
}
/// <summary>
/// Loads a function argument, and converts the value from the argument type to the requested type.
/// </summary>
/// <param name="dstType">Type of the returned value</param>
/// <param name="funcArg">Function argument operand</param>
/// <returns>The loaded value</returns>
public Instruction GetArgument(AggregateType dstType, AstOperand funcArg)
{
    AggregateType argumentType = funcArg.VarType;

    var spvType = GetType(argumentType);
    var pointer = GetArgumentPointer(funcArg);
    var loaded = Load(spvType, pointer);

    return BitcastIfNeeded(dstType, argumentType, loaded);
}
/// <summary>
/// Gets a declared function, along with its SPIR-V function.
/// </summary>
/// <param name="funcIndex">Index the function was declared with</param>
/// <returns>The function and its SPIR-V function</returns>
public (StructuredFunction, Instruction) GetFunction(int funcIndex) => _functions[funcIndex];
/// <summary>
/// Gets the SPIR-V type for an aggregate type.
/// Arrays and vectors are built from the type that remains after removing the array flag or the element count.
/// </summary>
/// <param name="type">Aggregate type</param>
/// <param name="length">Length of the array, only used for array types</param>
/// <returns>The SPIR-V type</returns>
/// <exception cref="ArgumentException">Thrown when the scalar type is not handled</exception>
public Instruction GetType(AggregateType type, int length = 1)
{
    if ((type & AggregateType.Array) != 0)
    {
        var elementType = GetType(type & ~AggregateType.Array);

        return TypeArray(elementType, Constant(TypeU32(), length));
    }

    AggregateType elementCount = type & AggregateType.ElementCountMask;

    if (elementCount != 0)
    {
        int vectorLength;

        switch (elementCount)
        {
            case AggregateType.Vector2:
                vectorLength = 2;
                break;
            case AggregateType.Vector3:
                vectorLength = 3;
                break;
            case AggregateType.Vector4:
                vectorLength = 4;
                break;
            default:
                vectorLength = 1;
                break;
        }

        var componentType = GetType(type & ~AggregateType.ElementCountMask);

        return TypeVector(componentType, vectorLength);
    }

    switch (type)
    {
        case AggregateType.Void:
            return TypeVoid();
        case AggregateType.Bool:
            return TypeBool();
        case AggregateType.FP32:
            return TypeFP32();
        case AggregateType.FP64:
            return TypeFP64();
        case AggregateType.S32:
            return TypeS32();
        case AggregateType.U32:
            return TypeU32();
        default:
            throw new ArgumentException($"Invalid attribute type \"{type}\".");
    }
}
/// <summary>
/// Converts a value from the source type to the destination type, if the types are different.
/// Booleans are converted through a signed 32-bit integer, all other types are bitcast.
/// </summary>
/// <param name="dstType">Type of the returned value</param>
/// <param name="srcType">Type of the input value</param>
/// <param name="value">Value to convert</param>
/// <returns>The converted value, or the input value when both types match</returns>
public Instruction BitcastIfNeeded(AggregateType dstType, AggregateType srcType, Instruction value)
{
    if (dstType == srcType)
    {
        return value;
    }

    if (dstType == AggregateType.Bool)
    {
        // To boolean: true when the value, as an integer, is not zero.
        var boolType = TypeBool();
        var asInteger = BitcastIfNeeded(AggregateType.S32, srcType, value);
        var zero = Constant(TypeS32(), 0);

        return INotEqual(boolType, asInteger, zero);
    }

    if (srcType == AggregateType.Bool)
    {
        // From boolean: select between the integer constants for true and false first.
        var intTrue = Constant(TypeS32(), IrConsts.True);
        var intFalse = Constant(TypeS32(), IrConsts.False);
        var selected = Select(TypeS32(), value, intTrue, intFalse);

        return BitcastIfNeeded(dstType, AggregateType.S32, selected);
    }

    return Bitcast(GetType(dstType, 1), value);
}
/// <summary>
/// Gets the signed 32-bit integer type.
/// </summary>
public Instruction TypeS32() => TypeInt(32, true);

/// <summary>
/// Gets the unsigned 32-bit integer type.
/// </summary>
public Instruction TypeU32() => TypeInt(32, false);

/// <summary>
/// Gets the 32-bit float type.
/// </summary>
public Instruction TypeFP32() => TypeFloat(32);

/// <summary>
/// Gets the 64-bit float type.
/// </summary>
public Instruction TypeFP64() => TypeFloat(64);
}
}

View File

@ -0,0 +1,615 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using Spv.Generator;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using static Spv.Specification;
using SpvInstruction = Spv.Generator.Instruction;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
static class Declarations
{
// Name prefixes for the shader stages.
// NOTE(review): presumably indexed by the shader stage value through GetStagePrefix, which is not visible here — confirm.
private static readonly string[] StagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" };
/// <summary>
/// Declares the parameters of a function.
/// Input arguments are declared first, output arguments are numbered right after them.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="function">Function to declare the parameters for</param>
public static void DeclareParameters(CodeGenContext context, StructuredFunction function)
{
    var inputTypes = function.InArguments;
    var outputTypes = function.OutArguments;

    DeclareParameters(context, inputTypes, 0);
    DeclareParameters(context, outputTypes, inputTypes.Length);
}
/// <summary>
/// Declares one function parameter per argument type, as a pointer with Function storage class.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="argTypes">Types of the arguments</param>
/// <param name="argIndex">Index of the first argument</param>
private static void DeclareParameters(CodeGenContext context, IEnumerable<AggregateType> argTypes, int argIndex)
{
    int nextIndex = argIndex;

    foreach (var argType in argTypes)
    {
        var valueType = context.GetType(argType);
        var pointerType = context.TypePointer(StorageClass.Function, valueType);
        var parameter = context.FunctionParameter(pointerType);

        context.DeclareArgument(nextIndex, parameter);
        nextIndex++;
    }
}
/// <summary>
/// Declares the local variables of a function, plus the temporary variable for coordinates.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="function">Function to declare the locals for</param>
public static void DeclareLocals(CodeGenContext context, StructuredFunction function)
{
    foreach (AstOperand local in function.Locals)
    {
        var valueType = context.GetType(local.VarType);
        var pointerType = context.TypePointer(StorageClass.Function, valueType);
        var variable = context.Variable(pointerType, StorageClass.Function);

        context.AddLocalVariable(variable);
        context.DeclareLocal(local, variable);
    }

    // The coordinates temporary is a vector of 2 signed integers.
    var coordType = context.TypeVector(context.TypeS32(), 2);
    var coordPointerType = context.TypePointer(StorageClass.Function, coordType);
    var coordVariable = context.Variable(coordPointerType, StorageClass.Function);

    context.AddLocalVariable(coordVariable);
    context.CoordTemp = coordVariable;
}
/// <summary>
/// Declares, for each function, one local variable per input argument of that function.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="functions">Functions to declare the locals for, the list index is used as function index</param>
public static void DeclareLocalForArgs(CodeGenContext context, List<StructuredFunction> functions)
{
    int funcIndex = 0;

    foreach (StructuredFunction function in functions)
    {
        int argCount = function.InArguments.Length;
        var argLocals = new SpvInstruction[argCount];

        for (int argIndex = 0; argIndex < argCount; argIndex++)
        {
            var valueType = context.GetType(function.GetArgumentType(argIndex));
            var pointerType = context.TypePointer(StorageClass.Function, valueType);
            var variable = context.Variable(pointerType, StorageClass.Function);

            context.AddLocalVariable(variable);
            argLocals[argIndex] = variable;
        }

        context.DeclareLocalForArgs(funcIndex, argLocals);
        funcIndex++;
    }
}
/// <summary>
/// Declares the memory, buffers, samplers, images, inputs and outputs used by the shader.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="info">Structured program information</param>
public static void DeclareAll(CodeGenContext context, StructuredProgramInfo info)
{
    var config = context.Config;

    // Memory sizes are divided by 4 (rounding up) before being declared.
    if (config.Stage != ShaderStage.Compute)
    {
        if (config.LocalMemorySize != 0)
        {
            DeclareLocalMemory(context, BitUtils.DivRoundUp(config.LocalMemorySize, 4));
        }
    }
    else
    {
        int localMemorySize = BitUtils.DivRoundUp(config.GpuAccessor.QueryComputeLocalMemorySize(), 4);

        if (localMemorySize != 0)
        {
            DeclareLocalMemory(context, localMemorySize);
        }

        int sharedMemorySize = BitUtils.DivRoundUp(config.GpuAccessor.QueryComputeSharedMemorySize(), 4);

        if (sharedMemorySize != 0)
        {
            DeclareSharedMemory(context, sharedMemorySize);
        }
    }

    DeclareSupportBuffer(context);
    DeclareUniformBuffers(context, context.Config.GetConstantBufferDescriptors());
    DeclareStorageBuffers(context, context.Config.GetStorageBufferDescriptors());
    DeclareSamplers(context, context.Config.GetTextureDescriptors());
    DeclareImages(context, context.Config.GetImageDescriptors());
    DeclareInputsAndOutputs(context, info);
}
/// <summary>
/// Declares the local memory, using the Private storage class.
/// </summary>
private static void DeclareLocalMemory(CodeGenContext context, int size) =>
    context.LocalMemory = DeclareMemory(context, StorageClass.Private, size);

/// <summary>
/// Declares the shared memory, using the Workgroup storage class.
/// </summary>
private static void DeclareSharedMemory(CodeGenContext context, int size) =>
    context.SharedMemory = DeclareMemory(context, StorageClass.Workgroup, size);
/// <summary>
/// Declares a global variable that is an array of unsigned 32-bit integers.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="storage">Storage class of the variable</param>
/// <param name="size">Number of elements of the array</param>
/// <returns>The declared variable</returns>
private static SpvInstruction DeclareMemory(CodeGenContext context, StorageClass storage, int size)
{
    var elementType = context.TypeU32();
    var elementCount = context.Constant(context.TypeU32(), size);
    var memoryType = context.TypeArray(elementType, elementCount);

    var memoryPointerType = context.TypePointer(storage, memoryType);
    var memory = context.Variable(memoryPointerType, storage);

    context.AddGlobalVariable(memory);

    return memory;
}
/// <summary>
/// Declares the support buffer, as a uniform block at descriptor set 0, binding 0.
/// Nothing is declared unless the stage supports render scale, or this is the last stage of the
/// vertex pipeline and the viewport transform is disabled.
/// </summary>
/// <param name="context">Code generation context</param>
private static void DeclareSupportBuffer(CodeGenContext context)
{
    bool isNeeded =
        context.Config.Stage.SupportsRenderScale() ||
        (context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable());

    if (!isNeeded)
    {
        return;
    }

    // Types of the struct members that are not plain scalars.
    var bgraFlagsType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), SupportBuffer.FragmentIsBgraCount));
    var viewportInverseType = context.TypeVector(context.TypeFP32(), 4);
    var renderScalesType = context.TypeArray(context.TypeFP32(), context.Constant(context.TypeU32(), SupportBuffer.RenderScaleMaxCount));

    context.Decorate(bgraFlagsType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize);
    context.Decorate(renderScalesType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize);

    var blockType = context.TypeStruct(false, context.TypeU32(), bgraFlagsType, viewportInverseType, context.TypeS32(), renderScalesType);

    // Offset of each member inside the block.
    context.MemberDecorate(blockType, 0, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentAlphaTestOffset);
    context.MemberDecorate(blockType, 1, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentIsBgraOffset);
    context.MemberDecorate(blockType, 2, Decoration.Offset, (LiteralInteger)SupportBuffer.ViewportInverseOffset);
    context.MemberDecorate(blockType, 3, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentRenderScaleCountOffset);
    context.MemberDecorate(blockType, 4, Decoration.Offset, (LiteralInteger)SupportBuffer.GraphicsRenderScaleOffset);
    context.Decorate(blockType, Decoration.Block);

    var blockPointerType = context.TypePointer(StorageClass.Uniform, blockType);
    var blockVariable = context.Variable(blockPointerType, StorageClass.Uniform);

    context.Decorate(blockVariable, Decoration.DescriptorSet, (LiteralInteger)0);
    context.Decorate(blockVariable, Decoration.Binding, (LiteralInteger)0);

    context.AddGlobalVariable(blockVariable);

    context.SupportBuffer = blockVariable;
}
/// <summary>
/// Declares the uniform buffers, each one being a block with an array of 4-component float vectors.
/// When constant buffer indexing is used, a single array of blocks is declared for all slots,
/// otherwise one variable is declared per descriptor.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="descriptors">Descriptors of the constant buffers</param>
private static void DeclareUniformBuffers(CodeGenContext context, BufferDescriptor[] descriptors)
{
    if (descriptors.Length == 0)
    {
        return;
    }

    // Each array element is a vector that takes 16 bytes.
    uint ubSize = Constants.ConstantBufferSize / 16;

    var vectorType = context.TypeVector(context.TypeFP32(), 4);
    var vectorCount = context.Constant(context.TypeU32(), ubSize);
    var dataArrayType = context.TypeArray(vectorType, vectorCount, true);
    context.Decorate(dataArrayType, Decoration.ArrayStride, (LiteralInteger)16);

    var blockType = context.TypeStruct(true, dataArrayType);
    context.Decorate(blockType, Decoration.Block);
    context.MemberDecorate(blockType, 0, Decoration.Offset, (LiteralInteger)0);

    if (!context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing))
    {
        var blockPointerType = context.TypePointer(StorageClass.Uniform, blockType);

        foreach (var descriptor in descriptors)
        {
            var bufferVariable = context.Variable(blockPointerType, StorageClass.Uniform);

            context.Name(bufferVariable, $"{GetStagePrefix(context.Config.Stage)}_c{descriptor.Slot}");
            context.Decorate(bufferVariable, Decoration.DescriptorSet, (LiteralInteger)0);
            context.Decorate(bufferVariable, Decoration.Binding, (LiteralInteger)descriptor.Binding);
            context.AddGlobalVariable(bufferVariable);
            context.UniformBuffers.Add(descriptor.Slot, bufferVariable);
        }

        return;
    }

    // The array must have enough elements to be indexed with the highest slot in use.
    int count = descriptors.Max(x => x.Slot) + 1;

    var blockArrayType = context.TypeArray(blockType, context.Constant(context.TypeU32(), count));
    var blockArrayPointerType = context.TypePointer(StorageClass.Uniform, blockArrayType);
    var arrayVariable = context.Variable(blockArrayPointerType, StorageClass.Uniform);

    context.Name(arrayVariable, $"{GetStagePrefix(context.Config.Stage)}_u");
    context.Decorate(arrayVariable, Decoration.DescriptorSet, (LiteralInteger)0);
    context.Decorate(arrayVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstConstantBufferBinding);
    context.AddGlobalVariable(arrayVariable);

    context.UniformBuffersArray = arrayVariable;
}
/// <summary>
/// Declares the storage buffers, as a single array of blocks that contain a runtime array of
/// unsigned 32-bit integers. Descriptor set 1 is used for Vulkan, and 0 for the other APIs.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="descriptors">Descriptors of the storage buffers</param>
private static void DeclareStorageBuffers(CodeGenContext context, BufferDescriptor[] descriptors)
{
    if (descriptors.Length == 0)
    {
        return;
    }

    bool isVulkan = context.Config.Options.TargetApi == TargetApi.Vulkan;
    int setIndex = isVulkan ? 1 : 0;

    // The array must have enough elements to be indexed with the highest slot in use.
    int count = descriptors.Max(x => x.Slot) + 1;

    var dataArrayType = context.TypeRuntimeArray(context.TypeU32());
    context.Decorate(dataArrayType, Decoration.ArrayStride, (LiteralInteger)4);

    var blockType = context.TypeStruct(true, dataArrayType);
    context.Decorate(blockType, Decoration.BufferBlock);
    context.MemberDecorate(blockType, 0, Decoration.Offset, (LiteralInteger)0);

    var blockArrayType = context.TypeArray(blockType, context.Constant(context.TypeU32(), count));
    var blockArrayPointerType = context.TypePointer(StorageClass.Uniform, blockArrayType);
    var arrayVariable = context.Variable(blockArrayPointerType, StorageClass.Uniform);

    context.Name(arrayVariable, $"{GetStagePrefix(context.Config.Stage)}_s");
    context.Decorate(arrayVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex);
    context.Decorate(arrayVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstStorageBufferBinding);
    context.AddGlobalVariable(arrayVariable);

    context.StorageBuffersArray = arrayVariable;
}
/// <summary>
/// Declares one sampled image variable per unique texture descriptor.
/// Descriptor set 2 is used for Vulkan, and 0 for the other APIs.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="descriptors">Descriptors of the textures</param>
/// <exception cref="InvalidOperationException">Thrown when a descriptor has an unknown sampler type</exception>
private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
{
    for (int index = 0; index < descriptors.Length; index++)
    {
        var descriptor = descriptors[index];
        var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format);

        // Descriptors that resolve to an already declared sampler are skipped.
        if (context.Samplers.ContainsKey(meta))
        {
            continue;
        }

        int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 2 : 0;

        Dim dim;

        switch (descriptor.Type & SamplerType.Mask)
        {
            case SamplerType.Texture1D:
                dim = Dim.Dim1D;
                break;
            case SamplerType.Texture2D:
                dim = Dim.Dim2D;
                break;
            case SamplerType.Texture3D:
                dim = Dim.Dim3D;
                break;
            case SamplerType.TextureCube:
                dim = Dim.Cube;
                break;
            case SamplerType.TextureBuffer:
                dim = Dim.Buffer;
                break;
            default:
                throw new InvalidOperationException($"Invalid sampler type \"{descriptor.Type & SamplerType.Mask}\".");
        }

        var imageType = context.TypeImage(
            context.TypeFP32(),
            dim,
            descriptor.Type.HasFlag(SamplerType.Shadow),
            descriptor.Type.HasFlag(SamplerType.Array),
            descriptor.Type.HasFlag(SamplerType.Multisample),
            1,
            ImageFormat.Unknown);

        string nameSuffix;

        if (meta.CbufSlot < 0)
        {
            nameSuffix = $"_tcb_{meta.Handle:X}";
        }
        else
        {
            nameSuffix = $"_cb{meta.CbufSlot}_{meta.Handle:X}";
        }

        var sampledImageType = context.TypeSampledImage(imageType);
        var pointerType = context.TypePointer(StorageClass.UniformConstant, sampledImageType);
        var variable = context.Variable(pointerType, StorageClass.UniformConstant);

        context.Samplers.Add(meta, (imageType, sampledImageType, variable));
        context.SamplersTypes.Add(meta, descriptor.Type);

        context.Name(variable, $"{GetStagePrefix(context.Config.Stage)}_tex{nameSuffix}");
        context.Decorate(variable, Decoration.DescriptorSet, (LiteralInteger)setIndex);
        context.Decorate(variable, Decoration.Binding, (LiteralInteger)descriptor.Binding);
        context.AddGlobalVariable(variable);
    }
}
/// <summary>
/// Declares one image variable per unique image descriptor.
/// Descriptor set 3 is used for Vulkan, and 0 for the other APIs.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="descriptors">Descriptors of the images</param>
private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors)
{
    for (int index = 0; index < descriptors.Length; index++)
    {
        var descriptor = descriptors[index];
        var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format);

        // Descriptors that resolve to an already declared image are skipped.
        if (context.Images.ContainsKey(meta))
        {
            continue;
        }

        int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 3 : 0;

        var dim = GetDim(descriptor.Type);

        var imageType = context.TypeImage(
            context.GetType(meta.Format.GetComponentType()),
            dim,
            descriptor.Type.HasFlag(SamplerType.Shadow),
            descriptor.Type.HasFlag(SamplerType.Array),
            descriptor.Type.HasFlag(SamplerType.Multisample),
            AccessQualifier.ReadWrite,
            GetImageFormat(meta.Format));

        string nameSuffix;

        if (meta.CbufSlot < 0)
        {
            nameSuffix = $"_tcb_{meta.Handle:X}_{meta.Format.ToGlslFormat()}";
        }
        else
        {
            nameSuffix = $"_cb{meta.CbufSlot}_{meta.Handle:X}_{meta.Format.ToGlslFormat()}";
        }

        var pointerType = context.TypePointer(StorageClass.UniformConstant, imageType);
        var variable = context.Variable(pointerType, StorageClass.UniformConstant);

        context.Images.Add(meta, (imageType, variable));

        context.Name(variable, $"{GetStagePrefix(context.Config.Stage)}_img{nameSuffix}");
        context.Decorate(variable, Decoration.DescriptorSet, (LiteralInteger)setIndex);
        context.Decorate(variable, Decoration.Binding, (LiteralInteger)descriptor.Binding);

        bool isCoherent = descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent);

        if (isCoherent)
        {
            context.Decorate(variable, Decoration.Coherent);
        }

        context.AddGlobalVariable(variable);
    }
}
/// <summary>
/// Maps a sampler type, ignoring any bits outside <see cref="SamplerType.Mask"/>, to the SPIR-V image dimensionality.
/// </summary>
/// <param name="type">Sampler type to convert</param>
/// <returns>The matching SPIR-V dimensionality</returns>
/// <exception cref="ArgumentException">The masked sampler type is not one of the handled types</exception>
private static Dim GetDim(SamplerType type)
{
    SamplerType baseType = type & SamplerType.Mask;

    switch (baseType)
    {
        case SamplerType.Texture1D:
            return Dim.Dim1D;
        case SamplerType.Texture2D:
            return Dim.Dim2D;
        case SamplerType.Texture3D:
            return Dim.Dim3D;
        case SamplerType.TextureCube:
            return Dim.Cube;
        case SamplerType.TextureBuffer:
            return Dim.Buffer;
        default:
            throw new ArgumentException($"Invalid sampler type \"{type & SamplerType.Mask}\".");
    }
}
/// <summary>
/// Converts a shader texture format into the SPIR-V image format used on image type declarations.
/// </summary>
/// <param name="format">Texture format to convert</param>
/// <returns>The matching SPIR-V image format</returns>
/// <exception cref="ArgumentException">The format has no entry in the table below</exception>
private static ImageFormat GetImageFormat(TextureFormat format) => format switch
{
    TextureFormat.Unknown => ImageFormat.Unknown,

    // Single component formats.
    TextureFormat.R8Unorm => ImageFormat.R8,
    TextureFormat.R8Snorm => ImageFormat.R8Snorm,
    TextureFormat.R8Uint => ImageFormat.R8ui,
    TextureFormat.R8Sint => ImageFormat.R8i,
    TextureFormat.R16Float => ImageFormat.R16f,
    TextureFormat.R16Unorm => ImageFormat.R16,
    TextureFormat.R16Snorm => ImageFormat.R16Snorm,
    TextureFormat.R16Uint => ImageFormat.R16ui,
    TextureFormat.R16Sint => ImageFormat.R16i,
    TextureFormat.R32Float => ImageFormat.R32f,
    TextureFormat.R32Uint => ImageFormat.R32ui,
    TextureFormat.R32Sint => ImageFormat.R32i,

    // Two component formats.
    TextureFormat.R8G8Unorm => ImageFormat.Rg8,
    TextureFormat.R8G8Snorm => ImageFormat.Rg8Snorm,
    TextureFormat.R8G8Uint => ImageFormat.Rg8ui,
    TextureFormat.R8G8Sint => ImageFormat.Rg8i,
    TextureFormat.R16G16Float => ImageFormat.Rg16f,
    TextureFormat.R16G16Unorm => ImageFormat.Rg16,
    TextureFormat.R16G16Snorm => ImageFormat.Rg16Snorm,
    TextureFormat.R16G16Uint => ImageFormat.Rg16ui,
    TextureFormat.R16G16Sint => ImageFormat.Rg16i,
    TextureFormat.R32G32Float => ImageFormat.Rg32f,
    TextureFormat.R32G32Uint => ImageFormat.Rg32ui,
    TextureFormat.R32G32Sint => ImageFormat.Rg32i,

    // Four component formats.
    TextureFormat.R8G8B8A8Unorm => ImageFormat.Rgba8,
    TextureFormat.R8G8B8A8Snorm => ImageFormat.Rgba8Snorm,
    TextureFormat.R8G8B8A8Uint => ImageFormat.Rgba8ui,
    TextureFormat.R8G8B8A8Sint => ImageFormat.Rgba8i,
    TextureFormat.R16G16B16A16Float => ImageFormat.Rgba16f,
    TextureFormat.R16G16B16A16Unorm => ImageFormat.Rgba16,
    TextureFormat.R16G16B16A16Snorm => ImageFormat.Rgba16Snorm,
    TextureFormat.R16G16B16A16Uint => ImageFormat.Rgba16ui,
    TextureFormat.R16G16B16A16Sint => ImageFormat.Rgba16i,
    TextureFormat.R32G32B32A32Float => ImageFormat.Rgba32f,
    TextureFormat.R32G32B32A32Uint => ImageFormat.Rgba32ui,
    TextureFormat.R32G32B32A32Sint => ImageFormat.Rgba32i,

    // Packed formats.
    TextureFormat.R10G10B10A2Unorm => ImageFormat.Rgb10A2,
    TextureFormat.R10G10B10A2Uint => ImageFormat.Rgb10a2ui,
    TextureFormat.R11G11B10Float => ImageFormat.R11fG11fB10f,

    _ => throw new ArgumentException($"Invalid texture format \"{format}\".")
};
/// <summary>
/// Declares every input and output listed on the program info, picking the interpolation
/// qualifier for fragment shader variables along the way.
/// </summary>
/// <param name="context">Code generation context that receives the declarations</param>
/// <param name="info">Structured program information holding the I/O definitions</param>
private static void DeclareInputsAndOutputs(CodeGenContext context, StructuredProgramInfo info)
{
    foreach (var definition in info.IoDefinitions)
    {
        var variable = definition.IoVariable;

        switch (variable)
        {
            // Those are actually from constant buffer, rather than being actual inputs or outputs,
            // so we must ignore them here as they are declared as part of the support buffer.
            // TODO: Delete this after we represent this properly on the IR (as a constant buffer rather than "input").
            case IoVariable.FragmentOutputIsBgra:
            case IoVariable.SupportBlockRenderScale:
            case IoVariable.SupportBlockViewInverse:
                continue;
        }

        bool isOutput = definition.StorageKind.IsOutput();
        bool isPerPatch = definition.StorageKind.IsPerPatch();

        PixelImap interpolation = PixelImap.Unused;

        if (context.Config.Stage == ShaderStage.Fragment)
        {
            if (variable == IoVariable.UserDefined)
            {
                interpolation = context.Config.ImapTypes[definition.Location].GetFirstUsedType();
            }
            else
            {
                // Built-ins with a signed or unsigned integer element type are declared as constant (flat).
                var (_, builtInType) = IoMap.GetSpirvBuiltIn(variable);

                AggregateType elementType = builtInType & AggregateType.ElementTypeMask;

                bool isInteger = elementType == AggregateType.S32 || elementType == AggregateType.U32;

                if (isInteger)
                {
                    interpolation = PixelImap.Constant;
                }
            }
        }

        DeclareInputOrOutput(context, definition, isOutput, isPerPatch, interpolation);
    }
}
/// <summary>
/// Declares a single shader input or output variable, applies its decorations and
/// registers it on the matching context dictionary (inputs, outputs or their per-patch variants).
/// </summary>
/// <param name="context">Code generation context that receives the declaration</param>
/// <param name="ioDefinition">Definition of the variable being declared</param>
/// <param name="isOutput">True to use the output storage class, false for the input storage class</param>
/// <param name="isPerPatch">True if the variable is per-patch</param>
/// <param name="iq">Interpolation qualifier; only consulted for inputs</param>
/// <exception cref="InvalidOperationException">The variable is not user defined, not a fragment output color and has no built-in mapping</exception>
private static void DeclareInputOrOutput(CodeGenContext context, IoDefinition ioDefinition, bool isOutput, bool isPerPatch, PixelImap iq = PixelImap.Unused)
{
IoVariable ioVariable = ioDefinition.IoVariable;
var storageClass = isOutput ? StorageClass.Output : StorageClass.Input;
bool isBuiltIn;
BuiltIn builtIn = default;
AggregateType varType;
// Resolve the variable type: user defined attributes and fragment output colors get their
// type from the shader config, everything else must map to a SPIR-V built-in.
if (ioVariable == IoVariable.UserDefined)
{
varType = context.Config.GetUserDefinedType(ioDefinition.Location, isOutput);
isBuiltIn = false;
}
else if (ioVariable == IoVariable.FragmentOutputColor)
{
varType = context.Config.GetFragmentOutputColorType(ioDefinition.Location);
isBuiltIn = false;
}
else
{
(builtIn, varType) = IoMap.GetSpirvBuiltIn(ioVariable);
isBuiltIn = true;
if (varType == AggregateType.Invalid)
{
throw new InvalidOperationException($"Unknown variable {ioVariable}.");
}
}
bool hasComponent = context.Config.HasPerLocationInputOrOutputComponent(ioVariable, ioDefinition.Location, ioDefinition.Component, isOutput);
if (hasComponent)
{
// Declared per component: keep only the element type.
varType &= AggregateType.ElementTypeMask;
}
else if (ioVariable == IoVariable.UserDefined && context.Config.HasTransformFeedbackOutputs(isOutput))
{
// With transform feedback the vector size follows the component count reported by the config.
// NOTE(review): any count other than 2, 3 or 4 ORs in AggregateType.Invalid, which presumably
// leaves the type as a scalar - confirm that Invalid is the zero value.
varType &= AggregateType.ElementTypeMask;
varType |= context.Config.GetTransformFeedbackOutputComponents(ioDefinition.Location, ioDefinition.Component) switch
{
2 => AggregateType.Vector2,
3 => AggregateType.Vector3,
4 => AggregateType.Vector4,
_ => AggregateType.Invalid
};
}
var spvType = context.GetType(varType, IoMap.GetSpirvBuiltInArrayLength(ioVariable));
bool builtInPassthrough = false;
// Variables reported as per-vertex are wrapped on an array: geometry shaders use the
// input vertex count, all other stages use a fixed size of 32.
if (!isPerPatch && IoMap.IsPerVertex(ioVariable, context.Config.Stage, isOutput))
{
int arraySize = context.Config.Stage == ShaderStage.Geometry ? context.InputVertices : 32;
spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), (LiteralInteger)arraySize));
if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
{
builtInPassthrough = true;
}
}
// Tessellation control outputs that are not per-patch are arrays sized by ThreadsPerInputPrimitive.
if (context.Config.Stage == ShaderStage.TessellationControl && isOutput && !isPerPatch)
{
spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), context.Config.ThreadsPerInputPrimitive));
}
var spvPointerType = context.TypePointer(storageClass, spvType);
var spvVar = context.Variable(spvPointerType, storageClass);
if (builtInPassthrough)
{
context.Decorate(spvVar, Decoration.PassthroughNV);
}
// Decorations: exactly one of the branches below runs, depending on the kind of variable.
if (isBuiltIn)
{
if (isPerPatch)
{
context.Decorate(spvVar, Decoration.Patch);
}
// Position is marked invariant when the host reports reduced precision.
if (context.Config.GpuAccessor.QueryHostReducedPrecision() && ioVariable == IoVariable.Position)
{
context.Decorate(spvVar, Decoration.Invariant);
}
context.Decorate(spvVar, Decoration.BuiltIn, (LiteralInteger)builtIn);
}
else if (isPerPatch)
{
context.Decorate(spvVar, Decoration.Patch);
if (ioVariable == IoVariable.UserDefined)
{
// Per-patch attributes use the location returned by the config rather than the definition location.
int location = context.Config.GetPerPatchAttributeLocation(ioDefinition.Location);
context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
}
}
else if (ioVariable == IoVariable.UserDefined)
{
context.Decorate(spvVar, Decoration.Location, (LiteralInteger)ioDefinition.Location);
if (hasComponent)
{
context.Decorate(spvVar, Decoration.Component, (LiteralInteger)ioDefinition.Component);
}
// Inputs flagged on the passthrough attribute mask get the passthrough decoration when the host supports it.
// isPerPatch is always false on this branch, so that part of the check is redundant.
if (!isOutput &&
!isPerPatch &&
(context.Config.PassthroughAttributes & (1 << ioDefinition.Location)) != 0 &&
context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
{
context.Decorate(spvVar, Decoration.PassthroughNV);
}
}
else if (ioVariable == IoVariable.FragmentOutputColor)
{
int location = ioDefinition.Location;
if (context.Config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryDualSourceBlendEnable())
{
// Dual source blending: when the lowest used output and the one right after it are both in use,
// they are declared at the same location (the lowest one) and told apart by the Index decoration (0 or 1).
// The unsigned cast makes locations below the first used one fail the range check.
int firstLocation = BitOperations.TrailingZeroCount(context.Config.UsedOutputAttributes);
int index = location - firstLocation;
int mask = 3 << firstLocation;
if ((uint)index < 2 && (context.Config.UsedOutputAttributes & mask) == mask)
{
context.Decorate(spvVar, Decoration.Location, (LiteralInteger)firstLocation);
context.Decorate(spvVar, Decoration.Index, (LiteralInteger)index);
}
else
{
context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
}
}
else
{
context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
}
}
// Inputs may carry an interpolation qualifier, outputs may carry transform feedback decorations.
if (!isOutput)
{
switch (iq)
{
case PixelImap.Constant:
context.Decorate(spvVar, Decoration.Flat);
break;
case PixelImap.ScreenLinear:
context.Decorate(spvVar, Decoration.NoPerspective);
break;
}
}
else if (context.Config.TryGetTransformFeedbackOutput(
ioVariable,
ioDefinition.Location,
ioDefinition.Component,
out var transformFeedbackOutput))
{
context.Decorate(spvVar, Decoration.XfbBuffer, (LiteralInteger)transformFeedbackOutput.Buffer);
context.Decorate(spvVar, Decoration.XfbStride, (LiteralInteger)transformFeedbackOutput.Stride);
context.Decorate(spvVar, Decoration.Offset, (LiteralInteger)transformFeedbackOutput.Offset);
}
context.AddGlobalVariable(spvVar);
// Register the variable so that it can be looked up by its definition later.
var dict = isPerPatch
? (isOutput ? context.OutputsPerPatch : context.InputsPerPatch)
: (isOutput ? context.Outputs : context.Inputs);
dict.Add(ioDefinition, spvVar);
}
/// <summary>
/// Gets the name prefix for a shader stage by indexing the stage prefix table with the stage value.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <returns>Prefix used on the names of variables declared for the stage</returns>
private static string GetStagePrefix(ShaderStage stage) => StagePrefixes[(int)stage];
}
}

View File

@ -0,0 +1,22 @@
using System;
using static Spv.Specification;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
/// <summary>
/// Conversions from shader enums to their SPIR-V counterparts.
/// </summary>
static class EnumConversion
{
    /// <summary>
    /// Converts a shader stage into the SPIR-V execution model of the entry point.
    /// </summary>
    /// <param name="stage">Shader stage to convert</param>
    /// <returns>The matching SPIR-V execution model</returns>
    /// <exception cref="ArgumentException">The stage is not one of the handled stages</exception>
    public static ExecutionModel Convert(this ShaderStage stage)
    {
        switch (stage)
        {
            case ShaderStage.Compute:
                return ExecutionModel.GLCompute;
            case ShaderStage.Vertex:
                return ExecutionModel.Vertex;
            case ShaderStage.TessellationControl:
                return ExecutionModel.TessellationControl;
            case ShaderStage.TessellationEvaluation:
                return ExecutionModel.TessellationEvaluation;
            case ShaderStage.Geometry:
                return ExecutionModel.Geometry;
            case ShaderStage.Fragment:
                return ExecutionModel.Fragment;
            default:
                throw new ArgumentException($"Invalid shader stage \"{stage}\".");
        }
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,86 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Spv.Specification;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
/// <summary>
/// Maps shader I/O variables to SPIR-V built-ins, their types and their array lengths.
/// </summary>
static class IoMap
{
    // At least 16 attributes are guaranteed by the spec.
    private const int MaxAttributes = 16;

    /// <summary>
    /// Gets the SPIR-V built-in and the variable type for an I/O variable.
    /// </summary>
    /// <param name="ioVariable">I/O variable to look up</param>
    /// <returns>
    /// The built-in and its type, or the default built-in with <see cref="AggregateType.Invalid"/>
    /// when the variable has no entry in the table
    /// </returns>
    public static (BuiltIn, AggregateType) GetSpirvBuiltIn(IoVariable ioVariable) => ioVariable switch
    {
        IoVariable.BaseInstance => (BuiltIn.BaseInstance, AggregateType.S32),
        IoVariable.BaseVertex => (BuiltIn.BaseVertex, AggregateType.S32),
        IoVariable.ClipDistance => (BuiltIn.ClipDistance, AggregateType.Array | AggregateType.FP32),
        IoVariable.CtaId => (BuiltIn.WorkgroupId, AggregateType.Vector3 | AggregateType.U32),
        IoVariable.DrawIndex => (BuiltIn.DrawIndex, AggregateType.S32),
        IoVariable.FragmentCoord => (BuiltIn.FragCoord, AggregateType.Vector4 | AggregateType.FP32),
        IoVariable.FragmentOutputDepth => (BuiltIn.FragDepth, AggregateType.FP32),
        IoVariable.FrontFacing => (BuiltIn.FrontFacing, AggregateType.Bool),
        IoVariable.InstanceId => (BuiltIn.InstanceId, AggregateType.S32),
        IoVariable.InstanceIndex => (BuiltIn.InstanceIndex, AggregateType.S32),
        IoVariable.InvocationId => (BuiltIn.InvocationId, AggregateType.S32),
        IoVariable.Layer => (BuiltIn.Layer, AggregateType.S32),
        IoVariable.PatchVertices => (BuiltIn.PatchVertices, AggregateType.S32),
        IoVariable.PointCoord => (BuiltIn.PointCoord, AggregateType.Vector2 | AggregateType.FP32),
        IoVariable.PointSize => (BuiltIn.PointSize, AggregateType.FP32),
        IoVariable.Position => (BuiltIn.Position, AggregateType.Vector4 | AggregateType.FP32),
        IoVariable.PrimitiveId => (BuiltIn.PrimitiveId, AggregateType.S32),
        IoVariable.SubgroupEqMask => (BuiltIn.SubgroupEqMask, AggregateType.Vector4 | AggregateType.U32),
        IoVariable.SubgroupGeMask => (BuiltIn.SubgroupGeMask, AggregateType.Vector4 | AggregateType.U32),
        IoVariable.SubgroupGtMask => (BuiltIn.SubgroupGtMask, AggregateType.Vector4 | AggregateType.U32),
        IoVariable.SubgroupLaneId => (BuiltIn.SubgroupLocalInvocationId, AggregateType.U32),
        IoVariable.SubgroupLeMask => (BuiltIn.SubgroupLeMask, AggregateType.Vector4 | AggregateType.U32),
        IoVariable.SubgroupLtMask => (BuiltIn.SubgroupLtMask, AggregateType.Vector4 | AggregateType.U32),
        IoVariable.TessellationCoord => (BuiltIn.TessCoord, AggregateType.Vector3 | AggregateType.FP32),
        IoVariable.TessellationLevelInner => (BuiltIn.TessLevelInner, AggregateType.Array | AggregateType.FP32),
        IoVariable.TessellationLevelOuter => (BuiltIn.TessLevelOuter, AggregateType.Array | AggregateType.FP32),
        IoVariable.ThreadId => (BuiltIn.LocalInvocationId, AggregateType.Vector3 | AggregateType.U32),
        IoVariable.ThreadKill => (BuiltIn.HelperInvocation, AggregateType.Bool),
        IoVariable.VertexId => (BuiltIn.VertexId, AggregateType.S32),
        IoVariable.VertexIndex => (BuiltIn.VertexIndex, AggregateType.S32),
        IoVariable.ViewportIndex => (BuiltIn.ViewportIndex, AggregateType.S32),
        IoVariable.ViewportMask => (BuiltIn.ViewportMaskNV, AggregateType.Array | AggregateType.S32),
        _ => (default, AggregateType.Invalid)
    };

    /// <summary>
    /// Gets the array length used when declaring an I/O variable.
    /// </summary>
    /// <param name="ioVariable">I/O variable to look up</param>
    /// <returns>The array length, which is 1 for every variable without a dedicated entry</returns>
    public static int GetSpirvBuiltInArrayLength(IoVariable ioVariable)
    {
        switch (ioVariable)
        {
            case IoVariable.ClipDistance:
                return 8;
            case IoVariable.TessellationLevelInner:
                return 2;
            case IoVariable.TessellationLevelOuter:
                return 4;
            case IoVariable.UserDefined:
                return MaxAttributes;
            case IoVariable.ViewportMask:
            default:
                return 1;
        }
    }

    /// <summary>
    /// Checks whether an I/O variable is a per-vertex input on the given stage.
    /// </summary>
    /// <param name="ioVariable">I/O variable to check</param>
    /// <param name="stage">Shader stage where the variable is used</param>
    /// <param name="isOutput">True if the variable is an output, which are never reported as per-vertex</param>
    /// <returns>
    /// True for the listed variables when they are inputs of a tessellation control,
    /// tessellation evaluation or geometry shader, false otherwise
    /// </returns>
    public static bool IsPerVertex(IoVariable ioVariable, ShaderStage stage, bool isOutput)
    {
        bool isCandidate;

        switch (ioVariable)
        {
            case IoVariable.Layer:
            case IoVariable.ViewportIndex:
            case IoVariable.PointSize:
            case IoVariable.Position:
            case IoVariable.UserDefined:
            case IoVariable.ClipDistance:
            case IoVariable.PointCoord:
            case IoVariable.ViewportMask:
                isCandidate = true;
                break;
            default:
                isCandidate = false;
                break;
        }

        if (!isCandidate || isOutput)
        {
            return false;
        }

        return stage == ShaderStage.TessellationControl ||
               stage == ShaderStage.TessellationEvaluation ||
               stage == ShaderStage.Geometry;
    }
}
}

View File

@ -0,0 +1,19 @@
using Ryujinx.Graphics.Shader.Translation;
using Spv.Generator;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
/// <summary>
/// Pairs a generated SPIR-V instruction with the type of the value it produces.
/// </summary>
readonly struct OperationResult
{
    /// <summary>
    /// Result with <see cref="AggregateType.Invalid"/> type and no instruction.
    /// </summary>
    public static OperationResult Invalid => new(AggregateType.Invalid, null);

    /// <summary>
    /// Type of the value.
    /// </summary>
    public AggregateType Type { get; }

    /// <summary>
    /// Instruction holding the value.
    /// </summary>
    public Instruction Value { get; }

    /// <summary>
    /// Creates a new operation result.
    /// </summary>
    /// <param name="type">Type of the value</param>
    /// <param name="value">Instruction holding the value</param>
    public OperationResult(AggregateType type, Instruction value) => (Type, Value) = (type, value);
}
}

View File

@ -0,0 +1,227 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using static Spv.Specification;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
using SpvInstruction = Spv.Generator.Instruction;
static class ScalingHelpers
{
/// <summary>
/// Applies render scale to integer texture or image coordinates when the access supports it.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="texOp">Texture operation the coordinates belong to</param>
/// <param name="vector">Coordinates vector</param>
/// <param name="intCoords">True if the coordinates are integers</param>
/// <param name="isBindless">True for bindless accesses, which are never scaled</param>
/// <param name="isIndexed">True for indexed accesses, which are never scaled</param>
/// <param name="isArray">True if the texture is an array</param>
/// <param name="pCount">Number of coordinate components</param>
/// <returns>The scaled coordinates, or <paramref name="vector"/> itself when no scaling applies</returns>
public static SpvInstruction ApplyScaling(
    CodeGenContext context,
    AstTextureOperation texOp,
    SpvInstruction vector,
    bool intCoords,
    bool isBindless,
    bool isIndexed,
    bool isArray,
    int pCount)
{
    if (!intCoords || !context.Config.Stage.SupportsRenderScale() || isBindless || isIndexed)
    {
        return vector;
    }

    int descriptorIndex;

    if (texOp.Inst == Instruction.ImageLoad)
    {
        // Image indices are offset by the number of texture descriptors.
        descriptorIndex = context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp);
    }
    else
    {
        descriptorIndex = context.Config.FindTextureDescriptorIndex(texOp);
    }

    // Only 2D (2 components) and 2D array (3 components) coordinates are scaled.
    if (isArray)
    {
        return pCount == 3 ? ApplyScaling2DArray(context, vector, descriptorIndex) : vector;
    }

    return pCount == 2 ? ApplyScaling2D(context, vector, descriptorIndex) : vector;
}
/// <summary>
/// Applies render scale to the x and y components of 2D array coordinates, leaving the third component untouched.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="vector">Three component integer coordinates</param>
/// <param name="index">Index used to select the scale</param>
/// <returns>Three component integer vector with scaled x and y and the original third component</returns>
private static SpvInstruction ApplyScaling2DArray(CodeGenContext context, SpvInstruction vector, int index)
{
    // Split the coordinates, as the array index (third component) must not be scaled.
    var xy = context.VectorShuffle(context.TypeVector(context.TypeS32(), 2), vector, vector, 0, 1);
    var layer = context.CompositeExtract(context.TypeS32(), vector, 2);

    var scaledXY = ApplyScaling2D(context, xy, index);

    return context.CompositeConstruct(context.TypeVector(context.TypeS32(), 3), scaledXY, layer);
}
/// <summary>
/// Generates code that scales 2D integer coordinates by a scale value loaded from the support buffer.
/// </summary>
/// <remarks>
/// The result is written to the context coordinate temporary from inside branches and loaded back after the merge label.
/// </remarks>
/// <param name="context">Code generation context</param>
/// <param name="vector">Two component integer coordinates</param>
/// <param name="index">Index used to select the scale</param>
/// <returns>Two component integer vector with the resulting coordinates</returns>
private static SpvInstruction ApplyScaling2D(CodeGenContext context, SpvInstruction vector, int index)
{
var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32());
// Member 4 of the support buffer is indexed to get the scale values.
var fieldIndex = context.Constant(context.TypeU32(), 4);
var scaleIndex = context.Constant(context.TypeU32(), index);
if (context.Config.Stage == ShaderStage.Vertex)
{
// On vertex shaders the value loaded from member 3 of the support buffer is added to the index.
var scaleCountPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32());
var scaleCountElemPointer = context.AccessChain(scaleCountPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3));
var scaleCount = context.Load(context.TypeS32(), scaleCountElemPointer);
scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, scaleCount);
}
// The first entry is always skipped.
// NOTE(review): presumably entry 0 is reserved for something else (render target scale?) - confirm.
scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, context.Constant(context.TypeU32(), 1));
var scaleElemPointer = context.AccessChain(pointerType, context.SupportBuffer, fieldIndex, scaleIndex);
var scale = context.Load(context.TypeFP32(), scaleElemPointer);
var ivector2Type = context.TypeVector(context.TypeS32(), 2);
var localVector = context.CoordTemp;
// A scale of exactly 1.0 means that the coordinates are used as is.
var passthrough = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f));
var mergeLabel = context.Label();
if (context.Config.Stage == ShaderStage.Fragment)
{
// Fragment shaders: a negative scale selects the interpolated path.
// The scaled value is always computed first, then overwritten with the original
// coordinates on a second conditional when the scale is 1.0.
var scaledInterpolatedLabel = context.Label();
var scaledNoInterpolationLabel = context.Label();
var needsInterpolation = context.FOrdLessThan(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 0f));
context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone);
context.BranchConditional(needsInterpolation, scaledInterpolatedLabel, scaledNoInterpolationLabel);
// scale < 0.0
context.AddLabel(scaledInterpolatedLabel);
ApplyScalingInterpolated(context, localVector, vector, scale);
context.Branch(mergeLabel);
// scale >= 0.0
context.AddLabel(scaledNoInterpolationLabel);
ApplyScalingNoInterpolation(context, localVector, vector, scale);
context.Branch(mergeLabel);
context.AddLabel(mergeLabel);
var passthroughLabel = context.Label();
var finalMergeLabel = context.Label();
context.SelectionMerge(finalMergeLabel, SelectionControlMask.MaskNone);
context.BranchConditional(passthrough, passthroughLabel, finalMergeLabel);
context.AddLabel(passthroughLabel);
context.Store(localVector, vector);
context.Branch(finalMergeLabel);
context.AddLabel(finalMergeLabel);
return context.Load(ivector2Type, localVector);
}
else
{
// Other stages: a single conditional picks between the original and the scaled coordinates.
var passthroughLabel = context.Label();
var scaledLabel = context.Label();
context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone);
context.BranchConditional(passthrough, passthroughLabel, scaledLabel);
// scale == 1.0
context.AddLabel(passthroughLabel);
context.Store(localVector, vector);
context.Branch(mergeLabel);
// scale != 1.0
context.AddLabel(scaledLabel);
ApplyScalingNoInterpolation(context, localVector, vector, scale);
context.Branch(mergeLabel);
context.AddLabel(mergeLabel);
return context.Load(ivector2Type, localVector);
}
}
/// <summary>
/// Generates the scaling code for the interpolated path: the coordinates are multiplied by the negated scale,
/// then the fragment coordinate modulo the negated scale is added to the result.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="output">Pointer where the resulting two component integer vector is stored</param>
/// <param name="vector">Two component integer coordinates</param>
/// <param name="scale">Scale value, which is negated before use</param>
private static void ApplyScalingInterpolated(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale)
{
    var vec2 = context.TypeVector(context.TypeFP32(), 2);

    var positiveScale = context.FNegate(context.TypeFP32(), scale);
    var positiveScale2 = context.CompositeConstruct(vec2, positiveScale, positiveScale);

    var coords = context.ConvertSToF(vec2, vector);
    var scaledCoords = context.VectorTimesScalar(vec2, coords, positiveScale);

    var fragCoordVariable = context.Inputs[new IoDefinition(StorageKind.Input, IoVariable.FragmentCoord)];
    var fragCoordValue = context.Load(context.TypeVector(context.TypeFP32(), 4), fragCoordVariable);
    var fragCoordXY = context.VectorShuffle(vec2, fragCoordValue, fragCoordValue, 0, 1);

    var offset = context.FMod(vec2, fragCoordXY, positiveScale2);
    var interpolated = context.FAdd(vec2, scaledCoords, offset);

    var ivec2 = context.TypeVector(context.TypeS32(), 2);
    var result = context.ConvertFToS(ivec2, interpolated);

    context.Store(output, result);
}
/// <summary>
/// Generates the scaling code for the non-interpolated path: the coordinates are multiplied by the scale.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="output">Pointer where the resulting two component integer vector is stored</param>
/// <param name="vector">Two component integer coordinates</param>
/// <param name="scale">Scale value; its absolute value is used on vertex shaders</param>
private static void ApplyScalingNoInterpolation(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale)
{
    var factor = scale;

    if (context.Config.Stage == ShaderStage.Vertex)
    {
        factor = context.GlslFAbs(context.TypeFP32(), factor);
    }

    var vec2 = context.TypeVector(context.TypeFP32(), 2);

    var coords = context.ConvertSToF(vec2, vector);
    var scaledCoords = context.VectorTimesScalar(vec2, coords, factor);

    var ivec2 = context.TypeVector(context.TypeS32(), 2);
    var result = context.ConvertFToS(ivec2, scaledCoords);

    context.Store(output, result);
}
/// <summary>
/// Generates code that divides a texture size by the absolute value of its render scale.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="texOp">Texture operation the size belongs to</param>
/// <param name="size">Integer size to unscale</param>
/// <param name="isBindless">True for bindless accesses, which are never unscaled</param>
/// <param name="isIndexed">True for indexed accesses, which are never unscaled</param>
/// <returns>The unscaled size, or <paramref name="size"/> itself when no unscaling applies</returns>
public static SpvInstruction ApplyUnscaling(
    CodeGenContext context,
    AstTextureOperation texOp,
    SpvInstruction size,
    bool isBindless,
    bool isIndexed)
{
    if (!context.Config.Stage.SupportsRenderScale() || isBindless || isIndexed)
    {
        return size;
    }

    int descriptorIndex = context.Config.FindTextureDescriptorIndex(texOp);

    var scalePointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32());
    var scalesMember = context.Constant(context.TypeU32(), 4);
    var entry = context.Constant(context.TypeU32(), descriptorIndex);

    if (context.Config.Stage == ShaderStage.Vertex)
    {
        // On vertex shaders the value loaded from member 3 of the support buffer is added to the index.
        var countPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32());
        var countPointer = context.AccessChain(countPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3));
        var count = context.Load(context.TypeS32(), countPointer);

        entry = context.IAdd(context.TypeU32(), entry, count);
    }

    // The first entry is always skipped.
    entry = context.IAdd(context.TypeU32(), entry, context.Constant(context.TypeU32(), 1));

    var scalePointer = context.AccessChain(scalePointerType, context.SupportBuffer, scalesMember, entry);
    var scale = context.GlslFAbs(context.TypeFP32(), context.Load(context.TypeFP32(), scalePointer));

    // A scale of exactly 1.0 selects the original size, which skips the float round trip.
    var isUnscaled = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f));

    var sizeAsFloat = context.ConvertSToF(context.TypeFP32(), size);
    var unscaled = context.FDiv(context.TypeFP32(), sizeAsFloat, scale);
    var unscaledAsInt = context.ConvertFToS(context.TypeS32(), unscaled);

    return context.Select(context.TypeS32(), isUnscaled, size, unscaledAsInt);
}
}
}

View File

@ -0,0 +1,226 @@
using FuncBinaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
using FuncQuaternaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
using FuncTernaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
using FuncUnaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
/// <summary>
/// Delegate cache for SPIR-V instruction generators. Avoids delegate allocation when passing generators as arguments.
/// </summary>
/// <remarks>
/// Every field is assigned once, on the constructor, from the method of the same name on the code generation context.
/// All delegates take and return SPIR-V instructions; the Unary, Binary, Ternary and Quaternary delegate types
/// take 2, 3, 4 and 5 instruction arguments respectively (the arity counts operands, with one extra leading argument).
/// NOTE(review): the extra leading argument is presumably the result type, as direct calls elsewhere
/// pass a type first (e.g. context.FNegate(context.TypeFP32(), scale)) - confirm for the atomic variants.
/// </remarks>
internal readonly struct SpirvDelegates
{
// Unary
public readonly FuncUnaryInstruction GlslFAbs;
public readonly FuncUnaryInstruction GlslSAbs;
public readonly FuncUnaryInstruction GlslCeil;
public readonly FuncUnaryInstruction GlslCos;
public readonly FuncUnaryInstruction GlslExp2;
public readonly FuncUnaryInstruction GlslFloor;
public readonly FuncUnaryInstruction GlslLog2;
public readonly FuncUnaryInstruction FNegate;
public readonly FuncUnaryInstruction SNegate;
public readonly FuncUnaryInstruction GlslInverseSqrt;
public readonly FuncUnaryInstruction GlslRoundEven;
public readonly FuncUnaryInstruction GlslSin;
public readonly FuncUnaryInstruction GlslSqrt;
public readonly FuncUnaryInstruction GlslTrunc;
// UnaryBool
public readonly FuncUnaryInstruction LogicalNot;
// UnaryFP32
public readonly FuncUnaryInstruction DPdx;
public readonly FuncUnaryInstruction DPdy;
// UnaryS32
public readonly FuncUnaryInstruction BitCount;
public readonly FuncUnaryInstruction BitReverse;
public readonly FuncUnaryInstruction Not;
// Compare
public readonly FuncBinaryInstruction FOrdEqual;
public readonly FuncBinaryInstruction IEqual;
public readonly FuncBinaryInstruction FOrdGreaterThan;
public readonly FuncBinaryInstruction SGreaterThan;
public readonly FuncBinaryInstruction FOrdGreaterThanEqual;
public readonly FuncBinaryInstruction SGreaterThanEqual;
public readonly FuncBinaryInstruction FOrdLessThan;
public readonly FuncBinaryInstruction SLessThan;
public readonly FuncBinaryInstruction FOrdLessThanEqual;
public readonly FuncBinaryInstruction SLessThanEqual;
public readonly FuncBinaryInstruction FOrdNotEqual;
public readonly FuncBinaryInstruction INotEqual;
// CompareU32
public readonly FuncBinaryInstruction UGreaterThanEqual;
public readonly FuncBinaryInstruction UGreaterThan;
public readonly FuncBinaryInstruction ULessThanEqual;
public readonly FuncBinaryInstruction ULessThan;
// Binary
public readonly FuncBinaryInstruction FAdd;
public readonly FuncBinaryInstruction IAdd;
public readonly FuncBinaryInstruction FDiv;
public readonly FuncBinaryInstruction SDiv;
public readonly FuncBinaryInstruction GlslFMax;
public readonly FuncBinaryInstruction GlslSMax;
public readonly FuncBinaryInstruction GlslFMin;
public readonly FuncBinaryInstruction GlslSMin;
public readonly FuncBinaryInstruction FMul;
public readonly FuncBinaryInstruction IMul;
public readonly FuncBinaryInstruction FSub;
public readonly FuncBinaryInstruction ISub;
// BinaryBool
public readonly FuncBinaryInstruction LogicalAnd;
public readonly FuncBinaryInstruction LogicalNotEqual;
public readonly FuncBinaryInstruction LogicalOr;
// BinaryS32
public readonly FuncBinaryInstruction BitwiseAnd;
public readonly FuncBinaryInstruction BitwiseXor;
public readonly FuncBinaryInstruction BitwiseOr;
public readonly FuncBinaryInstruction ShiftLeftLogical;
public readonly FuncBinaryInstruction ShiftRightArithmetic;
public readonly FuncBinaryInstruction ShiftRightLogical;
// BinaryU32
public readonly FuncBinaryInstruction GlslUMax;
public readonly FuncBinaryInstruction GlslUMin;
// AtomicMemoryBinary
public readonly FuncQuaternaryInstruction AtomicIAdd;
public readonly FuncQuaternaryInstruction AtomicAnd;
public readonly FuncQuaternaryInstruction AtomicSMin;
public readonly FuncQuaternaryInstruction AtomicUMin;
public readonly FuncQuaternaryInstruction AtomicSMax;
public readonly FuncQuaternaryInstruction AtomicUMax;
public readonly FuncQuaternaryInstruction AtomicOr;
public readonly FuncQuaternaryInstruction AtomicExchange;
public readonly FuncQuaternaryInstruction AtomicXor;
// Ternary
public readonly FuncTernaryInstruction GlslFClamp;
public readonly FuncTernaryInstruction GlslSClamp;
public readonly FuncTernaryInstruction GlslFma;
// TernaryS32
public readonly FuncTernaryInstruction BitFieldSExtract;
public readonly FuncTernaryInstruction BitFieldUExtract;
// TernaryU32
public readonly FuncTernaryInstruction GlslUClamp;
// QuaternaryS32
public readonly FuncQuaternaryInstruction BitFieldInsert;
/// <summary>
/// Creates the delegate cache, binding every field to the method with the same name on the context.
/// </summary>
/// <param name="context">Code generation context whose instruction generator methods are cached</param>
public SpirvDelegates(CodeGenContext context)
{
// The assignments below follow the same order and grouping as the field declarations.
// Unary
GlslFAbs = context.GlslFAbs;
GlslSAbs = context.GlslSAbs;
GlslCeil = context.GlslCeil;
GlslCos = context.GlslCos;
GlslExp2 = context.GlslExp2;
GlslFloor = context.GlslFloor;
GlslLog2 = context.GlslLog2;
FNegate = context.FNegate;
SNegate = context.SNegate;
GlslInverseSqrt = context.GlslInverseSqrt;
GlslRoundEven = context.GlslRoundEven;
GlslSin = context.GlslSin;
GlslSqrt = context.GlslSqrt;
GlslTrunc = context.GlslTrunc;
// UnaryBool
LogicalNot = context.LogicalNot;
// UnaryFP32
DPdx = context.DPdx;
DPdy = context.DPdy;
// UnaryS32
BitCount = context.BitCount;
BitReverse = context.BitReverse;
Not = context.Not;
// Compare
FOrdEqual = context.FOrdEqual;
IEqual = context.IEqual;
FOrdGreaterThan = context.FOrdGreaterThan;
SGreaterThan = context.SGreaterThan;
FOrdGreaterThanEqual = context.FOrdGreaterThanEqual;
SGreaterThanEqual = context.SGreaterThanEqual;
FOrdLessThan = context.FOrdLessThan;
SLessThan = context.SLessThan;
FOrdLessThanEqual = context.FOrdLessThanEqual;
SLessThanEqual = context.SLessThanEqual;
FOrdNotEqual = context.FOrdNotEqual;
INotEqual = context.INotEqual;
// CompareU32
UGreaterThanEqual = context.UGreaterThanEqual;
UGreaterThan = context.UGreaterThan;
ULessThanEqual = context.ULessThanEqual;
ULessThan = context.ULessThan;
// Binary
FAdd = context.FAdd;
IAdd = context.IAdd;
FDiv = context.FDiv;
SDiv = context.SDiv;
GlslFMax = context.GlslFMax;
GlslSMax = context.GlslSMax;
GlslFMin = context.GlslFMin;
GlslSMin = context.GlslSMin;
FMul = context.FMul;
IMul = context.IMul;
FSub = context.FSub;
ISub = context.ISub;
// BinaryBool
LogicalAnd = context.LogicalAnd;
LogicalNotEqual = context.LogicalNotEqual;
LogicalOr = context.LogicalOr;
// BinaryS32
BitwiseAnd = context.BitwiseAnd;
BitwiseXor = context.BitwiseXor;
BitwiseOr = context.BitwiseOr;
ShiftLeftLogical = context.ShiftLeftLogical;
ShiftRightArithmetic = context.ShiftRightArithmetic;
ShiftRightLogical = context.ShiftRightLogical;
// BinaryU32
GlslUMax = context.GlslUMax;
GlslUMin = context.GlslUMin;
// AtomicMemoryBinary
AtomicIAdd = context.AtomicIAdd;
AtomicAnd = context.AtomicAnd;
AtomicSMin = context.AtomicSMin;
AtomicUMin = context.AtomicUMin;
AtomicSMax = context.AtomicSMax;
AtomicUMax = context.AtomicUMax;
AtomicOr = context.AtomicOr;
AtomicExchange = context.AtomicExchange;
AtomicXor = context.AtomicXor;
// Ternary
GlslFClamp = context.GlslFClamp;
GlslSClamp = context.GlslSClamp;
GlslFma = context.GlslFma;
// TernaryS32
BitFieldSExtract = context.BitFieldSExtract;
BitFieldUExtract = context.BitFieldUExtract;
// TernaryU32
GlslUClamp = context.GlslUClamp;
// QuaternaryS32
BitFieldInsert = context.BitFieldInsert;
}
}
}

View File

@ -0,0 +1,415 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using static Spv.Specification;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
using SpvInstruction = Spv.Generator.Instruction;
using SpvInstructionPool = Spv.Generator.GeneratorPool<Spv.Generator.Instruction>;
using SpvLiteralInteger = Spv.Generator.LiteralInteger;
using SpvLiteralIntegerPool = Spv.Generator.GeneratorPool<Spv.Generator.LiteralInteger>;
static class SpirvGenerator
{
// Resource pools for Spirv generation. Note: Increase count when more threads are being used.
private const int GeneratorPoolCount = 1;
private static ObjectPool<SpvInstructionPool> InstructionPool;
private static ObjectPool<SpvLiteralIntegerPool> IntegerPool;
private static object PoolLock;
/// <summary>
/// Creates the shared instruction and literal integer generator pools, along with the lock object
/// taken while allocating from them.
/// </summary>
static SpirvGenerator()
{
    InstructionPool = new ObjectPool<SpvInstructionPool>(() => new SpvInstructionPool(), GeneratorPoolCount);
    IntegerPool = new ObjectPool<SpvLiteralIntegerPool>(() => new SpvLiteralIntegerPool(), GeneratorPoolCount);
    PoolLock = new object();
}
private const HelperFunctionsMask NeedsInvocationIdMask =
HelperFunctionsMask.Shuffle |
HelperFunctionsMask.ShuffleDown |
HelperFunctionsMask.ShuffleUp |
HelperFunctionsMask.ShuffleXor |
HelperFunctionsMask.SwizzleAdd;
/// <summary>
/// Generates the SPIR-V binary for a structured shader program.
/// </summary>
/// <param name="info">Structured program to generate code for</param>
/// <param name="config">Configuration of the shader being generated</param>
/// <returns>The bytes produced by the code generation context</returns>
public static byte[] Generate(StructuredProgramInfo info, ShaderConfig config)
{
    SpvInstructionPool borrowedInstructions;
    SpvLiteralIntegerPool borrowedIntegers;

    // Pool access is serialized through PoolLock.
    lock (PoolLock)
    {
        borrowedInstructions = InstructionPool.Allocate();
        borrowedIntegers = IntegerPool.Allocate();
    }

    var context = new CodeGenContext(info, config, borrowedInstructions, borrowedIntegers);

    // Capabilities that are declared for every shader, whatever its stage.
    context.AddCapability(Capability.GroupNonUniformBallot);
    context.AddCapability(Capability.GroupNonUniformShuffle);
    context.AddCapability(Capability.GroupNonUniformVote);
    context.AddCapability(Capability.ImageBuffer);
    context.AddCapability(Capability.ImageGatherExtended);
    context.AddCapability(Capability.ImageQuery);
    context.AddCapability(Capability.SampledBuffer);

    bool writesTransformFeedback = config.TransformFeedbackEnabled && config.LastInVertexPipeline;

    if (writesTransformFeedback)
    {
        context.AddCapability(Capability.TransformFeedback);
    }

    // Stage specific capabilities and extensions.
    switch (config.Stage)
    {
        case ShaderStage.Fragment:
        {
            var layerInput = new IoDefinition(StorageKind.Input, IoVariable.Layer);

            if (context.Info.IoDefinitions.Contains(layerInput))
            {
                context.AddCapability(Capability.Geometry);
            }

            if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
            {
                context.AddCapability(Capability.FragmentShaderPixelInterlockEXT);
                context.AddExtension("SPV_EXT_fragment_shader_interlock");
            }

            break;
        }

        case ShaderStage.Geometry:
        {
            context.AddCapability(Capability.Geometry);

            if (config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
            {
                context.AddExtension("SPV_NV_geometry_shader_passthrough");
                context.AddCapability(Capability.GeometryShaderPassthroughNV);
            }

            break;
        }

        case ShaderStage.TessellationControl:
        case ShaderStage.TessellationEvaluation:
            context.AddCapability(Capability.Tessellation);
            break;

        case ShaderStage.Vertex:
            context.AddCapability(Capability.DrawParameters);
            break;
    }

    var viewportMaskOutput = new IoDefinition(StorageKind.Output, IoVariable.ViewportMask);

    if (context.Info.IoDefinitions.Contains(viewportMaskOutput))
    {
        context.AddExtension("SPV_NV_viewport_array2");
        context.AddCapability(Capability.ShaderViewportMaskNV);
    }

    // The shuffle and swizzle helpers read the subgroup lane id, so it must be declared as an input.
    bool needsLaneId = (info.HelperFunctionsMask & NeedsInvocationIdMask) != 0;

    if (needsLaneId)
    {
        info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.SubgroupLaneId));
    }

    Declarations.DeclareAll(context, info);

    // First pass: declare every function, so that they all exist before any body is generated.
    for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++)
    {
        var function = info.Functions[funcIndex];
        var returnType = context.GetType(function.ReturnType);

        int argumentCount = function.InArguments.Length + function.OutArguments.Length;
        var argumentTypes = new SpvInstruction[argumentCount];

        // Every argument is passed as a pointer with Function storage class.
        for (int argIndex = 0; argIndex < argumentTypes.Length; argIndex++)
        {
            var valueType = context.GetType(function.GetArgumentType(argIndex));

            argumentTypes[argIndex] = context.TypePointer(StorageClass.Function, valueType);
        }

        var functionType = context.TypeFunction(returnType, false, argumentTypes);
        var spvFunction = context.Function(returnType, FunctionControlMask.MaskNone, functionType);

        context.DeclareFunction(funcIndex, function, spvFunction);
    }

    // Second pass: generate the function bodies.
    for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++)
    {
        Generate(context, info, funcIndex);
    }

    byte[] binary = context.Generate();

    lock (PoolLock)
    {
        InstructionPool.Release(borrowedInstructions);
        IntegerPool.Release(borrowedIntegers);
    }

    return binary;
}
/// <summary>
/// Generates the body of one function. For the function at index 0, the entry point
/// and the execution modes of the current shader stage are declared as well.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="info">Structured program that owns the function</param>
/// <param name="funcIndex">Index of the function inside <paramref name="info"/></param>
private static void Generate(CodeGenContext context, StructuredProgramInfo info, int funcIndex)
{
    var function = info.Functions[funcIndex];

    (_, var spvFunc) = context.GetFunction(funcIndex);

    context.AddFunction(spvFunc);
    context.StartFunction();

    Declarations.DeclareParameters(context, function);

    context.EnterBlock(function.MainBlock);

    Declarations.DeclareLocals(context, function);
    Declarations.DeclareLocalForArgs(context, info.Functions);

    Generate(context, function.MainBlock);

    // Functions must always end with a return, so add one unless
    // the last operation is already a return or a discard.
    bool alreadyTerminated =
        function.MainBlock.Last is AstOperation tail &&
        (tail.Inst == Instruction.Return || tail.Inst == Instruction.Discard);

    if (!alreadyTerminated)
    {
        context.Return();
    }

    context.FunctionEnd();

    // Only the first function gets an entry point and execution modes.
    if (funcIndex != 0)
    {
        return;
    }

    context.AddEntryPoint(context.Config.Stage.Convert(), spvFunc, "main", context.GetMainInterface());

    switch (context.Config.Stage)
    {
        case ShaderStage.TessellationControl:
        {
            context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive);

            break;
        }

        case ShaderStage.TessellationEvaluation:
        {
            // Patch types without a matching execution mode add nothing.
            ExecutionMode? patchMode = context.Config.GpuAccessor.QueryTessPatchType() switch
            {
                TessPatchType.Isolines => ExecutionMode.Isolines,
                TessPatchType.Triangles => ExecutionMode.Triangles,
                TessPatchType.Quads => ExecutionMode.Quads,
                _ => (ExecutionMode?)null
            };

            if (patchMode.HasValue)
            {
                context.AddExecutionMode(spvFunc, patchMode.Value);
            }

            // Same for the spacing modes.
            ExecutionMode? spacingMode = context.Config.GpuAccessor.QueryTessSpacing() switch
            {
                TessSpacing.EqualSpacing => ExecutionMode.SpacingEqual,
                TessSpacing.FractionalEventSpacing => ExecutionMode.SpacingFractionalEven,
                TessSpacing.FractionalOddSpacing => ExecutionMode.SpacingFractionalOdd,
                _ => (ExecutionMode?)null
            };

            if (spacingMode.HasValue)
            {
                context.AddExecutionMode(spvFunc, spacingMode.Value);
            }

            bool queriedCw = context.Config.GpuAccessor.QueryTessCw();

            // We invert the front face on Vulkan backend, so we need to do that here as well.
            bool invertWinding = context.Config.Options.TargetApi == TargetApi.Vulkan;
            bool clockwise = queriedCw != invertWinding;

            context.AddExecutionMode(spvFunc, clockwise ? ExecutionMode.VertexOrderCw : ExecutionMode.VertexOrderCcw);

            break;
        }

        case ShaderStage.Geometry:
        {
            InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology();

            ExecutionMode inputMode = inputTopology switch
            {
                InputTopology.Points => ExecutionMode.InputPoints,
                InputTopology.Lines => ExecutionMode.InputLines,
                InputTopology.LinesAdjacency => ExecutionMode.InputLinesAdjacency,
                InputTopology.Triangles => ExecutionMode.Triangles,
                InputTopology.TrianglesAdjacency => ExecutionMode.InputTrianglesAdjacency,
                _ => throw new InvalidOperationException($"Invalid input topology \"{inputTopology}\".")
            };

            context.AddExecutionMode(spvFunc, inputMode);

            context.AddExecutionMode(spvFunc, ExecutionMode.Invocations, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive);

            ExecutionMode outputMode = context.Config.OutputTopology switch
            {
                OutputTopology.PointList => ExecutionMode.OutputPoints,
                OutputTopology.LineStrip => ExecutionMode.OutputLineStrip,
                OutputTopology.TriangleStrip => ExecutionMode.OutputTriangleStrip,
                _ => throw new InvalidOperationException($"Invalid output topology \"{context.Config.OutputTopology}\".")
            };

            context.AddExecutionMode(spvFunc, outputMode);

            // With passthrough enabled, the input vertex count is used as the output vertex count.
            int maxOutputVertices;

            if (context.Config.GpPassthrough)
            {
                maxOutputVertices = context.InputVertices;
            }
            else
            {
                maxOutputVertices = context.Config.MaxOutputVertices;
            }

            context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)maxOutputVertices);

            break;
        }

        case ShaderStage.Fragment:
        {
            ExecutionMode originMode;

            if (context.Config.Options.TargetApi == TargetApi.Vulkan)
            {
                originMode = ExecutionMode.OriginUpperLeft;
            }
            else
            {
                originMode = ExecutionMode.OriginLowerLeft;
            }

            context.AddExecutionMode(spvFunc, originMode);

            var depthOutput = new IoDefinition(StorageKind.Output, IoVariable.FragmentOutputDepth);

            if (context.Info.IoDefinitions.Contains(depthOutput))
            {
                context.AddExecutionMode(spvFunc, ExecutionMode.DepthReplacing);
            }

            if (context.Config.GpuAccessor.QueryEarlyZForce())
            {
                context.AddExecutionMode(spvFunc, ExecutionMode.EarlyFragmentTests);
            }

            bool usesInterlock = (info.HelperFunctionsMask & HelperFunctionsMask.FSI) != 0;

            if (usesInterlock && context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
            {
                context.AddExecutionMode(spvFunc, ExecutionMode.PixelInterlockOrderedEXT);
            }

            break;
        }

        case ShaderStage.Compute:
        {
            var sizeX = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeX();
            var sizeY = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeY();
            var sizeZ = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeZ();

            context.AddExecutionMode(spvFunc, ExecutionMode.LocalSize, sizeX, sizeY, sizeZ);

            break;
        }
    }

    if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline)
    {
        context.AddExecutionMode(spvFunc, ExecutionMode.Xfb);
    }
}
/// <summary>
/// Generates code for every node visited from <paramref name="block"/>, emitting the merge and
/// branch instructions of if and do-while blocks as the visitor enters and leaves them.
/// </summary>
/// <param name="context">Code generation context that receives the emitted instructions</param>
/// <param name="block">Root AST block to visit</param>
/// <exception cref="NotImplementedException">An assignment targets something other than a local variable or an argument</exception>
private static void Generate(CodeGenContext context, AstBlock block)
{
AstBlockVisitor visitor = new AstBlockVisitor(block);
// For each do-while block: the value returned by context.NewBlock() when the loop is entered
// (used as the back-edge target when the loop is left) and the label used as continue target.
// The same dictionary is also published on the context through LoopTargets.
var loopTargets = new Dictionary<AstBlock, (SpvInstruction, SpvInstruction)>();
context.LoopTargets = loopTargets;
visitor.BlockEntered += (sender, e) =>
{
// The parent of the entered block is used as the merge target.
AstBlock mergeBlock = e.Block.Parent;
if (e.Block.Type == AstBlockType.If)
{
AstBlock ifTrueBlock = e.Block;
AstBlock ifFalseBlock;
// The false target is the Else block that follows, if there is one;
// otherwise a false condition goes straight to the merge block.
if (AstHelper.Next(e.Block) is AstBlock nextBlock && nextBlock.Type == AstBlockType.Else)
{
ifFalseBlock = nextBlock;
}
else
{
ifFalseBlock = mergeBlock;
}
var condition = context.Get(AggregateType.Bool, e.Block.Condition);
context.SelectionMerge(context.GetNextLabel(mergeBlock), SelectionControlMask.MaskNone);
context.BranchConditional(condition, context.GetNextLabel(ifTrueBlock), context.GetNextLabel(ifFalseBlock));
}
else if (e.Block.Type == AstBlockType.DoWhile)
{
var continueTarget = context.Label();
loopTargets.Add(e.Block, (context.NewBlock(), continueTarget));
context.LoopMerge(context.GetNextLabel(mergeBlock), continueTarget, LoopControlMask.MaskNone);
context.Branch(context.GetFirstLabel(e.Block));
}
context.EnterBlock(e.Block);
};
visitor.BlockLeft += (sender, e) =>
{
// Nothing is emitted when leaving a block that has no parent.
if (e.Block.Parent != null)
{
if (e.Block.Type == AstBlockType.DoWhile)
{
// This is a loop, we need to jump back to the loop header
// if the condition is true.
AstBlock mergeBlock = e.Block.Parent;
(var loopTarget, var continueTarget) = loopTargets[e.Block];
// The loop condition is evaluated after the continue label.
context.Branch(continueTarget);
context.AddLabel(continueTarget);
var condition = context.Get(AggregateType.Bool, e.Block.Condition);
context.BranchConditional(condition, loopTarget, context.GetNextLabel(mergeBlock));
}
else
{
// We only need a branch if the last instruction didn't
// already cause the program to exit or jump elsewhere.
bool lastIsCf = e.Block.Last is AstOperation lastOp &&
(lastOp.Inst == Instruction.Discard ||
lastOp.Inst == Instruction.LoopBreak ||
lastOp.Inst == Instruction.LoopContinue ||
lastOp.Inst == Instruction.Return);
if (!lastIsCf)
{
context.Branch(context.GetNextLabel(e.Block.Parent));
}
}
// When an Else or ElseIf block follows, the parent is not re-entered here.
bool hasElse = AstHelper.Next(e.Block) is AstBlock nextBlock &&
(nextBlock.Type == AstBlockType.Else ||
nextBlock.Type == AstBlockType.ElseIf);
// Re-enter the parent block.
if (e.Block.Parent != null && !hasElse)
{
context.EnterBlock(e.Block.Parent);
}
}
};
// Visited nodes are either assignments or operations; any other node type is ignored.
foreach (IAstNode node in visitor.Visit())
{
if (node is AstAssignment assignment)
{
var dest = (AstOperand)assignment.Destination;
// Only local variables and function arguments can be assigned to.
if (dest.Type == OperandType.LocalVariable)
{
var source = context.Get(dest.VarType, assignment.Source);
context.Store(context.GetLocalPointer(dest), source);
}
else if (dest.Type == OperandType.Argument)
{
var source = context.Get(dest.VarType, assignment.Source);
context.Store(context.GetArgumentPointer(dest), source);
}
else
{
throw new NotImplementedException(dest.Type.ToString());
}
}
else if (node is AstOperation operation)
{
Instructions.Generate(context, operation);
}
}
}
}
}

View File

@ -0,0 +1,4 @@
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
/// <summary>
/// Immutable value that groups a constant buffer slot, a handle and a texture format.
/// Being a record struct, two values are equal when all three components are equal.
/// </summary>
/// <param name="CbufSlot">Constant buffer slot component</param>
/// <param name="Handle">Handle component</param>
/// <param name="Format">Texture format component</param>
readonly record struct TextureMeta(int CbufSlot, int Handle, TextureFormat Format);
}

View File

@ -0,0 +1,16 @@
namespace Ryujinx.Graphics.Shader
{
/// <summary>
/// Numeric constants shared across the shader translator.
/// </summary>
static class Constants
{
    /// <summary>Constant buffer size, in bytes.</summary>
    public const int ConstantBufferSize = 64 * 1024;

    public const int MaxAttributes = 16;

    /// <summary>Mask with the low <see cref="MaxAttributes"/> bits set.</summary>
    public const int AllAttributesMask = (int)(~0u >> (32 - MaxAttributes));

    public const int NvnBaseVertexByteOffset = 0x640;
    public const int NvnBaseInstanceByteOffset = 0x644;
    public const int NvnDrawIndexByteOffset = 0x648;

    public const int StorageAlignment = 16;
}
}

View File

@ -0,0 +1,168 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Linq;
namespace Ryujinx.Graphics.Shader.Decoders
{
/// <summary>
/// A push instruction together with the blocks that consume the address it pushes.
/// </summary>
class PushOpInfo
{
    /// <summary>The push instruction.</summary>
    public InstOp Op { get; }

    /// <summary>Operand associated with each consuming block.</summary>
    public Dictionary<Block, Operand> Consumers = new Dictionary<Block, Operand>();

    /// <summary>Creates the information for a push instruction, initially without consumers.</summary>
    /// <param name="op">The push instruction</param>
    public PushOpInfo(InstOp op) => Op = op;
}
/// <summary>
/// Associates a push instruction with the id it was given inside a consuming block.
/// </summary>
readonly struct SyncTarget
{
    /// <summary>Information of the push instruction.</summary>
    public PushOpInfo PushOpInfo { get; }

    /// <summary>Id assigned to the push instruction.</summary>
    public int PushOpId { get; }

    public SyncTarget(PushOpInfo pushOpInfo, int pushOpId) => (PushOpInfo, PushOpId) = (pushOpInfo, pushOpId);
}
/// <summary>
/// A run of decoded instructions covering an address range, linked to the blocks
/// that can execute before and after it.
/// </summary>
class Block
{
    public ulong Address { get; set; }
    public ulong EndAddress { get; set; }

    public List<Block> Predecessors { get; } = new List<Block>();
    public List<Block> Successors { get; } = new List<Block>();

    public List<InstOp> OpCodes { get; } = new List<InstOp>();
    public List<PushOpInfo> PushOpCodes { get; } = new List<PushOpInfo>();
    public Dictionary<ulong, SyncTarget> SyncTargets { get; } = new Dictionary<ulong, SyncTarget>();

    /// <summary>Creates an empty block that starts at the given address.</summary>
    /// <param name="address">Start address of the block</param>
    public Block(ulong address) => Address = address;

    /// <summary>
    /// Splits this block at the address of <paramref name="rightBlock"/>. Instructions, push
    /// instructions, sync targets and successors located from that address on are handed over
    /// to <paramref name="rightBlock"/>, which becomes the only successor of this block.
    /// </summary>
    /// <param name="rightBlock">Block that receives the second half</param>
    /// <exception cref="ArgumentException">No instruction would end up on the right block</exception>
    public void Split(Block rightBlock)
    {
        ulong splitAddress = rightBlock.Address;

        int firstMoved = BinarySearch(OpCodes, splitAddress);

        // The search can stop on an instruction located before the split address.
        if (OpCodes[firstMoved].Address < splitAddress)
        {
            firstMoved++;
        }

        int movedCount = OpCodes.Count - firstMoved;

        if (movedCount <= 0)
        {
            throw new ArgumentException("Can't split at right block address.");
        }

        // The right block takes over the end of the range and the outgoing edges.
        rightBlock.EndAddress = EndAddress;
        rightBlock.Successors.AddRange(Successors);
        rightBlock.Predecessors.Add(this);

        EndAddress = splitAddress;

        Successors.Clear();
        Successors.Add(rightBlock);

        // Move ops.
        List<InstOp> movedOps = OpCodes.GetRange(firstMoved, movedCount);

        OpCodes.RemoveRange(firstMoved, movedCount);
        rightBlock.OpCodes.AddRange(movedOps);

        // Push consumers registered for this block now belong to the right block.
        foreach (KeyValuePair<ulong, SyncTarget> entry in SyncTargets)
        {
            Dictionary<Block, Operand> consumers = entry.Value.PushOpInfo.Consumers;

            Operand local = consumers[this];

            consumers.Remove(this);
            consumers.Add(rightBlock, local);
        }

        // The sync targets themselves move as well.
        foreach (KeyValuePair<ulong, SyncTarget> entry in SyncTargets)
        {
            rightBlock.SyncTargets.Add(entry.Key, entry.Value);
        }

        SyncTargets.Clear();

        // Move push ops, starting at the first one located on the right block.
        int firstMovedPush = PushOpCodes.FindIndex(pushOp => pushOp.Op.Address >= splitAddress);

        if (firstMovedPush >= 0)
        {
            int movedPushCount = PushOpCodes.Count - firstMovedPush;

            rightBlock.PushOpCodes.AddRange(PushOpCodes.GetRange(firstMovedPush, movedPushCount));
            PushOpCodes.RemoveRange(firstMovedPush, movedPushCount);
        }
    }

    /// <summary>
    /// Binary search by address. Returns the index of the instruction at <paramref name="address"/>
    /// when present, otherwise the index of the last instruction probed (0 for an empty list).
    /// </summary>
    private static int BinarySearch(List<InstOp> opCodes, ulong address)
    {
        int low = 0;
        int high = opCodes.Count - 1;
        int probe = 0;

        while (low <= high)
        {
            probe = low + ((high - low) >> 1);

            ulong probeAddress = opCodes[probe].Address;

            if (address < probeAddress)
            {
                high = probe - 1;
            }
            else if (address > probeAddress)
            {
                low = probe + 1;
            }
            else
            {
                break;
            }
        }

        return probe;
    }

    /// <summary>Gets the last instruction of the block, or the default value when the block is empty.</summary>
    public InstOp GetLastOp()
    {
        return OpCodes.Count != 0 ? OpCodes[^1] : default;
    }

    /// <summary>
    /// Checks if execution can continue past the end of the block, which is the case for a
    /// non-empty block whose last instruction is not an unconditional branch.
    /// </summary>
    public bool HasNext()
    {
        if (OpCodes.Count == 0)
        {
            return false;
        }

        InstOp lastOp = OpCodes[^1];

        return !Decoder.IsUnconditionalBranch(ref lastOp);
    }

    /// <summary>Registers a push instruction that belongs to this block.</summary>
    public void AddPushOp(InstOp op) => PushOpCodes.Add(new PushOpInfo(op));
}
}

View File

@ -0,0 +1,48 @@
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Decoders
{
/// <summary>
/// A function found while decoding, identified by its address.
/// </summary>
class DecodedFunction
{
    private readonly HashSet<DecodedFunction> _callers = new HashSet<DecodedFunction>();

    /// <summary>True for every function whose type is not <see cref="FunctionType.User"/>.</summary>
    public bool IsCompilerGenerated => Type != FunctionType.User;

    public FunctionType Type { get; set; } = FunctionType.User;

    /// <summary>Function id, -1 until one is assigned.</summary>
    public int Id { get; set; } = -1;

    public ulong Address { get; }
    public Block[] Blocks { get; private set; }

    /// <summary>Creates a user function without id, blocks or callers.</summary>
    /// <param name="address">Address of the function</param>
    public DecodedFunction(ulong address) => Address = address;

    /// <summary>Sets the blocks of the function. This can only be done once.</summary>
    /// <exception cref="InvalidOperationException">The blocks were set before</exception>
    public void SetBlocks(Block[] blocks)
    {
        if (Blocks == null)
        {
            Blocks = blocks;
            return;
        }

        throw new InvalidOperationException("Blocks have already been set.");
    }

    /// <summary>Records a function that calls this one.</summary>
    public void AddCaller(DecodedFunction caller) => _callers.Add(caller);

    /// <summary>
    /// Removes a recorded caller. When that removes the last one, the function is marked as unused.
    /// </summary>
    public void RemoveCaller(DecodedFunction caller)
    {
        bool wasCaller = _callers.Remove(caller);

        if (wasCaller && _callers.Count == 0)
        {
            Type = FunctionType.Unused;
        }
    }
}
}

View File

@ -0,0 +1,57 @@
using System;
using System.Collections;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Decoders
{
/// <summary>
/// The functions of a decoded program, accessible by address or by assigned id.
/// Enumerating the program yields all functions from the address map.
/// </summary>
readonly struct DecodedProgram : IEnumerable<DecodedFunction>
{
    public DecodedFunction MainFunction { get; }

    private readonly IReadOnlyDictionary<ulong, DecodedFunction> _functions;
    private readonly List<DecodedFunction> _functionsWithId;

    /// <summary>Number of functions that have been given an id.</summary>
    public int FunctionsWithIdCount => _functionsWithId.Count;

    public DecodedProgram(DecodedFunction mainFunction, IReadOnlyDictionary<ulong, DecodedFunction> functions)
    {
        _functionsWithId = new List<DecodedFunction>();
        _functions = functions;
        MainFunction = mainFunction;
    }

    /// <summary>Gets the function at the given address, or null if there is none.</summary>
    public DecodedFunction GetFunctionByAddress(ulong address)
        => _functions.TryGetValue(address, out DecodedFunction function) ? function : null;

    /// <summary>Gets the function with the given id.</summary>
    /// <exception cref="ArgumentOutOfRangeException">No function has this id</exception>
    public DecodedFunction GetFunctionById(int id)
    {
        bool isValidId = id >= 0 && id < _functionsWithId.Count;

        if (!isValidId)
        {
            throw new ArgumentOutOfRangeException(nameof(id));
        }

        return _functionsWithId[id];
    }

    /// <summary>Assigns the next free id to the function and registers it under that id.</summary>
    public void AddFunctionAndSetId(DecodedFunction function)
    {
        int nextId = _functionsWithId.Count;

        function.Id = nextId;
        _functionsWithId.Add(function);
    }

    public IEnumerator<DecodedFunction> GetEnumerator() => _functions.Values.GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
}

View File

@ -0,0 +1,765 @@
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Decoders
{
static class Decoder
{
/// <summary>
/// Decodes a shader program. Decoding starts from the function at address 0, and every
/// function reached through a CAL instruction is queued and decoded in turn.
/// </summary>
/// <param name="config">Shader configuration, also used to read the code through its GPU accessor</param>
/// <param name="startAddress">Base added to each instruction address when the code is read</param>
/// <returns>The decoded program, with the function at address 0 as main function</returns>
/// <exception cref="InvalidOperationException">Two blocks with the same address were found on the block list</exception>
public static DecodedProgram Decode(ShaderConfig config, ulong startAddress)
{
Queue<DecodedFunction> functionsQueue = new Queue<DecodedFunction>();
Dictionary<ulong, DecodedFunction> functionsVisited = new Dictionary<ulong, DecodedFunction>();
// Returns the function at the given address, creating and queueing it for decoding on first use.
DecodedFunction EnqueueFunction(ulong address)
{
if (!functionsVisited.TryGetValue(address, out DecodedFunction function))
{
functionsVisited.Add(address, function = new DecodedFunction(address));
functionsQueue.Enqueue(function);
}
return function;
}
DecodedFunction mainFunction = EnqueueFunction(0);
while (functionsQueue.TryDequeue(out DecodedFunction currentFunction))
{
// Blocks of the current function, kept sorted by address.
List<Block> blocks = new List<Block>();
Queue<Block> workQueue = new Queue<Block>();
Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
// Returns the block at the given address, creating and queueing it for decoding on first use.
Block GetBlock(ulong blkAddress)
{
if (!visited.TryGetValue(blkAddress, out Block block))
{
block = new Block(blkAddress);
workQueue.Enqueue(block);
visited.Add(blkAddress, block);
}
return block;
}
GetBlock(currentFunction.Address);
bool hasNewTarget;
do
{
while (workQueue.TryDequeue(out Block currBlock))
{
// Check if the current block is inside another block.
if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
{
Block nBlock = blocks[nBlkIndex];
if (nBlock.Address == currBlock.Address)
{
throw new InvalidOperationException("Found duplicate block address on the list.");
}
// The existing block is split in two, the current block becomes its second half.
nBlock.Split(currBlock);
blocks.Insert(nBlkIndex + 1, currBlock);
continue;
}
// If we have a block after the current one, set the limit address.
ulong limitAddress = ulong.MaxValue;
if (nBlkIndex != blocks.Count)
{
Block nBlock = blocks[nBlkIndex];
int nextIndex = nBlkIndex + 1;
// The search ends on a neighbor of the insertion point, which can be the block
// before or the block after the current one.
if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
{
limitAddress = blocks[nextIndex].Address;
}
else if (nBlock.Address > currBlock.Address)
{
limitAddress = blocks[nBlkIndex].Address;
}
}
FillBlock(config, currBlock, limitAddress, startAddress);
if (currBlock.OpCodes.Count != 0)
{
// We should have blocks for all possible branch targets,
// including those from PBK/PCNT/SSY instructions.
foreach (PushOpInfo pushOp in currBlock.PushOpCodes)
{
GetBlock(pushOp.Op.GetAbsoluteAddress());
}
// Set child blocks. "Branch" is the block the branch instruction
// points to (when taken), "Next" is the block at the next address,
// executed when the branch is not taken. For Unconditional Branches
// or end of program, Next is null.
InstOp lastOp = currBlock.GetLastOp();
if (lastOp.Name == InstName.Cal)
{
// The call target is decoded as a separate function.
EnqueueFunction(lastOp.GetAbsoluteAddress()).AddCaller(currentFunction);
}
else if (lastOp.Name == InstName.Bra)
{
Block succBlock = GetBlock(lastOp.GetAbsoluteAddress());
currBlock.Successors.Add(succBlock);
succBlock.Predecessors.Add(currBlock);
}
if (!IsUnconditionalBranch(ref lastOp))
{
// The fall-through block is always the first successor.
Block succBlock = GetBlock(currBlock.EndAddress);
currBlock.Successors.Insert(0, succBlock);
succBlock.Predecessors.Add(currBlock);
}
}
// Insert the new block on the list (sorted by address).
if (blocks.Count != 0)
{
Block nBlock = blocks[nBlkIndex];
blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
}
else
{
blocks.Add(currBlock);
}
}
// Propagate SSY/PBK addresses into their uses (SYNC/BRK).
foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
{
for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
{
PropagatePushOp(visited, block, pushOpIndex);
}
}
// Try to find targets for BRX (indirect branch) instructions.
hasNewTarget = FindBrxTargets(config, blocks, GetBlock);
// If we discovered new branch targets from the BRX instruction,
// we need another round of decoding to decode the new blocks.
// Additionally, we may have more SSY/PBK targets to propagate,
// and new BRX instructions.
}
while (hasNewTarget);
currentFunction.SetBlocks(blocks.ToArray());
}
return new DecodedProgram(mainFunction, functionsVisited);
}
/// <summary>
/// Searches the address-sorted block list for the block whose range contains the given address.
/// </summary>
/// <param name="blocks">Blocks sorted by address</param>
/// <param name="address">Address to look for</param>
/// <param name="index">Index of the last block probed, which is the containing block on success (0 for an empty list)</param>
/// <returns>True if a block containing the address was found, false otherwise</returns>
private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
{
    index = 0;

    int low = 0;
    int high = blocks.Count - 1;

    while (low <= high)
    {
        index = low + ((high - low) >> 1);

        Block candidate = blocks[index];

        if (address < candidate.Address)
        {
            high = index - 1;
        }
        else if (address < candidate.EndAddress)
        {
            // Address is at or past the start and before the end of the candidate.
            return true;
        }
        else
        {
            low = index + 1;
        }
    }

    return false;
}
/// <summary>
/// Decodes instructions into the block, starting at the block address, until an instruction
/// flagged as branch is decoded or the limit address is reached. The block end address is
/// set to the address following the last decoded instruction.
/// </summary>
/// <param name="config">Shader configuration, updated with the features used by the decoded instructions</param>
/// <param name="block">Block to fill</param>
/// <param name="limitAddress">Address where decoding must stop</param>
/// <param name="startAddress">Base added to the instruction address when the code is read</param>
private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress)
{
    IGpuAccessor gpuAccessor = config.GpuAccessor;

    ulong currentAddress = block.Address;

    ReadOnlySpan<ulong> code = ReadOnlySpan<ulong>.Empty;
    int codeIndex = 0;

    InstOp op = default;

    do
    {
        // Stop if a whole instruction does not fit before the limit.
        if (currentAddress + 7 >= limitAddress)
        {
            break;
        }

        // Ignore scheduling instructions, which are written every 32 bytes.
        bool isSchedulingSlot = (currentAddress & 0x1f) == 0;

        if (isSchedulingSlot)
        {
            currentAddress += 8;
            codeIndex++;
            continue;
        }

        // Fetch more code once everything on the current span was consumed.
        if (codeIndex >= code.Length)
        {
            code = gpuAccessor.GetCode(startAddress + currentAddress, 8);
            codeIndex = 0;
        }

        ulong opCode = code[codeIndex];
        codeIndex++;

        op = InstTable.GetOp(currentAddress, opCode);

        if (op.Props.HasFlag(InstProps.TexB))
        {
            config.SetUsedFeature(FeatureFlags.Bindless);
        }

        switch (op.Name)
        {
            case InstName.Ald:
            case InstName.Ast:
            case InstName.Ipa:
                SetUserAttributeUses(config, op.Name, opCode);
                break;

            case InstName.Pbk:
            case InstName.Pcnt:
            case InstName.Ssy:
                block.AddPushOp(op);
                break;
        }

        block.OpCodes.Add(op);

        currentAddress += 8;
    }
    while (!op.Props.HasFlag(InstProps.Bra));

    block.EndAddress = currentAddress;
}
/// <summary>
/// Records on the shader configuration which attributes are used by an
/// attribute load (ALD), attribute store (AST) or interpolation (IPA) instruction.
/// </summary>
/// <param name="config">Shader configuration to update</param>
/// <param name="name">Instruction name, any name other than AST and ALD is handled as IPA</param>
/// <param name="opCode">Raw instruction</param>
private static void SetUserAttributeUses(ShaderConfig config, InstName name, ulong opCode)
{
    int offset;
    int count = 1;
    bool isStore = false;
    bool indexed;
    bool perPatch = false;

    switch (name)
    {
        case InstName.Ast:
        {
            var ast = new InstAst(opCode);

            count = (int)ast.AlSize + 1;
            offset = ast.Imm11;
            indexed = ast.Phys;
            perPatch = ast.P;
            isStore = true;
            break;
        }

        case InstName.Ald:
        {
            var ald = new InstAld(opCode);

            count = (int)ald.AlSize + 1;
            offset = ald.Imm11;
            indexed = ald.Phys;
            perPatch = ald.P;
            isStore = ald.O;
            break;
        }

        default: /* InstName.Ipa */
        {
            var ipa = new InstIpa(opCode);

            offset = ipa.Imm10;
            indexed = ipa.Idx;
            break;
        }
    }

    // With indexing, all the user attributes of the accessed direction are marked as used.
    if (indexed)
    {
        if (isStore)
        {
            config.SetAllOutputUserAttributes();
            config.SetUsedFeature(FeatureFlags.OaIndexing);
        }
        else
        {
            config.SetAllInputUserAttributes();
            config.SetUsedFeature(FeatureFlags.IaIndexing);
        }

        return;
    }

    for (int elemIndex = 0; elemIndex < count; elemIndex++)
    {
        int attr = offset + elemIndex * 4;

        if (perPatch)
        {
            bool isPerPatchUserAttr =
                attr >= AttributeConsts.UserAttributePerPatchBase &&
                attr < AttributeConsts.UserAttributePerPatchEnd;

            if (isPerPatchUserAttr)
            {
                int index = (attr - AttributeConsts.UserAttributePerPatchBase) / 16;

                if (isStore)
                {
                    config.SetOutputUserAttributePerPatch(index);
                }
                else
                {
                    config.SetInputUserAttributePerPatch(index);
                }
            }
        }
        else if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd)
        {
            int userAttr = attr - AttributeConsts.UserAttributeBase;
            int index = userAttr / 16;

            if (isStore)
            {
                config.SetOutputUserAttribute(index);
            }
            else
            {
                config.SetInputUserAttribute(index, (userAttr >> 2) & 3);
            }
        }

        // The fixed function attribute check below only applies to loads.
        if (isStore)
        {
            continue;
        }

        bool isFixedFuncAttr =
            attr == AttributeConsts.FogCoord ||
            (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0) ||
            (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd);

        if (isFixedFuncAttr)
        {
            config.SetUsedFeature(FeatureFlags.FixedFuncAttr);
        }
    }
}
/// <summary>
/// Checks if the instruction is flagged as a branch and always executes.
/// </summary>
/// <param name="op">Instruction to check</param>
/// <returns>True for an unconditional branch, false otherwise</returns>
public static bool IsUnconditionalBranch(ref InstOp op)
{
    if (!IsUnconditional(ref op))
    {
        return false;
    }

    return op.Props.HasFlag(InstProps.Bra);
}
/// <summary>
/// Checks if the instruction always executes: it must be predicated on the non-inverted
/// "true" predicate and, for BRA and EXIT, its condition code must be T.
/// </summary>
/// <param name="op">Instruction to check</param>
/// <returns>True if the instruction is unconditional, false otherwise</returns>
private static bool IsUnconditional(ref InstOp op)
{
    var conditional = new InstConditional(op.RawOpCode);

    // Only BRA and EXIT have their condition code checked.
    bool checksConditionCode = op.Name == InstName.Bra || op.Name == InstName.Exit;

    if (checksConditionCode && conditional.Ccc != Ccc.T)
    {
        return false;
    }

    bool usesTruePredicate = conditional.Pred == RegisterConsts.PredicateTrueIndex;

    return usesTruePredicate && !conditional.PredInv;
}
/// <summary>
/// Finds the possible targets of BRX (indirect branch) instructions that have no branch
/// targets yet, and links the owning blocks to the target blocks.
/// </summary>
/// <param name="config">Shader configuration, used to read the jump offsets from constant buffer 1</param>
/// <param name="blocks">Blocks to scan</param>
/// <param name="getBlock">Function that returns the block at a given address</param>
/// <returns>True if a non-empty jump table was found for any BRX, false otherwise</returns>
private static bool FindBrxTargets(ShaderConfig config, IEnumerable<Block> blocks, Func<ulong, Block> getBlock)
{
    bool foundTargets = false;

    foreach (Block block in blocks)
    {
        InstOp lastOp = block.GetLastOp();

        if (lastOp.Name != InstName.Brx)
        {
            continue;
        }

        // Skip blocks that already have more successors than the fall-through one.
        int fallThroughCount = block.HasNext() ? 1 : 0;

        if (block.Successors.Count != fallThroughCount)
        {
            continue;
        }

        var seenTargets = new HashSet<ulong>();

        var brx = new InstBrx(lastOp.RawOpCode);
        ulong baseOffset = lastOp.GetAbsoluteAddress();

        // An indirect branch could go anywhere,
        // try to get the possible target offsets from the constant buffer.
        (int tableOffset, int tableLength) = FindBrxTargetRange(block, brx.SrcA);

        foundTargets |= tableLength != 0;

        for (int entry = 0; entry < tableLength; entry++)
        {
            uint targetOffset = config.ConstantBuffer1Read(tableOffset + entry * 4);
            ulong targetAddress = baseOffset + targetOffset;

            // Several table entries may point to the same target, link it only once.
            if (!seenTargets.Add(targetAddress))
            {
                continue;
            }

            Block target = getBlock(targetAddress);

            target.Predecessors.Add(block);
            block.Successors.Add(target);
        }
    }

    return foundTargets;
}
/// <summary>
/// Tries to find the jump table used by a BRX instruction at the end of the block.
/// </summary>
/// <param name="block">Block ending with the BRX instruction</param>
/// <param name="brxReg">Register holding the BRX offset</param>
/// <returns>Byte offset of the table on the constant buffer and number of entries, or (0, 0) if no table was found</returns>
private static (int, int) FindBrxTargetRange(Block block, int brxReg)
{
    // Try to match the following pattern:
    //
    // IMNMX.U32 Rx, Rx, UpperBound, PT
    // SHL Rx, Rx, 0x2
    // LDC Rx, c[0x1][Rx+BaseOffset]
    //
    // Here, Rx is an arbitrary register, "UpperBound" and "BaseOffset" are constants.
    // The above pattern is assumed to be generated by the compiler before BRX,
    // as the instruction is usually used to implement jump tables for switch statement optimizations.
    // On a successful match, "BaseOffset" is the offset in bytes where the jump offsets are
    // located on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1.

    var noMatch = (0, 0);
    var visited = new HashSet<Block>();

    // The pattern is matched backwards, starting from the BRX itself.
    BlockLocation brxLocation = new BlockLocation(block, block.OpCodes.Count - 1);
    BlockLocation ldcLocation = FindFirstRegWrite(visited, brxLocation, brxReg);

    bool foundLdc =
        ldcLocation.Block != null &&
        ldcLocation.Block.OpCodes[ldcLocation.Index].Name == InstName.Ldc;

    if (!foundLdc)
    {
        return noMatch;
    }

    GetOp<InstLdc>(ldcLocation, out InstLdc ldc);

    bool loadsFromTable = ldc.CbufSlot == 1 && ldc.AddressMode == 0;

    if (!loadsFromTable)
    {
        return noMatch;
    }

    BlockLocation shlLocation = FindFirstRegWrite(visited, ldcLocation, ldc.SrcA);

    bool foundShl = shlLocation.Block != null && shlLocation.IsImmInst(InstName.Shl);

    if (!foundShl)
    {
        return noMatch;
    }

    GetOp<InstShlI>(shlLocation, out InstShlI shl);

    if (shl.Imm20 != 2)
    {
        return noMatch;
    }

    BlockLocation imnmxLocation = FindFirstRegWrite(visited, shlLocation, shl.SrcA);

    bool foundImnmx = imnmxLocation.Block != null && imnmxLocation.IsImmInst(InstName.Imnmx);

    if (!foundImnmx)
    {
        return noMatch;
    }

    GetOp<InstImnmxI>(imnmxLocation, out InstImnmxI imnmx);

    bool isUnsignedMin =
        !imnmx.Signed &&
        imnmx.SrcPred == RegisterConsts.PredicateTrueIndex &&
        !imnmx.SrcPredInv;

    if (!isUnsignedMin)
    {
        return noMatch;
    }

    return (ldc.CbufOffset, imnmx.Imm20 + 1);
}
/// <summary>
/// Reinterprets the raw bits of the instruction at the given location as <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Instruction type to view the raw bits as</typeparam>
/// <param name="location">Location of the instruction</param>
/// <param name="op">The instruction, viewed as <typeparamref name="T"/></param>
private static void GetOp<T>(BlockLocation location, out T op) where T : unmanaged
{
    InstOp source = location.Block.OpCodes[location.Index];
    ulong bits = source.RawOpCode;

    op = Unsafe.As<ulong, T>(ref bits);
}
/// <summary>
/// Position of an instruction, given as a block and an index into its instruction list.
/// </summary>
private readonly struct BlockLocation
{
    public Block Block { get; }
    public int Index { get; }

    public BlockLocation(Block block, int index) => (Block, Index) = (block, index);

    /// <summary>
    /// Checks if the instruction at this location has the given name and the Ib property flag.
    /// </summary>
    public bool IsImmInst(InstName name)
    {
        InstOp op = Block.OpCodes[Index];

        if (op.Name != name)
        {
            return false;
        }

        return op.Props.HasFlag(InstProps.Ib);
    }
}
/// <summary>
/// Searches backwards from the given location, following predecessors breadth-first,
/// for an instruction that writes to the given register.
/// </summary>
/// <param name="visited">Blocks that were already searched, updated as the search progresses</param>
/// <param name="location">Location to start from, the instruction at this location is not checked</param>
/// <param name="regIndex">Index of the register</param>
/// <returns>Location of the write, or a location with a null block if none was found</returns>
private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
{
    var pending = new Queue<BlockLocation>();

    pending.Enqueue(location);
    visited.Add(location.Block);

    while (pending.Count != 0)
    {
        BlockLocation start = pending.Dequeue();
        Block block = start.Block;

        // Scan the instructions located before the start index, last to first.
        int index = start.Index;

        while (--index >= 0)
        {
            if (WritesToRegister(block.OpCodes[index], regIndex))
            {
                return new BlockLocation(block, index);
            }
        }

        // Nothing here, continue the search on predecessors, starting past their last instruction.
        foreach (Block predecessor in block.Predecessors)
        {
            if (!visited.Add(predecessor))
            {
                continue;
            }

            pending.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count));
        }
    }

    return new BlockLocation(null, 0);
}
/// <summary>
/// Checks if the instruction writes to the given register.
/// </summary>
/// <param name="op">Instruction to check</param>
/// <param name="regIndex">Index of the register</param>
/// <returns>True if the register is a destination of the instruction, false otherwise</returns>
private static bool WritesToRegister(InstOp op, int regIndex)
{
    const InstProps AnyDestination = InstProps.Rd | InstProps.Rd2;

    // Predicate instruction only ever writes to predicate, so we shouldn't check those.
    if ((op.Props & AnyDestination) == 0)
    {
        return false;
    }

    // Destinations are encoded on the low byte and, for the second one, on the byte at bit 28.
    byte firstDest = (byte)op.RawOpCode;
    byte secondDest = (byte)(op.RawOpCode >> 28);

    bool writesSecondDest = op.Props.HasFlag(InstProps.Rd2) && secondDest == regIndex;

    return writesSecondDest || firstDest == regIndex;
}
/// <summary>
/// Kind of an address on the simulated branch stack, named after the pop instruction
/// that is allowed to consume it.
/// </summary>
private enum MergeType
{
// Address pushed by PBK, consumed by BRK.
Brk,
// Address pushed by PCNT, consumed by CONT.
Cont,
// Address pushed by SSY, consumed by SYNC.
Sync
}
/// <summary>
/// Work item used while propagating push instructions: either a block to visit, or a
/// restore operation that undoes branch stack changes once a visit is over.
/// </summary>
private struct PathBlockState
{
    /// <summary>Block to visit, null for restore operations.</summary>
    public Block Block { get; }

    private enum RestoreType
    {
        None,
        PopPushOp,
        PushBranchOp
    }

    private RestoreType _restoreType;
    private ulong _restoreValue;
    private MergeType _restoreMergeType;

    /// <summary>True if this is a restore operation rather than a block to visit.</summary>
    public bool ReturningFromVisit => _restoreType != RestoreType.None;

    private PathBlockState(Block block, RestoreType restoreType, ulong restoreValue, MergeType restoreMergeType)
    {
        Block = block;
        _restoreType = restoreType;
        _restoreValue = restoreValue;
        _restoreMergeType = restoreMergeType;
    }

    /// <summary>Creates a work item that visits a block.</summary>
    public PathBlockState(Block block)
        : this(block, RestoreType.None, 0, default)
    {
    }

    /// <summary>Creates a restore operation that pops the branch stack back to a previous size.</summary>
    public PathBlockState(int oldStackSize)
        : this(null, RestoreType.PopPushOp, (ulong)oldStackSize, default)
    {
    }

    /// <summary>Creates a restore operation that pushes an address back into the branch stack.</summary>
    public PathBlockState(ulong syncAddress, MergeType mergeType)
        : this(null, RestoreType.PushBranchOp, syncAddress, mergeType)
    {
    }

    /// <summary>Applies the restore operation to the branch stack. Does nothing for block work items.</summary>
    public void RestoreStackState(Stack<(ulong, MergeType)> branchStack)
    {
        switch (_restoreType)
        {
            case RestoreType.PushBranchOp:
                branchStack.Push((_restoreValue, _restoreMergeType));
                break;

            case RestoreType.PopPushOp:
                uint oldStackSize = (uint)_restoreValue;

                while (branchStack.Count > oldStackSize)
                {
                    branchStack.Pop();
                }

                break;
        }
    }
}
/// <summary>
/// Finds the pop instructions (BRK/CONT/SYNC) that consume the address pushed by one push
/// instruction (PBK/PCNT/SSY). This is done by walking the blocks reachable from the block
/// of the push instruction while simulating the branch stack. Each consuming block gets a
/// sync target, a consumer entry with a new local, and an edge to the push target block.
/// </summary>
/// <param name="blocks">Blocks of the function, by address</param>
/// <param name="currBlock">Block that contains the push instruction</param>
/// <param name="pushOpIndex">Index of the push instruction on the push instructions list of the block</param>
private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
{
PushOpInfo pushOpInfo = currBlock.PushOpCodes[pushOpIndex];
InstOp pushOp = pushOpInfo.Op;
Block target = blocks[pushOp.GetAbsoluteAddress()];
// Despite the name, this is a stack, so blocks are visited depth-first.
Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
HashSet<Block> visited = new HashSet<Block>();
// Simulated branch stack, holds the addresses pushed along the path being visited.
Stack<(ulong, MergeType)> branchStack = new Stack<(ulong, MergeType)>();
void Push(PathBlockState pbs)
{
// When block is null, this means we are pushing a restore operation.
// Restore operations are used to undo the work done inside a block
// when we return from it, for example it pops addresses pushed by
// SSY/PBK instructions inside the block, and pushes addresses popped
// by SYNC/BRK.
// For blocks, if it's already visited, we just ignore to avoid going
// around in circles and getting stuck here.
if (pbs.Block == null || !visited.Contains(pbs.Block))
{
workQueue.Push(pbs);
}
}
Push(new PathBlockState(currBlock));
while (workQueue.TryPop(out PathBlockState pbs))
{
if (pbs.ReturningFromVisit)
{
pbs.RestoreStackState(branchStack);
continue;
}
Block current = pbs.Block;
// If the block was already processed, we just ignore it, otherwise
// we would push the same child blocks of an already processed block,
// and go around in circles until memory is exhausted.
if (!visited.Add(current))
{
continue;
}
int pushOpsCount = current.PushOpCodes.Count;
if (pushOpsCount != 0)
{
// Schedule the removal of everything this block pushes, for when its visit is over.
Push(new PathBlockState(branchStack.Count));
for (int index = pushOpIndex; index < pushOpsCount; index++)
{
InstOp currentPushOp = current.PushOpCodes[index].Op;
MergeType pushMergeType = GetMergeTypeFromPush(currentPushOp.Name);
branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType));
}
}
// The start index only applies to the first visited block, which is the one with
// the push instruction being propagated. All other blocks push all their addresses.
pushOpIndex = 0;
bool hasNext = current.HasNext();
if (hasNext)
{
Push(new PathBlockState(current.Successors[0]));
}
InstOp lastOp = current.GetLastOp();
if (IsPopBranch(lastOp.Name))
{
MergeType popMergeType = GetMergeTypeFromPop(lastOp.Name);
bool found = true;
ulong targetAddress = 0UL;
MergeType mergeType;
// Pop addresses until one with the type that matches the pop instruction is found.
do
{
if (branchStack.Count == 0)
{
found = false;
break;
}
(targetAddress, mergeType) = branchStack.Pop();
// Push the target address (this will be used to push the address
// back into the PBK/PCNT/SSY stack when we return from that block).
Push(new PathBlockState(targetAddress, mergeType));
}
while (mergeType != popMergeType);
// Make sure we found the correct address,
// the push and pop instruction types must match, so:
// - BRK can only consume addresses pushed by PBK.
// - CONT can only consume addresses pushed by PCNT.
// - SYNC can only consume addresses pushed by SSY.
if (found)
{
if (branchStack.Count == 0)
{
// If the entire stack was consumed, then the current pop instruction
// just consumed the address from our push instruction.
if (current.SyncTargets.TryAdd(pushOp.Address, new SyncTarget(pushOpInfo, current.SyncTargets.Count)))
{
pushOpInfo.Consumers.Add(current, Local());
target.Predecessors.Add(current);
current.Successors.Add(target);
}
}
else
{
// Push the block itself into the work queue for processing.
Push(new PathBlockState(blocks[targetAddress]));
}
}
}
else
{
// By adding them in descending order (sorted by address), we process the blocks
// in order (of ascending address), since we work with a LIFO.
foreach (Block possibleTarget in current.Successors.OrderByDescending(x => x.Address))
{
// The fall-through successor was already pushed above.
if (!hasNext || possibleTarget != current.Successors[0])
{
Push(new PathBlockState(possibleTarget));
}
}
}
}
}
/// <summary>
/// Checks if an instruction is one of the pop branch instructions: BRK, CONT or SYNC.
/// </summary>
/// <param name="name">Name of the instruction to check</param>
/// <returns>True for BRK, CONT and SYNC, false for any other instruction</returns>
public static bool IsPopBranch(InstName name)
{
    switch (name)
    {
        case InstName.Brk:
        case InstName.Cont:
        case InstName.Sync:
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Gets the merge type that matches a push instruction.
/// </summary>
/// <param name="name">Name of the push instruction</param>
/// <returns>Brk for PBK, Cont for PCNT, and Sync for every other name</returns>
private static MergeType GetMergeTypeFromPush(InstName name)
{
    if (name == InstName.Pbk)
    {
        return MergeType.Brk;
    }

    if (name == InstName.Pcnt)
    {
        return MergeType.Cont;
    }

    // Anything that is not PBK or PCNT falls back to the sync merge type.
    return MergeType.Sync;
}
/// <summary>
/// Gets the merge type that matches a pop branch instruction.
/// </summary>
/// <param name="name">Name of the pop branch instruction</param>
/// <returns>Brk for BRK, Cont for CONT, and Sync for every other name</returns>
private static MergeType GetMergeTypeFromPop(InstName name)
{
    switch (name)
    {
        case InstName.Brk:
            return MergeType.Brk;

        case InstName.Cont:
            return MergeType.Cont;

        default:
            // Anything that is not BRK or CONT falls back to the sync merge type.
            return MergeType.Sync;
    }
}
}
}

View File

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Function type tag, stored as a single byte.
    /// </summary>
    /// <remarks>
    /// The numeric values are spelled out, they match the implicit sequential numbering starting at zero.
    /// </remarks>
    enum FunctionType : byte
    {
        User = 0,
        Unused = 1,
        BuiltInFSIBegin = 2,
        BuiltInFSIEnd = 3
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,188 @@
namespace Ryujinx.Graphics.Shader.Decoders
{
/// <summary>
/// Names of the shader instructions known to the decoder, stored as a single byte.
/// </summary>
/// <remarks>
/// Only <see cref="Invalid"/> has an explicit value, every other member is numbered implicitly
/// from its position in the list, so inserting or reordering members changes the numeric values.
/// The instruction table associates each instruction encoding with one of those names, and several
/// encodings (register, immediate and constant buffer variants, for example) can share a name.
/// </remarks>
enum InstName : byte
{
// Zero value, which is also what default(InstName) evaluates to.
Invalid = 0,
Al2p,
Ald,
Ast,
Atom,
AtomCas,
Atoms,
AtomsCas,
B2r,
Bar,
Bfe,
Bfi,
Bpt,
Bra,
Brk,
Brx,
Cal,
Cctl,
Cctll,
Cctlt,
Cont,
Cset,
Csetp,
Cs2r,
Dadd,
Depbar,
Dfma,
Dmnmx,
Dmul,
Dset,
Dsetp,
Exit,
F2f,
F2i,
Fadd,
Fadd32i,
Fchk,
Fcmp,
Ffma,
Ffma32i,
Flo,
Fmnmx,
Fmul,
Fmul32i,
Fset,
Fsetp,
Fswzadd,
Getcrsptr,
Getlmembase,
Hadd2,
Hadd232i,
Hfma2,
Hmul2,
Hmul232i,
Hset2,
Hsetp2,
I2f,
I2i,
Iadd,
Iadd32i,
Iadd3,
Icmp,
Ide,
Idp,
Imad,
Imad32i,
Imadsp,
Imnmx,
Imul,
Imul32i,
Ipa,
Isberd,
Iscadd,
Iscadd32i,
Iset,
Isetp,
Jcal,
Jmp,
Jmx,
Kil,
Ld,
Ldc,
Ldg,
Ldl,
Lds,
Lea,
LeaHi,
Lepc,
Longjmp,
Lop,
Lop3,
Lop32i,
Membar,
Mov,
Mov32i,
Mufu,
Nop,
Out,
P2r,
Pbk,
Pcnt,
Pexit,
Pixld,
Plongjmp,
Popc,
Pret,
Prmt,
Pset,
Psetp,
R2b,
R2p,
Ram,
Red,
Ret,
Rro,
Rtt,
S2r,
Sam,
Sel,
Setcrsptr,
Setlmembase,
Shf,
Shf_2,
Shf_3,
Shf_4,
Shfl,
Shl,
Shr,
Ssy,
St,
Stg,
Stl,
Stp,
Sts,
SuatomB,
Suatom,
SuatomB2,
SuatomCasB,
SuatomCas,
SuldDB,
SuldD,
SuldB,
Suld,
SuredB,
Sured,
SustDB,
SustD,
SustB,
Sust,
Sync,
Tex,
TexB,
Texs,
TexsF16,
Tld,
TldB,
Tlds,
TldsF16,
Tld4,
Tld4B,
Tld4s,
Tld4sF16,
Tmml,
TmmlB,
Txa,
Txd,
TxdB,
Txq,
TxqB,
Vabsdiff,
Vabsdiff4,
Vadd,
Vmad,
Vmnmx,
Vote,
Votevtg,
Vset,
Vsetp,
Vshl,
Vshr,
Xmad,
}
}

View File

@ -0,0 +1,27 @@
using Ryujinx.Graphics.Shader.Instructions;
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Immutable description of one instruction: its address, raw 64-bit opcode, name,
    /// emitter and properties.
    /// </summary>
    readonly struct InstOp
    {
        public readonly ulong Address;
        public readonly ulong RawOpCode;
        public readonly InstEmitter Emitter;
        public readonly InstProps Props;
        public readonly InstName Name;

        /// <summary>
        /// Creates a new instruction description.
        /// </summary>
        /// <param name="address">Address of the instruction</param>
        /// <param name="rawOpCode">Raw opcode bits of the instruction</param>
        /// <param name="name">Name of the instruction</param>
        /// <param name="emitter">Emitter associated with the instruction</param>
        /// <param name="props">Properties of the instruction</param>
        public InstOp(ulong address, ulong rawOpCode, InstName name, InstEmitter emitter, InstProps props)
            => (Address, RawOpCode, Name, Emitter, Props) = (address, rawOpCode, name, emitter, props);

        /// <summary>
        /// Computes the absolute address encoded on the opcode, from the signed 24-bit offset
        /// stored starting at bit 20.
        /// </summary>
        /// <returns>Instruction address, plus the sign extended offset, plus 8</returns>
        public ulong GetAbsoluteAddress()
        {
            // Drop the low 20 bits, the truncation to int keeps the next 32 bits.
            int offsetBits = (int)(RawOpCode >> 20);

            // Move bit 23 into the sign position, then shift back arithmetically to sign extend.
            int signedOffset = (offsetBits << 8) >> 8;

            // NOTE(review): the extra 8 is presumably the size of one 64-bit instruction,
            // making the offset relative to the following instruction - confirm.
            long absoluteAddress = (long)Address + signedOffset + 8;

            return (ulong)absoluteAddress;
        }
    }
}

View File

@ -0,0 +1,28 @@
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Instruction properties, stored on 16 bits.
    /// </summary>
    /// <remarks>
    /// Most members are single bits that can be combined with the bitwise or operator.
    /// The exception is the Pd family (<see cref="Pd"/>, <see cref="LPd"/>, <see cref="SPd"/>,
    /// <see cref="TPd"/> and <see cref="VPd"/>), which are the values 1 to 5 of a 3-bit field
    /// located at bits 7 to 9. Use <see cref="PdMask"/> to extract that field before comparing.
    /// </remarks>
    enum InstProps : ushort
    {
        None = 0x0000,

        Rd = 0x0001,  // Bit 0.
        Rd2 = 0x0002, // Bit 1.
        Ra = 0x0004,  // Bit 2.
        Rb = 0x0008,  // Bit 3.
        Rb2 = 0x0010, // Bit 4.
        Ib = 0x0020,  // Bit 5.
        Rc = 0x0040,  // Bit 6.

        // 3-bit field at bits 7 to 9.
        Pd = 0x0080,     // Field value 1.
        LPd = 0x0100,    // Field value 2.
        SPd = 0x0180,    // Field value 3.
        TPd = 0x0200,    // Field value 4.
        VPd = 0x0280,    // Field value 5.
        PdMask = 0x0380, // All 3 bits of the field.

        Pdn = 0x0400,   // Bit 10.
        Ps = 0x0800,    // Bit 11.
        Tex = 0x1000,   // Bit 12.
        TexB = 0x2000,  // Bit 13.
        Bra = 0x4000,   // Bit 14.
        NoPred = 0x8000 // Bit 15.
    }
}

View File

@ -0,0 +1,390 @@
using Ryujinx.Graphics.Shader.Instructions;
using System;
namespace Ryujinx.Graphics.Shader.Decoders
{
static class InstTable
{
private const int EncodingBits = 14;
private readonly struct TableEntry
{
public InstName Name { get; }
public InstEmitter Emitter { get; }
public InstProps Props { get; }
public int XBits { get; }
public TableEntry(InstName name, InstEmitter emitter, InstProps props, int xBits)
{
Name = name;
Emitter = emitter;
Props = props;
XBits = xBits;
}
}
private static TableEntry[] _opCodes;
static InstTable()
{
_opCodes = new TableEntry[1 << EncodingBits];
#region Instructions
Add("1110111110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Al2p, InstEmit.Al2p, InstProps.Rd | InstProps.Ra);
Add("1110111111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ald, InstEmit.Ald, InstProps.Rd | InstProps.Ra);
Add("1110111111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ast, InstEmit.Ast, InstProps.Ra | InstProps.Rb2 | InstProps.Rc);
Add("11101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Atom, InstEmit.Atom, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("111011101111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.AtomCas, InstEmit.AtomCas, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("11101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Atoms, InstEmit.Atoms, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("111011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.AtomsCas, InstEmit.AtomsCas, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("1111000010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B2r, InstEmit.B2r, InstProps.Rd | InstProps.Ra | InstProps.VPd);
Add("1111000010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bar, InstEmit.Bar, InstProps.Ra | InstProps.Ps);
Add("0101110000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x00000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeC, InstProps.Rd | InstProps.Ra);
Add("0101101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011011x11110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("0100101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("0101001111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("111000111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bpt, InstEmit.Bpt, InstProps.NoPred);
Add("111000100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bra, InstEmit.Bra, InstProps.Bra);
Add("111000110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Brk, InstEmit.Brk, InstProps.Bra);
Add("111000100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Brx, InstEmit.Brx, InstProps.Ra | InstProps.Bra);
Add("111000100110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cal, InstEmit.Cal, InstProps.Bra | InstProps.NoPred);
Add("11101111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctl, InstEmit.Cctl, InstProps.Ra);
Add("1110111110000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctll, InstEmit.Cctll, InstProps.Ra);
Add("1110101111110xx0000000000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt, InstEmit.Cctlt);
Add("1110101111101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt, InstEmit.Cctlt, InstProps.Rc);
Add("111000110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cont, InstEmit.Cont, InstProps.Bra);
Add("0101000010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cset, InstEmit.Cset, InstProps.Rd | InstProps.Ps);
Add("0101000010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Csetp, InstEmit.Csetp, InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("0101000011001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cs2r, InstEmit.Cs2r, InstProps.Rd);
Add("0101110001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x01110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddC, InstProps.Rd | InstProps.Ra);
Add("1111000011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Depbar, InstEmit.Depbar);
Add("010110110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011011x0111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("010010110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010100110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("0101110001010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
Add("0011100x01010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
Add("0100110001010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
Add("0101110010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x10000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulC, InstProps.Rd | InstProps.Ra);
Add("010110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
Add("0011001x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
Add("010010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
Add("010110111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("0011011x1000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("010010111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("111000110000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Exit, InstEmit.Exit, InstProps.Bra);
Add("0101110010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fR, InstProps.Rd | InstProps.Rb);
Add("0011100x10101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fI, InstProps.Rd | InstProps.Ib);
Add("0100110010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fC, InstProps.Rd);
Add("0101110010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iR, InstProps.Rd | InstProps.Rb);
Add("0011100x10110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iI, InstProps.Rd | InstProps.Ib);
Add("0100110010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iC, InstProps.Rd);
Add("0101110001011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x01011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110001011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddC, InstProps.Rd | InstProps.Ra);
Add("000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd32i, InstEmit.Fadd32i, InstProps.Rd | InstProps.Ra);
Add("0101110010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkR, InstProps.Ra | InstProps.Rb | InstProps.Pd);
Add("0011100x10001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkI, InstProps.Ra | InstProps.Ib | InstProps.Pd);
Add("0100110010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkC, InstProps.Ra | InstProps.Pd);
Add("010110111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011011x1010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("010010111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010100111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011001x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("010010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma32i, InstEmit.Ffma32i, InstProps.Rd | InstProps.Ra);
Add("0101110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloR, InstProps.Rd | InstProps.Rb);
Add("0011100x00110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloI, InstProps.Rd | InstProps.Ib);
Add("0100110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloC, InstProps.Rd);
Add("0101110001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmnmx, InstEmit.FmnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
Add("0011100x01100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmnmx, InstEmit.FmnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
Add("0100110001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmnmx, InstEmit.FmnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
Add("0101110001101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x01101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110001101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulC, InstProps.Rd | InstProps.Ra);
Add("00011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul32i, InstEmit.Fmul32i, InstProps.Rd | InstProps.Ra);
Add("01011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
Add("0011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
Add("01001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
Add("010110111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("0011011x1011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("010010111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("0101000011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fswzadd, InstEmit.Fswzadd, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("111000101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Getcrsptr, InstEmit.Getcrsptr, InstProps.Rd | InstProps.NoPred);
Add("111000101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Getlmembase, InstEmit.Getlmembase, InstProps.Rd | InstProps.NoPred);
Add("0101110100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2R, InstProps.Rd | InstProps.Ra);
Add("0111101x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2I, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0111101x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2C, InstProps.Rd | InstProps.Ra);
Add("0010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd232i, InstEmit.Hadd232i, InstProps.Rd | InstProps.Ra);
Add("0101110100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("01110xxx0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("01110xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2C, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("01100xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2Rc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("0010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma232i, InstProps.Rd | InstProps.Ra);
Add("0101110100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2R, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0111100x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2I, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0111100x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2C, InstProps.Rd | InstProps.Ra);
Add("0010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul232i, InstEmit.Hmul232i, InstProps.Rd | InstProps.Ra);
Add("0101110100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
Add("0111110x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
Add("0111110x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2C, InstProps.Rd | InstProps.Ra | InstProps.Ps);
Add("0101110100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2R, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("0111111x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2I, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("0111111x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2C, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("0101110010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fR, InstProps.Rd | InstProps.Rb);
Add("0011100x10111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fI, InstProps.Rd | InstProps.Ib);
Add("0100110010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fC, InstProps.Rd);
Add("0101110011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iR, InstProps.Rd | InstProps.Rb);
Add("0011100x11100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iI, InstProps.Rd | InstProps.Ib);
Add("0100110011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iC, InstProps.Rd);
Add("0101110000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x00010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddC, InstProps.Rd | InstProps.Ra);
Add("0001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd32i, InstEmit.Iadd32i, InstProps.Rd | InstProps.Ra);
Add("010111001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011100x1100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("010011001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3C, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010110110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011011x0100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("010010110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010100110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("111000111001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ide, InstEmit.Ide, InstProps.NoPred);
Add("0101001111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Idp, InstEmit.IdpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0101001111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Idp, InstEmit.IdpC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011010x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("010010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad32i, InstEmit.Imad32i, InstProps.Rd | InstProps.Ra);
Add("010110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011010x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("010010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("0101110000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
Add("0011100x00100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
Add("0100110000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
Add("0101110000111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x00111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110000111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulC, InstProps.Rd | InstProps.Ra);
Add("00011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul32i, InstEmit.Imul32i, InstProps.Rd | InstProps.Ra);
Add("11100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ipa, InstEmit.Ipa, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("1110111111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isberd, InstEmit.Isberd, InstProps.Rd | InstProps.Ra);
Add("0101110000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x00011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddC, InstProps.Rd | InstProps.Ra);
Add("000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd32i, InstEmit.Iscadd32i, InstProps.Rd | InstProps.Ra);
Add("010110110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
Add("0011011x0101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
Add("010010110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
Add("010110110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("0011011x0110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("010010110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("111000100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jcal, InstEmit.Jcal, InstProps.Bra);
Add("111000100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jmp, InstEmit.Jmp, InstProps.Ra | InstProps.Bra);
Add("111000100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jmx, InstEmit.Jmx, InstProps.Ra | InstProps.Bra);
Add("111000110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Kil, InstEmit.Kil, InstProps.Bra);
Add("100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ld, InstEmit.Ld, InstProps.Rd | InstProps.Ra);
Add("1110111110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldc, InstEmit.Ldc, InstProps.Rd | InstProps.Ra);
Add("1110111011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldg, InstEmit.Ldg, InstProps.Rd | InstProps.Ra);
Add("1110111101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldl, InstEmit.Ldl, InstProps.Rd | InstProps.Ra);
Add("1110111101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lds, InstEmit.Lds, InstProps.Rd | InstProps.Ra);
Add("0101101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.LPd);
Add("0011011x11010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.LPd);
Add("0100101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaC, InstProps.Rd | InstProps.Ra | InstProps.LPd);
Add("0101101111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.LeaHi, InstEmit.LeaHiR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd);
Add("000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.LeaHi, InstEmit.LeaHiC, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.LPd);
Add("0101000011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lepc, InstEmit.Lepc);
Add("111000110001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Longjmp, InstEmit.Longjmp, InstProps.Bra);
Add("0101110001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.LPd);
Add("0011100x01000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.LPd);
Add("0100110001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopC, InstProps.Rd | InstProps.Ra | InstProps.LPd);
Add("0101101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, InstEmit.Lop3R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd);
Add("001111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, InstEmit.Lop3I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("0000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, InstEmit.Lop3C, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop32i, InstEmit.Lop32i, InstProps.Rd | InstProps.Ra);
Add("1110111110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Membar, InstEmit.Membar);
Add("0101110010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovR, InstProps.Rd | InstProps.Ra);
Add("0011100x10011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovI, InstProps.Rd | InstProps.Ib);
Add("0100110010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovC, InstProps.Rd);
Add("000000010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov32i, InstEmit.Mov32i, InstProps.Rd);
Add("0101000010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mufu, InstEmit.Mufu, InstProps.Rd | InstProps.Ra);
Add("0101000010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Nop, InstEmit.Nop);
Add("1111101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("1111011x11100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("1110101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutC, InstProps.Rd | InstProps.Ra);
Add("0101110011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x11101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rC, InstProps.Rd | InstProps.Ra);
Add("111000101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pbk, InstEmit.Pbk, InstProps.NoPred);
Add("111000101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pcnt, InstEmit.Pcnt, InstProps.NoPred);
Add("111000100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pexit, InstEmit.Pexit);
Add("1110111111101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pixld, InstEmit.Pixld, InstProps.Rd | InstProps.Ra | InstProps.VPd);
Add("111000101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Plongjmp, InstEmit.Plongjmp, InstProps.Bra | InstProps.NoPred);
Add("0101110000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcR, InstProps.Rd | InstProps.Rb);
Add("0011100x00001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcI, InstProps.Rd | InstProps.Ib);
Add("0100110000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcC, InstProps.Rd);
Add("111000100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pret, InstEmit.Pret, InstProps.NoPred);
Add("010110111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011011x1100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("010010111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010100111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("0101000010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pset, InstEmit.Pset, InstProps.Rd | InstProps.Ps);
Add("0101000010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Psetp, InstEmit.Psetp, InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("1111000011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2b, InstEmit.R2b, InstProps.Rb);
Add("0101110011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pR, InstProps.Ra | InstProps.Rb);
Add("0011100x11110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pI, InstProps.Ra | InstProps.Ib);
Add("0100110011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pC, InstProps.Ra);
Add("111000111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ram, InstEmit.Ram, InstProps.NoPred);
Add("1110101111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Red, InstEmit.Red, InstProps.Ra | InstProps.Rb2);
Add("111000110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ret, InstEmit.Ret, InstProps.Bra);
Add("0101110010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroR, InstProps.Rd | InstProps.Rb);
Add("0011100x10010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroI, InstProps.Rd | InstProps.Ib);
Add("0100110010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroC, InstProps.Rd);
Add("111000110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rtt, InstEmit.Rtt, InstProps.NoPred);
Add("1111000011001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.S2r, InstEmit.S2r, InstProps.Rd);
Add("111000110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sam, InstEmit.Sam, InstProps.NoPred);
Add("0101110010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
Add("0011100x10100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
Add("0100110010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
Add("111000101110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Setcrsptr, InstEmit.Setcrsptr, InstProps.Ra | InstProps.NoPred);
Add("111000101111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Setlmembase, InstEmit.Setlmembase, InstProps.Ra | InstProps.NoPred);
Add("0101101111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfLR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0101110011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfRR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011011x11111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfLI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("0011100x11111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfRI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("1110111100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shfl, InstEmit.Shfl, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd);
Add("0101110001001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x01001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110001001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlC, InstProps.Rd | InstProps.Ra);
Add("0101110000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("0011100x00101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
Add("0100110000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrC, InstProps.Rd | InstProps.Ra);
Add("111000101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ssy, InstEmit.Ssy, InstProps.NoPred);
Add("101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.St, InstEmit.St, InstProps.Rd | InstProps.Ra);
Add("1110111011011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stg, InstEmit.Stg, InstProps.Rd | InstProps.Ra);
Add("1110111101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stl, InstEmit.Stl, InstProps.Rd | InstProps.Ra);
Add("1110111010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, InstProps.NoPred);
Add("1110111101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sts, InstEmit.Sts, InstProps.Rd | InstProps.Ra);
Add("1110101001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomB, InstEmit.SuatomB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("11101010x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Suatom, InstEmit.Suatom, InstProps.Rd | InstProps.Ra | InstProps.Rb);
Add("1110101110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomB2, InstEmit.SuatomB2, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("1110101011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomCasB, InstEmit.SuatomCasB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.SPd);
Add("1110101x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomCas, InstEmit.SuatomCas, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.SPd);
Add("1110101100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldDB, InstEmit.SuldDB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.SPd | InstProps.TexB);
Add("1110101100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldD, InstEmit.SuldD, InstProps.Rd | InstProps.Ra | InstProps.SPd | InstProps.Tex);
Add("1110101100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldB, InstEmit.SuldB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.SPd | InstProps.TexB);
Add("1110101100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Suld, InstEmit.Suld, InstProps.Rd | InstProps.Ra | InstProps.SPd | InstProps.Tex);
Add("1110101101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuredB, InstEmit.SuredB, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("1110101101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sured, InstEmit.Sured, InstProps.Rd | InstProps.Ra);
Add("1110101100110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustDB, InstEmit.SustDB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.TexB);
Add("1110101100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustD, InstEmit.SustD, InstProps.Rd | InstProps.Ra | InstProps.Tex);
Add("1110101100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustB, InstEmit.SustB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.TexB);
Add("1110101100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sust, InstEmit.Sust, InstProps.Rd | InstProps.Ra | InstProps.Tex);
Add("1111000011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sync, InstEmit.Sync, InstProps.Bra);
Add("11000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tex, InstEmit.Tex, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex);
Add("1101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TexB, InstEmit.TexB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB);
Add("1101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Texs, InstEmit.Texs, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
Add("1101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TexsF16, InstEmit.TexsF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
Add("11011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld, InstEmit.Tld, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex);
Add("11011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TldB, InstEmit.TldB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB);
Add("1101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tlds, InstEmit.Tlds, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
Add("1101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TldsF16, InstEmit.TldsF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
Add("110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4, InstEmit.Tld4, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex);
Add("1101111011xxxxxxxxxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4B, InstEmit.Tld4B, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB);
Add("1101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4s, InstEmit.Tld4s, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
Add("1101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4sF16, InstEmit.Tld4sF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
Add("1101111101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tmml, InstEmit.Tmml, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Tex);
Add("1101111101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TmmlB, InstEmit.TmmlB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TexB);
Add("1101111101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txa, InstEmit.Txa, InstProps.Rd | InstProps.Ra | InstProps.Tex);
Add("110111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txd, InstEmit.Txd, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex);
Add("1101111001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TxdB, InstEmit.TxdB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB);
Add("1101111101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txq, InstEmit.Txq, InstProps.Rd | InstProps.Ra | InstProps.Tex);
Add("1101111101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TxqB, InstEmit.TxqB, InstProps.Rd | InstProps.Ra | InstProps.TexB);
Add("01010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vabsdiff, InstEmit.Vabsdiff, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("010100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vabsdiff4, InstEmit.Vabsdiff4, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vadd, InstEmit.Vadd, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("01011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vmad, InstEmit.Vmad, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vmnmx, InstEmit.Vmnmx, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0101000011011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vote, InstEmit.Vote, InstProps.Rd | InstProps.VPd | InstProps.Ps);
Add("0101000011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Votevtg, InstEmit.Votevtg);
Add("0100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vset, InstEmit.Vset, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0101000011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vsetp, InstEmit.Vsetp, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
Add("01010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vshl, InstEmit.Vshl, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("01010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vshr, InstEmit.Vshr, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0101101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
Add("0011011x00xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
Add("0100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
Add("010100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
#endregion
}
/// <summary>
/// Registers an instruction encoding on the opcode lookup table.
/// </summary>
/// <remarks>
/// Only the first <c>EncodingBits</c> characters of <paramref name="encoding"/> are used.
/// A '1' character sets the matching bit, a 'x' character marks the bit as a wildcard,
/// and any other character leaves the bit cleared.
/// Every combination of the wildcard bits is expanded into its own table slot.
/// A slot is only overwritten if it is empty, or if the entry already there was registered
/// with more wildcard bits than this one (so the most specific encoding wins).
/// </remarks>
/// <param name="encoding">Bit pattern of the instruction, most significant bit first</param>
/// <param name="name">Name of the instruction</param>
/// <param name="emitter">Emitter function stored on the table entry</param>
/// <param name="props">Properties stored on the table entry</param>
private static void Add(string encoding, InstName name, InstEmitter emitter, InstProps props = InstProps.None)
{
    ReadOnlySpan<char> pattern = encoding.AsSpan(0, EncodingBits);

    int[] wildcardBits = new int[pattern.Length];
    int wildcardCount = 0;
    int fixedBits = 0;

    for (int i = 0; i < pattern.Length; i++)
    {
        // The first character maps to the highest bit of the table index.
        int bit = pattern.Length - 1 - i;

        switch (pattern[i])
        {
            case '1':
                fixedBits |= 1 << bit;
                break;
            case 'x':
                wildcardBits[wildcardCount++] = bit;
                break;
        }
    }

    TableEntry candidate = new TableEntry(name, emitter, props, wildcardCount);
    int combinations = 1 << wildcardCount;

    for (int combo = 0; combo < combinations; combo++)
    {
        // Scatter the bits of the combination counter over the wildcard positions.
        int slot = fixedBits;

        for (int w = 0; w < wildcardCount; w++)
        {
            slot |= ((combo >> w) & 1) << wildcardBits[w];
        }

        ref TableEntry existing = ref _opCodes[slot];

        if (existing.Emitter == null || existing.XBits > wildcardCount)
        {
            existing = candidate;
        }
    }
}
/// <summary>
/// Looks up the table entry for an instruction word and builds the matching <see cref="InstOp"/>.
/// </summary>
/// <param name="address">Address passed through to the returned operation</param>
/// <param name="opCode">Raw 64-bit instruction word</param>
/// <returns>
/// The operation described by the table entry, or an operation named
/// <see cref="InstName.Invalid"/> with no emitter if the table slot has no emitter
/// </returns>
public static InstOp GetOp(ulong address, ulong opCode)
{
    // The top EncodingBits bits of the instruction word select the table slot.
    ref TableEntry match = ref _opCodes[opCode >> (64 - EncodingBits)];

    return match.Emitter == null
        ? new InstOp(address, opCode, InstName.Invalid, null, InstProps.None)
        : new InstOp(address, opCode, match.Name, match.Emitter, match.Props);
}
}
}

View File

@ -0,0 +1,36 @@
using System;
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Immutable (index, type) pair identifying a register.
    /// </summary>
    readonly struct Register : IEquatable<Register>
    {
        /// <summary>
        /// Index of the register.
        /// </summary>
        public int Index { get; }

        /// <summary>
        /// Type of the register.
        /// </summary>
        public RegisterType Type { get; }

        /// <summary>
        /// True for the general purpose register at <see cref="RegisterConsts.RegisterZeroIndex"/>.
        /// </summary>
        public bool IsRZ => Index == RegisterConsts.RegisterZeroIndex && Type == RegisterType.Gpr;

        /// <summary>
        /// True for the predicate register at <see cref="RegisterConsts.PredicateTrueIndex"/>.
        /// </summary>
        public bool IsPT => Index == RegisterConsts.PredicateTrueIndex && Type == RegisterType.Predicate;

        /// <summary>
        /// Creates a new register.
        /// </summary>
        /// <param name="index">Index of the register</param>
        /// <param name="type">Type of the register</param>
        public Register(int index, RegisterType type)
        {
            Type = type;
            Index = index;
        }

        /// <summary>
        /// Packs the low 16 bits of the type into the upper half of the hash,
        /// and the low 16 bits of the index into the lower half.
        /// </summary>
        /// <returns>Hash code of the register</returns>
        public override int GetHashCode() => ((ushort)Type << 16) | (ushort)Index;

        /// <summary>
        /// Checks if the object is a <see cref="Register"/> equal to this one.
        /// </summary>
        /// <param name="obj">Object to compare with</param>
        /// <returns>True if <paramref name="obj"/> is an equal register, false otherwise</returns>
        public override bool Equals(object obj) => obj is Register candidate && Equals(candidate);

        /// <summary>
        /// Checks if both registers have the same index and type.
        /// </summary>
        /// <param name="other">Register to compare with</param>
        /// <returns>True if index and type match, false otherwise</returns>
        public bool Equals(Register other) => Index == other.Index && Type == other.Type;
    }
}

View File

@ -0,0 +1,13 @@
namespace Ryujinx.Graphics.Shader.Decoders
{
/// <summary>
/// Constants describing how many registers exist and which indices are reserved.
/// </summary>
static class RegisterConsts
{
/// <summary>
/// Number of general purpose registers, not counting the one at <see cref="RegisterZeroIndex"/>.
/// </summary>
public const int GprsCount = 255;
/// <summary>
/// Number of predicate registers, not counting the one at <see cref="PredicateTrueIndex"/>.
/// </summary>
public const int PredsCount = 7;
/// <summary>
/// Number of flags (presumably the registers of type <c>RegisterType.Flag</c>; confirm against users).
/// </summary>
public const int FlagsCount = 4;
/// <summary>
/// Sum of <see cref="GprsCount"/>, <see cref="PredsCount"/> and <see cref="FlagsCount"/>.
/// </summary>
public const int TotalCount = GprsCount + PredsCount + FlagsCount;
/// <summary>
/// Index of the general purpose register reported as RZ by <c>Register.IsRZ</c>.
/// It is the first index past the <see cref="GprsCount"/> counted registers.
/// </summary>
public const int RegisterZeroIndex = GprsCount;
/// <summary>
/// Index of the predicate register reported as PT by <c>Register.IsPT</c>.
/// It is the first index past the <see cref="PredsCount"/> counted predicates.
/// </summary>
public const int PredicateTrueIndex = PredsCount;
}
}

View File

@ -0,0 +1,9 @@
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Kind of register that a <c>Register</c> refers to.
    /// </summary>
    enum RegisterType
    {
        /// <summary>
        /// Flag register.
        /// </summary>
        Flag = 0,

        /// <summary>
        /// General purpose register.
        /// </summary>
        Gpr = 1,

        /// <summary>
        /// Predicate register.
        /// </summary>
        Predicate = 2,
    }
}

View File

@ -0,0 +1,528 @@
using System;
namespace Ryujinx.Graphics.Shader
{
/// <summary>
/// GPU state access interface.
/// </summary>
public interface IGpuAccessor
{
/// <summary>
/// Prints a log message.
/// </summary>
/// <param name="message">Message to print</param>
void Log(string message)
{
// No default log output.
}
/// <summary>
/// Reads data from the constant buffer 1.
/// </summary>
/// <param name="offset">Offset in bytes to read from</param>
/// <returns>Value at the given offset</returns>
uint ConstantBuffer1Read(int offset)
{
return 0;
}
/// <summary>
/// Gets a span of the specified memory location, containing shader code.
/// </summary>
/// <param name="address">GPU virtual address of the data</param>
/// <param name="minimumSize">Minimum size that the returned span may have</param>
/// <returns>Span of the memory location</returns>
ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize);
/// <summary>
/// Queries the alpha test comparison operator that is being used currently.
/// If alpha test is disabled, it should be set to <see cref="AlphaTestOp.Always"/>.
/// </summary>
/// <returns>Current alpha test comparison</returns>
AlphaTestOp QueryAlphaTestCompare()
{
return AlphaTestOp.Always;
}
/// <summary>
/// Queries the current alpha test reference value used by the comparison.
/// </summary>
/// <returns>Current alpha test reference value</returns>
float QueryAlphaTestReference()
{
return 0f;
}
/// <summary>
/// Queries the type of the vertex shader input attribute at the specified <paramref name="location"/>.
/// </summary>
/// <param name="location">Location of the input attribute</param>
/// <returns>Input type</returns>
AttributeType QueryAttributeType(int location)
{
return AttributeType.Float;
}
/// <summary>
/// Queries whenever the alpha-to-coverage dithering feature is enabled.
/// </summary>
/// <returns>True if the feature is enabled, false otherwise</returns>
bool QueryAlphaToCoverageDitherEnable()
{
return false;
}
/// <summary>
/// Queries the binding number of a constant buffer.
/// </summary>
/// <param name="index">Constant buffer index</param>
/// <returns>Binding number</returns>
int QueryBindingConstantBuffer(int index)
{
return index;
}
/// <summary>
/// Queries the binding number of a storage buffer.
/// </summary>
/// <param name="index">Storage buffer index</param>
/// <returns>Binding number</returns>
int QueryBindingStorageBuffer(int index)
{
return index;
}
/// <summary>
/// Queries the binding number of a texture.
/// </summary>
/// <param name="index">Texture index</param>
/// <param name="isBuffer">Indicates if the texture is a buffer texture</param>
/// <returns>Binding number</returns>
int QueryBindingTexture(int index, bool isBuffer)
{
return index;
}
/// <summary>
/// Queries the binding number of an image.
/// </summary>
/// <param name="index">Image index</param>
/// <param name="isBuffer">Indicates if the image is a buffer image</param>
/// <returns>Binding number</returns>
int QueryBindingImage(int index, bool isBuffer)
{
return index;
}
/// <summary>
/// Queries output type for fragment shaders.
/// </summary>
/// <param name="location">Location of the framgent output</param>
/// <returns>Output location</returns>
AttributeType QueryFragmentOutputType(int location)
{
return AttributeType.Float;
}
/// <summary>
/// Queries Local Size X for compute shaders.
/// </summary>
/// <returns>Local Size X</returns>
int QueryComputeLocalSizeX()
{
return 1;
}
/// <summary>
/// Queries Local Size Y for compute shaders.
/// </summary>
/// <returns>Local Size Y</returns>
int QueryComputeLocalSizeY()
{
return 1;
}
/// <summary>
/// Queries Local Size Z for compute shaders.
/// </summary>
/// <returns>Local Size Z</returns>
int QueryComputeLocalSizeZ()
{
return 1;
}
/// <summary>
/// Queries Local Memory size in bytes for compute shaders.
/// </summary>
/// <returns>Local Memory size in bytes</returns>
int QueryComputeLocalMemorySize()
{
return 0x1000;
}
/// <summary>
/// Queries Shared Memory size in bytes for compute shaders.
/// </summary>
/// <returns>Shared Memory size in bytes</returns>
int QueryComputeSharedMemorySize()
{
return 0xc000;
}
/// <summary>
/// Queries Constant Buffer usage information.
/// </summary>
/// <returns>A mask where each bit set indicates a bound constant buffer</returns>
uint QueryConstantBufferUse()
{
return 0;
}
/// <summary>
/// Queries whenever the current draw has written the base vertex and base instance into Constant Buffer 0.
/// </summary>
/// <returns>True if the shader translator can assume that the constant buffer contains the base IDs, false otherwise</returns>
bool QueryHasConstantBufferDrawParameters()
{
return false;
}
/// <summary>
/// Queries whenever the current draw uses unaligned storage buffer addresses.
/// </summary>
/// <returns>True if any storage buffer address is not aligned to 16 bytes, false otherwise</returns>
bool QueryHasUnalignedStorageBuffer()
{
return false;
}
/// <summary>
/// Queries host's gather operation precision bits for biasing their coordinates. Zero means no bias.
/// </summary>
/// <returns>Bits of gather operation precision to use for coordinate bias</returns>
int QueryHostGatherBiasPrecision()
{
return 0;
}
/// <summary>
/// Queries host about whether to reduce precision to improve performance.
/// </summary>
/// <returns>True if precision is limited to vertex position, false otherwise</returns>
bool QueryHostReducedPrecision()
{
return false;
}
/// <summary>
/// Queries dual source blend state.
/// </summary>
/// <returns>True if blending is enabled with a dual source blend equation, false otherwise</returns>
bool QueryDualSourceBlendEnable()
{
return false;
}
/// <summary>
/// Queries host about the presence of the FrontFacing built-in variable bug.
/// </summary>
/// <returns>True if the bug is present on the host device used, false otherwise</returns>
bool QueryHostHasFrontFacingBug()
{
return false;
}
/// <summary>
/// Queries host about the presence of the vector indexing bug.
/// </summary>
/// <returns>True if the bug is present on the host device used, false otherwise</returns>
bool QueryHostHasVectorIndexingBug()
{
return false;
}
/// <summary>
/// Queries host storage buffer alignment required.
/// </summary>
/// <returns>Host storage buffer alignment in bytes</returns>
int QueryHostStorageBufferOffsetAlignment()
{
return 16;
}
/// <summary>
/// Queries host support for texture formats with BGRA component order (such as BGRA8).
/// </summary>
/// <returns>True if BGRA formats are supported, false otherwise</returns>
bool QueryHostSupportsBgraFormat()
{
return true;
}
/// <summary>
/// Queries host support for fragment shader ordering critical sections on the shader code.
/// </summary>
/// <returns>True if fragment shader interlock is supported, false otherwise</returns>
bool QueryHostSupportsFragmentShaderInterlock()
{
return true;
}
/// <summary>
/// Queries host support for fragment shader ordering scoped critical sections on the shader code.
/// </summary>
/// <returns>True if fragment shader ordering is supported, false otherwise</returns>
bool QueryHostSupportsFragmentShaderOrderingIntel()
{
return false;
}
/// <summary>
/// Queries host GPU geometry shader support.
/// </summary>
/// <returns>True if the GPU and driver supports geometry shaders, false otherwise</returns>
bool QueryHostSupportsGeometryShader()
{
return true;
}
/// <summary>
/// Queries host GPU geometry shader passthrough support.
/// </summary>
/// <returns>True if the GPU and driver supports geometry shader passthrough, false otherwise</returns>
bool QueryHostSupportsGeometryShaderPassthrough()
{
return true;
}
/// <summary>
/// Queries host support for readable images without a explicit format declaration on the shader.
/// </summary>
/// <returns>True if formatted image load is supported, false otherwise</returns>
bool QueryHostSupportsImageLoadFormatted()
{
return true;
}
/// <summary>
/// Queries host support for writes to the layer from vertex or tessellation shader stages.
/// </summary>
/// <returns>True if writes to the layer from vertex or tessellation are supported, false otherwise</returns>
bool QueryHostSupportsLayerVertexTessellation()
{
return true;
}
/// <summary>
/// Queries host GPU non-constant texture offset support.
/// </summary>
/// <returns>True if the GPU and driver supports non-constant texture offsets, false otherwise</returns>
bool QueryHostSupportsNonConstantTextureOffset()
{
return true;
}
/// <summary>
/// Queries host GPU shader ballot support.
/// </summary>
/// <returns>True if the GPU and driver supports shader ballot, false otherwise</returns>
bool QueryHostSupportsShaderBallot()
{
return true;
}
/// <summary>
/// Queries host GPU support for signed normalized buffer texture formats.
/// </summary>
/// <returns>True if the GPU and driver supports the formats, false otherwise</returns>
bool QueryHostSupportsSnormBufferTextureFormat()
{
return true;
}
/// <summary>
/// Queries host GPU texture shadow LOD support.
/// </summary>
/// <returns>True if the GPU and driver supports texture shadow LOD, false otherwise</returns>
bool QueryHostSupportsTextureShadowLod()
{
return true;
}
/// <summary>
/// Queries host support for writes to the viewport index from vertex or tessellation shader stages.
/// </summary>
/// <returns>True if writes to the viewport index from vertex or tessellation are supported, false otherwise</returns>
bool QueryHostSupportsViewportIndexVertexTessellation()
{
return true;
}
/// <summary>
/// Queries host GPU shader viewport mask output support.
/// </summary>
/// <returns>True if the GPU and driver supports shader viewport mask output, false otherwise</returns>
bool QueryHostSupportsViewportMask()
{
return true;
}
/// <summary>
/// Queries the point size from the GPU state, used when it is not explicitly set on the shader.
/// </summary>
/// <returns>Current point size</returns>
float QueryPointSize()
{
return 1f;
}
/// <summary>
/// Queries the state that indicates if the program point size should be explicitly set on the shader
/// or read from the GPU state.
/// </summary>
/// <returns>True if the shader is expected to set the point size explicitly, false otherwise</returns>
bool QueryProgramPointSize()
{
return true;
}
/// <summary>
/// Queries sampler type information.
/// </summary>
/// <param name="handle">Texture handle</param>
/// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
/// <returns>The sampler type value for the given handle</returns>
SamplerType QuerySamplerType(int handle, int cbufSlot = -1)
{
return SamplerType.Texture2D;
}
/// <summary>
/// Queries texture coordinate normalization information.
/// </summary>
/// <param name="handle">Texture handle</param>
/// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
/// <returns>True if the coordinates are normalized, false otherwise</returns>
bool QueryTextureCoordNormalized(int handle, int cbufSlot = -1)
{
return true;
}
/// <summary>
/// Queries current primitive topology for geometry shaders.
/// </summary>
/// <returns>Current primitive topology</returns>
InputTopology QueryPrimitiveTopology()
{
return InputTopology.Points;
}
/// <summary>
/// Queries the tessellation evaluation shader primitive winding order.
/// </summary>
/// <returns>True if the primitive winding order is clockwise, false if counter-clockwise</returns>
bool QueryTessCw()
{
return false;
}
/// <summary>
/// Queries the tessellation evaluation shader abstract patch type.
/// </summary>
/// <returns>Abstract patch type</returns>
TessPatchType QueryTessPatchType()
{
return TessPatchType.Triangles;
}
/// <summary>
/// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch.
/// </summary>
/// <returns>Spacing between tessellated vertices of the patch</returns>
TessSpacing QueryTessSpacing()
{
return TessSpacing.EqualSpacing;
}
/// <summary>
/// Queries texture format information, for shaders using image load or store.
/// </summary>
/// <remarks>
/// This only returns non-compressed color formats.
/// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned.
/// </remarks>
/// <param name="handle">Texture handle</param>
/// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
/// <returns>Color format of the non-compressed texture</returns>
TextureFormat QueryTextureFormat(int handle, int cbufSlot = -1)
{
return TextureFormat.R8G8B8A8Unorm;
}
/// <summary>
/// Queries depth mode information from the GPU state.
/// </summary>
/// <returns>True if current depth mode is -1 to 1, false if 0 to 1</returns>
bool QueryTransformDepthMinusOneToOne()
{
return false;
}
/// <summary>
/// Queries transform feedback enable state.
/// </summary>
/// <returns>True if the shader uses transform feedback, false otherwise</returns>
bool QueryTransformFeedbackEnabled()
{
return false;
}
/// <summary>
/// Gets the varying locations that should be written to a transform feedback buffer.
/// </summary>
/// <param name="bufferIndex">Index of the transform feedback buffer</param>
/// <returns>Varying locations for the specified buffer. The default implementation returns an empty span</returns>
ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex) => ReadOnlySpan<byte>.Empty;
/// <summary>
/// Gets the stride (in bytes) of the per vertex data written into a transform feedback buffer.
/// </summary>
/// <param name="bufferIndex">Index of the transform feedback buffer</param>
/// <returns>Stride for the specified buffer. The default implementation returns 0</returns>
int QueryTransformFeedbackStride(int bufferIndex) => 0;
/// <summary>
/// Gets whether the host state forces early depth testing.
/// </summary>
/// <returns>True if early depth testing is forced. The default implementation returns false</returns>
bool QueryEarlyZForce() => false;
/// <summary>
/// Gets whether the host state disables the viewport transform.
/// </summary>
/// <returns>True if the viewport transform is disabled. The default implementation returns false</returns>
bool QueryViewportTransformDisable() => false;
/// <summary>
/// Registers a texture used by the shader.
/// </summary>
/// <remarks>
/// The default implementation does nothing; implementers that record shader information can override it.
/// </remarks>
/// <param name="handle">Texture handle word offset</param>
/// <param name="cbufSlot">Constant buffer slot where the texture handle is located</param>
void RegisterTexture(int handle, int cbufSlot)
{
// Only useful when recording information for a disk shader cache.
}
}
}

View File

@ -0,0 +1,40 @@
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Primitive topology received as input by a geometry shader.
    /// </summary>
    public enum InputTopology : byte
    {
        Points,
        Lines,
        LinesAdjacency,
        Triangles,
        TrianglesAdjacency
    }

    /// <summary>
    /// Conversion helpers for <see cref="InputTopology"/>.
    /// </summary>
    static class InputTopologyExtensions
    {
        /// <summary>
        /// Gets the GLSL input layout qualifier matching the topology.
        /// </summary>
        /// <param name="topology">Topology to convert</param>
        /// <returns>GLSL layout qualifier name; unknown values map to "points"</returns>
        public static string ToGlslString(this InputTopology topology)
        {
            return topology switch
            {
                InputTopology.Points => "points",
                InputTopology.Lines => "lines",
                InputTopology.LinesAdjacency => "lines_adjacency",
                InputTopology.Triangles => "triangles",
                InputTopology.TrianglesAdjacency => "triangles_adjacency",
                _ => "points"
            };
        }

        /// <summary>
        /// Gets the number of vertices that a single input primitive of the given topology provides,
        /// including the adjacent vertices of the adjacency topologies.
        /// </summary>
        /// <remarks>
        /// The adjacency layouts supply 4 (lines_adjacency) and 6 (triangles_adjacency) vertices per primitive.
        /// NOTE(review): callers that only want the vertices of the base primitive should use
        /// <see cref="ToInputVerticesNoAdjacency"/> instead. Confirm against call sites.
        /// </remarks>
        /// <param name="topology">Topology to query</param>
        /// <returns>Number of input vertices per primitive; unknown values map to 1</returns>
        public static int ToInputVertices(this InputTopology topology)
        {
            return topology switch
            {
                InputTopology.Points => 1,
                InputTopology.Lines => 2,
                InputTopology.LinesAdjacency => 4,
                InputTopology.Triangles => 3,
                InputTopology.TrianglesAdjacency => 6,
                _ => 1
            };
        }

        /// <summary>
        /// Gets the number of vertices of the base primitive of the given topology, ignoring adjacent vertices.
        /// </summary>
        /// <param name="topology">Topology to query</param>
        /// <returns>1 for points, 2 for both line topologies, 3 for both triangle topologies; unknown values map to 1</returns>
        public static int ToInputVerticesNoAdjacency(this InputTopology topology)
        {
            return topology switch
            {
                InputTopology.Points => 1,
                InputTopology.Lines or
                InputTopology.LinesAdjacency => 2,
                InputTopology.Triangles or
                InputTopology.TrianglesAdjacency => 3,
                _ => 1
            };
        }
    }
}

View File

@ -0,0 +1,351 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static class AttributeMap
{
/// <summary>
/// Bit mask of shader stages, where the bit for a stage is 1 shifted left by its <see cref="ShaderStage"/> value.
/// </summary>
private enum StagesMask : byte
{
None = 0,
// Single stage bits.
Compute = 1 << (int)ShaderStage.Compute,
Vertex = 1 << (int)ShaderStage.Vertex,
TessellationControl = 1 << (int)ShaderStage.TessellationControl,
TessellationEvaluation = 1 << (int)ShaderStage.TessellationEvaluation,
Geometry = 1 << (int)ShaderStage.Geometry,
Fragment = 1 << (int)ShaderStage.Fragment,
// Combinations used by the attribute tables.
Tessellation = TessellationControl | TessellationEvaluation,
VertexTessellationGeometry = Vertex | Tessellation | Geometry,
TessellationGeometryFragment = Tessellation | Geometry | Fragment,
AllGraphics = Vertex | Tessellation | Geometry | Fragment
}
/// <summary>
/// Immutable description of one attribute word stored in the attribute tables.
/// </summary>
/// <remarks>
/// Declared as a readonly struct because it is never mutated after construction and is passed around with <c>in</c>.
/// </remarks>
private readonly struct AttributeEntry
{
    /// <summary>
    /// Offset the attribute range was registered with (shared by every word of the range).
    /// </summary>
    public int BaseOffset { get; }

    /// <summary>
    /// Type the attribute was registered with.
    /// </summary>
    public AggregateType Type { get; }

    /// <summary>
    /// IO variable the attribute maps to.
    /// </summary>
    public IoVariable IoVariable { get; }

    /// <summary>
    /// Stages where the attribute can be used as an input.
    /// </summary>
    public StagesMask InputMask { get; }

    /// <summary>
    /// Stages where the attribute can be used as an output.
    /// </summary>
    public StagesMask OutputMask { get; }

    /// <summary>
    /// Creates a new attribute entry.
    /// </summary>
    /// <param name="baseOffset">Offset the attribute range was registered with</param>
    /// <param name="type">Type of the attribute</param>
    /// <param name="ioVariable">IO variable the attribute maps to</param>
    /// <param name="inputMask">Stages where the attribute can be used as an input</param>
    /// <param name="outputMask">Stages where the attribute can be used as an output</param>
    public AttributeEntry(
        int baseOffset,
        AggregateType type,
        IoVariable ioVariable,
        StagesMask inputMask,
        StagesMask outputMask)
    {
        BaseOffset = baseOffset;
        Type = type;
        IoVariable = ioVariable;
        InputMask = inputMask;
        OutputMask = outputMask;
    }
}
// Per-vertex attribute table, keyed by attribute offset (one entry per 4-unit step, see Add).
private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributes;
// Per-patch attribute table, keyed the same way.
private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributesPerPatch;
/// <summary>
/// Builds both attribute tables once, when the type is first used.
/// </summary>
static AttributeMap()
{
_attributes = CreateMap();
_attributesPerPatch = CreatePerPatchMap();
}
/// <summary>
/// Builds the table of per-vertex attributes, keyed by attribute offset.
/// </summary>
/// <returns>Table with one entry per registered attribute word</returns>
private static IReadOnlyDictionary<int, AttributeEntry> CreateMap()
{
    // Shorthands for the types and stage masks that repeat across the table.
    AggregateType scalarI = AggregateType.S32;
    AggregateType scalarF = AggregateType.FP32;
    AggregateType vec2F = AggregateType.Vector2 | AggregateType.FP32;
    AggregateType vec4F = AggregateType.Vector4 | AggregateType.FP32;

    StagesMask noStage = StagesMask.None;
    StagesMask vertexOnly = StagesMask.Vertex;
    StagesMask fragmentOnly = StagesMask.Fragment;
    StagesMask vtg = StagesMask.VertexTessellationGeometry;
    StagesMask tgf = StagesMask.TessellationGeometryFragment;

    Dictionary<int, AttributeEntry> table = new Dictionary<int, AttributeEntry>();

    Add(table, 0x060, scalarI, IoVariable.PrimitiveId, tgf, StagesMask.Geometry);
    Add(table, 0x064, scalarI, IoVariable.Layer, fragmentOnly, vtg);
    Add(table, 0x068, scalarI, IoVariable.ViewportIndex, fragmentOnly, vtg);
    Add(table, 0x06c, scalarF, IoVariable.PointSize, noStage, vtg);
    Add(table, 0x070, vec4F, IoVariable.Position, tgf, vtg);
    Add(table, 0x080, vec4F, IoVariable.UserDefined, StagesMask.AllGraphics, vtg, count: 32);
    Add(table, 0x280, vec4F, IoVariable.FrontColorDiffuse, tgf, vtg);
    Add(table, 0x290, vec4F, IoVariable.FrontColorSpecular, tgf, vtg);
    Add(table, 0x2a0, vec4F, IoVariable.BackColorDiffuse, tgf, vtg);
    Add(table, 0x2b0, vec4F, IoVariable.BackColorSpecular, tgf, vtg);
    Add(table, 0x2c0, AggregateType.Array | AggregateType.FP32, IoVariable.ClipDistance, tgf, vtg, count: 8);
    Add(table, 0x2e0, vec2F, IoVariable.PointCoord, fragmentOnly, noStage);
    Add(table, 0x2e8, scalarF, IoVariable.FogCoord, tgf, vtg);
    Add(table, 0x2f0, vec2F, IoVariable.TessellationCoord, StagesMask.TessellationEvaluation, noStage);
    Add(table, 0x2f8, scalarI, IoVariable.InstanceId, vertexOnly, noStage);
    Add(table, 0x2fc, scalarI, IoVariable.VertexId, vertexOnly, noStage);
    Add(table, 0x300, vec4F, IoVariable.TextureCoord, tgf, vtg);
    Add(table, 0x3a0, AggregateType.Array | AggregateType.S32, IoVariable.ViewportMask, fragmentOnly, vtg);
    Add(table, 0x3fc, AggregateType.Bool, IoVariable.FrontFacing, fragmentOnly, noStage);

    return table;
}
/// <summary>
/// Builds the table of per-patch attributes, keyed by attribute offset.
/// </summary>
/// <returns>Table with one entry per registered per-patch attribute word</returns>
private static IReadOnlyDictionary<int, AttributeEntry> CreatePerPatchMap()
{
    AggregateType vec2F = AggregateType.Vector2 | AggregateType.FP32;
    AggregateType vec4F = AggregateType.Vector4 | AggregateType.FP32;

    // Per-patch attributes are read by tessellation evaluation and written by tessellation control.
    StagesMask readers = StagesMask.TessellationEvaluation;
    StagesMask writers = StagesMask.TessellationControl;

    Dictionary<int, AttributeEntry> table = new Dictionary<int, AttributeEntry>();

    Add(table, 0x000, vec4F, IoVariable.TessellationLevelOuter, readers, writers);
    Add(table, 0x010, vec2F, IoVariable.TessellationLevelInner, readers, writers);
    Add(table, 0x018, vec4F, IoVariable.UserDefined, readers, writers, count: 31, upperBound: 0x200);

    return table;
}
/// <summary>
/// Registers an attribute range on a table, adding one entry for every word of the range.
/// </summary>
/// <param name="attributes">Table that receives the entries</param>
/// <param name="offset">Offset of the first word of the range</param>
/// <param name="type">Type of each element of the range</param>
/// <param name="ioVariable">IO variable the range maps to</param>
/// <param name="inputMask">Stages where the attribute can be used as an input</param>
/// <param name="outputMask">Stages where the attribute can be used as an output</param>
/// <param name="count">Number of consecutive elements of <paramref name="type"/> in the range</param>
/// <param name="upperBound">Offset where registration stops; no entry is added past the first word at or beyond it</param>
private static void Add(
    Dictionary<int, AttributeEntry> attributes,
    int offset,
    AggregateType type,
    IoVariable ioVariable,
    StagesMask inputMask,
    StagesMask outputMask,
    int count = 1,
    int upperBound = 0x400)
{
    // Every word of the range shares the same description, including the offset where the range starts.
    AttributeEntry shared = new AttributeEntry(offset, type, ioVariable, inputMask, outputMask);

    int totalWords = count * GetElementCount(type);
    int wordOffset = offset;

    for (int word = 0; word < totalWords; word++)
    {
        attributes.Add(wordOffset, shared);
        wordOffset += 4;

        // The bound is checked after each insertion, so the first word is always registered.
        if (wordOffset >= upperBound)
        {
            break;
        }
    }
}
/// <summary>
/// Emits a load of the attribute word located at <paramref name="offset"/>.
/// </summary>
/// <remarks>
/// A message is logged through the GPU accessor and a constant zero is returned when the offset is not on the
/// attribute table, when the attribute can't be used in the requested direction by the current stage,
/// or when <c>IsSupportedByHost</c> rejects it.
/// </remarks>
/// <param name="context">Emitter context that receives the load</param>
/// <param name="primVertex">Primitive vertex index, or null if there is none. Replaced by the invocation ID when <c>HasInvocationId</c> is true and the access is not per patch</param>
/// <param name="offset">Attribute offset, used as the table key</param>
/// <param name="isOutput">True to load from the output storage, false to load from the input storage</param>
/// <param name="isPerPatch">True to use the per-patch table and the per-patch storage kinds</param>
/// <returns>Result of the emitted load, or a constant zero if the access was rejected</returns>
public static Operand GenerateAttributeLoad(EmitterContext context, Operand primVertex, int offset, bool isOutput, bool isPerPatch)
{
if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
return Const(0);
}
// The bit of the current stage must be set on the mask for the requested direction.
StagesMask validUseMask = isOutput ? entry.OutputMask : entry.InputMask;
if (((StagesMask)(1 << (int)context.Config.Stage) & validUseMask) == StagesMask.None)
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
return Const(0);
}
if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
return Const(0);
}
if (HasInvocationId(context.Config.Stage, isOutput) && !isPerPatch)
{
primVertex = context.Load(StorageKind.Input, IoVariable.InvocationId);
}
// Index of the accessed word, relative to the start of the range the entry was registered with.
int innerOffset = offset - entry.BaseOffset;
int innerIndex = innerOffset / 4;
StorageKind storageKind = isPerPatch
? (isOutput ? StorageKind.OutputPerPatch : StorageKind.InputPerPatch)
: (isOutput ? StorageKind.Output : StorageKind.Input);
IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
AggregateType type = GetType(context.Config, isOutput, innerIndex, in entry);
int elementCount = GetElementCount(type);
bool isArray = type.HasFlag(AggregateType.Array);
bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput);
bool hasElementIndex = elementCount > 1;
if (hasArrayIndex && hasElementIndex)
{
// Split the word index into an array (or location) index and a vector component index.
int arrayIndex = innerIndex / elementCount;
int elementIndex = innerIndex - (arrayIndex * elementCount);
// For arrays, or when there is no vertex index, the vertex index operand goes first.
// Otherwise the location index goes before the vertex index.
return primVertex == null || isArray
? context.Load(storageKind, ioVariable, primVertex, Const(arrayIndex), Const(elementIndex))
: context.Load(storageKind, ioVariable, Const(arrayIndex), primVertex, Const(elementIndex));
}
else if (hasArrayIndex || hasElementIndex)
{
// Only one index is needed, the word index is used directly as that index.
return primVertex == null || isArray || !hasArrayIndex
? context.Load(storageKind, ioVariable, primVertex, Const(innerIndex))
: context.Load(storageKind, ioVariable, Const(innerIndex), primVertex);
}
else
{
return context.Load(storageKind, ioVariable, primVertex);
}
}
/// <summary>
/// Emits a store of <paramref name="value"/> to the output attribute word located at <paramref name="offset"/>.
/// </summary>
/// <remarks>
/// A message is logged through the GPU accessor and nothing is stored when the offset is not on the
/// attribute table, when the attribute can't be written by the current stage,
/// or when <c>IsSupportedByHost</c> rejects it.
/// </remarks>
/// <param name="context">Emitter context that receives the store</param>
/// <param name="offset">Attribute offset, used as the table key</param>
/// <param name="isPerPatch">True to use the per-patch table and the per-patch output storage</param>
/// <param name="value">Value to be stored</param>
public static void GenerateAttributeStore(EmitterContext context, int offset, bool isPerPatch, Operand value)
{
if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
return;
}
// The bit of the current stage must be set on the output mask of the attribute.
if (((StagesMask)(1 << (int)context.Config.Stage) & entry.OutputMask) == StagesMask.None)
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
return;
}
if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
return;
}
// Stays null unless the stage writes its outputs indexed by invocation ID and the store is not per patch.
Operand invocationId = null;
if (HasInvocationId(context.Config.Stage, isOutput: true) && !isPerPatch)
{
invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
}
// Index of the accessed word, relative to the start of the range the entry was registered with.
int innerOffset = offset - entry.BaseOffset;
int innerIndex = innerOffset / 4;
StorageKind storageKind = isPerPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
AggregateType type = GetType(context.Config, isOutput: true, innerIndex, in entry);
int elementCount = GetElementCount(type);
bool isArray = type.HasFlag(AggregateType.Array);
bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput: true);
bool hasElementIndex = elementCount > 1;
if (hasArrayIndex && hasElementIndex)
{
// Split the word index into an array (or location) index and a vector component index.
int arrayIndex = innerIndex / elementCount;
int elementIndex = innerIndex - (arrayIndex * elementCount);
// For arrays, or when there is no invocation ID, the invocation ID operand goes first.
// Otherwise the location index goes before the invocation ID.
if (invocationId == null || isArray)
{
context.Store(storageKind, ioVariable, invocationId, Const(arrayIndex), Const(elementIndex), value);
}
else
{
context.Store(storageKind, ioVariable, Const(arrayIndex), invocationId, Const(elementIndex), value);
}
}
else if (hasArrayIndex || hasElementIndex)
{
// Only one index is needed, the word index is used directly as that index.
if (invocationId == null || isArray || !hasArrayIndex)
{
context.Store(storageKind, ioVariable, invocationId, Const(innerIndex), value);
}
else
{
context.Store(storageKind, ioVariable, Const(innerIndex), invocationId, value);
}
}
else
{
context.Store(storageKind, ioVariable, invocationId, value);
}
}
/// <summary>
/// Checks if the host can handle an IO variable on the given stage.
/// </summary>
/// <param name="gpuAccessor">GPU accessor used to query the host capabilities</param>
/// <param name="stage">Stage where the variable is used</param>
/// <param name="ioVariable">Variable to be checked</param>
/// <returns>True if the variable can be used, false otherwise</returns>
private static bool IsSupportedByHost(IGpuAccessor gpuAccessor, ShaderStage stage, IoVariable ioVariable)
{
    switch (ioVariable)
    {
        case IoVariable.ViewportIndex:
        {
            // The host capability only matters outside of the geometry and fragment stages.
            bool geometryOrFragment = stage == ShaderStage.Geometry || stage == ShaderStage.Fragment;

            return geometryOrFragment || gpuAccessor.QueryHostSupportsViewportIndexVertexTessellation();
        }

        case IoVariable.ViewportMask:
            return gpuAccessor.QueryHostSupportsViewportMask();

        default:
            return true;
    }
}
/// <summary>
/// Gets the IO variable that the current stage writes at a given output attribute offset.
/// </summary>
/// <param name="config">Shader configuration providing the current stage</param>
/// <param name="offset">Attribute offset, looked up on the per-vertex table</param>
/// <param name="location">Location index, (offset - base offset) / 16, for variables with per-location outputs; zero otherwise</param>
/// <returns>The IO variable, or <see cref="IoVariable.Invalid"/> if the offset is unknown or not writable by the stage</returns>
public static IoVariable GetIoVariable(ShaderConfig config, int offset, out int location)
{
    location = 0;

    if (_attributes.TryGetValue(offset, out AttributeEntry match))
    {
        StagesMask stageBit = (StagesMask)(1 << (int)config.Stage);

        if ((stageBit & match.OutputMask) != StagesMask.None)
        {
            if (config.HasPerLocationInputOrOutput(match.IoVariable, isOutput: true))
            {
                location = (offset - match.BaseOffset) / 16;
            }

            return GetIoVariable(config.Stage, in match);
        }
    }

    return IoVariable.Invalid;
}
/// <summary>
/// Gets the IO variable of an entry, as seen by the given stage.
/// </summary>
/// <param name="stage">Stage accessing the attribute</param>
/// <param name="entry">Attribute table entry</param>
/// <returns><see cref="IoVariable.FragmentCoord"/> for the position on the fragment stage, the variable of the entry otherwise</returns>
private static IoVariable GetIoVariable(ShaderStage stage, in AttributeEntry entry)
{
    bool isFragmentPosition = entry.IoVariable == IoVariable.Position && stage == ShaderStage.Fragment;

    return isFragmentPosition ? IoVariable.FragmentCoord : entry.IoVariable;
}
/// <summary>
/// Gets the type of the attribute being accessed.
/// </summary>
/// <param name="config">Shader configuration, queried for user defined and fragment output color types</param>
/// <param name="isOutput">True if the attribute is accessed as an output</param>
/// <param name="innerIndex">Word index inside the attribute range; divided by 4 to get the index passed to the configuration</param>
/// <param name="entry">Attribute table entry</param>
/// <returns>Type reported by the configuration for those two variables, the type of the entry otherwise</returns>
private static AggregateType GetType(ShaderConfig config, bool isOutput, int innerIndex, in AttributeEntry entry)
{
    switch (entry.IoVariable)
    {
        case IoVariable.UserDefined:
            return config.GetUserDefinedType(innerIndex / 4, isOutput);

        case IoVariable.FragmentOutputColor:
            return config.GetFragmentOutputColorType(innerIndex / 4);

        default:
            return entry.Type;
    }
}
/// <summary>
/// Checks if attribute accesses on the given stage are indexed by a primitive vertex.
/// </summary>
/// <param name="stage">Stage accessing the attribute</param>
/// <param name="isOutput">True if the attribute is accessed as an output</param>
/// <returns>True for inputs of the tessellation control, tessellation evaluation and geometry stages, false otherwise</returns>
public static bool HasPrimitiveVertex(ShaderStage stage, bool isOutput)
{
    bool isPerVertexInputStage =
        stage == ShaderStage.TessellationControl ||
        stage == ShaderStage.TessellationEvaluation ||
        stage == ShaderStage.Geometry;

    return !isOutput && isPerVertexInputStage;
}
/// <summary>
/// Checks if attribute accesses on the given stage are indexed by the invocation ID.
/// </summary>
/// <param name="stage">Stage accessing the attribute</param>
/// <param name="isOutput">True if the attribute is accessed as an output</param>
/// <returns>True only for outputs of the tessellation control stage</returns>
public static bool HasInvocationId(ShaderStage stage, bool isOutput) => stage == ShaderStage.TessellationControl && isOutput;
/// <summary>
/// Gets the number of vector elements of a type.
/// </summary>
/// <param name="type">Type to query</param>
/// <returns>2, 3 or 4 for the vector types, 1 for anything else</returns>
private static int GetElementCount(AggregateType type)
{
    switch (type & AggregateType.ElementCountMask)
    {
        case AggregateType.Vector2:
            return 2;
        case AggregateType.Vector3:
            return 3;
        case AggregateType.Vector4:
            return 4;
        default:
            return 1;
    }
}
}
}

View File

@ -0,0 +1,379 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
/// <summary>
/// Emitters for the shader instructions that are not implemented.
/// </summary>
/// <remarks>
/// Every method fetches the current operation as the matching instruction type and then logs, through the
/// GPU accessor, that the instruction is not implemented. No code is emitted for these instructions.
/// The fetched operation is not stored since nothing uses it; the call itself is kept so that the
/// behaviour of <c>GetOp</c> for the expected instruction type is unchanged.
/// </remarks>
static partial class InstEmit
{
    public static void AtomCas(EmitterContext context)
    {
        context.GetOp<InstAtomCas>();
        context.Config.GpuAccessor.Log("Shader instruction AtomCas is not implemented.");
    }

    public static void AtomsCas(EmitterContext context)
    {
        context.GetOp<InstAtomsCas>();
        context.Config.GpuAccessor.Log("Shader instruction AtomsCas is not implemented.");
    }

    public static void B2r(EmitterContext context)
    {
        context.GetOp<InstB2r>();
        context.Config.GpuAccessor.Log("Shader instruction B2r is not implemented.");
    }

    public static void Bpt(EmitterContext context)
    {
        context.GetOp<InstBpt>();
        context.Config.GpuAccessor.Log("Shader instruction Bpt is not implemented.");
    }

    public static void Cctl(EmitterContext context)
    {
        context.GetOp<InstCctl>();
        context.Config.GpuAccessor.Log("Shader instruction Cctl is not implemented.");
    }

    public static void Cctll(EmitterContext context)
    {
        context.GetOp<InstCctll>();
        context.Config.GpuAccessor.Log("Shader instruction Cctll is not implemented.");
    }

    public static void Cctlt(EmitterContext context)
    {
        context.GetOp<InstCctlt>();
        context.Config.GpuAccessor.Log("Shader instruction Cctlt is not implemented.");
    }

    public static void Cs2r(EmitterContext context)
    {
        context.GetOp<InstCs2r>();
        context.Config.GpuAccessor.Log("Shader instruction Cs2r is not implemented.");
    }

    public static void FchkR(EmitterContext context)
    {
        context.GetOp<InstFchkR>();
        context.Config.GpuAccessor.Log("Shader instruction FchkR is not implemented.");
    }

    public static void FchkI(EmitterContext context)
    {
        context.GetOp<InstFchkI>();
        context.Config.GpuAccessor.Log("Shader instruction FchkI is not implemented.");
    }

    public static void FchkC(EmitterContext context)
    {
        context.GetOp<InstFchkC>();
        context.Config.GpuAccessor.Log("Shader instruction FchkC is not implemented.");
    }

    public static void Getcrsptr(EmitterContext context)
    {
        context.GetOp<InstGetcrsptr>();
        context.Config.GpuAccessor.Log("Shader instruction Getcrsptr is not implemented.");
    }

    public static void Getlmembase(EmitterContext context)
    {
        context.GetOp<InstGetlmembase>();
        context.Config.GpuAccessor.Log("Shader instruction Getlmembase is not implemented.");
    }

    public static void Ide(EmitterContext context)
    {
        context.GetOp<InstIde>();
        context.Config.GpuAccessor.Log("Shader instruction Ide is not implemented.");
    }

    public static void IdpR(EmitterContext context)
    {
        context.GetOp<InstIdpR>();
        context.Config.GpuAccessor.Log("Shader instruction IdpR is not implemented.");
    }

    public static void IdpC(EmitterContext context)
    {
        context.GetOp<InstIdpC>();
        context.Config.GpuAccessor.Log("Shader instruction IdpC is not implemented.");
    }

    public static void ImadspR(EmitterContext context)
    {
        context.GetOp<InstImadspR>();
        context.Config.GpuAccessor.Log("Shader instruction ImadspR is not implemented.");
    }

    public static void ImadspI(EmitterContext context)
    {
        context.GetOp<InstImadspI>();
        context.Config.GpuAccessor.Log("Shader instruction ImadspI is not implemented.");
    }

    public static void ImadspC(EmitterContext context)
    {
        context.GetOp<InstImadspC>();
        context.Config.GpuAccessor.Log("Shader instruction ImadspC is not implemented.");
    }

    public static void ImadspRc(EmitterContext context)
    {
        context.GetOp<InstImadspRc>();
        context.Config.GpuAccessor.Log("Shader instruction ImadspRc is not implemented.");
    }

    public static void Jcal(EmitterContext context)
    {
        context.GetOp<InstJcal>();
        context.Config.GpuAccessor.Log("Shader instruction Jcal is not implemented.");
    }

    public static void Jmp(EmitterContext context)
    {
        context.GetOp<InstJmp>();
        context.Config.GpuAccessor.Log("Shader instruction Jmp is not implemented.");
    }

    public static void Jmx(EmitterContext context)
    {
        context.GetOp<InstJmx>();
        context.Config.GpuAccessor.Log("Shader instruction Jmx is not implemented.");
    }

    public static void Ld(EmitterContext context)
    {
        context.GetOp<InstLd>();
        context.Config.GpuAccessor.Log("Shader instruction Ld is not implemented.");
    }

    public static void Lepc(EmitterContext context)
    {
        context.GetOp<InstLepc>();
        context.Config.GpuAccessor.Log("Shader instruction Lepc is not implemented.");
    }

    public static void Longjmp(EmitterContext context)
    {
        context.GetOp<InstLongjmp>();
        context.Config.GpuAccessor.Log("Shader instruction Longjmp is not implemented.");
    }

    public static void P2rR(EmitterContext context)
    {
        context.GetOp<InstP2rR>();
        context.Config.GpuAccessor.Log("Shader instruction P2rR is not implemented.");
    }

    public static void P2rI(EmitterContext context)
    {
        context.GetOp<InstP2rI>();
        context.Config.GpuAccessor.Log("Shader instruction P2rI is not implemented.");
    }

    public static void P2rC(EmitterContext context)
    {
        context.GetOp<InstP2rC>();
        context.Config.GpuAccessor.Log("Shader instruction P2rC is not implemented.");
    }

    public static void Pexit(EmitterContext context)
    {
        context.GetOp<InstPexit>();
        context.Config.GpuAccessor.Log("Shader instruction Pexit is not implemented.");
    }

    public static void Pixld(EmitterContext context)
    {
        context.GetOp<InstPixld>();
        context.Config.GpuAccessor.Log("Shader instruction Pixld is not implemented.");
    }

    public static void Plongjmp(EmitterContext context)
    {
        context.GetOp<InstPlongjmp>();
        context.Config.GpuAccessor.Log("Shader instruction Plongjmp is not implemented.");
    }

    public static void Pret(EmitterContext context)
    {
        context.GetOp<InstPret>();
        context.Config.GpuAccessor.Log("Shader instruction Pret is not implemented.");
    }

    public static void PrmtR(EmitterContext context)
    {
        context.GetOp<InstPrmtR>();
        context.Config.GpuAccessor.Log("Shader instruction PrmtR is not implemented.");
    }

    public static void PrmtI(EmitterContext context)
    {
        context.GetOp<InstPrmtI>();
        context.Config.GpuAccessor.Log("Shader instruction PrmtI is not implemented.");
    }

    public static void PrmtC(EmitterContext context)
    {
        context.GetOp<InstPrmtC>();
        context.Config.GpuAccessor.Log("Shader instruction PrmtC is not implemented.");
    }

    public static void PrmtRc(EmitterContext context)
    {
        context.GetOp<InstPrmtRc>();
        context.Config.GpuAccessor.Log("Shader instruction PrmtRc is not implemented.");
    }

    public static void R2b(EmitterContext context)
    {
        context.GetOp<InstR2b>();
        context.Config.GpuAccessor.Log("Shader instruction R2b is not implemented.");
    }

    public static void Ram(EmitterContext context)
    {
        context.GetOp<InstRam>();
        context.Config.GpuAccessor.Log("Shader instruction Ram is not implemented.");
    }

    public static void Rtt(EmitterContext context)
    {
        context.GetOp<InstRtt>();
        context.Config.GpuAccessor.Log("Shader instruction Rtt is not implemented.");
    }

    public static void Sam(EmitterContext context)
    {
        context.GetOp<InstSam>();
        context.Config.GpuAccessor.Log("Shader instruction Sam is not implemented.");
    }

    public static void Setcrsptr(EmitterContext context)
    {
        context.GetOp<InstSetcrsptr>();
        context.Config.GpuAccessor.Log("Shader instruction Setcrsptr is not implemented.");
    }

    public static void Setlmembase(EmitterContext context)
    {
        context.GetOp<InstSetlmembase>();
        context.Config.GpuAccessor.Log("Shader instruction Setlmembase is not implemented.");
    }

    public static void St(EmitterContext context)
    {
        context.GetOp<InstSt>();
        context.Config.GpuAccessor.Log("Shader instruction St is not implemented.");
    }

    public static void Stp(EmitterContext context)
    {
        context.GetOp<InstStp>();
        context.Config.GpuAccessor.Log("Shader instruction Stp is not implemented.");
    }

    public static void Txa(EmitterContext context)
    {
        context.GetOp<InstTxa>();
        context.Config.GpuAccessor.Log("Shader instruction Txa is not implemented.");
    }

    public static void Vabsdiff(EmitterContext context)
    {
        context.GetOp<InstVabsdiff>();
        context.Config.GpuAccessor.Log("Shader instruction Vabsdiff is not implemented.");
    }

    public static void Vabsdiff4(EmitterContext context)
    {
        context.GetOp<InstVabsdiff4>();
        context.Config.GpuAccessor.Log("Shader instruction Vabsdiff4 is not implemented.");
    }

    public static void Vadd(EmitterContext context)
    {
        context.GetOp<InstVadd>();
        context.Config.GpuAccessor.Log("Shader instruction Vadd is not implemented.");
    }

    public static void Votevtg(EmitterContext context)
    {
        context.GetOp<InstVotevtg>();
        context.Config.GpuAccessor.Log("Shader instruction Votevtg is not implemented.");
    }

    public static void Vset(EmitterContext context)
    {
        context.GetOp<InstVset>();
        context.Config.GpuAccessor.Log("Shader instruction Vset is not implemented.");
    }

    public static void Vshl(EmitterContext context)
    {
        context.GetOp<InstVshl>();
        context.Config.GpuAccessor.Log("Shader instruction Vshl is not implemented.");
    }

    public static void Vshr(EmitterContext context)
    {
        context.GetOp<InstVshr>();
        context.Config.GpuAccessor.Log("Shader instruction Vshr is not implemented.");
    }
}
}

View File

@ -0,0 +1,160 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static class InstEmitAluHelper
{
/// <summary>
/// Gets the lowest value representable by an integer destination format.
/// </summary>
/// <param name="type">Integer destination format</param>
/// <returns>Minimum value of the format, widened to a 64-bit integer</returns>
/// <exception cref="ArgumentException">The format is not one of the supported integer formats</exception>
public static long GetIntMin(IDstFmt type)
{
    switch (type)
    {
        case IDstFmt.U16:
            return ushort.MinValue;
        case IDstFmt.S16:
            return short.MinValue;
        case IDstFmt.U32:
            return uint.MinValue;
        case IDstFmt.S32:
            return int.MinValue;
        default:
            throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
    }
}
/// <summary>
/// Gets the highest value representable by an integer destination format.
/// </summary>
/// <param name="type">Integer destination format</param>
/// <returns>Maximum value of the format, widened to a 64-bit integer</returns>
/// <exception cref="ArgumentException">The format is not one of the supported integer formats</exception>
public static long GetIntMax(IDstFmt type)
{
    switch (type)
    {
        case IDstFmt.U16:
            return ushort.MaxValue;
        case IDstFmt.S16:
            return short.MaxValue;
        case IDstFmt.U32:
            return uint.MaxValue;
        case IDstFmt.S32:
            return int.MaxValue;
        default:
            throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
    }
}
/// <summary>
/// Gets the lowest value representable by an integer source/destination format.
/// </summary>
/// <param name="type">Integer source/destination format</param>
/// <returns>Minimum value of the format, widened to a 64-bit integer</returns>
/// <exception cref="ArgumentException">The format is not one of the supported integer formats</exception>
public static long GetIntMin(ISrcDstFmt type)
{
    switch (type)
    {
        case ISrcDstFmt.U8:
            return byte.MinValue;
        case ISrcDstFmt.S8:
            return sbyte.MinValue;
        case ISrcDstFmt.U16:
            return ushort.MinValue;
        case ISrcDstFmt.S16:
            return short.MinValue;
        case ISrcDstFmt.U32:
            return uint.MinValue;
        case ISrcDstFmt.S32:
            return int.MinValue;
        default:
            throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
    }
}
/// <summary>
/// Gets the highest value representable by an integer source/destination format.
/// </summary>
/// <param name="type">Integer source/destination format</param>
/// <returns>Maximum value of the format, widened to a 64-bit integer</returns>
/// <exception cref="ArgumentException">The format is not one of the supported integer formats</exception>
public static long GetIntMax(ISrcDstFmt type)
{
    switch (type)
    {
        case ISrcDstFmt.U8:
            return byte.MaxValue;
        case ISrcDstFmt.S8:
            return sbyte.MaxValue;
        case ISrcDstFmt.U16:
            return ushort.MaxValue;
        case ISrcDstFmt.S16:
            return short.MaxValue;
        case ISrcDstFmt.U32:
            return uint.MaxValue;
        case ISrcDstFmt.S32:
            return int.MaxValue;
        default:
            throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
    }
}
/// <summary>
/// Combines a value with a predicate using the given boolean operation.
/// </summary>
/// <param name="context">Emitter context that receives the operation</param>
/// <param name="logicOp">Boolean operation to apply</param>
/// <param name="input">Value to be combined</param>
/// <param name="pred">Predicate to combine with</param>
/// <returns>Result of the AND, OR or XOR operation; <paramref name="input"/> unchanged for any other operation</returns>
public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, Operand input, Operand pred)
{
    switch (logicOp)
    {
        case BoolOp.And:
            return context.BitwiseAnd(input, pred);
        case BoolOp.Or:
            return context.BitwiseOr(input, pred);
        case BoolOp.Xor:
            return context.BitwiseExclusiveOr(input, pred);
        default:
            return input;
    }
}
/// <summary>
/// Extracts the byte or half-word selected by <paramref name="type"/> from a 32-bit value and extends it to 32 bits.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="src">Value to extract from</param>
/// <param name="type">Selects the part to extract and whether it is zero or sign extended</param>
/// <returns>The extended part; <paramref name="src"/> unchanged when the selection is not a byte or half-word</returns>
public static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
{
    int shift;
    int bits;
    bool signed;

    switch (type)
    {
        case VectorSelect.U8B0: shift = 0; bits = 8; signed = false; break;
        case VectorSelect.U8B1: shift = 8; bits = 8; signed = false; break;
        case VectorSelect.U8B2: shift = 16; bits = 8; signed = false; break;
        case VectorSelect.U8B3: shift = 24; bits = 8; signed = false; break;
        case VectorSelect.U16H0: shift = 0; bits = 16; signed = false; break;
        case VectorSelect.U16H1: shift = 16; bits = 16; signed = false; break;
        case VectorSelect.S8B0: shift = 0; bits = 8; signed = true; break;
        case VectorSelect.S8B1: shift = 8; bits = 8; signed = true; break;
        case VectorSelect.S8B2: shift = 16; bits = 8; signed = true; break;
        case VectorSelect.S8B3: shift = 24; bits = 8; signed = true; break;
        case VectorSelect.S16H0: shift = 0; bits = 16; signed = true; break;
        case VectorSelect.S16H1: shift = 16; bits = 16; signed = true; break;
        default:
            return src;
    }

    // The shift is always emitted, even for a shift amount of zero.
    Operand shifted = context.ShiftRightU32(src, Const(shift));

    return signed
        ? SignExtendTo32(context, shifted, bits)
        : ZeroExtendTo32(context, shifted, bits);
}
/// <summary>
/// Updates the zero and negative flags from an integer result.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="dest">Result the flags are computed from</param>
/// <param name="setCC">True to update the flags; when false nothing is emitted</param>
/// <param name="extended">True if the result is one word of a longer multi-word operation</param>
public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
{
    if (setCC)
    {
        if (extended)
        {
            // For a multi-word operation the zero flag must only stay set if every
            // word was zero, so the new comparison is ANDed with the current flag
            // rather than replacing it.
            Operand previousZero = GetZF();
            Operand allZero = context.BitwiseAnd(context.ICompareEqual(dest, Const(0)), previousZero);

            context.Copy(GetZF(), allZero);
        }
        else
        {
            context.Copy(GetZF(), context.ICompareEqual(dest, Const(0)));
        }

        context.Copy(GetNF(), context.ICompareLess(dest, Const(0)));
    }
}
/// <summary>
/// Updates the zero and negative flags from a floating-point result.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="dest">Result the flags are computed from</param>
/// <param name="setCC">True to update the flags; when false nothing is emitted</param>
/// <param name="fpType">Floating-point type of the result, passed to the comparisons</param>
public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC, Instruction fpType = Instruction.FP32)
{
    if (!setCC)
    {
        return;
    }

    // The zero constant is created as FP32 and converted when the result is FP64.
    Operand zero = fpType == Instruction.FP64
        ? context.FP32ConvertToFP64(ConstF(0))
        : ConstF(0);

    context.Copy(GetZF(), context.FPCompareEqual(dest, zero, fpType));
    context.Copy(GetNF(), context.FPCompareLess(dest, zero, fpType));
}
/// <summary>
/// Negates a value split in two words, by inverting all bits and then adding one.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="low">Low word of the value</param>
/// <param name="high">High word of the value</param>
/// <returns>Low and high words of the negated value</returns>
public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high)
{
    Operand invertedLow = context.BitwiseNot(low);
    Operand invertedHigh = context.BitwiseNot(high);

    // Add one to the low word, the carry out is then added to the high word.
    Operand negatedLow = AddWithCarry(context, invertedLow, Const(1), out Operand carry);
    Operand negatedHigh = context.IAdd(invertedHigh, carry);

    return (negatedLow, negatedHigh);
}
/// <summary>
/// Adds two values and also produces the carry out of the addition.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="lhs">First value to be added</param>
/// <param name="rhs">Second value to be added</param>
/// <param name="carryOut">Negated result of the unsigned "sum &lt; lhs" comparison</param>
/// <returns>Sum of both values</returns>
public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
{
    Operand sum = context.IAdd(lhs, rhs);

    // The addition wrapped around if the sum is lower than the first operand (C = Rd < Rn).
    Operand wrapped = context.ICompareLessUnsigned(sum, lhs);

    carryOut = context.INegate(wrapped);

    return sum;
}
}
}

View File

@ -0,0 +1,383 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits code for the AL2P instruction: the destination register receives source register A plus the 11-bit immediate.
/// </summary>
/// <param name="context">Emitter context positioned at the instruction</param>
public static void Al2p(EmitterContext context)
{
    InstAl2p op = context.GetOp<InstAl2p>();

    Operand dest = GetDest(op.Dest);
    Operand baseValue = GetSrcReg(context, op.SrcA);
    Operand sum = context.IAdd(baseValue, Const(op.Imm11));

    context.Copy(dest, sum);
}
/// <summary>
/// Emits code for the ALD instruction, which loads attribute words into consecutive registers.
/// </summary>
/// <param name="context">Emitter context positioned at the instruction</param>
public static void Ald(EmitterContext context)
{
InstAld op = context.GetOp<InstAld>();
// Some of those attributes are per invocation,
// so we should ignore any primitive vertex indexing for those.
bool hasPrimitiveVertex = AttributeMap.HasPrimitiveVertex(context.Config.Stage, op.O) && !op.P;
if (!op.Phys)
{
hasPrimitiveVertex &= HasPrimitiveVertex(op.Imm11);
}
// The vertex index comes from source register B, and is null when the access is not indexed by vertex.
Operand primVertex = hasPrimitiveVertex ? context.Copy(GetSrcReg(context, op.SrcB)) : null;
// AlSize + 1 words are loaded, each into the next register starting at op.Dest.
for (int index = 0; index < (int)op.AlSize + 1; index++)
{
Register rd = new Register(op.Dest + index, RegisterType.Gpr);
// Stop at the first destination that is the zero register.
if (rd.IsRZ)
{
break;
}
if (op.Phys)
{
// Physical access: the offset is only known at runtime (source register A) and is treated as
// an offset into the user attributes. Bits 4 and above select the vector, bits 2-3 the component.
// NOTE(review): the loop index is not added to the offset here, so every iteration loads
// the same word. Confirm that physical loads are always single word.
Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
Operand vecIndex = context.ShiftRightU32(offset, Const(4));
Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
StorageKind storageKind = op.O ? StorageKind.Output : StorageKind.Input;
context.Copy(Register(rd), context.Load(storageKind, IoVariable.UserDefined, primVertex, vecIndex, elemIndex));
}
else if (op.SrcB == RegisterConsts.RegisterZeroIndex || op.P)
{
// Access with a constant offset, where source register B is the zero register or the access is per patch.
int offset = FixedFuncToUserAttribute(context.Config, op.Imm11 + index * 4, op.O);
context.FlagAttributeRead(offset);
bool isOutput = op.O && CanLoadOutput(offset);
if (!op.P && !isOutput && TryConvertIdToIndexForVulkan(context, offset, out Operand value))
{
context.Copy(Register(rd), value);
}
else
{
context.Copy(Register(rd), AttributeMap.GenerateAttributeLoad(context, primVertex, offset, isOutput, op.P));
}
}
else
{
// Access with a constant offset and a source register B that is not the zero register; never per patch.
int offset = FixedFuncToUserAttribute(context.Config, op.Imm11 + index * 4, op.O);
context.FlagAttributeRead(offset);
bool isOutput = op.O && CanLoadOutput(offset);
context.Copy(Register(rd), AttributeMap.GenerateAttributeLoad(context, primVertex, offset, isOutput, false));
}
}
}
/// <summary>
/// Emits code for the AST instruction, which stores consecutive registers into output attribute words.
/// </summary>
/// <param name="context">Emitter context positioned at the instruction</param>
public static void Ast(EmitterContext context)
{
InstAst op = context.GetOp<InstAst>();
// AlSize + 1 words are stored, each from the next register starting at op.SrcB.
for (int index = 0; index < (int)op.AlSize + 1; index++)
{
// Stop once the register index goes past the zero register index.
if (op.SrcB + index > RegisterConsts.RegisterZeroIndex)
{
break;
}
Register rd = new Register(op.SrcB + index, RegisterType.Gpr);
if (op.Phys)
{
// Physical access: the offset is only known at runtime (source register A) and is treated as
// an offset into the user attributes. Bits 4 and above select the vector, bits 2-3 the component.
// NOTE(review): the loop index is not added to the offset here, so every iteration stores
// to the same word. Confirm that physical stores are always single word.
Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
Operand vecIndex = context.ShiftRightU32(offset, Const(4));
Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
Operand invocationId = AttributeMap.HasInvocationId(context.Config.Stage, isOutput: true)
? context.Load(StorageKind.Input, IoVariable.InvocationId)
: null;
context.Store(StorageKind.Output, IoVariable.UserDefined, invocationId, vecIndex, elemIndex, Register(rd));
}
else
{
// TODO: Support indirect stores using Ra.
int offset = op.Imm11 + index * 4;
// NOTE(review): this leaves the whole instruction, so the remaining words are skipped too
// when one of them targets an unused output. Confirm that "continue" is not what is wanted here.
if (!context.Config.IsUsedOutputAttribute(offset))
{
return;
}
offset = FixedFuncToUserAttribute(context.Config, offset, isOutput: true);
context.FlagAttributeWritten(offset);
AttributeMap.GenerateAttributeStore(context, offset, op.P, Register(rd));
}
}
}
/// <summary>
/// Emits IR for the IPA instruction, which loads an input attribute and applies the
/// optional multiply and saturate modifiers before writing the destination register.
/// </summary>
/// <param name="context">Emitter context positioned at the IPA instruction</param>
public static void Ipa(EmitterContext context)
{
    InstIpa op = context.GetOp<InstIpa>();

    int attr = op.Imm10;

    context.FlagAttributeRead(attr);

    Operand value;
    bool fixedFunc = false;

    if (!op.Idx)
    {
        fixedFunc = TryFixedFuncToUserAttributeIpa(context, attr, out value);

        bool isUserAttr = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;
        bool isFragCoordXY = attr == AttributeConsts.PositionX || attr == AttributeConsts.PositionY;

        if (isUserAttr)
        {
            int location = (attr - AttributeConsts.UserAttributeBase) >> 4;

            if (context.Config.ImapTypes[location].GetFirstUsedType() == PixelImap.Perspective)
            {
                // Perspective inputs are multiplied by the fourth component of the fragment coordinate.
                Operand fragCoordW = context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3));
                value = context.FPMultiply(value, fragCoordW);
            }
        }
        else if (isFragCoordXY)
        {
            // The shader does not expect scaled values, so when resolution scaling is active
            // FragCoord X/Y has to be divided by the render target scale.
            Operand renderScale = context.Load(StorageKind.Input, IoVariable.SupportBlockRenderScale, null, Const(0));
            value = context.FPDivide(value, renderScale);
        }
        else if (attr == AttributeConsts.FrontFacing && context.Config.GpuAccessor.QueryHostHasFrontFacingBug())
        {
            // On Intel GPUs gl_FrontFacing can come back flipped depending on how it is accessed.
            // Converting, negating and comparing against zero works around that.
            Operand asFloat = context.IConvertS32ToFP32(value);
            Operand negated = context.INegate(asFloat);
            value = context.ICompareLess(negated, Const(0));
        }
    }
    else
    {
        // Indexed access: the byte offset comes from a register and is split into
        // a vector index (offset / 16) and a component index ((offset / 4) & 3).
        Operand byteOffset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
        Operand vecIndex = context.ShiftRightU32(byteOffset, Const(4));
        Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(byteOffset, Const(2)), Const(3));

        value = context.Load(StorageKind.Input, IoVariable.UserDefined, null, vecIndex, elemIndex);

        Operand fragCoordW = context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3));
        value = context.FPMultiply(value, fragCoordW);
    }

    // The extra multiply is skipped for attributes handled by the fixed function path.
    if (!fixedFunc && op.IpaOp == IpaOp.Multiply)
    {
        value = context.FPMultiply(value, GetSrcReg(context, op.SrcB));
    }

    value = context.FPSaturate(value, op.Sat);

    context.Copy(GetDest(op.Dest), value);
}
/// <summary>
/// Emits IR for the ISBERD instruction by forwarding the source register to the destination.
/// </summary>
/// <param name="context">Emitter context positioned at the ISBERD instruction</param>
public static void Isberd(EmitterContext context)
{
    InstIsberd op = context.GetOp<InstIsberd>();

    // ISBERD loads from ISBE (Internal Stage Buffer Entry) memory. The memory itself is not
    // emulated: the offset is passed through unchanged, since the result is usually fed to ALD
    // to load vertex data on geometry or tessellation shaders.
    // The offset is (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex, and because
    // PrimitiveIndex is hardcoded to zero, it reduces to VertexIndex.
    Operand dest = GetDest(op.Dest);
    Operand vertexOffset = GetSrcReg(context, op.SrcA);

    context.Copy(dest, vertexOffset);
}
/// <summary>
/// Emits IR for the register form of the OUT instruction.
/// </summary>
/// <param name="context">Emitter context positioned at the OUT instruction</param>
public static void OutR(EmitterContext context)
{
    var outType = context.GetOp<InstOutR>().OutType;

    bool emit = outType.HasFlag(OutType.Emit);
    bool cut = outType.HasFlag(OutType.Cut);

    EmitOut(context, emit, cut);
}
/// <summary>
/// Emits IR for the immediate form of the OUT instruction.
/// </summary>
/// <param name="context">Emitter context positioned at the OUT instruction</param>
public static void OutI(EmitterContext context)
{
    var outType = context.GetOp<InstOutI>().OutType;

    bool emit = outType.HasFlag(OutType.Emit);
    bool cut = outType.HasFlag(OutType.Cut);

    EmitOut(context, emit, cut);
}
/// <summary>
/// Emits IR for the constant buffer form of the OUT instruction.
/// </summary>
/// <param name="context">Emitter context positioned at the OUT instruction</param>
public static void OutC(EmitterContext context)
{
    var outType = context.GetOp<InstOutC>().OutType;

    bool emit = outType.HasFlag(OutType.Emit);
    bool cut = outType.HasFlag(OutType.Cut);

    EmitOut(context, emit, cut);
}
/// <summary>
/// Emits the vertex emission and/or primitive end operations requested by an OUT instruction.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="emit">True to emit the current vertex</param>
/// <param name="cut">True to end the current primitive</param>
private static void EmitOut(EmitterContext context, bool emit, bool cut)
{
    if (!(emit || cut))
    {
        context.Config.GpuAccessor.Log("Invalid OUT encoding.");
    }

    if (emit)
    {
        if (context.Config.LastInVertexPipeline)
        {
            context.PrepareForVertexReturn(out var tempXLocal, out var tempYLocal, out var tempZLocal);

            context.EmitVertex();

            // Restore output position value before transformation.
            // The saved components have to be stored back to the output position itself;
            // copying them into the result of an input load would leave the output untouched.
            if (tempXLocal != null)
            {
                context.Store(StorageKind.Output, IoVariable.Position, null, Const(0), tempXLocal);
            }

            if (tempYLocal != null)
            {
                context.Store(StorageKind.Output, IoVariable.Position, null, Const(1), tempYLocal);
            }

            if (tempZLocal != null)
            {
                context.Store(StorageKind.Output, IoVariable.Position, null, Const(2), tempZLocal);
            }
        }
        else
        {
            context.EmitVertex();
        }
    }

    if (cut)
    {
        context.EndPrimitive();
    }
}
/// <summary>
/// Checks that an attribute is none of primitive ID, tessellation coordinate X or tessellation coordinate Y.
/// </summary>
/// <param name="attr">Attribute offset</param>
/// <returns>False for the three attributes listed above, true otherwise</returns>
private static bool HasPrimitiveVertex(int attr)
{
    bool isExcluded =
        attr == AttributeConsts.PrimitiveId ||
        attr == AttributeConsts.TessCoordX ||
        attr == AttributeConsts.TessCoordY;

    return !isExcluded;
}
/// <summary>
/// Checks that an attribute is neither tessellation coordinate X nor tessellation coordinate Y.
/// </summary>
/// <param name="attr">Attribute offset</param>
/// <returns>False for the tessellation coordinate X/Y attributes, true otherwise</returns>
private static bool CanLoadOutput(int attr)
{
    bool isTessCoord = attr == AttributeConsts.TessCoordX || attr == AttributeConsts.TessCoordY;

    return !isTessCoord;
}
/// <summary>
/// Produces the operand for an IPA attribute read, remapping fixed function attributes
/// (front/back colors, fog coordinate, texture coordinates) where needed.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="attr">Attribute offset being read</param>
/// <param name="selectedAttr">Operand holding the loaded (or constant) attribute value</param>
/// <returns>True if the attribute was handled as a fixed function attribute, false otherwise</returns>
private static bool TryFixedFuncToUserAttributeIpa(EmitterContext context, int attr, out Operand selectedAttr)
{
    bool isFrontColor = attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.BackColorDiffuseR;

    if (isFrontColor || attr == AttributeConsts.FogCoord)
    {
        // TODO: If two sided rendering is enabled, then front color reads should return
        // FrontColor if the fragment is front facing, and back color otherwise.
        // TODO: We likely need to emulate the fixed-function functionality for FogCoord here.
        selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));

        return true;
    }

    if (attr >= AttributeConsts.BackColorDiffuseR && attr < AttributeConsts.ClipDistance0)
    {
        // This range is not loaded at all: the fourth component reads as 1, the others as 0.
        bool isFourthComponent = ((attr >> 2) & 3) == 3;

        selectedAttr = ConstF(isFourthComponent ? 1f : 0f);

        return true;
    }

    if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
    {
        selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));

        return true;
    }

    // Not a fixed function attribute, load it as is.
    selectedAttr = GenerateIpaLoad(context, attr);

    return false;
}
/// <summary>
/// Generates a non-output, non-per-patch attribute load without a primitive vertex index.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="offset">Attribute offset to load</param>
/// <returns>Operand with the loaded value</returns>
private static Operand GenerateIpaLoad(EmitterContext context, int offset)
    => AttributeMap.GenerateAttributeLoad(context, null, offset, isOutput: false, isPerPatch: false);
/// <summary>
/// Remaps a fixed function attribute offset (layer, fog coordinate, colors, texture coordinates)
/// to a user attribute offset. Other offsets are returned unchanged.
/// </summary>
/// <param name="config">Shader configuration</param>
/// <param name="attr">Attribute offset</param>
/// <param name="isOutput">True if the attribute is an output, false for an input</param>
/// <returns>The remapped offset, or <paramref name="attr"/> if no remapping applies</returns>
private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, bool isOutput)
{
    bool hostSupportsLayer = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();

    // When layer has to be passed through a user attribute, index 0 is taken by it,
    // so the remaining fixed function attributes start one index later.
    int firstIndex = hostSupportsLayer ? 0 : 1;

    if (attr == AttributeConsts.Layer && config.Stage != ShaderStage.Geometry && !hostSupportsLayer)
    {
        int layerAttr = FixedFuncToUserAttribute(config, attr, AttributeConsts.Layer, 0, isOutput);

        config.SetLayerOutputAttribute(layerAttr);

        return layerAttr;
    }

    if (attr == AttributeConsts.FogCoord)
    {
        return FixedFuncToUserAttribute(config, attr, AttributeConsts.FogCoord, firstIndex, isOutput);
    }

    if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0)
    {
        return FixedFuncToUserAttribute(config, attr, AttributeConsts.FrontColorDiffuseR, firstIndex + 1, isOutput);
    }

    if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
    {
        return FixedFuncToUserAttribute(config, attr, AttributeConsts.TexCoordBase, firstIndex + 5, isOutput);
    }

    return attr;
}
/// <summary>
/// Remaps an attribute inside a fixed function range to a free user attribute slot.
/// </summary>
/// <param name="config">Shader configuration</param>
/// <param name="attr">Attribute offset inside the fixed function range</param>
/// <param name="baseAttr">Offset where the fixed function range starts</param>
/// <param name="baseIndex">Index from which the search for a free user attribute starts</param>
/// <param name="isOutput">True if the attribute is an output, false for an input</param>
/// <returns>The user attribute offset, or <paramref name="attr"/> unchanged if no slot was available</returns>
private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, int baseAttr, int baseIndex, bool isOutput)
{
    // Each vector takes 16 bytes, so this is the vector index inside the fixed function range.
    int vectorIndex = (attr - baseAttr) >> 4;
    int slot = config.GetFreeUserAttribute(isOutput, baseIndex + vectorIndex);

    // The unsigned comparison also rejects negative slot values.
    if ((uint)slot >= Constants.MaxAttributes)
    {
        config.GpuAccessor.Log($"No enough user attributes for fixed attribute offset 0x{attr:X}.");

        return attr;
    }

    if (isOutput)
    {
        config.SetOutputUserAttributeFixedFunc(slot);
    }
    else
    {
        config.SetInputUserAttributeFixedFunc(slot);
    }

    // Keep the byte offset inside the vector (low 4 bits) of the original attribute.
    return AttributeConsts.UserAttributeBase + slot * 16 + (attr & 0xf);
}
/// <summary>
/// On Vulkan, replaces instance ID and vertex ID reads with loads of the index based built-ins.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="attr">Attribute offset being read</param>
/// <param name="value">Replacement value, or null if no replacement was made</param>
/// <returns>True if <paramref name="value"/> holds a replacement, false otherwise</returns>
private static bool TryConvertIdToIndexForVulkan(EmitterContext context, int attr, out Operand value)
{
    value = null;

    if (context.Config.Options.TargetApi != TargetApi.Vulkan)
    {
        return false;
    }

    if (attr == AttributeConsts.InstanceId)
    {
        // The instance ID is computed as instance index minus base instance.
        Operand instanceIndex = context.Load(StorageKind.Input, IoVariable.InstanceIndex);
        Operand baseInstance = context.Load(StorageKind.Input, IoVariable.BaseInstance);

        value = context.ISubtract(instanceIndex, baseInstance);

        return true;
    }

    if (attr == AttributeConsts.VertexId)
    {
        value = context.Load(StorageKind.Input, IoVariable.VertexIndex);

        return true;
    }

    return false;
}
}
}

View File

@ -0,0 +1,44 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits IR for the BAR instruction. Only the sync mode is supported, other modes are logged.
/// </summary>
/// <param name="context">Emitter context positioned at the BAR instruction</param>
public static void Bar(EmitterContext context)
{
    InstBar op = context.GetOp<InstBar>();

    // TODO: Support other modes.
    if (op.BarOp != BarOp.Sync)
    {
        context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.BarOp}.");

        return;
    }

    context.Barrier();
}
/// <summary>
/// Handles the DEPBAR instruction, for which no IR is emitted.
/// </summary>
/// <param name="context">Emitter context positioned at the DEPBAR instruction</param>
public static void Depbar(EmitterContext context)
{
    // The operation is still fetched, but nothing is generated for it.
    _ = context.GetOp<InstDepbar>();
}
/// <summary>
/// Emits IR for the MEMBAR instruction: a group memory barrier for the CTA level,
/// and a full memory barrier for every other level.
/// </summary>
/// <param name="context">Emitter context positioned at the MEMBAR instruction</param>
public static void Membar(EmitterContext context)
{
    InstMembar op = context.GetOp<InstMembar>();

    bool isCtaLevel = op.Membar == Decoders.Membar.Cta;

    if (!isCtaLevel)
    {
        context.MemoryBarrier();

        return;
    }

    context.GroupMemoryBarrier();
}
}
}

View File

@ -0,0 +1,194 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits a bitfield extract (BFE) whose position/size operand comes from a register.
/// </summary>
/// <param name="context">Emitter context positioned at the BFE instruction</param>
public static void BfeR(EmitterContext context)
{
    InstBfeR op = context.GetOp<InstBfeR>();

    Operand value = GetSrcReg(context, op.SrcA);
    Operand posAndSize = GetSrcReg(context, op.SrcB);

    EmitBfe(context, value, posAndSize, op.Dest, op.Brev, op.Signed);
}
/// <summary>
/// Emits a bitfield extract (BFE) whose position/size operand is an immediate.
/// </summary>
/// <param name="context">Emitter context positioned at the BFE instruction</param>
public static void BfeI(EmitterContext context)
{
    InstBfeI op = context.GetOp<InstBfeI>();

    Operand value = GetSrcReg(context, op.SrcA);
    Operand posAndSize = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitBfe(context, value, posAndSize, op.Dest, op.Brev, op.Signed);
}
/// <summary>
/// Emits a bitfield extract (BFE) whose position/size operand comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context positioned at the BFE instruction</param>
public static void BfeC(EmitterContext context)
{
    InstBfeC op = context.GetOp<InstBfeC>();

    Operand value = GetSrcReg(context, op.SrcA);
    Operand posAndSize = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitBfe(context, value, posAndSize, op.Dest, op.Brev, op.Signed);
}
/// <summary>
/// Emits a bitfield insert (BFI) with register operands.
/// </summary>
/// <param name="context">Emitter context positioned at the BFI instruction</param>
public static void BfiR(EmitterContext context)
{
    InstBfiR op = context.GetOp<InstBfiR>();

    Operand insert = GetSrcReg(context, op.SrcA);
    Operand posAndSize = GetSrcReg(context, op.SrcB);
    Operand baseValue = GetSrcReg(context, op.SrcC);

    EmitBfi(context, insert, posAndSize, baseValue, op.Dest);
}
/// <summary>
/// Emits a bitfield insert (BFI) whose position/size operand is an immediate.
/// </summary>
/// <param name="context">Emitter context positioned at the BFI instruction</param>
public static void BfiI(EmitterContext context)
{
    InstBfiI op = context.GetOp<InstBfiI>();

    Operand insert = GetSrcReg(context, op.SrcA);
    Operand posAndSize = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand baseValue = GetSrcReg(context, op.SrcC);

    EmitBfi(context, insert, posAndSize, baseValue, op.Dest);
}
/// <summary>
/// Emits a bitfield insert (BFI) whose position/size operand comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context positioned at the BFI instruction</param>
public static void BfiC(EmitterContext context)
{
    InstBfiC op = context.GetOp<InstBfiC>();

    Operand insert = GetSrcReg(context, op.SrcA);
    Operand posAndSize = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand baseValue = GetSrcReg(context, op.SrcC);

    EmitBfi(context, insert, posAndSize, baseValue, op.Dest);
}
/// <summary>
/// Emits a bitfield insert (BFI) where the position/size operand comes from the C register
/// and the base value comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context positioned at the BFI instruction</param>
public static void BfiRc(EmitterContext context)
{
    InstBfiRc op = context.GetOp<InstBfiRc>();

    Operand insert = GetSrcReg(context, op.SrcA);
    Operand posAndSize = GetSrcReg(context, op.SrcC);
    Operand baseValue = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitBfi(context, insert, posAndSize, baseValue, op.Dest);
}
/// <summary>
/// Emits a find leading one (FLO) with a register source.
/// </summary>
/// <param name="context">Emitter context positioned at the FLO instruction</param>
public static void FloR(EmitterContext context)
{
    InstFloR op = context.GetOp<InstFloR>();

    Operand source = GetSrcReg(context, op.SrcB);

    EmitFlo(context, source, op.Dest, op.NegB, op.Sh, op.Signed);
}
/// <summary>
/// Emits a find leading one (FLO) with an immediate source.
/// </summary>
/// <param name="context">Emitter context positioned at the FLO instruction</param>
public static void FloI(EmitterContext context)
{
    InstFloI op = context.GetOp<InstFloI>();

    Operand source = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitFlo(context, source, op.Dest, op.NegB, op.Sh, op.Signed);
}
/// <summary>
/// Emits a find leading one (FLO) with a constant buffer source.
/// </summary>
/// <param name="context">Emitter context positioned at the FLO instruction</param>
public static void FloC(EmitterContext context)
{
    InstFloC op = context.GetOp<InstFloC>();

    Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitFlo(context, source, op.Dest, op.NegB, op.Sh, op.Signed);
}
/// <summary>
/// Emits a population count (POPC) with a register source.
/// </summary>
/// <param name="context">Emitter context positioned at the POPC instruction</param>
public static void PopcR(EmitterContext context)
{
    InstPopcR op = context.GetOp<InstPopcR>();

    Operand source = GetSrcReg(context, op.SrcB);

    EmitPopc(context, source, op.Dest, op.NegB);
}
/// <summary>
/// Emits a population count (POPC) with an immediate source.
/// </summary>
/// <param name="context">Emitter context positioned at the POPC instruction</param>
public static void PopcI(EmitterContext context)
{
    InstPopcI op = context.GetOp<InstPopcI>();

    Operand source = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitPopc(context, source, op.Dest, op.NegB);
}
/// <summary>
/// Emits a population count (POPC) with a constant buffer source.
/// </summary>
/// <param name="context">Emitter context positioned at the POPC instruction</param>
public static void PopcC(EmitterContext context)
{
    InstPopcC op = context.GetOp<InstPopcC>();

    Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitPopc(context, source, op.Dest, op.NegB);
}
/// <summary>
/// Emits a bitfield extract from <paramref name="srcA"/>, using the position (bits 0-7)
/// and size (bits 8-15) packed in <paramref name="srcB"/>.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcA">Value to extract the bits from</param>
/// <param name="srcB">Packed position and size</param>
/// <param name="rd">Destination register index</param>
/// <param name="bitReverse">True to reverse the bits of the source before extracting</param>
/// <param name="isSigned">True for a signed extract, false for an unsigned one</param>
private static void EmitBfe(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    int rd,
    bool bitReverse,
    bool isSigned)
{
    Operand value = bitReverse ? context.BitfieldReverse(srcA) : srcA;

    Operand offset = context.BitwiseAnd(srcB, Const(0xff));
    Operand count = context.BitfieldExtractU32(srcB, Const(8), Const(8));

    Operand extracted;

    if (isSigned)
    {
        extracted = context.BitfieldExtractS32(value, offset, count);
    }
    else
    {
        extracted = context.BitfieldExtractU32(value, offset, count);
    }

    context.Copy(GetDest(rd), extracted);

    // TODO: CC, X, corner cases.
}
/// <summary>
/// Emits a bitfield insert of <paramref name="srcA"/> into <paramref name="srcC"/>, using the
/// position (bits 0-7) and size (bits 8-15) packed in <paramref name="srcB"/>.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcA">Value to be inserted</param>
/// <param name="srcB">Packed position and size</param>
/// <param name="srcC">Base value that receives the inserted bits</param>
/// <param name="rd">Destination register index</param>
private static void EmitBfi(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int rd)
{
    Operand offset = context.BitwiseAnd(srcB, Const(0xff));
    Operand count = context.BitfieldExtractU32(srcB, Const(8), Const(8));

    Operand inserted = context.BitfieldInsert(srcC, srcA, offset, count);

    context.Copy(GetDest(rd), inserted);
}
/// <summary>
/// Emits a find leading one operation and writes the result to the destination register.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="src">Source value</param>
/// <param name="rd">Destination register index</param>
/// <param name="invert">Passed to the bitwise not applied to the source before searching</param>
/// <param name="sh">True to search from the least significant bit of the bit-reversed source</param>
/// <param name="isSigned">True to use the signed most significant bit search; ignored when <paramref name="sh"/> is set</param>
private static void EmitFlo(EmitterContext context, Operand src, int rd, bool invert, bool sh, bool isSigned)
{
    Operand value = context.BitwiseNot(src, invert);

    Operand bitIndex;

    if (!sh)
    {
        if (isSigned)
        {
            bitIndex = context.FindMSBS32(value);
        }
        else
        {
            bitIndex = context.FindMSBU32(value);
        }
    }
    else
    {
        Operand reversed = context.BitfieldReverse(value);

        bitIndex = context.FindLSB(reversed);
    }

    context.Copy(GetDest(rd), bitIndex);
}
/// <summary>
/// Emits a bit count of the source and writes it to the destination register.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="src">Source value</param>
/// <param name="rd">Destination register index</param>
/// <param name="invert">Passed to the bitwise not applied to the source before counting</param>
private static void EmitPopc(EmitterContext context, Operand src, int rd, bool invert)
{
    Operand value = context.BitwiseNot(src, invert);
    Operand count = context.BitCount(value);

    context.Copy(GetDest(rd), count);
}
}
}

View File

@ -0,0 +1,87 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits IR for the CSET instruction: evaluates a condition code, combines it with a
/// predicate, and writes the result to a general-purpose register.
/// </summary>
/// <param name="context">Emitter context positioned at the CSET instruction</param>
public static void Cset(EmitterContext context)
{
    InstCset op = context.GetOp<InstCset>();

    Operand condition = GetCondition(context, op.Ccc);
    Operand predicate = GetPredicate(context, op.SrcPred, op.SrcPredInv);
    Operand combined = GetPredLogicalOp(context, op.Bop, condition, predicate);

    Operand dest = GetDest(op.Dest);

    // With BVal set, the destination receives floating point 1 when true and integer 0 when false.
    Operand result = op.BVal
        ? context.ConditionalSelect(combined, ConstF(1), Const(0))
        : combined;

    context.Copy(dest, result);

    // TODO: CC.
}
/// <summary>
/// Emits IR for the CSETP instruction: evaluates a condition code and its negation, combines
/// each with a predicate, and writes both results to predicate registers.
/// </summary>
/// <param name="context">Emitter context positioned at the CSETP instruction</param>
public static void Csetp(EmitterContext context)
{
    InstCsetp op = context.GetOp<InstCsetp>();

    Operand condition = GetCondition(context, op.Ccc);
    Operand notCondition = context.BitwiseNot(condition);

    Operand predicate = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    Operand first = GetPredLogicalOp(context, op.Bop, condition, predicate);
    Operand second = GetPredLogicalOp(context, op.Bop, notCondition, predicate);

    context.Copy(Register(op.DestPred, RegisterType.Predicate), first);
    context.Copy(Register(op.DestPredInv, RegisterType.Predicate), second);

    // TODO: CC.
}
/// <summary>
/// Builds the operand that evaluates a condition code from the flag operands
/// returned by GetNF, GetZF, GetVF and GetCF.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cond">Condition code to evaluate</param>
/// <param name="defaultCond">Constant used for condition codes that are not handled</param>
/// <returns>Operand holding the result of the condition</returns>
private static Operand GetCondition(EmitterContext context, Ccc cond, int defaultCond = IrConsts.True)
{
    // Short aliases for the bitwise emitters. Arguments are evaluated left to right,
    // so the operations are emitted in the same order as the nested expressions read.
    Operand Not(Operand a) => context.BitwiseNot(a);
    Operand And(Operand a, Operand b) => context.BitwiseAnd(a, b);
    Operand Or(Operand a, Operand b) => context.BitwiseOr(a, b);
    Operand Xor(Operand a, Operand b) => context.BitwiseExclusiveOr(a, b);

    switch (cond)
    {
        case Ccc.F:
            return Const(IrConsts.False);
        case Ccc.Lt:
            return Xor(And(GetNF(), Not(GetZF())), GetVF());
        case Ccc.Eq:
            return And(Not(GetNF()), GetZF());
        case Ccc.Le:
            return Xor(GetNF(), Or(GetZF(), GetVF()));
        case Ccc.Gt:
            return Not(Or(Xor(GetNF(), GetVF()), GetZF()));
        case Ccc.Ne:
            return Not(GetZF());
        case Ccc.Ge:
            return Not(Xor(GetNF(), GetVF()));
        case Ccc.Num:
            return Not(And(GetNF(), GetZF()));
        case Ccc.Nan:
            return And(GetNF(), GetZF());
        case Ccc.Ltu:
            return Xor(GetNF(), GetVF());
        case Ccc.Equ:
            return GetZF();
        case Ccc.Leu:
            return Or(Xor(GetNF(), GetVF()), GetZF());
        case Ccc.Gtu:
            return Xor(Not(GetNF()), Or(GetVF(), GetZF()));
        case Ccc.Neu:
            return Or(GetNF(), Not(GetZF()));
        case Ccc.Geu:
            return Xor(Or(Not(GetNF()), GetZF()), GetVF());
        case Ccc.T:
            return Const(IrConsts.True);
        case Ccc.Off:
            return Not(GetVF());
        case Ccc.Lo:
            return Not(GetCF());
        case Ccc.Sff:
            return Not(GetNF());
        case Ccc.Ls:
            return Or(GetZF(), Not(GetCF()));
        case Ccc.Hi:
            return And(GetCF(), Not(GetZF()));
        case Ccc.Sft:
            return GetNF();
        case Ccc.Hs:
            return GetCF();
        case Ccc.Oft:
            return GetVF();
        case Ccc.Rle:
            return Or(GetNF(), GetZF());
        case Ccc.Rgt:
            return Not(Or(GetNF(), GetZF()));
        default:
            return Const(defaultCond);
    }
}
}
}

View File

@ -0,0 +1,425 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits a float to float conversion (F2F) with a register source.
/// </summary>
/// <param name="context">Emitter context positioned at the F2F instruction</param>
public static void F2fR(EmitterContext context)
{
    InstF2fR op = context.GetOp<InstF2fR>();

    Operand source = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);

    EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, source, op.Dest, op.AbsB, op.NegB, op.Sat);
}
/// <summary>
/// Emits a float to float conversion (F2F) with an immediate source.
/// </summary>
/// <param name="context">Emitter context positioned at the F2F instruction</param>
public static void F2fI(EmitterContext context)
{
    InstF2fI op = context.GetOp<InstF2fI>();

    Operand source = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));

    EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, source, op.Dest, op.AbsB, op.NegB, op.Sat);
}
/// <summary>
/// Emits a float to float conversion (F2F) with a constant buffer source.
/// </summary>
/// <param name="context">Emitter context positioned at the F2F instruction</param>
public static void F2fC(EmitterContext context)
{
    InstF2fC op = context.GetOp<InstF2fC>();

    Operand source = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);

    EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, source, op.Dest, op.AbsB, op.NegB, op.Sat);
}
/// <summary>
/// Emits a float to integer conversion (F2I) with a register source.
/// </summary>
/// <param name="context">Emitter context positioned at the F2I instruction</param>
public static void F2iR(EmitterContext context)
{
    InstF2iR op = context.GetOp<InstF2iR>();

    Operand source = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);

    EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, source, op.Dest, op.AbsB, op.NegB);
}
/// <summary>
/// Emits a float to integer conversion (F2I) with an immediate source.
/// </summary>
/// <param name="context">Emitter context positioned at the F2I instruction</param>
public static void F2iI(EmitterContext context)
{
    InstF2iI op = context.GetOp<InstF2iI>();

    Operand source = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));

    EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, source, op.Dest, op.AbsB, op.NegB);
}
/// <summary>
/// Emits a float to integer conversion (F2I) with a constant buffer source.
/// </summary>
/// <param name="context">Emitter context positioned at the F2I instruction</param>
public static void F2iC(EmitterContext context)
{
    InstF2iC op = context.GetOp<InstF2iC>();

    Operand source = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);

    EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, source, op.Dest, op.AbsB, op.NegB);
}
/// <summary>
/// Emits an integer to float conversion (I2F) with a register source.
/// </summary>
/// <param name="context">Emitter context positioned at the I2F instruction</param>
public static void I2fR(EmitterContext context)
{
    InstI2fR op = context.GetOp<InstI2fR>();

    Operand source = GetSrcReg(context, op.SrcB);

    EmitI2F(context, op.ISrcFmt, op.DstFmt, source, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
/// <summary>
/// Emits an integer to float conversion (I2F) with an immediate source.
/// </summary>
/// <param name="context">Emitter context positioned at the I2F instruction</param>
public static void I2fI(EmitterContext context)
{
    InstI2fI op = context.GetOp<InstI2fI>();

    Operand source = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitI2F(context, op.ISrcFmt, op.DstFmt, source, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
/// <summary>
/// Emits an integer to float conversion (I2F) with a constant buffer source.
/// </summary>
/// <param name="context">Emitter context positioned at the I2F instruction</param>
public static void I2fC(EmitterContext context)
{
    InstI2fC op = context.GetOp<InstI2fC>();

    Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitI2F(context, op.ISrcFmt, op.DstFmt, source, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
/// <summary>
/// Emits an integer to integer conversion (I2I) with a register source.
/// </summary>
/// <param name="context">Emitter context positioned at the I2I instruction</param>
public static void I2iR(EmitterContext context)
{
    InstI2iR op = context.GetOp<InstI2iR>();

    Operand source = GetSrcReg(context, op.SrcB);

    EmitI2I(context, op.ISrcFmt, op.IDstFmt, source, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
/// <summary>
/// Emits an integer to integer conversion (I2I) with an immediate source.
/// </summary>
/// <param name="context">Emitter context positioned at the I2I instruction</param>
public static void I2iI(EmitterContext context)
{
    InstI2iI op = context.GetOp<InstI2iI>();

    Operand source = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitI2I(context, op.ISrcFmt, op.IDstFmt, source, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
/// <summary>
/// Emits an integer to integer conversion (I2I) with a constant buffer source.
/// </summary>
/// <param name="context">Emitter context positioned at the I2I instruction</param>
public static void I2iC(EmitterContext context)
{
    InstI2iC op = context.GetOp<InstI2iC>();

    Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitI2I(context, op.ISrcFmt, op.IDstFmt, source, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
/// <summary>
/// Emits a float to float conversion: rounds when source and destination formats match,
/// converts between FP32 and FP64 otherwise, then saturates and writes the result.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcType">Source floating point format</param>
/// <param name="dstType">Destination floating point format</param>
/// <param name="roundingMode">Rounding applied when both formats are the same</param>
/// <param name="src">Source value</param>
/// <param name="rd">Destination register index</param>
/// <param name="absolute">True to take the absolute value of the source</param>
/// <param name="negate">True to negate the source</param>
/// <param name="saturate">True to saturate the result</param>
private static void EmitF2F(
    EmitterContext context,
    DstFmt srcType,
    DstFmt dstType,
    IntegerRound roundingMode,
    Operand src,
    int rd,
    bool absolute,
    bool negate,
    bool saturate)
{
    Instruction srcFpType = srcType.ToInstFPType();

    Operand value = context.FPAbsNeg(src, absolute, negate, srcFpType);

    if (srcType == dstType)
    {
        // Same format on both sides, so the instruction only rounds to an integral value.
        switch (roundingMode)
        {
            case IntegerRound.Round:
                value = context.FPRound(value, srcFpType);
                break;
            case IntegerRound.Floor:
                value = context.FPFloor(value, srcFpType);
                break;
            case IntegerRound.Ceil:
                value = context.FPCeiling(value, srcFpType);
                break;
            case IntegerRound.Trunc:
                value = context.FPTruncate(value, srcFpType);
                break;
        }
    }
    else if (srcType == DstFmt.F32 && dstType == DstFmt.F64)
    {
        value = context.FP32ConvertToFP64(value);
    }
    else if (srcType == DstFmt.F64 && dstType == DstFmt.F32)
    {
        value = context.FP64ConvertToFP32(value);
    }

    // FP16 <-> FP32 needs no conversion here, because FP16 operations are done as FP32 directly.
    // FP16 <-> FP64 conversions are invalid.

    value = context.FPSaturate(value, saturate, dstType.ToInstFPType());

    WriteFP(context, dstType, value, rd);

    // TODO: CC.
}
/// <summary>
/// Emits a float to integer conversion: applies abs/neg and rounding to the source, converts it
/// to a 32-bit integer, clamps 16-bit destinations to their range, and writes the result.
/// </summary>
/// <remarks>
/// 64-bit integer destinations are not implemented. Only a 32-bit conversion is emitted for them,
/// and a message is logged for both the signed and the unsigned 64-bit formats.
/// </remarks>
/// <param name="context">Emitter context</param>
/// <param name="srcType">Source floating point format</param>
/// <param name="dstType">Destination integer format</param>
/// <param name="roundingMode">Rounding applied before the conversion</param>
/// <param name="src">Source value</param>
/// <param name="rd">Destination register index</param>
/// <param name="absolute">True to take the absolute value of the source</param>
/// <param name="negate">True to negate the source</param>
private static void EmitF2I(
    EmitterContext context,
    DstFmt srcType,
    IDstFmt dstType,
    RoundMode2 roundingMode,
    Operand src,
    int rd,
    bool absolute,
    bool negate)
{
    // Both 64-bit formats go through the 32-bit conversions below, so both are reported.
    if (dstType == IDstFmt.U64 || dstType == IDstFmt.S64)
    {
        context.Config.GpuAccessor.Log("Unimplemented 64-bits F2I.");
    }

    Instruction fpType = srcType.ToInstFPType();

    bool isSignedInt = dstType == IDstFmt.S16 || dstType == IDstFmt.S32 || dstType == IDstFmt.S64;
    bool isSmallInt = dstType == IDstFmt.U16 || dstType == IDstFmt.S16;

    Operand srcB = context.FPAbsNeg(src, absolute, negate, fpType);

    srcB = roundingMode switch
    {
        RoundMode2.Round => context.FPRound(srcB, fpType),
        RoundMode2.Floor => context.FPFloor(srcB, fpType),
        RoundMode2.Ceil => context.FPCeiling(srcB, fpType),
        RoundMode2.Trunc => context.FPTruncate(srcB, fpType),
        _ => srcB
    };

    if (!isSignedInt)
    {
        // Negative float to uint cast is undefined, so we clamp the value before conversion.
        Operand c0 = srcType == DstFmt.F64 ? context.PackDouble2x32(0.0) : ConstF(0);

        srcB = context.FPMaximum(srcB, c0, fpType);
    }

    if (srcType == DstFmt.F64)
    {
        srcB = isSignedInt
            ? context.FP64ConvertToS32(srcB)
            : context.FP64ConvertToU32(srcB);
    }
    else
    {
        srcB = isSignedInt
            ? context.FP32ConvertToS32(srcB)
            : context.FP32ConvertToU32(srcB);
    }

    if (isSmallInt)
    {
        // 16-bit destinations are clamped to the range of the destination format.
        int min = (int)GetIntMin(dstType);
        int max = (int)GetIntMax(dstType);

        srcB = isSignedInt
            ? context.IClampS32(srcB, Const(min), Const(max))
            : context.IClampU32(srcB, Const(min), Const(max));
    }

    Operand dest = GetDest(rd);

    context.Copy(dest, srcB);

    // TODO: CC.
}
/// <summary>
/// Emits an integer to float conversion: applies abs/neg, extracts the selected byte or
/// half-word for small source formats, converts to FP32 or FP64, and writes the result.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcType">Source integer format</param>
/// <param name="dstType">Destination floating point format</param>
/// <param name="src">Source value</param>
/// <param name="byteSelection">Byte index where the 8 or 16 bits source value starts</param>
/// <param name="rd">Destination register index</param>
/// <param name="absolute">True to take the absolute value of the source</param>
/// <param name="negate">True to negate the source</param>
private static void EmitI2F(
    EmitterContext context,
    ISrcFmt srcType,
    DstFmt dstType,
    Operand src,
    ByteSel byteSelection,
    int rd,
    bool absolute,
    bool negate)
{
    bool is8Bit = srcType == ISrcFmt.U8 || srcType == ISrcFmt.S8;
    bool is16Bit = srcType == ISrcFmt.U16 || srcType == ISrcFmt.S16;

    bool isSigned =
        srcType == ISrcFmt.S8 ||
        srcType == ISrcFmt.S16 ||
        srcType == ISrcFmt.S32 ||
        srcType == ISrcFmt.S64;

    // TODO: Handle S/U64.

    // NOTE(review): abs/neg is applied to the full register before the sub-word extract,
    // while EmitI2I extracts first. Confirm which order matches the hardware.
    Operand value = context.IAbsNeg(src, absolute, negate);

    if (is16Bit || is8Bit)
    {
        Operand bitOffset = Const((int)byteSelection * 8);
        Operand bitCount = Const(is16Bit ? 16 : 8);

        if (isSigned)
        {
            value = context.BitfieldExtractS32(value, bitOffset, bitCount);
        }
        else
        {
            value = context.BitfieldExtractU32(value, bitOffset, bitCount);
        }
    }

    if (dstType != DstFmt.F64)
    {
        value = isSigned
            ? context.IConvertS32ToFP32(value)
            : context.IConvertU32ToFP32(value);
    }
    else
    {
        value = isSigned
            ? context.IConvertS32ToFP64(value)
            : context.IConvertU32ToFP64(value);
    }

    WriteFP(context, dstType, value, rd);

    // TODO: CC.
}
/// <summary>
/// Emits an integer to integer conversion: extracts the selected byte or half-word for small
/// source formats, applies abs/neg, optionally clamps to the destination range, then writes the
/// result and the zero/negative flags.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcType">Source integer format</param>
/// <param name="dstType">Destination integer format</param>
/// <param name="src">Source value</param>
/// <param name="byteSelection">Byte index where the 8 or 16 bits source value starts</param>
/// <param name="rd">Destination register index</param>
/// <param name="absolute">True to take the absolute value of the source</param>
/// <param name="negate">True to negate the source</param>
/// <param name="saturate">True to clamp the result to the range of the destination format</param>
/// <param name="writeCC">Passed to SetZnFlags to control the flags update</param>
private static void EmitI2I(
    EmitterContext context,
    ISrcDstFmt srcType,
    ISrcDstFmt dstType,
    Operand src,
    ByteSel byteSelection,
    int rd,
    bool absolute,
    bool negate,
    bool saturate,
    bool writeCC)
{
    // Formats above U32 (ignoring the sign bit) are not valid for this instruction.
    bool srcInvalid = (srcType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32;
    bool dstInvalid = (dstType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32;

    if (srcInvalid || dstInvalid)
    {
        context.Config.GpuAccessor.Log("Invalid I2I encoding.");

        return;
    }

    bool srcIs8Bit = srcType == ISrcDstFmt.U8 || srcType == ISrcDstFmt.S8;
    bool srcIs16Bit = srcType == ISrcDstFmt.U16 || srcType == ISrcDstFmt.S16;

    bool srcSigned =
        srcType == ISrcDstFmt.S8 ||
        srcType == ISrcDstFmt.S16 ||
        srcType == ISrcDstFmt.S32;

    bool dstSigned =
        dstType == ISrcDstFmt.S8 ||
        dstType == ISrcDstFmt.S16 ||
        dstType == ISrcDstFmt.S32;

    Operand value = src;

    if (srcIs16Bit || srcIs8Bit)
    {
        Operand bitOffset = Const((int)byteSelection * 8);
        Operand bitCount = Const(srcIs16Bit ? 16 : 8);

        if (srcSigned)
        {
            value = context.BitfieldExtractS32(value, bitOffset, bitCount);
        }
        else
        {
            value = context.BitfieldExtractU32(value, bitOffset, bitCount);
        }
    }

    value = context.IAbsNeg(value, absolute, negate);

    if (saturate)
    {
        Operand lower = Const((int)GetIntMin(dstType));
        Operand upper = Const((int)GetIntMax(dstType));

        if (dstSigned)
        {
            value = context.IClampS32(value, lower, upper);
        }
        else
        {
            value = context.IClampU32(value, lower, upper);
        }
    }

    context.Copy(GetDest(rd), value);

    SetZnFlags(context, value, writeCC);
}
/// <summary>
/// Gets a floating point source operand of the given format from a register.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="floatType">Floating point format of the source</param>
/// <param name="h">For FP16, true to select the second unpacked half, false for the first</param>
/// <param name="reg">Source register index</param>
/// <returns>The source operand</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="floatType"/> is not F16, F32 or F64</exception>
private static Operand UnpackReg(EmitterContext context, DstFmt floatType, bool h, int reg)
{
    return floatType switch
    {
        DstFmt.F32 => GetSrcReg(context, reg),
        DstFmt.F16 => GetHalfUnpacked(context, GetSrcReg(context, reg), HalfSwizzle.F16)[h ? 1 : 0],
        DstFmt.F64 => GetSrcReg(context, reg, isFP64: true),
        _ => throw new ArgumentException($"Invalid floating point type \"{floatType}\".")
    };
}
/// <summary>
/// Gets a floating point source operand of the given format from a constant buffer.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="floatType">Floating point format of the source</param>
/// <param name="h">For FP16, true to select the second unpacked half, false for the first</param>
/// <param name="cbufSlot">Constant buffer slot</param>
/// <param name="cbufOffset">Offset inside the constant buffer</param>
/// <returns>The source operand</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="floatType"/> is not F16, F32 or F64</exception>
private static Operand UnpackCbuf(EmitterContext context, DstFmt floatType, bool h, int cbufSlot, int cbufOffset)
{
    return floatType switch
    {
        DstFmt.F32 => GetSrcCbuf(context, cbufSlot, cbufOffset),
        DstFmt.F16 => GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), HalfSwizzle.F16)[h ? 1 : 0],
        DstFmt.F64 => GetSrcCbuf(context, cbufSlot, cbufOffset, isFP64: true),
        _ => throw new ArgumentException($"Invalid floating point type \"{floatType}\".")
    };
}
/// <summary>
/// Gets a floating point source operand of the given format from an immediate.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="floatType">Floating point format of the source</param>
/// <param name="h">For FP16, true to select the second unpacked half, false for the first</param>
/// <param name="imm">Immediate value</param>
/// <returns>The source operand</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="floatType"/> is not F16, F32 or F64</exception>
private static Operand UnpackImm(EmitterContext context, DstFmt floatType, bool h, int imm)
{
    return floatType switch
    {
        DstFmt.F32 => GetSrcImm(context, imm),
        DstFmt.F16 => GetHalfUnpacked(context, GetSrcImm(context, imm), HalfSwizzle.F16)[h ? 1 : 0],
        DstFmt.F64 => GetSrcImm(context, imm, isFP64: true),
        _ => throw new ArgumentException($"Invalid floating point type \"{floatType}\".")
    };
}
/// <summary>
/// Writes a floating point value to the destination register(s), according to its format.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="type">Floating point format of the value</param>
/// <param name="srcB">Value to be written</param>
/// <param name="rd">Destination register index</param>
private static void WriteFP(EmitterContext context, DstFmt type, Operand srcB, int rd)
{
    Operand low = GetDest(rd);

    switch (type)
    {
        case DstFmt.F32:
            context.Copy(low, srcB);
            break;

        case DstFmt.F16:
            // The value goes in the first half of the packed pair, the second half is zero.
            context.Copy(low, context.PackHalf2x16(srcB, ConstF(0)));
            break;

        default:
            // Any other format is treated as DstFmt.F64, which is split across two registers.
            Operand high = GetDest2(rd);

            context.Copy(low, context.UnpackDouble2x32Low(srcB));
            context.Copy(high, context.UnpackDouble2x32High(srcB));
            break;
    }
}
/// <summary>
/// Maps a floating point format to the instruction floating point type flag.
/// </summary>
/// <param name="type">Floating point format</param>
/// <returns>FP64 for the F64 format, FP32 for every other format</returns>
private static Instruction ToInstFPType(this DstFmt type)
{
    if (type == DstFmt.F64)
    {
        return Instruction.FP64;
    }

    return Instruction.FP32;
}
}
}

View File

@ -0,0 +1,532 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits an FP64 add (DADD) with a register second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the DADD instruction</param>
public static void DaddR(EmitterContext context)
{
    InstDaddR op = context.GetOp<InstDaddR>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcReg(context, op.SrcB, isFP64: true);

    EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 add (DADD) with an immediate second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the DADD instruction</param>
public static void DaddI(EmitterContext context)
{
    InstDaddI op = context.GetOp<InstDaddI>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);

    EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 add (DADD) with a constant buffer second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the DADD instruction</param>
public static void DaddC(EmitterContext context)
{
    InstDaddC op = context.GetOp<InstDaddC>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

    EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 fused multiply-add (DFMA) with register operands.
/// </summary>
/// <param name="context">Emitter context positioned at the DFMA instruction</param>
public static void DfmaR(EmitterContext context)
{
    InstDfmaR op = context.GetOp<InstDfmaR>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcReg(context, op.SrcB, isFP64: true);
    Operand c = GetSrcReg(context, op.SrcC, isFP64: true);

    EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 fused multiply-add (DFMA) with an immediate second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the DFMA instruction</param>
public static void DfmaI(EmitterContext context)
{
    InstDfmaI op = context.GetOp<InstDfmaI>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
    Operand c = GetSrcReg(context, op.SrcC, isFP64: true);

    EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 fused multiply-add (DFMA) with a constant buffer second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the DFMA instruction</param>
public static void DfmaC(EmitterContext context)
{
    InstDfmaC op = context.GetOp<InstDfmaC>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
    Operand c = GetSrcReg(context, op.SrcC, isFP64: true);

    EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 fused multiply-add (DFMA) where the second operand comes from the C register
/// and the third operand comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context positioned at the DFMA instruction</param>
public static void DfmaRc(EmitterContext context)
{
    InstDfmaRc op = context.GetOp<InstDfmaRc>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcReg(context, op.SrcC, isFP64: true);
    Operand c = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

    EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 multiply (DMUL) with a register second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the DMUL instruction</param>
public static void DmulR(EmitterContext context)
{
    InstDmulR op = context.GetOp<InstDmulR>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcReg(context, op.SrcB, isFP64: true);

    EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 multiply (DMUL) with an immediate second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the DMUL instruction</param>
public static void DmulI(EmitterContext context)
{
    InstDmulI op = context.GetOp<InstDmulI>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);

    EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
}
/// <summary>
/// Emits an FP64 multiply (DMUL) with a constant buffer second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the DMUL instruction</param>
public static void DmulC(EmitterContext context)
{
    InstDmulC op = context.GetOp<InstDmulC>();

    Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
    Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

    EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
}
/// <summary>
/// Emits an FP32 add (FADD) with a register second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the FADD instruction</param>
public static void FaddR(EmitterContext context)
{
    InstFaddR op = context.GetOp<InstFaddR>();

    Operand a = GetSrcReg(context, op.SrcA);
    Operand b = GetSrcReg(context, op.SrcB);

    EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
}
/// <summary>
/// Emits an FP32 add (FADD) with a 20-bit immediate second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the FADD instruction</param>
public static void FaddI(EmitterContext context)
{
    InstFaddI op = context.GetOp<InstFaddI>();

    Operand a = GetSrcReg(context, op.SrcA);
    Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20));

    EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
}
/// <summary>
/// Emits an FP32 add (FADD) with a constant buffer second operand.
/// </summary>
/// <param name="context">Emitter context positioned at the FADD instruction</param>
public static void FaddC(EmitterContext context)
{
    InstFaddC op = context.GetOp<InstFaddC>();

    Operand a = GetSrcReg(context, op.SrcA);
    Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
}
/// <summary>
/// Emits an FP32 add (FADD) with a 32-bit immediate second operand. Saturation is never applied.
/// </summary>
/// <param name="context">Emitter context positioned at the FADD32I instruction</param>
public static void Fadd32i(EmitterContext context)
{
    InstFadd32i op = context.GetOp<InstFadd32i>();

    Operand a = GetSrcReg(context, op.SrcA);
    Operand b = GetSrcImm(context, op.Imm32);

    EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFfmaR"/>: FP32 fused multiply-add with all three sources in registers.
/// </summary>
public static void FfmaR(EmitterContext context)
{
    InstFfmaR inst = context.GetOp<InstFfmaR>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcReg(context, inst.SrcB);
    Operand addend = GetSrcReg(context, inst.SrcC);

    EmitFfma(
        context,
        Instruction.FP32,
        factorA,
        factorB,
        addend,
        inst.Dest,
        inst.NegA,
        inst.NegC,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFfmaI"/>: FP32 fused multiply-add where the second factor is the 20-bit immediate.
/// </summary>
public static void FfmaI(EmitterContext context)
{
    InstFfmaI inst = context.GetOp<InstFfmaI>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
    Operand addend = GetSrcReg(context, inst.SrcC);

    EmitFfma(
        context,
        Instruction.FP32,
        factorA,
        factorB,
        addend,
        inst.Dest,
        inst.NegA,
        inst.NegC,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFfmaC"/>: FP32 fused multiply-add where the second factor comes from a constant buffer.
/// </summary>
public static void FfmaC(EmitterContext context)
{
    InstFfmaC inst = context.GetOp<InstFfmaC>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
    Operand addend = GetSrcReg(context, inst.SrcC);

    EmitFfma(
        context,
        Instruction.FP32,
        factorA,
        factorB,
        addend,
        inst.Dest,
        inst.NegA,
        inst.NegC,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFfmaRc"/>: FP32 fused multiply-add where the addend comes from a constant buffer.
/// </summary>
public static void FfmaRc(EmitterContext context)
{
    InstFfmaRc inst = context.GetOp<InstFfmaRc>();

    Operand factorA = GetSrcReg(context, inst.SrcA);

    // In this encoding the second factor is read from the SrcC register field.
    Operand factorB = GetSrcReg(context, inst.SrcC);
    Operand addend = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFfma(
        context,
        Instruction.FP32,
        factorA,
        factorB,
        addend,
        inst.Dest,
        inst.NegA,
        inst.NegC,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFfma32i"/>: FP32 fused multiply-add with a 32-bit immediate factor.
/// </summary>
public static void Ffma32i(EmitterContext context)
{
    InstFfma32i inst = context.GetOp<InstFfma32i>();

    Operand factorA = GetSrcReg(context, inst.SrcA);
    Operand factorB = GetSrcImm(context, inst.Imm32);

    // The addend is read from the destination register.
    Operand addend = GetSrcReg(context, inst.Dest);

    EmitFfma(
        context,
        Instruction.FP32,
        factorA,
        factorB,
        addend,
        inst.Dest,
        inst.NegA,
        inst.NegC,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFmulR"/>: FP32 multiply of two register sources, using the op's scale.
/// </summary>
public static void FmulR(EmitterContext context)
{
    InstFmulR inst = context.GetOp<InstFmulR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitFmul(
        context,
        Instruction.FP32,
        inst.Scale,
        lhs,
        rhs,
        inst.Dest,
        inst.NegA,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFmulI"/>: FP32 multiply of register source A by the 20-bit immediate, using the op's scale.
/// </summary>
public static void FmulI(EmitterContext context)
{
    InstFmulI inst = context.GetOp<InstFmulI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

    EmitFmul(
        context,
        Instruction.FP32,
        inst.Scale,
        lhs,
        rhs,
        inst.Dest,
        inst.NegA,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFmulC"/>: FP32 multiply of register source A by a constant buffer operand, using the op's scale.
/// </summary>
public static void FmulC(EmitterContext context)
{
    InstFmulC inst = context.GetOp<InstFmulC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFmul(
        context,
        Instruction.FP32,
        inst.Scale,
        lhs,
        rhs,
        inst.Dest,
        inst.NegA,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFmul32i"/>: FP32 multiply of register source A by the op's 32-bit immediate.
/// </summary>
public static void Fmul32i(EmitterContext context)
{
    InstFmul32i inst = context.GetOp<InstFmul32i>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, inst.Imm32);

    // This form never scales and never negates.
    EmitFmul(
        context,
        Instruction.FP32,
        MultiplyScale.NoScale,
        lhs,
        rhs,
        inst.Dest,
        false,
        inst.Sat,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstHadd2R"/>: two-lane half add of two register sources.
/// </summary>
public static void Hadd2R(EmitterContext context)
{
    InstHadd2R inst = context.GetOp<InstHadd2R>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] lanesB = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);

    EmitHadd2Hmul2(context, inst.OFmt, lanesA, lanesB, isAdd: true, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHadd2I"/>: two-lane half add of a register source and the op's two half immediates.
/// </summary>
public static void Hadd2I(EmitterContext context)
{
    InstHadd2I inst = context.GetOp<InstHadd2I>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] lanesB = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

    EmitHadd2Hmul2(context, inst.OFmt, lanesA, lanesB, isAdd: true, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHadd2C"/>: two-lane half add of a register source and a constant buffer operand.
/// </summary>
public static void Hadd2C(EmitterContext context)
{
    InstHadd2C inst = context.GetOp<InstHadd2C>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);

    // The constant buffer operand is always fetched with the F32 swizzle.
    Operand[] lanesB = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, inst.AbsB);

    EmitHadd2Hmul2(context, inst.OFmt, lanesA, lanesB, isAdd: true, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHadd232i"/>: two-lane half add of a register source and the op's immediate.
/// </summary>
public static void Hadd232i(EmitterContext context)
{
    InstHadd232i inst = context.GetOp<InstHadd232i>();

    // No absolute-value modifier is applied to source A in this form.
    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, false);
    Operand[] lanesB = GetHalfSrc(context, inst.Imm);

    // The output format is fixed to F16.
    EmitHadd2Hmul2(context, OFmt.F16, lanesA, lanesB, isAdd: true, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHfma2R"/>: two-lane half fused multiply-add with all sources in registers.
/// </summary>
public static void Hfma2R(EmitterContext context)
{
    InstHfma2R inst = context.GetOp<InstHfma2R>();

    // The op's NegA bit is applied while fetching the B factor; A is fetched unmodified.
    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] lanesB = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegA, false);
    Operand[] lanesC = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);

    EmitHfma2(context, inst.OFmt, lanesA, lanesB, lanesC, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHfma2I"/>: two-lane half fused multiply-add where the B factor is a pair of half immediates.
/// </summary>
public static void Hfma2I(EmitterContext context)
{
    InstHfma2I inst = context.GetOp<InstHfma2I>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] lanesB = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);
    Operand[] lanesC = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);

    EmitHfma2(context, inst.OFmt, lanesA, lanesB, lanesC, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHfma2C"/>: two-lane half fused multiply-add where the B factor comes from a constant buffer.
/// </summary>
public static void Hfma2C(EmitterContext context)
{
    InstHfma2C inst = context.GetOp<InstHfma2C>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);

    // Constant buffer operand: fixed F32 swizzle, negated according to the op's NegA bit.
    Operand[] lanesB = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegA, false);
    Operand[] lanesC = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);

    EmitHfma2(context, inst.OFmt, lanesA, lanesB, lanesC, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHfma2Rc"/>: two-lane half fused multiply-add where the addend comes from a constant buffer.
/// </summary>
public static void Hfma2Rc(EmitterContext context)
{
    InstHfma2Rc inst = context.GetOp<InstHfma2Rc>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);

    // In this encoding the B factor is read from the SrcC register field with the C swizzle.
    Operand[] lanesB = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegA, false);
    Operand[] lanesC = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegC, false);

    EmitHfma2(context, inst.OFmt, lanesA, lanesB, lanesC, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHfma232i"/>: two-lane half fused multiply-add with an immediate B factor.
/// </summary>
public static void Hfma232i(EmitterContext context)
{
    InstHfma232i inst = context.GetOp<InstHfma232i>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] lanesB = GetHalfSrc(context, inst.Imm);

    // The addend is read from the destination register with the F16 swizzle.
    Operand[] lanesC = GetHalfSrc(context, HalfSwizzle.F16, inst.Dest, inst.NegC, false);

    // Fixed F16 output format, never saturated.
    EmitHfma2(context, OFmt.F16, lanesA, lanesB, lanesC, inst.Dest, saturate: false);
}
/// <summary>
/// Handles <see cref="InstHmul2R"/>: two-lane half multiply of two register sources.
/// </summary>
public static void Hmul2R(EmitterContext context)
{
    InstHmul2R inst = context.GetOp<InstHmul2R>();

    // The op's NegA bit is applied while fetching B; A is never negated here.
    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, inst.AbsA);
    Operand[] lanesB = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegA, inst.AbsB);

    EmitHadd2Hmul2(context, inst.OFmt, lanesA, lanesB, isAdd: false, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHmul2I"/>: two-lane half multiply of a register source by the op's two half immediates.
/// </summary>
public static void Hmul2I(EmitterContext context)
{
    InstHmul2I inst = context.GetOp<InstHmul2I>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] lanesB = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

    EmitHadd2Hmul2(context, inst.OFmt, lanesA, lanesB, isAdd: false, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHmul2C"/>: two-lane half multiply of a register source by a constant buffer operand.
/// </summary>
public static void Hmul2C(EmitterContext context)
{
    InstHmul2C inst = context.GetOp<InstHmul2C>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, inst.AbsA);

    // Constant buffer operand: fixed F32 swizzle, negated according to the op's NegA bit.
    Operand[] lanesB = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegA, inst.AbsB);

    EmitHadd2Hmul2(context, inst.OFmt, lanesA, lanesB, isAdd: false, inst.Dest, inst.Sat);
}
/// <summary>
/// Handles <see cref="InstHmul232i"/>: two-lane half multiply of a register source by the op's 32-bit immediate.
/// </summary>
public static void Hmul232i(EmitterContext context)
{
    InstHmul232i inst = context.GetOp<InstHmul232i>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
    Operand[] lanesB = GetHalfSrc(context, inst.Imm32);

    // The output format is fixed to F16.
    EmitHadd2Hmul2(context, OFmt.F16, lanesA, lanesB, isAdd: false, inst.Dest, inst.Sat);
}
/// <summary>
/// Emits a floating-point add: applies the abs/negate modifiers to both sources, adds them,
/// optionally saturates, writes the result to <paramref name="rd"/> and updates the FP flags
/// through SetFPZnFlags.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="fpType">Floating-point type flag (Instruction.FP32 or Instruction.FP64)</param>
/// <param name="srcA">First addend</param>
/// <param name="srcB">Second addend</param>
/// <param name="rd">Destination register index</param>
/// <param name="negateA">Negate <paramref name="srcA"/></param>
/// <param name="negateB">Negate <paramref name="srcB"/></param>
/// <param name="absoluteA">Take the absolute value of <paramref name="srcA"/></param>
/// <param name="absoluteB">Take the absolute value of <paramref name="srcB"/></param>
/// <param name="saturate">Passed to FPSaturate for the sum</param>
/// <param name="writeCC">Passed to SetFPZnFlags</param>
private static void EmitFadd(
    EmitterContext context,
    Instruction fpType,
    Operand srcA,
    Operand srcB,
    int rd,
    bool negateA,
    bool negateB,
    bool absoluteA,
    bool absoluteB,
    bool saturate,
    bool writeCC)
{
    Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
    Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

    Operand sum = context.FPAdd(lhs, rhs, fpType);
    Operand result = context.FPSaturate(sum, saturate, fpType);

    SetDest(context, result, rd, fpType == Instruction.FP64);
    SetFPZnFlags(context, result, writeCC, fpType);
}
/// <summary>
/// Emits a floating-point fused multiply-add (srcA * srcB + srcC) with optional negation of
/// the second factor and of the addend, optional saturation, a destination write and an FP
/// flags update through SetFPZnFlags.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="fpType">Floating-point type flag (Instruction.FP32 or Instruction.FP64)</param>
/// <param name="srcA">First factor (used unmodified)</param>
/// <param name="srcB">Second factor</param>
/// <param name="srcC">Addend</param>
/// <param name="rd">Destination register index</param>
/// <param name="negateB">Negate <paramref name="srcB"/></param>
/// <param name="negateC">Negate <paramref name="srcC"/></param>
/// <param name="saturate">Passed to FPSaturate for the result</param>
/// <param name="writeCC">Passed to SetFPZnFlags</param>
private static void EmitFfma(
    EmitterContext context,
    Instruction fpType,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool negateB,
    bool negateC,
    bool saturate,
    bool writeCC)
{
    Operand factorB = context.FPNegate(srcB, negateB, fpType);
    Operand addend = context.FPNegate(srcC, negateC, fpType);

    Operand fused = context.FPFusedMultiplyAdd(srcA, factorB, addend, fpType);
    Operand result = context.FPSaturate(fused, saturate, fpType);

    SetDest(context, result, rd, fpType == Instruction.FP64);
    SetFPZnFlags(context, result, writeCC, fpType);
}
/// <summary>
/// Emits a floating-point multiply with an optional power-of-two pre-scale applied to
/// <paramref name="srcA"/>, optional negation of <paramref name="srcB"/>, optional saturation,
/// a destination write and an FP flags update through SetFPZnFlags.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="fpType">Floating-point type flag (Instruction.FP32 or Instruction.FP64)</param>
/// <param name="scale">Scale applied to <paramref name="srcA"/> before the multiply</param>
/// <param name="srcA">First factor</param>
/// <param name="srcB">Second factor</param>
/// <param name="rd">Destination register index</param>
/// <param name="negateB">Negate <paramref name="srcB"/></param>
/// <param name="saturate">Passed to FPSaturate for the product</param>
/// <param name="writeCC">Passed to SetFPZnFlags</param>
private static void EmitFmul(
    EmitterContext context,
    Instruction fpType,
    MultiplyScale scale,
    Operand srcA,
    Operand srcB,
    int rd,
    bool negateB,
    bool saturate,
    bool writeCC)
{
    bool isFP64 = fpType == Instruction.FP64;

    srcB = context.FPNegate(srcB, negateB, fpType);

    if (scale != MultiplyScale.NoScale)
    {
        float factor = scale switch
        {
            MultiplyScale.D2 => 0.5f,
            MultiplyScale.D4 => 0.25f,
            MultiplyScale.D8 => 0.125f,
            MultiplyScale.M2 => 2f,
            MultiplyScale.M4 => 4f,
            MultiplyScale.M8 => 8f,
            _ => 1f // Invalid, behave as if it had no scale.
        };

        Operand scaleConst = ConstF(factor);

        // A factor of 1 can only come from the fallback arm above, so it marks an unknown scale.
        if (factor == 1f)
        {
            context.Config.GpuAccessor.Log($"Invalid FP multiply scale \"{scale}\".");
        }

        if (isFP64)
        {
            scaleConst = context.FP32ConvertToFP64(scaleConst);
        }

        srcA = context.FPMultiply(srcA, scaleConst, fpType);
    }

    Operand product = context.FPMultiply(srcA, srcB, fpType);
    Operand result = context.FPSaturate(product, saturate, fpType);

    SetDest(context, result, rd, isFP64);
    SetFPZnFlags(context, result, writeCC, fpType);
}
/// <summary>
/// Emits a two-lane add or multiply over the given source lanes, optionally saturates each
/// lane, then packs both lanes with GetHalfPacked and copies the packed value to the destination.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="swizzle">Output format passed to GetHalfPacked</param>
/// <param name="srcA">Lanes of the first source (indices 0 and 1 are read)</param>
/// <param name="srcB">Lanes of the second source (indices 0 and 1 are read)</param>
/// <param name="isAdd">True to add the lanes, false to multiply them</param>
/// <param name="rd">Destination register index</param>
/// <param name="saturate">Passed to FPSaturate for each lane</param>
private static void EmitHadd2Hmul2(
    EmitterContext context,
    OFmt swizzle,
    Operand[] srcA,
    Operand[] srcB,
    bool isAdd,
    int rd,
    bool saturate)
{
    Operand[] lanes = new Operand[2];

    for (int lane = 0; lane < lanes.Length; lane++)
    {
        Operand value = isAdd
            ? context.FPAdd(srcA[lane], srcB[lane])
            : context.FPMultiply(srcA[lane], srcB[lane]);

        lanes[lane] = context.FPSaturate(value, saturate);
    }

    Operand dest = GetDest(rd);
    Operand packed = GetHalfPacked(context, swizzle, lanes, rd);

    context.Copy(dest, packed);
}
/// <summary>
/// Emits a two-lane fused multiply-add (srcA * srcB + srcC per lane), optionally saturates
/// each lane, then packs both lanes with GetHalfPacked and copies the packed value to the destination.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="swizzle">Output format passed to GetHalfPacked</param>
/// <param name="srcA">Lanes of the first factor (indices 0 and 1 are read)</param>
/// <param name="srcB">Lanes of the second factor (indices 0 and 1 are read)</param>
/// <param name="srcC">Lanes of the addend (indices 0 and 1 are read)</param>
/// <param name="rd">Destination register index</param>
/// <param name="saturate">Passed to FPSaturate for each lane</param>
public static void EmitHfma2(
    EmitterContext context,
    OFmt swizzle,
    Operand[] srcA,
    Operand[] srcB,
    Operand[] srcC,
    int rd,
    bool saturate)
{
    Operand[] lanes = new Operand[2];

    for (int lane = 0; lane < lanes.Length; lane++)
    {
        Operand fused = context.FPFusedMultiplyAdd(srcA[lane], srcB[lane], srcC[lane]);

        lanes[lane] = context.FPSaturate(fused, saturate);
    }

    Operand dest = GetDest(rd);
    Operand packed = GetHalfPacked(context, swizzle, lanes, rd);

    context.Copy(dest, packed);
}
}
}

View File

@ -0,0 +1,575 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Handles <see cref="InstDsetR"/>: FP64 compare-and-set of two register sources.
/// </summary>
public static void DsetR(EmitterContext context)
{
    InstDsetR inst = context.GetOp<InstDsetR>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.BVal,
        inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstDsetI"/>: FP64 compare-and-set of a register source against the 20-bit immediate.
/// </summary>
public static void DsetI(EmitterContext context)
{
    InstDsetI inst = context.GetOp<InstDsetI>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.BVal,
        inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstDsetC"/>: FP64 compare-and-set of a register source against a constant buffer operand.
/// </summary>
public static void DsetC(EmitterContext context)
{
    InstDsetC inst = context.GetOp<InstDsetC>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.BVal,
        inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstDsetpR"/>: FP64 compare of two register sources, writing two predicates.
/// </summary>
public static void DsetpR(EmitterContext context)
{
    InstDsetpR inst = context.GetOp<InstDsetpR>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        writeCC: false,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstDsetpI"/>: FP64 compare of a register source against the 20-bit immediate, writing two predicates.
/// </summary>
public static void DsetpI(EmitterContext context)
{
    InstDsetpI inst = context.GetOp<InstDsetpI>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        writeCC: false,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstDsetpC"/>: FP64 compare of a register source against a constant buffer operand, writing two predicates.
/// </summary>
public static void DsetpC(EmitterContext context)
{
    InstDsetpC inst = context.GetOp<InstDsetpC>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        writeCC: false,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstFcmpR"/>: selects between two register sources based on a comparison of SrcC.
/// </summary>
public static void FcmpR(EmitterContext context)
{
    InstFcmpR inst = context.GetOp<InstFcmpR>();

    Operand whenTrue = GetSrcReg(context, inst.SrcA);
    Operand whenFalse = GetSrcReg(context, inst.SrcB);
    Operand compared = GetSrcReg(context, inst.SrcC);

    EmitFcmp(context, inst.FComp, whenTrue, whenFalse, compared, inst.Dest);
}
/// <summary>
/// Handles <see cref="InstFcmpI"/>: selects between a register source and the 20-bit immediate based on a comparison of SrcC.
/// </summary>
public static void FcmpI(EmitterContext context)
{
    InstFcmpI inst = context.GetOp<InstFcmpI>();

    Operand whenTrue = GetSrcReg(context, inst.SrcA);
    Operand whenFalse = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
    Operand compared = GetSrcReg(context, inst.SrcC);

    EmitFcmp(context, inst.FComp, whenTrue, whenFalse, compared, inst.Dest);
}
/// <summary>
/// Handles <see cref="InstFcmpC"/>: selects between a register source and a constant buffer operand based on a comparison of SrcC.
/// </summary>
public static void FcmpC(EmitterContext context)
{
    InstFcmpC inst = context.GetOp<InstFcmpC>();

    Operand whenTrue = GetSrcReg(context, inst.SrcA);
    Operand whenFalse = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
    Operand compared = GetSrcReg(context, inst.SrcC);

    EmitFcmp(context, inst.FComp, whenTrue, whenFalse, compared, inst.Dest);
}
/// <summary>
/// Handles <see cref="InstFcmpRc"/>: selects between two register sources based on a comparison of a constant buffer operand.
/// </summary>
public static void FcmpRc(EmitterContext context)
{
    InstFcmpRc inst = context.GetOp<InstFcmpRc>();

    Operand whenTrue = GetSrcReg(context, inst.SrcA);

    // In this encoding the second selectable value is read from the SrcC register field.
    Operand whenFalse = GetSrcReg(context, inst.SrcC);
    Operand compared = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFcmp(context, inst.FComp, whenTrue, whenFalse, compared, inst.Dest);
}
/// <summary>
/// Handles <see cref="InstFsetR"/>: FP32 compare-and-set of two register sources.
/// </summary>
public static void FsetR(EmitterContext context)
{
    InstFsetR inst = context.GetOp<InstFsetR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.BVal,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFsetC"/>: FP32 compare-and-set of a register source against a constant buffer operand.
/// </summary>
public static void FsetC(EmitterContext context)
{
    InstFsetC inst = context.GetOp<InstFsetC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.BVal,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFsetI"/>: FP32 compare-and-set of a register source against the 20-bit immediate.
/// </summary>
public static void FsetI(EmitterContext context)
{
    InstFsetI inst = context.GetOp<InstFsetI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.BVal,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFsetpR"/>: FP32 compare of two register sources, writing two predicates.
/// </summary>
public static void FsetpR(EmitterContext context)
{
    InstFsetpR inst = context.GetOp<InstFsetpR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFsetpI"/>: FP32 compare of a register source against the 20-bit immediate, writing two predicates.
/// </summary>
public static void FsetpI(EmitterContext context)
{
    InstFsetpI inst = context.GetOp<InstFsetpI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFsetpC"/>: FP32 compare of a register source against a constant buffer operand, writing two predicates.
/// </summary>
public static void FsetpC(EmitterContext context)
{
    InstFsetpC inst = context.GetOp<InstFsetpC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstHset2R"/>: two-lane half compare-and-set of two register sources.
/// </summary>
public static void Hset2R(EmitterContext context)
{
    InstHset2R inst = context.GetOp<InstHset2R>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] lanesB = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);

    EmitHset2(
        context, inst.Cmp, inst.Bop,
        lanesA, lanesB,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.Bval);
}
/// <summary>
/// Handles <see cref="InstHset2I"/>: two-lane half compare-and-set of a register source against two half immediates.
/// </summary>
public static void Hset2I(EmitterContext context)
{
    InstHset2I inst = context.GetOp<InstHset2I>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] lanesB = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

    EmitHset2(
        context, inst.Cmp, inst.Bop,
        lanesA, lanesB,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.Bval);
}
/// <summary>
/// Handles <see cref="InstHset2C"/>: two-lane half compare-and-set of a register source against a constant buffer operand.
/// </summary>
public static void Hset2C(EmitterContext context)
{
    InstHset2C inst = context.GetOp<InstHset2C>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);

    // Constant buffer operand: fixed F32 swizzle and no absolute-value modifier.
    Operand[] lanesB = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, false);

    EmitHset2(
        context, inst.Cmp, inst.Bop,
        lanesA, lanesB,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.Bval);
}
/// <summary>
/// Handles <see cref="InstHsetp2R"/>: two-lane half compare of two register sources, writing two predicates.
/// </summary>
public static void Hsetp2R(EmitterContext context)
{
    InstHsetp2R inst = context.GetOp<InstHsetp2R>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] lanesB = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);

    // The register form exposes its comparison through the FComp2 field.
    EmitHsetp2(
        context, inst.FComp2, inst.Bop,
        lanesA, lanesB,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.HAnd);
}
/// <summary>
/// Handles <see cref="InstHsetp2I"/>: two-lane half compare of a register source against two half immediates, writing two predicates.
/// </summary>
public static void Hsetp2I(EmitterContext context)
{
    InstHsetp2I inst = context.GetOp<InstHsetp2I>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
    Operand[] lanesB = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

    EmitHsetp2(
        context, inst.FComp, inst.Bop,
        lanesA, lanesB,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.HAnd);
}
/// <summary>
/// Handles <see cref="InstHsetp2C"/>: two-lane half compare of a register source against a constant buffer operand, writing two predicates.
/// </summary>
public static void Hsetp2C(EmitterContext context)
{
    InstHsetp2C inst = context.GetOp<InstHsetp2C>();

    Operand[] lanesA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);

    // The constant buffer operand is always fetched with the F32 swizzle.
    Operand[] lanesB = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, inst.AbsB);

    EmitHsetp2(
        context, inst.FComp, inst.Bop,
        lanesA, lanesB,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.HAnd);
}
/// <summary>
/// Compares <paramref name="srcC"/> against the float constant 0 using <paramref name="cmpOp"/>
/// and copies <paramref name="srcA"/> to the destination when the comparison holds,
/// <paramref name="srcB"/> otherwise.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="cmpOp">Comparison applied to <paramref name="srcC"/> and zero</param>
/// <param name="srcA">Value selected when the comparison is true</param>
/// <param name="srcB">Value selected when the comparison is false</param>
/// <param name="srcC">Value compared against zero</param>
/// <param name="rd">Destination register index</param>
private static void EmitFcmp(EmitterContext context, FComp cmpOp, Operand srcA, Operand srcB, Operand srcC, int rd)
{
    Operand zero = ConstF(0);
    Operand condition = GetFPComparison(context, cmpOp, srcC, zero);
    Operand selected = context.ConditionalSelect(condition, srcA, srcB);

    context.Copy(GetDest(rd), selected);
}
/// <summary>
/// Emits a floating-point compare-and-set: applies abs/negate modifiers to both sources,
/// compares them, combines the comparison with the source predicate through
/// <paramref name="logicOp"/>, and writes the outcome to the destination register.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="cmpOp">Comparison to perform</param>
/// <param name="logicOp">Logical operation combining the comparison with the predicate</param>
/// <param name="srcA">First compared value</param>
/// <param name="srcB">Second compared value</param>
/// <param name="srcPred">Source predicate index</param>
/// <param name="srcPredInv">Passed to GetPredicate as the inversion flag</param>
/// <param name="rd">Destination register index</param>
/// <param name="absoluteA">Take the absolute value of <paramref name="srcA"/></param>
/// <param name="absoluteB">Take the absolute value of <paramref name="srcB"/></param>
/// <param name="negateA">Negate <paramref name="srcA"/></param>
/// <param name="negateB">Negate <paramref name="srcB"/></param>
/// <param name="boolFloat">
/// When true, the destination receives ConstF(1) or Const(0) selected by the result;
/// when false, the combined result itself is written
/// </param>
/// <param name="writeCC">Passed to the flag update helper</param>
/// <param name="isFP64">True to compare as FP64, false for FP32</param>
private static void EmitFset(
    EmitterContext context,
    FComp cmpOp,
    BoolOp logicOp,
    Operand srcA,
    Operand srcB,
    int srcPred,
    bool srcPredInv,
    int rd,
    bool absoluteA,
    bool absoluteB,
    bool negateA,
    bool negateB,
    bool boolFloat,
    bool writeCC,
    bool isFP64 = false)
{
    Instruction fpType = Instruction.FP32;

    if (isFP64)
    {
        fpType = Instruction.FP64;
    }

    Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
    Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

    Operand comparison = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
    Operand predicate = GetPredicate(context, srcPred, srcPredInv);
    Operand result = GetPredLogicalOp(context, logicOp, comparison, predicate);

    Operand dest = GetDest(rd);

    if (!boolFloat)
    {
        // Integer form: the combined result is stored as-is.
        context.Copy(dest, result);

        SetZnFlags(context, result, writeCC, extended: false);

        return;
    }

    // Float form: the result selects between ConstF(1) and Const(0).
    result = context.ConditionalSelect(result, ConstF(1), Const(0));

    context.Copy(dest, result);

    SetFPZnFlags(context, result, writeCC);
}
/// <summary>
/// Emits a floating-point compare that writes two predicates: the first receives the
/// comparison combined with the source predicate, the second receives the bitwise NOT of
/// the raw comparison combined with the same predicate.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="cmpOp">Comparison to perform</param>
/// <param name="logicOp">Logical operation combining each result with the predicate</param>
/// <param name="srcA">First compared value</param>
/// <param name="srcB">Second compared value</param>
/// <param name="srcPred">Source predicate index</param>
/// <param name="srcPredInv">Passed to GetPredicate as the inversion flag</param>
/// <param name="destPred">Index of the predicate receiving the comparison result</param>
/// <param name="destPredInv">Index of the predicate receiving the negated comparison result</param>
/// <param name="absoluteA">Take the absolute value of <paramref name="srcA"/></param>
/// <param name="absoluteB">Take the absolute value of <paramref name="srcB"/></param>
/// <param name="negateA">Negate <paramref name="srcA"/></param>
/// <param name="negateB">Negate <paramref name="srcB"/></param>
/// <param name="writeCC">Not read by this method</param>
/// <param name="isFP64">True to compare as FP64, false for FP32</param>
private static void EmitFsetp(
    EmitterContext context,
    FComp cmpOp,
    BoolOp logicOp,
    Operand srcA,
    Operand srcB,
    int srcPred,
    bool srcPredInv,
    int destPred,
    int destPredInv,
    bool absoluteA,
    bool absoluteB,
    bool negateA,
    bool negateB,
    bool writeCC,
    bool isFP64 = false)
{
    Instruction fpType = Instruction.FP32;

    if (isFP64)
    {
        fpType = Instruction.FP64;
    }

    Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
    Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

    // The negation is taken from the raw comparison, before the predicate is mixed in.
    Operand comparison = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
    Operand negated = context.BitwiseNot(comparison);

    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    Operand first = GetPredLogicalOp(context, logicOp, comparison, predicate);
    Operand second = GetPredLogicalOp(context, logicOp, negated, predicate);

    context.Copy(Register(destPred, RegisterType.Predicate), first);
    context.Copy(Register(destPredInv, RegisterType.Predicate), second);
}
/// <summary>
/// Emits a two-lane compare-and-set: compares lane 0 and lane 1 of the sources, combines
/// each comparison with the source predicate, and writes a packed result to the destination.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="cmpOp">Comparison to perform on each lane</param>
/// <param name="logicOp">Logical operation combining each comparison with the predicate</param>
/// <param name="srcA">Lanes of the first source (indices 0 and 1 are read)</param>
/// <param name="srcB">Lanes of the second source (indices 0 and 1 are read)</param>
/// <param name="srcPred">Source predicate index</param>
/// <param name="srcPredInv">Passed to GetPredicate as the inversion flag</param>
/// <param name="rd">Destination register index</param>
/// <param name="boolFloat">
/// When true, each lane becomes ConstF(1) or Const(0) and the pair is packed with
/// PackHalf2x16; when false, the low 16 bits of lane 0 are ORed with lane 1 shifted left by 16
/// </param>
private static void EmitHset2(
    EmitterContext context,
    FComp cmpOp,
    BoolOp logicOp,
    Operand[] srcA,
    Operand[] srcB,
    int srcPred,
    bool srcPredInv,
    int rd,
    bool boolFloat)
{
    Operand lane0 = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
    Operand lane1 = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);

    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    lane0 = GetPredLogicalOp(context, logicOp, lane0, predicate);
    lane1 = GetPredLogicalOp(context, logicOp, lane1, predicate);

    if (!boolFloat)
    {
        // Integer form: lane 0 occupies the low 16 bits, lane 1 the bits above them.
        Operand lowBits = context.BitwiseAnd(lane0, Const(0xffff));
        Operand highBits = context.ShiftLeft(lane1, Const(16));
        Operand combined = context.BitwiseOr(lowBits, highBits);

        context.Copy(GetDest(rd), combined);

        return;
    }

    lane0 = context.ConditionalSelect(lane0, ConstF(1), Const(0));
    lane1 = context.ConditionalSelect(lane1, ConstF(1), Const(0));

    Operand dest = GetDest(rd);
    Operand packed = context.PackHalf2x16(lane0, lane1);

    context.Copy(dest, packed);
}
/// <summary>
/// Emits a two-lane compare that writes two predicates. Without <paramref name="hAnd"/>,
/// the first predicate receives the lane 0 comparison and the second the lane 1 comparison.
/// With <paramref name="hAnd"/>, the first receives the AND of both lanes and the second
/// the bitwise NOT of that AND. Both are combined with the source predicate before being written.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="cmpOp">Comparison to perform on each lane</param>
/// <param name="logicOp">Logical operation combining each result with the predicate</param>
/// <param name="srcA">Lanes of the first source (indices 0 and 1 are read)</param>
/// <param name="srcB">Lanes of the second source (indices 0 and 1 are read)</param>
/// <param name="srcPred">Source predicate index</param>
/// <param name="srcPredInv">Passed to GetPredicate as the inversion flag</param>
/// <param name="destPred">Index of the first destination predicate</param>
/// <param name="destPredInv">Index of the second destination predicate</param>
/// <param name="hAnd">True to AND both lane results together</param>
private static void EmitHsetp2(
    EmitterContext context,
    FComp cmpOp,
    BoolOp logicOp,
    Operand[] srcA,
    Operand[] srcB,
    int srcPred,
    bool srcPredInv,
    int destPred,
    int destPredInv,
    bool hAnd)
{
    Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
    Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);

    if (hAnd)
    {
        Operand both = context.BitwiseAnd(first, second);

        first = both;
        second = context.BitwiseNot(both);
    }

    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    first = GetPredLogicalOp(context, logicOp, first, predicate);
    second = GetPredLogicalOp(context, logicOp, second, predicate);

    context.Copy(Register(destPred, RegisterType.Predicate), first);
    context.Copy(Register(destPredInv, RegisterType.Predicate), second);
}
/// <summary>
/// Builds the operand holding the result of a floating-point comparison.
/// </summary>
/// <remarks>
/// FComp.T and FComp.F yield constants. FComp.Nan yields "either source is NaN" and FComp.Num
/// its bitwise NOT. Any other condition maps to a compare instruction; when the FComp.Nan bit
/// is also set in the condition, the result is additionally ORed with the NaN test of each source.
/// </remarks>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="cond">Comparison condition</param>
/// <param name="srcA">First compared value</param>
/// <param name="srcB">Second compared value</param>
/// <param name="fpType">Floating-point type flag ORed into the compare instruction</param>
/// <returns>Operand with the comparison result</returns>
/// <exception cref="ArgumentException">The condition, with the NaN bit cleared, is not a known comparison</exception>
private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB, Instruction fpType = Instruction.FP32)
{
    if (cond == FComp.T)
    {
        return Const(IrConsts.True);
    }

    if (cond == FComp.F)
    {
        return Const(IrConsts.False);
    }

    if (cond == FComp.Nan || cond == FComp.Num)
    {
        Operand nanA = context.IsNan(srcA, fpType);
        Operand nanB = context.IsNan(srcB, fpType);
        Operand anyNan = context.BitwiseOr(nanA, nanB);

        return cond == FComp.Num ? context.BitwiseNot(anyNan) : anyNan;
    }

    Instruction compareInst = (cond & ~FComp.Nan) switch
    {
        FComp.Lt => Instruction.CompareLess,
        FComp.Eq => Instruction.CompareEqual,
        FComp.Le => Instruction.CompareLessOrEqual,
        FComp.Gt => Instruction.CompareGreater,
        FComp.Ne => Instruction.CompareNotEqual,
        FComp.Ge => Instruction.CompareGreaterOrEqual,
        _ => throw new ArgumentException($"Unexpected condition \"{cond}\".")
    };

    Operand result = context.Add(compareInst | fpType, Local(), srcA, srcB);

    if ((cond & FComp.Nan) == 0)
    {
        return result;
    }

    // NaN bit set: the comparison also holds when either source is NaN.
    result = context.BitwiseOr(result, context.IsNan(srcA, fpType));
    result = context.BitwiseOr(result, context.IsNan(srcB, fpType));

    return result;
}
}
}

View File

@ -0,0 +1,106 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Handles <see cref="InstDmnmxR"/>: FP64 predicate-selected minimum/maximum of two register sources.
/// </summary>
public static void DmnmxR(EmitterContext context)
{
    InstDmnmxR inst = context.GetOp<InstDmnmxR>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);
    Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs,
        selector,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstDmnmxI"/>: FP64 predicate-selected minimum/maximum of a register source and the 20-bit immediate.
/// </summary>
public static void DmnmxI(EmitterContext context)
{
    InstDmnmxI inst = context.GetOp<InstDmnmxI>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);
    Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs,
        selector,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstDmnmxC"/>: FP64 predicate-selected minimum/maximum of a register source and a constant buffer operand.
/// </summary>
public static void DmnmxC(EmitterContext context)
{
    InstDmnmxC inst = context.GetOp<InstDmnmxC>();

    Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
    Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs,
        selector,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Handles <see cref="InstFmnmxR"/>: FP32 predicate-selected minimum/maximum of two register sources.
/// </summary>
public static void FmnmxR(EmitterContext context)
{
    InstFmnmxR inst = context.GetOp<InstFmnmxR>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcReg(context, inst.SrcB);
    Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs,
        selector,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFmnmxI"/>: FP32 predicate-selected minimum/maximum of a register source and the 20-bit immediate.
/// </summary>
public static void FmnmxI(EmitterContext context)
{
    InstFmnmxI inst = context.GetOp<InstFmnmxI>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
    Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs,
        selector,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>
/// Handles <see cref="InstFmnmxC"/>: FP32 predicate-selected minimum/maximum of a register source and a constant buffer operand.
/// </summary>
public static void FmnmxC(EmitterContext context)
{
    InstFmnmxC inst = context.GetOp<InstFmnmxC>();

    Operand lhs = GetSrcReg(context, inst.SrcA);
    Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
    Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs,
        selector,
        inst.Dest,
        inst.AbsA, inst.AbsB,
        inst.NegA, inst.NegB,
        inst.WriteCC);
}
/// <summary>
/// Emits a floating-point min/max: applies abs/negate modifiers to both sources, computes
/// both the minimum and the maximum, and uses <paramref name="srcPred"/> to select the
/// minimum (when true) or the maximum (when false). The selected value is written to the
/// destination and the FP flags are updated through SetFPZnFlags.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
/// <param name="srcA">First source</param>
/// <param name="srcB">Second source</param>
/// <param name="srcPred">Selector: minimum when true, maximum when false</param>
/// <param name="rd">Destination register index</param>
/// <param name="absoluteA">Take the absolute value of <paramref name="srcA"/></param>
/// <param name="absoluteB">Take the absolute value of <paramref name="srcB"/></param>
/// <param name="negateA">Negate <paramref name="srcA"/></param>
/// <param name="negateB">Negate <paramref name="srcB"/></param>
/// <param name="writeCC">Passed to SetFPZnFlags</param>
/// <param name="isFP64">True to operate as FP64, false for FP32</param>
private static void EmitFmnmx(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    Operand srcPred,
    int rd,
    bool absoluteA,
    bool absoluteB,
    bool negateA,
    bool negateB,
    bool writeCC,
    bool isFP64 = false)
{
    Instruction fpType = Instruction.FP32;

    if (isFP64)
    {
        fpType = Instruction.FP64;
    }

    Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
    Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

    // Both candidates are always computed; the predicate only picks one of them.
    Operand minimum = context.FPMinimum(lhs, rhs, fpType);
    Operand maximum = context.FPMaximum(lhs, rhs, fpType);

    Operand selected = context.ConditionalSelect(srcPred, minimum, maximum);

    SetDest(context, selected, rd, isFP64);
    SetFPZnFlags(context, selected, writeCC, fpType);
}
}
}

View File

@ -0,0 +1,322 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using System.Linq;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Handles <see cref="InstBra"/>: branches to the address of the current block's last successor.
/// </summary>
public static void Bra(EmitterContext context)
{
    // No decoded field is read here; the target comes from the block's successor list.
    _ = context.GetOp<InstBra>();

    ulong targetAddress = context.CurrBlock.Successors[^1].Address;

    EmitBranch(context, targetAddress);
}
/// <summary>
/// Handles <see cref="InstBrk"/> by delegating to the shared BRK/CONT/SYNC emitter.
/// </summary>
public static void Brk(EmitterContext context)
{
    // No decoded field is read here.
    _ = context.GetOp<InstBrk>();

    EmitBrkContSync(context);
}
/// <summary>
/// Handles <see cref="InstBrx"/>: an indirect branch whose target is the value of register
/// SrcA plus the op's absolute address. The possible targets are the successors of the
/// current block; code is emitted to compare the computed address against each of them.
/// </summary>
/// <remarks>
/// If no target successor is available, a message is logged and nothing is emitted.
/// The target list is sorted once by address and reused by every pass below.
/// </remarks>
/// <param name="context">Emitter context receiving the operations</param>
public static void Brx(EmitterContext context)
{
    InstBrx op = context.GetOp<InstBrx>();
    InstOp currOp = context.CurrOp;

    // The first successor is skipped when the block has a next block
    // (presumably the fall-through block; confirm against the decoder).
    int startIndex = context.CurrBlock.HasNext() ? 1 : 0;

    if (context.CurrBlock.Successors.Count <= startIndex)
    {
        context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{currOp.Address:X}.");
        return;
    }

    int offset = (int)currOp.GetAbsoluteAddress();

    Operand address = context.IAdd(Register(op.SrcA, RegisterType.Gpr), Const(offset));

    // Sort the targets a single time (ascending by address) and materialize them,
    // instead of re-running the deferred query and re-sorting for each pass.
    var sortedTargets = context.CurrBlock.Successors.Skip(startIndex).OrderBy(x => x.Address).ToArray();

    int total = sortedTargets.Length;

    bool allTargetsSinglePred = true;

    // The last (highest address) target is exempt from this check.
    for (int index = 0; index < total - 1; index++)
    {
        var target = sortedTargets[index];

        if (target.Predecessors.Count > 1 || target.Address <= context.CurrBlock.Address)
        {
            allTargetsSinglePred = false;
            break;
        }
    }

    if (allTargetsSinglePred)
    {
        // Chain blocks, each target block will check if the BRX target address
        // matches its own address, if not, it jumps to the next target which will do the same check,
        // until it reaches the last possible target, which is executed unconditionally.
        // We can only do this if the BRX block is the only predecessor of all target blocks.
        // Additionally, this is not supported for blocks located before the current block,
        // since it will be too late to insert a label, but this is something that can be improved
        // in the future if necessary.

        Block currentTarget = null;
        ulong firstTargetAddress = 0;

        foreach (Block nextTarget in sortedTargets)
        {
            if (currentTarget != null)
            {
                // Duplicate addresses are chained only once.
                if (currentTarget.Address != nextTarget.Address)
                {
                    context.SetBrxTarget(currentTarget.Address, address, (int)currentTarget.Address, nextTarget.Address);
                }
            }
            else
            {
                firstTargetAddress = nextTarget.Address;
            }

            currentTarget = nextTarget;
        }

        context.Branch(context.GetLabel(firstTargetAddress));
    }
    else
    {
        // Emit the branches sequentially.
        // This generates slightly worse code, but should work for all cases.
        // Targets are visited from the highest address to the lowest; only the address of
        // each target is used, so walking the ascending array backwards is equivalent to
        // a separate descending sort.

        ulong lastTargetAddress = ulong.MaxValue;

        int count = 0;

        for (int index = total - 1; index >= 0; index--)
        {
            var target = sortedTargets[index];

            Operand label = context.GetLabel(target.Address);

            if (++count < total)
            {
                // Skip the comparison for an address that was just handled.
                if (target.Address != lastTargetAddress)
                {
                    context.BranchIfTrue(label, context.ICompareEqual(address, Const((int)target.Address)));
                }

                lastTargetAddress = target.Address;
            }
            else
            {
                // The final target is taken unconditionally.
                context.Branch(label);
            }
        }
    }
}
/// <summary>
/// Handles <see cref="InstCal"/>: looks up the function at the op's absolute address and
/// either emits a call to it or, for compiler generated functions, the matching built-in
/// (FSI begin / FSI end). Other compiler generated function types emit nothing.
/// </summary>
public static void Cal(EmitterContext context)
{
    // No decoded field is read here; the callee is resolved from the current op address.
    _ = context.GetOp<InstCal>();

    DecodedFunction callee = context.Program.GetFunctionByAddress(context.CurrOp.GetAbsoluteAddress());

    if (!callee.IsCompilerGenerated)
    {
        context.Call(callee.Id, false);
        return;
    }

    switch (callee.Type)
    {
        case FunctionType.BuiltInFSIBegin:
            context.FSIBegin();
            break;
        case FunctionType.BuiltInFSIEnd:
            context.FSIEnd();
            break;
    }
}
/// <summary>
/// Handles <see cref="InstCont"/> by delegating to the shared BRK/CONT/SYNC emitter.
/// </summary>
public static void Cont(EmitterContext context)
{
    // No decoded field is read here.
    _ = context.GetOp<InstCont>();

    EmitBrkContSync(context);
}
/// <summary>
/// Handles <see cref="InstExit"/>: emits a return, guarded by the op's condition code when it
/// is not Ccc.T. On a non-main function the exit is logged as invalid and nothing is emitted.
/// </summary>
public static void Exit(EmitterContext context)
{
    InstExit inst = context.GetOp<InstExit>();

    if (context.IsNonMain)
    {
        context.Config.GpuAccessor.Log("Invalid exit on non-main function.");
        return;
    }

    if (inst.Ccc == Ccc.T)
    {
        context.Return();
        return;
    }

    Operand condition = GetCondition(context, inst.Ccc, IrConsts.False);

    // If the condition is always false, we don't need to do anything.
    bool alwaysFalse = condition.Type == OperandType.Constant && condition.Value == IrConsts.False;

    if (alwaysFalse)
    {
        return;
    }

    // Jump over the return when the condition does not hold.
    Operand skipLabel = Label();

    context.BranchIfFalse(skipLabel, condition);
    context.Return();
    context.MarkLabel(skipLabel);
}
/// <summary>
/// Handles <see cref="InstKil"/>: emits a discard.
/// </summary>
public static void Kil(EmitterContext context)
{
    // No decoded field is read here.
    _ = context.GetOp<InstKil>();

    context.Discard();
}
/// <summary>
/// Handles <see cref="InstPbk"/> by delegating to the shared PBK/PCNT/SSY emitter.
/// </summary>
public static void Pbk(EmitterContext context)
{
    // No decoded field is read here.
    _ = context.GetOp<InstPbk>();

    EmitPbkPcntSsy(context);
}
/// <summary>
/// Handles <see cref="InstPcnt"/> by delegating to the shared PBK/PCNT/SSY emitter.
/// </summary>
public static void Pcnt(EmitterContext context)
{
    // No decoded field is read here.
    _ = context.GetOp<InstPcnt>();

    EmitPbkPcntSsy(context);
}
/// <summary>
/// Handles <see cref="InstRet"/>: emits a return on non-main functions; on the main
/// function the return is logged as invalid and nothing is emitted.
/// </summary>
public static void Ret(EmitterContext context)
{
    // No decoded field is read here.
    _ = context.GetOp<InstRet>();

    if (!context.IsNonMain)
    {
        context.Config.GpuAccessor.Log("Invalid return on main function.");
        return;
    }

    context.Return();
}
/// <summary>
/// Handles <see cref="InstSsy"/> by delegating to the shared PBK/PCNT/SSY emitter.
/// </summary>
public static void Ssy(EmitterContext context)
{
    // No decoded field is read here.
    _ = context.GetOp<InstSsy>();

    EmitPbkPcntSsy(context);
}
/// <summary>
/// Handles <see cref="InstSync"/> by delegating to the shared BRK/CONT/SYNC emitter.
/// </summary>
public static void Sync(EmitterContext context)
{
    // No decoded field is read here.
    _ = context.GetOp<InstSync>();

    EmitBrkContSync(context);
}
/// <summary>
/// Shared emitter for the push-style ops (PBK, PCNT, SSY). Finds the push op entry of the
/// current block that matches the current op address and, for each of its consumers, copies
/// the push op id registered in the consumer block's sync targets into the consumer's local.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
private static void EmitPbkPcntSsy(EmitterContext context)
{
    var pushOp = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == context.CurrOp.Address);

    foreach (KeyValuePair<Block, Operand> consumer in pushOp.Consumers)
    {
        // The id is looked up in the consumer block, keyed by the address of this push op.
        int pushOpId = consumer.Key.SyncTargets[context.CurrOp.Address].PushOpId;

        context.Copy(consumer.Value, Const(pushOpId));
    }
}
/// <summary>
/// Shared emitter for the pop-style ops (BRK, CONT, SYNC). With a single sync target a
/// plain branch is emitted; otherwise one conditional branch per target is emitted,
/// taken when the block's local equals that target's push op id.
/// </summary>
/// <param name="context">Emitter context receiving the operations</param>
private static void EmitBrkContSync(EmitterContext context)
{
    var syncTargets = context.CurrBlock.SyncTargets;

    if (syncTargets.Count == 1)
    {
        // If we have only one target, then the SSY/PBK is basically
        // a branch, we can produce better codegen for this case.
        ulong onlyTargetAddress = syncTargets.Values.First().PushOpInfo.Op.GetAbsoluteAddress();

        EmitBranch(context, onlyTargetAddress);

        return;
    }

    // TODO: Support CC here as well (condition).
    foreach (SyncTarget syncTarget in syncTargets.Values)
    {
        PushOpInfo info = syncTarget.PushOpInfo;

        Operand targetLabel = context.GetLabel(info.Op.GetAbsoluteAddress());
        Operand idLocal = info.Consumers[context.CurrBlock];

        Operand isThisTarget = context.ICompareEqual(idLocal, Const(syncTarget.PushOpId));

        context.BranchIfTrue(targetLabel, isThisTarget);
    }
}
/// <summary>
/// Emits a branch to <paramref name="address"/>, honouring the predicate and condition code
/// encoded in the current instruction.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
/// <param name="address">Address of the branch target</param>
private static void EmitBranch(EmitterContext context, ulong address)
{
    InstOp currOp = context.CurrOp;
    InstConditional condOp = new InstConditional(currOp.RawOpCode);

    // If we're branching to the next instruction, then the branch
    // is useless and we can ignore it.
    if (address == currOp.Address + 8)
    {
        return;
    }

    Operand target = context.GetLabel(address);
    Operand predicate = Register(condOp.Pred, RegisterType.Predicate);
    bool predicateIsTrue = condOp.Pred == RegisterConsts.PredicateTrueIndex;

    if (condOp.Ccc == Ccc.T)
    {
        // No condition code test: only the predicate decides.
        if (predicateIsTrue)
        {
            context.Branch(target);
        }
        else if (condOp.PredInv)
        {
            context.BranchIfFalse(target, predicate);
        }
        else
        {
            context.BranchIfTrue(target, predicate);
        }

        return;
    }

    Operand taken = GetCondition(context, condOp.Ccc);

    if (!predicateIsTrue)
    {
        // Both the (possibly inverted) predicate and the condition code test must hold.
        Operand effectivePred = condOp.PredInv ? context.BitwiseNot(predicate) : predicate;
        taken = context.BitwiseAnd(effectivePred, taken);
    }

    context.BranchIfTrue(target, taken);
}
}
}

View File

@ -0,0 +1,266 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static class InstEmitHelper
{
/// <summary>
/// Gets the operand for flag register 0 (ZF).
/// </summary>
/// <returns>Flag register operand</returns>
public static Operand GetZF() => Register(0, RegisterType.Flag);

/// <summary>
/// Gets the operand for flag register 1 (NF).
/// </summary>
/// <returns>Flag register operand</returns>
public static Operand GetNF() => Register(1, RegisterType.Flag);

/// <summary>
/// Gets the operand for flag register 2 (CF).
/// </summary>
/// <returns>Flag register operand</returns>
public static Operand GetCF() => Register(2, RegisterType.Flag);

/// <summary>
/// Gets the operand for flag register 3 (VF).
/// </summary>
/// <returns>Flag register operand</returns>
public static Operand GetVF() => Register(3, RegisterType.Flag);
/// <summary>
/// Gets the general purpose register operand with index <paramref name="rd"/>.
/// </summary>
/// <param name="rd">Register index</param>
/// <returns>General purpose register operand</returns>
public static Operand GetDest(int rd) => Register(rd, RegisterType.Gpr);

/// <summary>
/// Gets the general purpose register operand with index <paramref name="rd"/> with bit 0 set,
/// used as the second register when a 64-bit value is written as two 32-bit halves.
/// </summary>
/// <param name="rd">Register index</param>
/// <returns>General purpose register operand</returns>
public static Operand GetDest2(int rd) => Register(rd | 1, RegisterType.Gpr);
/// <summary>
/// Gets a constant buffer source operand.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cbufSlot">Constant buffer slot</param>
/// <param name="cbufOffset">Offset inside the constant buffer</param>
/// <param name="isFP64">When true, the values at the offset and at offset + 1 are packed into one double</param>
/// <returns>The source operand</returns>
public static Operand GetSrcCbuf(EmitterContext context, int cbufSlot, int cbufOffset, bool isFP64 = false)
{
    if (!isFP64)
    {
        return Cbuf(cbufSlot, cbufOffset);
    }

    Operand lowWord = Cbuf(cbufSlot, cbufOffset);
    Operand highWord = Cbuf(cbufSlot, cbufOffset + 1);

    return context.PackDouble2x32(lowWord, highWord);
}

/// <summary>
/// Gets an immediate source operand.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="imm">Immediate value</param>
/// <param name="isFP64">When true, the immediate becomes the second (high) word of a packed double whose first word is zero</param>
/// <returns>The source operand</returns>
public static Operand GetSrcImm(EmitterContext context, int imm, bool isFP64 = false)
{
    if (!isFP64)
    {
        return Const(imm);
    }

    Operand lowWord = Const(0);
    Operand highWord = Const(imm);

    return context.PackDouble2x32(lowWord, highWord);
}

/// <summary>
/// Gets a general purpose register source operand.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="reg">Register index</param>
/// <param name="isFP64">When true, registers <c>reg</c> and <c>reg | 1</c> are packed into one double</param>
/// <returns>The source operand</returns>
public static Operand GetSrcReg(EmitterContext context, int reg, bool isFP64 = false)
{
    if (!isFP64)
    {
        return Register(reg, RegisterType.Gpr);
    }

    Operand lowWord = Register(reg, RegisterType.Gpr);
    Operand highWord = Register(reg | 1, RegisterType.Gpr);

    return context.PackDouble2x32(lowWord, highWord);
}
/// <summary>
/// Gets a pair of half-float source operands from a register, with the swizzle and the
/// absolute/negate modifiers applied.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="swizzle">How the two values are picked from the register</param>
/// <param name="ra">Source register index</param>
/// <param name="negate">Whether the values are negated</param>
/// <param name="absolute">Whether the absolute value is taken</param>
/// <returns>Array with the two source operands</returns>
public static Operand[] GetHalfSrc(
    EmitterContext context,
    HalfSwizzle swizzle,
    int ra,
    bool negate,
    bool absolute)
{
    Operand[] unpacked = GetHalfUnpacked(context, GetSrcReg(context, ra), swizzle);

    return FPAbsNeg(context, unpacked, absolute, negate);
}

/// <summary>
/// Gets a pair of half-float source operands from a constant buffer, with the swizzle and the
/// absolute/negate modifiers applied.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="swizzle">How the two values are picked from the constant buffer word</param>
/// <param name="cbufSlot">Constant buffer slot</param>
/// <param name="cbufOffset">Offset inside the constant buffer</param>
/// <param name="negate">Whether the values are negated</param>
/// <param name="absolute">Whether the absolute value is taken</param>
/// <returns>Array with the two source operands</returns>
public static Operand[] GetHalfSrc(
    EmitterContext context,
    HalfSwizzle swizzle,
    int cbufSlot,
    int cbufOffset,
    bool negate,
    bool absolute)
{
    Operand[] unpacked = GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), swizzle);

    return FPAbsNeg(context, unpacked, absolute, negate);
}

/// <summary>
/// Gets a pair of constant half-float source operands from two separate immediates.
/// Each immediate is shifted left by 6 bits to form the 16-bit half-float bit pattern.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="immH0">Immediate for the first value</param>
/// <param name="immH1">Immediate for the second value</param>
/// <returns>Array with the two constant operands, converted to single precision</returns>
public static Operand[] GetHalfSrc(EmitterContext context, int immH0, int immH1)
{
    ushort bitsH0 = (ushort)(immH0 << 6);
    ushort bitsH1 = (ushort)(immH1 << 6);

    // Reinterpret the raw 16 bits as a Half, then widen to float.
    Operand h0 = ConstF((float)Unsafe.As<ushort, Half>(ref bitsH0));
    Operand h1 = ConstF((float)Unsafe.As<ushort, Half>(ref bitsH1));

    return new[] { h0, h1 };
}

/// <summary>
/// Gets a pair of constant half-float source operands from one 32-bit immediate holding the
/// first value in its low 16 bits and the second value in its high 16 bits.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="imm32">Immediate holding both half-float bit patterns</param>
/// <returns>Array with the two constant operands, converted to single precision</returns>
public static Operand[] GetHalfSrc(EmitterContext context, int imm32)
{
    ushort bitsH0 = (ushort)imm32;
    ushort bitsH1 = (ushort)(imm32 >> 16);

    // Reinterpret the raw 16 bits as a Half, then widen to float.
    Operand h0 = ConstF((float)Unsafe.As<ushort, Half>(ref bitsH0));
    Operand h1 = ConstF((float)Unsafe.As<ushort, Half>(ref bitsH1));

    return new[] { h0, h1 };
}
/// <summary>
/// Applies the absolute value and negate modifiers to every operand of the array, in place.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="operands">Operands to modify; the array itself is updated</param>
/// <param name="abs">Whether the absolute value is taken</param>
/// <param name="neg">Whether the value is negated</param>
/// <returns>The same array instance that was passed in</returns>
public static Operand[] FPAbsNeg(EmitterContext context, Operand[] operands, bool abs, bool neg)
{
    for (int i = 0; i < operands.Length; i++)
    {
        Operand current = operands[i];

        operands[i] = context.FPAbsNeg(current, abs, neg);
    }

    return operands;
}
/// <summary>
/// Produces the two source values selected by a half-float swizzle.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="src">Packed source value</param>
/// <param name="swizzle">Selects low/high halves, the same half twice, or the value itself twice (F32)</param>
/// <returns>Array with the two selected values</returns>
/// <exception cref="ArgumentException">The swizzle is not one of the known values</exception>
public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, HalfSwizzle swizzle)
{
    return swizzle switch
    {
        HalfSwizzle.F16 => new[] { context.UnpackHalf2x16Low(src), context.UnpackHalf2x16High(src) },
        HalfSwizzle.F32 => new[] { src, src },
        HalfSwizzle.H0H0 => new[] { context.UnpackHalf2x16Low(src), context.UnpackHalf2x16Low(src) },
        HalfSwizzle.H1H1 => new[] { context.UnpackHalf2x16High(src), context.UnpackHalf2x16High(src) },
        _ => throw new ArgumentException($"Invalid swizzle \"{swizzle}\".")
    };
}
/// <summary>
/// Builds the value to be written to the destination register from a pair of half-float results.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="swizzle">Output format: both results packed, first result only, or one result merged with the other half of the current destination</param>
/// <param name="results">The two results</param>
/// <param name="rd">Destination register index, read by the merge formats</param>
/// <returns>The value to store</returns>
/// <exception cref="ArgumentException">The format is not one of the known values</exception>
public static Operand GetHalfPacked(EmitterContext context, OFmt swizzle, Operand[] results, int rd)
{
    return swizzle switch
    {
        OFmt.F16 => context.PackHalf2x16(results[0], results[1]),
        OFmt.F32 => results[0],
        // Keep the high half already in the destination, replace the low one.
        OFmt.MrgH0 => context.PackHalf2x16(results[0], GetHalfDest(context, rd, isHigh: true)),
        // Keep the low half already in the destination, replace the high one.
        OFmt.MrgH1 => context.PackHalf2x16(GetHalfDest(context, rd, isHigh: false), results[1]),
        _ => throw new ArgumentException($"Invalid swizzle \"{swizzle}\".")
    };
}
/// <summary>
/// Unpacks one half-float from the destination register.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="rd">Destination register index</param>
/// <param name="isHigh">True to unpack the high half, false for the low half</param>
/// <returns>The unpacked value</returns>
public static Operand GetHalfDest(EmitterContext context, int rd, bool isHigh)
{
    Operand dest = GetDest(rd);

    return isHigh
        ? context.UnpackHalf2x16High(dest)
        : context.UnpackHalf2x16Low(dest);
}
/// <summary>
/// Gets a predicate register operand, optionally inverted.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="pred">Predicate register index</param>
/// <param name="not">Whether a bitwise NOT is applied to the predicate</param>
/// <returns>The predicate register, or the result of its bitwise NOT</returns>
public static Operand GetPredicate(EmitterContext context, int pred, bool not)
{
    Operand predicate = Register(pred, RegisterType.Predicate);

    return not ? context.BitwiseNot(predicate) : predicate;
}
/// <summary>
/// Writes a value to the destination register(s).
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="value">Value to write</param>
/// <param name="rd">Destination register index</param>
/// <param name="isFP64">When true, the value is split in two 32-bit words written to <c>rd</c> and <c>rd | 1</c></param>
public static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
{
    if (!isFP64)
    {
        context.Copy(GetDest(rd), value);
        return;
    }

    context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
    context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
}
/// <summary>
/// Sign-extends the low 16 bits of the value to 32 bits.
/// </summary>
/// <param name="imm16">Value whose low 16 bits hold the immediate</param>
/// <returns>The sign-extended value</returns>
public static int Imm16ToSInt(int imm16) => (short)imm16;

/// <summary>
/// Moves a 20-bit immediate to the top 20 bits of a 32-bit word (shift left by 12).
/// </summary>
/// <param name="imm20">20-bit immediate</param>
/// <returns>The shifted bit pattern</returns>
public static int Imm20ToFloat(int imm20) => imm20 << 12;

/// <summary>
/// Sign-extends a 20-bit immediate to 32 bits.
/// </summary>
/// <param name="imm20">20-bit immediate</param>
/// <returns>The sign-extended value</returns>
public static int Imm20ToSInt(int imm20)
{
    const int UnusedBits = 32 - 20;

    // Shift the sign bit to bit 31, then shift back arithmetically.
    return (imm20 << UnusedBits) >> UnusedBits;
}

/// <summary>
/// Sign-extends a 24-bit immediate to 32 bits.
/// </summary>
/// <param name="imm24">24-bit immediate</param>
/// <returns>The sign-extended value</returns>
public static int Imm24ToSInt(int imm24)
{
    const int UnusedBits = 32 - 24;

    // Shift the sign bit to bit 31, then shift back arithmetically.
    return (imm24 << UnusedBits) >> UnusedBits;
}
/// <summary>
/// Sign-extends the low <paramref name="srcBits"/> bits of a value to 32 bits, using a signed
/// bitfield extract starting at bit 0.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="src">Value to extend</param>
/// <param name="srcBits">Number of low bits that hold the value</param>
/// <returns>The sign-extended value</returns>
public static Operand SignExtendTo32(EmitterContext context, Operand src, int srcBits)
{
    Operand startBit = Const(0);
    Operand bitCount = Const(srcBits);

    return context.BitfieldExtractS32(src, startBit, bitCount);
}
/// <summary>
/// Zero-extends the low <paramref name="srcBits"/> bits of a value to 32 bits by masking off
/// the remaining bits.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="src">Value to extend</param>
/// <param name="srcBits">Number of low bits that hold the value; widths outside 0..32 are clamped</param>
/// <returns>The masked value</returns>
public static Operand ZeroExtendTo32(EmitterContext context, Operand src, int srcBits)
{
    int mask;

    if (srcBits <= 0)
    {
        // C# masks the shift count of a 32-bit shift to 5 bits, so "uint.MaxValue >> 32"
        // would leave every bit set. A width of zero keeps no bits at all.
        mask = 0;
    }
    else if (srcBits >= 32)
    {
        // The full width (or more) keeps every bit.
        mask = -1;
    }
    else
    {
        mask = (int)(uint.MaxValue >> (32 - srcBits));
    }

    return context.BitwiseAnd(src, Const(mask));
}
}
}

View File

@ -0,0 +1,699 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits IADD with a register as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IaddR(EmitterContext context)
{
    InstIaddR op = context.GetOp<InstIaddR>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcReg(context, op.SrcB);

    EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
}

/// <summary>
/// Emits IADD with a sign-extended 20-bit immediate as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IaddI(EmitterContext context)
{
    InstIaddI op = context.GetOp<InstIaddI>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
}

/// <summary>
/// Emits IADD with a constant buffer value as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IaddC(EmitterContext context)
{
    InstIaddC op = context.GetOp<InstIaddC>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
}

/// <summary>
/// Emits IADD32I, an integer add with a 32-bit immediate as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Iadd32i(EmitterContext context)
{
    InstIadd32i op = context.GetOp<InstIadd32i>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcImm(context, op.Imm32);

    EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
}
/// <summary>
/// Emits IADD3 with a register as the second source operand. This form carries the shift mode
/// and the per-operand half selection.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Iadd3R(EmitterContext context)
{
    InstIadd3R op = context.GetOp<InstIadd3R>();

    Operand first = GetSrcReg(context, op.SrcA);
    Operand second = GetSrcReg(context, op.SrcB);
    Operand third = GetSrcReg(context, op.SrcC);

    EmitIadd3(
        context,
        op.Lrs,
        first,
        second,
        third,
        op.Apart,
        op.Bpart,
        op.Cpart,
        op.Dest,
        op.NegA,
        op.NegB,
        op.NegC);
}

/// <summary>
/// Emits IADD3 with a sign-extended 20-bit immediate as the second source operand.
/// No shift is applied and all operands are used as full 32-bit values.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Iadd3I(EmitterContext context)
{
    InstIadd3I op = context.GetOp<InstIadd3I>();

    Operand first = GetSrcReg(context, op.SrcA);
    Operand second = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand third = GetSrcReg(context, op.SrcC);

    EmitIadd3(
        context,
        Lrs.None,
        first,
        second,
        third,
        HalfSelect.B32,
        HalfSelect.B32,
        HalfSelect.B32,
        op.Dest,
        op.NegA,
        op.NegB,
        op.NegC);
}

/// <summary>
/// Emits IADD3 with a constant buffer value as the second source operand.
/// No shift is applied and all operands are used as full 32-bit values.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Iadd3C(EmitterContext context)
{
    InstIadd3C op = context.GetOp<InstIadd3C>();

    Operand first = GetSrcReg(context, op.SrcA);
    Operand second = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand third = GetSrcReg(context, op.SrcC);

    EmitIadd3(
        context,
        Lrs.None,
        first,
        second,
        third,
        HalfSelect.B32,
        HalfSelect.B32,
        HalfSelect.B32,
        op.Dest,
        op.NegA,
        op.NegB,
        op.NegC);
}
/// <summary>
/// Emits IMAD (A * B + C) with registers for B and C.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void ImadR(EmitterContext context)
{
    InstImadR op = context.GetOp<InstImadR>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcReg(context, op.SrcB);
    Operand addend = GetSrcReg(context, op.SrcC);

    EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}

/// <summary>
/// Emits IMAD (A * B + C) with a sign-extended 20-bit immediate for B.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void ImadI(EmitterContext context)
{
    InstImadI op = context.GetOp<InstImadI>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand addend = GetSrcReg(context, op.SrcC);

    EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}

/// <summary>
/// Emits IMAD (A * B + C) with a constant buffer value for B.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void ImadC(EmitterContext context)
{
    InstImadC op = context.GetOp<InstImadC>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand addend = GetSrcReg(context, op.SrcC);

    EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}

/// <summary>
/// Emits IMAD (A * B + C) where B comes from the SrcC register field and C from a constant buffer.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void ImadRc(EmitterContext context)
{
    InstImadRc op = context.GetOp<InstImadRc>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcReg(context, op.SrcC);
    Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}

/// <summary>
/// Emits IMAD32I (A * B + C) with a 32-bit immediate for B; the destination register is also
/// read as the addend C.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Imad32i(EmitterContext context)
{
    InstImad32i op = context.GetOp<InstImad32i>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcImm(context, op.Imm32);
    Operand addend = GetSrcReg(context, op.Dest);

    EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}
/// <summary>
/// Emits IMUL with a register as the second factor. Implemented as a multiply-add with a zero
/// addend and no negation.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void ImulR(EmitterContext context)
{
    InstImulR op = context.GetOp<InstImulR>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcReg(context, op.SrcB);

    EmitImad(context, factorA, factorB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
}

/// <summary>
/// Emits IMUL with a sign-extended 20-bit immediate as the second factor. Implemented as a
/// multiply-add with a zero addend and no negation.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void ImulI(EmitterContext context)
{
    InstImulI op = context.GetOp<InstImulI>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitImad(context, factorA, factorB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
}

/// <summary>
/// Emits IMUL with a constant buffer value as the second factor. Implemented as a multiply-add
/// with a zero addend and no negation.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void ImulC(EmitterContext context)
{
    InstImulC op = context.GetOp<InstImulC>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitImad(context, factorA, factorB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
}

/// <summary>
/// Emits IMUL32I with a 32-bit immediate as the second factor. Implemented as a multiply-add
/// with a zero addend and no negation.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Imul32i(EmitterContext context)
{
    InstImul32i op = context.GetOp<InstImul32i>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcImm(context, op.Imm32);

    EmitImad(context, factorA, factorB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
}
/// <summary>
/// Emits ISCADD ((A &lt;&lt; shift) + B) with a register as B.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IscaddR(EmitterContext context)
{
    InstIscaddR op = context.GetOp<InstIscaddR>();

    Operand scaled = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcReg(context, op.SrcB);

    EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
}

/// <summary>
/// Emits ISCADD ((A &lt;&lt; shift) + B) with a sign-extended 20-bit immediate as B.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IscaddI(EmitterContext context)
{
    InstIscaddI op = context.GetOp<InstIscaddI>();

    Operand scaled = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
}

/// <summary>
/// Emits ISCADD ((A &lt;&lt; shift) + B) with a constant buffer value as B.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IscaddC(EmitterContext context)
{
    InstIscaddC op = context.GetOp<InstIscaddC>();

    Operand scaled = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
}

/// <summary>
/// Emits ISCADD32I ((A &lt;&lt; shift) + B) with a 32-bit immediate as B. This form never negates
/// its operands.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Iscadd32i(EmitterContext context)
{
    InstIscadd32i op = context.GetOp<InstIscadd32i>();

    Operand scaled = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcImm(context, op.Imm32);

    EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, AvgMode.NoNeg, op.WriteCC);
}
/// <summary>
/// Emits LEA with a register as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void LeaR(EmitterContext context)
{
    InstLeaR op = context.GetOp<InstLeaR>();

    Operand scaled = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcReg(context, op.SrcB);

    EmitLea(context, scaled, addend, op.Dest, op.NegA, op.ImmU5);
}

/// <summary>
/// Emits LEA with a sign-extended 20-bit immediate as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void LeaI(EmitterContext context)
{
    InstLeaI op = context.GetOp<InstLeaI>();

    Operand scaled = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitLea(context, scaled, addend, op.Dest, op.NegA, op.ImmU5);
}

/// <summary>
/// Emits LEA with a constant buffer value as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void LeaC(EmitterContext context)
{
    InstLeaC op = context.GetOp<InstLeaC>();

    Operand scaled = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitLea(context, scaled, addend, op.Dest, op.NegA, op.ImmU5);
}

/// <summary>
/// Emits LEA.HI with a register as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void LeaHiR(EmitterContext context)
{
    InstLeaHiR op = context.GetOp<InstLeaHiR>();

    Operand scaledLow = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcReg(context, op.SrcB);
    Operand scaledHigh = GetSrcReg(context, op.SrcC);

    EmitLeaHi(context, scaledLow, addend, scaledHigh, op.Dest, op.NegA, op.ImmU5);
}

/// <summary>
/// Emits LEA.HI with a constant buffer value as the second source operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void LeaHiC(EmitterContext context)
{
    InstLeaHiC op = context.GetOp<InstLeaHiC>();

    Operand scaledLow = GetSrcReg(context, op.SrcA);
    Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand scaledHigh = GetSrcReg(context, op.SrcC);

    EmitLeaHi(context, scaledLow, addend, scaledHigh, op.Dest, op.NegA, op.ImmU5);
}
/// <summary>
/// Emits XMAD with registers for B and C.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void XmadR(EmitterContext context)
{
    InstXmadR op = context.GetOp<InstXmadR>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcReg(context, op.SrcB);
    Operand addend = GetSrcReg(context, op.SrcC);

    EmitXmad(
        context,
        op.XmadCop,
        factorA,
        factorB,
        addend,
        op.Dest,
        op.ASigned,
        op.BSigned,
        op.HiloA,
        op.HiloB,
        op.Psl,
        op.Mrg,
        op.X,
        op.WriteCC);
}

/// <summary>
/// Emits XMAD with a 16-bit immediate for B. The low half of B is always the one used.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void XmadI(EmitterContext context)
{
    InstXmadI op = context.GetOp<InstXmadI>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcImm(context, op.Imm16);
    Operand addend = GetSrcReg(context, op.SrcC);

    EmitXmad(
        context,
        op.XmadCop,
        factorA,
        factorB,
        addend,
        op.Dest,
        op.ASigned,
        op.BSigned,
        op.HiloA,
        false,
        op.Psl,
        op.Mrg,
        op.X,
        op.WriteCC);
}

/// <summary>
/// Emits XMAD with a constant buffer value for B.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void XmadC(EmitterContext context)
{
    InstXmadC op = context.GetOp<InstXmadC>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand addend = GetSrcReg(context, op.SrcC);

    EmitXmad(
        context,
        op.XmadCop,
        factorA,
        factorB,
        addend,
        op.Dest,
        op.ASigned,
        op.BSigned,
        op.HiloA,
        op.HiloB,
        op.Psl,
        op.Mrg,
        op.X,
        op.WriteCC);
}

/// <summary>
/// Emits XMAD where B comes from the SrcC register field and C from a constant buffer.
/// The product shift and merge options are always disabled for this form.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void XmadRc(EmitterContext context)
{
    InstXmadRc op = context.GetOp<InstXmadRc>();

    Operand factorA = GetSrcReg(context, op.SrcA);
    Operand factorB = GetSrcReg(context, op.SrcC);
    Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitXmad(
        context,
        op.XmadCop,
        factorA,
        factorB,
        addend,
        op.Dest,
        op.ASigned,
        op.BSigned,
        op.HiloA,
        op.HiloB,
        false,
        false,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits an integer add with optional operand negation, carry-in and flag updates.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcA">First operand</param>
/// <param name="srcB">Second operand</param>
/// <param name="rd">Destination register index</param>
/// <param name="avgMode">Selects which operand, if any, is negated</param>
/// <param name="extended">Whether bit 0 of the carry flag is added to the result</param>
/// <param name="writeCC">Whether the flags are updated</param>
private static void EmitIadd(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    int rd,
    AvgMode avgMode,
    bool extended,
    bool writeCC)
{
    bool negateA = avgMode == AvgMode.NegA;
    srcA = context.INegate(srcA, negateA);

    bool negateB = avgMode == AvgMode.NegB;
    srcB = context.INegate(srcB, negateB);

    Operand sum = context.IAdd(srcA, srcB);

    if (extended)
    {
        Operand carryIn = context.BitwiseAnd(GetCF(), Const(1));

        sum = context.IAdd(sum, carryIn);
    }

    SetIaddFlags(context, sum, srcA, srcB, writeCC, extended);

    // TODO: SAT.

    context.Copy(GetDest(rd), sum);
}
/// <summary>
/// Emits a three-operand integer add: ((A + B) shifted per <paramref name="mode"/>) + C.
/// Each operand can be a full register or one of its 16-bit halves, and can be negated.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="mode">Shift applied to A + B before C is added (none, left by 16 or right by 16)</param>
/// <param name="srcA">First operand</param>
/// <param name="srcB">Second operand</param>
/// <param name="srcC">Third operand</param>
/// <param name="partA">Part of the first operand that is used</param>
/// <param name="partB">Part of the second operand that is used</param>
/// <param name="partC">Part of the third operand that is used</param>
/// <param name="rd">Destination register index</param>
/// <param name="negateA">Whether the first operand is negated</param>
/// <param name="negateB">Whether the second operand is negated</param>
/// <param name="negateC">Whether the third operand is negated</param>
private static void EmitIadd3(
    EmitterContext context,
    Lrs mode,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    HalfSelect partA,
    HalfSelect partB,
    HalfSelect partC,
    int rd,
    bool negateA,
    bool negateB,
    bool negateC)
{
    // Selects the requested part of an operand, zero-extended to 32 bits.
    Operand Extend(Operand src, HalfSelect part)
    {
        if (part == HalfSelect.B32)
        {
            return src;
        }

        if (part == HalfSelect.H0)
        {
            return context.BitwiseAnd(src, Const(0xffff));
        }
        else if (part == HalfSelect.H1)
        {
            return context.ShiftRightU32(src, Const(16));
        }
        else
        {
            context.Config.GpuAccessor.Log($"Iadd3 has invalid component selection {part}.");
        }

        return src;
    }

    srcA = context.INegate(Extend(srcA, partA), negateA);
    srcB = context.INegate(Extend(srcB, partB), negateB);
    srcC = context.INegate(Extend(srcC, partC), negateC);

    Operand res = context.IAdd(srcA, srcB);

    if (mode != Lrs.None)
    {
        if (mode == Lrs.LeftShift)
        {
            res = context.ShiftLeft(res, Const(16));
        }
        else if (mode == Lrs.RightShift)
        {
            res = context.ShiftRightU32(res, Const(16));
        }
        else
        {
            // Unknown shift mode: report it, as is done for an invalid component
            // selection, and carry on without shifting.
            context.Config.GpuAccessor.Log($"Iadd3 has invalid shift mode {mode}.");
        }
    }

    res = context.IAdd(res, srcC);

    context.Copy(GetDest(rd), res);

    // TODO: CC, X, corner cases.
}
/// <summary>
/// Emits an integer multiply-add, A * B + C, producing either the low or the high 32 bits of
/// the product before the addition.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcA">First factor</param>
/// <param name="srcB">Second factor</param>
/// <param name="srcC">Addend; skipped when it is the constant zero</param>
/// <param name="rd">Destination register index</param>
/// <param name="avgMode">Negation mode: NegA negates the second factor, NegB negates the addend</param>
/// <param name="signedA">Whether the first factor is signed (only used for the high result)</param>
/// <param name="signedB">Whether the second factor is signed (only used for the high result)</param>
/// <param name="high">True to use the high 32 bits of the product, false for the low 32 bits</param>
private static void EmitImad(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    AvgMode avgMode,
    bool signedA,
    bool signedB,
    bool high)
{
    // The NegA mode is applied to the second factor and the NegB mode to the addend.
    srcB = context.INegate(srcB, avgMode == AvgMode.NegA);
    srcC = context.INegate(srcC, avgMode == AvgMode.NegB);

    Operand result;

    if (!high)
    {
        result = context.IMultiply(srcA, srcB);
    }
    else if (signedA && signedB)
    {
        result = context.MultiplyHighS32(srcA, srcB);
    }
    else
    {
        result = context.MultiplyHighU32(srcA, srcB);

        // Mixed signedness: start from the unsigned high product and add the other factor
        // multiplied by the signed factor shifted right (signed) by 31.
        if (signedA)
        {
            Operand signOfA = context.ShiftRightS32(srcA, Const(31));

            result = context.IAdd(result, context.IMultiply(srcB, signOfA));
        }
        else if (signedB)
        {
            Operand signOfB = context.ShiftRightS32(srcB, Const(31));

            result = context.IAdd(result, context.IMultiply(srcA, signOfB));
        }
    }

    bool addendIsZero = srcC.Type == OperandType.Constant && srcC.Value == 0;

    if (!addendIsZero)
    {
        result = context.IAdd(result, srcC);
    }

    // TODO: CC, X, SAT, and more?

    context.Copy(GetDest(rd), result);
}
/// <summary>
/// Emits a scaled integer add: (A &lt;&lt; shift) + B, with optional negation of either term
/// (applied to A after the shift) and flag updates.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcA">Operand that is shifted</param>
/// <param name="srcB">Operand that is added</param>
/// <param name="rd">Destination register index</param>
/// <param name="shift">Left shift amount applied to the first operand</param>
/// <param name="avgMode">Selects which term, if any, is negated</param>
/// <param name="writeCC">Whether the flags are updated</param>
private static void EmitIscadd(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    int rd,
    int shift,
    AvgMode avgMode,
    bool writeCC)
{
    Operand scaled = context.ShiftLeft(srcA, Const(shift));

    scaled = context.INegate(scaled, avgMode == AvgMode.NegA);

    Operand addend = context.INegate(srcB, avgMode == AvgMode.NegB);

    Operand sum = context.IAdd(scaled, addend);

    SetIaddFlags(context, sum, scaled, addend, writeCC, false);

    context.Copy(GetDest(rd), sum);
}
/// <summary>
/// Emits LEA: (A &lt;&lt; shift) + B, with optional negation of the shifted A.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcA">Operand that is shifted</param>
/// <param name="srcB">Operand that is added</param>
/// <param name="rd">Destination register index</param>
/// <param name="negateA">Whether the shifted first operand is negated</param>
/// <param name="shift">Left shift amount applied to the first operand</param>
public static void EmitLea(EmitterContext context, Operand srcA, Operand srcB, int rd, bool negateA, int shift)
{
    Operand scaled = context.ShiftLeft(srcA, Const(shift));

    scaled = context.INegate(scaled, negateA);

    Operand sum = context.IAdd(scaled, srcB);

    context.Copy(GetDest(rd), sum);

    // TODO: CC, X.
}
/// <summary>
/// Emits LEA.HI: treats C:A as a 64-bit value (A low word, C high word), shifts it left,
/// optionally negates it, and adds B to the resulting high word.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="srcA">Low word of the shifted value</param>
/// <param name="srcB">Operand added to the high word of the result</param>
/// <param name="srcC">High word of the shifted value</param>
/// <param name="rd">Destination register index</param>
/// <param name="negateA">Whether the shifted 64-bit value is negated</param>
/// <param name="shift">Left shift amount</param>
private static void EmitLeaHi(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool negateA,
    int shift)
{
    Operand lowWord = context.ShiftLeft(srcA, Const(shift));

    // Bits of the low word that cross into the high word; none when there is no shift.
    Operand highWord;

    if (shift == 0)
    {
        highWord = Const(0);
    }
    else
    {
        highWord = context.ShiftRightU32(srcA, Const(32 - shift));
    }

    highWord = context.BitwiseOr(highWord, context.ShiftLeft(srcC, Const(shift)));

    if (negateA)
    {
        // Perform 64-bit negation by doing bitwise not of the value,
        // then adding 1 and carrying over from low to high.
        lowWord = context.BitwiseNot(lowWord);
        highWord = context.BitwiseNot(highWord);

        lowWord = AddWithCarry(context, lowWord, Const(1), out Operand lowCarryOut);
        highWord = context.IAdd(highWord, lowCarryOut);
    }

    Operand sum = context.IAdd(highWord, srcB);

    context.Copy(GetDest(rd), sum);

    // TODO: CC, X.
}
/// <summary>
/// Emits XMAD for the instruction forms that encode the C operand mode as <see cref="XmadCop2"/>.
/// The mode is converted to the matching <see cref="XmadCop"/> value and forwarded to the main
/// overload. An unknown mode is logged and nothing is emitted.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="mode">C operand mode</param>
/// <param name="srcA">First factor</param>
/// <param name="srcB">Second factor</param>
/// <param name="srcC">Addend</param>
/// <param name="rd">Destination register index</param>
/// <param name="signedA">Whether the first factor is signed</param>
/// <param name="signedB">Whether the second factor is signed</param>
/// <param name="highA">Whether the high half of the first factor is used</param>
/// <param name="highB">Whether the high half of the second factor is used</param>
/// <param name="productShiftLeft">Whether the product is shifted left by 16</param>
/// <param name="merge">Whether the result is merged with the second factor</param>
/// <param name="extended">Whether the carry flag is used as carry-in</param>
/// <param name="writeCC">Whether the flags are updated</param>
public static void EmitXmad(
    EmitterContext context,
    XmadCop2 mode,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool signedA,
    bool signedB,
    bool highA,
    bool highB,
    bool productShiftLeft,
    bool merge,
    bool extended,
    bool writeCC)
{
    XmadCop? converted = mode switch
    {
        XmadCop2.Cfull => XmadCop.Cfull,
        XmadCop2.Clo => XmadCop.Clo,
        XmadCop2.Chi => XmadCop.Chi,
        XmadCop2.Csfu => XmadCop.Csfu,
        _ => (XmadCop?)null
    };

    if (!converted.HasValue)
    {
        context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
        return;
    }

    EmitXmad(
        context,
        converted.Value,
        srcA,
        srcB,
        srcC,
        rd,
        signedA,
        signedB,
        highA,
        highB,
        productShiftLeft,
        merge,
        extended,
        writeCC);
}
/// <summary>
/// Emits XMAD: multiplies a 16-bit half of A by a 16-bit half of B and combines the product
/// with C according to <paramref name="mode"/>. An unknown mode is logged and nothing is written.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="mode">How the C operand is prepared before being combined with the product</param>
/// <param name="srcA">First factor</param>
/// <param name="srcB">Second factor</param>
/// <param name="srcC">Addend</param>
/// <param name="rd">Destination register index</param>
/// <param name="signedA">Whether the selected half of A is sign-extended</param>
/// <param name="signedB">Whether the selected half of B is sign-extended</param>
/// <param name="highA">Whether the high half of A is used instead of the low half</param>
/// <param name="highB">Whether the high half of B is used instead of the low half</param>
/// <param name="productShiftLeft">Whether the product is shifted left by 16</param>
/// <param name="merge">Whether the low 16 bits of the result are merged with the unmodified B shifted left by 16</param>
/// <param name="extended">Whether bit 0 of the carry flag is added to the product</param>
/// <param name="writeCC">Whether the flags are updated</param>
public static void EmitXmad(
EmitterContext context,
XmadCop mode,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
bool signedA,
bool signedB,
bool highA,
bool highB,
bool productShiftLeft,
bool merge,
bool extended,
bool writeCC)
{
// Keep the full 32-bit B around: the Cbcc mode and the merge option use it unextended.
var srcBUnmodified = srcB;
// Extracts the low or high 16 bits of a value, sign- or zero-extended to 32 bits.
Operand Extend16To32(Operand src, bool high, bool signed)
{
if (signed && high)
{
return context.ShiftRightS32(src, Const(16));
}
else if (signed)
{
return context.BitfieldExtractS32(src, Const(0), Const(16));
}
else if (high)
{
return context.ShiftRightU32(src, Const(16));
}
else
{
return context.BitwiseAnd(src, Const(0xffff));
}
}
srcA = Extend16To32(srcA, highA, signedA);
srcB = Extend16To32(srcB, highB, signedB);
Operand res = context.IMultiply(srcA, srcB);
if (productShiftLeft)
{
res = context.ShiftLeft(res, Const(16));
}
switch (mode)
{
case XmadCop.Cfull:
// C is used as is.
break;
case XmadCop.Clo:
// Only the low 16 bits of C, zero-extended.
srcC = Extend16To32(srcC, high: false, signed: false);
break;
case XmadCop.Chi:
// Only the high 16 bits of C, zero-extended.
srcC = Extend16To32(srcC, high: true, signed: false);
break;
case XmadCop.Cbcc:
// C plus the unmodified B shifted left by 16.
srcC = context.IAdd(srcC, context.ShiftLeft(srcBUnmodified, Const(16)));
break;
case XmadCop.Csfu:
// C minus (bit 31 of the extended A + bit 31 of the extended B), each moved to bit 16.
Operand signAdjustA = context.ShiftLeft(context.ShiftRightU32(srcA, Const(31)), Const(16));
Operand signAdjustB = context.ShiftLeft(context.ShiftRightU32(srcB, Const(31)), Const(16));
srcC = context.ISubtract(srcC, context.IAdd(signAdjustA, signAdjustB));
break;
default:
context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
return;
}
// The product (before the addition) is the first operand used for the flag computation.
Operand product = res;
if (extended)
{
// Add with carry.
// NOTE(review): this path adds only the carry-in bit; srcC is not added to the result
// here, although it is still passed to SetIaddFlags below. Confirm this is intended.
res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
}
else
{
// Add (no carry in).
res = context.IAdd(res, srcC);
}
SetIaddFlags(context, res, product, srcC, writeCC, extended);
if (merge)
{
// Low half comes from the result, high half from the low 16 bits of the unmodified B.
res = context.BitwiseAnd(res, Const(0xffff));
res = context.BitwiseOr(res, context.ShiftLeft(srcBUnmodified, Const(16)));
}
context.Copy(GetDest(rd), res);
}
/// <summary>
/// Updates the carry (CF) and overflow (VF) flags for an integer addition, then updates the
/// zero/negative flags through SetZnFlags. Does nothing when <paramref name="setCC"/> is false.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="res">Result of the addition</param>
/// <param name="srcA">First operand of the addition</param>
/// <param name="srcB">Second operand of the addition</param>
/// <param name="setCC">Whether the flags are updated at all</param>
/// <param name="extended">Whether the addition used the carry flag as carry-in</param>
private static void SetIaddFlags(EmitterContext context, Operand res, Operand srcA, Operand srcB, bool setCC, bool extended)
{
    if (!setCC)
    {
        return;
    }

    Operand carryOut;

    if (!extended)
    {
        // C = d < a
        carryOut = context.ICompareLessUnsigned(res, srcA);
    }
    else
    {
        // C = (d == a && CIn) || d < a
        Operand unchanged = context.ICompareEqual(res, srcA);
        Operand wrapped = context.ICompareLessUnsigned(res, srcA);

        unchanged = context.BitwiseAnd(unchanged, GetCF());

        carryOut = context.BitwiseOr(unchanged, wrapped);
    }

    context.Copy(GetCF(), carryOut);

    // V = (d ^ a) & ~(a ^ b) < 0
    Operand resultSignChanged = context.BitwiseExclusiveOr(res, srcA);
    Operand operandSignsDiffer = context.BitwiseExclusiveOr(srcA, srcB);
    Operand operandSignsMatch = context.BitwiseNot(operandSignsDiffer);

    Operand overflowBits = context.BitwiseAnd(resultSignChanged, operandSignsMatch);

    context.Copy(GetVF(), context.ICompareLess(overflowBits, Const(0)));

    SetZnFlags(context, res, setCC: true, extended: extended);
}
}
}

View File

@ -0,0 +1,310 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits ICMP with registers for B and C.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IcmpR(EmitterContext context)
{
    InstIcmpR op = context.GetOp<InstIcmpR>();

    Operand whenTrue = GetSrcReg(context, op.SrcA);
    Operand whenFalse = GetSrcReg(context, op.SrcB);
    Operand tested = GetSrcReg(context, op.SrcC);

    EmitIcmp(context, op.IComp, whenTrue, whenFalse, tested, op.Dest, op.Signed);
}

/// <summary>
/// Emits ICMP with a sign-extended 20-bit immediate for B.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IcmpI(EmitterContext context)
{
    InstIcmpI op = context.GetOp<InstIcmpI>();

    Operand whenTrue = GetSrcReg(context, op.SrcA);
    Operand whenFalse = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand tested = GetSrcReg(context, op.SrcC);

    EmitIcmp(context, op.IComp, whenTrue, whenFalse, tested, op.Dest, op.Signed);
}

/// <summary>
/// Emits ICMP with a constant buffer value for B.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IcmpC(EmitterContext context)
{
    InstIcmpC op = context.GetOp<InstIcmpC>();

    Operand whenTrue = GetSrcReg(context, op.SrcA);
    Operand whenFalse = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand tested = GetSrcReg(context, op.SrcC);

    EmitIcmp(context, op.IComp, whenTrue, whenFalse, tested, op.Dest, op.Signed);
}

/// <summary>
/// Emits ICMP where B comes from the SrcC register field and C from a constant buffer.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IcmpRc(EmitterContext context)
{
    InstIcmpRc op = context.GetOp<InstIcmpRc>();

    Operand whenTrue = GetSrcReg(context, op.SrcA);
    Operand whenFalse = GetSrcReg(context, op.SrcC);
    Operand tested = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitIcmp(context, op.IComp, whenTrue, whenFalse, tested, op.Dest, op.Signed);
}
/// <summary>
/// Emits ISET with a register as the second compared operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IsetR(EmitterContext context)
{
    InstIsetR op = context.GetOp<InstIsetR>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcReg(context, op.SrcB);

    EmitIset(
        context,
        op.IComp,
        op.Bop,
        lhs,
        rhs,
        op.SrcPred,
        op.SrcPredInv,
        op.Dest,
        op.BVal,
        op.Signed,
        op.X,
        op.WriteCC);
}

/// <summary>
/// Emits ISET with a sign-extended 20-bit immediate as the second compared operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IsetI(EmitterContext context)
{
    InstIsetI op = context.GetOp<InstIsetI>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitIset(
        context,
        op.IComp,
        op.Bop,
        lhs,
        rhs,
        op.SrcPred,
        op.SrcPredInv,
        op.Dest,
        op.BVal,
        op.Signed,
        op.X,
        op.WriteCC);
}

/// <summary>
/// Emits ISET with a constant buffer value as the second compared operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IsetC(EmitterContext context)
{
    InstIsetC op = context.GetOp<InstIsetC>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitIset(
        context,
        op.IComp,
        op.Bop,
        lhs,
        rhs,
        op.SrcPred,
        op.SrcPredInv,
        op.Dest,
        op.BVal,
        op.Signed,
        op.X,
        op.WriteCC);
}
/// <summary>
/// Emits ISETP with a register as the second compared operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IsetpR(EmitterContext context)
{
    InstIsetpR op = context.GetOp<InstIsetpR>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcReg(context, op.SrcB);

    EmitIsetp(
        context,
        op.IComp,
        op.Bop,
        lhs,
        rhs,
        op.SrcPred,
        op.SrcPredInv,
        op.DestPred,
        op.DestPredInv,
        op.Signed,
        op.X);
}

/// <summary>
/// Emits ISETP with a sign-extended 20-bit immediate as the second compared operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IsetpI(EmitterContext context)
{
    InstIsetpI op = context.GetOp<InstIsetpI>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitIsetp(
        context,
        op.IComp,
        op.Bop,
        lhs,
        rhs,
        op.SrcPred,
        op.SrcPredInv,
        op.DestPred,
        op.DestPredInv,
        op.Signed,
        op.X);
}

/// <summary>
/// Emits ISETP with a constant buffer value as the second compared operand.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void IsetpC(EmitterContext context)
{
    InstIsetpC op = context.GetOp<InstIsetpC>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitIsetp(
        context,
        op.IComp,
        op.Bop,
        lhs,
        rhs,
        op.SrcPred,
        op.SrcPredInv,
        op.DestPred,
        op.DestPredInv,
        op.Signed,
        op.X);
}
/// <summary>
/// Emits ICMP: compares C against zero and writes A to the destination when the comparison
/// holds, B otherwise.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cmpOp">Comparison applied to C and zero</param>
/// <param name="srcA">Value selected when the comparison holds</param>
/// <param name="srcB">Value selected when the comparison does not hold</param>
/// <param name="srcC">Value compared against zero</param>
/// <param name="rd">Destination register index</param>
/// <param name="isSigned">Whether the comparison is signed</param>
private static void EmitIcmp(
    EmitterContext context,
    IComp cmpOp,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool isSigned)
{
    Operand zero = Const(0);
    Operand condition = GetIntComparison(context, cmpOp, srcC, zero, isSigned);

    Operand selected = context.ConditionalSelect(condition, srcA, srcB);

    context.Copy(GetDest(rd), selected);
}
/// <summary>
/// Emits ISET: compares A and B, combines the outcome with a predicate through a logical
/// operation, and writes the result to a register either as is or as a float (1.0 or 0).
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cmpOp">Comparison applied to A and B</param>
/// <param name="logicOp">Logical operation combining the comparison with the predicate</param>
/// <param name="srcA">First compared operand</param>
/// <param name="srcB">Second compared operand</param>
/// <param name="srcPred">Source predicate register index</param>
/// <param name="srcPredInv">Whether the source predicate is inverted</param>
/// <param name="rd">Destination register index</param>
/// <param name="boolFloat">Whether the result is written as a float value</param>
/// <param name="isSigned">Whether the comparison is signed</param>
/// <param name="extended">Whether the extended comparison is used</param>
/// <param name="writeCC">Whether the flags are updated</param>
private static void EmitIset(
    EmitterContext context,
    IComp cmpOp,
    BoolOp logicOp,
    Operand srcA,
    Operand srcB,
    int srcPred,
    bool srcPredInv,
    int rd,
    bool boolFloat,
    bool isSigned,
    bool extended,
    bool writeCC)
{
    Operand comparison = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    Operand combined = GetPredLogicalOp(context, logicOp, comparison, predicate);

    Operand dest = GetDest(rd);

    if (!boolFloat)
    {
        context.Copy(dest, combined);

        SetZnFlags(context, combined, writeCC, extended);

        return;
    }

    Operand asFloat = context.ConditionalSelect(combined, ConstF(1), Const(0));

    context.Copy(dest, asFloat);

    SetFPZnFlags(context, asFloat, writeCC);
}
/// <summary>
/// Emits ISETP: compares A and B and writes two predicates, one derived from the comparison
/// and one from its bitwise NOT, each combined with the source predicate through the logical
/// operation.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cmpOp">Comparison applied to A and B</param>
/// <param name="logicOp">Logical operation combining each outcome with the source predicate</param>
/// <param name="srcA">First compared operand</param>
/// <param name="srcB">Second compared operand</param>
/// <param name="srcPred">Source predicate register index</param>
/// <param name="srcPredInv">Whether the source predicate is inverted</param>
/// <param name="destPred">Index of the predicate register receiving the comparison outcome</param>
/// <param name="destPredInv">Index of the predicate register receiving the inverted outcome</param>
/// <param name="isSigned">Whether the comparison is signed</param>
/// <param name="extended">Whether the extended comparison is used</param>
private static void EmitIsetp(
    EmitterContext context,
    IComp cmpOp,
    BoolOp logicOp,
    Operand srcA,
    Operand srcB,
    int srcPred,
    bool srcPredInv,
    int destPred,
    int destPredInv,
    bool isSigned,
    bool extended)
{
    Operand outcome = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
    Operand inverseOutcome = context.BitwiseNot(outcome);

    Operand predicate = GetPredicate(context, srcPred, srcPredInv);

    outcome = GetPredLogicalOp(context, logicOp, outcome, predicate);
    inverseOutcome = GetPredLogicalOp(context, logicOp, inverseOutcome, predicate);

    context.Copy(Register(destPred, RegisterType.Predicate), outcome);
    context.Copy(Register(destPredInv, RegisterType.Predicate), inverseOutcome);
}
/// <summary>
/// Emits an integer comparison, using either the regular or the extended variant.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cond">Comparison condition</param>
/// <param name="srcA">First compared operand</param>
/// <param name="srcB">Second compared operand</param>
/// <param name="isSigned">Whether the comparison is signed</param>
/// <param name="extended">True to use the extended comparison</param>
/// <returns>Result of the comparison</returns>
private static Operand GetIntComparison(
    EmitterContext context,
    IComp cond,
    Operand srcA,
    Operand srcB,
    bool isSigned,
    bool extended)
{
    if (extended)
    {
        return GetIntComparisonExtended(context, cond, srcA, srcB, isSigned);
    }

    return GetIntComparison(context, cond, srcA, srcB, isSigned);
}
/// <summary>
/// Emits the extended integer comparison. The operands passed in are compared directly, and
/// the current zero (ZF) and carry (CF) flags break ties when they are equal. Going by the
/// per-case formulas, the operands are the high words (xh, yh) of a wider comparison and the
/// flags stand for the low-word (xl, yl) outcome; presumably the flags were set by a preceding
/// instruction operating on the low words.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cond">Comparison condition</param>
/// <param name="srcA">First compared operand</param>
/// <param name="srcB">Second compared operand</param>
/// <param name="isSigned">Whether the ordering comparisons on the operands are signed</param>
/// <returns>Result of the comparison</returns>
/// <exception cref="ArgumentException">The condition is not one of the known values</exception>
private static Operand GetIntComparisonExtended(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
{
    if (cond == IComp.T)
    {
        return Const(IrConsts.True);
    }

    if (cond == IComp.F)
    {
        return Const(IrConsts.False);
    }

    // Note: no subtraction is emitted here. Every case below computes its result from
    // comparisons and the flags alone, so a subtract-with-borrow result would never be read.
    Operand res;

    switch (cond)
    {
        case IComp.Eq: // r = xh == yh && xl == yl
            res = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetZF());
            break;
        case IComp.Lt: // r = xh < yh || (xh == yh && xl < yl)
            Operand notC = context.BitwiseNot(GetCF());
            Operand prevLt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notC);
            res = isSigned
                ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLt)
                : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLt);
            break;
        case IComp.Le: // r = xh < yh || (xh == yh && xl <= yl)
            Operand zOrNotC = context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
            Operand prevLe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), zOrNotC);
            res = isSigned
                ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLe)
                : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLe);
            break;
        case IComp.Gt: // r = xh > yh || (xh == yh && xl > yl)
            Operand notZAndC = context.BitwiseAnd(context.BitwiseNot(GetZF()), GetCF());
            Operand prevGt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notZAndC);
            res = isSigned
                ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGt)
                : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGt);
            break;
        case IComp.Ge: // r = xh > yh || (xh == yh && xl >= yl)
            Operand prevGe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetCF());
            res = isSigned
                ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGe)
                : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGe);
            break;
        case IComp.Ne: // r = xh != yh || xl != yl
            res = context.BitwiseOr(context.ICompareNotEqual(srcA, srcB), context.BitwiseNot(GetZF()));
            break;
        default:
            throw new ArgumentException($"Unexpected condition \"{cond}\".");
    }

    return res;
}
/// <summary>
/// Emits a regular integer comparison.
/// </summary>
/// <param name="context">Emitter context</param>
/// <param name="cond">Comparison condition; T and F yield the constants true and false</param>
/// <param name="srcA">First compared operand</param>
/// <param name="srcB">Second compared operand</param>
/// <param name="isSigned">Whether ordering comparisons are signed (equality is unaffected)</param>
/// <returns>Result of the comparison</returns>
/// <exception cref="InvalidOperationException">The condition is not one of the known values</exception>
private static Operand GetIntComparison(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
{
    if (cond == IComp.T)
    {
        return Const(IrConsts.True);
    }

    if (cond == IComp.F)
    {
        return Const(IrConsts.False);
    }

    // Only the ordering comparisons have separate signed and unsigned instructions.
    Instruction comparison = cond switch
    {
        IComp.Lt => isSigned ? Instruction.CompareLess : Instruction.CompareLessU32,
        IComp.Eq => Instruction.CompareEqual,
        IComp.Le => isSigned ? Instruction.CompareLessOrEqual : Instruction.CompareLessOrEqualU32,
        IComp.Gt => isSigned ? Instruction.CompareGreater : Instruction.CompareGreaterU32,
        IComp.Ne => Instruction.CompareNotEqual,
        IComp.Ge => isSigned ? Instruction.CompareGreaterOrEqual : Instruction.CompareGreaterOrEqualU32,
        _ => throw new InvalidOperationException($"Unexpected condition \"{cond}\".")
    };

    return context.Add(comparison, Local(), srcA, srcB);
}
}
}

View File

@ -0,0 +1,167 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
private const int PT = RegisterConsts.PredicateTrueIndex;
/// <summary>
/// Emits the LOP instruction variant whose second source operand is a register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void LopR(EmitterContext context)
{
    InstLopR op = context.GetOp<InstLopR>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcReg(context, op.SrcB);

    EmitLop(context, op.Lop, op.PredicateOp, lhs, rhs, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
}
/// <summary>
/// Emits the LOP instruction variant whose second source operand is the 20-bit immediate,
/// converted with <c>Imm20ToSInt</c>.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void LopI(EmitterContext context)
{
    InstLopI op = context.GetOp<InstLopI>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitLop(context, op.LogicOp, op.PredicateOp, lhs, rhs, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
}
/// <summary>
/// Emits the LOP instruction variant whose second source operand comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void LopC(EmitterContext context)
{
    InstLopC op = context.GetOp<InstLopC>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitLop(context, op.LogicOp, op.PredicateOp, lhs, rhs, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
}
/// <summary>
/// Emits the LOP instruction variant whose second source operand is a 32-bit immediate.
/// </summary>
/// <remarks>
/// This form always passes <see cref="PredicateOp.F"/> and the PT predicate index,
/// so <c>EmitLopPredWrite</c> writes no predicate for it.
/// </remarks>
/// <param name="context">Emitter context holding the current operation</param>
public static void Lop32i(EmitterContext context)
{
    InstLop32i op = context.GetOp<InstLop32i>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcImm(context, op.Imm32);

    EmitLop(context, op.LogicOp, PredicateOp.F, lhs, rhs, op.Dest, PT, op.NegA, op.NegB, op.X, op.WriteCC);
}
/// <summary>
/// Emits the three-input LOP3 instruction variant where all sources are registers.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Lop3R(EmitterContext context)
{
    InstLop3R op = context.GetOp<InstLop3R>();

    Operand first = GetSrcReg(context, op.SrcA);
    Operand second = GetSrcReg(context, op.SrcB);
    Operand third = GetSrcReg(context, op.SrcC);

    // The instruction immediate is the truth table that combines the three inputs.
    EmitLop3(context, op.Imm, op.PredicateOp, first, second, third, op.Dest, op.DestPred, op.X, op.WriteCC);
}
/// <summary>
/// Emits the three-input LOP3 instruction variant whose second source is the 20-bit immediate.
/// </summary>
/// <remarks>
/// This form passes <see cref="PredicateOp.F"/>, the PT predicate index and a false extended flag.
/// </remarks>
/// <param name="context">Emitter context holding the current operation</param>
public static void Lop3I(EmitterContext context)
{
    InstLop3I op = context.GetOp<InstLop3I>();

    Operand first = GetSrcReg(context, op.SrcA);
    Operand second = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand third = GetSrcReg(context, op.SrcC);

    EmitLop3(context, op.Imm, PredicateOp.F, first, second, third, op.Dest, PT, false, op.WriteCC);
}
/// <summary>
/// Emits the three-input LOP3 instruction variant whose second source comes from a constant buffer.
/// </summary>
/// <remarks>
/// This form passes <see cref="PredicateOp.F"/>, the PT predicate index and a false extended flag.
/// </remarks>
/// <param name="context">Emitter context holding the current operation</param>
public static void Lop3C(EmitterContext context)
{
    InstLop3C op = context.GetOp<InstLop3C>();

    Operand first = GetSrcReg(context, op.SrcA);
    Operand second = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand third = GetSrcReg(context, op.SrcC);

    EmitLop3(context, op.Imm, PredicateOp.F, first, second, third, op.Dest, PT, false, op.WriteCC);
}
/// <summary>
/// Emits a two-input bitwise logic operation, the optional predicate write and the destination copy.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="logicOp">Logic operation to perform</param>
/// <param name="predOp">Operation used to derive the predicate value from the result</param>
/// <param name="srcA">First source operand</param>
/// <param name="srcB">Second source operand</param>
/// <param name="rd">Destination register index</param>
/// <param name="destPred">Destination predicate register index</param>
/// <param name="invertA">True to bitwise-invert <paramref name="srcA"/> before the operation</param>
/// <param name="invertB">True to bitwise-invert <paramref name="srcB"/> before the operation</param>
/// <param name="extended">Extended flag, forwarded to <c>SetZnFlags</c></param>
/// <param name="writeCC">Condition code write flag, forwarded to <c>SetZnFlags</c></param>
private static void EmitLop(
    EmitterContext context,
    LogicOp logicOp,
    PredicateOp predOp,
    Operand srcA,
    Operand srcB,
    int rd,
    int destPred,
    bool invertA,
    bool invertB,
    bool extended,
    bool writeCC)
{
    srcA = context.BitwiseNot(srcA, invertA);
    srcB = context.BitwiseNot(srcB, invertB);

    // Any logic operation other than And/Or/Xor passes the (possibly inverted) second operand through.
    Operand res = logicOp switch
    {
        LogicOp.And => context.BitwiseAnd(srcA, srcB),
        LogicOp.Or => context.BitwiseOr(srcA, srcB),
        LogicOp.Xor => context.BitwiseExclusiveOr(srcA, srcB),
        _ => srcB
    };

    EmitLopPredWrite(context, res, predOp, destPred);

    context.Copy(GetDest(rd), res);

    SetZnFlags(context, res, writeCC, extended);
}
/// <summary>
/// Emits a three-input logic operation described by a truth table, the optional predicate write
/// and the destination copy.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="truthTable">Truth table handed to <c>Lop3Expression.GetFromTruthTable</c></param>
/// <param name="predOp">Operation used to derive the predicate value from the result</param>
/// <param name="srcA">First source operand</param>
/// <param name="srcB">Second source operand</param>
/// <param name="srcC">Third source operand</param>
/// <param name="rd">Destination register index</param>
/// <param name="destPred">Destination predicate register index</param>
/// <param name="extended">Extended flag, forwarded to <c>SetZnFlags</c></param>
/// <param name="writeCC">Condition code write flag, forwarded to <c>SetZnFlags</c></param>
private static void EmitLop3(
    EmitterContext context,
    int truthTable,
    PredicateOp predOp,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    int destPred,
    bool extended,
    bool writeCC)
{
    Operand combined = Lop3Expression.GetFromTruthTable(context, srcA, srcB, srcC, truthTable);

    EmitLopPredWrite(context, combined, predOp, destPred);

    Operand destination = GetDest(rd);
    context.Copy(destination, combined);

    SetZnFlags(context, combined, writeCC, extended);
}
/// <summary>
/// Writes the predicate derived from a logic operation result, unless the target is the PT predicate.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="result">Result of the logic operation</param>
/// <param name="predOp">Selects how the predicate value is derived from <paramref name="result"/></param>
/// <param name="pred">Destination predicate register index</param>
private static void EmitLopPredWrite(EmitterContext context, Operand result, PredicateOp predOp, int pred)
{
    // Nothing to write when the destination is the "predicate true" index.
    if (pred == RegisterConsts.PredicateTrueIndex)
    {
        return;
    }

    Operand predValue = predOp switch
    {
        PredicateOp.F => Const(IrConsts.False),
        PredicateOp.T => Const(IrConsts.True),
        PredicateOp.Z => context.ICompareEqual(result, Const(0)),
        // Every remaining value is treated as "result is non-zero".
        _ => context.ICompareNotEqual(result, Const(0))
    };

    context.Copy(Register(pred, RegisterType.Predicate), predValue);
}
}
}

View File

@ -0,0 +1,71 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the IMNMX instruction variant whose second source operand is a register.
        /// </summary>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void ImnmxR(EmitterContext context)
        {
            InstImnmxR op = context.GetOp<InstImnmxR>();

            Operand lhs = GetSrcReg(context, op.SrcA);
            Operand rhs = GetSrcReg(context, op.SrcB);
            Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
        }

        /// <summary>
        /// Emits the IMNMX instruction variant whose second source operand is the 20-bit immediate.
        /// </summary>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void ImnmxI(EmitterContext context)
        {
            InstImnmxI op = context.GetOp<InstImnmxI>();

            Operand lhs = GetSrcReg(context, op.SrcA);
            Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
        }

        /// <summary>
        /// Emits the IMNMX instruction variant whose second source operand comes from a constant buffer.
        /// </summary>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void ImnmxC(EmitterContext context)
        {
            InstImnmxC op = context.GetOp<InstImnmxC>();

            Operand lhs = GetSrcReg(context, op.SrcA);
            Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
        }

        /// <summary>
        /// Emits both the integer minimum and maximum of the sources and selects one of them
        /// with a predicate.
        /// </summary>
        /// <param name="context">Emitter context the operations are added to</param>
        /// <param name="srcA">First source operand</param>
        /// <param name="srcB">Second source operand</param>
        /// <param name="srcPred">Predicate; the minimum is selected when it is true, the maximum otherwise</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="isSignedInt">True to use the S32 min/max operations, false for the U32 ones</param>
        /// <param name="writeCC">Condition code write flag, forwarded to <c>SetZnFlags</c></param>
        private static void EmitImnmx(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcPred,
            int rd,
            bool isSignedInt,
            bool writeCC)
        {
            Operand minimum;
            Operand maximum;

            if (isSignedInt)
            {
                minimum = context.IMinimumS32(srcA, srcB);
                maximum = context.IMaximumS32(srcA, srcB);
            }
            else
            {
                minimum = context.IMinimumU32(srcA, srcB);
                maximum = context.IMaximumU32(srcA, srcB);
            }

            Operand selected = context.ConditionalSelect(srcPred, minimum, maximum);

            context.Copy(GetDest(rd), selected);

            SetZnFlags(context, selected, writeCC);

            // TODO: X flags.
        }
    }
}

View File

@ -0,0 +1,541 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Memory region targeted by the load and store helpers of this class.
/// </summary>
private enum MemoryRegion
{
    // Accessed through the LoadLocal/StoreLocal context operations.
    Local,
    // Accessed through the LoadShared/StoreShared context operations.
    Shared
}
/// <summary>
/// Emits the ATOM instruction: an atomic operation on global memory whose result is copied
/// to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Atom(EmitterContext context)
{
    InstAtom op = context.GetOp<InstAtom>();

    // Shifting left then arithmetically right by 12 sign-extends the 20-bit immediate.
    int signedOffset = (op.Imm20 << 12) >> 12;

    Register baseRegister = new Register(op.SrcA, RegisterType.Gpr);

    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, baseRegister, op.E, signedOffset);

    Operand operandValue = GetSrcReg(context, op.SrcB);

    Operand atomicResult = EmitAtomicOp(context, StorageKind.GlobalMemory, op.Op, op.Size, addrLow, addrHigh, operandValue);

    context.Copy(GetDest(op.Dest), atomicResult);
}
/// <summary>
/// Emits the ATOMS instruction: an atomic operation on shared memory whose result is copied
/// to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Atoms(EmitterContext context)
{
    InstAtoms op = context.GetOp<InstAtoms>();

    // The register value is divided by 4 (shift right by 2) before the immediate is added.
    Operand registerOffset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2));

    // Shifting left then arithmetically right by 10 sign-extends the 22-bit immediate.
    int signedOffset = (op.Imm22 << 10) >> 10;

    Operand sharedOffset = context.IAdd(registerOffset, Const(signedOffset));

    Operand operandValue = GetSrcReg(context, op.SrcB);

    // Translate the ATOMS specific size enum, anything unlisted is treated as U32.
    AtomSize atomSize = op.AtomsSize switch
    {
        AtomsSize.S32 => AtomSize.S32,
        AtomsSize.U64 => AtomSize.U64,
        AtomsSize.S64 => AtomSize.S64,
        _ => AtomSize.U32
    };

    Operand atomicResult = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, atomSize, sharedOffset, Const(0), operandValue);

    context.Copy(GetDest(op.Dest), atomicResult);
}
/// <summary>
/// Emits the LDC instruction: loads one or two words from a constant buffer into consecutive registers.
/// </summary>
/// <remarks>
/// Sizes above B64 are logged as invalid and nothing is emitted for them.
/// </remarks>
/// <param name="context">Emitter context holding the current operation</param>
public static void Ldc(EmitterContext context)
{
    InstLdc op = context.GetOp<InstLdc>();

    if (op.LsSize > LsSize2.B64)
    {
        context.Config.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
        return;
    }

    bool needsExtraction = op.LsSize < LsSize2.B32;

    int wordCount = 1;

    if (op.LsSize == LsSize2.B64)
    {
        wordCount = 2;
    }

    Operand cbufSlot = Const(op.CbufSlot);
    Operand baseReg = GetSrcReg(context, op.SrcA);

    bool slotFromRegister = op.AddressMode == AddressMode.Is || op.AddressMode == AddressMode.Isl;

    if (slotFromRegister)
    {
        // In these modes the upper 16 bits of the register are added to the slot,
        // and only the lower 16 bits are kept as the offset.
        Operand slotDelta = context.BitfieldExtractU32(baseReg, Const(16), Const(16));

        cbufSlot = context.IAdd(cbufSlot, slotDelta);
        baseReg = context.BitwiseAnd(baseReg, Const(0xffff));
    }

    Operand byteAddress = context.IAdd(baseReg, Const(Imm16ToSInt(op.CbufOffset)));
    Operand firstWord = context.ShiftRightU32(byteAddress, Const(2));
    Operand bitOffset = GetBitOffset(context, byteAddress);

    for (int index = 0; index < wordCount; index++)
    {
        Register target = new Register(op.Dest + index, RegisterType.Gpr);

        // Stop at the first destination that is the zero register.
        if (target.IsRZ)
        {
            break;
        }

        Operand wordIndex = context.IAdd(firstWord, Const(index));
        Operand loaded = context.LoadConstant(cbufSlot, wordIndex);

        if (needsExtraction)
        {
            loaded = ExtractSmallInt(context, (LsSize)op.LsSize, bitOffset, loaded);
        }

        context.Copy(Register(target), loaded);
    }
}
/// <summary>
/// Emits the LDG instruction (load from global memory).
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Ldg(EmitterContext context)
{
    InstLdg op = context.GetOp<InstLdg>();

    int byteOffset = Imm24ToSInt(op.Imm24);

    EmitLdg(context, op.LsSize, op.SrcA, op.Dest, byteOffset, op.E);
}
/// <summary>
/// Emits the LDL instruction (load from local memory).
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Ldl(EmitterContext context)
{
    InstLdl op = context.GetOp<InstLdl>();

    Operand baseAddress = GetSrcReg(context, op.SrcA);
    int byteOffset = Imm24ToSInt(op.Imm24);

    EmitLoad(context, MemoryRegion.Local, op.LsSize, baseAddress, op.Dest, byteOffset);
}
/// <summary>
/// Emits the LDS instruction (load from shared memory).
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Lds(EmitterContext context)
{
    InstLds op = context.GetOp<InstLds>();

    Operand baseAddress = GetSrcReg(context, op.SrcA);
    int byteOffset = Imm24ToSInt(op.Imm24);

    EmitLoad(context, MemoryRegion.Shared, op.LsSize, baseAddress, op.Dest, byteOffset);
}
/// <summary>
/// Emits the RED instruction: an atomic operation on global memory whose result is discarded.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Red(EmitterContext context)
{
    InstRed op = context.GetOp<InstRed>();

    Register baseRegister = new Register(op.SrcA, RegisterType.Gpr);

    // NOTE(review): Atom sign-extends its Imm20 before computing the address, while the raw
    // op.Imm20 is used here. Confirm whether InstRed.Imm20 is already sign-extended.
    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, baseRegister, op.E, op.Imm20);

    AtomOp atomOp = (AtomOp)op.RedOp;

    // NOTE(review): the value operand is obtained with GetDest rather than GetSrcReg (as Atom does) - verify this is intended.
    Operand operandValue = GetDest(op.SrcB);

    EmitAtomicOp(context, StorageKind.GlobalMemory, atomOp, op.RedSize, addrLow, addrHigh, operandValue);
}
/// <summary>
/// Emits the STG instruction (store to global memory).
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Stg(EmitterContext context)
{
    InstStg op = context.GetOp<InstStg>();

    int byteOffset = Imm24ToSInt(op.Imm24);

    EmitStg(context, op.LsSize, op.SrcA, op.Dest, byteOffset, op.E);
}
/// <summary>
/// Emits the STL instruction (store to local memory).
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Stl(EmitterContext context)
{
    InstStl op = context.GetOp<InstStl>();

    Operand baseAddress = GetSrcReg(context, op.SrcA);
    int byteOffset = Imm24ToSInt(op.Imm24);

    EmitStore(context, MemoryRegion.Local, op.LsSize, baseAddress, op.Dest, byteOffset);
}
/// <summary>
/// Emits the STS instruction (store to shared memory).
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Sts(EmitterContext context)
{
    InstSts op = context.GetOp<InstSts>();

    Operand baseAddress = GetSrcReg(context, op.SrcA);
    int byteOffset = Imm24ToSInt(op.Imm24);

    EmitStore(context, MemoryRegion.Shared, op.LsSize, baseAddress, op.Dest, byteOffset);
}
/// <summary>
/// Emits an atomic memory operation for the given storage kind.
/// </summary>
/// <remarks>
/// Add, And, Xor and Or accept the S32 and U32 types; Max and Min pick the S32 or U32 variant.
/// Any other type for those operations is logged as invalid. Operations not listed here
/// emit nothing and are not logged.
/// </remarks>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="storageKind">Storage the atomic operation targets</param>
/// <param name="op">Atomic operation</param>
/// <param name="type">Operand type of the atomic operation</param>
/// <param name="addrLow">First address operand passed to the atomic operation</param>
/// <param name="addrHigh">Second address operand passed to the atomic operation</param>
/// <param name="value">Value operand of the atomic operation</param>
/// <returns>The atomic operation result, or a constant zero when nothing was emitted</returns>
private static Operand EmitAtomicOp(
    EmitterContext context,
    StorageKind storageKind,
    AtomOp op,
    AtomSize type,
    Operand addrLow,
    Operand addrHigh,
    Operand value)
{
    bool isS32 = type == AtomSize.S32;
    bool isU32 = type == AtomSize.U32;
    bool is32Bits = isS32 || isU32;

    Operand res = Const(0);

    switch (op)
    {
        case AtomOp.Add when is32Bits:
            res = context.AtomicAdd(storageKind, addrLow, addrHigh, value);
            break;
        case AtomOp.And when is32Bits:
            res = context.AtomicAnd(storageKind, addrLow, addrHigh, value);
            break;
        case AtomOp.Xor when is32Bits:
            res = context.AtomicXor(storageKind, addrLow, addrHigh, value);
            break;
        case AtomOp.Or when is32Bits:
            res = context.AtomicOr(storageKind, addrLow, addrHigh, value);
            break;
        case AtomOp.Max when isS32:
            res = context.AtomicMaxS32(storageKind, addrLow, addrHigh, value);
            break;
        case AtomOp.Max when isU32:
            res = context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
            break;
        case AtomOp.Min when isS32:
            res = context.AtomicMinS32(storageKind, addrLow, addrHigh, value);
            break;
        case AtomOp.Min when isU32:
            res = context.AtomicMinU32(storageKind, addrLow, addrHigh, value);
            break;
        case AtomOp.Add:
        case AtomOp.And:
        case AtomOp.Xor:
        case AtomOp.Or:
        case AtomOp.Max:
        case AtomOp.Min:
            // Handled operation, but none of the guarded cases above accepted the type.
            context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
            break;
    }

    return res;
}
/// <summary>
/// Emits a load of one, two or four words from local or shared memory into consecutive registers.
/// </summary>
/// <remarks>
/// Sizes above B128 are logged as invalid and nothing is emitted for them.
/// </remarks>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="region">Memory region to load from</param>
/// <param name="size">Size of the load</param>
/// <param name="srcA">Base address operand</param>
/// <param name="rd">Index of the first destination register</param>
/// <param name="offset">Constant added to the base address</param>
private static void EmitLoad(
    EmitterContext context,
    MemoryRegion region,
    LsSize2 size,
    Operand srcA,
    int rd,
    int offset)
{
    if (size > LsSize2.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
        return;
    }

    bool needsExtraction = size < LsSize2.B32;

    int wordCount = size switch
    {
        LsSize2.B64 => 2,
        LsSize2.B128 => 4,
        _ => 1
    };

    Operand byteOffset = context.IAdd(srcA, Const(offset));

    // Word offset = byte offset / 4 (one word = 4 bytes).
    Operand firstWord = context.ShiftRightU32(byteOffset, Const(2));
    Operand bitOffset = GetBitOffset(context, byteOffset);

    for (int index = 0; index < wordCount; index++)
    {
        Register target = new Register(rd + index, RegisterType.Gpr);

        // Stop at the first destination that is the zero register.
        if (target.IsRZ)
        {
            break;
        }

        Operand wordIndex = context.IAdd(firstWord, Const(index));

        Operand loaded = region switch
        {
            MemoryRegion.Local => context.LoadLocal(wordIndex),
            MemoryRegion.Shared => context.LoadShared(wordIndex),
            _ => null
        };

        if (needsExtraction)
        {
            loaded = ExtractSmallInt(context, (LsSize)size, bitOffset, loaded);
        }

        context.Copy(Register(target), loaded);
    }
}
/// <summary>
/// Emits a load from global memory into one or more consecutive registers.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="size">Size of the load</param>
/// <param name="ra">Index of the register holding the base address</param>
/// <param name="rd">Index of the first destination register</param>
/// <param name="offset">Constant added to the base address</param>
/// <param name="extended">Extended addressing flag, forwarded to <c>Get40BitsAddress</c></param>
private static void EmitLdg(
    EmitterContext context,
    LsSize size,
    int ra,
    int rd,
    int offset,
    bool extended)
{
    bool needsExtraction = size < LsSize.B32;
    int wordCount = GetVectorCount(size);

    Register baseRegister = new Register(ra, RegisterType.Gpr);

    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, baseRegister, extended, offset);

    Operand bitOffset = GetBitOffset(context, addrLow);

    for (int index = 0; index < wordCount; index++)
    {
        Register target = new Register(rd + index, RegisterType.Gpr);

        // Stop at the first destination that is the zero register.
        if (target.IsRZ)
        {
            break;
        }

        // Each successive word is 4 bytes further from the base address.
        Operand wordAddress = context.IAdd(addrLow, Const(index * 4));
        Operand loaded = context.LoadGlobal(wordAddress, addrHigh);

        if (needsExtraction)
        {
            loaded = ExtractSmallInt(context, size, bitOffset, loaded);
        }

        context.Copy(Register(target), loaded);
    }
}
/// <summary>
/// Emits a store of one, two or four registers to local or shared memory.
/// </summary>
/// <remarks>
/// Sizes above B128 are logged as invalid and nothing is emitted for them.
/// For local memory, 8 and 16-bit stores load the target word and insert the value into it.
/// For shared memory, 8 and 16-bit stores use the dedicated StoreShared8/StoreShared16 operations.
/// </remarks>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="region">Memory region to store to</param>
/// <param name="size">Size of the store</param>
/// <param name="srcA">Base address operand</param>
/// <param name="rd">Index of the first register holding the data to store</param>
/// <param name="offset">Constant added to the base address</param>
private static void EmitStore(
    EmitterContext context,
    MemoryRegion region,
    LsSize2 size,
    Operand srcA,
    int rd,
    int offset)
{
    if (size > LsSize2.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
        return;
    }

    bool isSmallInt = size < LsSize2.B32;

    int wordCount = size switch
    {
        LsSize2.B64 => 2,
        LsSize2.B128 => 4,
        _ => 1
    };

    Operand byteOffset = context.IAdd(srcA, Const(offset));
    Operand firstWord = context.ShiftRightU32(byteOffset, Const(2));
    Operand bitOffset = GetBitOffset(context, byteOffset);

    for (int index = 0; index < wordCount; index++)
    {
        // When the register index would reach the zero register index, the first register is used instead.
        int sourceIndex = rd + index;

        if (sourceIndex >= RegisterConsts.RegisterZeroIndex)
        {
            sourceIndex = rd;
        }

        Operand data = Register(sourceIndex, RegisterType.Gpr);
        Operand wordIndex = context.IAdd(firstWord, Const(index));

        if (region == MemoryRegion.Local)
        {
            if (isSmallInt)
            {
                // Read-modify-write: merge the small value into the existing word.
                Operand existingWord = context.LoadLocal(wordIndex);

                data = InsertSmallInt(context, (LsSize)size, bitOffset, existingWord, data);
            }

            context.StoreLocal(wordIndex, data);
        }
        else if (region == MemoryRegion.Shared)
        {
            if (size == LsSize2.U8 || size == LsSize2.S8)
            {
                context.StoreShared8(byteOffset, data);
            }
            else if (size == LsSize2.U16 || size == LsSize2.S16)
            {
                context.StoreShared16(byteOffset, data);
            }
            else
            {
                context.StoreShared(wordIndex, data);
            }
        }
    }
}
/// <summary>
/// Emits a store of one or more consecutive registers to global memory.
/// </summary>
/// <remarks>
/// Sizes above B128 are logged as invalid and nothing is emitted for them.
/// 8 and 16-bit stores use the dedicated StoreGlobal8/StoreGlobal16 operations.
/// </remarks>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="size">Size of the store</param>
/// <param name="ra">Index of the register holding the base address</param>
/// <param name="rd">Index of the first register holding the data to store</param>
/// <param name="offset">Constant added to the base address</param>
/// <param name="extended">Extended addressing flag, forwarded to <c>Get40BitsAddress</c></param>
private static void EmitStg(
    EmitterContext context,
    LsSize2 size,
    int ra,
    int rd,
    int offset,
    bool extended)
{
    if (size > LsSize2.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
        return;
    }

    int count = GetVectorCount((LsSize)size);

    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

    // No bit offset is calculated here: the sized global store operations take the address directly.
    for (int index = 0; index < count; index++)
    {
        // When the register index would reach the zero register index, the first register is used instead.
        bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;

        Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);

        // Each successive word is 4 bytes further from the base address.
        Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));

        if (size == LsSize2.U8 || size == LsSize2.S8)
        {
            context.StoreGlobal8(addrLowOffset, addrHigh, value);
        }
        else if (size == LsSize2.U16 || size == LsSize2.S16)
        {
            context.StoreGlobal16(addrLowOffset, addrHigh, value);
        }
        else
        {
            context.StoreGlobal(addrLowOffset, addrHigh, value);
        }
    }
}
/// <summary>
/// Gets the number of 32-bit words transferred for a given load/store size.
/// </summary>
/// <param name="size">Load/store size</param>
/// <returns>2 for B64, 4 for B128 and UB128, 1 for every other size</returns>
private static int GetVectorCount(LsSize size)
{
    return size switch
    {
        LsSize.B64 => 2,
        LsSize.B128 => 4,
        LsSize.UB128 => 4,
        _ => 1
    };
}
/// <summary>
/// Builds the low and high words of a global memory address from a base register and a signed offset.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="ra">Register holding the low word of the base address</param>
/// <param name="extended">
/// True if the high word comes from the register following <paramref name="ra"/>
/// (zero is used instead if <paramref name="ra"/> is the zero register)
/// </param>
/// <param name="offset">Signed constant added to the address</param>
/// <returns>The low and high words of the resulting address</returns>
private static (Operand, Operand) Get40BitsAddress(
    EmitterContext context,
    Register ra,
    bool extended,
    int offset)
{
    Operand addrLow = Register(ra);
    Operand addrHigh;

    if (extended && !ra.IsRZ)
    {
        addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
    }
    else
    {
        addrHigh = Const(0);
    }

    Operand offs = Const(offset);

    addrLow = context.IAdd(addrLow, offs);

    if (extended)
    {
        // The unsigned sum wrapped around if it is below one of the addends.
        Operand carry = context.ICompareLessUnsigned(addrLow, offs);

        // The offset is signed, so its 64-bit sign extension has a high word of 0 or -1.
        // That word must be added to the high part together with the carry, otherwise
        // a negative offset that doesn't borrow would wrongly increment the high word.
        int offsetHigh = offset < 0 ? -1 : 0;

        addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(offsetHigh + 1), Const(offsetHigh)));
    }

    return (addrLow, addrHigh);
}
/// <summary>
/// Gets the bit position, inside a 32-bit word, of the byte addressed by a byte offset.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="baseOffset">Byte offset</param>
/// <returns>Operand with the value (baseOffset &amp; 3) * 8</returns>
private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
{
    // Note: bit offset = (baseOffset & 0b11) * 8.
    // Addresses should be always aligned to the integer type,
    // so we don't need to take unaligned addresses into account.
    Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));

    // Shifting left by 3 multiplies the byte index by 8.
    return context.ShiftLeft(byteInWord, Const(3));
}
/// <summary>
/// Extracts an 8 or 16-bit integer from a 32-bit word and extends it to 32 bits.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="size">Size and signedness of the integer to extract</param>
/// <param name="bitOffset">Bit position of the integer inside the word</param>
/// <param name="value">Word containing the integer</param>
/// <returns>
/// The shifted word, zero extended for U8/U16, sign extended for S8/S16
/// and left as is for any other size
/// </returns>
private static Operand ExtractSmallInt(
    EmitterContext context,
    LsSize size,
    Operand bitOffset,
    Operand value)
{
    // Move the wanted bits down to bit 0 first.
    Operand shifted = context.ShiftRightU32(value, bitOffset);

    return size switch
    {
        LsSize.U8 => ZeroExtendTo32(context, shifted, 8),
        LsSize.U16 => ZeroExtendTo32(context, shifted, 16),
        LsSize.S8 => SignExtendTo32(context, shifted, 8),
        LsSize.S16 => SignExtendTo32(context, shifted, 16),
        _ => shifted
    };
}
/// <summary>
/// Inserts an 8 or 16-bit integer into a 32-bit word.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="size">Size of the integer to insert</param>
/// <param name="bitOffset">Bit position inside the word where the integer is inserted</param>
/// <param name="word">Word receiving the integer</param>
/// <param name="value">Value to insert</param>
/// <returns>
/// The word with the masked value inserted for 8 and 16-bit sizes,
/// or <paramref name="value"/> unchanged for any other size
/// </returns>
private static Operand InsertSmallInt(
    EmitterContext context,
    LsSize size,
    Operand bitOffset,
    Operand word,
    Operand value)
{
    int bits;

    switch (size)
    {
        case LsSize.U8:
        case LsSize.S8:
            bits = 8;
            break;
        case LsSize.U16:
        case LsSize.S16:
            bits = 16;
            break;
        default:
            // Not a small integer, there is nothing to insert.
            return value;
    }

    // Mask is 0xff for 8 bits and 0xffff for 16 bits.
    Operand masked = context.BitwiseAnd(value, Const((1 << bits) - 1));

    return context.BitfieldInsert(word, masked, bitOffset, Const(bits));
}
}
}

View File

@ -0,0 +1,237 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits the MOV instruction variant that copies a register to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void MovR(EmitterContext context)
{
    InstMovR op = context.GetOp<InstMovR>();

    Operand destination = GetDest(op.Dest);
    Operand source = GetSrcReg(context, op.SrcA);

    context.Copy(destination, source);
}
/// <summary>
/// Emits the MOV instruction variant that copies the 20-bit immediate to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void MovI(EmitterContext context)
{
    InstMovI op = context.GetOp<InstMovI>();

    Operand destination = GetDest(op.Dest);
    Operand source = GetSrcImm(context, op.Imm20);

    context.Copy(destination, source);
}
/// <summary>
/// Emits the MOV instruction variant that copies a constant buffer value to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void MovC(EmitterContext context)
{
    InstMovC op = context.GetOp<InstMovC>();

    Operand destination = GetDest(op.Dest);
    Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    context.Copy(destination, source);
}
/// <summary>
/// Emits the MOV instruction variant that copies a 32-bit immediate to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void Mov32i(EmitterContext context)
{
    InstMov32i op = context.GetOp<InstMov32i>();

    Operand destination = GetDest(op.Dest);
    Operand source = GetSrcImm(context, op.Imm32);

    context.Copy(destination, source);
}
/// <summary>
/// Emits the R2P instruction variant whose mask comes from a register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void R2pR(EmitterContext context)
{
    InstR2pR op = context.GetOp<InstR2pR>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand writeMask = GetSrcReg(context, op.SrcB);

    EmitR2p(context, source, writeMask, op.ByteSel, op.Ccpr);
}
/// <summary>
/// Emits the R2P instruction variant whose mask is the 20-bit immediate.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void R2pI(EmitterContext context)
{
    InstR2pI op = context.GetOp<InstR2pI>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand writeMask = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitR2p(context, source, writeMask, op.ByteSel, op.Ccpr);
}
/// <summary>
/// Emits the R2P instruction variant whose mask comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void R2pC(EmitterContext context)
{
    InstR2pC op = context.GetOp<InstR2pC>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand writeMask = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitR2p(context, source, writeMask, op.ByteSel, op.Ccpr);
}
/// <summary>
/// Emits the S2R instruction: copies the value of a special register to the destination register.
/// </summary>
/// <remarks>
/// Special registers without a specific translation read as zero.
/// </remarks>
/// <param name="context">Emitter context holding the current operation</param>
public static void S2r(EmitterContext context)
{
    InstS2r op = context.GetOp<InstS2r>();

    // Loads a shader input variable.
    Operand LoadInput(IoVariable variable)
    {
        return context.Load(StorageKind.Input, variable);
    }

    // Loads a single element of a shader input variable.
    Operand LoadInputElement(IoVariable variable, int element)
    {
        return context.Load(StorageKind.Input, variable, null, Const(element));
    }

    // Builds the invocation info value, it is always zero on compute and fragment stages.
    Operand GetInvocationInfo()
    {
        if (context.Config.Stage == ShaderStage.Compute || context.Config.Stage == ShaderStage.Fragment)
        {
            return Const(0);
        }

        // Note: Lowest 8-bits seems to contain some primitive index,
        // but it seems to be NVIDIA implementation specific as it's only used
        // to calculate ISBE offsets, so we can just keep it as zero.
        if (context.Config.Stage == ShaderStage.TessellationControl ||
            context.Config.Stage == ShaderStage.TessellationEvaluation)
        {
            return context.ShiftLeft(LoadInput(IoVariable.PatchVertices), Const(16));
        }

        return Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16);
    }

    // Combines the three thread ID components into one value: X | (Y << 16) | (Z << 26).
    Operand GetPackedThreadId()
    {
        Operand tidX = LoadInputElement(IoVariable.ThreadId, 0);
        Operand tidY = LoadInputElement(IoVariable.ThreadId, 1);
        Operand tidZ = LoadInputElement(IoVariable.ThreadId, 2);

        tidY = context.ShiftLeft(tidY, Const(16));
        tidZ = context.ShiftLeft(tidZ, Const(26));

        Operand tidYZ = context.BitwiseOr(tidY, tidZ);

        return context.BitwiseOr(tidX, tidYZ);
    }

    Operand src = op.SReg switch
    {
        SReg.LaneId => LoadInput(IoVariable.SubgroupLaneId),
        SReg.InvocationId => LoadInput(IoVariable.InvocationId),
        SReg.YDirection => ConstF(1), // TODO: Use value from Y direction GPU register.
        SReg.ThreadKill => context.Config.Stage == ShaderStage.Fragment ? LoadInput(IoVariable.ThreadKill) : Const(0),
        SReg.InvocationInfo => GetInvocationInfo(),
        SReg.TId => GetPackedThreadId(),
        SReg.TIdX => LoadInputElement(IoVariable.ThreadId, 0),
        SReg.TIdY => LoadInputElement(IoVariable.ThreadId, 1),
        SReg.TIdZ => LoadInputElement(IoVariable.ThreadId, 2),
        SReg.CtaIdX => LoadInputElement(IoVariable.CtaId, 0),
        SReg.CtaIdY => LoadInputElement(IoVariable.CtaId, 1),
        SReg.CtaIdZ => LoadInputElement(IoVariable.CtaId, 2),
        SReg.EqMask => LoadInputElement(IoVariable.SubgroupEqMask, 0),
        SReg.LtMask => LoadInputElement(IoVariable.SubgroupLtMask, 0),
        SReg.LeMask => LoadInputElement(IoVariable.SubgroupLeMask, 0),
        SReg.GtMask => LoadInputElement(IoVariable.SubgroupGtMask, 0),
        SReg.GeMask => LoadInputElement(IoVariable.SubgroupGeMask, 0),
        _ => Const(0)
    };

    context.Copy(GetDest(op.Dest), src);
}
/// <summary>
/// Emits the SEL instruction variant whose second source operand is a register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void SelR(EmitterContext context)
{
    InstSelR op = context.GetOp<InstSelR>();

    Operand whenTrue = GetSrcReg(context, op.SrcA);
    Operand whenFalse = GetSrcReg(context, op.SrcB);
    Operand condition = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    EmitSel(context, whenTrue, whenFalse, condition, op.Dest);
}
/// <summary>
/// Emits the SEL instruction variant whose second source operand is the 20-bit immediate.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void SelI(EmitterContext context)
{
    InstSelI op = context.GetOp<InstSelI>();

    Operand whenTrue = GetSrcReg(context, op.SrcA);
    Operand whenFalse = GetSrcImm(context, Imm20ToSInt(op.Imm20));
    Operand condition = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    EmitSel(context, whenTrue, whenFalse, condition, op.Dest);
}
/// <summary>
/// Emits the SEL instruction variant whose second source operand comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void SelC(EmitterContext context)
{
    InstSelC op = context.GetOp<InstSelC>();

    Operand whenTrue = GetSrcReg(context, op.SrcA);
    Operand whenFalse = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
    Operand condition = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    EmitSel(context, whenTrue, whenFalse, condition, op.Dest);
}
/// <summary>
/// Emits the copy of register bits into the predicate registers.
/// </summary>
/// <remarks>
/// For each predicate, the bit of <paramref name="mask"/> at the predicate index decides whether the
/// predicate takes the matching bit of the selected byte of <paramref name="value"/> or keeps its value.
/// The condition code variant is not implemented and only logs a message.
/// </remarks>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="value">Value the predicate bits are read from</param>
/// <param name="mask">Mask selecting the predicates that are written</param>
/// <param name="byteSel">Byte of <paramref name="value"/> the bits are read from</param>
/// <param name="ccpr">True for the condition code variant of the instruction</param>
private static void EmitR2p(EmitterContext context, Operand value, Operand mask, ByteSel byteSel, bool ccpr)
{
    // True if the given bit of the source is set.
    Operand IsBitSet(Operand source, int bitIndex)
    {
        Operand isolated = context.BitwiseAnd(source, Const(1 << bitIndex));

        return context.ICompareNotEqual(isolated, Const(0));
    }

    if (ccpr)
    {
        // TODO: Support Register to condition code flags copy.
        context.Config.GpuAccessor.Log("R2P.CC not implemented.");
        return;
    }

    int firstBit = (int)byteSel * 8;

    for (int index = 0; index < RegisterConsts.PredsCount; index++)
    {
        Operand predicate = Register(index, RegisterType.Predicate);

        Operand isWritten = IsBitSet(mask, index);
        Operand newValue = IsBitSet(value, firstBit + index);

        context.Copy(predicate, context.ConditionalSelect(isWritten, newValue, predicate));
    }
}
/// <summary>
/// Emits a conditional select between two operands and copies the result to the destination register.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="srcA">Operand selected when the predicate is true</param>
/// <param name="srcB">Operand selected when the predicate is false</param>
/// <param name="srcPred">Predicate used for the selection</param>
/// <param name="rd">Destination register index</param>
private static void EmitSel(EmitterContext context, Operand srcA, Operand srcB, Operand srcPred, int rd)
{
    Operand selected = context.ConditionalSelect(srcPred, srcA, srcB);
    Operand destination = GetDest(rd);

    context.Copy(destination, selected);
}
}
}

View File

@ -0,0 +1,97 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the RRO instruction variant whose source operand is a register.
        /// </summary>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void RroR(EmitterContext context)
        {
            InstRroR op = context.GetOp<InstRroR>();

            Operand source = GetSrcReg(context, op.SrcB);

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits the RRO instruction variant whose source operand is the 20-bit immediate,
        /// converted with <c>Imm20ToFloat</c>.
        /// </summary>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void RroI(EmitterContext context)
        {
            InstRroI op = context.GetOp<InstRroI>();

            Operand source = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits the RRO instruction variant whose source operand comes from a constant buffer.
        /// </summary>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void RroC(EmitterContext context)
        {
            InstRroC op = context.GetOp<InstRroC>();

            Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits the MUFU instruction, which applies the selected function to the source register.
        /// </summary>
        /// <remarks>
        /// An unhandled function is logged as invalid, and the abs/neg modified source is then
        /// copied to the destination.
        /// </remarks>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void Mufu(EmitterContext context)
        {
            InstMufu op = context.GetOp<InstMufu>();

            // Packs the operand together with a zero constant for the FP64 variants.
            // NOTE(review): presumably the operand is the high word of the double - confirm against PackDouble2x32.
            Operand PackWithZero(Operand word)
            {
                return context.PackDouble2x32(OperandHelper.Const(0), word);
            }

            Operand value = context.FPAbsNeg(GetSrcReg(context, op.SrcA), op.AbsA, op.NegA);

            switch (op.MufuOp)
            {
                case MufuOp.Cos:
                    value = context.FPCosine(value);
                    break;

                case MufuOp.Sin:
                    value = context.FPSine(value);
                    break;

                case MufuOp.Ex2:
                    value = context.FPExponentB2(value);
                    break;

                case MufuOp.Lg2:
                    value = context.FPLogarithmB2(value);
                    break;

                case MufuOp.Rcp:
                    value = context.FPReciprocal(value);
                    break;

                case MufuOp.Rsq:
                    value = context.FPReciprocalSquareRoot(value);
                    break;

                case MufuOp.Rcp64h:
                    value = context.UnpackDouble2x32High(context.FPReciprocal(PackWithZero(value), Instruction.FP64));
                    break;

                case MufuOp.Rsq64h:
                    value = context.UnpackDouble2x32High(context.FPReciprocalSquareRoot(PackWithZero(value), Instruction.FP64));
                    break;

                case MufuOp.Sqrt:
                    value = context.FPSquareRoot(value);
                    break;

                default:
                    context.Config.GpuAccessor.Log($"Invalid MUFU operation \"{op.MufuOp}\".");
                    break;
            }

            Operand destination = GetDest(op.Dest);

            context.Copy(destination, context.FPSaturate(value, op.Sat));
        }

        /// <summary>
        /// Emits the range reduction operator as a move with the abs/neg modifiers applied.
        /// </summary>
        /// <param name="context">Emitter context the operations are added to</param>
        /// <param name="srcB">Source operand</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="absB">True to take the absolute value of the source</param>
        /// <param name="negB">True to negate the source</param>
        private static void EmitRro(EmitterContext context, Operand srcB, int rd, bool absB, bool negB)
        {
            // This is the range reduction operator,
            // we translate it as a simple move, as it
            // should be always followed by a matching
            // MUFU instruction.
            Operand modified = context.FPAbsNeg(srcB, absB, negB);

            context.Copy(GetDest(rd), modified);
        }
    }
}

View File

@ -0,0 +1,15 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the NOP instruction, which produces no operations.
        /// </summary>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void Nop(EmitterContext context)
        {
            // The operation is still fetched as InstNop like on every other emitter,
            // but its fields are not needed so the result is not stored.
            context.GetOp<InstNop>();

            // No operation.
        }
    }
}

View File

@ -0,0 +1,54 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the PSET instruction: combines three predicates with two logical operations
        /// and writes the result to a general purpose register.
        /// </summary>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void Pset(EmitterContext context)
        {
            InstPset op = context.GetOp<InstPset>();

            // Each predicate is optionally inverted before being combined.
            Operand predA = context.BitwiseNot(Register(op.Src2Pred, RegisterType.Predicate), op.Src2PredInv);
            Operand predB = context.BitwiseNot(Register(op.Src1Pred, RegisterType.Predicate), op.Src1PredInv);
            Operand predC = context.BitwiseNot(Register(op.SrcPred, RegisterType.Predicate), op.SrcPredInv);

            Operand combinedAB = GetPredLogicalOp(context, op.BoolOpAB, predA, predB);
            Operand combined = GetPredLogicalOp(context, op.BoolOpC, combinedAB, predC);

            Operand destination = GetDest(op.Dest);

            // With BVal set, the result is written as the float 1 constant or zero instead of the raw value.
            Operand output = op.BVal
                ? context.ConditionalSelect(combined, ConstF(1), Const(0))
                : combined;

            context.Copy(destination, output);
        }

        /// <summary>
        /// Emits the PSETP instruction: combines predicates with logical operations and writes
        /// the result to two destination predicates.
        /// </summary>
        /// <remarks>
        /// The second destination receives the inverse of the first logical operation result,
        /// combined with the third predicate using the same second logical operation.
        /// </remarks>
        /// <param name="context">Emitter context holding the current operation</param>
        public static void Psetp(EmitterContext context)
        {
            InstPsetp op = context.GetOp<InstPsetp>();

            Operand predA = context.BitwiseNot(Register(op.Src2Pred, RegisterType.Predicate), op.Src2PredInv);
            Operand predB = context.BitwiseNot(Register(op.Src1Pred, RegisterType.Predicate), op.Src1PredInv);

            Operand direct = GetPredLogicalOp(context, op.BoolOpAB, predA, predB);
            Operand inverse = context.BitwiseNot(direct);

            Operand predC = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            direct = GetPredLogicalOp(context, op.BoolOpC, direct, predC);
            inverse = GetPredLogicalOp(context, op.BoolOpC, inverse, predC);

            context.Copy(Register(op.DestPred, RegisterType.Predicate), direct);
            context.Copy(Register(op.DestPredInv, RegisterType.Predicate), inverse);
        }
    }
}

View File

@ -0,0 +1,249 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits the left SHF instruction variant whose shift amount comes from a register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShfLR(EmitterContext context)
{
    InstShfLR op = context.GetOp<InstShfLR>();

    Operand lowWord = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = GetSrcReg(context, op.SrcB);
    Operand highWord = GetSrcReg(context, op.SrcC);

    EmitShf(context, op.MaxShift, lowWord, shiftAmount, highWord, op.Dest, op.M, left: true, op.WriteCC);
}
/// <summary>
/// Emits the right SHF instruction variant whose shift amount comes from a register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShfRR(EmitterContext context)
{
    InstShfRR op = context.GetOp<InstShfRR>();

    Operand lowWord = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = GetSrcReg(context, op.SrcB);
    Operand highWord = GetSrcReg(context, op.SrcC);

    EmitShf(context, op.MaxShift, lowWord, shiftAmount, highWord, op.Dest, op.M, left: false, op.WriteCC);
}
/// <summary>
/// Emits the left SHF instruction variant whose shift amount is the 6-bit immediate.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShfLI(EmitterContext context)
{
    InstShfLI op = context.GetOp<InstShfLI>();

    Operand lowWord = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = Const(op.Imm6);
    Operand highWord = GetSrcReg(context, op.SrcC);

    EmitShf(context, op.MaxShift, lowWord, shiftAmount, highWord, op.Dest, op.M, left: true, op.WriteCC);
}
/// <summary>
/// Emits the right SHF instruction variant whose shift amount is the 6-bit immediate.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShfRI(EmitterContext context)
{
    InstShfRI op = context.GetOp<InstShfRI>();

    Operand lowWord = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = Const(op.Imm6);
    Operand highWord = GetSrcReg(context, op.SrcC);

    EmitShf(context, op.MaxShift, lowWord, shiftAmount, highWord, op.Dest, op.M, left: false, op.WriteCC);
}
/// <summary>
/// Emits the SHL instruction variant whose shift amount comes from a register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShlR(EmitterContext context)
{
    InstShlR op = context.GetOp<InstShlR>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = GetSrcReg(context, op.SrcB);

    EmitShl(context, source, shiftAmount, op.Dest, op.M);
}
/// <summary>
/// Emits the SHL instruction variant whose shift amount is the 20-bit immediate.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShlI(EmitterContext context)
{
    InstShlI op = context.GetOp<InstShlI>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitShl(context, source, shiftAmount, op.Dest, op.M);
}
/// <summary>
/// Emits the SHL instruction variant whose shift amount comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShlC(EmitterContext context)
{
    InstShlC op = context.GetOp<InstShlC>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitShl(context, source, shiftAmount, op.Dest, op.M);
}
/// <summary>
/// Emits the SHR instruction variant whose shift amount comes from a register.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShrR(EmitterContext context)
{
    InstShrR op = context.GetOp<InstShrR>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = GetSrcReg(context, op.SrcB);

    EmitShr(context, source, shiftAmount, op.Dest, op.M, op.Brev, op.Signed);
}
/// <summary>
/// Emits the SHR instruction variant whose shift amount is the 20-bit immediate.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShrI(EmitterContext context)
{
    InstShrI op = context.GetOp<InstShrI>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = GetSrcImm(context, Imm20ToSInt(op.Imm20));

    EmitShr(context, source, shiftAmount, op.Dest, op.M, op.Brev, op.Signed);
}
/// <summary>
/// Emits the SHR instruction variant whose shift amount comes from a constant buffer.
/// </summary>
/// <param name="context">Emitter context holding the current operation</param>
public static void ShrC(EmitterContext context)
{
    InstShrC op = context.GetOp<InstShrC>();

    Operand source = GetSrcReg(context, op.SrcA);
    Operand shiftAmount = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

    EmitShr(context, source, shiftAmount, op.Dest, op.M, op.Brev, op.Signed);
}
/// <summary>
/// Emits a shift that combines two source words, and copies the result to the destination register.
/// </summary>
/// <remarks>
/// With the mask flag the shift amount wraps to the maximum shift (32, or 64 for the 64-bit modes).
/// Without it, a shift amount greater than or equal to the maximum shift produces zero.
/// </remarks>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="maxShift">Shift mode, selects between the 32 and 64-bit behaviour and the signedness</param>
/// <param name="srcA">First source word</param>
/// <param name="srcB">Shift amount</param>
/// <param name="srcC">Second source word</param>
/// <param name="rd">Destination register index</param>
/// <param name="mask">True to mask the shift amount rather than clamping the result</param>
/// <param name="left">True for a left shift, false for a right shift</param>
/// <param name="writeCC">True to update the flags through <c>SetZnFlags</c></param>
private static void EmitShf(
    EmitterContext context,
    MaxShift maxShift,
    Operand srcA,
    Operand srcB,
    Operand srcC,
    int rd,
    bool mask,
    bool left,
    bool writeCC)
{
    bool signedShift = maxShift == MaxShift.S64;
    bool isLongShift = maxShift == MaxShift.U64 || signedShift;

    int maxShiftConst = 32;

    if (isLongShift)
    {
        maxShiftConst = 64;
    }

    if (mask)
    {
        srcB = context.BitwiseAnd(srcB, Const(maxShiftConst - 1));
    }

    Operand res;

    if (left)
    {
        // res = (C << B) | (A >> (32 - B))
        Operand upperBits = context.ShiftLeft(srcC, srcB);
        Operand complement = context.ISubtract(Const(32), srcB);
        Operand lowerBits = context.ShiftRightU32(srcA, complement);

        res = context.BitwiseOr(upperBits, lowerBits);

        if (isLongShift)
        {
            // res = B >= 32 ? A << (B - 32) : res
            Operand excess = context.ISubtract(srcB, Const(32));
            Operand lowerShift = context.ShiftLeft(srcA, excess);
            Operand shiftGreaterThan31 = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));

            res = context.ConditionalSelect(shiftGreaterThan31, lowerShift, res);
        }
    }
    else
    {
        // res = (A >> B) | (C << (32 - B))
        Operand lowerBits = context.ShiftRightU32(srcA, srcB);
        Operand complement = context.ISubtract(Const(32), srcB);
        Operand upperBits = context.ShiftLeft(srcC, complement);

        res = context.BitwiseOr(lowerBits, upperBits);

        if (isLongShift)
        {
            // res = B >= 32 ? C >> (B - 32) : res
            Operand excess = context.ISubtract(srcB, Const(32));

            Operand upperShift = signedShift
                ? context.ShiftRightS32(srcC, excess)
                : context.ShiftRightU32(srcC, excess);

            Operand shiftGreaterThan31 = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));

            res = context.ConditionalSelect(shiftGreaterThan31, upperShift, res);
        }
    }

    if (!mask)
    {
        // Clamped shift value.
        Operand isLessThanMax = context.ICompareLessUnsigned(srcB, Const(maxShiftConst));

        res = context.ConditionalSelect(isLessThanMax, res, Const(0));
    }

    Operand destination = GetDest(rd);

    context.Copy(destination, res);

    if (writeCC)
    {
        InstEmitAluHelper.SetZnFlags(context, res, writeCC);
    }

    // TODO: X.
}
/// <summary>
/// Emits a 32-bit left shift and stores the result on the destination register.
/// </summary>
/// <param name="context">Emitter context receiving the generated operations</param>
/// <param name="srcA">Value to shift</param>
/// <param name="srcB">Shift amount</param>
/// <param name="rd">Destination register index</param>
/// <param name="mask">True to wrap the shift amount to 5 bits, false to produce zero for amounts of 32 or more</param>
private static void EmitShl(EmitterContext context, Operand srcA, Operand srcB, int rd, bool mask)
{
    Operand shifted;

    if (mask)
    {
        // Wrap mode: only the low 5 bits of the shift amount are used.
        srcB = context.BitwiseAnd(srcB, Const(0x1f));
        shifted = context.ShiftLeft(srcA, srcB);
    }
    else
    {
        // Clamp mode: shifting by 32 or more yields zero.
        shifted = context.ShiftLeft(srcA, srcB);

        Operand amountInRange = context.ICompareLessUnsigned(srcB, Const(32));
        shifted = context.ConditionalSelect(amountInRange, shifted, Const(0));
    }

    // TODO: X, CC.

    context.Copy(GetDest(rd), shifted);
}
/// <summary>
/// Emits a 32-bit right shift (logical or arithmetic) and stores the result on the destination register.
/// </summary>
/// <param name="context">Emitter context receiving the generated operations</param>
/// <param name="srcA">Value to shift</param>
/// <param name="srcB">Shift amount</param>
/// <param name="rd">Destination register index</param>
/// <param name="mask">True to wrap the shift amount to 5 bits, false to clamp the result for amounts of 32 or more</param>
/// <param name="bitReverse">True to reverse the bits of the value before shifting</param>
/// <param name="isSigned">True for an arithmetic shift, false for a logical shift</param>
private static void EmitShr(
    EmitterContext context,
    Operand srcA,
    Operand srcB,
    int rd,
    bool mask,
    bool bitReverse,
    bool isSigned)
{
    Operand value = bitReverse ? context.BitfieldReverse(srcA) : srcA;
    Operand amount = mask ? context.BitwiseAnd(srcB, Const(0x1f)) : srcB;

    Operand shifted;

    if (isSigned)
    {
        shifted = context.ShiftRightS32(value, amount);
    }
    else
    {
        shifted = context.ShiftRightU32(value, amount);
    }

    if (!mask)
    {
        // Clamp mode: an out of range shift gives all sign bits (signed) or zero (unsigned).
        Operand outOfRangeResult = isSigned
            ? context.ShiftRightS32(value, Const(31))
            : Const(0);

        Operand amountInRange = context.ICompareLessUnsigned(amount, Const(32));
        shifted = context.ConditionalSelect(amountInRange, shifted, outOfRangeResult);
    }

    // TODO: X, CC.

    context.Copy(GetDest(rd), shifted);
}
}
}

View File

@ -0,0 +1,796 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Numerics;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits a bindless image atomic operation for the <see cref="InstSuatomB"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SuatomB(EmitterContext context)
{
    InstSuatomB op = context.GetOp<InstSuatomB>();

    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: op.Op,
        size: op.Size,
        imm: 0,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: op.SrcC,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: true,
        compareAndSwap: false);
}
/// <summary>
/// Emits a bound (non-bindless) image atomic operation for the <see cref="InstSuatom"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Suatom(EmitterContext context)
{
    InstSuatom op = context.GetOp<InstSuatom>();

    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: op.Op,
        size: op.Size,
        imm: op.TidB,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: 0,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: false,
        compareAndSwap: false);
}
/// <summary>
/// Emits a bindless image atomic operation for the <see cref="InstSuatomB2"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SuatomB2(EmitterContext context)
{
    InstSuatomB2 op = context.GetOp<InstSuatomB2>();

    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: op.Op,
        size: op.Size,
        imm: 0,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: op.SrcC,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: true,
        compareAndSwap: false);
}
/// <summary>
/// Emits a bindless image compare-and-swap for the <see cref="InstSuatomCasB"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SuatomCasB(EmitterContext context)
{
    InstSuatomCasB op = context.GetOp<InstSuatomCasB>();

    // The atomic operation argument is not used when compareAndSwap is set.
    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: 0,
        size: op.Size,
        imm: 0,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: op.SrcC,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: true,
        compareAndSwap: true);
}
/// <summary>
/// Emits a bound (non-bindless) image compare-and-swap for the <see cref="InstSuatomCas"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SuatomCas(EmitterContext context)
{
    InstSuatomCas op = context.GetOp<InstSuatomCas>();

    // The atomic operation argument is not used when compareAndSwap is set.
    EmitSuatom(
        context,
        dimensions: op.Dim,
        atomicOp: 0,
        size: op.Size,
        imm: op.TidB,
        srcA: op.SrcA,
        srcB: op.SrcB,
        srcC: 0,
        dest: op.Dest,
        byteAddress: op.Ba,
        isBindless: false,
        compareAndSwap: true);
}
/// <summary>
/// Emits a bindless, size based image load for the <see cref="InstSuldDB"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SuldDB(EmitterContext context)
{
    InstSuldDB op = context.GetOp<InstSuldDB>();

    EmitSuld(
        context,
        cacheOp: op.CacheOp,
        dimensions: op.Dim,
        size: op.Size,
        imm: 0,
        componentMask: 0,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: op.SrcC,
        useComponents: false,
        byteAddress: op.Ba,
        isBindless: true);
}
/// <summary>
/// Emits a bound (non-bindless), size based image load for the <see cref="InstSuldD"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SuldD(EmitterContext context)
{
    InstSuldD op = context.GetOp<InstSuldD>();

    EmitSuld(
        context,
        cacheOp: op.CacheOp,
        dimensions: op.Dim,
        size: op.Size,
        imm: op.TidB,
        componentMask: 0,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: 0,
        useComponents: false,
        byteAddress: op.Ba,
        isBindless: false);
}
/// <summary>
/// Emits a bindless, component mask based image load for the <see cref="InstSuldB"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SuldB(EmitterContext context)
{
    InstSuldB op = context.GetOp<InstSuldB>();

    EmitSuld(
        context,
        cacheOp: op.CacheOp,
        dimensions: op.Dim,
        size: 0,
        imm: 0,
        componentMask: op.Rgba,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: op.SrcC,
        useComponents: true,
        byteAddress: false,
        isBindless: true);
}
/// <summary>
/// Emits a bound (non-bindless), component mask based image load for the <see cref="InstSuld"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Suld(EmitterContext context)
{
    InstSuld op = context.GetOp<InstSuld>();

    EmitSuld(
        context,
        cacheOp: op.CacheOp,
        dimensions: op.Dim,
        size: 0,
        imm: op.TidB,
        componentMask: op.Rgba,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: 0,
        useComponents: true,
        byteAddress: false,
        isBindless: false);
}
/// <summary>
/// Emits a bindless image reduction for the <see cref="InstSuredB"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SuredB(EmitterContext context)
{
    InstSuredB op = context.GetOp<InstSuredB>();

    EmitSured(
        context,
        dimensions: op.Dim,
        atomicOp: op.Op,
        size: op.Size,
        imm: 0,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: op.SrcC,
        byteAddress: op.Ba,
        isBindless: true);
}
/// <summary>
/// Emits a bound (non-bindless) image reduction for the <see cref="InstSured"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Sured(EmitterContext context)
{
    InstSured op = context.GetOp<InstSured>();

    EmitSured(
        context,
        dimensions: op.Dim,
        atomicOp: op.Op,
        size: op.Size,
        imm: op.TidB,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: 0,
        byteAddress: op.Ba,
        isBindless: false);
}
/// <summary>
/// Emits a bindless, size based image store for the <see cref="InstSustDB"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SustDB(EmitterContext context)
{
    InstSustDB op = context.GetOp<InstSustDB>();

    EmitSust(
        context,
        cacheOp: op.CacheOp,
        dimensions: op.Dim,
        size: op.Size,
        imm: 0,
        componentMask: 0,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: op.SrcC,
        useComponents: false,
        byteAddress: op.Ba,
        isBindless: true);
}
/// <summary>
/// Emits a bound (non-bindless), size based image store for the <see cref="InstSustD"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SustD(EmitterContext context)
{
    InstSustD op = context.GetOp<InstSustD>();

    EmitSust(
        context,
        cacheOp: op.CacheOp,
        dimensions: op.Dim,
        size: op.Size,
        imm: op.TidB,
        componentMask: 0,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: 0,
        useComponents: false,
        byteAddress: op.Ba,
        isBindless: false);
}
/// <summary>
/// Emits a bindless, component mask based image store for the <see cref="InstSustB"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void SustB(EmitterContext context)
{
    InstSustB op = context.GetOp<InstSustB>();

    EmitSust(
        context,
        cacheOp: op.CacheOp,
        dimensions: op.Dim,
        size: 0,
        imm: 0,
        componentMask: op.Rgba,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: op.SrcC,
        useComponents: true,
        byteAddress: false,
        isBindless: true);
}
/// <summary>
/// Emits a bound (non-bindless), component mask based image store for the <see cref="InstSust"/> encoding.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Sust(EmitterContext context)
{
    InstSust op = context.GetOp<InstSust>();

    EmitSust(
        context,
        cacheOp: op.CacheOp,
        dimensions: op.Dim,
        size: 0,
        imm: op.TidB,
        componentMask: op.Rgba,
        srcA: op.SrcA,
        srcB: op.Dest,
        srcC: 0,
        useComponents: true,
        byteAddress: false,
        isBindless: false);
}
/// <summary>
/// Emits an <see cref="Instruction.ImageAtomic"/> texture operation that returns a value on a destination register.
/// </summary>
/// <param name="context">Emitter context receiving the generated operations</param>
/// <param name="dimensions">Image dimensions, converted to a sampler type</param>
/// <param name="atomicOp">Atomic operation to perform, ignored when <paramref name="compareAndSwap"/> is true</param>
/// <param name="size">Data size/type of the atomic operation</param>
/// <param name="imm">Immediate passed to the texture operation as the handle (callers pass 0 for bindless)</param>
/// <param name="srcA">Index of the first register holding the coordinates</param>
/// <param name="srcB">Index of the first register holding the operation value(s)</param>
/// <param name="srcC">Index of the register holding the bindless handle, only read when <paramref name="isBindless"/> is true</param>
/// <param name="dest">Destination register index</param>
/// <param name="byteAddress">True if the X coordinate is a byte address that must be converted to an element index</param>
/// <param name="isBindless">True if the image handle comes from a register rather than from <paramref name="imm"/></param>
/// <param name="compareAndSwap">True to emit a compare-and-swap, which reads one extra value from the srcB registers</param>
private static void EmitSuatom(
EmitterContext context,
SuDim dimensions,
SuatomOp atomicOp,
SuatomSize size,
int imm,
int srcA,
int srcB,
int srcC,
int dest,
bool byteAddress,
bool isBindless,
bool compareAndSwap)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");
return;
}
// Reads the next register of the srcA sequence, yielding constant zero once past the zero register index.
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
// Same as Ra, but for the srcB register sequence.
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
// No destination operand is created when the destination is the zero register.
Operand destOperand = dest != RegisterConsts.RegisterZeroIndex ? Register(dest, RegisterType.Gpr) : null;
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
// For bindless access, the handle is always the first source.
sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
// 1D images are accessed as 2D with a constant zero Y coordinate.
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
// The array layer index follows the coordinates.
sourcesList.Add(Ra());
type |= SamplerType.Array;
}
if (byteAddress)
{
// Convert the byte address on X into an element index by dividing by the component size.
int xIndex = isBindless ? 1 : 0;
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
// TODO: FP and 64-bit formats.
// For the Sd32/Sd64 sizes the format is queried from the configuration (unknown for bindless),
// all other sizes map directly to a format.
TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64
? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm))
: GetTextureFormat(size);
if (compareAndSwap)
{
// Compare-and-swap takes two values from the srcB sequence instead of one.
sourcesList.Add(Rb());
}
sourcesList.Add(Rb());
Operand[] sources = sourcesList.ToArray();
TextureFlags flags = compareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(atomicOp);
if (isBindless)
{
flags |= TextureFlags.Bindless;
}
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageAtomic,
type,
format,
flags,
imm,
0,
new[] { destOperand },
sources);
context.Add(operation);
}
/// <summary>
/// Emits an <see cref="Instruction.ImageLoad"/> texture operation, either component mask based
/// or size based, writing the loaded values to consecutive destination registers.
/// </summary>
/// <param name="context">Emitter context receiving the generated operations</param>
/// <param name="cacheOp">Cache operation, <see cref="CacheOpLd.Cg"/> marks the access as coherent</param>
/// <param name="dimensions">Image dimensions, converted to a sampler type</param>
/// <param name="size">Data size of the load, only used when <paramref name="useComponents"/> is false</param>
/// <param name="imm">Immediate used as the image handle (callers pass 0 for bindless)</param>
/// <param name="componentMask">Mask of components to load, only used when <paramref name="useComponents"/> is true</param>
/// <param name="srcA">Index of the first register holding the coordinates</param>
/// <param name="srcB">Index of the first destination register</param>
/// <param name="srcC">Index of the register holding the bindless handle, only read when <paramref name="isBindless"/> is true</param>
/// <param name="useComponents">True to load the components selected by the mask, false to load raw data of the given size</param>
/// <param name="byteAddress">True if the X coordinate is a byte address (size based loads only)</param>
/// <param name="isBindless">True if the image handle comes from a register rather than from <paramref name="imm"/></param>
private static void EmitSuld(
    EmitterContext context,
    CacheOpLd cacheOp,
    SuDim dimensions,
    SuSize size,
    int imm,
    SuRgba componentMask,
    int srcA,
    int srcB,
    int srcC,
    bool useComponents,
    bool byteAddress,
    bool isBindless)
{
    context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);

    SamplerType type = ConvertSamplerType(dimensions);

    if (type == SamplerType.None)
    {
        // This is the load path, so the message must not claim that a store failed.
        context.Config.GpuAccessor.Log("Invalid image load sampler type.");
        return;
    }

    // Reads the next register of the srcA sequence, yielding constant zero once past the zero register index.
    Operand Ra()
    {
        if (srcA > RegisterConsts.RegisterZeroIndex)
        {
            return Const(0);
        }

        return context.Copy(Register(srcA++, RegisterType.Gpr));
    }

    // Builds up to "count" consecutive destination registers starting at srcB,
    // stopping before the zero register. The returned array may be shorter than "count", or empty.
    Operand[] CreateDests(int count)
    {
        Operand[] dests = new Operand[count];

        int outputIndex = 0;

        for (int i = 0; i < dests.Length; i++)
        {
            if (srcB + i >= RegisterConsts.RegisterZeroIndex)
            {
                break;
            }

            dests[outputIndex++] = Register(srcB + i, RegisterType.Gpr);
        }

        if (outputIndex != dests.Length)
        {
            Array.Resize(ref dests, outputIndex);
        }

        return dests;
    }

    List<Operand> sourcesList = new List<Operand>();

    if (isBindless)
    {
        // For bindless access, the handle is always the first source.
        sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
    }

    int coordsCount = type.GetDimensions();

    for (int index = 0; index < coordsCount; index++)
    {
        sourcesList.Add(Ra());
    }

    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        // 1D images are accessed as 2D with a constant zero Y coordinate.
        sourcesList.Add(Const(0));

        type &= ~SamplerType.Mask;
        type |= SamplerType.Texture2D;
    }

    if (type.HasFlag(SamplerType.Array))
    {
        // The array layer index follows the coordinates.
        sourcesList.Add(Ra());
    }

    Operand[] sources = sourcesList.ToArray();

    int handle = imm;

    TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;

    if (cacheOp == CacheOpLd.Cg)
    {
        flags |= TextureFlags.Coherent;
    }

    if (useComponents)
    {
        // One destination register per component enabled on the mask.
        Operand[] dests = CreateDests(BitOperations.PopCount((uint)componentMask));

        TextureOperation operation = context.CreateTextureOperation(
            Instruction.ImageLoad,
            type,
            flags,
            handle,
            (int)componentMask,
            dests,
            sources);

        if (!isBindless)
        {
            operation.Format = context.Config.GetTextureFormat(handle);
        }

        context.Add(operation);
    }
    else
    {
        if (byteAddress)
        {
            // Convert the byte address on X into an element index by dividing by the component size.
            int xIndex = isBindless ? 1 : 0;

            sources[xIndex] = context.ShiftRightS32(sources[xIndex], Const(GetComponentSizeInBytesLog2(size)));
        }

        int components = GetComponents(size);

        int compMask = (1 << components) - 1;

        Operand[] dests = CreateDests(components);

        TextureOperation operation = context.CreateTextureOperation(
            Instruction.ImageLoad,
            type,
            GetTextureFormat(size),
            flags,
            handle,
            compMask,
            dests,
            sources);

        context.Add(operation);

        // Sub-32-bit loads are extended to 32 bits in place. When the destination is the
        // zero register there is no destination operand at all, so there is nothing to extend.
        if (dests.Length != 0)
        {
            switch (size)
            {
                case SuSize.U8:
                    context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 8));
                    break;
                case SuSize.U16:
                    context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 16));
                    break;
                case SuSize.S8:
                    context.Copy(dests[0], SignExtendTo32(context, dests[0], 8));
                    break;
                case SuSize.S16:
                    context.Copy(dests[0], SignExtendTo32(context, dests[0], 16));
                    break;
            }
        }
    }
}
/// <summary>
/// Emits an <see cref="Instruction.ImageAtomic"/> texture operation without destination (reduction).
/// </summary>
/// <param name="context">Emitter context receiving the generated operations</param>
/// <param name="dimensions">Image dimensions, converted to a sampler type</param>
/// <param name="atomicOp">Reduction operation, reinterpreted as a <see cref="SuatomOp"/> to pick the flags</param>
/// <param name="size">Data size/type of the operation</param>
/// <param name="imm">Immediate passed to the texture operation as the handle (callers pass 0 for bindless)</param>
/// <param name="srcA">Index of the first register holding the coordinates</param>
/// <param name="srcB">Index of the register holding the operation value</param>
/// <param name="srcC">Index of the register holding the bindless handle, only read when <paramref name="isBindless"/> is true</param>
/// <param name="byteAddress">True if the X coordinate is a byte address that must be converted to an element index</param>
/// <param name="isBindless">True if the image handle comes from a register rather than from <paramref name="imm"/></param>
private static void EmitSured(
EmitterContext context,
SuDim dimensions,
RedOp atomicOp,
SuatomSize size,
int imm,
int srcA,
int srcB,
int srcC,
bool byteAddress,
bool isBindless)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");
return;
}
// Reads the next register of the srcA sequence, yielding constant zero once past the zero register index.
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
// Same as Ra, but for the srcB register sequence.
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
// For bindless access, the handle is always the first source.
sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
// 1D images are accessed as 2D with a constant zero Y coordinate.
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
// The array layer index follows the coordinates.
sourcesList.Add(Ra());
type |= SamplerType.Array;
}
if (byteAddress)
{
// Convert the byte address on X into an element index by dividing by the component size.
int xIndex = isBindless ? 1 : 0;
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
// TODO: FP and 64-bit formats.
// For the Sd32/Sd64 sizes the format is queried from the configuration (unknown for bindless),
// all other sizes map directly to a format.
TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64
? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm))
: GetTextureFormat(size);
sourcesList.Add(Rb());
Operand[] sources = sourcesList.ToArray();
TextureFlags flags = GetAtomicOpFlags((SuatomOp)atomicOp);
if (isBindless)
{
flags |= TextureFlags.Bindless;
}
// A reduction does not return the old value, so no destination operands are passed.
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageAtomic,
type,
format,
flags,
imm,
0,
null,
sources);
context.Add(operation);
}
/// <summary>
/// Emits an <see cref="Instruction.ImageStore"/> texture operation, either component mask based or size based.
/// </summary>
/// <param name="context">Emitter context receiving the generated operations</param>
/// <param name="cacheOp">Cache operation, <see cref="CacheOpSt.Cg"/> marks the access as coherent</param>
/// <param name="dimensions">Image dimensions, converted to a sampler type</param>
/// <param name="size">Data size of the store, only used when <paramref name="useComponents"/> is false</param>
/// <param name="imm">Immediate used as the image handle (callers pass 0 for bindless)</param>
/// <param name="componentMask">Mask of components to store, only used when <paramref name="useComponents"/> is true</param>
/// <param name="srcA">Index of the first register holding the coordinates</param>
/// <param name="srcB">Index of the first register holding the values to store</param>
/// <param name="srcC">Index of the register holding the bindless handle, only read when <paramref name="isBindless"/> is true</param>
/// <param name="useComponents">True to store the components selected by the mask, false to store raw data of the given size</param>
/// <param name="byteAddress">True if the X coordinate is a byte address (size based stores only)</param>
/// <param name="isBindless">True if the image handle comes from a register rather than from <paramref name="imm"/></param>
private static void EmitSust(
EmitterContext context,
CacheOpSt cacheOp,
SuDim dimensions,
SuSize size,
int imm,
SuRgba componentMask,
int srcA,
int srcB,
int srcC,
bool useComponents,
bool byteAddress,
bool isBindless)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
context.Config.GpuAccessor.Log("Invalid image store sampler type.");
return;
}
// Reads the next register of the srcA sequence, yielding constant zero once past the zero register index.
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
// Same as Ra, but for the srcB register sequence.
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
// For bindless access, the handle is always the first source.
sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
// 1D images are accessed as 2D with a constant zero Y coordinate.
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
// The array layer index follows the coordinates.
sourcesList.Add(Ra());
}
TextureFormat format = TextureFormat.Unknown;
if (useComponents)
{
// One value is read from the srcB sequence for each bit set on the component mask.
// compIndex is incremented but never read.
for (int compMask = (int)componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
{
if ((compMask & 1) != 0)
{
sourcesList.Add(Rb());
}
}
if (!isBindless)
{
format = context.Config.GetTextureFormat(imm);
}
}
else
{
if (byteAddress)
{
// Convert the byte address on X into an element index by dividing by the component size.
int xIndex = isBindless ? 1 : 0;
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
int components = GetComponents(size);
for (int compIndex = 0; compIndex < components; compIndex++)
{
sourcesList.Add(Rb());
}
format = GetTextureFormat(size);
}
Operand[] sources = sourcesList.ToArray();
int handle = imm;
TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
if (cacheOp == CacheOpSt.Cg)
{
flags |= TextureFlags.Coherent;
}
// A store produces no value, so no destination operands are passed.
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageStore,
type,
format,
flags,
handle,
0,
null,
sources);
context.Add(operation);
}
/// <summary>
/// Gets the base 2 logarithm of the size in bytes of an atomic operation element.
/// </summary>
/// <param name="size">Atomic operation size</param>
/// <returns>3 for the 64-bit sizes, 2 for everything else (including unknown values)</returns>
private static int GetComponentSizeInBytesLog2(SuatomSize size)
{
    switch (size)
    {
        case SuatomSize.U64:
        case SuatomSize.S64:
        case SuatomSize.Sd64:
            return 3;
        default:
            return 2;
    }
}
/// <summary>
/// Gets the texture format used to access an image with the given atomic operation size.
/// </summary>
/// <param name="size">Atomic operation size</param>
/// <returns>The matching format, or <see cref="TextureFormat.R32Uint"/> for unknown values</returns>
private static TextureFormat GetTextureFormat(SuatomSize size)
{
    switch (size)
    {
        case SuatomSize.S32:
            return TextureFormat.R32Sint;
        case SuatomSize.F32FtzRn:
            return TextureFormat.R32Float;
        case SuatomSize.F16x2FtzRn:
            return TextureFormat.R16G16Float;
        case SuatomSize.U64:
        case SuatomSize.S64:
        case SuatomSize.Sd64:
            return TextureFormat.R32G32Uint;
        default:
            // U32, Sd32 and any unknown value.
            return TextureFormat.R32Uint;
    }
}
/// <summary>
/// Gets the texture operation flag that selects the given atomic operation.
/// </summary>
/// <param name="op">Atomic operation</param>
/// <returns>The matching flag, or <see cref="TextureFlags.Add"/> for unknown values</returns>
private static TextureFlags GetAtomicOpFlags(SuatomOp op)
{
    switch (op)
    {
        case SuatomOp.Min:
            return TextureFlags.Minimum;
        case SuatomOp.Max:
            return TextureFlags.Maximum;
        case SuatomOp.Inc:
            return TextureFlags.Increment;
        case SuatomOp.Dec:
            return TextureFlags.Decrement;
        case SuatomOp.And:
            return TextureFlags.BitwiseAnd;
        case SuatomOp.Or:
            return TextureFlags.BitwiseOr;
        case SuatomOp.Xor:
            return TextureFlags.BitwiseXor;
        case SuatomOp.Exch:
            return TextureFlags.Swap;
        default:
            // SuatomOp.Add and any unknown value.
            return TextureFlags.Add;
    }
}
/// <summary>
/// Gets the number of 32-bit components transferred by a size based image access.
/// </summary>
/// <param name="size">Access size</param>
/// <returns>2 for B64, 4 for B128 and UB128, 1 for everything else</returns>
private static int GetComponents(SuSize size)
{
    if (size == SuSize.B64)
    {
        return 2;
    }

    if (size == SuSize.B128 || size == SuSize.UB128)
    {
        return 4;
    }

    return 1;
}
/// <summary>
/// Gets the base 2 logarithm of the size in bytes of a size based image access.
/// </summary>
/// <param name="size">Access size</param>
/// <returns>The shift amount for the size, or 2 for unknown values</returns>
private static int GetComponentSizeInBytesLog2(SuSize size)
{
    switch (size)
    {
        case SuSize.U8:
        case SuSize.S8:
            return 0;
        case SuSize.U16:
        case SuSize.S16:
            return 1;
        case SuSize.B64:
            return 3;
        case SuSize.B128:
        case SuSize.UB128:
            return 4;
        default:
            // B32 and any unknown value.
            return 2;
    }
}
/// <summary>
/// Gets the texture format used for a size based image access.
/// </summary>
/// <param name="size">Access size</param>
/// <returns>The matching format, or <see cref="TextureFormat.R32Uint"/> for unknown values</returns>
private static TextureFormat GetTextureFormat(SuSize size)
{
    switch (size)
    {
        case SuSize.U8:
            return TextureFormat.R8Uint;
        case SuSize.S8:
            return TextureFormat.R8Sint;
        case SuSize.U16:
            return TextureFormat.R16Uint;
        case SuSize.S16:
            return TextureFormat.R16Sint;
        case SuSize.B64:
            return TextureFormat.R32G32Uint;
        case SuSize.B128:
        case SuSize.UB128:
            return TextureFormat.R32G32B32A32Uint;
        default:
            // B32 and any unknown value.
            return TextureFormat.R32Uint;
    }
}
/// <summary>
/// Converts the image dimensions encoded on a surface instruction into a sampler type.
/// </summary>
/// <param name="target">Image dimensions from the instruction</param>
/// <returns>The sampler type, or <see cref="SamplerType.None"/> if the dimensions are not recognized</returns>
private static SamplerType ConvertSamplerType(SuDim target)
{
    switch (target)
    {
        case SuDim._1d:
            return SamplerType.Texture1D;
        case SuDim._1dBuffer:
            return SamplerType.TextureBuffer;
        case SuDim._1dArray:
            return SamplerType.Texture1D | SamplerType.Array;
        case SuDim._2d:
            return SamplerType.Texture2D;
        case SuDim._2dArray:
            return SamplerType.Texture2D | SamplerType.Array;
        case SuDim._3d:
            return SamplerType.Texture3D;
        default:
            return SamplerType.None;
    }
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,118 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits a multiply-add where the product of A and B is computed as a 64-bit (low/high) pair,
/// C is added to it, and the result is optionally shifted right and saturated before being written.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Vmad(EmitterContext context)
{
InstVmad op = context.GetOp<InstVmad>();
// The S8B0 bit of the selector is used as the "signed" indicator for each operand.
bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0;
bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0;
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
// C is negated up front when the mode is NegB.
Operand srcC = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB);
Operand srcB;
if (op.BVideo)
{
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
int imm = op.Imm16;
if (bSigned)
{
// Sign extend the 16-bit immediate.
imm = (imm << 16) >> 16;
}
srcB = Const(imm);
}
Operand productLow = context.IMultiply(srcA, srcB);
Operand productHigh;
if (aSigned == bSigned)
{
productHigh = aSigned
? context.MultiplyHighS32(srcA, srcB)
: context.MultiplyHighU32(srcA, srcB);
}
else
{
// Mixed signedness: start from the unsigned high product and add the unsigned operand
// multiplied by the sign mask (0 or -1) of the signed operand, which subtracts the
// unsigned operand from the high word when the signed operand is negative.
Operand temp = aSigned
? context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31)))
: context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31)));
productHigh = context.IAdd(temp, context.MultiplyHighU32(srcA, srcB));
}
if (op.AvgMode == AvgMode.NegA)
{
// NegA negates the whole 64-bit product.
(productLow, productHigh) = InstEmitAluHelper.NegateLong(context, productLow, productHigh);
}
// Add C to the low word and propagate the carry into the high word.
Operand resLow = InstEmitAluHelper.AddWithCarry(context, productLow, srcC, out Operand sumCarry);
Operand resHigh = context.IAdd(productHigh, sumCarry);
if (op.AvgMode == AvgMode.PlusOne)
{
resLow = InstEmitAluHelper.AddWithCarry(context, resLow, Const(1), out Operand poCarry);
resHigh = context.IAdd(resHigh, poCarry);
}
// The result is handled as signed if either operand is S32 or one of the negate modes is used.
bool resSigned = op.ASelect == VectorSelect.S32 ||
op.BSelect == VectorSelect.S32 ||
op.AvgMode == AvgMode.NegB ||
op.AvgMode == AvgMode.NegA;
int shift = op.VideoScale switch
{
VideoScale.Shr7 => 7,
VideoScale.Shr15 => 15,
_ => 0
};
if (shift != 0)
{
// Low = (Low >> Shift) | (High << (32 - Shift))
// High >>= Shift
resLow = context.ShiftRightU32(resLow, Const(shift));
resLow = context.BitwiseOr(resLow, context.ShiftLeft(resHigh, Const(32 - shift)));
resHigh = resSigned
? context.ShiftRightS32(resHigh, Const(shift))
: context.ShiftRightU32(resHigh, Const(shift));
}
Operand res = resLow;
if (op.Sat)
{
// Sign mask of the high word (0 or -1).
Operand sign = context.ShiftRightS32(resHigh, Const(31));
if (resSigned)
{
// Overflow if the high word is not the sign extension of the low word;
// clamp to int.MinValue or int.MaxValue depending on the sign.
Operand overflow = context.ICompareNotEqual(resHigh, context.ShiftRightS32(resLow, Const(31)));
Operand clampValue = context.ConditionalSelect(sign, Const(int.MinValue), Const(int.MaxValue));
res = context.ConditionalSelect(overflow, clampValue, resLow);
}
else
{
// Overflow if the high word is non-zero; the replacement value is the inverted sign mask.
Operand overflow = context.ICompareNotEqual(resHigh, Const(0));
res = context.ConditionalSelect(overflow, context.BitwiseNot(sign), resLow);
}
}
context.Copy(GetDest(op.Dest), res);
// TODO: CC.
}
}
}

View File

@ -0,0 +1,183 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits a minimum/maximum of A and B, optionally saturates it, and then combines the result
/// with C according to the secondary operation of the instruction.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Vmnmx(EmitterContext context)
{
InstVmnmx op = context.GetOp<InstVmnmx>();
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcC = GetSrcReg(context, op.SrcC);
Operand srcB;
if (op.BVideo)
{
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
int imm = op.Imm16;
if ((op.BSelect & VectorSelect.S8B0) != 0)
{
// Sign extend the 16-bit immediate.
imm = (imm << 16) >> 16;
}
srcB = Const(imm);
}
Operand res;
bool resSigned;
if ((op.ASelect & VectorSelect.S8B0) != (op.BSelect & VectorSelect.S8B0))
{
// Signedness is different, but for max, result will always fit a U32,
// since one of the inputs can't be negative, and the result is the one
// with highest value. For min, it will always fit on a S32, since
// one of the input can't be greater than INT_MAX and we want the lowest value.
resSigned = !op.Mn;
res = op.Mn ? context.IMaximumU32(srcA, srcB) : context.IMinimumS32(srcA, srcB);
// If the unsigned operand has its top bit set (greater than INT_MAX), it replaces the result.
// NOTE(review): this override is applied for both max and min; confirm it is intended for min.
if ((op.ASelect & VectorSelect.S8B0) != 0)
{
Operand isBGtIntMax = context.ICompareLess(srcB, Const(0));
res = context.ConditionalSelect(isBGtIntMax, srcB, res);
}
else
{
Operand isAGtIntMax = context.ICompareLess(srcA, Const(0));
res = context.ConditionalSelect(isAGtIntMax, srcA, res);
}
}
else
{
// Ra and Rb have the same signedness, so doesn't matter which one we test.
resSigned = (op.ASelect & VectorSelect.S8B0) != 0;
if (op.Mn)
{
res = resSigned
? context.IMaximumS32(srcA, srcB)
: context.IMaximumU32(srcA, srcB);
}
else
{
res = resSigned
? context.IMinimumS32(srcA, srcB)
: context.IMinimumU32(srcA, srcB);
}
}
if (op.Sat)
{
// Saturation is only needed when the result signedness differs from the destination format.
if (op.DFormat && !resSigned)
{
// Unsigned result with DFormat set: clamp to int.MaxValue.
res = context.IMinimumU32(res, Const(int.MaxValue));
}
else if (!op.DFormat && resSigned)
{
// Signed result with DFormat clear: clamp negative values to zero.
res = context.IMaximumS32(res, Const(0));
}
}
// Secondary operation combining the result with C. Modes not listed leave the result unchanged.
switch (op.VideoOp)
{
case VideoOp.Acc:
res = context.IAdd(res, srcC);
break;
case VideoOp.Max:
res = op.DFormat ? context.IMaximumS32(res, srcC) : context.IMaximumU32(res, srcC);
break;
case VideoOp.Min:
res = op.DFormat ? context.IMinimumS32(res, srcC) : context.IMinimumU32(res, srcC);
break;
case VideoOp.Mrg16h:
res = context.BitfieldInsert(srcC, res, Const(16), Const(16));
break;
case VideoOp.Mrg16l:
res = context.BitfieldInsert(srcC, res, Const(0), Const(16));
break;
case VideoOp.Mrg8b0:
res = context.BitfieldInsert(srcC, res, Const(0), Const(8));
break;
case VideoOp.Mrg8b2:
res = context.BitfieldInsert(srcC, res, Const(16), Const(8));
break;
}
context.Copy(GetDest(op.Dest), res);
}
/// <summary>
/// Emits a comparison of A and B whose result, and its complement, are each combined with a
/// source predicate and written to two destination predicate registers.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Vsetp(EmitterContext context)
{
InstVsetp op = context.GetOp<InstVsetp>();
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcB;
if (op.BVideo)
{
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
int imm = op.Imm16;
if ((op.BSelect & VectorSelect.S8B0) != 0)
{
// Sign extend the 16-bit immediate.
imm = (imm << 16) >> 16;
}
srcB = Const(imm);
}
Operand p0Res;
// The S8B0 bit of the selector is used as the "signed" indicator for each operand.
bool signedA = (op.ASelect & VectorSelect.S8B0) != 0;
bool signedB = (op.BSelect & VectorSelect.S8B0) != 0;
if (signedA != signedB)
{
// With the sign bit removed, a selector equal to U32 means a full 32-bit operand.
bool a32 = (op.ASelect & ~VectorSelect.S8B0) == VectorSelect.U32;
bool b32 = (op.BSelect & ~VectorSelect.S8B0) == VectorSelect.U32;
if (!a32 && !b32)
{
// Both values are extended small integer and can always fit in a S32, just do a signed comparison.
p0Res = GetIntComparison(context, op.VComp, srcA, srcB, isSigned: true, extended: false);
}
else
{
// TODO: Mismatching sign case.
// Until implemented, the comparison result is constant zero in this case.
p0Res = Const(0);
}
}
else
{
// Sign matches, just do a regular comparison.
p0Res = GetIntComparison(context, op.VComp, srcA, srcB, signedA, extended: false);
}
// The second predicate gets the complement of the comparison, taken before the logical op is applied.
Operand p1Res = context.BitwiseNot(p0Res);
Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
p0Res = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p0Res, pred);
p1Res = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p1Res, pred);
context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
}
}
}

View File

@ -0,0 +1,84 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits a floating-point swizzle add of two registers and updates the FP zero/negative flags
/// when the instruction requests it.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Fswzadd(EmitterContext context)
{
    InstFswzadd op = context.GetOp<InstFswzadd>();

    Operand lhs = GetSrcReg(context, op.SrcA);
    Operand rhs = GetSrcReg(context, op.SrcB);
    Operand target = GetDest(op.Dest);

    Operand sum = context.FPSwizzleAdd(lhs, rhs, op.PnWord);

    context.Copy(target, sum);

    InstEmitAluHelper.SetFPZnFlags(context, target, op.WriteCC);
}
/// <summary>
/// Emits a shuffle in the mode selected by the instruction, writing the shuffled value to the
/// destination register and the validity result to the destination predicate.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Shfl(EmitterContext context)
{
    InstShfl op = context.GetOp<InstShfl>();

    Operand pred = Register(op.DestPred, RegisterType.Predicate);

    Operand srcA = GetSrcReg(context, op.SrcA);

    // B and C can each be either an immediate or a register.
    Operand srcB;

    if (op.BFixShfl)
    {
        srcB = Const(op.SrcBImm);
    }
    else
    {
        srcB = GetSrcReg(context, op.SrcB);
    }

    Operand srcC;

    if (op.CFixShfl)
    {
        srcC = Const(op.SrcCImm);
    }
    else
    {
        srcC = GetSrcReg(context, op.SrcC);
    }

    Operand res;
    Operand valid;

    switch (op.ShflMode)
    {
        case ShflMode.Idx:
            (res, valid) = context.Shuffle(srcA, srcB, srcC);
            break;
        case ShflMode.Up:
            (res, valid) = context.ShuffleUp(srcA, srcB, srcC);
            break;
        case ShflMode.Down:
            (res, valid) = context.ShuffleDown(srcA, srcB, srcC);
            break;
        case ShflMode.Bfly:
            (res, valid) = context.ShuffleXor(srcA, srcB, srcC);
            break;
        default:
            res = null;
            valid = null;
            break;
    }

    context.Copy(GetDest(op.Dest), res);
    context.Copy(pred, valid);
}
/// <summary>
/// Emits a vote on the source predicate, writing the vote result to a predicate register and,
/// unless the destination is the zero register, the ballot of the predicate to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the instruction being translated</param>
public static void Vote(EmitterContext context)
{
    InstVote op = context.GetOp<InstVote>();

    Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);

    Operand voteResult = op.VoteMode switch
    {
        VoteMode.All => context.VoteAll(pred),
        VoteMode.Any => context.VoteAny(pred),
        VoteMode.Eq => context.VoteAllEqual(pred),
        _ => null
    };

    if (voteResult == null)
    {
        context.Config.GpuAccessor.Log($"Invalid vote operation: {op.VoteMode}.");
    }
    else
    {
        context.Copy(Register(op.VpDest, RegisterType.Predicate), voteResult);
    }

    bool hasDest = op.Dest != RegisterConsts.RegisterZeroIndex;

    if (hasDest)
    {
        context.Copy(GetDest(op.Dest), context.Ballot(pred));
    }
}
}
}

View File

@ -0,0 +1,6 @@
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
/// <summary>
/// Signature of a method that emits code for one instruction using the given emitter context.
/// </summary>
/// <param name="context">Emitter context passed to the instruction emitter</param>
delegate void InstEmitter(EmitterContext context);
}

View File

@ -0,0 +1,141 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
/// <summary>
/// Builds a bitwise expression for a 3-input logic operation from its 8-bit truth table,
/// by matching the table (under input swaps and inversions) against a set of known expressions.
/// </summary>
static class Lop3Expression
{
// Truth tables of the known expressions. The values are consistent with the inputs
// being encoded as a = 0xf0, b = 0xcc and c = 0xaa.
private enum TruthTable : byte
{
False = 0x00, // false
True = 0xff, // true
In = 0xf0, // a
And2 = 0xc0, // a & b
Or2 = 0xfc, // a | b
Xor2 = 0x3c, // a ^ b
And3 = 0x80, // a & b & c
Or3 = 0xfe, // a | b | c
XorAnd = 0x60, // a & (b ^ c)
XorOr = 0xf6, // a | (b ^ c)
OrAnd = 0xe0, // a & (b | c)
AndOr = 0xf8, // a | (b & c)
Onehot = 0x16, // (a & !b & !c) | (!a & b & !c) | (!a & !b & c) - Only one value is true.
Majority = 0xe8, // Popcount(a, b, c) >= 2
Gamble = 0x81, // (a & b & c) | (!a & !b & !c) - All on or all off
InverseGamble = 0x7e, // Inverse of Gamble
Dot = 0x1a, // a ^ (c | (a & b))
Mux = 0xca, // a ? b : c
AndXor = 0x78, // a ^ (b & c)
OrXor = 0x1e, // a ^ (b | c)
Xor3 = 0x96, // a ^ b ^ c
}
/// <summary>
/// Tries to build an expression equivalent to the truth table <paramref name="imm"/>.
/// </summary>
/// <param name="context">Emitter context receiving the generated operations</param>
/// <param name="srcA">First input</param>
/// <param name="srcB">Second input</param>
/// <param name="srcC">Third input</param>
/// <param name="imm">8-bit truth table of the operation</param>
/// <returns>The result operand, or null if no known expression matches</returns>
public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm)
{
// Each bit of i enables one transformation: bits 0-2 swap pairs of inputs and
// bits 3-5 invert one input. The truth table is permuted to match the transformed inputs.
for (int i = 0; i < 0x40; i++)
{
TruthTable currImm = (TruthTable)imm;
Operand x = srcA;
Operand y = srcB;
Operand z = srcC;
if ((i & 0x01) != 0)
{
(x, y) = (y, x);
currImm = PermuteTable(currImm, 7, 6, 3, 2, 5, 4, 1, 0);
}
if ((i & 0x02) != 0)
{
(x, z) = (z, x);
currImm = PermuteTable(currImm, 7, 3, 5, 1, 6, 2, 4, 0);
}
if ((i & 0x04) != 0)
{
(y, z) = (z, y);
currImm = PermuteTable(currImm, 7, 5, 6, 4, 3, 1, 2, 0);
}
if ((i & 0x08) != 0)
{
x = context.BitwiseNot(x);
currImm = PermuteTable(currImm, 3, 2, 1, 0, 7, 6, 5, 4);
}
if ((i & 0x10) != 0)
{
y = context.BitwiseNot(y);
currImm = PermuteTable(currImm, 5, 4, 7, 6, 1, 0, 3, 2);
}
if ((i & 0x20) != 0)
{
z = context.BitwiseNot(z);
currImm = PermuteTable(currImm, 6, 7, 4, 5, 2, 3, 0, 1);
}
Operand result = GetExpr(currImm, context, x, y, z);
if (result != null)
{
return result;
}
// Also try the inverted table, and invert the resulting expression if it matches.
Operand notResult = GetExpr((TruthTable)((~(int)currImm) & 0xff), context, x, y, z);
if (notResult != null)
{
return context.BitwiseNot(notResult);
}
}
return null;
}
/// <summary>
/// Emits the expression for a known truth table.
/// </summary>
/// <param name="imm">Truth table to match</param>
/// <param name="context">Emitter context receiving the generated operations</param>
/// <param name="x">First input</param>
/// <param name="y">Second input</param>
/// <param name="z">Third input</param>
/// <returns>The result operand, or null if the truth table has no expression here</returns>
private static Operand GetExpr(TruthTable imm, EmitterContext context, Operand x, Operand y, Operand z)
{
// Gamble has no arm here; GetFromTruthTable can still reach it as the inverse of InverseGamble.
return imm switch
{
TruthTable.False => Const(0),
TruthTable.True => Const(-1),
TruthTable.In => x,
TruthTable.And2 => context.BitwiseAnd(x, y),
TruthTable.Or2 => context.BitwiseOr(x, y),
TruthTable.Xor2 => context.BitwiseExclusiveOr(x, y),
TruthTable.And3 => context.BitwiseAnd(x, context.BitwiseAnd(y, z)),
TruthTable.Or3 => context.BitwiseOr(x, context.BitwiseOr(y, z)),
TruthTable.XorAnd => context.BitwiseAnd(x, context.BitwiseExclusiveOr(y, z)),
TruthTable.XorOr => context.BitwiseOr(x, context.BitwiseExclusiveOr(y, z)),
TruthTable.OrAnd => context.BitwiseAnd(x, context.BitwiseOr(y, z)),
TruthTable.AndOr => context.BitwiseOr(x, context.BitwiseAnd(y, z)),
TruthTable.Onehot => context.BitwiseExclusiveOr(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))),
TruthTable.Majority => context.BitwiseAnd(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))),
TruthTable.InverseGamble => context.BitwiseOr(context.BitwiseExclusiveOr(x, y), context.BitwiseExclusiveOr(x, z)),
TruthTable.Dot => context.BitwiseAnd(context.BitwiseExclusiveOr(x, z), context.BitwiseOr(context.BitwiseNot(y), z)),
TruthTable.Mux => context.BitwiseOr(context.BitwiseAnd(x, y), context.BitwiseAnd(context.BitwiseNot(x), z)),
TruthTable.AndXor => context.BitwiseExclusiveOr(x, context.BitwiseAnd(y, z)),
TruthTable.OrXor => context.BitwiseExclusiveOr(x, context.BitwiseOr(y, z)),
TruthTable.Xor3 => context.BitwiseExclusiveOr(x, context.BitwiseExclusiveOr(y, z)),
_ => null
};
}
/// <summary>
/// Rearranges the bits of a truth table: source bit N is moved to the position given by the bitN argument.
/// </summary>
/// <param name="imm">Truth table to permute</param>
/// <returns>The permuted truth table</returns>
private static TruthTable PermuteTable(TruthTable imm, int bit7, int bit6, int bit5, int bit4, int bit3, int bit2, int bit1, int bit0)
{
int result = 0;
result |= (((int)imm >> 0) & 1) << bit0;
result |= (((int)imm >> 1) & 1) << bit1;
result |= (((int)imm >> 2) & 1) << bit2;
result |= (((int)imm >> 3) & 1) << bit3;
result |= (((int)imm >> 4) & 1) << bit4;
result |= (((int)imm >> 5) & 1) << bit5;
result |= (((int)imm >> 6) & 1) << bit6;
result |= (((int)imm >> 7) & 1) << bit7;
return (TruthTable)result;
}
}
}

View File

@ -0,0 +1,91 @@
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
/// <summary>
/// Block of operations with up to two successors (next and branch target).
/// Assigning a successor keeps the predecessor list of the affected blocks up to date.
/// </summary>
class BasicBlock
{
    private BasicBlock _next;
    private BasicBlock _branch;

    public int Index { get; set; }

    public LinkedList<INode> Operations { get; }

    public BasicBlock Next
    {
        get { return _next; }
        set { _next = AddSuccessor(_next, value); }
    }

    public BasicBlock Branch
    {
        get { return _branch; }
        set { _branch = AddSuccessor(_branch, value); }
    }

    public bool HasBranch
    {
        get { return _branch != null; }
    }

    // The block with index 0 is always considered reachable.
    public bool Reachable
    {
        get { return Index == 0 || Predecessors.Count != 0; }
    }

    public List<BasicBlock> Predecessors { get; }

    public HashSet<BasicBlock> DominanceFrontiers { get; }

    public BasicBlock ImmediateDominator { get; set; }

    public BasicBlock()
    {
        Operations = new LinkedList<INode>();
        Predecessors = new List<BasicBlock>();
        DominanceFrontiers = new HashSet<BasicBlock>();
    }

    public BasicBlock(int index) : this()
    {
        Index = index;
    }

    /// <summary>
    /// Replaces a successor, unregistering this block from the old successor and registering it on the new one.
    /// </summary>
    /// <param name="oldBlock">Current successor, can be null</param>
    /// <param name="newBlock">New successor, can be null</param>
    /// <returns>The new successor</returns>
    private BasicBlock AddSuccessor(BasicBlock oldBlock, BasicBlock newBlock)
    {
        if (oldBlock != null)
        {
            oldBlock.Predecessors.Remove(this);
        }

        if (newBlock != null)
        {
            newBlock.Predecessors.Add(this);
        }

        return newBlock;
    }

    /// <summary>
    /// Gets the last node of the block.
    /// </summary>
    /// <returns>The last node, or null if the block is empty</returns>
    public INode GetLastOp()
    {
        LinkedListNode<INode> tail = Operations.Last;

        return tail != null ? tail.Value : null;
    }

    /// <summary>
    /// Adds a node at the end of the block, but before the last operation
    /// if that operation is a control flow instruction.
    /// </summary>
    /// <param name="node">Node to add</param>
    public void Append(INode node)
    {
        bool endsWithControlFlow = GetLastOp() is Operation terminator && IsControlFlowInst(terminator.Inst);

        if (!endsWithControlFlow)
        {
            Operations.AddLast(node);
            return;
        }

        Operations.AddBefore(Operations.Last, node);
    }

    private static bool IsControlFlowInst(Instruction inst)
    {
        return inst switch
        {
            Instruction.Branch => true,
            Instruction.BranchIfFalse => true,
            Instruction.BranchIfTrue => true,
            Instruction.Discard => true,
            Instruction.Return => true,
            _ => false
        };
    }
}
}

View File

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
/// <summary>
/// Operation that carries a text comment, created with the <see cref="Instruction.Comment"/> instruction.
/// </summary>
class CommentNode : Operation
{
    /// <summary>
    /// Text of the comment.
    /// </summary>
    public string Comment { get; }

    /// <summary>
    /// Creates a new comment node.
    /// </summary>
    /// <param name="comment">Text of the comment</param>
    public CommentNode(string comment) : base(Instruction.Comment, null) => Comment = comment;
}
}

View File

@ -0,0 +1,23 @@
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
/// <summary>
/// Immutable description of a function: its blocks, name and signature information.
/// </summary>
class Function
{
    /// <summary>
    /// Blocks that make up the function body.
    /// </summary>
    public BasicBlock[] Blocks { get; }

    /// <summary>
    /// Name of the function.
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// Indicates if the function returns a value.
    /// </summary>
    public bool ReturnsValue { get; }

    /// <summary>
    /// Number of input arguments.
    /// </summary>
    public int InArgumentsCount { get; }

    /// <summary>
    /// Number of output arguments.
    /// </summary>
    public int OutArgumentsCount { get; }

    /// <summary>
    /// Creates a new function description. All values are stored as given, without validation or copying.
    /// </summary>
    /// <param name="blocks">Blocks that make up the function body</param>
    /// <param name="name">Name of the function</param>
    /// <param name="returnsValue">Indicates if the function returns a value</param>
    /// <param name="inArgumentsCount">Number of input arguments</param>
    /// <param name="outArgumentsCount">Number of output arguments</param>
    public Function(BasicBlock[] blocks, string name, bool returnsValue, int inArgumentsCount, int outArgumentsCount)
    {
        (Blocks, Name, ReturnsValue) = (blocks, name, returnsValue);
        (InArgumentsCount, OutArgumentsCount) = (inArgumentsCount, outArgumentsCount);
    }
}
}

View File

@ -0,0 +1,15 @@
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
/// <summary>
/// Interface of the nodes stored on the operation list of a <see cref="BasicBlock"/>.
/// Exposes the destination and source operands of the node.
/// </summary>
interface INode
{
/// <summary>
/// Destination operand of the node.
/// </summary>
// NOTE(review): presumably the same operand as GetDest(0) - confirm against the implementations.
Operand Dest { get; set; }
/// <summary>
/// Number of destination operands.
/// </summary>
int DestsCount { get; }
/// <summary>
/// Number of source operands.
/// </summary>
int SourcesCount { get; }
/// <summary>
/// Gets a destination operand.
/// </summary>
/// <param name="index">Index of the destination operand</param>
/// <returns>Destination operand at the given index</returns>
Operand GetDest(int index);
/// <summary>
/// Gets a source operand.
/// </summary>
/// <param name="index">Index of the source operand</param>
/// <returns>Source operand at the given index</returns>
Operand GetSource(int index);
/// <summary>
/// Replaces a source operand.
/// </summary>
/// <param name="index">Index of the source operand to replace</param>
/// <param name="operand">New source operand</param>
void SetSource(int index, Operand operand);
}
}

View File

@ -0,0 +1,178 @@
using System;
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
/// <summary>
/// Instructions of the shader intermediate representation.
/// </summary>
/// <remarks>
/// The members from Absolute to Count are numbered sequentially starting at 1,
/// so inserting, removing or reordering members changes the value of every member that follows.
/// The instruction itself lives on the low 16 bits (see Mask), while FP32 and FP64 are
/// separate bits above that range that can be combined with an instruction value.
/// Only FP32 and FP64 are independent bits, the instruction members are not powers of two.
/// </remarks>
[Flags]
enum Instruction
{
Absolute = 1,
Add,
AtomicAdd,
AtomicAnd,
AtomicCompareAndSwap,
AtomicMinS32,
AtomicMinU32,
AtomicMaxS32,
AtomicMaxU32,
AtomicOr,
AtomicSwap,
AtomicXor,
Ballot,
Barrier,
BitCount,
BitfieldExtractS32,
BitfieldExtractU32,
BitfieldInsert,
BitfieldReverse,
BitwiseAnd,
BitwiseExclusiveOr,
BitwiseNot,
BitwiseOr,
Branch,
BranchIfFalse,
BranchIfTrue,
Call,
Ceiling,
Clamp,
ClampU32,
Comment,
CompareEqual,
CompareGreater,
CompareGreaterOrEqual,
CompareGreaterOrEqualU32,
CompareGreaterU32,
CompareLess,
CompareLessOrEqual,
CompareLessOrEqualU32,
CompareLessU32,
CompareNotEqual,
ConditionalSelect,
ConvertFP32ToFP64,
ConvertFP64ToFP32,
ConvertFP32ToS32,
ConvertFP32ToU32,
ConvertFP64ToS32,
ConvertFP64ToU32,
ConvertS32ToFP32,
ConvertS32ToFP64,
ConvertU32ToFP32,
ConvertU32ToFP64,
Copy,
Cosine,
Ddx,
Ddy,
Discard,
Divide,
EmitVertex,
EndPrimitive,
ExponentB2,
FSIBegin,
FSIEnd,
FindLSB,
FindMSBS32,
FindMSBU32,
Floor,
FusedMultiplyAdd,
GroupMemoryBarrier,
ImageLoad,
ImageStore,
ImageAtomic,
IsNan,
Load,
LoadConstant,
LoadGlobal,
LoadLocal,
LoadShared,
LoadStorage,
Lod,
LogarithmB2,
LogicalAnd,
LogicalExclusiveOr,
LogicalNot,
LogicalOr,
LoopBreak,
LoopContinue,
MarkLabel,
Maximum,
MaximumU32,
MemoryBarrier,
Minimum,
MinimumU32,
Multiply,
MultiplyHighS32,
MultiplyHighU32,
Negate,
PackDouble2x32,
PackHalf2x16,
ReciprocalSquareRoot,
Return,
Round,
ShiftLeft,
ShiftRightS32,
ShiftRightU32,
Shuffle,
ShuffleDown,
ShuffleUp,
ShuffleXor,
Sine,
SquareRoot,
Store,
StoreGlobal,
StoreGlobal16,
StoreGlobal8,
StoreLocal,
StoreShared,
StoreShared16,
StoreShared8,
StoreStorage,
StoreStorage16,
StoreStorage8,
Subtract,
SwizzleAdd,
TextureSample,
TextureSize,
Truncate,
UnpackDouble2x32,
UnpackHalf2x16,
VectorExtract,
VoteAll,
VoteAllEqual,
VoteAny,
// Not an instruction: its value is one past the last instruction value above.
Count,
// Bits 16 and 17 are outside of the range covered by Mask.
// NOTE(review): presumably they select the floating point precision of the instruction - confirm with the users of these flags.
FP32 = 1 << 16,
FP64 = 1 << 17,
// Low 16 bits, the extension methods of this file AND with it before comparing instructions.
Mask = 0xffff
}
/// <summary>
/// Classification helpers for <see cref="Instruction"/> values.
/// Bits outside of <see cref="Instruction.Mask"/> are ignored by all checks.
/// </summary>
static class InstructionExtensions
{
    /// <summary>
    /// Checks if the instruction is one of the atomic instructions.
    /// </summary>
    /// <param name="inst">Instruction to check</param>
    /// <returns>True if the instruction is atomic, false otherwise</returns>
    public static bool IsAtomic(this Instruction inst)
    {
        Instruction baseInst = inst & Instruction.Mask;

        return baseInst == Instruction.AtomicAdd ||
               baseInst == Instruction.AtomicAnd ||
               baseInst == Instruction.AtomicCompareAndSwap ||
               baseInst == Instruction.AtomicMaxS32 ||
               baseInst == Instruction.AtomicMaxU32 ||
               baseInst == Instruction.AtomicMinS32 ||
               baseInst == Instruction.AtomicMinU32 ||
               baseInst == Instruction.AtomicOr ||
               baseInst == Instruction.AtomicSwap ||
               baseInst == Instruction.AtomicXor;
    }

    /// <summary>
    /// Checks if the instruction is <see cref="Instruction.Lod"/> or <see cref="Instruction.TextureSize"/>.
    /// </summary>
    /// <param name="inst">Instruction to check</param>
    /// <returns>True if the instruction is a texture query, false otherwise</returns>
    public static bool IsTextureQuery(this Instruction inst)
    {
        switch (inst & Instruction.Mask)
        {
            case Instruction.Lod:
            case Instruction.TextureSize:
                return true;
            default:
                return false;
        }
    }
}
}

View File

@ -0,0 +1,51 @@
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
/// <summary>
/// Identifiers of the variables known to the shader intermediate representation.
/// </summary>
/// <remarks>
/// No explicit values are assigned, so the members are numbered by position starting at 0.
/// Invalid comes first and is therefore the default value of the type.
/// NOTE(review): presumably these are the shader input/output variables ("Io") - confirm with the users of the enum.
/// </remarks>
enum IoVariable
{
Invalid,
BackColorDiffuse,
BackColorSpecular,
BaseInstance,
BaseVertex,
ClipDistance,
CtaId,
DrawIndex,
FogCoord,
FragmentCoord,
FragmentOutputColor,
FragmentOutputDepth,
FragmentOutputIsBgra, // TODO: Remove and use constant buffer access.
FrontColorDiffuse,
FrontColorSpecular,
FrontFacing,
InstanceId,
InstanceIndex,
InvocationId,
Layer,
PatchVertices,
PointCoord,
PointSize,
Position,
PrimitiveId,
SubgroupEqMask,
SubgroupGeMask,
SubgroupGtMask,
SubgroupLaneId,
SubgroupLeMask,
SubgroupLtMask,
SupportBlockViewInverse, // TODO: Remove and use constant buffer access.
SupportBlockRenderScale, // TODO: Remove and use constant buffer access.
TessellationCoord,
TessellationLevelInner,
TessellationLevelOuter,
TextureCoord,
ThreadId,
ThreadKill,
UserDefined,
VertexId,
VertexIndex,
ViewportIndex,
ViewportMask
}
}

View File

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
/// <summary>
/// Integer constants named after the two boolean values.
/// </summary>
// NOTE(review): presumably these are the integer encodings of booleans on the intermediate representation - confirm with the users.
static class IrConsts
{
// Zero, no bits set.
public const int False = 0;
// Minus one, which as a 32-bit integer has all bits set, so it is the bitwise complement of False.
public const int True = -1;
}
}

Some files were not shown because too many files have changed in this diff Show More