shader_decode: Implement HFMA2

2025-11-28 05:05:05 -08:00 · 2018-12-23 02:26:35 -03:00
parent d6f76307fe
commit dd91650aaf
4 changed files with 60 additions and 5 deletions
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -648,6 +648,7 @@ union Instruction {
            BitField<37, 2, HalfPrecision> precision;
            BitField<32, 1, u64> saturate;
            BitField<31, 1, u64> negate_b;
            BitField<30, 1, u64> negate_c;
            BitField<35, 2, HalfType> type_c;
        } rr;
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <tuple>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -9,6 +11,8 @@
 namespace VideoCommon::Shader {
 using Tegra::Shader::HalfPrecision;
 using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
@@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
    // Decodes one HFMA2 instruction at `pc` into an HFma IR operation and stores the
    // (merged) result in the destination register gpr0.
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
-    UNIMPLEMENTED();
    // Only HalfPrecision::None is handled. The RR encoding is checked through its own
    // `rr` sub-layout; every other encoding uses the common `hfma2.precision` field.
+    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
    } else {
        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
    }
    // Half type used for operands that have no type field of their own in this decoder
    // (the immediate operand and the fallback case below).
    constexpr auto identity = HalfType::H0_H1;
    // Operand A is read from gpr8 for every encoding.
    const HalfType type_a = instr.hfma2.type_a;
    const Node op_a = GetRegister(instr.gpr8);
    // Negate flags start as false and are written by the lambda below through its
    // by-reference capture; an encoding that does not assign a flag leaves it false.
    bool neg_b{}, neg_c{};
    // Per-encoding selection of: saturate flag, type and node of operand B, type and
    // node of operand C.
    auto [saturate, type_b, op_b, type_c,
          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HFMA2_CR:
            // B comes from the constant buffer, C from gpr39.
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_b,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
                    GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_RC:
            // Mirror of CR: B comes from gpr39, C from the constant buffer.
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
                    instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
        case OpCode::Id::HFMA2_RR:
            // Both B (gpr20) and C (gpr39) are registers. Negate, saturate and the type
            // of C are read from the `rr` sub-layout; the type of B still comes from the
            // common `type_b` field.
            neg_b = instr.hfma2.rr.negate_b;
            neg_c = instr.hfma2.rr.negate_c;
            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_IMM_R:
            // B is the unpacked half immediate with the identity type; neg_b is not
            // assigned here and therefore stays false. C comes from gpr39.
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        default:
            // Fallback: zero immediates for B and C, no saturation.
            // NOTE(review): nothing is asserted or logged on this path, so an unexpected
            // opcode id would be decoded with placeholder operands - confirm whether an
            // UNREACHABLE-style check is wanted here.
            return {false, identity, Immediate(0), identity, Immediate(0)};
        }
    }();
    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
    // Wrap B and C according to their negate flags. The literal `false` is presumably
    // the absolute-value flag (judging by the helper's name) - TODO confirm.
    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
    // The meta carries the half types of A, B and C; the meaning of the leading `true`
    // is defined by MetaHalfArithmetic, which is not visible here.
    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
    // Combine the result with the current contents of gpr0 as selected by the
    // instruction's merge field, then write it back to gpr0.
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
    SetRegister(bb, instr.gpr0, value);
    // pc is returned unchanged.
    return pc;
 }
--- a/src/video_core/shader/glsl_decompiler.cpp
+++ b/src/video_core/shader/glsl_decompiler.cpp
@@ -762,9 +762,9 @@ private:
        return GenerateBinaryInfix(operation, "/", type, type, type);
    }
-    std::string FFma(Operation operation) {
+    template <Type type>
-        return GenerateTernary(operation, "fma", Type::Float, Type::Float, Type::Float,
+    std::string Fma(Operation operation) {
-                               Type::Float);
+        return GenerateTernary(operation, "fma", type, type, type, type);
    }
    template <Type type>
@@ -1231,7 +1231,7 @@ private:
        &Add<Type::Float>,
        &Mul<Type::Float>,
        &Div<Type::Float>,
-        &FFma,
+        &Fma<Type::Float>,
        &Negate<Type::Float>,
        &Absolute<Type::Float>,
        &FClamp,
@@ -1289,6 +1289,7 @@ private:
        &Add<Type::HalfFloat>,
        &Mul<Type::HalfFloat>,
        &Fma<Type::HalfFloat>,
        &Absolute<Type::HalfFloat>,
        &HNegate,
        &HMergeF32,
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -110,6 +110,7 @@ enum class OperationCode {
    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute, /// (f16vec2 a) -> f16vec2
    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
    HMergeF32, /// (f16vec2 src) -> float