Merge pull request #361 from lioncash/moreqops

dyncom/armemu: Implement QADD8/QSUB8.
2025-12-04 00:22:18 -08:00 · 2014-12-29 14:53:04 -05:00
parent a7a486bbef e412c0fc46
commit 2d2aa2c0be
4 changed files with 142 additions and 65 deletions
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -2419,8 +2419,7 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(pld)(unsigned int inst, int index)
 	return inst_base;
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qadd)(unsigned int inst, int index)     { UNIMPLEMENTED_INSTRUCTION("QADD"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qadd8)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QADD8"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index)
+ARM_INST_PTR INTERPRETER_TRANSLATE(qadd8)(unsigned int inst, int index)
 {
 	arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
 	generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
@@ -2438,21 +2437,28 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index)

 	return inst_base;
 }
+ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index)
+{
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index); // QADD16 forwards to the qadd8 translator; QADD8_INST and QADD16_INST label one handler in InterpreterMainLoop that branches on op2
+}
 ARM_INST_PTR INTERPRETER_TRANSLATE(qaddsubx)(unsigned int inst, int index)
 {
-	return INTERPRETER_TRANSLATE(qadd16)(inst, index);
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index);
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qdadd)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QDADD"); }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qdsub)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QDSUB"); }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qsub)(unsigned int inst, int index)     { UNIMPLEMENTED_INSTRUCTION("QSUB"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qsub8)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QSUB8"); }
+ARM_INST_PTR INTERPRETER_TRANSLATE(qsub8)(unsigned int inst, int index)
+{
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index); // QSUB8 forwards to the qadd8 translator; QSUB8_INST shares the QADD8_INST handler in InterpreterMainLoop, which takes the op2 == 0x07 branch
+}
 ARM_INST_PTR INTERPRETER_TRANSLATE(qsub16)(unsigned int inst, int index)
 {
-	return INTERPRETER_TRANSLATE(qadd16)(inst, index);
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index);
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qsubaddx)(unsigned int inst, int index)
 {
-	return INTERPRETER_TRANSLATE(qadd16)(inst, index);
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index);
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(rev)(unsigned int inst, int index)
 {
@@ -5777,55 +5783,60 @@ unsigned InterpreterMainLoop(ARMul_State* state)
 		GOTO_NEXT_INST;
 	}
 	QADD_INST:
-	QADD8_INST:

+	QADD8_INST:
 	QADD16_INST:
 	QADDSUBX_INST:
+	QSUB8_INST:
 	QSUB16_INST:
 	QSUBADDX_INST:
 	{
 		INC_ICOUNTER;
 		if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
 			generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
-			const s16 rm_lo = (RM & 0xFFFF);
-			const s16 rm_hi = ((RM >> 16) & 0xFFFF);
-			const s16 rn_lo = (RN & 0xFFFF);
-			const s16 rn_hi = ((RN >> 16) & 0xFFFF);
+			const u16 rm_lo = (RM & 0xFFFF);
+			const u16 rm_hi = ((RM >> 16) & 0xFFFF);
+			const u16 rn_lo = (RN & 0xFFFF);
+			const u16 rn_hi = ((RN >> 16) & 0xFFFF);
 			const u8 op2    = inst_cream->op2;

-			s32 lo_result = 0;
-			s32 hi_result = 0;
+			u16 lo_result = 0;
+			u16 hi_result = 0;

 			// QADD16
 			if (op2 == 0x00) {
-				lo_result = (rn_lo + rm_lo);
-				hi_result = (rn_hi + rm_hi);
+				lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_lo);
+				hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_hi);
 			}
 			// QASX
 			else if (op2 == 0x01) {
-				lo_result = (rn_lo - rm_hi);
-				hi_result = (rn_hi + rm_lo);
+				lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_hi);
+				hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_lo);
 			}
 			// QSAX
 			else if (op2 == 0x02) {
-				lo_result = (rn_lo + rm_hi);
-				hi_result = (rn_hi - rm_lo);
+				lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_hi);
+				hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_lo);
 			}
 			// QSUB16
 			else if (op2 == 0x03) {
-				lo_result = (rn_lo - rm_lo);
-				hi_result = (rn_hi - rm_hi);
+				lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_lo);
+				hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_hi);
+			}
+			// QADD8
+			else if (op2 == 0x04) {
+				lo_result = ARMul_SignedSaturatedAdd8(rn_lo & 0xFF, rm_lo & 0xFF) |
+				            ARMul_SignedSaturatedAdd8(rn_lo >> 8, rm_lo >> 8) << 8;
+				hi_result = ARMul_SignedSaturatedAdd8(rn_hi & 0xFF, rm_hi & 0xFF) |
+				            ARMul_SignedSaturatedAdd8(rn_hi >> 8, rm_hi >> 8) << 8;
+			}
+			// QSUB8
+			else if (op2 == 0x07) {
+				lo_result = ARMul_SignedSaturatedSub8(rn_lo & 0xFF, rm_lo & 0xFF) |
+				            ARMul_SignedSaturatedSub8(rn_lo >> 8, rm_lo >> 8) << 8;
+				hi_result = ARMul_SignedSaturatedSub8(rn_hi & 0xFF, rm_hi & 0xFF) |
+				            ARMul_SignedSaturatedSub8(rn_hi >> 8, rm_hi >> 8) << 8;
 			}
-
-			if (lo_result > 0x7FFF)
-				lo_result = 0x7FFF;
-			else if (lo_result < -0x8000)
-				lo_result = -0x8000;
-
-			if (hi_result > 0x7FFF)
-				hi_result = 0x7FFF;
-			else if (hi_result < -0x8000)
-				hi_result = -0x8000;

 			RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
 		}
@@ -5839,7 +5850,6 @@ unsigned InterpreterMainLoop(ARMul_State* state)
 	QDADD_INST:
 	QDSUB_INST:
 	QSUB_INST:
-	QSUB8_INST:
 	REV_INST:
 	{
 		INC_ICOUNTER;
--- a/src/core/arm/interpreter/armemu.cpp
+++ b/src/core/arm/interpreter/armemu.cpp
@@ -5948,56 +5948,58 @@ L_stm_s_takeabort:
                printf("Unhandled v6 insn: %08x", instr);
            }
            break;
-        case 0x62: // QADD16, QASX, QSAX, and QSUB16
-            if ((instr & 0xFF0) == 0xf10 || (instr & 0xFF0) == 0xf30 ||
-                (instr & 0xFF0) == 0xf50 || (instr & 0xFF0) == 0xf70)
+        case 0x62: // QADD16, QASX, QSAX, QSUB16, QADD8, and QSUB8
            {
+                const u8 op2 = BITS(5, 7);
+
                const u8 rd_idx = BITS(12, 15);
                const u8 rn_idx = BITS(16, 19);
                const u8 rm_idx = BITS(0, 3);
-                const s16 rm_lo = (state->Reg[rm_idx] & 0xFFFF);
-                const s16 rm_hi = ((state->Reg[rm_idx] >> 0x10) & 0xFFFF);
-                const s16 rn_lo = (state->Reg[rn_idx] & 0xFFFF);
-                const s16 rn_hi = ((state->Reg[rn_idx] >> 0x10) & 0xFFFF);
+                const u16 rm_lo = (state->Reg[rm_idx] & 0xFFFF);
+                const u16 rm_hi = ((state->Reg[rm_idx] >> 0x10) & 0xFFFF);
+                const u16 rn_lo = (state->Reg[rn_idx] & 0xFFFF);
+                const u16 rn_hi = ((state->Reg[rn_idx] >> 0x10) & 0xFFFF);

-                s32 lo_result;
-                s32 hi_result;
+                u16 lo_result = 0;
+                u16 hi_result = 0;

                // QADD16
-                if ((instr & 0xFF0) == 0xf10) {
-                    lo_result = (rn_lo + rm_lo);
-                    hi_result = (rn_hi + rm_hi);
+                if (op2 == 0x00) {
+                    lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_lo);
+                    hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_hi);
                }
                // QASX
-                else if ((instr & 0xFF0) == 0xf30) {
-                    lo_result = (rn_lo - rm_hi);
-                    hi_result = (rn_hi + rm_lo);
+                else if (op2 == 0x01) {
+                    lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_hi);
+                    hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_lo);
                }
                // QSAX
-                else if ((instr & 0xFF0) == 0xf50) {
-                    lo_result = (rn_lo + rm_hi);
-                    hi_result = (rn_hi - rm_lo);
+                else if (op2 == 0x02) {
+                    lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_hi);
+                    hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_lo);
                }
                // QSUB16
-                else {
-                    lo_result = (rn_lo - rm_lo);
-                    hi_result = (rn_hi - rm_hi);
+                else if (op2 == 0x03) {
+                    lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_lo);
+                    hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_hi);
+                }
+                // QADD8
+                else if (op2 == 0x04) {
+                    lo_result = ARMul_SignedSaturatedAdd8(rn_lo & 0xFF, rm_lo & 0xFF) |
+                                ARMul_SignedSaturatedAdd8(rn_lo >> 8, rm_lo >> 8) << 8;
+                    hi_result = ARMul_SignedSaturatedAdd8(rn_hi & 0xFF, rm_hi & 0xFF) |
+                                ARMul_SignedSaturatedAdd8(rn_hi >> 8, rm_hi >> 8) << 8;
+                }
+                // QSUB8
+                else if (op2 == 0x07) {
+                    lo_result = ARMul_SignedSaturatedSub8(rn_lo & 0xFF, rm_lo & 0xFF) |
+                                ARMul_SignedSaturatedSub8(rn_lo >> 8, rm_lo >> 8) << 8;
+                    hi_result = ARMul_SignedSaturatedSub8(rn_hi & 0xFF, rm_hi & 0xFF) |
+                                ARMul_SignedSaturatedSub8(rn_hi >> 8, rm_hi >> 8) << 8;
                }
-
-                if (lo_result > 0x7FFF)
-                    lo_result = 0x7FFF;
-                else if (lo_result < -0x8000)
-                    lo_result = -0x8000;
-
-                if (hi_result > 0x7FFF)
-                    hi_result = 0x7FFF;
-                else if (hi_result < -0x8000)
-                    hi_result = -0x8000;

                state->Reg[rd_idx] = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
                return 1;
-            } else {
-                printf("Unhandled v6 insn: %08x", BITS(20, 27));
            }
            break;
        case 0x63:
--- a/src/core/arm/interpreter/armsupp.cpp
+++ b/src/core/arm/interpreter/armsupp.cpp
@@ -478,6 +478,66 @@ ARMul_SubOverflow (ARMul_State * state, ARMword a, ARMword b, ARMword result)
    ASSIGNV (SubOverflow (a, b, result));
 }

+/* 8-bit signed saturated addition: returns left + right truncated to 8 bits, or 0x80 / 0x7F when the sum overflows, treating bit 7 as the sign bit */
+u8 ARMul_SignedSaturatedAdd8(u8 left, u8 right)
+{
+    u8 result = left + right;
+
+    if (((result ^ left) & 0x80) && ((left ^ right) & 0x80) == 0) { /* overflow: both operands share a sign bit but the result's sign bit differs from left's */
+        if (left & 0x80)
+            result = 0x80; /* left has its sign bit set: clamp to the most negative byte */
+        else
+            result = 0x7F; /* left has its sign bit clear: clamp to the most positive byte */
+    }
+
+    return result;
+}
+
+/* 8-bit signed saturated subtraction: returns left - right truncated to 8 bits, or 0x80 / 0x7F when the difference overflows, treating bit 7 as the sign bit */
+u8 ARMul_SignedSaturatedSub8(u8 left, u8 right)
+{
+    u8 result = left - right;
+
+    if (((result ^ left) & 0x80) && ((left ^ right) & 0x80) != 0) { /* overflow: operands differ in sign bit and the result's sign bit differs from left's */
+        if (left & 0x80)
+            result = 0x80; /* left has its sign bit set: clamp to the most negative byte */
+        else
+            result = 0x7F; /* left has its sign bit clear: clamp to the most positive byte */
+    }
+
+    return result;
+}
+
+/* 16-bit signed saturated addition: returns left + right truncated to 16 bits, or 0x8000 / 0x7FFF when the sum overflows, treating bit 15 as the sign bit */
+u16 ARMul_SignedSaturatedAdd16(u16 left, u16 right)
+{
+    u16 result = left + right;
+
+    if (((result ^ left) & 0x8000) && ((left ^ right) & 0x8000) == 0) { /* overflow: both operands share a sign bit but the result's sign bit differs from left's */
+        if (left & 0x8000)
+            result = 0x8000; /* left has its sign bit set: clamp to the most negative halfword */
+        else
+            result = 0x7FFF; /* left has its sign bit clear: clamp to the most positive halfword */
+    }
+
+    return result;
+}
+
+/* 16-bit signed saturated subtraction: returns left - right truncated to 16 bits, or 0x8000 / 0x7FFF when the difference overflows, treating bit 15 as the sign bit */
+u16 ARMul_SignedSaturatedSub16(u16 left, u16 right)
+{
+    u16 result = left - right;
+
+    if (((result ^ left) & 0x8000) && ((left ^ right) & 0x8000) != 0) { /* overflow: operands differ in sign bit and the result's sign bit differs from left's */
+        if (left & 0x8000)
+            result = 0x8000; /* left has its sign bit set: clamp to the most negative halfword */
+        else
+            result = 0x7FFF; /* left has its sign bit clear: clamp to the most positive halfword */
+    }
+
+    return result;
+}
+
 /* 8-bit unsigned saturated addition */
 u8 ARMul_UnsignedSaturatedAdd8(u8 left, u8 right)
 {
--- a/src/core/arm/skyeye_common/armdefs.h
+++ b/src/core/arm/skyeye_common/armdefs.h
@@ -790,6 +790,11 @@ extern void ARMul_FixSPSR(ARMul_State*, ARMword, ARMword);
 extern void ARMul_ConsolePrint(ARMul_State*, const char*, ...);
 extern void ARMul_SelectProcessor(ARMul_State*, unsigned);

+extern u8 ARMul_SignedSaturatedAdd8(u8, u8);
+extern u8 ARMul_SignedSaturatedSub8(u8, u8);
+extern u16 ARMul_SignedSaturatedAdd16(u16, u16);
+extern u16 ARMul_SignedSaturatedSub16(u16, u16);
+
 extern u8 ARMul_UnsignedSaturatedAdd8(u8, u8);
 extern u16 ARMul_UnsignedSaturatedAdd16(u16, u16);
 extern u8 ARMul_UnsignedSaturatedSub8(u8, u8);