ARM: Merge latest VFP fixes from 3dmoo team.

2024-07-04 23:31:19 +01:00 · 2014-10-29 22:25:54 -04:00 · 2014-10-29 22:25:54 -04:00 · bc6989b075
commit bc6989b075
parent 2ca12e7f38
4 changed files with 2161 additions and 1869 deletions
--- a/src/core/arm/skyeye_common/vfp/vfp.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfp.cpp
@ -28,6 +28,8 @@
 #include "core/arm/skyeye_common/armdefs.h"
 #include "core/arm/skyeye_common/vfp/vfp.h"

+#define DEBUG DBG
+
 //ARMul_State* persistent_state; /* function calls from SoftFloat lib don't have an access to ARMul_state. */

 unsigned
@ -41,11 +43,11 @@ VFPInit (ARMul_State *state)
    //persistent_state = state;
    /* Reset only specify VFP_FPEXC_EN = '0' */

-	return No_exp;
+    return 0;
 }

 unsigned
-VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
+VFPMRC (ARMul_State * state, unsigned type, u32 instr, u32 * value)
 {
    /* MRC<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */
    int CoProc = BITS (8, 11); /* 10 or 11 */
@ -59,20 +61,19 @@ VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)

    /* CRn/opc1 CRm/opc2 */

-	if (CoProc == 10 || CoProc == 11)
-	{
+    if (CoProc == 10 || CoProc == 11) {
 #define VFP_MRC_TRANS
 #include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
 #undef VFP_MRC_TRANS
    }
-	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n", 
+    DEBUG("Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n",
          instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2);

    return ARMul_CANT;
 }

 unsigned
-VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
+VFPMCR (ARMul_State * state, unsigned type, u32 instr, u32 value)
 {
    /* MCR<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */
    int CoProc = BITS (8, 11); /* 10 or 11 */
@ -85,20 +86,19 @@ VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
    /* TODO check access permission */

    /* CRn/opc1 CRm/opc2 */
-	if (CoProc == 10 || CoProc == 11)
-	{
+    if (CoProc == 10 || CoProc == 11) {
 #define VFP_MCR_TRANS
 #include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
 #undef VFP_MCR_TRANS
    }
-	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n", 
+    DEBUG("Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n",
          instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2);

    return ARMul_CANT;
 }

 unsigned
-VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2)
+VFPMRRC (ARMul_State * state, unsigned type, u32 instr, u32 * value1, u32 * value2)
 {
    /* MRRC<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */
    int CoProc = BITS (8, 11); /* 10 or 11 */
@ -107,20 +107,19 @@ VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, AR
    int Rt2 = BITS (16, 19);
    int CRm = BITS (0, 3);

-	if (CoProc == 10 || CoProc == 11)
-	{
+    if (CoProc == 10 || CoProc == 11) {
 #define VFP_MRRC_TRANS
 #include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
 #undef VFP_MRRC_TRANS
    }
-	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n", 
+    DEBUG("Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n",
          instr, CoProc, OPC_1, Rt, Rt2, CRm);

    return ARMul_CANT;
 }

 unsigned
-VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2)
+VFPMCRR (ARMul_State * state, unsigned type, u32 instr, u32 value1, u32 value2)
 {
    /* MCRR<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */
    int CoProc = BITS (8, 11); /* 10 or 11 */
@ -133,20 +132,19 @@ VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMw

    /* CRn/opc1 CRm/opc2 */

-	if (CoProc == 11 || CoProc == 10)
-	{
+    if (CoProc == 11 || CoProc == 10) {
 #define VFP_MCRR_TRANS
 #include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
 #undef VFP_MCRR_TRANS
    }
-	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n", 
+    DEBUG("Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n",
          instr, CoProc, OPC_1, Rt, Rt2, CRm);

    return ARMul_CANT;
 }

 unsigned
-VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
+VFPSTC (ARMul_State * state, unsigned type, u32 instr, u32 * value)
 {
    /* STC{L}<c> <coproc>,<CRd>,[<Rn>],<option> */
    int CoProc = BITS (8, 11); /* 10 or 11 */
@ -161,20 +159,19 @@ VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
    /* TODO check access permission */

    /* VSTM */
-	if ( (P|U|D|W) == 0 )
-	{
-		DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1);
+    if ( (P|U|D|W) == 0 ) {
+        DEBUG("In %s, UNDEFINED\n", __FUNCTION__);
+        exit(-1);
    }
-	if (CoProc == 10 || CoProc == 11)
-	{
+    if (CoProc == 10 || CoProc == 11) {
 #if 1
-		if (P == 0 && U == 0 && W == 0)
-		{
-			DEBUG_LOG(ARM11, "VSTM Related encodings\n"); exit(-1);
+        if (P == 0 && U == 0 && W == 0) {
+            DEBUG("VSTM Related encodings\n");
+            exit(-1);
        }
-		if (P == U && W == 1)
-		{
-			DEBUG_LOG(ARM11, "UNDEFINED\n"); exit(-1);
+        if (P == U && W == 1) {
+            DEBUG("UNDEFINED\n");
+            exit(-1);
        }
 #endif

@ -182,14 +179,14 @@ VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
 #include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
 #undef VFP_STC_TRANS
    }
-	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n", 
+    DEBUG("Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n",
          instr, CoProc, CRd, Rn, imm8, P, U, D, W);

    return ARMul_CANT;
 }

 unsigned
-VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
+VFPLDC (ARMul_State * state, unsigned type, u32 instr, u32 value)
 {
    /* LDC{L}<c> <coproc>,<CRd>,[<Rn>] */
    int CoProc = BITS (8, 11); /* 10 or 11 */
@ -203,24 +200,23 @@ VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value)

    /* TODO check access permission */

-	if ( (P|U|D|W) == 0 )
-	{
-		DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1);
+    if ( (P|U|D|W) == 0 ) {
+        DEBUG("In %s, UNDEFINED\n", __FUNCTION__);
+        exit(-1);
    }
-	if (CoProc == 10 || CoProc == 11)
-	{
+    if (CoProc == 10 || CoProc == 11) {
 #define VFP_LDC_TRANS
 #include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
 #undef VFP_LDC_TRANS
    }
-	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n", 
+    DEBUG("Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n",
          instr, CoProc, CRd, Rn, imm8, P, U, D, W);

    return ARMul_CANT;
 }

 unsigned
-VFPCDP (ARMul_State * state, unsigned type, ARMword instr)
+VFPCDP (ARMul_State * state, unsigned type, u32 instr)
 {
    /* CDP<c> <coproc>,<opc1>,<CRd>,<CRn>,<CRm>,<opc2> */
    int CoProc = BITS (8, 11); /* 10 or 11 */
@ -230,12 +226,56 @@ VFPCDP (ARMul_State * state, unsigned type, ARMword instr)
    int CRm = BITS (0, 3);
    int OPC_2 = BITS (5, 7);

+    //ichfly
+    /*if ((instr & 0x0FBF0FD0) == 0x0EB70AC0) //vcvt.f64.f32	d8, s16 (s is bit 0-3 and LSB bit 22) (d is bit 12 - 15 MSB is Bit 6)
+    {
+        struct vfp_double vdd;
+        struct vfp_single vsd;
+        int dn = BITS(12, 15) + (BIT(22) << 4);
+        int sd = (BITS(0, 3) << 1) + BIT(5);
+        s32 n = vfp_get_float(state, sd);
+        vfp_single_unpack(&vsd, n);
+        if (vsd.exponent & 0x80)
+        {
+            vdd.exponent = (vsd.exponent&~0x80) | 0x400;
+        }
+        else
+        {
+            vdd.exponent = vsd.exponent | 0x380;
+        }
+        vdd.sign = vsd.sign;
+        vdd.significand = (u64)(vsd.significand & ~0xC0000000) << 32; // I have no idea why, but the 2 upper bits are not part of the significand
+        vfp_put_double(state, vfp_double_pack(&vdd), dn);
+        return ARMul_DONE;
+    }
+    if ((instr & 0x0FBF0FD0) == 0x0EB70BC0) //vcvt.f32.f64	s15, d6
+    {
+        struct vfp_double vdd;
+        struct vfp_single vsd;
+        int sd = BITS(0, 3) + (BIT(5) << 4);
+        int dn = (BITS(12, 15) << 1) + BIT(22);
+        vfp_double_unpack(&vdd, vfp_get_double(state, sd));
+        if (vdd.exponent & 0x400) //TODO: handle exponents that are too low or too high for this conversion
+        {
+            vsd.exponent = (vdd.exponent) | 0x80;
+        }
+        else
+        {
+            vsd.exponent = vdd.exponent & ~0x80;
+        }
+        vsd.exponent &= 0xFF;
+       // vsd.exponent = vdd.exponent >> 3;
+        vsd.sign = vdd.sign;
+        vsd.significand = ((u64)(vdd.significand ) >> 32)& ~0xC0000000;
+        vfp_put_float(state, vfp_single_pack(&vsd), dn);
+        return ARMul_DONE;
+    }*/
+
    /* TODO check access permission */

    /* CRn/opc1 CRm/opc2 */

-	if (CoProc == 10 || CoProc == 11)
-	{
+    if (CoProc == 10 || CoProc == 11) {
 #define VFP_CDP_TRANS
 #include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
 #undef VFP_CDP_TRANS
@ -250,7 +290,7 @@ VFPCDP (ARMul_State * state, unsigned type, ARMword instr)

        return ARMul_DONE;
    }
-	DEBUG_LOG(ARM11, "Can't identify %x\n", instr);
+    DEBUG("Can't identify %x\n", instr);
    return ARMul_CANT;
 }

@ -301,13 +341,13 @@ VFPCDP (ARMul_State * state, unsigned type, ARMword instr)
 /* Miscellaneous functions */
 int32_t vfp_get_float(arm_core_t* state, unsigned int reg)
 {
-	DBG("VFP get float: s%d=[%08x]\n", reg, state->ExtReg[reg]);
+    DEBUG("VFP get float: s%d=[%08x]\n", reg, state->ExtReg[reg]);
    return state->ExtReg[reg];
 }

 void vfp_put_float(arm_core_t* state, int32_t val, unsigned int reg)
 {
-	DBG("VFP put float: s%d <= [%08x]\n", reg, val);
+    DEBUG("VFP put float: s%d <= [%08x]\n", reg, val);
    state->ExtReg[reg] = val;
 }

@ -315,13 +355,13 @@ uint64_t vfp_get_double(arm_core_t* state, unsigned int reg)
 {
    uint64_t result;
    result = ((uint64_t) state->ExtReg[reg*2+1])<<32 | state->ExtReg[reg*2];
-	DBG("VFP get double: s[%d-%d]=[%016llx]\n", reg*2+1, reg*2, result);
+    DEBUG("VFP get double: s[%d-%d]=[%016llx]\n", reg*2+1, reg*2, result);
    return result;
 }

 void vfp_put_double(arm_core_t* state, uint64_t val, unsigned int reg)
 {
-	DBG("VFP put double: s[%d-%d] <= [%08x-%08x]\n", reg*2+1, reg*2, (uint32_t) (val>>32), (uint32_t) (val & 0xffffffff));
+    DEBUG("VFP put double: s[%d-%d] <= [%08x-%08x]\n", reg*2+1, reg*2, (uint32_t) (val>>32), (uint32_t) (val & 0xffffffff));
    state->ExtReg[reg*2] = (uint32_t) (val & 0xffffffff);
    state->ExtReg[reg*2+1] = (uint32_t) (val>>32);
 }
@ -338,7 +378,7 @@ void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpsc
    vfpdebug("VFP: raising exceptions %08x\n", exceptions);

    if (exceptions == VFP_EXCEPTION_ERROR) {
-		DEBUG_LOG(ARM11, "unhandled bounce %x\n", inst);
+        DEBUG("unhandled bounce %x\n", inst);
        exit(-1);
        return;
    }
--- a/src/core/arm/skyeye_common/vfp/vfp_helper.h
+++ b/src/core/arm/skyeye_common/vfp/vfp_helper.h
@ -44,7 +44,7 @@
 #define pr_info //printf
 #define pr_debug //printf

-static u32 vfp_fls(int x);
+static u32 fls(int x);
 #define do_div(n, base) {n/=base;}

 /* From vfpinstr.h */
@ -502,7 +502,7 @@ struct op {
 	u32 flags;
 };

-static u32 vfp_fls(int x)
+static u32 fls(int x)
 {
 	int r = 32;

@ -532,4 +532,9 @@ static u32 vfp_fls(int x)

 }

+u32 vfp_double_normaliseroundintern(ARMul_State* state, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func);
+u32 vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, struct vfp_double *vdm, u32 fpscr);
+u32 vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, struct vfp_double *vdm, u32 fpscr);
+u32 vfp_double_fcvtsinterncutting(ARMul_State* state, int sd, struct vfp_double* dm, u32 fpscr);
+
 #endif
--- a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
@ -56,9 +56,9 @@
 #include "core/arm/skyeye_common/vfp/asm_vfp.h"

 static struct vfp_double vfp_double_default_qnan = {
-	//.exponent	= 2047,
-	//.sign		= 0,
-	//.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
+    2047,
+    0,
+    VFP_DOUBLE_SIGNIFICAND_QNAN,
 };

 static void vfp_double_dump(const char *str, struct vfp_double *d)
@ -69,9 +69,9 @@ static void vfp_double_dump(const char *str, struct vfp_double *d)

 static void vfp_double_normalise_denormal(struct vfp_double *vd)
 {
-	int bits = 31 - vfp_fls(vd->significand >> 32);
+    int bits = 31 - fls((ARMword)(vd->significand >> 32));
    if (bits == 31)
-		bits = 63 - vfp_fls(vd->significand);
+        bits = 63 - fls((ARMword)vd->significand);

    vfp_double_dump("normalise_denormal: in", vd);

@ -83,6 +83,134 @@ static void vfp_double_normalise_denormal(struct vfp_double *vd)
    vfp_double_dump("normalise_denormal: out", vd);
 }

+u32 vfp_double_normaliseroundintern(ARMul_State* state, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) // Normalises and rounds *vd in place according to the rounding mode in fpscr; writes no register, always returns 0. state and func are not used in this body.
+{
+    u64 significand, incr;
+    int exponent, shift, underflow;
+    u32 rmode;
+
+    vfp_double_dump("pack: in", vd);
+
+    /*
+    * Infinities and NaNs are a special case: exponent 2047 with a zero significand, or with any incoming exception bits set, leaves *vd untouched.
+    */
+    if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
+        goto pack;
+
+    /*
+    * Special-case zero: a zero significand forces the exponent to 0 and skips rounding.
+    */
+    if (vd->significand == 0) {
+        vd->exponent = 0;
+        goto pack;
+    }
+
+    exponent = vd->exponent;
+    significand = vd->significand;
+
+    shift = 32 - fls((ARMword)(significand >> 32)); // NOTE(review): presumably fls() gives the 1-based index of the highest set bit (0 when none) - confirm
+    if (shift == 32) // nothing set in the high word: measure the low word instead
+        shift = 64 - fls((ARMword)significand);
+    if (shift) { // move the leading set bit up and compensate in the exponent
+        exponent -= shift;
+        significand <<= shift;
+    }
+
+#if 1
+    vd->exponent = exponent; // written back so the dump below shows the intermediate state
+    vd->significand = significand;
+    vfp_double_dump("pack: normalised", vd);
+#endif
+
+    /*
+    * Tiny number? A negative exponent after normalisation is handled by shifting the significand right and clamping the exponent to 0.
+    */
+    underflow = exponent < 0;
+    if (underflow) {
+        significand = vfp_shiftright64jamming(significand, -exponent); // NOTE(review): helper name suggests lost bits are kept sticky - confirm
+        exponent = 0;
+#if 1
+        vd->exponent = exponent;
+        vd->significand = significand;
+        vfp_double_dump("pack: tiny number", vd);
+#endif
+        if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) // no bits below the rounding point: clear the underflow flag
+            underflow = 0;
+    }
+
+    /*
+    * Select rounding increment (added to the significand before its low bits are dropped).
+    */
+    incr = 0;
+    rmode = fpscr & FPSCR_RMODE_MASK;
+
+    if (rmode == FPSCR_ROUND_NEAREST) {
+        incr = 1ULL << VFP_DOUBLE_LOW_BITS;
+        if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) // bit just above the discarded range is clear: use one less so an exact half does not carry
+            incr -= 1;
+    }
+    else if (rmode == FPSCR_ROUND_TOZERO) {
+        incr = 0; // redundant with the initialisation above; truncation adds nothing
+    }
+    else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) // plus-infinity mode with a positive value, or the remaining mode with a negative value
+        incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
+
+    pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
+
+    /*
+    * Is our rounding going to overflow? Unsigned wrap-around of significand + incr means a carry out of bit 63.
+    */
+    if ((significand + incr) < significand) {
+        exponent += 1;
+        significand = (significand >> 1) | (significand & 1); // keep the shifted-out bit ORed into bit 0
+        incr >>= 1;
+#if 1
+        vd->exponent = exponent;
+        vd->significand = significand;
+        vfp_double_dump("pack: overflow", vd);
+#endif
+    }
+
+    /*
+    * If any of the low bits (which will be shifted out of the
+    * number) are non-zero, the result is inexact.
+    */
+    if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) // NOTE(review): int literal 1 here while the other masks in this function use 1ULL - confirm the shift count stays below the width of int
+        exceptions |= FPSCR_IXC;
+
+    /*
+    * Do our rounding.
+    */
+    significand += incr;
+
+    /*
+    * Infinity? Exponent 2046 or above cannot be stored as a finite value.
+    */
+    if (exponent >= 2046) {
+        exceptions |= FPSCR_OFC | FPSCR_IXC;
+        if (incr == 0) { // no increment selected: clamp to exponent 2045 with an all-ones significand instead of infinity
+            vd->exponent = 2045;
+            vd->significand = 0x7fffffffffffffffULL;
+        }
+        else {
+            vd->exponent = 2047;		/* infinity */
+            vd->significand = 0;
+        }
+    }
+    else {
+        if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) // nothing left above the discarded bits
+            exponent = 0;
+        if (exponent || significand > 0x8000000000000000ULL)
+            underflow = 0;
+        if (underflow)
+            exceptions |= FPSCR_UFC;
+        vd->exponent = exponent;
+        vd->significand = significand >> 1; // stored one bit lower than the working value
+    }
+ pack:
+    return 0; // the exception bits accumulated above are not returned to the caller
+}
+
 u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
 {
    u64 significand, incr;
@ -108,9 +236,9 @@ u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd,
    exponent = vd->exponent;
    significand = vd->significand;

-	shift = 32 - vfp_fls(significand >> 32);
+    shift = 32 - fls((ARMword)(significand >> 32));
    if (shift == 32)
-		shift = 64 - vfp_fls(significand);
+        shift = 64 - fls((ARMword)significand);
    if (shift) {
        exponent -= shift;
        significand <<= shift;
@ -287,7 +415,7 @@ static u32 vfp_double_fneg(ARMul_State* state, int dd, int unused, int dm, u32 f
 static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
 {
    pr_debug("In %s\n", __FUNCTION__);
-	struct vfp_double vdm, vdd, *vdp;
+    vfp_double vdm, vdd, *vdp;
    int ret, tm;

    vfp_double_unpack(&vdm, vfp_get_double(state, dm));
@ -464,6 +592,49 @@ static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32
    return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr);
 }

+u32 vfp_double_fcvtsinterncutting(ARMul_State* state, int sd, struct vfp_double* dm, u32 fpscr) // ichfly: for internal use only. Narrows the already-unpacked double *dm to single precision for register index sd and returns exception flags; *dm may be modified.
+{
+    struct vfp_single vsd;
+    int tm;
+    u32 exceptions = 0;
+
+    pr_debug("In %s\n", __FUNCTION__);
+
+    tm = vfp_double_type(dm);
+
+    /*
+    * If we have a signalling NaN, signal invalid operation.
+    */
+    if (tm == VFP_SNAN)
+        exceptions = FPSCR_IOC;
+
+    if (tm & VFP_DENORMAL)
+        vfp_double_normalise_denormal(dm); // operates on the caller's struct, not on a copy
+
+    vsd.sign = dm->sign;
+    vsd.significand = vfp_hi64to32jamming(dm->significand); // NOTE(review): helper name suggests it keeps the high 32 bits and folds the lost low bits into a sticky bit - confirm
+
+    /*
+    * If we have an infinity or a NaN, the exponent must be 255; these values skip rounding and are stored directly at pack_nan.
+    */
+    if (tm & (VFP_INFINITY | VFP_NAN)) {
+        vsd.exponent = 255;
+        if (tm == VFP_QNAN)
+            vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
+        goto pack_nan;
+    }
+    else if (tm & VFP_ZERO)
+        vsd.exponent = 0;
+    else
+        vsd.exponent = dm->exponent - (1023 - 127); // shift the exponent from the double offset (1023) to the single offset (127)
+
+    return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts"); // presumably rounds and stores the result for sd - body not visible here
+
+pack_nan:
+    vfp_put_float(state, vfp_single_pack(&vsd), sd);
+    return exceptions;
+}
+
 static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
 {
    struct vfp_double vdm;
@ -564,7 +735,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32
        /*
         * 2^0 <= m < 2^32-2^8
         */
-		d = (vdm.significand << 1) >> shift;
+        d = (ARMword)((vdm.significand << 1) >> shift);
        rem = vdm.significand << (65 - shift);

        if (rmode == FPSCR_ROUND_NEAREST) {
@ -645,7 +816,7 @@ static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32
        int shift = 1023 + 63 - vdm.exponent;	/* 58 */
        u64 rem, incr = 0;

-		d = (vdm.significand << 1) >> shift;
+        d = (ARMword)((vdm.significand << 1) >> shift);
        rem = vdm.significand << (65 - shift);

        if (rmode == FPSCR_ROUND_NEAREST) {
@ -660,8 +831,8 @@ static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32

        if ((rem + incr) < rem && d < 0xffffffff)
            d += 1;
-		if (d > 0x7fffffff + (vdm.sign != 0)) {
-			d = 0x7fffffff + (vdm.sign != 0);
+        if (d > (0x7fffffff + (vdm.sign != 0))) {
+            d = (0x7fffffff + (vdm.sign != 0));
            exceptions |= FPSCR_IOC;
        } else if (rem)
            exceptions |= FPSCR_IXC;
@ -768,16 +939,14 @@ vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
    return exceptions;
 }

-static u32
-vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
-	       struct vfp_double *vdm, u32 fpscr)
+u32 vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,struct vfp_double *vdm, u32 fpscr)
 {
    u32 exp_diff;
    u64 m_sig;

    if (vdn->significand & (1ULL << 63) ||
            vdm->significand & (1ULL << 63)) {
-		pr_info("VFP: bad FP values\n");
+        pr_info("VFP: bad FP values in %s\n", __func__);
        vfp_double_dump("VDN", vdn);
        vfp_double_dump("VDM", vdm);
    }
@ -833,7 +1002,7 @@ vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
    return 0;
 }

-static u32
+u32
 vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
                    struct vfp_double *vdm, u32 fpscr)
 {
@ -895,7 +1064,7 @@ vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
 #define NEG_SUBTRACT	(1 << 1)

 static u32
-vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 fpscr, u32 negate, const char *func)
+vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
 {
    struct vfp_double vdd, vdp, vdn, vdm;
    u32 exceptions;
--- a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
@ -56,9 +56,9 @@
 #include "core/arm/skyeye_common/vfp/vfp.h"

 static struct vfp_single vfp_single_default_qnan = {
-	//.exponent	= 255,
-	//.sign		= 0,
-	//.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
+    255,
+    0,
+    VFP_SINGLE_SIGNIFICAND_QNAN,
 };

 static void vfp_single_dump(const char *str, struct vfp_single *s)
@ -69,7 +69,7 @@ static void vfp_single_dump(const char *str, struct vfp_single *s)

 static void vfp_single_normalise_denormal(struct vfp_single *vs)
 {
-	int bits = 31 - vfp_fls(vs->significand);
+    int bits = 31 - fls(vs->significand);

    vfp_single_dump("normalise_denormal: in", vs);

@ -111,7 +111,7 @@ u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs,
     * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
     * significant bit.
     */
-	shift = 32 - vfp_fls(significand);
+    shift = 32 - fls(significand);
    if (shift < 32 && shift) {
        exponent -= shift;
        significand <<= shift;
@ -321,7 +321,7 @@ u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
    {
        u64 v = (u64)a << 31;
        do_div(v, z);
-		return v + (z >> 1);
+        return (u32)(v + (z >> 1));
    }
 }

@ -491,6 +491,47 @@ static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32
    return vfp_compare(state, sd, 1, 0, fpscr);
 }

+static s64 vfp_single_to_doubleintern(ARMul_State* state, s32 m, u32 fpscr) // ichfly: for internal use only. Widens the raw single-precision bits m and returns the packed double bits; no register is written and no exception flags are reported.
+{
+    struct vfp_single vsm;
+    struct vfp_double vdd;
+    int tm;
+    u32 exceptions = 0;
+
+    vfp_single_unpack(&vsm, m);
+
+    tm = vfp_single_type(&vsm);
+
+    /*
+    * If we have a signalling NaN, signal invalid operation.
+    */
+    if (tm == VFP_SNAN)
+        exceptions = FPSCR_IOC;
+
+    if (tm & VFP_DENORMAL)
+        vfp_single_normalise_denormal(&vsm);
+
+    vdd.sign = vsm.sign;
+    vdd.significand = (u64)vsm.significand << 32; // place the 32-bit significand in the top half of the 64-bit one
+
+    /*
+    * If we have an infinity or NaN, the exponent must be 2047.
+    */
+    if (tm & (VFP_INFINITY | VFP_NAN)) {
+        vdd.exponent = 2047;
+        if (tm == VFP_QNAN)
+            vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
+        goto pack_nan; // the label directly follows the else chain, so every branch ends up there
+    }
+    else if (tm & VFP_ZERO)
+        vdd.exponent = 0;
+    else
+        vdd.exponent = vsm.exponent + (1023 - 127); // shift the exponent from the single offset (127) to the double offset (1023)
+pack_nan:
+    vfp_double_normaliseroundintern(state, &vdd, fpscr, exceptions, "fcvtd"); // return value ignored; exceptions is only passed along, never handed back to the caller
+    return vfp_double_pack(&vdd);
+}
+
 static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr)
 {
    struct vfp_single vsm;
@ -684,14 +725,14 @@ static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 f

        if ((rem + incr) < rem && d < 0xffffffff)
            d += 1;
-		if (d > 0x7fffffff + (vsm.sign != 0)) {
-			d = 0x7fffffff + (vsm.sign != 0);
+        if (d > (0x7fffffffu + (vsm.sign != 0))) {
+            d = (0x7fffffffu + (vsm.sign != 0));
            exceptions |= FPSCR_IOC;
        } else if (rem)
            exceptions |= FPSCR_IXC;

        if (vsm.sign)
-			d = -d;
+            d = 0-d;
    } else {
        d = 0;
        if (vsm.exponent | vsm.significand) {
@ -800,7 +841,7 @@ vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,

    if (vsn->significand & 0x80000000 ||
            vsm->significand & 0x80000000) {
-		pr_info("VFP: bad FP values\n");
+        pr_info("VFP: bad FP values in %s\n", __func__);
        vfp_single_dump("VSN", vsn);
        vfp_single_dump("VSM", vsm);
    }
@ -843,7 +884,7 @@ vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
        m_sig = vsn->significand - m_sig;
        if ((s32)m_sig < 0) {
            vsd->sign = vfp_sign_negate(vsd->sign);
-			m_sig = -m_sig;
+            m_sig = 0-m_sig;
        } else if (m_sig == 0) {
            vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
                        FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
@ -917,12 +958,16 @@ vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_s
 #define NEG_SUBTRACT	(1 << 1)

 static u32
-vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr, u32 negate, const char *func)
+vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
+{
+    
    {
        struct vfp_single vsd, vsp, vsn, vsm;
        u32 exceptions;
        s32 v;

+
+
        v = vfp_get_float(state, sn);
        pr_debug("VFP: s%u = %08x\n", sn, v);
        vfp_single_unpack(&vsn, v);
@ -934,6 +979,7 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp
            vfp_single_normalise_denormal(&vsm);

        exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
+
        if (negate & NEG_MULTIPLY)
            vsp.sign = vfp_sign_negate(vsp.sign);

@ -948,6 +994,38 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp
        return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, func);
    }

+    struct vfp_double vsd, vsp, vsn, vsm;
+    u32 exceptions;
+    s32 v;
+    s64 vd;
+    s64 md;
+
+    v = vfp_get_float(state, sn);
+    vd = vfp_single_to_doubleintern(state, v, fpscr);
+    vfp_double_unpack(&vsn, vd);
+
+    md = vfp_single_to_doubleintern(state, m, fpscr);
+    vfp_double_unpack(&vsm, md);
+
+    exceptions = vfp_double_multiply(&vsp, &vsn, &vsm, fpscr);
+    if (negate & NEG_MULTIPLY)
+        vsp.sign = vfp_sign_negate(vsp.sign);
+
+    v = vfp_get_float(state, sd);
+    vd = vfp_single_to_doubleintern(state, v, fpscr);
+    vfp_double_unpack(&vsn, vd);
+
+    if (negate & NEG_SUBTRACT)
+        vsn.sign = vfp_sign_negate(vsn.sign);
+
+    exceptions |= vfp_double_add(&vsd, &vsn, &vsp, fpscr);
+
+    s64 debug = vfp_double_pack(&vsd);
+
+    return vfp_double_fcvtsinterncutting(state, sd, &vsd, fpscr);
+
+}
+
 /*
 * Standard operations
 */
@ -1148,7 +1226,7 @@ static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
    {
        u64 significand = (u64)vsn.significand << 32;
        do_div(significand, vsm.significand);
-		vsd.significand = significand;
+        vsd.significand = (u32)significand;
    }
    if ((vsd.significand & 0x3f) == 0)
        vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);