target-arm queue:

* Fix coverity nit in int_to_float code * Don't set Invalid for float-to-int(MAXINT) * Fix fp_status_f16 tininess before rounding * Add various missing insns from the v8.2-FP16 extension * Fix sqrt_f16 exception raising * sdcard: Correct CRC16 offset in sd_function_switch() * tcg: Optionally log FPU state in TCG -d cpu logging -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABCAAGBQJa+ulRAAoJEDwlJe0UNgzegCEP/jOPQl96XtXayo0N1i3ZjpwK kkwToh2EW1nyJplskBohyA5jHATQ5DYFRp7yFv6Q7u8CZKj8IrResn+RHX+4tuMG 6INVpdcB0PYe817slrwXgQYfJD3370Cqmg1F2QPn3/v1Kqsf8JjNbf9FcLKKqzLO Ubha9gn8mQfzzFPDuc2ECtUYZcvgqMMUimm4z1WELMENz0iGJTvG9M/XwHuRvcBA sGiQbXTIvGDMg0xH8xUvkC8PLGFeS7YXRTHfsPCfIdMEvPhxitYiRZZ0NTzQ71kW jGoZkUcsQRjQ9g+TebkTYJT/WU0rJAwwY9q0N2mYtxnBlYEsOxwfjEgtKbkcrNy5 OsMPZQVD7n1mt7m4QiJkO/fkDHyj+7DxXDFx5uJraNKT0dyxsJeBMvybLI0rY7yA OBLD5fx96oaIcKFYzani080Dd0gsh14pyZnKou5JfBhnwVMmm0iJK9pxzRct9OUu ki7ER2xIJu3JEI4+v++5Ig/NcIMFn3SBpOdwU0NdV2eMwWUfQh2yD6kBD5k2WoAQ 90314gcax25q1OEd0qEA5TpXX8exItSFsbTCm8GCBNNp5GYJg5bQuHE+sTXSVTDy nsoySwZLW9emZFaWEcqC9y+eh6nnB62g5CCKLm2LxRkqoi8Deb+Bg0zOdWwHAUFc K/48b8tAdV9MYPcV202V =Dusw -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180515' into staging target-arm queue: * Fix coverity nit in int_to_float code * Don't set Invalid for float-to-int(MAXINT) * Fix fp_status_f16 tininess before rounding * Add various missing insns from the v8.2-FP16 extension * Fix sqrt_f16 exception raising * sdcard: Correct CRC16 offset in sd_function_switch() * tcg: Optionally log FPU state in TCG -d cpu logging # gpg: Signature made Tue 15 May 2018 15:06:09 BST # gpg: using RSA key 3C2525ED14360CDE # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" # gpg: aka "Peter Maydell <pmaydell@gmail.com>" # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20180515: tcg: Optionally log 
FPU state in TCG -d cpu logging sdcard: Correct CRC16 offset in sd_function_switch() target/arm: Fix sqrt_f16 exception raising target/arm: Implement FMOV (immediate) for fp16 target/arm: Implement FCSEL for fp16 target/arm: Implement FCMP for fp16 target/arm: Implement FP data-processing (3 source) for fp16 target/arm: Implement FP data-processing (2 source) for fp16 target/arm: Introduce and use read_fp_hreg target/arm: Implement FCVT (scalar, fixed-point) for fp16 target/arm: Implement FCVT (scalar, integer) for fp16 target/arm: Early exit after unallocated_encoding in disas_fp_int_conv target/arm: Implement FMOV (general) for fp16 target/arm: Fix fp_status_f16 tininess before rounding fpu/softfloat: Don't set Invalid for float-to-int(MAXINT) fpu/softfloat: int_to_float ensure r fully initialised Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2018-05-15 15:07:34 +01:00 · 2018-05-15 15:07:34 +01:00 · 0275e6b66c
parent ad1b4ec39c ae76518047
commit 0275e6b66c
11 changed files with 428 additions and 71 deletions
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -156,11 +156,14 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
        && qemu_log_in_addr_range(itb->pc)) {
        qemu_log_lock();
+        int flags = 0;
+        if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
+            flags |= CPU_DUMP_FPU;
+        }
 #if defined(TARGET_I386)
-        log_cpu_state(cpu, CPU_DUMP_CCOP);
-#else
-        log_cpu_state(cpu, 0);
+        flags |= CPU_DUMP_CCOP;
 #endif
+        log_cpu_state(cpu, flags);
        qemu_log_unlock();
    }
 #endif /* DEBUG_DISAS */
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1368,14 +1368,14 @@ static int64_t round_to_int_and_pack(FloatParts in, int rmode,
            r = UINT64_MAX;
        }
        if (p.sign) {
-            if (r < -(uint64_t) min) {
+            if (r <= -(uint64_t) min) {
                return -r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return min;
            }
        } else {
-            if (r < max) {
+            if (r <= max) {
                return r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
@@ -1525,7 +1525,7 @@ FLOAT_TO_UINT(64, 64)

 static FloatParts int_to_float(int64_t a, float_status *status)
 {
-    FloatParts r;
+    FloatParts r = {};
    if (a == 0) {
        r.cls = float_class_zero;
        r.sign = false;
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -787,7 +787,7 @@ static void sd_function_switch(SDState *sd, uint32_t arg)
        sd->data[14 + (i >> 1)] = new_func << ((i * 4) & 4);
    }
    memset(&sd->data[17], 0, 47);
-    stw_be_p(sd->data + 65, sd_crc16(sd->data, 64));
+    stw_be_p(sd->data + 64, sd_crc16(sd->data, 64));
 }

 static inline bool sd_wp_addr(SDState *sd, uint64_t addr)
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -44,6 +44,7 @@ static inline bool qemu_log_separate(void)
 #define CPU_LOG_PAGE       (1 << 14)
 /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
 #define CPU_LOG_TB_OP_IND  (1 << 16)
+#define CPU_LOG_TB_FPU     (1 << 17)

 /* Lock output for a series of related logs.  Since this is not needed
 * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -324,6 +324,8 @@ static void arm_cpu_reset(CPUState *s)
                              &env->vfp.fp_status);
    set_float_detect_tininess(float_tininess_before_rounding,
                              &env->vfp.standard_fp_status);
+    set_float_detect_tininess(float_tininess_before_rounding,
+                              &env->vfp.fp_status_f16);
 #ifndef CONFIG_USER_ONLY
    if (kvm_enabled()) {
        kvm_arm_reset_vcpu(cpu);
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -85,6 +85,16 @@ static inline uint32_t float_rel_to_flags(int res)
    return flags;
 }

+uint64_t HELPER(vfp_cmph_a64)(float16 x, float16 y, void *fp_status)
+{
+    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmpeh_a64)(float16 x, float16 y, void *fp_status)
+{
+    return float_rel_to_flags(float16_compare(x, y, fp_status));
+}
+
 uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
 {
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -19,6 +19,8 @@
 DEF_HELPER_FLAGS_2(udiv64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
 DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_3(vfp_cmph_a64, i64, f16, f16, ptr)
+DEF_HELPER_3(vfp_cmpeh_a64, i64, f16, f16, ptr)
 DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
 DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
 DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11427,8 +11427,12 @@ VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
 #undef VFP_CONV_FIX_A64

 /* Conversion to/from f16 can overflow to infinity before/after scaling.
- * Therefore we convert to f64 (which does not round), scale,
- * and then convert f64 to f16 (which may round).
+ * Therefore we convert to f64, scale, and then convert f64 to f16; or
+ * vice versa for conversion to integer.
+ *
+ * For 16- and 32-bit integers, the conversion to f64 never rounds.
+ * For 64-bit integers, any integer that would cause rounding will also
+ * overflow to f16 infinity, so there is no double rounding problem.
 */

 static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
@@ -11446,6 +11450,16 @@ float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
    return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
 }

+float16 HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
+{
+    return do_postscale_fp16(int64_to_float64(x, fpst), shift, fpst);
+}
+
+float16 HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
+{
+    return do_postscale_fp16(uint64_to_float64(x, fpst), shift, fpst);
+}
+
 static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
 {
    if (unlikely(float16_is_any_nan(f))) {
@@ -11475,6 +11489,26 @@ uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst)
    return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
 }

+uint32_t HELPER(vfp_toslh)(float16 x, uint32_t shift, void *fpst)
+{
+    return float64_to_int32(do_prescale_fp16(x, shift, fpst), fpst);
+}
+
+uint32_t HELPER(vfp_toulh)(float16 x, uint32_t shift, void *fpst)
+{
+    return float64_to_uint32(do_prescale_fp16(x, shift, fpst), fpst);
+}
+
+uint64_t HELPER(vfp_tosqh)(float16 x, uint32_t shift, void *fpst)
+{
+    return float64_to_int64(do_prescale_fp16(x, shift, fpst), fpst);
+}
+
+uint64_t HELPER(vfp_touqh)(float16 x, uint32_t shift, void *fpst)
+{
+    return float64_to_uint64(do_prescale_fp16(x, shift, fpst), fpst);
+}
+
 /* Set the current fp rounding mode and return the old one.
 * The argument is a softfloat float_round_ value.
 */
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -151,6 +151,10 @@ DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
 DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_touqh, i64, f16, i32, ptr)
+DEF_HELPER_3(vfp_tosqh, i64, f16, i32, ptr)
 DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
 DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
 DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)
@@ -177,6 +181,8 @@ DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
 DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)
 DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr)
 DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr)
+DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr)
+DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr)

 DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr)
 DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env)
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -615,6 +615,14 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
    return v;
 }

+static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
+{
+    TCGv_i32 v = tcg_temp_new_i32();
+
+    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
+    return v;
+}
+
 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
@@ -4704,14 +4712,14 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
    }
 }

-static void handle_fp_compare(DisasContext *s, bool is_double,
+static void handle_fp_compare(DisasContext *s, int size,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
 {
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
-    TCGv_ptr fpst = get_fpstatus_ptr(false);
+    TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);

-    if (is_double) {
+    if (size == MO_64) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
@@ -4728,19 +4736,35 @@ static void handle_fp_compare(DisasContext *s, bool is_double,
        tcg_temp_free_i64(tcg_vn);
        tcg_temp_free_i64(tcg_vm);
    } else {
-        TCGv_i32 tcg_vn, tcg_vm;
+        TCGv_i32 tcg_vn = tcg_temp_new_i32();
+        TCGv_i32 tcg_vm = tcg_temp_new_i32();

-        tcg_vn = read_fp_sreg(s, rn);
+        read_vec_element_i32(s, tcg_vn, rn, 0, size);
        if (cmp_with_zero) {
-            tcg_vm = tcg_const_i32(0);
+            tcg_gen_movi_i32(tcg_vm, 0);
        } else {
-            tcg_vm = read_fp_sreg(s, rm);
+            read_vec_element_i32(s, tcg_vm, rm, 0, size);
        }
-        if (signal_all_nans) {
-            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
-        } else {
-            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+
+        switch (size) {
+        case MO_32:
+            if (signal_all_nans) {
+                gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+            } else {
+                gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+            }
+            break;
+        case MO_16:
+            if (signal_all_nans) {
+                gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+            } else {
+                gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+            }
+            break;
+        default:
+            g_assert_not_reached();
        }
+
        tcg_temp_free_i32(tcg_vn);
        tcg_temp_free_i32(tcg_vm);
    }
@@ -4761,16 +4785,35 @@ static void handle_fp_compare(DisasContext *s, bool is_double,
 static void disas_fp_compare(DisasContext *s, uint32_t insn)
 {
    unsigned int mos, type, rm, op, rn, opc, op2r;
+    int size;

    mos = extract32(insn, 29, 3);
-    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    op = extract32(insn, 14, 2);
    rn = extract32(insn, 5, 5);
    opc = extract32(insn, 3, 2);
    op2r = extract32(insn, 0, 3);

-    if (mos || op || op2r || type > 1) {
+    if (mos || op || op2r) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (type) {
+    case 0:
+        size = MO_32;
+        break;
+    case 1:
+        size = MO_64;
+        break;
+    case 3:
+        size = MO_16;
+        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            break;
+        }
+        /* fallthru */
+    default:
        unallocated_encoding(s);
        return;
    }
@@ -4779,7 +4822,7 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn)
        return;
    }

-    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
+    handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
 }

 /* Floating point conditional compare
@@ -4793,16 +4836,35 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
    unsigned int mos, type, rm, cond, rn, op, nzcv;
    TCGv_i64 tcg_flags;
    TCGLabel *label_continue = NULL;
+    int size;

    mos = extract32(insn, 29, 3);
-    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    op = extract32(insn, 4, 1);
    nzcv = extract32(insn, 0, 4);

-    if (mos || type > 1) {
+    if (mos) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (type) {
+    case 0:
+        size = MO_32;
+        break;
+    case 1:
+        size = MO_64;
+        break;
+    case 3:
+        size = MO_16;
+        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            break;
+        }
+        /* fallthru */
+    default:
        unallocated_encoding(s);
        return;
    }
@@ -4823,7 +4885,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
        gen_set_label(label_match);
    }

-    handle_fp_compare(s, type, rn, rm, false, op);
+    handle_fp_compare(s, size, rn, rm, false, op);

    if (cond < 0x0e) {
        gen_set_label(label_continue);
@@ -4841,15 +4903,34 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
    unsigned int mos, type, rm, cond, rn, rd;
    TCGv_i64 t_true, t_false, t_zero;
    DisasCompare64 c;
+    TCGMemOp sz;

    mos = extract32(insn, 29, 3);
-    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

-    if (mos || type > 1) {
+    if (mos) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (type) {
+    case 0:
+        sz = MO_32;
+        break;
+    case 1:
+        sz = MO_64;
+        break;
+    case 3:
+        sz = MO_16;
+        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            break;
+        }
+        /* fallthru */
+    default:
        unallocated_encoding(s);
        return;
    }
@@ -4858,11 +4939,11 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
        return;
    }

-    /* Zero extend sreg inputs to 64 bits now.  */
+    /* Zero extend sreg & hreg inputs to 64 bits now.  */
    t_true = tcg_temp_new_i64();
    t_false = tcg_temp_new_i64();
-    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
-    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
+    read_vec_element(s, t_true, rn, 0, sz);
+    read_vec_element(s, t_false, rm, 0, sz);

    a64_test_cc(&c, cond);
    t_zero = tcg_const_i64(0);
@@ -4871,7 +4952,7 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
    tcg_temp_free_i64(t_false);
    a64_free_cc(&c);

-    /* Note that sregs write back zeros to the high bits,
+    /* Note that sregs & hregs write back zeros to the high bits,
       and we've already done the zero-extension.  */
    write_fp_dreg(s, rd, t_true);
    tcg_temp_free_i64(t_true);
@@ -4881,11 +4962,9 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
 {
    TCGv_ptr fpst = NULL;
-    TCGv_i32 tcg_op = tcg_temp_new_i32();
+    TCGv_i32 tcg_op = read_fp_hreg(s, rn);
    TCGv_i32 tcg_res = tcg_temp_new_i32();

-    read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
-
    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
@@ -4897,7 +4976,8 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
        break;
    case 0x3: /* FSQRT */
-        gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
+        fpst = get_fpstatus_ptr(true);
+        gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
@@ -5293,6 +5373,61 @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
    tcg_temp_free_i64(tcg_res);
 }

+/* Floating-point data-processing (2 source) - half precision */
+static void handle_fp_2src_half(DisasContext *s, int opcode,
+                                int rd, int rn, int rm)
+{
+    TCGv_i32 tcg_op1;
+    TCGv_i32 tcg_op2;
+    TCGv_i32 tcg_res;
+    TCGv_ptr fpst;
+
+    tcg_res = tcg_temp_new_i32();
+    fpst = get_fpstatus_ptr(true);
+    tcg_op1 = read_fp_hreg(s, rn);
+    tcg_op2 = read_fp_hreg(s, rm);
+
+    switch (opcode) {
+    case 0x0: /* FMUL */
+        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x1: /* FDIV */
+        gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x2: /* FADD */
+        gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x3: /* FSUB */
+        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x4: /* FMAX */
+        gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x5: /* FMIN */
+        gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x6: /* FMAXNM */
+        gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x7: /* FMINNM */
+        gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x8: /* FNMUL */
+        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+        tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    write_fp_sreg(s, rd, tcg_res);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tcg_op1);
+    tcg_temp_free_i32(tcg_op2);
+    tcg_temp_free_i32(tcg_res);
+}
+
 /* Floating point data-processing (2 source)
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -5325,6 +5460,16 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn)
        }
        handle_fp_2src_double(s, opcode, rd, rn, rm);
        break;
+    case 3:
+        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            unallocated_encoding(s);
+            return;
+        }
+        if (!fp_access_check(s)) {
+            return;
+        }
+        handle_fp_2src_half(s, opcode, rd, rn, rm);
+        break;
    default:
        unallocated_encoding(s);
    }
@@ -5406,6 +5551,44 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
    tcg_temp_free_i64(tcg_res);
 }

+/* Floating-point data-processing (3 source) - half precision */
+static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
+                                int rd, int rn, int rm, int ra)
+{
+    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
+    TCGv_i32 tcg_res = tcg_temp_new_i32();
+    TCGv_ptr fpst = get_fpstatus_ptr(true);
+
+    tcg_op1 = read_fp_hreg(s, rn);
+    tcg_op2 = read_fp_hreg(s, rm);
+    tcg_op3 = read_fp_hreg(s, ra);
+
+    /* These are fused multiply-add, and must be done as one
+     * floating point operation with no rounding between the
+     * multiplication and addition steps.
+     * NB that doing the negations here as separate steps is
+     * correct : an input NaN should come out with its sign bit
+     * flipped if it is a negated-input.
+     */
+    if (o1 == true) {
+        tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
+    }
+
+    if (o0 != o1) {
+        tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
+    }
+
+    gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
+
+    write_fp_sreg(s, rd, tcg_res);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tcg_op1);
+    tcg_temp_free_i32(tcg_op2);
+    tcg_temp_free_i32(tcg_op3);
+    tcg_temp_free_i32(tcg_res);
+}
+
 /* Floating point data-processing (3 source)
 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
 * +---+---+---+-----------+------+----+------+----+------+------+------+
@@ -5435,6 +5618,16 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
        }
        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
        break;
+    case 3:
+        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            unallocated_encoding(s);
+            return;
+        }
+        if (!fp_access_check(s)) {
+            return;
+        }
+        handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
+        break;
    default:
        unallocated_encoding(s);
    }
@@ -5482,11 +5675,25 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
 {
    int rd = extract32(insn, 0, 5);
    int imm8 = extract32(insn, 13, 8);
-    int is_double = extract32(insn, 22, 2);
+    int type = extract32(insn, 22, 2);
    uint64_t imm;
    TCGv_i64 tcg_res;
+    TCGMemOp sz;

-    if (is_double > 1) {
+    switch (type) {
+    case 0:
+        sz = MO_32;
+        break;
+    case 1:
+        sz = MO_64;
+        break;
+    case 3:
+        sz = MO_16;
+        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            break;
+        }
+        /* fallthru */
+    default:
        unallocated_encoding(s);
        return;
    }
@@ -5495,7 +5702,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
        return;
    }

-    imm = vfp_expand_imm(MO_32 + is_double, imm8);
+    imm = vfp_expand_imm(sz, imm8);

    tcg_res = tcg_const_i64(imm);
    write_fp_dreg(s, rd, tcg_res);
@@ -5511,11 +5718,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
 {
    bool is_signed = !(opcode & 1);
-    bool is_double = type;
    TCGv_ptr tcg_fpstatus;
-    TCGv_i32 tcg_shift;
+    TCGv_i32 tcg_shift, tcg_single;
+    TCGv_i64 tcg_double;

-    tcg_fpstatus = get_fpstatus_ptr(false);
+    tcg_fpstatus = get_fpstatus_ptr(type == 3);

    tcg_shift = tcg_const_i32(64 - scale);

@@ -5533,8 +5740,9 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
            tcg_int = tcg_extend;
        }

-        if (is_double) {
-            TCGv_i64 tcg_double = tcg_temp_new_i64();
+        switch (type) {
+        case 1: /* float64 */
+            tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
@@ -5544,8 +5752,10 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
-        } else {
-            TCGv_i32 tcg_single = tcg_temp_new_i32();
+            break;
+
+        case 0: /* float32 */
+            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
@@ -5555,6 +5765,23 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
+            break;
+
+        case 3: /* float16 */
+            tcg_single = tcg_temp_new_i32();
+            if (is_signed) {
+                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
+                                     tcg_shift, tcg_fpstatus);
+            } else {
+                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
+                                     tcg_shift, tcg_fpstatus);
+            }
+            write_fp_sreg(s, rd, tcg_single);
+            tcg_temp_free_i32(tcg_single);
+            break;
+
+        default:
+            g_assert_not_reached();
        }
    } else {
        TCGv_i64 tcg_int = cpu_reg(s, rd);
@@ -5571,8 +5798,9 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);

-        if (is_double) {
-            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
+        switch (type) {
+        case 1: /* float64 */
+            tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
@@ -5590,9 +5818,14 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                                         tcg_shift, tcg_fpstatus);
                }
            }
+            if (!sf) {
+                tcg_gen_ext32u_i64(tcg_int, tcg_int);
+            }
            tcg_temp_free_i64(tcg_double);
-        } else {
-            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
+            break;
+
+        case 0: /* float32 */
+            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
@@ -5614,14 +5847,39 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
+            break;
+
+        case 3: /* float16 */
+            tcg_single = read_fp_sreg(s, rn);
+            if (sf) {
+                if (is_signed) {
+                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
+                                         tcg_shift, tcg_fpstatus);
+                } else {
+                    gen_helper_vfp_touqh(tcg_int, tcg_single,
+                                         tcg_shift, tcg_fpstatus);
+                }
+            } else {
+                TCGv_i32 tcg_dest = tcg_temp_new_i32();
+                if (is_signed) {
+                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
+                                         tcg_shift, tcg_fpstatus);
+                } else {
+                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
+                                         tcg_shift, tcg_fpstatus);
+                }
+                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
+                tcg_temp_free_i32(tcg_dest);
+            }
+            tcg_temp_free_i32(tcg_single);
+            break;
+
+        default:
+            g_assert_not_reached();
        }

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
-
-        if (!sf) {
-            tcg_gen_ext32u_i64(tcg_int, tcg_int);
-        }
    }

    tcg_temp_free_ptr(tcg_fpstatus);
@@ -5646,8 +5904,21 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
    bool sf = extract32(insn, 31, 1);
    bool itof;

-    if (sbit || (type > 1)
-        || (!sf && scale < 32)) {
+    if (sbit || (!sf && scale < 32)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (type) {
+    case 0: /* float32 */
+    case 1: /* float64 */
+        break;
+    case 3: /* float16 */
+        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            break;
+        }
+        /* fallthru */
+    default:
        unallocated_encoding(s);
        return;
    }
@@ -5700,6 +5971,15 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
            clear_vec_high(s, true, rd);
            break;
+        case 3:
+            /* 16 bit */
+            tmp = tcg_temp_new_i64();
+            tcg_gen_ext16u_i64(tmp, tcg_rn);
+            write_fp_dreg(s, rd, tmp);
+            tcg_temp_free_i64(tmp);
+            break;
+        default:
+            g_assert_not_reached();
        }
    } else {
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
@@ -5717,6 +5997,12 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
            /* 64 bits from top half */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
            break;
+        case 3:
+            /* 16 bit */
+            tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
+            break;
+        default:
+            g_assert_not_reached();
        }
    }
 }
@@ -5756,10 +6042,16 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
        case 0xa: /* 64 bit */
        case 0xd: /* 64 bit to top half of quad */
            break;
+        case 0x6: /* 16-bit float, 32-bit int */
+        case 0xe: /* 16-bit float, 64-bit int */
+            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+                break;
+            }
+            /* fallthru */
        default:
            /* all other sf/type/rmode combinations are invalid */
            unallocated_encoding(s);
-            break;
+            return;
        }

        if (!fp_access_check(s)) {
@@ -5770,7 +6062,20 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
        /* actual FP conversions */
        bool itof = extract32(opcode, 1, 1);

-        if (type > 1 || (rmode != 0 && opcode > 1)) {
+        if (rmode != 0 && opcode > 1) {
+            unallocated_encoding(s);
+            return;
+        }
+        switch (type) {
+        case 0: /* float32 */
+        case 1: /* float64 */
+            break;
+        case 3: /* float16 */
+            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+                break;
+            }
+            /* fallthru */
+        default:
            unallocated_encoding(s);
            return;
        }
@@ -7686,13 +7991,10 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
-        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
-        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+        TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
+        TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
        TCGv_i64 tcg_res = tcg_temp_new_i64();

-        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
-        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
-
        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);

@@ -8233,13 +8535,10 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,

    fpst = get_fpstatus_ptr(true);

-    tcg_op1 = tcg_temp_new_i32();
-    tcg_op2 = tcg_temp_new_i32();
+    tcg_op1 = read_fp_hreg(s, rn);
+    tcg_op2 = read_fp_hreg(s, rm);
    tcg_res = tcg_temp_new_i32();

-    read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
-    read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
-
    switch (fpopcode) {
    case 0x03: /* FMULX */
        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
@@ -12137,11 +12436,9 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
    }

    if (is_scalar) {
-        TCGv_i32 tcg_op = tcg_temp_new_i32();
+        TCGv_i32 tcg_op = read_fp_hreg(s, rn);
        TCGv_i32 tcg_res = tcg_temp_new_i32();

-        read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
-
        switch (fpop) {
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
--- a/util/log.c
+++ b/util/log.c
@@ -256,6 +256,8 @@ const QEMULogItem qemu_log_items[] = {
      "show trace before each executed TB (lots of logs)" },
    { CPU_LOG_TB_CPU, "cpu",
      "show CPU registers before entering a TB (lots of logs)" },
+    { CPU_LOG_TB_FPU, "fpu",
+      "include FPU registers in the 'cpu' logging" },
    { CPU_LOG_MMU, "mmu",
      "log MMU-related activities" },
    { CPU_LOG_PCALL, "pcall",