diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h index 3db9b15f12..32fd2e1f9b 100644 --- a/target/arm/helper-mve.h +++ b/target/arm/helper-mve.h @@ -410,6 +410,9 @@ DEF_HELPER_FLAGS_4(mve_vhcadd270b, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(mve_vhcadd270h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(mve_vhcadd270w, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(mve_vfaddh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(mve_vfadds, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) + DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) diff --git a/target/arm/mve.decode b/target/arm/mve.decode index 8744681629..e211cb016c 100644 --- a/target/arm/mve.decode +++ b/target/arm/mve.decode @@ -26,6 +26,10 @@ # VQDMULL has size in bit 28: 0 for 16 bit, 1 for 32 bit %size_28 28:1 !function=plus_1 +# 2 operand fp insns have size in bit 20: 1 for 16 bit, 0 for 32 bit, +# like Neon FP insns. +%2op_fp_size 20:1 !function=neon_3same_fp_size + # 1imm format immediate %imm_28_16_0 28:1 16:3 0:4 @@ -118,6 +122,9 @@ @vmaxv .... .... .... size:2 .. rda:4 .... .... .... &vmaxv qm=%qm +@2op_fp .... .... .... .... .... .... .... .... &2op \ + qd=%qd qn=%qn qm=%qm size=%2op_fp_size + # Vector loads and stores # Widening loads and narrowing stores: @@ -615,3 +622,6 @@ VCMPGE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0 1 0 0 .... @vcmp_scalar VCMPLT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1 1 0 0 .... @vcmp_scalar VCMPGT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0 1 1 0 .... @vcmp_scalar VCMPLE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1 1 1 0 .... @vcmp_scalar + +# 2-operand FP +VADD_fp 1110 1111 0 . 0 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c index c2826eb5f9..abca7c0b2a 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/mve_helper.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "exec/exec-all.h" #include "tcg/tcg.h" +#include "fpu/softfloat.h" static uint16_t mve_eci_mask(CPUARMState *env) { @@ -2798,3 +2799,42 @@ DO_VMAXMINA(vmaxaw, 4, int32_t, uint32_t, DO_MAX) DO_VMAXMINA(vminab, 1, int8_t, uint8_t, DO_MIN) DO_VMAXMINA(vminah, 2, int16_t, uint16_t, DO_MIN) DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) + +/* + * 2-operand floating point. Note that if an element is partially + * predicated we must do the FP operation to update the non-predicated + * bytes, but we must be careful to avoid updating the FP exception + * state unless byte 0 of the element was unpredicated. + */ +#define DO_2OP_FP(OP, ESIZE, TYPE, FN) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, \ + void *vd, void *vn, void *vm) \ + { \ + TYPE *d = vd, *n = vn, *m = vm; \ + TYPE r; \ + uint16_t mask = mve_element_mask(env); \ + unsigned e; \ + float_status *fpst; \ + float_status scratch_fpst; \ + for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ + if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ + continue; \ + } \ + fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ + &env->vfp.standard_fp_status; \ + if (!(mask & 1)) { \ + /* We need the result but without updating flags */ \ + scratch_fpst = *fpst; \ + fpst = &scratch_fpst; \ + } \ + r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], fpst); \ + mergemask(&d[H##ESIZE(e)], r, mask); \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_2OP_FP_ALL(OP, FN) \ + DO_2OP_FP(OP##h, 2, float16, float16_##FN) \ + DO_2OP_FP(OP##s, 4, float32, float32_##FN) + +DO_2OP_FP_ALL(vfadd, add) diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c index 78229c44c6..d2c40ede56 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/translate-mve.c @@ -831,6 +831,23 @@ static bool trans_VSBCI(DisasContext *s, arg_2op *a) return do_2op(s, a, gen_helper_mve_vsbci); } +#define DO_2OP_FP(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_2op *a) \ + { \ + static MVEGenTwoOpFn * const fns[] = { \ + NULL, \ + gen_helper_mve_##FN##h, \ + gen_helper_mve_##FN##s, \ + NULL, \ + }; \ + if (!dc_isar_feature(aa32_mve_fp, s)) { \ + return false; \ + } \ + return do_2op(s, a, fns[a->size]); \ + } + +DO_2OP_FP(VADD_fp, vfadd) + static bool do_2op_scalar(DisasContext *s, arg_2scalar *a, MVEGenTwoOpScalarFn fn) { diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index c53ab20fa4..dd43de558e 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -28,12 +28,6 @@ #include "translate.h" #include "translate-a32.h" -static inline int neon_3same_fp_size(DisasContext *s, int x) -{ - /* Convert 0==fp32, 1==fp16 into a MO_* value */ - return MO_32 - x; -} - /* Include the generated Neon decoder */ #include "decode-neon-dp.c.inc" #include "decode-neon-ls.c.inc" diff --git a/target/arm/translate.h b/target/arm/translate.h index 241596c5bd..8636c20c3b 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -181,6 +181,12 @@ static inline int rsub_8(DisasContext *s, int x) return 8 - x; } +static inline int neon_3same_fp_size(DisasContext *s, int x) +{ + /* Convert 0==fp32, 1==fp16 into a MO_* value */ + return MO_32 - x; +} + static inline int arm_dc_feature(DisasContext *dc, int feature) { return (dc->features & (1ULL << feature)) != 0;