tcg/s390x: Implement TCG_TARGET_HAS_sat_vec

The unsigned saturations are handled via generic code using min/max. The signed saturations are expanded using double-sized arithmetic and a saturating pack. Since all operations are done via expansion, do not actually set TCG_TARGET_HAS_sat_vec. Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2020-09-14 20:17:29 -07:00 · 2020-09-14 20:17:29 -07:00 · 4223c9c1c6
parent 220db7a6c4
commit 4223c9c1c6
2 changed files with 66 additions and 0 deletions
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@ -291,7 +291,10 @@ typedef enum S390Opcode {
    VRRc_VNO    = 0xe76b,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
+    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
+    VRRa_VUPH   = 0xe7d7,
+    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRf_VLVGP  = 0xe762,

@ -2800,6 +2803,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
        }
        break;

+    case INDEX_op_s390_vuph_vec:
+        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
+        break;
+    case INDEX_op_s390_vupl_vec:
+        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
+        break;
+    case INDEX_op_s390_vpks_vec:
+        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
+        break;
+
    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
    default:
@ -2842,6 +2855,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
        return -1;
    case INDEX_op_mul_vec:
        return vece < MO_64;
+    case INDEX_op_ssadd_vec:
+    case INDEX_op_sssub_vec:
+        return vece < MO_64 ? -1 : 0;
    default:
        return 0;
    }
@ -2897,6 +2913,43 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
    }
 }

+static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
+                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
+{
+    TCGv_vec h1 = tcg_temp_new_vec(type);
+    TCGv_vec h2 = tcg_temp_new_vec(type);
+    TCGv_vec l1 = tcg_temp_new_vec(type);
+    TCGv_vec l2 = tcg_temp_new_vec(type);
+
+    tcg_debug_assert (vece < MO_64);
+
+    /* Unpack with sign-extension. */
+    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
+              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
+    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
+              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
+
+    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
+              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
+    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
+              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
+
+    /* Arithmetic on a wider element size. */
+    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
+              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
+    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
+              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
+
+    /* Pack with saturation. */
+    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
+              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
+
+    tcg_temp_free_vec(h1);
+    tcg_temp_free_vec(h2);
+    tcg_temp_free_vec(l1);
+    tcg_temp_free_vec(l2);
+}
+
 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
 {
@ -2920,6 +2973,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
        tcg_temp_free_vec(t0);
        break;

+    case INDEX_op_ssadd_vec:
+        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
+        break;
+    case INDEX_op_sssub_vec:
+        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
+        break;
+
    default:
        g_assert_not_reached();
    }
@ -3080,6 +3140,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
    case INDEX_op_sari_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
+    case INDEX_op_s390_vuph_vec:
+    case INDEX_op_s390_vupl_vec:
        return C_O1_I1(v, v);
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
@ -3099,6 +3161,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
+    case INDEX_op_s390_vpks_vec:
        return C_O1_I2(v, v, v);
    case INDEX_op_rotls_vec:
    case INDEX_op_shls_vec:
--- a/tcg/s390x/tcg-target.opc.h
+++ b/tcg/s390x/tcg-target.opc.h
@ -10,3 +10,6 @@
 * emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
 * consider these to be UNSPEC with names.
 */
+DEF(s390_vuph_vec, 1, 1, 0, IMPLVEC)
+DEF(s390_vupl_vec, 1, 1, 0, IMPLVEC)
+DEF(s390_vpks_vec, 1, 2, 0, IMPLVEC)