/* * QEMU TCG support -- s390x vector string instruction support * * Copyright (C) 2019 Red Hat Inc * * Authors: * David Hildenbrand * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ #include "qemu/osdep.h" #include "qemu-common.h" #include "cpu.h" #include "internal.h" #include "vec.h" #include "tcg/tcg.h" #include "tcg/tcg-gvec-desc.h" #include "exec/helper-proto.h" /* * Returns a bit set in the MSB of each element that is zero, * as defined by the mask. */ static inline uint64_t zero_search(uint64_t a, uint64_t mask) { return ~(((a & mask) + mask) | a | mask); } /* * Returns a bit set in the MSB of each element that is not zero, * as defined by the mask. */ static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) { return (((a & mask) + mask) | a) & ~mask; } /* * Returns the byte offset for the first match, or 16 for no match. */ static inline int match_index(uint64_t c0, uint64_t c1) { return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; } /* * Returns the number of bits composing one element. */ static uint8_t get_element_bits(uint8_t es) { return (1 << es) * BITS_PER_BYTE; } /* * Returns the bitmask for a single element. */ static uint64_t get_single_element_mask(uint8_t es) { return -1ull >> (64 - get_element_bits(es)); } /* * Returns the bitmask for a single element (excluding the MSB). */ static uint64_t get_single_element_lsbs_mask(uint8_t es) { return -1ull >> (65 - get_element_bits(es)); } /* * Returns the bitmasks for multiple elements (excluding the MSBs). */ static uint64_t get_element_lsbs_mask(uint8_t es) { return dup_const(es, get_single_element_lsbs_mask(es)); } static int vfae(void *v1, const void *v2, const void *v3, bool in, bool rt, bool zs, uint8_t es) { const uint64_t mask = get_element_lsbs_mask(es); const int bits = get_element_bits(es); uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; uint64_t first_zero = 16; uint64_t first_equal; int i; a0 = s390_vec_read_element64(v2, 0); a1 = s390_vec_read_element64(v2, 1); b0 = s390_vec_read_element64(v3, 0); b1 = s390_vec_read_element64(v3, 1); e0 = 0; e1 = 0; /* compare against equality with every other element */ for (i = 0; i < 64; i += bits) { t0 = rol64(b0, i); t1 = rol64(b1, i); e0 |= zero_search(a0 ^ t0, mask); e0 |= zero_search(a0 ^ t1, mask); e1 |= zero_search(a1 ^ t0, mask); e1 |= zero_search(a1 ^ t1, mask); } /* invert the result if requested - invert only the MSBs */ if (in) { e0 = ~e0 & ~mask; e1 = ~e1 & ~mask; } first_equal = match_index(e0, e1); if (zs) { z0 = zero_search(a0, mask); z1 = zero_search(a1, mask); first_zero = match_index(z0, z1); } if (rt) { e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); s390_vec_write_element64(v1, 0, e0); s390_vec_write_element64(v1, 1, e1); } else { s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); s390_vec_write_element64(v1, 1, 0); } if (first_zero == 16 && first_equal == 16) { return 3; /* no match */ } else if (first_zero == 16) { return 1; /* matching elements, no match for zero */ } else if (first_equal < first_zero) { return 2; /* matching elements before match for zero */ } return 0; /* match for zero */ } #define DEF_VFAE_HELPER(BITS) \ void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ uint32_t desc) \ { \ const bool in = extract32(simd_data(desc), 3, 1); \ const bool rt = extract32(simd_data(desc), 2, 1); \ const bool zs = extract32(simd_data(desc), 1, 1); \ \ vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ } DEF_VFAE_HELPER(8) DEF_VFAE_HELPER(16) DEF_VFAE_HELPER(32) #define DEF_VFAE_CC_HELPER(BITS) \ void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ CPUS390XState *env, uint32_t desc) \ { \ const bool in = extract32(simd_data(desc), 3, 1); \ const bool rt = extract32(simd_data(desc), 2, 1); \ const bool zs = extract32(simd_data(desc), 1, 1); \ \ env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ } DEF_VFAE_CC_HELPER(8) DEF_VFAE_CC_HELPER(16) DEF_VFAE_CC_HELPER(32) static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) { const uint64_t mask = get_element_lsbs_mask(es); uint64_t a0, a1, b0, b1, e0, e1, z0, z1; uint64_t first_zero = 16; uint64_t first_equal; a0 = s390_vec_read_element64(v2, 0); a1 = s390_vec_read_element64(v2, 1); b0 = s390_vec_read_element64(v3, 0); b1 = s390_vec_read_element64(v3, 1); e0 = zero_search(a0 ^ b0, mask); e1 = zero_search(a1 ^ b1, mask); first_equal = match_index(e0, e1); if (zs) { z0 = zero_search(a0, mask); z1 = zero_search(a1, mask); first_zero = match_index(z0, z1); } s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); s390_vec_write_element64(v1, 1, 0); if (first_zero == 16 && first_equal == 16) { return 3; /* no match */ } else if (first_zero == 16) { return 1; /* matching elements, no match for zero */ } else if (first_equal < first_zero) { return 2; /* matching elements before match for zero */ } return 0; /* match for zero */ } #define DEF_VFEE_HELPER(BITS) \ void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \ uint32_t desc) \ { \ const bool zs = extract32(simd_data(desc), 1, 1); \ \ vfee(v1, v2, v3, zs, MO_##BITS); \ } DEF_VFEE_HELPER(8) DEF_VFEE_HELPER(16) DEF_VFEE_HELPER(32) #define DEF_VFEE_CC_HELPER(BITS) \ void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ CPUS390XState *env, uint32_t desc) \ { \ const bool zs = extract32(simd_data(desc), 1, 1); \ \ env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ } DEF_VFEE_CC_HELPER(8) DEF_VFEE_CC_HELPER(16) DEF_VFEE_CC_HELPER(32) static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) { const uint64_t mask = get_element_lsbs_mask(es); uint64_t a0, a1, b0, b1, e0, e1, z0, z1; uint64_t first_zero = 16; uint64_t first_inequal; bool smaller = false; a0 = s390_vec_read_element64(v2, 0); a1 = s390_vec_read_element64(v2, 1); b0 = s390_vec_read_element64(v3, 0); b1 = s390_vec_read_element64(v3, 1); e0 = nonzero_search(a0 ^ b0, mask); e1 = nonzero_search(a1 ^ b1, mask); first_inequal = match_index(e0, e1); /* identify the smaller element */ if (first_inequal < 16) { uint8_t enr = first_inequal / (1 << es); uint32_t a = s390_vec_read_element(v2, enr, es); uint32_t b = s390_vec_read_element(v3, enr, es); smaller = a < b; } if (zs) { z0 = zero_search(a0, mask); z1 = zero_search(a1, mask); first_zero = match_index(z0, z1); } s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); s390_vec_write_element64(v1, 1, 0); if (first_zero == 16 && first_inequal == 16) { return 3; } else if (first_zero < first_inequal) { return 0; } return smaller ? 1 : 2; } #define DEF_VFENE_HELPER(BITS) \ void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ uint32_t desc) \ { \ const bool zs = extract32(simd_data(desc), 1, 1); \ \ vfene(v1, v2, v3, zs, MO_##BITS); \ } DEF_VFENE_HELPER(8) DEF_VFENE_HELPER(16) DEF_VFENE_HELPER(32) #define DEF_VFENE_CC_HELPER(BITS) \ void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ CPUS390XState *env, uint32_t desc) \ { \ const bool zs = extract32(simd_data(desc), 1, 1); \ \ env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ } DEF_VFENE_CC_HELPER(8) DEF_VFENE_CC_HELPER(16) DEF_VFENE_CC_HELPER(32)