Vector rotate support

Signal handling support for NetBSD arm/aarch64
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAl7WgZkdHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8ZZwf9Flw0VHTBBzsZHPjL
 JVOdJsz8pnQLRDxk9JUo7D40xehpN1oE5mFCV5aFNMSz929r7OB5WfUHsyNpbms/
 aCDz2ADRscBZ4xEZY9cxUYiNzq4fb3Hez64ibGbRm3uHU1C2pPNDRlFpL9aFQMCE
 elhn54hwOAZUZAAKszepClHGglPffijY5QU5VX6Gq6nmqEmCBlA8lkxMtaV/NRnS
 QfskDF4KvglGqGDCcKI2tfotu5Y9k3R4yh7Nzc2JnotbHk4WrNbhBl6r9KcdXddi
 uQng4J0zhBa1Kq4ENdpKgAqN/ZNVEbsFzev5aY77UD76cnxqbQQ7hfXl2gAORp6m
 Vy5lVw==
 =nRee
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20200602' into staging

Vector rotate support
Signal handling support for NetBSD arm/aarch64

# gpg: Signature made Tue 02 Jun 2020 17:43:05 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20200602:
  accel/tcg: Provide a NetBSD specific aarch64 cpu_signal_handler
  accel/tcg: Adjust cpu_signal_handler for NetBSD/arm
  tcg: Improve move ops in liveness_pass_2
  target/s390x: Use tcg_gen_gvec_rotl{i,s,v}
  target/ppc: Use tcg_gen_gvec_rotlv
  tcg/ppc: Implement INDEX_op_rot[lr]v_vec
  tcg/aarch64: Implement INDEX_op_rotl{i,v}_vec
  tcg/i386: Implement INDEX_op_rotl{i,s,v}_vec
  tcg: Implement gvec support for rotate by scalar
  tcg: Remove expansion to shift by vector from do_shifts
  tcg: Implement gvec support for rotate by vector
  tcg: Implement gvec support for rotate by immediate

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
master
Peter Maydell 2020-06-02 18:16:38 +01:00
commit 5cc7a54c2e
26 changed files with 736 additions and 193 deletions

View File

@ -716,6 +716,54 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
int shift = simd_data(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
*(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
int shift = simd_data(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
*(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
int shift = simd_data(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
*(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
int shift = simd_data(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
*(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
@ -860,6 +908,102 @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
#define DO_CMP1(NAME, TYPE, OP) \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
{ \

View File

@ -259,6 +259,11 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@ -274,6 +279,16 @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

View File

@ -517,6 +517,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
#if defined(__NetBSD__)
#include <ucontext.h>
#include <sys/siginfo.h>
#endif
int cpu_signal_handler(int host_signum, void *pinfo,
@ -525,10 +526,12 @@ int cpu_signal_handler(int host_signum, void *pinfo,
siginfo_t *info = pinfo;
#if defined(__NetBSD__)
ucontext_t *uc = puc;
siginfo_t *si = pinfo;
#else
ucontext_t *uc = puc;
#endif
unsigned long pc;
uint32_t fsr;
int is_write;
#if defined(__NetBSD__)
@ -539,15 +542,48 @@ int cpu_signal_handler(int host_signum, void *pinfo,
pc = uc->uc_mcontext.arm_pc;
#endif
/* error_code is the FSR value, in which bit 11 is WnR (assuming a v6 or
* later processor; on v5 we will always report this as a read).
#ifdef __NetBSD__
fsr = si->si_trap;
#else
fsr = uc->uc_mcontext.error_code;
#endif
/*
* In the FSR, bit 11 is WnR, assuming a v6 or
* later processor. On v5 we will always report
* this as a read, which will fail later.
*/
is_write = extract32(uc->uc_mcontext.error_code, 11, 1);
is_write = extract32(fsr, 11, 1);
return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
}
#elif defined(__aarch64__)
#if defined(__NetBSD__)
#include <ucontext.h>
#include <sys/siginfo.h>
int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
{
ucontext_t *uc = puc;
siginfo_t *si = pinfo;
unsigned long pc;
int is_write;
uint32_t esr;
pc = uc->uc_mcontext.__gregs[_REG_PC];
esr = si->si_trap;
/*
* siginfo_t::si_trap is the ESR value, for data aborts ESR.EC
* is 0b10010x: then bit 6 is the WnR bit
*/
is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1;
return handle_cpu_signal(pc, si, is_write, &uc->uc_sigmask);
}
#else
#ifndef ESR_MAGIC
/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */
#define ESR_MAGIC 0x45535201
@ -610,6 +646,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
}
return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
}
#endif
#elif defined(__s390__)

View File

@ -334,6 +334,10 @@ void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
@ -341,6 +345,8 @@ void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
/*
* Perform vector shift by vector element, modulo the element size.
@ -352,6 +358,10 @@ void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, uint32_t bofs,
@ -388,5 +398,7 @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
#endif

View File

@ -999,14 +999,19 @@ void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
TCGv_vec a, TCGv_vec b);

View File

@ -248,14 +248,18 @@ DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec))
DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
DEF(cmp_vec, 1, 2, 1, IMPLVEC)

View File

@ -182,6 +182,9 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_andc_vec 0
#define TCG_TARGET_HAS_orc_vec 0
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 0

View File

@ -214,10 +214,6 @@ DEF_HELPER_3(vsubuqm, void, avr, avr, avr)
DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr)
DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr)
DEF_HELPER_3(vsubcuq, void, avr, avr, avr)
DEF_HELPER_3(vrlb, void, avr, avr, avr)
DEF_HELPER_3(vrlh, void, avr, avr, avr)
DEF_HELPER_3(vrlw, void, avr, avr, avr)
DEF_HELPER_3(vrld, void, avr, avr, avr)
DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32)
DEF_HELPER_3(vextractub, void, avr, avr, i32)
DEF_HELPER_3(vextractuh, void, avr, avr, i32)

View File

@ -1348,23 +1348,6 @@ VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI
#define VROTATE(suffix, element, mask) \
void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
int i; \
\
for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
unsigned int shift = b->element[i] & mask; \
r->element[i] = (a->element[i] << shift) | \
(a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
} \
}
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
int i;

View File

@ -900,13 +900,13 @@ GEN_VXFORM3(vsubeuqm, 31, 0);
GEN_VXFORM3(vsubecuq, 31, 0);
GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM(vrlb, 2, 0);
GEN_VXFORM(vrlh, 2, 1);
GEN_VXFORM(vrlw, 2, 2);
GEN_VXFORM_V(vrlb, MO_8, tcg_gen_gvec_rotlv, 2, 0);
GEN_VXFORM_V(vrlh, MO_16, tcg_gen_gvec_rotlv, 2, 1);
GEN_VXFORM_V(vrlw, MO_32, tcg_gen_gvec_rotlv, 2, 2);
GEN_VXFORM(vrlwmi, 2, 2);
GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \
vrlwmi, PPC_NONE, PPC2_ISA300)
GEN_VXFORM(vrld, 2, 3);
GEN_VXFORM_V(vrld, MO_64, tcg_gen_gvec_rotlv, 2, 3);
GEN_VXFORM(vrldmi, 2, 3);
GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \
vrldmi, PPC_NONE, PPC2_ISA300)

View File

@ -198,10 +198,6 @@ DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)

View File

@ -1147,8 +1147,8 @@
/* VECTOR POPULATION COUNT */
F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC)
/* VECTOR ELEMENT ROTATE LEFT LOGICAL */
F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, verllv, 0, IF_VEC)
F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, verll, 0, IF_VEC)
F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC)
F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC)
/* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */
F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC)
/* VECTOR ELEMENT SHIFT LEFT */

View File

@ -1825,63 +1825,6 @@ static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
static void gen_rll_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i32 t0 = tcg_temp_new_i32();
tcg_gen_andi_i32(t0, b, 31);
tcg_gen_rotl_i32(d, a, t0);
tcg_temp_free_i32(t0);
}
static void gen_rll_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
TCGv_i64 t0 = tcg_temp_new_i64();
tcg_gen_andi_i64(t0, b, 63);
tcg_gen_rotl_i64(d, a, t0);
tcg_temp_free_i64(t0);
}
static DisasJumpType op_verllv(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s, m4);
static const GVecGen3 g[4] = {
{ .fno = gen_helper_gvec_verllv8, },
{ .fno = gen_helper_gvec_verllv16, },
{ .fni4 = gen_rll_i32, },
{ .fni8 = gen_rll_i64, },
};
if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_3(get_field(s, v1), get_field(s, v2),
get_field(s, v3), &g[es]);
return DISAS_NEXT;
}
static DisasJumpType op_verll(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s, m4);
static const GVecGen2s g[4] = {
{ .fno = gen_helper_gvec_verll8, },
{ .fno = gen_helper_gvec_verll16, },
{ .fni4 = gen_rll_i32, },
{ .fni8 = gen_rll_i64, },
};
if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_2s(get_field(s, v1), get_field(s, v3), o->addr1,
&g[es]);
return DISAS_NEXT;
}
static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
{
TCGv_i32 t = tcg_temp_new_i32();
@ -1946,6 +1889,9 @@ static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
case 0x70:
gen_gvec_fn_3(shlv, es, v1, v2, v3);
break;
case 0x73:
gen_gvec_fn_3(rotlv, es, v1, v2, v3);
break;
case 0x7a:
gen_gvec_fn_3(sarv, es, v1, v2, v3);
break;
@ -1977,6 +1923,9 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
case 0x30:
gen_gvec_fn_2i(shli, es, v1, v3, d2);
break;
case 0x33:
gen_gvec_fn_2i(rotli, es, v1, v3, d2);
break;
case 0x3a:
gen_gvec_fn_2i(sari, es, v1, v3, d2);
break;
@ -1994,6 +1943,9 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
case 0x30:
gen_gvec_fn_2s(shls, es, v1, v3, shift);
break;
case 0x33:
gen_gvec_fn_2s(rotls, es, v1, v3, shift);
break;
case 0x3a:
gen_gvec_fn_2s(sars, es, v1, v3, shift);
break;

View File

@ -515,37 +515,6 @@ void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
DEF_VPOPCT(8)
DEF_VPOPCT(16)
#define DEF_VERLLV(BITS) \
void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \
uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
\
s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \
} \
}
DEF_VERLLV(8)
DEF_VERLLV(16)
#define DEF_VERLL(BITS) \
void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \
uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
\
s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \
} \
}
DEF_VERLL(8)
DEF_VERLL(16)
#define DEF_VERIM(BITS) \
void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
uint32_t desc) \

View File

@ -605,10 +605,11 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
* shri_vec v0, v1, i2
* sari_vec v0, v1, i2
* rotli_vec v0, v1, i2
* shrs_vec v0, v1, s2
* sars_vec v0, v1, s2
Similarly for logical and arithmetic right shift.
Similarly for logical and arithmetic right shift, and left rotate.
* shlv_vec v0, v1, v2
@ -620,8 +621,10 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
* shrv_vec v0, v1, v2
* sarv_vec v0, v1, v2
* rotlv_vec v0, v1, v2
* rotrv_vec v0, v1, v2
Similarly for logical and arithmetic right shift.
Similarly for logical and arithmetic right shift, and rotates.
* cmp_vec v0, v1, v2, cond

View File

@ -133,6 +133,9 @@ typedef enum {
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1

View File

@ -557,6 +557,7 @@ typedef enum {
I3614_SSHR = 0x0f000400,
I3614_SSRA = 0x0f001400,
I3614_SHL = 0x0f005400,
I3614_SLI = 0x2f005400,
I3614_USHR = 0x2f000400,
I3614_USRA = 0x2f001400,
@ -2411,6 +2412,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sari_vec:
tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
break;
case INDEX_op_aa64_sli_vec:
tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
break;
case INDEX_op_shlv_vec:
tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
break;
@ -2498,8 +2502,11 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shlv_vec:
case INDEX_op_bitsel_vec:
return 1;
case INDEX_op_rotli_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
return -1;
case INDEX_op_mul_vec:
case INDEX_op_smax_vec:
@ -2517,14 +2524,24 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
TCGv_vec v0, v1, v2, t1;
TCGv_vec v0, v1, v2, t1, t2;
TCGArg a2;
va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
a2 = va_arg(va, TCGArg);
v2 = temp_tcgv_vec(arg_temp(a2));
switch (opc) {
case INDEX_op_rotli_vec:
t1 = tcg_temp_new_vec(type);
tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
tcg_temp_free_vec(t1);
break;
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
/* Right shifts are negative left shifts for AArch64. */
@ -2537,6 +2554,35 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
tcg_temp_free_vec(t1);
break;
case INDEX_op_rotlv_vec:
t1 = tcg_temp_new_vec(type);
tcg_gen_dupi_vec(vece, t1, 8 << vece);
tcg_gen_sub_vec(vece, t1, v2, t1);
/* Right shifts are negative left shifts for AArch64. */
vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
tcgv_vec_arg(v1), tcgv_vec_arg(v2));
tcg_gen_or_vec(vece, v0, v0, t1);
tcg_temp_free_vec(t1);
break;
case INDEX_op_rotrv_vec:
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);
tcg_gen_neg_vec(vece, t1, v2);
tcg_gen_dupi_vec(vece, t2, 8 << vece);
tcg_gen_add_vec(vece, t2, t1, t2);
/* Right shifts are negative left shifts for AArch64. */
vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
tcgv_vec_arg(v1), tcgv_vec_arg(t2));
tcg_gen_or_vec(vece, v0, t1, t2);
tcg_temp_free_vec(t1);
tcg_temp_free_vec(t2);
break;
default:
g_assert_not_reached();
}
@ -2557,6 +2603,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
@ -2751,6 +2798,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return &w_w_wZ;
case INDEX_op_bitsel_vec:
return &w_w_w_w;
case INDEX_op_aa64_sli_vec:
return &w_0_w;
default:
return NULL;

View File

@ -12,3 +12,4 @@
*/
DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC)

View File

@ -183,6 +183,9 @@ extern bool have_avx2;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 1
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 1
#define TCG_TARGET_HAS_shv_vec have_avx2

View File

@ -3233,6 +3233,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
case INDEX_op_sars_vec:
case INDEX_op_rotls_vec:
case INDEX_op_cmp_vec:
case INDEX_op_x86_shufps_vec:
case INDEX_op_x86_blend_vec:
@ -3271,6 +3272,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
return 1;
case INDEX_op_rotli_vec:
case INDEX_op_cmp_vec:
case INDEX_op_cmpsel_vec:
return -1;
@ -3297,12 +3299,17 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
return vece >= MO_16;
case INDEX_op_sars_vec:
return vece >= MO_16 && vece <= MO_32;
case INDEX_op_rotls_vec:
return vece >= MO_16 ? -1 : 0;
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
return have_avx2 && vece >= MO_32;
case INDEX_op_sarv_vec:
return have_avx2 && vece == MO_32;
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
return have_avx2 && vece >= MO_32 ? -1 : 0;
case INDEX_op_mul_vec:
if (vece == MO_8) {
@ -3331,7 +3338,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
}
static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
TCGv_vec t1, t2;
@ -3341,26 +3348,31 @@ static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);
/* Unpack to W, shift, and repack. Tricky bits:
(1) Use punpck*bw x,x to produce DDCCBBAA,
i.e. duplicate in other half of the 16-bit lane.
(2) For right-shift, add 8 so that the high half of
the lane becomes zero. For left-shift, we must
shift up and down again.
(3) Step 2 leaves high half zero such that PACKUSWB
(pack with unsigned saturation) does not modify
the quantity. */
/*
* Unpack to W, shift, and repack. Tricky bits:
* (1) Use punpck*bw x,x to produce DDCCBBAA,
* i.e. duplicate in other half of the 16-bit lane.
* (2) For right-shift, add 8 so that the high half of the lane
* becomes zero. For left-shift, and left-rotate, we must
* shift up and down again.
* (3) Step 2 leaves high half zero such that PACKUSWB
* (pack with unsigned saturation) does not modify
* the quantity.
*/
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
if (shr) {
tcg_gen_shri_vec(MO_16, t1, t1, imm + 8);
tcg_gen_shri_vec(MO_16, t2, t2, imm + 8);
if (opc != INDEX_op_rotli_vec) {
imm += 8;
}
if (opc == INDEX_op_shri_vec) {
tcg_gen_shri_vec(MO_16, t1, t1, imm);
tcg_gen_shri_vec(MO_16, t2, t2, imm);
} else {
tcg_gen_shli_vec(MO_16, t1, t1, imm + 8);
tcg_gen_shli_vec(MO_16, t2, t2, imm + 8);
tcg_gen_shli_vec(MO_16, t1, t1, imm);
tcg_gen_shli_vec(MO_16, t2, t2, imm);
tcg_gen_shri_vec(MO_16, t1, t1, 8);
tcg_gen_shri_vec(MO_16, t2, t2, 8);
}
@ -3427,6 +3439,61 @@ static void expand_vec_sari(TCGType type, unsigned vece,
}
}
static void expand_vec_rotli(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
TCGv_vec t;
if (vece == MO_8) {
expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
return;
}
t = tcg_temp_new_vec(type);
tcg_gen_shli_vec(vece, t, v1, imm);
tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
tcg_gen_or_vec(vece, v0, v0, t);
tcg_temp_free_vec(t);
}
static void expand_vec_rotls(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
{
TCGv_i32 rsh;
TCGv_vec t;
tcg_debug_assert(vece != MO_8);
t = tcg_temp_new_vec(type);
rsh = tcg_temp_new_i32();
tcg_gen_neg_i32(rsh, lsh);
tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
tcg_gen_shls_vec(vece, t, v1, lsh);
tcg_gen_shrs_vec(vece, v0, v1, rsh);
tcg_gen_or_vec(vece, v0, v0, t);
tcg_temp_free_vec(t);
tcg_temp_free_i32(rsh);
}
static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
TCGv_vec v1, TCGv_vec sh, bool right)
{
TCGv_vec t = tcg_temp_new_vec(type);
tcg_gen_dupi_vec(vece, t, 8 << vece);
tcg_gen_sub_vec(vece, t, t, sh);
if (right) {
tcg_gen_shlv_vec(vece, t, v1, t);
tcg_gen_shrv_vec(vece, v0, v1, sh);
} else {
tcg_gen_shrv_vec(vece, t, v1, t);
tcg_gen_shlv_vec(vece, v0, v1, sh);
}
tcg_gen_or_vec(vece, v0, v0, t);
tcg_temp_free_vec(t);
}
static void expand_vec_mul(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
{
@ -3636,13 +3703,30 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
switch (opc) {
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
expand_vec_shi(type, vece, opc == INDEX_op_shri_vec, v0, v1, a2);
expand_vec_shi(type, vece, opc, v0, v1, a2);
break;
case INDEX_op_sari_vec:
expand_vec_sari(type, vece, v0, v1, a2);
break;
case INDEX_op_rotli_vec:
expand_vec_rotli(type, vece, v0, v1, a2);
break;
case INDEX_op_rotls_vec:
expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2)));
break;
case INDEX_op_rotlv_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_rotv(type, vece, v0, v1, v2, false);
break;
case INDEX_op_rotrv_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_rotv(type, vece, v0, v1, v2, true);
break;
case INDEX_op_mul_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);

View File

@ -161,6 +161,9 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
#define TCG_TARGET_HAS_abs_vec 0
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 1
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1

View File

@ -2995,6 +2995,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
case INDEX_op_rotlv_vec:
return vece <= MO_32 || have_isa_2_07;
case INDEX_op_ssadd_vec:
case INDEX_op_sssub_vec:
@ -3005,6 +3006,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
case INDEX_op_rotli_vec:
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
case INDEX_op_neg_vec:
return vece >= MO_32 && have_isa_3_00;
@ -3019,6 +3021,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
return 0;
case INDEX_op_bitsel_vec:
return have_vsx;
case INDEX_op_rotrv_vec:
return -1;
default:
return 0;
}
@ -3301,7 +3305,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_ppc_pkum_vec:
insn = pkum_op[vece];
break;
case INDEX_op_ppc_rotl_vec:
case INDEX_op_rotlv_vec:
insn = rotl_op[vece];
break;
case INDEX_op_ppc_msum_vec:
@ -3409,7 +3413,7 @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
t3 = tcg_temp_new_vec(type);
t4 = tcg_temp_new_vec(type);
tcg_gen_dupi_vec(MO_8, t4, -16);
vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
tcgv_vec_arg(v2), tcgv_vec_arg(t4));
vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
tcgv_vec_arg(v1), tcgv_vec_arg(v2));
@ -3434,7 +3438,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
TCGv_vec v0, v1, v2;
TCGv_vec v0, v1, v2, t0;
TCGArg a2;
va_start(va, a0);
@ -3452,6 +3456,9 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
case INDEX_op_sari_vec:
expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
break;
case INDEX_op_rotli_vec:
expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
break;
case INDEX_op_cmp_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
@ -3460,6 +3467,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);
break;
case INDEX_op_rotlv_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
t0 = tcg_temp_new_vec(type);
tcg_gen_neg_vec(vece, t0, v2);
tcg_gen_rotlv_vec(vece, v0, v1, t0);
tcg_temp_free_vec(t0);
break;
default:
g_assert_not_reached();
}
@ -3664,12 +3678,13 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
case INDEX_op_ppc_mrgh_vec:
case INDEX_op_ppc_mrgl_vec:
case INDEX_op_ppc_muleu_vec:
case INDEX_op_ppc_mulou_vec:
case INDEX_op_ppc_pkum_vec:
case INDEX_op_ppc_rotl_vec:
case INDEX_op_dup2_vec:
return &v_v_v;
case INDEX_op_not_vec:

View File

@ -30,4 +30,3 @@ DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC)

View File

@ -2694,6 +2694,74 @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
}
}
void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
uint64_t mask = dup_const(MO_8, 0xff << c);
tcg_gen_shli_i64(d, a, c);
tcg_gen_shri_i64(a, a, 8 - c);
tcg_gen_andi_i64(d, d, mask);
tcg_gen_andi_i64(a, a, ~mask);
tcg_gen_or_i64(d, d, a);
}
void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
uint64_t mask = dup_const(MO_16, 0xffff << c);
tcg_gen_shli_i64(d, a, c);
tcg_gen_shri_i64(a, a, 16 - c);
tcg_gen_andi_i64(d, d, mask);
tcg_gen_andi_i64(a, a, ~mask);
tcg_gen_or_i64(d, d, a);
}
void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
static const GVecGen2i g[4] = {
{ .fni8 = tcg_gen_vec_rotl8i_i64,
.fniv = tcg_gen_rotli_vec,
.fno = gen_helper_gvec_rotl8i,
.opt_opc = vecop_list,
.vece = MO_8 },
{ .fni8 = tcg_gen_vec_rotl16i_i64,
.fniv = tcg_gen_rotli_vec,
.fno = gen_helper_gvec_rotl16i,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fni4 = tcg_gen_rotli_i32,
.fniv = tcg_gen_rotli_vec,
.fno = gen_helper_gvec_rotl32i,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fni8 = tcg_gen_rotli_i64,
.fniv = tcg_gen_rotli_vec,
.fno = gen_helper_gvec_rotl64i,
.opt_opc = vecop_list,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.vece = MO_64 },
};
tcg_debug_assert(vece <= MO_64);
tcg_debug_assert(shift >= 0 && shift < (8 << vece));
if (shift == 0) {
tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
} else {
tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
}
}
void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
tcg_debug_assert(vece <= MO_64);
tcg_debug_assert(shift >= 0 && shift < (8 << vece));
tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1),
oprsz, maxsz);
}
/*
* Specialized generation vector shifts by a non-constant scalar.
*/
@ -2908,6 +2976,28 @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}
void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
{
static const GVecGen2sh g = {
.fni4 = tcg_gen_rotl_i32,
.fni8 = tcg_gen_rotl_i64,
.fniv_s = tcg_gen_rotls_vec,
.fniv_v = tcg_gen_rotlv_vec,
.fno = {
gen_helper_gvec_rotl8i,
gen_helper_gvec_rotl16i,
gen_helper_gvec_rotl32i,
gen_helper_gvec_rotl64i,
},
.s_list = { INDEX_op_rotls_vec, 0 },
.v_list = { INDEX_op_rotlv_vec, 0 },
};
tcg_debug_assert(vece <= MO_64);
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}
/*
* Expand D = A << (B % element bits)
*
@ -3103,6 +3193,128 @@ void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}
/*
* Similarly for rotates.
*/
static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
TCGv_vec a, TCGv_vec b)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
tcg_gen_and_vec(vece, t, t, b);
tcg_gen_rotlv_vec(vece, d, a, t);
tcg_temp_free_vec(t);
}
static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i32 t = tcg_temp_new_i32();
tcg_gen_andi_i32(t, b, 31);
tcg_gen_rotl_i32(d, a, t);
tcg_temp_free_i32(t);
}
static void tcg_gen_rotl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_andi_i64(t, b, 63);
tcg_gen_rotl_i64(d, a, t);
tcg_temp_free_i64(t);
}
void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 };
static const GVecGen3 g[4] = {
{ .fniv = tcg_gen_rotlv_mod_vec,
.fno = gen_helper_gvec_rotl8v,
.opt_opc = vecop_list,
.vece = MO_8 },
{ .fniv = tcg_gen_rotlv_mod_vec,
.fno = gen_helper_gvec_rotl16v,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fni4 = tcg_gen_rotl_mod_i32,
.fniv = tcg_gen_rotlv_mod_vec,
.fno = gen_helper_gvec_rotl32v,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fni8 = tcg_gen_rotl_mod_i64,
.fniv = tcg_gen_rotlv_mod_vec,
.fno = gen_helper_gvec_rotl64v,
.opt_opc = vecop_list,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.vece = MO_64 },
};
tcg_debug_assert(vece <= MO_64);
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}
static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
TCGv_vec a, TCGv_vec b)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
tcg_gen_and_vec(vece, t, t, b);
tcg_gen_rotrv_vec(vece, d, a, t);
tcg_temp_free_vec(t);
}
static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i32 t = tcg_temp_new_i32();
tcg_gen_andi_i32(t, b, 31);
tcg_gen_rotr_i32(d, a, t);
tcg_temp_free_i32(t);
}
static void tcg_gen_rotr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_andi_i64(t, b, 63);
tcg_gen_rotr_i64(d, a, t);
tcg_temp_free_i64(t);
}
void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 };
static const GVecGen3 g[4] = {
{ .fniv = tcg_gen_rotrv_mod_vec,
.fno = gen_helper_gvec_rotr8v,
.opt_opc = vecop_list,
.vece = MO_8 },
{ .fniv = tcg_gen_rotrv_mod_vec,
.fno = gen_helper_gvec_rotr16v,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fni4 = tcg_gen_rotr_mod_i32,
.fniv = tcg_gen_rotrv_mod_vec,
.fno = gen_helper_gvec_rotr32v,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fni8 = tcg_gen_rotr_mod_i64,
.fniv = tcg_gen_rotrv_mod_vec,
.fno = gen_helper_gvec_rotr64v,
.opt_opc = vecop_list,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.vece = MO_64 },
};
tcg_debug_assert(vece <= MO_64);
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}
/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t oprsz, TCGCond cond)

View File

@ -545,6 +545,18 @@ void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}
void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}
void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
int bits = 8 << vece;
tcg_debug_assert(i >= 0 && i < bits);
do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
@ -684,8 +696,18 @@ void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}
void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}
void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
TCGv_i32 s, TCGOpcode opc)
{
TCGTemp *rt = tcgv_vec_temp(r);
TCGTemp *at = tcgv_vec_temp(a);
@ -694,48 +716,40 @@ static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
TCGArg ai = temp_arg(at);
TCGArg si = temp_arg(st);
TCGType type = rt->base_type;
const TCGOpcode *hold_list;
int can;
tcg_debug_assert(at->base_type >= type);
tcg_assert_listed_vecop(opc_s);
hold_list = tcg_swap_vecop_list(NULL);
can = tcg_can_emit_vec_op(opc_s, type, vece);
tcg_assert_listed_vecop(opc);
can = tcg_can_emit_vec_op(opc, type, vece);
if (can > 0) {
vec_gen_3(opc_s, type, vece, ri, ai, si);
vec_gen_3(opc, type, vece, ri, ai, si);
} else if (can < 0) {
tcg_expand_vec_op(opc_s, type, vece, ri, ai, si);
const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
tcg_expand_vec_op(opc, type, vece, ri, ai, si);
tcg_swap_vecop_list(hold_list);
} else {
TCGv_vec vec_s = tcg_temp_new_vec(type);
if (vece == MO_64) {
TCGv_i64 s64 = tcg_temp_new_i64();
tcg_gen_extu_i32_i64(s64, s);
tcg_gen_dup_i64_vec(MO_64, vec_s, s64);
tcg_temp_free_i64(s64);
} else {
tcg_gen_dup_i32_vec(vece, vec_s, s);
}
do_op3_nofail(vece, r, a, vec_s, opc_v);
tcg_temp_free_vec(vec_s);
g_assert_not_reached();
}
tcg_swap_vecop_list(hold_list);
}
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}
void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,

View File

@ -1661,6 +1661,13 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
return have_vec && TCG_TARGET_HAS_shv_vec;
case INDEX_op_rotli_vec:
return have_vec && TCG_TARGET_HAS_roti_vec;
case INDEX_op_rotls_vec:
return have_vec && TCG_TARGET_HAS_rots_vec;
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
return have_vec && TCG_TARGET_HAS_rotv_vec;
case INDEX_op_ssadd_vec:
case INDEX_op_usadd_vec:
case INDEX_op_sssub_vec:
@ -2975,34 +2982,68 @@ static bool liveness_pass_2(TCGContext *s)
}
/* Outputs become available. */
for (i = 0; i < nb_oargs; i++) {
arg_ts = arg_temp(op->args[i]);
if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
arg_ts = arg_temp(op->args[0]);
dir_ts = arg_ts->state_ptr;
if (!dir_ts) {
continue;
if (dir_ts) {
op->args[0] = temp_arg(dir_ts);
changes = true;
/* The output is now live and modified. */
arg_ts->state = 0;
if (NEED_SYNC_ARG(0)) {
TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
? INDEX_op_st_i32
: INDEX_op_st_i64);
TCGOp *sop = tcg_op_insert_after(s, op, sopc);
TCGTemp *out_ts = dir_ts;
if (IS_DEAD_ARG(0)) {
out_ts = arg_temp(op->args[1]);
arg_ts->state = TS_DEAD;
tcg_op_remove(s, op);
} else {
arg_ts->state = TS_MEM;
}
sop->args[0] = temp_arg(out_ts);
sop->args[1] = temp_arg(arg_ts->mem_base);
sop->args[2] = arg_ts->mem_offset;
} else {
tcg_debug_assert(!IS_DEAD_ARG(0));
}
}
op->args[i] = temp_arg(dir_ts);
changes = true;
} else {
for (i = 0; i < nb_oargs; i++) {
arg_ts = arg_temp(op->args[i]);
dir_ts = arg_ts->state_ptr;
if (!dir_ts) {
continue;
}
op->args[i] = temp_arg(dir_ts);
changes = true;
/* The output is now live and modified. */
arg_ts->state = 0;
/* The output is now live and modified. */
arg_ts->state = 0;
/* Sync outputs upon their last write. */
if (NEED_SYNC_ARG(i)) {
TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
? INDEX_op_st_i32
: INDEX_op_st_i64);
TCGOp *sop = tcg_op_insert_after(s, op, sopc);
/* Sync outputs upon their last write. */
if (NEED_SYNC_ARG(i)) {
TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
? INDEX_op_st_i32
: INDEX_op_st_i64);
TCGOp *sop = tcg_op_insert_after(s, op, sopc);
sop->args[0] = temp_arg(dir_ts);
sop->args[1] = temp_arg(arg_ts->mem_base);
sop->args[2] = arg_ts->mem_offset;
sop->args[0] = temp_arg(dir_ts);
sop->args[1] = temp_arg(arg_ts->mem_base);
sop->args[2] = arg_ts->mem_offset;
arg_ts->state = TS_MEM;
}
/* Drop outputs that are dead. */
if (IS_DEAD_ARG(i)) {
arg_ts->state = TS_DEAD;
arg_ts->state = TS_MEM;
}
/* Drop outputs that are dead. */
if (IS_DEAD_ARG(i)) {
arg_ts->state = TS_DEAD;
}
}
}
}