target/i386: Rewrite vector shift helper

Rewrite the vector shift helpers in preparation for AVX support (3 operand
form and 256 bit vectors).

For now keep the existing two operand interface.

No functional changes to existing helpers.

Signed-off-by: Paul Brook <paul@nowt.org>
Message-Id: <20220424220204.2493824-11-paul@nowt.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
master
Paul Brook 2022-04-24 23:01:32 +01:00 committed by Paolo Bonzini
parent 25bdec79c6
commit 18592d2ec2
1 changed files with 112 additions and 135 deletions

View File

@ -40,6 +40,8 @@
#define SUFFIX _xmm
#endif
#define LANE_WIDTH (SHIFT ? 16 : 8)
/*
* Copy the relevant parts of a Reg value around. In the case where
* sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
@ -56,198 +58,173 @@
#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
#endif
void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
int shift;
#if SHIFT == 0
/*
 * Per-element shift primitives used by the vector shift helpers.
 *   x - element value to shift
 *   c - shift count
 * FPSRL/FPSLL shift x as-is; FPSRAW/FPSRAL first convert x to int16_t /
 * int32_t so the right shift operates on a signed value.
 *
 * The count is taken from the macro argument rather than from a variable
 * that happens to be called "shift" at the expansion site, so the macros
 * work no matter how the caller names its count.
 */
#define FPSRL(x, c) ((x) >> (c))
#define FPSRAW(x, c) ((int16_t)(x) >> (c))
#define FPSRAL(x, c) ((int32_t)(x) >> (c))
#define FPSLL(x, c) ((x) << (c))
#endif
if (s->Q(0) > 15) {
d->Q(0) = 0;
#if SHIFT == 1
d->Q(1) = 0;
#endif
void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
Reg *s = d;
int shift;
if (c->Q(0) > 15) {
for (int i = 0; i < 1 << SHIFT; i++) {
d->Q(i) = 0;
}
} else {
shift = s->B(0);
d->W(0) >>= shift;
d->W(1) >>= shift;
d->W(2) >>= shift;
d->W(3) >>= shift;
#if SHIFT == 1
d->W(4) >>= shift;
d->W(5) >>= shift;
d->W(6) >>= shift;
d->W(7) >>= shift;
#endif
shift = c->B(0);
for (int i = 0; i < 4 << SHIFT; i++) {
d->W(i) = FPSRL(s->W(i), shift);
}
}
}
void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
/*
 * Left-shift each 16-bit word of *d by the count read from *c.
 *
 * The count is compared as the full low quadword c->Q(0); when it is
 * greater than 15 the 1 << SHIFT quadwords of *d are set to zero.
 * Otherwise the low byte c->B(0) is used as the shift amount for the
 * 4 << SHIFT words. env is not referenced in this function.
 */
void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
    Reg *s = d; /* source operand is the destination itself */
    int shift;
    int n;

    if (c->Q(0) <= 15) {
        shift = c->B(0);
        for (n = 0; n < (4 << SHIFT); n++) {
            d->W(n) = FPSLL(s->W(n), shift);
        }
        return;
    }

    /* Out-of-range count: result is all zeroes. */
    for (n = 0; n < (1 << SHIFT); n++) {
        d->Q(n) = 0;
    }
}
if (s->Q(0) > 15) {
void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
Reg *s = d;
int shift;
if (c->Q(0) > 15) {
shift = 15;
} else {
shift = s->B(0);
shift = c->B(0);
}
for (int i = 0; i < 4 << SHIFT; i++) {
d->W(i) = FPSRAW(s->W(i), shift);
}
d->W(0) = (int16_t)d->W(0) >> shift;
d->W(1) = (int16_t)d->W(1) >> shift;
d->W(2) = (int16_t)d->W(2) >> shift;
d->W(3) = (int16_t)d->W(3) >> shift;
#if SHIFT == 1
d->W(4) = (int16_t)d->W(4) >> shift;
d->W(5) = (int16_t)d->W(5) >> shift;
d->W(6) = (int16_t)d->W(6) >> shift;
d->W(7) = (int16_t)d->W(7) >> shift;
#endif
}
void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
Reg *s = d;
int shift;
if (s->Q(0) > 15) {
d->Q(0) = 0;
#if SHIFT == 1
d->Q(1) = 0;
#endif
if (c->Q(0) > 31) {
for (int i = 0; i < 1 << SHIFT; i++) {
d->Q(i) = 0;
}
} else {
shift = s->B(0);
d->W(0) <<= shift;
d->W(1) <<= shift;
d->W(2) <<= shift;
d->W(3) <<= shift;
#if SHIFT == 1
d->W(4) <<= shift;
d->W(5) <<= shift;
d->W(6) <<= shift;
d->W(7) <<= shift;
#endif
shift = c->B(0);
for (int i = 0; i < 2 << SHIFT; i++) {
d->L(i) = FPSRL(s->L(i), shift);
}
}
}
void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
Reg *s = d;
int shift;
if (s->Q(0) > 31) {
d->Q(0) = 0;
#if SHIFT == 1
d->Q(1) = 0;
#endif
if (c->Q(0) > 31) {
for (int i = 0; i < 1 << SHIFT; i++) {
d->Q(i) = 0;
}
} else {
shift = s->B(0);
d->L(0) >>= shift;
d->L(1) >>= shift;
#if SHIFT == 1
d->L(2) >>= shift;
d->L(3) >>= shift;
#endif
shift = c->B(0);
for (int i = 0; i < 2 << SHIFT; i++) {
d->L(i) = FPSLL(s->L(i), shift);
}
}
}
void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
Reg *s = d;
int shift;
if (s->Q(0) > 31) {
if (c->Q(0) > 31) {
shift = 31;
} else {
shift = s->B(0);
shift = c->B(0);
}
for (int i = 0; i < 2 << SHIFT; i++) {
d->L(i) = FPSRAL(s->L(i), shift);
}
d->L(0) = (int32_t)d->L(0) >> shift;
d->L(1) = (int32_t)d->L(1) >> shift;
#if SHIFT == 1
d->L(2) = (int32_t)d->L(2) >> shift;
d->L(3) = (int32_t)d->L(3) >> shift;
#endif
}
void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
Reg *s = d;
int shift;
if (s->Q(0) > 31) {
d->Q(0) = 0;
#if SHIFT == 1
d->Q(1) = 0;
#endif
if (c->Q(0) > 63) {
for (int i = 0; i < 1 << SHIFT; i++) {
d->Q(i) = 0;
}
} else {
shift = s->B(0);
d->L(0) <<= shift;
d->L(1) <<= shift;
#if SHIFT == 1
d->L(2) <<= shift;
d->L(3) <<= shift;
#endif
shift = c->B(0);
for (int i = 0; i < 1 << SHIFT; i++) {
d->Q(i) = FPSRL(s->Q(i), shift);
}
}
}
void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
Reg *s = d;
int shift;
if (s->Q(0) > 63) {
d->Q(0) = 0;
#if SHIFT == 1
d->Q(1) = 0;
#endif
if (c->Q(0) > 63) {
for (int i = 0; i < 1 << SHIFT; i++) {
d->Q(i) = 0;
}
} else {
shift = s->B(0);
d->Q(0) >>= shift;
#if SHIFT == 1
d->Q(1) >>= shift;
#endif
shift = c->B(0);
for (int i = 0; i < 1 << SHIFT; i++) {
d->Q(i) = FPSLL(s->Q(i), shift);
}
}
}
void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
#if SHIFT >= 1
void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
int shift;
Reg *s = d;
int shift, i, j;
if (s->Q(0) > 63) {
d->Q(0) = 0;
#if SHIFT == 1
d->Q(1) = 0;
#endif
} else {
shift = s->B(0);
d->Q(0) <<= shift;
#if SHIFT == 1
d->Q(1) <<= shift;
#endif
}
}
#if SHIFT == 1
void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
int shift, i;
shift = s->L(0);
shift = c->L(0);
if (shift > 16) {
shift = 16;
}
for (i = 0; i < 16 - shift; i++) {
d->B(i) = d->B(i + shift);
}
for (i = 16 - shift; i < 16; i++) {
d->B(i) = 0;
for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
for (i = 0; i < 16 - shift; i++) {
d->B(j + i) = s->B(j + i + shift);
}
for (i = 16 - shift; i < 16; i++) {
d->B(j + i) = 0;
}
}
}
void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
int shift, i;
Reg *s = d;
int shift, i, j;
shift = s->L(0);
shift = c->L(0);
if (shift > 16) {
shift = 16;
}
for (i = 15; i >= shift; i--) {
d->B(i) = d->B(i - shift);
}
for (i = 0; i < shift; i++) {
d->B(i) = 0;
for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
for (i = 15; i >= shift; i--) {
d->B(j + i) = s->B(j + i - shift);
}
for (i = 0; i < shift; i++) {
d->B(j + i) = 0;
}
}
}
#endif