fpu: Add float64_to_int{32,64}_modulo

Add versions of float64_to_int* which do not saturate the result.

Reviewed-by: Christoph Muellner <christoph.muellner@vrull.eu>
Tested-by: Christoph Muellner <christoph.muellner@vrull.eu>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20230527141910.1885950-2-richard.henderson@linaro.org>
master
Richard Henderson 2023-05-27 07:19:07 -07:00
parent e665cf72fe
commit e2041f4d5d
3 changed files with 112 additions and 0 deletions

View File

@ -1181,6 +1181,84 @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
return r;
}
/*
* Like partsN(float_to_sint), except do not saturate the result.
* Instead, return the rounded unbounded precision two's compliment result,
* modulo 2**(bitsm1 + 1).
*/
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
FloatRoundMode rmode,
int bitsm1, float_status *s)
{
int flags = 0;
uint64_t r;
bool overflow = false;
switch (p->cls) {
case float_class_snan:
flags |= float_flag_invalid_snan;
/* fall through */
case float_class_qnan:
flags |= float_flag_invalid;
r = 0;
break;
case float_class_inf:
overflow = true;
r = 0;
break;
case float_class_zero:
return 0;
case float_class_normal:
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
flags = float_flag_inexact;
}
if (p->exp <= DECOMPOSED_BINARY_POINT) {
/*
* Because we rounded to integral, and exp < 64,
* we know frac_low is zero.
*/
r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
if (p->exp < bitsm1) {
/* Result in range. */
} else if (p->exp == bitsm1) {
/* The only in-range value is INT_MIN. */
overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
} else {
overflow = true;
}
} else {
/* Overflow, but there might still be bits to return. */
int shl = p->exp - DECOMPOSED_BINARY_POINT;
if (shl < N) {
frac_shl(p, shl);
r = p->frac_hi;
} else {
r = 0;
}
overflow = true;
}
if (p->sign) {
r = -r;
}
break;
default:
g_assert_not_reached();
}
if (overflow) {
flags = float_flag_invalid | float_flag_invalid_cvti;
}
float_raise(flags, s);
return r;
}
/*
* Integer to float conversions
*

View File

@ -852,11 +852,24 @@ static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
#define parts_float_to_uint(P, R, Z, M, S) \
PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
FloatRoundMode rmode,
int bitsm1, float_status *s);
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
FloatRoundMode rmode,
int bitsm1, float_status *s);
#define parts_float_to_sint_modulo(P, R, M, S) \
PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
int scale, float_status *s);
#define parts_float_to_sint(P, R, Z, MN, MX, S) \
PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
#define parts_sint_to_float(P, I, Z, S) \
PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
@ -3409,6 +3422,24 @@ int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
float_status *s)
{
FloatParts64 p;
float64_unpack_canonical(&p, a, s);
return parts_float_to_sint_modulo(&p, rmode, 31, s);
}
int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
float_status *s)
{
FloatParts64 p;
float64_unpack_canonical(&p, a, s);
return parts_float_to_sint_modulo(&p, rmode, 63, s);
}
/*
* Floating-point to unsigned integer conversions
*/

View File

@ -751,6 +751,9 @@ int16_t float64_to_int16_round_to_zero(float64, float_status *status);
int32_t float64_to_int32_round_to_zero(float64, float_status *status);
int64_t float64_to_int64_round_to_zero(float64, float_status *status);
int32_t float64_to_int32_modulo(float64, FloatRoundMode, float_status *status);
int64_t float64_to_int64_modulo(float64, FloatRoundMode, float_status *status);
uint16_t float64_to_uint16_scalbn(float64, FloatRoundMode, int, float_status *);
uint32_t float64_to_uint32_scalbn(float64, FloatRoundMode, int, float_status *);
uint64_t float64_to_uint64_scalbn(float64, FloatRoundMode, int, float_status *);