mirror of https://github.com/proxmox/mirror_qemu
ppc patch queue for 2022-06-21:
- tcg and target/ppc: vector divide instructions and a vbpermd fix for BE hosts - ppc440_uc.c: fix boot of sam460ex machine - target/ppc: fix stop state on cpu reset - xive2: Access direct mapped thread contexts from all chips - a couple of Coverity fixes -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQQX6/+ZI9AYAK8oOBk82cqW3gMxZAUCYrGSLAAKCRA82cqW3gMx ZEL/AQDhEUUaztu+AWwnPKFZOP9VBU6vO2UIxZF1GHDRnoNlLQD+O6uADnIuxpxl klUMX8h2RFIkC0zv6xGN285SzhzpyAw= =/2K2 -----END PGP SIGNATURE----- Merge tag 'pull-ppc-20220621' of https://gitlab.com/danielhb/qemu into staging ppc patch queue for 2022-06-21: - tcg and target/ppc: vector divide instructions and a vbpermd fix for BE hosts - ppc440_uc.c: fix boot of sam460ex machine - target/ppc: fix stop state on cpu reset - xive2: Access direct mapped thread contexts from all chips - a couple of Coverity fixes # -----BEGIN PGP SIGNATURE----- # # iHUEABYKAB0WIQQX6/+ZI9AYAK8oOBk82cqW3gMxZAUCYrGSLAAKCRA82cqW3gMx # ZEL/AQDhEUUaztu+AWwnPKFZOP9VBU6vO2UIxZF1GHDRnoNlLQD+O6uADnIuxpxl # klUMX8h2RFIkC0zv6xGN285SzhzpyAw= # =/2K2 # -----END PGP SIGNATURE----- # gpg: Signature made Tue 21 Jun 2022 02:41:00 AM PDT # gpg: using EDDSA key 17EBFF9923D01800AF2838193CD9CA96DE033164 # gpg: Good signature from "Daniel Henrique Barboza <danielhb413@gmail.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. 
# Primary key fingerprint: 17EB FF99 23D0 1800 AF28 3819 3CD9 CA96 DE03 3164 * tag 'pull-ppc-20220621' of https://gitlab.com/danielhb/qemu: target/ppc: cpu_init: Clean up stop state on cpu reset target/ppc: fix unreachable code in fpu_helper.c target/ppc: avoid int32 multiply overflow in int_helper.c ppc/pnv: fix extra indent spaces with DEFINE_PROP* pnv/xive2: Access direct mapped thread contexts from all chips target/ppc: fix vbpermd in big endian hosts ppc: fix boot with sam460ex target/ppc: Implemented vector module quadword target/ppc: Implemented vector module word/doubleword target/ppc: Implemented remaining vector divide extended host-utils: Implemented signed 256-by-128 division host-utils: Implemented unsigned 256-by-128 division target/ppc: Implemented vector divide extended word target/ppc: Implemented vector divide quadword target/ppc: Implemented vector divide instructions Signed-off-by: Richard Henderson <richard.henderson@linaro.org>master
commit
5cdcfd861e
|
@ -1574,6 +1574,12 @@ static const MemoryRegionOps pnv_xive2_ic_sync_ops = {
|
||||||
* When the TM direct pages of the IC controller are accessed, the
|
* When the TM direct pages of the IC controller are accessed, the
|
||||||
* target HW thread is deduced from the page offset.
|
* target HW thread is deduced from the page offset.
|
||||||
*/
|
*/
|
||||||
|
/*
 * Compute the PIR targeted by an access at @offset: the chip id shifted
 * left by 8, OR-ed with the offset shifted right by ic_shift.
 */
static uint32_t pnv_xive2_ic_tm_get_pir(PnvXive2 *xive, hwaddr offset)
{
    /* On P10, the node ID shift in the PIR register is 8 bits */
    const hwaddr page_index = offset >> xive->ic_shift;

    return (xive->chip->chip_id << 8) | page_index;
}
|
||||||
|
|
||||||
static XiveTCTX *pnv_xive2_get_indirect_tctx(PnvXive2 *xive, uint32_t pir)
|
static XiveTCTX *pnv_xive2_get_indirect_tctx(PnvXive2 *xive, uint32_t pir)
|
||||||
{
|
{
|
||||||
PnvChip *chip = xive->chip;
|
PnvChip *chip = xive->chip;
|
||||||
|
@ -1596,10 +1602,12 @@ static uint64_t pnv_xive2_ic_tm_indirect_read(void *opaque, hwaddr offset,
|
||||||
unsigned size)
|
unsigned size)
|
||||||
{
|
{
|
||||||
PnvXive2 *xive = PNV_XIVE2(opaque);
|
PnvXive2 *xive = PNV_XIVE2(opaque);
|
||||||
uint32_t pir = offset >> xive->ic_shift;
|
uint32_t pir;
|
||||||
XiveTCTX *tctx = pnv_xive2_get_indirect_tctx(xive, pir);
|
XiveTCTX *tctx;
|
||||||
uint64_t val = -1;
|
uint64_t val = -1;
|
||||||
|
|
||||||
|
pir = pnv_xive2_ic_tm_get_pir(xive, offset);
|
||||||
|
tctx = pnv_xive2_get_indirect_tctx(xive, pir);
|
||||||
if (tctx) {
|
if (tctx) {
|
||||||
val = xive_tctx_tm_read(NULL, tctx, offset, size);
|
val = xive_tctx_tm_read(NULL, tctx, offset, size);
|
||||||
}
|
}
|
||||||
|
@ -1611,9 +1619,11 @@ static void pnv_xive2_ic_tm_indirect_write(void *opaque, hwaddr offset,
|
||||||
uint64_t val, unsigned size)
|
uint64_t val, unsigned size)
|
||||||
{
|
{
|
||||||
PnvXive2 *xive = PNV_XIVE2(opaque);
|
PnvXive2 *xive = PNV_XIVE2(opaque);
|
||||||
uint32_t pir = offset >> xive->ic_shift;
|
uint32_t pir;
|
||||||
XiveTCTX *tctx = pnv_xive2_get_indirect_tctx(xive, pir);
|
XiveTCTX *tctx;
|
||||||
|
|
||||||
|
pir = pnv_xive2_ic_tm_get_pir(xive, offset);
|
||||||
|
tctx = pnv_xive2_get_indirect_tctx(xive, pir);
|
||||||
if (tctx) {
|
if (tctx) {
|
||||||
xive_tctx_tm_write(NULL, tctx, offset, val, size);
|
xive_tctx_tm_write(NULL, tctx, offset, val, size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1088,10 +1088,10 @@ static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge,
|
||||||
}
|
}
|
||||||
|
|
||||||
static Property pnv_phb3_properties[] = {
|
static Property pnv_phb3_properties[] = {
|
||||||
DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0),
|
DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0),
|
||||||
DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0),
|
DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0),
|
||||||
DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *),
|
DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *),
|
||||||
DEFINE_PROP_END_OF_LIST(),
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
};
|
};
|
||||||
|
|
||||||
static void pnv_phb3_class_init(ObjectClass *klass, void *data)
|
static void pnv_phb3_class_init(ObjectClass *klass, void *data)
|
||||||
|
|
|
@ -1692,11 +1692,11 @@ static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno,
|
||||||
}
|
}
|
||||||
|
|
||||||
static Property pnv_phb4_properties[] = {
|
static Property pnv_phb4_properties[] = {
|
||||||
DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
|
DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
|
||||||
DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
|
DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
|
||||||
DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC,
|
DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC,
|
||||||
PnvPhb4PecState *),
|
PnvPhb4PecState *),
|
||||||
DEFINE_PROP_END_OF_LIST(),
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
};
|
};
|
||||||
|
|
||||||
static void pnv_phb4_class_init(ObjectClass *klass, void *data)
|
static void pnv_phb4_class_init(ObjectClass *klass, void *data)
|
||||||
|
|
|
@ -215,11 +215,11 @@ static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt,
|
||||||
}
|
}
|
||||||
|
|
||||||
static Property pnv_pec_properties[] = {
|
static Property pnv_pec_properties[] = {
|
||||||
DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0),
|
DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0),
|
||||||
DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0),
|
DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0),
|
||||||
DEFINE_PROP_LINK("chip", PnvPhb4PecState, chip, TYPE_PNV_CHIP,
|
DEFINE_PROP_LINK("chip", PnvPhb4PecState, chip, TYPE_PNV_CHIP,
|
||||||
PnvChip *),
|
PnvChip *),
|
||||||
DEFINE_PROP_END_OF_LIST(),
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
};
|
};
|
||||||
|
|
||||||
static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec)
|
static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec)
|
||||||
|
|
|
@ -1180,6 +1180,14 @@ static void dcr_write_pcie(void *opaque, int dcrn, uint32_t val)
|
||||||
case PEGPL_CFGMSK:
|
case PEGPL_CFGMSK:
|
||||||
s->cfg_mask = val;
|
s->cfg_mask = val;
|
||||||
size = ~(val & 0xfffffffe) + 1;
|
size = ~(val & 0xfffffffe) + 1;
|
||||||
|
/*
|
||||||
|
* Firmware sets this register to E0000001. Why we are not sure,
|
||||||
|
* but the current guess is anything above PCIE_MMCFG_SIZE_MAX is
|
||||||
|
* ignored.
|
||||||
|
*/
|
||||||
|
if (size > PCIE_MMCFG_SIZE_MAX) {
|
||||||
|
size = PCIE_MMCFG_SIZE_MAX;
|
||||||
|
}
|
||||||
pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size);
|
pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size);
|
||||||
break;
|
break;
|
||||||
case PEGPL_MSGBAH:
|
case PEGPL_MSGBAH:
|
||||||
|
|
|
@ -32,6 +32,7 @@
|
||||||
|
|
||||||
#include "qemu/compiler.h"
|
#include "qemu/compiler.h"
|
||||||
#include "qemu/bswap.h"
|
#include "qemu/bswap.h"
|
||||||
|
#include "qemu/int128.h"
|
||||||
|
|
||||||
#ifdef CONFIG_INT128
|
#ifdef CONFIG_INT128
|
||||||
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
|
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
|
||||||
|
@ -849,4 +850,6 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
|
||||||
|
Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -128,11 +128,21 @@ static inline bool int128_ge(Int128 a, Int128 b)
|
||||||
return a >= b;
|
return a >= b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Unsigned "a >= b": both operands are compared as __uint128_t. */
static inline bool int128_uge(Int128 a, Int128 b)
{
    const __uint128_t ua = (__uint128_t)a;
    const __uint128_t ub = (__uint128_t)b;

    return ua >= ub;
}
|
||||||
|
|
||||||
static inline bool int128_lt(Int128 a, Int128 b)
|
static inline bool int128_lt(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return a < b;
|
return a < b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Unsigned "a < b": both operands are compared as __uint128_t. */
static inline bool int128_ult(Int128 a, Int128 b)
{
    const __uint128_t ua = (__uint128_t)a;
    const __uint128_t ub = (__uint128_t)b;

    return ua < ub;
}
|
||||||
|
|
||||||
static inline bool int128_le(Int128 a, Int128 b)
|
static inline bool int128_le(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return a <= b;
|
return a <= b;
|
||||||
|
@ -177,6 +187,15 @@ static inline Int128 bswap128(Int128 a)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Count the leading zero bits of a 128-bit value.
 * Returns 128 when the value is zero.
 */
static inline int clz128(Int128 a)
{
    const uint64_t upper = (uint64_t)(a >> 64);
    const uint64_t lower = (uint64_t)a;

    if (upper) {
        return __builtin_clzll(upper);
    }
    if (lower) {
        /* the whole upper half is zero: 64 leading zeros plus the rest */
        return __builtin_clzll(lower) + 64;
    }
    return 128;
}
|
||||||
|
|
||||||
static inline Int128 int128_divu(Int128 a, Int128 b)
|
static inline Int128 int128_divu(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return (__uint128_t)a / (__uint128_t)b;
|
return (__uint128_t)a / (__uint128_t)b;
|
||||||
|
@ -373,11 +392,21 @@ static inline bool int128_ge(Int128 a, Int128 b)
|
||||||
return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo);
|
return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Unsigned "a >= b" for the two-word Int128: the high words are compared
 * as unsigned values first, the low words break a tie.
 */
static inline bool int128_uge(Int128 a, Int128 b)
{
    if (a.hi != b.hi) {
        return (uint64_t)a.hi > (uint64_t)b.hi;
    }
    return a.lo >= b.lo;
}
|
||||||
|
|
||||||
static inline bool int128_lt(Int128 a, Int128 b)
|
static inline bool int128_lt(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return !int128_ge(a, b);
|
return !int128_ge(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Unsigned "a < b" for the two-word Int128: the high words are compared
 * as unsigned values first, the low words break a tie.
 */
static inline bool int128_ult(Int128 a, Int128 b)
{
    if (a.hi != b.hi) {
        return (uint64_t)a.hi < (uint64_t)b.hi;
    }
    return a.lo < b.lo;
}
|
||||||
|
|
||||||
static inline bool int128_le(Int128 a, Int128 b)
|
static inline bool int128_le(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return int128_ge(b, a);
|
return int128_ge(b, a);
|
||||||
|
@ -418,6 +447,15 @@ static inline Int128 bswap128(Int128 a)
|
||||||
return int128_make128(bswap64(a.hi), bswap64(a.lo));
|
return int128_make128(bswap64(a.hi), bswap64(a.lo));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Count the leading zero bits of a two-word Int128.
 * Returns 128 when the value is zero.
 */
static inline int clz128(Int128 a)
{
    if (a.hi) {
        return __builtin_clzll(a.hi);
    }
    if (a.lo) {
        /* the whole high word is zero: 64 leading zeros plus the rest */
        return __builtin_clzll(a.lo) + 64;
    }
    return 128;
}
|
||||||
|
|
||||||
Int128 int128_divu(Int128, Int128);
|
Int128 int128_divu(Int128, Int128);
|
||||||
Int128 int128_remu(Int128, Int128);
|
Int128 int128_remu(Int128, Int128);
|
||||||
Int128 int128_divs(Int128, Int128);
|
Int128 int128_divs(Int128, Int128);
|
||||||
|
|
|
@ -7186,6 +7186,9 @@ static void ppc_cpu_reset(DeviceState *dev)
|
||||||
}
|
}
|
||||||
pmu_update_summaries(env);
|
pmu_update_summaries(env);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* clean any pending stop state */
|
||||||
|
env->resume_as_sreset = 0;
|
||||||
#endif
|
#endif
|
||||||
hreg_compute_hflags(env);
|
hreg_compute_hflags(env);
|
||||||
env->reserve_addr = (target_ulong)-1ULL;
|
env->reserve_addr = (target_ulong)-1ULL;
|
||||||
|
|
|
@ -464,7 +464,7 @@ static void do_fpscr_check_status(CPUPPCState *env, uintptr_t raddr)
|
||||||
}
|
}
|
||||||
cs->exception_index = POWERPC_EXCP_PROGRAM;
|
cs->exception_index = POWERPC_EXCP_PROGRAM;
|
||||||
env->error_code = error | POWERPC_EXCP_FP;
|
env->error_code = error | POWERPC_EXCP_FP;
|
||||||
env->fpscr |= error ? FP_FEX : 0;
|
env->fpscr |= FP_FEX;
|
||||||
/* Deferred floating-point exception after target FPSCR update */
|
/* Deferred floating-point exception after target FPSCR update */
|
||||||
if (fp_exceptions_enabled(env)) {
|
if (fp_exceptions_enabled(env)) {
|
||||||
raise_exception_err_ra(env, cs->exception_index,
|
raise_exception_err_ra(env, cs->exception_index,
|
||||||
|
|
|
@ -175,6 +175,14 @@ DEF_HELPER_FLAGS_3(VMULOSW, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
DEF_HELPER_FLAGS_3(VMULOUB, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
DEF_HELPER_FLAGS_3(VMULOUB, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
DEF_HELPER_FLAGS_3(VDIVSQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
DEF_HELPER_FLAGS_3(VDIVUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
DEF_HELPER_FLAGS_3(VDIVESD, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
DEF_HELPER_FLAGS_3(VDIVEUD, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
DEF_HELPER_FLAGS_3(VDIVESQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
DEF_HELPER_FLAGS_3(VDIVEUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
DEF_HELPER_FLAGS_3(VMODSQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
DEF_HELPER_FLAGS_3(VMODUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr)
|
||||||
|
|
|
@ -786,3 +786,26 @@ XVF64GERPP 111011 ... -- .... 0 ..... 00111010 ..- @XX3_at xa=%xx_xa_pair
|
||||||
XVF64GERPN 111011 ... -- .... 0 ..... 10111010 ..- @XX3_at xa=%xx_xa_pair
|
XVF64GERPN 111011 ... -- .... 0 ..... 10111010 ..- @XX3_at xa=%xx_xa_pair
|
||||||
XVF64GERNP 111011 ... -- .... 0 ..... 01111010 ..- @XX3_at xa=%xx_xa_pair
|
XVF64GERNP 111011 ... -- .... 0 ..... 01111010 ..- @XX3_at xa=%xx_xa_pair
|
||||||
XVF64GERNN 111011 ... -- .... 0 ..... 11111010 ..- @XX3_at xa=%xx_xa_pair
|
XVF64GERNN 111011 ... -- .... 0 ..... 11111010 ..- @XX3_at xa=%xx_xa_pair
|
||||||
|
|
||||||
|
## Vector Division Instructions
|
||||||
|
|
||||||
|
VDIVSW 000100 ..... ..... ..... 00110001011 @VX
|
||||||
|
VDIVUW 000100 ..... ..... ..... 00010001011 @VX
|
||||||
|
VDIVSD 000100 ..... ..... ..... 00111001011 @VX
|
||||||
|
VDIVUD 000100 ..... ..... ..... 00011001011 @VX
|
||||||
|
VDIVSQ 000100 ..... ..... ..... 00100001011 @VX
|
||||||
|
VDIVUQ 000100 ..... ..... ..... 00000001011 @VX
|
||||||
|
|
||||||
|
VDIVESW 000100 ..... ..... ..... 01110001011 @VX
|
||||||
|
VDIVEUW 000100 ..... ..... ..... 01010001011 @VX
|
||||||
|
VDIVESD 000100 ..... ..... ..... 01111001011 @VX
|
||||||
|
VDIVEUD 000100 ..... ..... ..... 01011001011 @VX
|
||||||
|
VDIVESQ 000100 ..... ..... ..... 01100001011 @VX
|
||||||
|
VDIVEUQ 000100 ..... ..... ..... 01000001011 @VX
|
||||||
|
|
||||||
|
VMODSW 000100 ..... ..... ..... 11110001011 @VX
|
||||||
|
VMODUW 000100 ..... ..... ..... 11010001011 @VX
|
||||||
|
VMODSD 000100 ..... ..... ..... 11111001011 @VX
|
||||||
|
VMODUD 000100 ..... ..... ..... 11011001011 @VX
|
||||||
|
VMODSQ 000100 ..... ..... ..... 11100001011 @VX
|
||||||
|
VMODUQ 000100 ..... ..... ..... 11000001011 @VX
|
||||||
|
|
|
@ -789,7 +789,7 @@ static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
|
||||||
int64_t psum = 0;
|
int64_t psum = 0;
|
||||||
for (int i = 0; i < 8; i++, mask >>= 1) {
|
for (int i = 0; i < 8; i++, mask >>= 1) {
|
||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
|
psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return psum;
|
return psum;
|
||||||
|
@ -811,7 +811,8 @@ static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
|
||||||
int64_t psum = 0;
|
int64_t psum = 0;
|
||||||
for (int i = 0; i < 2; i++, mask >>= 1) {
|
for (int i = 0; i < 2; i++, mask >>= 1) {
|
||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16);
|
psum += (int64_t)sextract32(a, 16 * i, 16) *
|
||||||
|
sextract32(b, 16 * i, 16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return psum;
|
return psum;
|
||||||
|
@ -1162,6 +1163,112 @@ void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
|
||||||
*t = tmp;
|
*t = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Signed quadword divide: t = a / b.
 * For a zero divisor, or for the most negative value divided by -1, the
 * dividend is written to t instead (the result is undefined in that case).
 */
void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    const Int128 minus_one = int128_makes64(-1);
    const Int128 most_negative = int128_make128(0, INT64_MIN);
    const bool quotient_defined =
        int128_nz(b->s128) &&
        (int128_ne(a->s128, most_negative) || int128_ne(b->s128, minus_one));

    if (likely(quotient_defined)) {
        t->s128 = int128_divs(a->s128, b->s128);
        return;
    }

    t->s128 = a->s128; /* Undefined behavior */
}
|
||||||
|
|
||||||
|
/*
 * Unsigned quadword divide: t = a / b.
 * For a zero divisor the dividend is written to t instead (the result is
 * undefined in that case).
 */
void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    if (!int128_nz(b->s128)) {
        t->s128 = a->s128; /* Undefined behavior */
        return;
    }

    t->s128 = int128_divu(a->s128, b->s128);
}
|
||||||
|
|
||||||
|
/*
 * Signed "divide extended" on each of the two doublewords: the element
 * of a is used as the high half of a 128-bit dividend whose low half is
 * zero, and the low 64 bits of the quotient are stored in t.
 * For a zero divisor, or INT64_MIN with a divisor of -1, the element of a
 * is copied to t instead (the result is undefined in that case).
 */
void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    for (int i = 0; i < 2; i++) {
        const int64_t divisor = b->s64[i];
        int64_t high = a->s64[i];
        uint64_t low = 0;

        if (unlikely((high == INT64_MIN && divisor == -1) || !divisor)) {
            t->s64[i] = a->s64[i]; /* Undefined behavior */
            continue;
        }

        divs128(&low, &high, divisor);
        t->s64[i] = low;
    }
}
|
||||||
|
|
||||||
|
/*
 * Unsigned "divide extended" on each of the two doublewords: the element
 * of a is used as the high half of a 128-bit dividend whose low half is
 * zero, and the low 64 bits of the quotient are stored in t.
 * For a zero divisor the element of a is copied to t instead (the result
 * is undefined in that case).
 */
void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    for (int i = 0; i < 2; i++) {
        const uint64_t divisor = b->u64[i];
        uint64_t high = a->u64[i];
        uint64_t low = 0;

        if (unlikely(!divisor)) {
            t->u64[i] = a->u64[i]; /* Undefined behavior */
            continue;
        }

        divu128(&low, &high, divisor);
        t->u64[i] = low;
    }
}
|
||||||
|
|
||||||
|
/*
 * Signed quadword "divide extended": a is used as the high half of a
 * 256-bit dividend whose low half is zero; the low 128 bits of the
 * quotient computed by divs256() are stored in t.
 * For a zero divisor, or the most negative value with a divisor of -1,
 * a is copied to t instead (the result is undefined in that case).
 */
void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    const Int128 most_negative = int128_make128(0, INT64_MIN);
    const Int128 minus_one = int128_makes64(-1);
    Int128 dividend_hi = a->s128;
    Int128 dividend_lo = int128_zero();

    if (unlikely(!int128_nz(b->s128) ||
                 (int128_eq(b->s128, minus_one) &&
                  int128_eq(dividend_hi, most_negative)))) {
        t->s128 = a->s128; /* Undefined behavior */
        return;
    }

    divs256(&dividend_lo, &dividend_hi, b->s128);
    t->s128 = dividend_lo;
}
|
||||||
|
|
||||||
|
/*
 * Unsigned quadword "divide extended": a is used as the high half of a
 * 256-bit dividend whose low half is zero; the low 128 bits of the
 * quotient computed by divu256() are stored in t.
 * For a zero divisor a is copied to t instead (the result is undefined in
 * that case).
 */
void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 dividend_hi = a->s128;
    Int128 dividend_lo = int128_zero();

    if (unlikely(!int128_nz(b->s128))) {
        t->s128 = a->s128; /* Undefined behavior */
        return;
    }

    divu256(&dividend_lo, &dividend_hi, b->s128);
    t->s128 = dividend_lo;
}
|
||||||
|
|
||||||
|
/*
 * Signed quadword modulo: t = a % b.
 * For a zero divisor, or for the most negative value with a divisor of
 * -1, zero is written to t instead (the result is undefined in that case).
 */
void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    const Int128 minus_one = int128_makes64(-1);
    const Int128 most_negative = int128_make128(0, INT64_MIN);
    const bool remainder_defined =
        int128_nz(b->s128) &&
        (int128_ne(a->s128, most_negative) || int128_ne(b->s128, minus_one));

    if (likely(remainder_defined)) {
        t->s128 = int128_rems(a->s128, b->s128);
        return;
    }

    t->s128 = int128_zero(); /* Undefined behavior */
}
|
||||||
|
|
||||||
|
/*
 * Unsigned quadword modulo: t = a % b.
 * For a zero divisor, zero is written to t instead (the result is
 * undefined in that case).
 */
void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    const bool divisor_nonzero = int128_nz(b->s128);

    if (likely(divisor_nonzero)) {
        t->s128 = int128_remu(a->s128, b->s128);
        return;
    }

    t->s128 = int128_zero(); /* Undefined behavior */
}
|
||||||
|
|
||||||
void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
|
void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
|
||||||
{
|
{
|
||||||
ppc_avr_t result;
|
ppc_avr_t result;
|
||||||
|
@ -1307,14 +1414,13 @@ XXGENPCV(XXGENPCVDM, 8)
|
||||||
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
|
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
|
||||||
#define VBPERMD_INDEX(i) (i)
|
#define VBPERMD_INDEX(i) (i)
|
||||||
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
|
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
|
||||||
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
|
|
||||||
#else
|
#else
|
||||||
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
|
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
|
||||||
#define VBPERMD_INDEX(i) (1 - i)
|
#define VBPERMD_INDEX(i) (1 - i)
|
||||||
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
|
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
|
||||||
#define EXTRACT_BIT(avr, i, index) \
|
|
||||||
(extract64((avr)->u64[1 - i], 63 - index, 1))
|
|
||||||
#endif
|
#endif
|
||||||
|
#define EXTRACT_BIT(avr, i, index) \
|
||||||
|
(extract64((avr)->VsrD(i), 63 - index, 1))
|
||||||
|
|
||||||
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
|
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
|
||||||
{
|
{
|
||||||
|
|
|
@ -3238,6 +3238,157 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
|
||||||
TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
|
TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
|
||||||
TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
|
TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
|
||||||
|
|
||||||
|
static bool do_vdiv_vmod(DisasContext *ctx, arg_VX *a, const int vece,
|
||||||
|
void (*func_32)(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b),
|
||||||
|
void (*func_64)(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b))
|
||||||
|
{
|
||||||
|
const GVecGen3 op = {
|
||||||
|
.fni4 = func_32,
|
||||||
|
.fni8 = func_64,
|
||||||
|
.vece = vece
|
||||||
|
};
|
||||||
|
|
||||||
|
REQUIRE_VECTOR(ctx);
|
||||||
|
|
||||||
|
tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
|
||||||
|
avr_full_offset(a->vrb), 16, 16, &op);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Generators for the per-element divide/modulo callbacks.
 *
 * Each macro defines "static void NAME(t, a, b)" which patches the
 * divisor b in place and then emits DIV(t, a, b):
 *  - DIVU*: b is replaced by the constant 1 when it is zero.
 *  - DIVS*: b is replaced by the computed flag when b is zero, or when a
 *    is the minimum value and b is -1 (the flag is presumably 1, the
 *    setcond result -- confirm).
 */
#define DIVU32(NAME, DIV)                                               \
static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)                    \
{                                                                       \
    TCGv_i32 c_zero = tcg_constant_i32(0);                              \
    TCGv_i32 c_one = tcg_constant_i32(1);                               \
    /* b = (b == 0) ? 1 : b */                                          \
    tcg_gen_movcond_i32(TCG_COND_EQ, b, b, c_zero, c_one, b);           \
    DIV(t, a, b);                                                       \
}

#define DIVS32(NAME, DIV)                                               \
static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)                    \
{                                                                       \
    TCGv_i32 flag = tcg_temp_new_i32();                                 \
    TCGv_i32 tmp = tcg_temp_new_i32();                                  \
    /* flag = (a == INT32_MIN && b == -1) || (b == 0) */                \
    tcg_gen_setcondi_i32(TCG_COND_EQ, flag, a, INT32_MIN);              \
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, b, -1);                      \
    tcg_gen_and_i32(flag, flag, tmp);                                   \
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, b, 0);                       \
    tcg_gen_or_i32(flag, flag, tmp);                                    \
    /* b = (flag != 0) ? flag : b */                                    \
    tcg_gen_movi_i32(tmp, 0);                                           \
    tcg_gen_movcond_i32(TCG_COND_NE, b, flag, tmp, flag, b);            \
    DIV(t, a, b);                                                       \
    tcg_temp_free_i32(flag);                                            \
    tcg_temp_free_i32(tmp);                                             \
}

#define DIVU64(NAME, DIV)                                               \
static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)                    \
{                                                                       \
    TCGv_i64 c_zero = tcg_constant_i64(0);                              \
    TCGv_i64 c_one = tcg_constant_i64(1);                               \
    /* b = (b == 0) ? 1 : b */                                          \
    tcg_gen_movcond_i64(TCG_COND_EQ, b, b, c_zero, c_one, b);           \
    DIV(t, a, b);                                                       \
}

#define DIVS64(NAME, DIV)                                               \
static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)                    \
{                                                                       \
    TCGv_i64 flag = tcg_temp_new_i64();                                 \
    TCGv_i64 tmp = tcg_temp_new_i64();                                  \
    /* flag = (a == INT64_MIN && b == -1) || (b == 0) */                \
    tcg_gen_setcondi_i64(TCG_COND_EQ, flag, a, INT64_MIN);              \
    tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, b, -1);                      \
    tcg_gen_and_i64(flag, flag, tmp);                                   \
    tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, b, 0);                       \
    tcg_gen_or_i64(flag, flag, tmp);                                    \
    /* b = (flag != 0) ? flag : b */                                    \
    tcg_gen_movi_i64(tmp, 0);                                           \
    tcg_gen_movcond_i64(TCG_COND_NE, b, flag, tmp, flag, b);            \
    DIV(t, a, b);                                                       \
    tcg_temp_free_i64(flag);                                            \
    tcg_temp_free_i64(tmp);                                             \
}
|
||||||
|
|
||||||
|
DIVS32(do_divsw, tcg_gen_div_i32)
|
||||||
|
DIVU32(do_divuw, tcg_gen_divu_i32)
|
||||||
|
DIVS64(do_divsd, tcg_gen_div_i64)
|
||||||
|
DIVU64(do_divud, tcg_gen_divu_i64)
|
||||||
|
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVSW, do_vdiv_vmod, MO_32, do_divsw, NULL)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVUW, do_vdiv_vmod, MO_32, do_divuw, NULL)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVSD, do_vdiv_vmod, MO_64, NULL, do_divsd)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVUD, do_vdiv_vmod, MO_64, NULL, do_divud)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
|
||||||
|
|
||||||
|
/*
 * Emit the signed 32-bit "divide extended": both operands are
 * sign-extended to 64 bits, a is shifted left by 32 and divided by b,
 * and the low 32 bits of the quotient are written to t.
 */
static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i64 dividend = tcg_temp_new_i64();
    TCGv_i64 divisor = tcg_temp_new_i64();

    tcg_gen_ext_i32_i64(dividend, a);
    tcg_gen_ext_i32_i64(divisor, b);

    /* (a << 32)/b -- the dividend temporary is reused for the quotient */
    tcg_gen_shli_i64(dividend, dividend, 32);
    tcg_gen_div_i64(dividend, dividend, divisor);

    /* if quotient doesn't fit in 32 bits the result is undefined */
    tcg_gen_extrl_i64_i32(t, dividend);

    tcg_temp_free_i64(dividend);
    tcg_temp_free_i64(divisor);
}
|
||||||
|
|
||||||
|
/*
 * Emit the unsigned 32-bit "divide extended": both operands are
 * zero-extended to 64 bits, a is shifted left by 32 and divided by b,
 * and the low 32 bits of the quotient are written to t.
 */
static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i64 dividend = tcg_temp_new_i64();
    TCGv_i64 divisor = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(dividend, a);
    tcg_gen_extu_i32_i64(divisor, b);

    /* (a << 32)/b -- the dividend temporary is reused for the quotient */
    tcg_gen_shli_i64(dividend, dividend, 32);
    tcg_gen_divu_i64(dividend, dividend, divisor);

    /* if quotient doesn't fit in 32 bits the result is undefined */
    tcg_gen_extrl_i64_i32(t, dividend);

    tcg_temp_free_i64(dividend);
    tcg_temp_free_i64(divisor);
}
|
||||||
|
|
||||||
|
DIVS32(do_divesw, do_dives_i32)
|
||||||
|
DIVU32(do_diveuw, do_diveu_i32)
|
||||||
|
|
||||||
|
DIVS32(do_modsw, tcg_gen_rem_i32)
|
||||||
|
DIVU32(do_moduw, tcg_gen_remu_i32)
|
||||||
|
DIVS64(do_modsd, tcg_gen_rem_i64)
|
||||||
|
DIVU64(do_modud, tcg_gen_remu_i64)
|
||||||
|
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVESW, do_vdiv_vmod, MO_32, do_divesw, NULL)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVEUW, do_vdiv_vmod, MO_32, do_diveuw, NULL)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVESD, do_vx_helper, gen_helper_VDIVESD)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVEUD, do_vx_helper, gen_helper_VDIVEUD)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVESQ, do_vx_helper, gen_helper_VDIVESQ)
|
||||||
|
TRANS_FLAGS2(ISA310, VDIVEUQ, do_vx_helper, gen_helper_VDIVEUQ)
|
||||||
|
|
||||||
|
TRANS_FLAGS2(ISA310, VMODSW, do_vdiv_vmod, MO_32, do_modsw , NULL)
|
||||||
|
TRANS_FLAGS2(ISA310, VMODUW, do_vdiv_vmod, MO_32, do_moduw, NULL)
|
||||||
|
TRANS_FLAGS2(ISA310, VMODSD, do_vdiv_vmod, MO_64, NULL, do_modsd)
|
||||||
|
TRANS_FLAGS2(ISA310, VMODUD, do_vdiv_vmod, MO_64, NULL, do_modud)
|
||||||
|
TRANS_FLAGS2(ISA310, VMODSQ, do_vx_helper, gen_helper_VMODSQ)
|
||||||
|
TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ)
|
||||||
|
|
||||||
|
#undef DIVS32
|
||||||
|
#undef DIVU32
|
||||||
|
#undef DIVS64
|
||||||
|
#undef DIVU64
|
||||||
|
|
||||||
#undef GEN_VR_LDX
|
#undef GEN_VR_LDX
|
||||||
#undef GEN_VR_STX
|
#undef GEN_VR_STX
|
||||||
#undef GEN_VR_LVE
|
#undef GEN_VR_LVE
|
||||||
|
|
|
@ -266,3 +266,183 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow)
|
||||||
*plow = *plow << shift;
|
*plow = *plow << shift;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unsigned 256-by-128 division.
|
||||||
|
* Returns the remainder via r.
|
||||||
|
 * Returns the lower 128 bits of the quotient.
|
||||||
|
* Needs a normalized divisor (most significant bit set to 1).
|
||||||
|
*
|
||||||
|
* Adapted from include/qemu/host-utils.h udiv_qrnnd,
|
||||||
|
* from the GNU Multi Precision Library - longlong.h __udiv_qrnnd
|
||||||
|
* (https://gmplib.org/repo/gmp/file/tip/longlong.h)
|
||||||
|
*
|
||||||
|
* Licensed under the GPLv2/LGPLv3
|
||||||
|
*/
|
||||||
|
/*
 * One 64-bit quotient digit of udiv256_qrnnd().
 *
 * Divides @num by the high word of the divisor (@d_hi) to estimate the
 * digit, appends @next (the following 64 bits of the dividend) below the
 * partial remainder, and corrects the estimate at most twice against the
 * product of the estimate and the low word of the divisor (@d_lo).
 * The partial remainder is stored in *prem; the digit is returned.
 */
static Int128 udiv256_qrnnd_step(Int128 *prem, Int128 num, uint64_t next,
                                 Int128 d, Int128 d_hi, uint64_t d_lo)
{
    Int128 digit = int128_divu(num, d_hi);
    Int128 part = int128_remu(num, d_hi);
    uint64_t prod_lo = int128_getlo(digit);
    uint64_t prod_hi = int128_gethi(digit);
    Int128 prod;

    /* prod = digit * d_lo, computed in place by mulu128() */
    mulu128(&prod_lo, &prod_hi, d_lo);
    prod = int128_make128(prod_lo, prod_hi);

    /* part = (low word of part << 64) | next */
    part = int128_make128(next, int128_getlo(part));

    if (int128_ult(part, prod)) {
        digit = int128_sub(digit, int128_one());
        part = int128_add(part, d);
        /* second correction only if the addition did not wrap around */
        if (int128_uge(part, d) && int128_ult(part, prod)) {
            digit = int128_sub(digit, int128_one());
            part = int128_add(part, d);
        }
    }

    *prem = int128_sub(part, prod);
    return digit;
}

static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d)
{
    const Int128 d_hi = int128_make64(int128_gethi(d));
    const uint64_t d_lo = int128_getlo(d);
    Int128 q_hi, q_lo, partial;

    /* upper digit: consumes n1 and the high word of n0 */
    q_hi = udiv256_qrnnd_step(&partial, n1, int128_gethi(n0), d, d_hi, d_lo);
    /* lower digit: consumes the partial remainder and the low word of n0 */
    q_lo = udiv256_qrnnd_step(r, partial, int128_getlo(n0), d, d_hi, d_lo);

    return int128_or(int128_lshift(q_hi, 64), q_lo);
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unsigned 256-by-128 division.
|
||||||
|
* Returns the remainder.
|
||||||
|
* Returns quotient via plow and phigh.
|
||||||
|
* Also returns the remainder via the function return value.
|
||||||
|
*/
|
||||||
|
Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor)
|
||||||
|
{
|
||||||
|
Int128 dhi = *phigh;
|
||||||
|
Int128 dlo = *plow;
|
||||||
|
Int128 rem, dhighest;
|
||||||
|
int sh;
|
||||||
|
|
||||||
|
if (!int128_nz(divisor) || !int128_nz(dhi)) {
|
||||||
|
*plow = int128_divu(dlo, divisor);
|
||||||
|
*phigh = int128_zero();
|
||||||
|
return int128_remu(dlo, divisor);
|
||||||
|
} else {
|
||||||
|
sh = clz128(divisor);
|
||||||
|
|
||||||
|
if (int128_ult(dhi, divisor)) {
|
||||||
|
if (sh != 0) {
|
||||||
|
/* normalize the divisor, shifting the dividend accordingly */
|
||||||
|
divisor = int128_lshift(divisor, sh);
|
||||||
|
dhi = int128_or(int128_lshift(dhi, sh),
|
||||||
|
int128_urshift(dlo, (128 - sh)));
|
||||||
|
dlo = int128_lshift(dlo, sh);
|
||||||
|
}
|
||||||
|
|
||||||
|
*phigh = int128_zero();
|
||||||
|
*plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
|
||||||
|
} else {
|
||||||
|
if (sh != 0) {
|
||||||
|
/* normalize the divisor, shifting the dividend accordingly */
|
||||||
|
divisor = int128_lshift(divisor, sh);
|
||||||
|
dhighest = int128_rshift(dhi, (128 - sh));
|
||||||
|
dhi = int128_or(int128_lshift(dhi, sh),
|
||||||
|
int128_urshift(dlo, (128 - sh)));
|
||||||
|
dlo = int128_lshift(dlo, sh);
|
||||||
|
|
||||||
|
*phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* dhi >= divisor
|
||||||
|
* Since the MSB of divisor is set (sh == 0),
|
||||||
|
* (dhi - divisor) < divisor
|
||||||
|
*
|
||||||
|
* Thus, the high part of the quotient is 1, and we can
|
||||||
|
* calculate the low part with a single call to udiv_qrnnd
|
||||||
|
* after subtracting divisor from dhi
|
||||||
|
*/
|
||||||
|
dhi = int128_sub(dhi, divisor);
|
||||||
|
*phigh = int128_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
*plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* since the dividend/divisor might have been normalized,
|
||||||
|
* the remainder might also have to be shifted back
|
||||||
|
*/
|
||||||
|
rem = int128_urshift(rem, sh);
|
||||||
|
return rem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Signed 256-by-128 division.
|
||||||
|
* Returns quotient via plow and phigh.
|
||||||
|
* Also returns the remainder via the function return value.
|
||||||
|
*/
|
||||||
|
Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor)
{
    /* Signs are sampled before any operand is turned into a magnitude */
    const bool dividend_neg = !int128_nonneg(*phigh);
    const bool divisor_neg = !int128_nonneg(divisor);
    Int128 mag_hi = *phigh;
    Int128 mag_lo = *plow;
    Int128 rem;

    if (dividend_neg) {
        /* 256-bit two's complement negation of the dividend */
        if (int128_nz(mag_lo)) {
            mag_hi = int128_not(mag_hi);
            mag_lo = int128_neg(mag_lo);
        } else {
            mag_hi = int128_neg(mag_hi);
        }
    }

    if (divisor_neg) {
        divisor = int128_neg(divisor);
    }

    rem = divu256(&mag_lo, &mag_hi, divisor);

    /* The quotient is negative when exactly one operand was negative */
    if (dividend_neg != divisor_neg) {
        if (int128_nz(mag_lo)) {
            *phigh = int128_not(mag_hi);
            *plow = int128_neg(mag_lo);
        } else {
            *phigh = int128_neg(mag_hi);
            *plow = int128_zero();
        }
    } else {
        *phigh = mag_hi;
        *plow = mag_lo;
    }

    /* The remainder takes the sign of the dividend */
    return dividend_neg ? int128_neg(rem) : rem;
}
|
||||||
|
|
Loading…
Reference in New Issue