*: Delete checks for old host definitions

tcg/loongarch64: Generate LSX instructions
 fpu: Add conversions between bfloat16 and [u]int8
 fpu: Handle m68k extended precision denormals properly
 accel/tcg: Improve cputlb i/o organization
 accel/tcg: Simplify tlb_plugin_lookup
 accel/tcg: Remove false-negative halted assertion
 tcg: Add gvec compare with immediate and scalar operand
 tcg/aarch64: Emit BTI insns at jump landing pads
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmUF4VIdHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8wOwf+I9qNus2kV3yQxpuU
 2hqYuLXvH96l9vbqaoyx7hyyJTtrqytLGCMPmQKUdtBGtO6z7PnLNDiooGcbO+gw
 2gdfw3Q//JZUTdx+ZSujUksV0F96Tqu0zi4TdJUPNIwhCrh0K8VjiftfPfbynRtz
 KhQ1lNeO/QzcAgzKiun2NyqdPiYDmNuEIS/jYedQwQweRp/xQJ4/x8DmhGf/OiD4
 rGAcdslN+RenqgFACcJ2A1vxUGMeQv5g/Cn82FgTk0cmgcfAODMnC+WnOm8ruQdT
 snluvnh/2/r8jIhx3frKDKGtaKHCPhoCS7GNK48qejxaybvv3CJQ4qsjRIBKVrVM
 cIrsSw==
 =cTgD
 -----END PGP SIGNATURE-----

Merge tag 'pull-tcg-20230915-2' of https://gitlab.com/rth7680/qemu into staging

*: Delete checks for old host definitions
tcg/loongarch64: Generate LSX instructions
fpu: Add conversions between bfloat16 and [u]int8
fpu: Handle m68k extended precision denormals properly
accel/tcg: Improve cputlb i/o organization
accel/tcg: Simplify tlb_plugin_lookup
accel/tcg: Remove false-negative halted assertion
tcg: Add gvec compare with immediate and scalar operand
tcg/aarch64: Emit BTI insns at jump landing pads

[Resolved conflict between CPUINFO_PMULL and CPUINFO_BTI.
--Stefan]

* tag 'pull-tcg-20230915-2' of https://gitlab.com/rth7680/qemu: (39 commits)
  tcg: Map code_gen_buffer with PROT_BTI
  tcg/aarch64: Emit BTI insns at jump landing pads
  util/cpuinfo-aarch64: Add CPUINFO_BTI
  tcg: Add tcg_out_tb_start backend hook
  fpu: Handle m68k extended precision denormals properly
  fpu: Add conversions between bfloat16 and [u]int8
  accel/tcg: Introduce do_st16_mmio_leN
  accel/tcg: Introduce do_ld16_mmio_beN
  accel/tcg: Merge io_writex into do_st_mmio_leN
  accel/tcg: Merge io_readx into do_ld_mmio_beN
  accel/tcg: Replace direct use of io_readx/io_writex in do_{ld,st}_1
  accel/tcg: Merge cpu_transaction_failed into io_failed
  plugin: Simplify struct qemu_plugin_hwaddr
  accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed
  accel/tcg: Split out io_prepare and io_failed
  accel/tcg: Simplify tlb_plugin_lookup
  target/arm: Use tcg_gen_gvec_cmpi for compare vs 0
  tcg: Add gvec compare with immediate and scalar operand
  tcg/loongarch64: Implement 128-bit load & store
  tcg/loongarch64: Lower rotli_vec to vrotri
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Branch: master
Stefan Hajnoczi 2023-09-19 13:20:54 -04:00
commit d7754940d7
39 changed files with 7419 additions and 393 deletions


@ -1193,6 +1193,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
write_flags = read_flags;
if (is_ram) {
iotlb = memory_region_get_ram_addr(section->mr) + xlat;
assert(!(iotlb & ~TARGET_PAGE_MASK));
/*
* Computing is_clean is expensive; avoid all that unless
* the page is actually writable.
@ -1255,16 +1256,18 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
/* refill the tlb */
/*
* At this point iotlb contains a physical section number in the lower
* TARGET_PAGE_BITS, and either
* + the ram_addr_t of the page base of the target RAM (RAM)
* + the offset within section->mr of the page base (I/O, ROMD)
* When memory region is ram, iotlb contains a TARGET_PAGE_BITS
* aligned ram_addr_t of the page base of the target RAM.
* Otherwise, iotlb contains
* - a physical section number in the lower TARGET_PAGE_BITS
* - the offset within section->mr of the page base (I/O, ROMD) with the
* TARGET_PAGE_BITS masked off.
* We subtract addr_page (which is page aligned and thus won't
* disturb the low bits) to give an offset which can be added to the
* (non-page-aligned) vaddr of the eventual memory access to get
* the MemoryRegion offset for the access. Note that the vaddr we
* subtract here is that of the page base, and not the same as the
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
* vaddr we add back in io_prepare()/get_page_addr_code().
*/
desc->fulltlb[index] = *full;
full = &desc->fulltlb[index];
@ -1347,116 +1350,41 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
mmu_idx, retaddr);
}
static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
vaddr addr, unsigned size,
MMUAccessType access_type,
int mmu_idx, MemTxAttrs attrs,
MemTxResult response,
uintptr_t retaddr)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
if (!cpu->ignore_memory_transaction_failures &&
cc->tcg_ops->do_transaction_failed) {
cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
access_type, mmu_idx, attrs,
response, retaddr);
}
}
/*
* Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
* This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
* because of the side effect of io_writex changing memory layout.
*/
static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
hwaddr mr_offset)
{
#ifdef CONFIG_PLUGIN
SavedIOTLB *saved = &cs->saved_iotlb;
saved->section = section;
saved->mr_offset = mr_offset;
#endif
}
static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, vaddr addr, uintptr_t retaddr,
MMUAccessType access_type, MemOp op)
static MemoryRegionSection *
io_prepare(hwaddr *out_offset, CPUArchState *env, hwaddr xlat,
MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
{
CPUState *cpu = env_cpu(env);
hwaddr mr_offset;
MemoryRegionSection *section;
MemoryRegion *mr;
uint64_t val;
MemTxResult r;
hwaddr mr_offset;
section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
mr = section->mr;
mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
section = iotlb_to_section(cpu, xlat, attrs);
mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
if (!cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
/*
* The memory_region_dispatch may trigger a flush/resize
* so for plugins we save the iotlb_data just in case.
*/
save_iotlb_data(cpu, section, mr_offset);
{
QEMU_IOTHREAD_LOCK_GUARD();
r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
}
if (r != MEMTX_OK) {
hwaddr physaddr = mr_offset +
section->offset_within_address_space -
section->offset_within_region;
cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
mmu_idx, full->attrs, r, retaddr);
}
return val;
*out_offset = mr_offset;
return section;
}
static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, uint64_t val, vaddr addr,
uintptr_t retaddr, MemOp op)
static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
unsigned size, MMUAccessType access_type, int mmu_idx,
MemTxResult response, uintptr_t retaddr)
{
CPUState *cpu = env_cpu(env);
hwaddr mr_offset;
MemoryRegionSection *section;
MemoryRegion *mr;
MemTxResult r;
section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
mr = section->mr;
mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
if (!cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
cpu->mem_io_pc = retaddr;
if (!cpu->ignore_memory_transaction_failures) {
CPUClass *cc = CPU_GET_CLASS(cpu);
/*
* The memory_region_dispatch may trigger a flush/resize
* so for plugins we save the iotlb_data just in case.
*/
save_iotlb_data(cpu, section, mr_offset);
if (cc->tcg_ops->do_transaction_failed) {
hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
{
QEMU_IOTHREAD_LOCK_GUARD();
r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
}
if (r != MEMTX_OK) {
hwaddr physaddr = mr_offset +
section->offset_within_address_space -
section->offset_within_region;
cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
MMU_DATA_STORE, mmu_idx, full->attrs, r,
retaddr);
cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
access_type, mmu_idx,
full->attrs, response, retaddr);
}
}
}
@ -1726,45 +1654,41 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
* in the softmmu lookup code (or helper). We don't handle re-fills or
* checking the victim table. This is purely informational.
*
* This almost never fails as the memory access being instrumented
* should have just filled the TLB. The one corner case is io_writex
* which can cause TLB flushes and potential resizing of the TLBs
* losing the information we need. In those cases we need to recover
* data from a copy of the CPUTLBEntryFull. As long as this always occurs
* from the same thread (which a mem callback will be) this is safe.
* The one corner case is i/o write, which can cause changes to the
* address space. Those changes, and the corresponding tlb flush,
* should be delayed until the next TB, so even then this ought not fail.
* But check, Just in Case.
*/
bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
bool is_store, struct qemu_plugin_hwaddr *data)
{
CPUArchState *env = cpu->env_ptr;
CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
uintptr_t index = tlb_index(env, mmu_idx, addr);
uint64_t tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
MMUAccessType access_type = is_store ? MMU_DATA_STORE : MMU_DATA_LOAD;
uint64_t tlb_addr = tlb_read_idx(tlbe, access_type);
CPUTLBEntryFull *full;
if (likely(tlb_hit(tlb_addr, addr))) {
/* We must have an iotlb entry for MMIO */
if (tlb_addr & TLB_MMIO) {
CPUTLBEntryFull *full;
full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
data->is_io = true;
data->v.io.section =
iotlb_to_section(cpu, full->xlat_section, full->attrs);
data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
} else {
data->is_io = false;
data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
}
return true;
} else {
SavedIOTLB *saved = &cpu->saved_iotlb;
data->is_io = true;
data->v.io.section = saved->section;
data->v.io.offset = saved->mr_offset;
return true;
if (unlikely(!tlb_hit(tlb_addr, addr))) {
return false;
}
}
full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
data->phys_addr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
/* We must have an iotlb entry for MMIO */
if (tlb_addr & TLB_MMIO) {
MemoryRegionSection *section =
iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
full->attrs);
data->is_io = true;
data->mr = section->mr;
} else {
data->is_io = false;
data->mr = NULL;
}
return true;
}
#endif
/*
@ -2084,45 +2008,88 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
* Load @size bytes from @addr, which is memory-mapped i/o.
* The bytes are concatenated in big-endian order with @ret_be.
*/
static uint64_t int_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t ret_be, vaddr addr, int size,
int mmu_idx, MMUAccessType type, uintptr_t ra,
MemoryRegion *mr, hwaddr mr_offset)
{
do {
MemOp this_mop;
unsigned this_size;
uint64_t val;
MemTxResult r;
/* Read aligned pieces up to 8 bytes. */
this_mop = ctz32(size | (int)addr | 8);
this_size = 1 << this_mop;
this_mop |= MO_BE;
r = memory_region_dispatch_read(mr, mr_offset, &val,
this_mop, full->attrs);
if (unlikely(r != MEMTX_OK)) {
io_failed(env, full, addr, this_size, type, mmu_idx, r, ra);
}
if (this_size == 8) {
return val;
}
ret_be = (ret_be << (this_size * 8)) | val;
addr += this_size;
mr_offset += this_size;
size -= this_size;
} while (size);
return ret_be;
}
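The ctz32(size | addr | 8) selection above always picks the largest naturally aligned piece, capped at 8 bytes, so every MMIO dispatch stays aligned. An illustrative standalone check of that selection (plain C using __builtin_ctz in place of QEMU's ctz32; not part of the patch):

#include <assert.h>

/* Mirror of the piece-size computation used by int_ld_mmio_beN above. */
static int piece_size(int size, int addr)
{
    return 1 << __builtin_ctz(size | addr | 8);
}

int main(void)
{
    assert(piece_size(5, 3) == 1);   /* unaligned start: a single byte first */
    assert(piece_size(4, 4) == 4);   /* then a 4-byte aligned access finishes it */
    assert(piece_size(16, 0) == 8);  /* never more than 8 bytes per dispatch */
    return 0;
}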
static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t ret_be, vaddr addr, int size,
int mmu_idx, MMUAccessType type, uintptr_t ra)
{
uint64_t t;
MemoryRegionSection *section;
MemoryRegion *mr;
hwaddr mr_offset;
MemTxAttrs attrs;
uint64_t ret;
tcg_debug_assert(size > 0 && size <= 8);
do {
/* Read aligned pieces up to 8 bytes. */
switch ((size | (int)addr) & 7) {
case 1:
case 3:
case 5:
case 7:
t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB);
ret_be = (ret_be << 8) | t;
size -= 1;
addr += 1;
break;
case 2:
case 6:
t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW);
ret_be = (ret_be << 16) | t;
size -= 2;
addr += 2;
break;
case 4:
t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL);
ret_be = (ret_be << 32) | t;
size -= 4;
addr += 4;
break;
case 0:
return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ);
default:
qemu_build_not_reached();
}
} while (size);
return ret_be;
attrs = full->attrs;
section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
mr = section->mr;
qemu_mutex_lock_iothread();
ret = int_ld_mmio_beN(env, full, ret_be, addr, size, mmu_idx,
type, ra, mr, mr_offset);
qemu_mutex_unlock_iothread();
return ret;
}
static Int128 do_ld16_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t ret_be, vaddr addr, int size,
int mmu_idx, uintptr_t ra)
{
MemoryRegionSection *section;
MemoryRegion *mr;
hwaddr mr_offset;
MemTxAttrs attrs;
uint64_t a, b;
tcg_debug_assert(size > 8 && size <= 16);
attrs = full->attrs;
section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
mr = section->mr;
qemu_mutex_lock_iothread();
a = int_ld_mmio_beN(env, full, ret_be, addr, size - 8, mmu_idx,
MMU_DATA_LOAD, ra, mr, mr_offset);
b = int_ld_mmio_beN(env, full, ret_be, addr + size - 8, 8, mmu_idx,
MMU_DATA_LOAD, ra, mr, mr_offset + size - 8);
qemu_mutex_unlock_iothread();
return int128_make128(b, a);
}
/**
@ -2267,7 +2234,6 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
unsigned tmp, half_size;
if (unlikely(p->flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
return do_ld_mmio_beN(env, p->full, ret_be, p->addr, p->size,
mmu_idx, type, ra);
}
@ -2318,12 +2284,7 @@ static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
MemOp atom;
if (unlikely(p->flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
a = do_ld_mmio_beN(env, p->full, a, p->addr, size - 8,
mmu_idx, MMU_DATA_LOAD, ra);
b = do_ld_mmio_beN(env, p->full, 0, p->addr + 8, 8,
mmu_idx, MMU_DATA_LOAD, ra);
return int128_make128(b, a);
return do_ld16_mmio_beN(env, p->full, a, p->addr, size, mmu_idx, ra);
}
/*
@ -2368,7 +2329,7 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
MMUAccessType type, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
return do_ld_mmio_beN(env, p->full, 0, p->addr, 1, mmu_idx, type, ra);
} else {
return *(uint8_t *)p->haddr;
}
@ -2380,7 +2341,6 @@ static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint16_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 2, mmu_idx, type, ra);
if ((memop & MO_BSWAP) == MO_LE) {
ret = bswap16(ret);
@ -2401,7 +2361,6 @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint32_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 4, mmu_idx, type, ra);
if ((memop & MO_BSWAP) == MO_LE) {
ret = bswap32(ret);
@ -2422,7 +2381,6 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint64_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 8, mmu_idx, type, ra);
if ((memop & MO_BSWAP) == MO_LE) {
ret = bswap64(ret);
@ -2581,12 +2539,8 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
if (likely(!crosspage)) {
if (unlikely(l.page[0].flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
a = do_ld_mmio_beN(env, l.page[0].full, 0, addr, 8,
l.mmu_idx, MMU_DATA_LOAD, ra);
b = do_ld_mmio_beN(env, l.page[0].full, 0, addr + 8, 8,
l.mmu_idx, MMU_DATA_LOAD, ra);
ret = int128_make128(b, a);
ret = do_ld16_mmio_beN(env, l.page[0].full, 0, addr, 16,
l.mmu_idx, ra);
if ((l.memop & MO_BSWAP) == MO_LE) {
ret = bswap128(ret);
}
@ -2727,46 +2681,88 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
* The bytes to store are extracted in little-endian order from @val_le;
* return the bytes of @val_le beyond @p->size that have not been stored.
*/
static uint64_t int_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t val_le, vaddr addr, int size,
int mmu_idx, uintptr_t ra,
MemoryRegion *mr, hwaddr mr_offset)
{
do {
MemOp this_mop;
unsigned this_size;
MemTxResult r;
/* Store aligned pieces up to 8 bytes. */
this_mop = ctz32(size | (int)addr | 8);
this_size = 1 << this_mop;
this_mop |= MO_LE;
r = memory_region_dispatch_write(mr, mr_offset, val_le,
this_mop, full->attrs);
if (unlikely(r != MEMTX_OK)) {
io_failed(env, full, addr, this_size, MMU_DATA_STORE,
mmu_idx, r, ra);
}
if (this_size == 8) {
return 0;
}
val_le >>= this_size * 8;
addr += this_size;
mr_offset += this_size;
size -= this_size;
} while (size);
return val_le;
}
static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t val_le, vaddr addr, int size,
int mmu_idx, uintptr_t ra)
{
MemoryRegionSection *section;
hwaddr mr_offset;
MemoryRegion *mr;
MemTxAttrs attrs;
uint64_t ret;
tcg_debug_assert(size > 0 && size <= 8);
do {
/* Store aligned pieces up to 8 bytes. */
switch ((size | (int)addr) & 7) {
case 1:
case 3:
case 5:
case 7:
io_writex(env, full, mmu_idx, val_le, addr, ra, MO_UB);
val_le >>= 8;
size -= 1;
addr += 1;
break;
case 2:
case 6:
io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUW);
val_le >>= 16;
size -= 2;
addr += 2;
break;
case 4:
io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUL);
val_le >>= 32;
size -= 4;
addr += 4;
break;
case 0:
io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUQ);
return 0;
default:
qemu_build_not_reached();
}
} while (size);
attrs = full->attrs;
section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
mr = section->mr;
return val_le;
qemu_mutex_lock_iothread();
ret = int_st_mmio_leN(env, full, val_le, addr, size, mmu_idx,
ra, mr, mr_offset);
qemu_mutex_unlock_iothread();
return ret;
}
static uint64_t do_st16_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
Int128 val_le, vaddr addr, int size,
int mmu_idx, uintptr_t ra)
{
MemoryRegionSection *section;
MemoryRegion *mr;
hwaddr mr_offset;
MemTxAttrs attrs;
uint64_t ret;
tcg_debug_assert(size > 8 && size <= 16);
attrs = full->attrs;
section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
mr = section->mr;
qemu_mutex_lock_iothread();
int_st_mmio_leN(env, full, int128_getlo(val_le), addr, 8,
mmu_idx, ra, mr, mr_offset);
ret = int_st_mmio_leN(env, full, int128_gethi(val_le), addr + 8,
size - 8, mmu_idx, ra, mr, mr_offset + 8);
qemu_mutex_unlock_iothread();
return ret;
}
/*
@ -2780,7 +2776,6 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
unsigned tmp, half_size;
if (unlikely(p->flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
return do_st_mmio_leN(env, p->full, val_le, p->addr,
p->size, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
@ -2835,11 +2830,8 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
MemOp atom;
if (unlikely(p->flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, p->full, int128_getlo(val_le),
p->addr, 8, mmu_idx, ra);
return do_st_mmio_leN(env, p->full, int128_gethi(val_le),
p->addr + 8, size - 8, mmu_idx, ra);
return do_st16_mmio_leN(env, p->full, val_le, p->addr,
size, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
return int128_gethi(val_le) >> ((size - 8) * 8);
}
@ -2883,7 +2875,7 @@ static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
int mmu_idx, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
do_st_mmio_leN(env, p->full, val, p->addr, 1, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
@ -2898,7 +2890,6 @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
if ((memop & MO_BSWAP) != MO_LE) {
val = bswap16(val);
}
QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, p->full, val, p->addr, 2, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
@ -2918,7 +2909,6 @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
if ((memop & MO_BSWAP) != MO_LE) {
val = bswap32(val);
}
QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, p->full, val, p->addr, 4, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
@ -2938,7 +2928,6 @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
if ((memop & MO_BSWAP) != MO_LE) {
val = bswap64(val);
}
QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, p->full, val, p->addr, 8, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
@ -3066,11 +3055,7 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
if ((l.memop & MO_BSWAP) != MO_LE) {
val = bswap128(val);
}
a = int128_getlo(val);
b = int128_gethi(val);
QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, l.page[0].full, a, addr, 8, l.mmu_idx, ra);
do_st_mmio_leN(env, l.page[0].full, b, addr + 8, 8, l.mmu_idx, ra);
do_st16_mmio_leN(env, l.page[0].full, val, addr, 16, l.mmu_idx, ra);
} else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {


@ -100,14 +100,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
break;
case EXCP_HALTED:
/*
* during start-up the vCPU is reset and the thread is
* kicked several times. If we don't ensure we go back
* to sleep in the halted state we won't cleanly
* start-up when the vCPU is enabled.
*
* cpu->halted should ensure we sleep in wait_io_event
* Usually cpu->halted is set, but may have already been
* reset by another thread by the time we arrive here.
*/
g_assert(cpu->halted);
break;
case EXCP_ATOMIC:
qemu_mutex_unlock_iothread();


@ -1042,6 +1042,32 @@ DO_CMP2(64)
#undef DO_CMP1
#undef DO_CMP2
#define DO_CMP1(NAME, TYPE, OP) \
void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \
{ \
intptr_t oprsz = simd_oprsz(desc); \
TYPE inv = simd_data(desc), b = b64; \
for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \
*(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \
} \
clear_high(d, oprsz, desc); \
}
#define DO_CMP2(SZ) \
DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)
#undef DO_CMP1
#undef DO_CMP2
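The -((a OP b) ^ inv) expression above yields an all-ones element when the predicate holds and zero otherwise, with the inv flag (taken from simd_data) flipping the sense so one helper can also serve the negated condition. An illustrative standalone check (not part of the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint8_t a = 5, b = 5;
    assert((uint8_t)-((a == b) ^ 0) == 0xff);  /* predicate true, inv = 0: all ones */
    assert((uint8_t)-((a == b) ^ 1) == 0x00);  /* inv = 1 flips it: acts like "!=" */
    assert((uint8_t)-((a <  b) ^ 0) == 0x00);  /* predicate false: zero */
    return 0;
}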
void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);


@ -297,4 +297,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)


@ -118,7 +118,8 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
} else {
int shift = frac_normalize(p);
p->cls = float_class_normal;
p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
p->exp = fmt->frac_shift - fmt->exp_bias
- shift + !fmt->m68k_denormal;
}
} else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
p->cls = float_class_normal;
@ -256,7 +257,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
is_tiny = !frac_addi(&discard, p, inc);
}
frac_shrjam(p, 1 - exp);
frac_shrjam(p, !fmt->m68k_denormal - exp);
if (p->frac_lo & round_mask) {
/* Need to recompute round-to-even/round-to-odd. */
@ -287,7 +288,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
p->frac_lo &= ~round_mask;
}
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
frac_shr(p, frac_shift);
if (is_tiny && (flags & float_flag_inexact)) {


@ -517,6 +517,7 @@ typedef struct {
* round_mask: bits below lsb which must be rounded
* The following optional modifiers are available:
* arm_althp: handle ARM Alternative Half Precision
* m68k_denormal: explicit integer bit for extended precision may be 1
*/
typedef struct {
int exp_size;
@ -526,6 +527,7 @@ typedef struct {
int frac_size;
int frac_shift;
bool arm_althp;
bool m68k_denormal;
uint64_t round_mask;
} FloatFmt;
@ -576,7 +578,12 @@ static const FloatFmt float128_params = {
static const FloatFmt floatx80_params[3] = {
[floatx80_precision_s] = { FLOATX80_PARAMS(23) },
[floatx80_precision_d] = { FLOATX80_PARAMS(52) },
[floatx80_precision_x] = { FLOATX80_PARAMS(64) },
[floatx80_precision_x] = {
FLOATX80_PARAMS(64),
#ifdef TARGET_M68K
.m68k_denormal = true,
#endif
},
};
/* Unpack a float to parts, but do not canonicalize. */
@ -3126,6 +3133,15 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
FloatParts64 p;
bfloat16_unpack_canonical(&p, a, s);
return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
@ -3392,6 +3408,11 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
{
return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
@ -3407,6 +3428,11 @@ int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
{
return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
}
int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
@ -3534,6 +3560,15 @@ uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
int scale, float_status *s)
{
FloatParts64 p;
bfloat16_unpack_canonical(&p, a, s);
return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
int scale, float_status *s)
{
@ -3759,6 +3794,11 @@ Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
}
uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
{
return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
@ -3774,6 +3814,11 @@ uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}
uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
{
return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
}
uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
@ -3929,6 +3974,11 @@ bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
return int64_to_bfloat16_scalbn(a, scale, status);
}
bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
{
return int64_to_bfloat16_scalbn(a, scale, status);
}
bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
return int64_to_bfloat16_scalbn(a, 0, status);
@ -3944,6 +3994,11 @@ bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
return int64_to_bfloat16_scalbn(a, 0, status);
}
bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
{
return int64_to_bfloat16_scalbn(a, 0, status);
}
float128 int128_to_float128(Int128 a, float_status *status)
{
FloatParts128 p = { };
@ -4139,6 +4194,11 @@ bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
return uint64_to_bfloat16_scalbn(a, scale, status);
}
bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
{
return uint64_to_bfloat16_scalbn(a, scale, status);
}
bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
return uint64_to_bfloat16_scalbn(a, 0, status);
@ -4154,6 +4214,11 @@ bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
return uint64_to_bfloat16_scalbn(a, 0, status);
}
bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
{
return uint64_to_bfloat16_scalbn(a, 0, status);
}
float128 uint64_to_float128(uint64_t a, float_status *status)
{
FloatParts128 p;


@ -11,6 +11,7 @@
#define CPUINFO_LSE2 (1u << 2)
#define CPUINFO_AES (1u << 3)
#define CPUINFO_PMULL (1u << 4)
#define CPUINFO_BTI (1u << 5)
/* Initialized with a constructor. */
extern unsigned cpuinfo;


@ -100,12 +100,12 @@
typedef struct CPUTLBEntryFull {
/*
* @xlat_section contains:
* - in the lower TARGET_PAGE_BITS, a physical section number
* - with the lower TARGET_PAGE_BITS masked off, an offset which
* must be added to the virtual address to obtain:
* + the ram_addr_t of the target RAM (if the physical section
* number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
* + the offset within the target MemoryRegion (otherwise)
* - For ram, an offset which must be added to the virtual address
* to obtain the ram_addr_t of the target RAM
* - For other memory regions,
* + in the lower TARGET_PAGE_BITS, the physical section number
* + with the TARGET_PAGE_BITS masked off, the offset within
* the target MemoryRegion
*/
hwaddr xlat_section;


@ -111,8 +111,7 @@ static inline int thunk_type_size(const argtype *type_ptr, int is_host)
if (is_host) {
#if defined(HOST_X86_64)
return 8;
#elif defined(HOST_ALPHA) || defined(HOST_IA64) || defined(HOST_MIPS) || \
defined(HOST_PARISC) || defined(HOST_SPARC64)
#elif defined(HOST_MIPS) || defined(HOST_SPARC64)
return 4;
#elif defined(HOST_PPC)
return sizeof(void *);


@ -366,6 +366,8 @@ float32 bfloat16_to_float32(bfloat16, float_status *status);
bfloat16 float64_to_bfloat16(float64 a, float_status *status);
float64 bfloat16_to_float64(bfloat16 a, float_status *status);
int8_t bfloat16_to_int8_scalbn(bfloat16, FloatRoundMode,
int, float_status *status);
int16_t bfloat16_to_int16_scalbn(bfloat16, FloatRoundMode,
int, float_status *status);
int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
@ -373,14 +375,18 @@ int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
int64_t bfloat16_to_int64_scalbn(bfloat16, FloatRoundMode,
int, float_status *status);
int8_t bfloat16_to_int8(bfloat16, float_status *status);
int16_t bfloat16_to_int16(bfloat16, float_status *status);
int32_t bfloat16_to_int32(bfloat16, float_status *status);
int64_t bfloat16_to_int64(bfloat16, float_status *status);
int8_t bfloat16_to_int8_round_to_zero(bfloat16, float_status *status);
int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status);
int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status);
int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status);
uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode,
int, float_status *status);
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode,
int, float_status *status);
uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
@ -388,24 +394,30 @@ uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode,
int, float_status *status);
uint8_t bfloat16_to_uint8(bfloat16 a, float_status *status);
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status);
uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status);
uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status);
uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *status);
uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status);
uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status);
uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status);
bfloat16 int8_to_bfloat16_scalbn(int8_t a, int, float_status *status);
bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status);
bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status);
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status);
bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int, float_status *status);
bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status);
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status);
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status);
bfloat16 int8_to_bfloat16(int8_t a, float_status *status);
bfloat16 int16_to_bfloat16(int16_t a, float_status *status);
bfloat16 int32_to_bfloat16(int32_t a, float_status *status);
bfloat16 int64_to_bfloat16(int64_t a, float_status *status);
bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status);
bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status);
bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status);
bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status);
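A minimal usage sketch of the [u]int8 conversions declared above, assuming the QEMU tree's "fpu/softfloat.h" and a default-initialized float_status (illustrative, not code from this patch):

#include "fpu/softfloat.h"

static void bf16_int8_demo(void)
{
    float_status st = { };                          /* default: round to nearest even */
    bfloat16 h = int8_to_bfloat16(-5, &st);         /* exact; bfloat16 keeps 8 significand bits */
    int8_t  i = bfloat16_to_int8(h, &st);           /* -5 */
    uint8_t u = bfloat16_to_uint8_round_to_zero(uint8_to_bfloat16(200, &st), &st); /* 200 */
    (void)i;
    (void)u;
}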


@ -227,17 +227,6 @@ struct CPUWatchpoint {
QTAILQ_ENTRY(CPUWatchpoint) entry;
};
#ifdef CONFIG_PLUGIN
/*
* For plugins we sometime need to save the resolved iotlb data before
* the memory regions get moved around by io_writex.
*/
typedef struct SavedIOTLB {
MemoryRegionSection *section;
hwaddr mr_offset;
} SavedIOTLB;
#endif
struct KVMState;
struct kvm_run;
@ -409,8 +398,6 @@ struct CPUState {
#ifdef CONFIG_PLUGIN
GArray *plugin_mem_cbs;
/* saved iotlb data from io_writex */
SavedIOTLB saved_iotlb;
#endif
/* TODO Move common fields from CPUArchState here. */


@ -15,15 +15,8 @@
struct qemu_plugin_hwaddr {
bool is_io;
bool is_store;
union {
struct {
MemoryRegionSection *section;
hwaddr offset;
} io;
struct {
void *hostaddr;
} ram;
} v;
hwaddr phys_addr;
MemoryRegion *mr;
};
/**


@ -129,7 +129,6 @@ typedef struct QString QString;
typedef struct RAMBlock RAMBlock;
typedef struct Range Range;
typedef struct ReservedRegion ReservedRegion;
typedef struct SavedIOTLB SavedIOTLB;
typedef struct SHPCDevice SHPCDevice;
typedef struct SSIBus SSIBus;
typedef struct TCGHelperInfo TCGHelperInfo;


@ -374,6 +374,12 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, uint32_t bofs,
uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, int64_t c,
uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, TCGv_i64 c,
uint32_t oprsz, uint32_t maxsz);
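Illustrative calls of the two new expanders declared above, in the style of the other gvec helpers; dofs/aofs are CPUArchState offsets and oprsz/maxsz the usual vector sizes, all assumed to be supplied by the caller (not code from this patch):

/* d[i] = (a[i] == 7) ? -1 : 0, per 32-bit element, against an immediate */
tcg_gen_gvec_cmpi(TCG_COND_EQ, MO_32, dofs, aofs, 7, oprsz, maxsz);

/* Same comparison against a run-time scalar held in a TCGv_i64 */
tcg_gen_gvec_cmps(TCG_COND_EQ, MO_32, dofs, aofs, c_i64, oprsz, maxsz);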
/*
* Perform vector bit select: d = (b & a) | (c & ~a).


@ -316,22 +316,7 @@ uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
{
#ifdef CONFIG_SOFTMMU
if (haddr) {
if (!haddr->is_io) {
RAMBlock *block;
ram_addr_t offset;
void *hostaddr = haddr->v.ram.hostaddr;
block = qemu_ram_block_from_host(hostaddr, false, &offset);
if (!block) {
error_report("Bad host ram pointer %p", haddr->v.ram.hostaddr);
abort();
}
return block->offset + offset + block->mr->addr;
} else {
MemoryRegionSection *mrs = haddr->v.io.section;
return mrs->offset_within_address_space + haddr->v.io.offset;
}
return haddr->phys_addr;
}
#endif
return 0;
@ -341,13 +326,13 @@ const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
{
#ifdef CONFIG_SOFTMMU
if (h && h->is_io) {
MemoryRegionSection *mrs = h->v.io.section;
if (!mrs->mr->name) {
unsigned long maddr = 0xffffffff & (uintptr_t) mrs->mr;
g_autofree char *temp = g_strdup_printf("anon%08lx", maddr);
MemoryRegion *mr = h->mr;
if (!mr->name) {
unsigned maddr = (uintptr_t)mr;
g_autofree char *temp = g_strdup_printf("anon%08x", maddr);
return g_intern_string(temp);
} else {
return g_intern_string(mrs->mr->name);
return g_intern_string(mr->name);
}
} else {
return g_intern_static_string("RAM");

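With this change a plugin memory callback can obtain the physical address directly from the flattened structure; a minimal sketch using the public plugin API, assuming the callback was registered with qemu_plugin_register_vcpu_mem_cb (illustrative, not part of this patch):

#include <qemu-plugin.h>

static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
                     uint64_t vaddr, void *udata)
{
    struct qemu_plugin_hwaddr *hw = qemu_plugin_get_hwaddr(info, vaddr);
    if (hw) {
        uint64_t pa = qemu_plugin_hwaddr_phys_addr(hw);         /* now just hw->phys_addr */
        const char *dev = qemu_plugin_hwaddr_device_name(hw);   /* MR name, or "RAM" */
        qemu_plugin_outs(dev);
        (void)pa;
    }
    (void)cpu_index;
    (void)udata;
}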

@ -121,10 +121,7 @@ static void *new_stack_for_clone(void)
/* Allocate a new stack and get a pointer to its top. */
stack_ptr = qemu_alloc_stack(&stack_size);
#if !defined(HOST_HPPA)
/* The top is at the end of the area, except on HPPA. */
stack_ptr += stack_size;
#endif
return stack_ptr;
}


@ -2943,54 +2943,16 @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}
#define GEN_CMP0(NAME, COND) \
static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
{ \
tcg_gen_negsetcond_i32(COND, d, a, tcg_constant_i32(0)); \
} \
static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
{ \
tcg_gen_negsetcond_i64(COND, d, a, tcg_constant_i64(0)); \
} \
static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
{ \
TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0); \
tcg_gen_cmp_vec(COND, vece, d, a, zero); \
} \
void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
uint32_t opr_sz, uint32_t max_sz) \
{ \
const GVecGen2 op[4] = { \
{ .fno = gen_helper_gvec_##NAME##0_b, \
.fniv = gen_##NAME##0_vec, \
.opt_opc = vecop_list_cmp, \
.vece = MO_8 }, \
{ .fno = gen_helper_gvec_##NAME##0_h, \
.fniv = gen_##NAME##0_vec, \
.opt_opc = vecop_list_cmp, \
.vece = MO_16 }, \
{ .fni4 = gen_##NAME##0_i32, \
.fniv = gen_##NAME##0_vec, \
.opt_opc = vecop_list_cmp, \
.vece = MO_32 }, \
{ .fni8 = gen_##NAME##0_i64, \
.fniv = gen_##NAME##0_vec, \
.opt_opc = vecop_list_cmp, \
.prefer_i64 = TCG_TARGET_REG_BITS == 64, \
.vece = MO_64 }, \
}; \
tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
}
#define GEN_CMP0(NAME, COND) \
void NAME(unsigned vece, uint32_t d, uint32_t m, \
uint32_t opr_sz, uint32_t max_sz) \
{ tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
static const TCGOpcode vecop_list_cmp[] = {
INDEX_op_cmp_vec, 0
};
GEN_CMP0(ceq, TCG_COND_EQ)
GEN_CMP0(cle, TCG_COND_LE)
GEN_CMP0(cge, TCG_COND_GE)
GEN_CMP0(clt, TCG_COND_LT)
GEN_CMP0(cgt, TCG_COND_GT)
GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
#undef GEN_CMP0
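For the EQ case, the rewritten macro above expands to a single call into the new gvec compare-with-immediate interface:

void gen_gvec_ceq0(unsigned vece, uint32_t d, uint32_t m,
                   uint32_t opr_sz, uint32_t max_sz)
{ tcg_gen_gvec_cmpi(TCG_COND_EQ, vece, d, m, 0, opr_sz, max_sz); }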


@ -272,7 +272,7 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
}
}
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return 1;
@ -602,6 +602,10 @@ typedef enum {
DMB_ISH = 0xd50338bf,
DMB_LD = 0x00000100,
DMB_ST = 0x00000200,
BTI_C = 0xd503245f,
BTI_J = 0xd503249f,
BTI_JC = 0xd50324df,
} AArch64Insn;
static inline uint32_t tcg_in32(TCGContext *s)
@ -843,6 +847,17 @@ static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
| rn << 5 | (rd & 0x1f));
}
static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
/*
* While BTI insns are nops on hosts without FEAT_BTI,
* there is no point in emitting them in that case either.
*/
if (cpuinfo & CPUINFO_BTI) {
tcg_out32(s, insn);
}
}
/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
@ -1351,18 +1366,6 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
tcg_out_insn(s, 3206, B, offset);
}
static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
if (offset == sextract64(offset, 0, 26)) {
tcg_out_insn(s, 3206, B, offset);
} else {
/* Choose X9 as a call-clobbered non-LR temporary. */
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
tcg_out_insn(s, 3207, BR, TCG_REG_X9);
}
}
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
@ -1947,12 +1950,28 @@ static const tcg_insn_unit *tb_ret_addr;
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
const tcg_insn_unit *target;
ptrdiff_t offset;
/* Reuse the zeroing that exists for goto_ptr. */
if (a0 == 0) {
tcg_out_goto_long(s, tcg_code_gen_epilogue);
target = tcg_code_gen_epilogue;
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
tcg_out_goto_long(s, tb_ret_addr);
target = tb_ret_addr;
}
offset = tcg_pcrel_diff(s, target) >> 2;
if (offset == sextract64(offset, 0, 26)) {
tcg_out_insn(s, 3206, B, offset);
} else {
/*
* Only x16/x17 generate BTI type Jump (2),
* other registers generate BTI type Jump|Call (3).
*/
QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
}
}
@ -1970,6 +1989,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
tcg_out32(s, I3206_B);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
set_jmp_reset_offset(s, which);
tcg_out_bti(s, BTI_J);
}
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
@ -3074,6 +3094,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
{
TCGReg r;
tcg_out_bti(s, BTI_C);
/* Push (FP, LR) and allocate space for all saved registers. */
tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
TCG_REG_SP, -PUSH_SIZE, 1, 1);
@ -3114,10 +3136,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
* and fall through to the rest of the epilogue.
*/
tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_bti(s, BTI_J);
tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
/* TB epilogue */
tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_bti(s, BTI_J);
/* Remove TCG locals stack space. */
tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
@ -3135,6 +3159,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
static void tcg_out_tb_start(TCGContext *s)
{
tcg_out_bti(s, BTI_J);
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
int i;


@ -509,7 +509,7 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
* mov operand2: values represented with x << (2 * y), x < 0x100
* add, sub, eor...: ditto
*/
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return 1;
@ -2962,6 +2962,11 @@ static void tcg_out_epilogue(TCGContext *s)
(1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
typedef struct {
DebugFrameHeader h;
uint8_t fde_def_cfa[4];


@ -198,7 +198,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
}
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return 1;
@ -4191,6 +4191,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc(s, OPC_RET, 0, 0, 0);
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
memset(p, 0x90, count);

File diff suppressed because it is too large.


@ -17,7 +17,11 @@
C_O0_I1(r)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rZ)
C_O0_I2(w, r)
C_O0_I3(r, r, r)
C_O1_I1(r, r)
C_O1_I1(w, r)
C_O1_I1(w, w)
C_O1_I2(r, r, rC)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)
@ -29,4 +33,9 @@ C_O1_I2(r, 0, rZ)
C_O1_I2(r, rZ, ri)
C_O1_I2(r, rZ, rJ)
C_O1_I2(r, rZ, rZ)
C_O1_I2(w, w, w)
C_O1_I2(w, w, wM)
C_O1_I2(w, w, wA)
C_O1_I3(w, w, w, w)
C_O1_I4(r, rZ, rJ, rZ, rZ)
C_O2_I1(r, r, r)


@ -14,6 +14,7 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('w', ALL_VECTOR_REGS)
/*
* Define constraint letters for constants:
@ -25,3 +26,5 @@ CONST('U', TCG_CT_CONST_U12)
CONST('Z', TCG_CT_CONST_ZERO)
CONST('C', TCG_CT_CONST_C12)
CONST('W', TCG_CT_CONST_WSZ)
CONST('M', TCG_CT_CONST_VCMP)
CONST('A', TCG_CT_CONST_VADD)


@ -32,6 +32,8 @@
#include "../tcg-ldst.c.inc"
#include <asm/hwcap.h>
bool use_lsx_instructions;
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
"zero",
@ -65,7 +67,39 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
"s5",
"s6",
"s7",
"s8"
"s8",
"vr0",
"vr1",
"vr2",
"vr3",
"vr4",
"vr5",
"vr6",
"vr7",
"vr8",
"vr9",
"vr10",
"vr11",
"vr12",
"vr13",
"vr14",
"vr15",
"vr16",
"vr17",
"vr18",
"vr19",
"vr20",
"vr21",
"vr22",
"vr23",
"vr24",
"vr25",
"vr26",
"vr27",
"vr28",
"vr29",
"vr30",
"vr31",
};
#endif
@ -102,6 +136,15 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_A2,
TCG_REG_A1,
TCG_REG_A0,
/* Vector registers */
TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
/* V24 - V31 are caller-saved, and skipped. */
};
static const int tcg_target_call_iarg_regs[] = {
@ -133,8 +176,11 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
#define TCG_CT_CONST_U12 0x800
#define TCG_CT_CONST_C12 0x1000
#define TCG_CT_CONST_WSZ 0x2000
#define TCG_CT_CONST_VCMP 0x4000
#define TCG_CT_CONST_VADD 0x8000
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
{
@ -142,7 +188,7 @@ static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
}
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return true;
@ -165,6 +211,13 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
return true;
}
int64_t vec_val = sextract64(val, 0, 8 << vece);
if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) {
return true;
}
if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) {
return true;
}
return false;
}
@ -1028,6 +1081,48 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
}
}
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi,
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
TCGLabelQemuLdst *ldst;
HostAddress h;
ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
if (h.aa.atom == MO_128) {
/*
* Use VLDX/VSTX when 128-bit atomicity is required.
* If address is aligned to 16-bytes, the 128-bit load/store is atomic.
*/
if (is_ld) {
tcg_out_opc_vldx(s, TCG_VEC_TMP0, h.base, h.index);
tcg_out_opc_vpickve2gr_d(s, data_lo, TCG_VEC_TMP0, 0);
tcg_out_opc_vpickve2gr_d(s, data_hi, TCG_VEC_TMP0, 1);
} else {
tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_lo, 0);
tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_hi, 1);
tcg_out_opc_vstx(s, TCG_VEC_TMP0, h.base, h.index);
}
} else {
/* Otherwise use a pair of LD/ST. */
tcg_out_opc_add_d(s, TCG_REG_TMP0, h.base, h.index);
if (is_ld) {
tcg_out_opc_ld_d(s, data_lo, TCG_REG_TMP0, 0);
tcg_out_opc_ld_d(s, data_hi, TCG_REG_TMP0, 8);
} else {
tcg_out_opc_st_d(s, data_lo, TCG_REG_TMP0, 0);
tcg_out_opc_st_d(s, data_hi, TCG_REG_TMP0, 8);
}
}
if (ldst) {
ldst->type = TCG_TYPE_I128;
ldst->datalo_reg = data_lo;
ldst->datahi_reg = data_hi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}
/*
* Entry-points
*/
@ -1092,6 +1187,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
TCGArg a0 = args[0];
TCGArg a1 = args[1];
TCGArg a2 = args[2];
TCGArg a3 = args[3];
int c2 = const_args[2];
switch (opc) {
@ -1454,6 +1550,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_ld_a64_i64:
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true);
break;
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
@ -1462,6 +1562,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false);
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
@ -1486,6 +1590,444 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
}
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg rd, TCGReg rs)
{
switch (vece) {
case MO_8:
tcg_out_opc_vreplgr2vr_b(s, rd, rs);
break;
case MO_16:
tcg_out_opc_vreplgr2vr_h(s, rd, rs);
break;
case MO_32:
tcg_out_opc_vreplgr2vr_w(s, rd, rs);
break;
case MO_64:
tcg_out_opc_vreplgr2vr_d(s, rd, rs);
break;
default:
g_assert_not_reached();
}
return true;
}
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg base, intptr_t offset)
{
/* Handle imm overflow and division (vldrepl.d imm is divided by 8) */
if (offset < -0x800 || offset > 0x7ff || \
(offset & ((1 << vece) - 1)) != 0) {
tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset);
base = TCG_REG_TMP0;
offset = 0;
}
offset >>= vece;
switch (vece) {
case MO_8:
tcg_out_opc_vldrepl_b(s, r, base, offset);
break;
case MO_16:
tcg_out_opc_vldrepl_h(s, r, base, offset);
break;
case MO_32:
tcg_out_opc_vldrepl_w(s, r, base, offset);
break;
case MO_64:
tcg_out_opc_vldrepl_d(s, r, base, offset);
break;
default:
g_assert_not_reached();
}
return true;
}
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg rd, int64_t v64)
{
/* Try vldi if imm can fit */
int64_t value = sextract64(v64, 0, 8 << vece);
if (-0x200 <= value && value <= 0x1FF) {
uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF);
tcg_out_opc_vldi(s, rd, imm);
return;
}
/* TODO: vldi patterns when imm 12 is set */
/* Fallback to vreplgr2vr */
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value);
switch (vece) {
case MO_8:
tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0);
break;
case MO_16:
tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0);
break;
case MO_32:
tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0);
break;
case MO_64:
tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0);
break;
default:
g_assert_not_reached();
}
}
static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
const TCGArg a1, const TCGArg a2,
bool a2_is_const, bool is_add)
{
static const LoongArchInsn add_vec_insn[4] = {
OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D
};
static const LoongArchInsn add_vec_imm_insn[4] = {
OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU
};
static const LoongArchInsn sub_vec_insn[4] = {
OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D
};
static const LoongArchInsn sub_vec_imm_insn[4] = {
OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
};
if (a2_is_const) {
int64_t value = sextract64(a2, 0, 8 << vece);
if (!is_add) {
value = -value;
}
/* Try vaddi/vsubi */
if (0 <= value && value <= 0x1f) {
tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0, \
a1, value));
return;
} else if (-0x1f <= value && value < 0) {
tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0, \
a1, -value));
return;
}
/* constraint TCG_CT_CONST_VADD ensures unreachable */
g_assert_not_reached();
}
if (is_add) {
tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2));
} else {
tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2));
}
}
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS])
{
TCGType type = vecl + TCG_TYPE_V64;
TCGArg a0, a1, a2, a3;
TCGReg temp = TCG_REG_TMP0;
TCGReg temp_vec = TCG_VEC_TMP0;
static const LoongArchInsn cmp_vec_insn[16][4] = {
[TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
[TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D},
[TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU},
[TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D},
[TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU},
};
static const LoongArchInsn cmp_vec_imm_insn[16][4] = {
[TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D},
[TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D},
[TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU},
[TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D},
[TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU},
};
LoongArchInsn insn;
static const LoongArchInsn neg_vec_insn[4] = {
OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D
};
static const LoongArchInsn mul_vec_insn[4] = {
OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D
};
static const LoongArchInsn smin_vec_insn[4] = {
OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D
};
static const LoongArchInsn umin_vec_insn[4] = {
OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU
};
static const LoongArchInsn smax_vec_insn[4] = {
OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D
};
static const LoongArchInsn umax_vec_insn[4] = {
OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU
};
static const LoongArchInsn ssadd_vec_insn[4] = {
OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D
};
static const LoongArchInsn usadd_vec_insn[4] = {
OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU
};
static const LoongArchInsn sssub_vec_insn[4] = {
OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D
};
static const LoongArchInsn ussub_vec_insn[4] = {
OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU
};
static const LoongArchInsn shlv_vec_insn[4] = {
OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D
};
static const LoongArchInsn shrv_vec_insn[4] = {
OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D
};
static const LoongArchInsn sarv_vec_insn[4] = {
OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D
};
static const LoongArchInsn shli_vec_insn[4] = {
OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D
};
static const LoongArchInsn shri_vec_insn[4] = {
OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D
};
static const LoongArchInsn sari_vec_insn[4] = {
OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D
};
static const LoongArchInsn rotrv_vec_insn[4] = {
OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D
};
a0 = args[0];
a1 = args[1];
a2 = args[2];
a3 = args[3];
/* Currently only supports V128 */
tcg_debug_assert(type == TCG_TYPE_V128);
switch (opc) {
case INDEX_op_st_vec:
/* Try to fit vst imm */
if (-0x800 <= a2 && a2 <= 0x7ff) {
tcg_out_opc_vst(s, a0, a1, a2);
} else {
tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
tcg_out_opc_vstx(s, a0, a1, temp);
}
break;
case INDEX_op_ld_vec:
/* Try to fit vld imm */
if (-0x800 <= a2 && a2 <= 0x7ff) {
tcg_out_opc_vld(s, a0, a1, a2);
} else {
tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
tcg_out_opc_vldx(s, a0, a1, temp);
}
break;
case INDEX_op_and_vec:
tcg_out_opc_vand_v(s, a0, a1, a2);
break;
case INDEX_op_andc_vec:
/*
* vandn vd, vj, vk: vd = vk & ~vj
* andc_vec vd, vj, vk: vd = vj & ~vk
* vj and vk are swapped
*/
tcg_out_opc_vandn_v(s, a0, a2, a1);
break;
case INDEX_op_or_vec:
tcg_out_opc_vor_v(s, a0, a1, a2);
break;
case INDEX_op_orc_vec:
tcg_out_opc_vorn_v(s, a0, a1, a2);
break;
case INDEX_op_xor_vec:
tcg_out_opc_vxor_v(s, a0, a1, a2);
break;
case INDEX_op_nor_vec:
tcg_out_opc_vnor_v(s, a0, a1, a2);
break;
case INDEX_op_not_vec:
tcg_out_opc_vnor_v(s, a0, a1, a1);
break;
case INDEX_op_cmp_vec:
{
TCGCond cond = args[3];
if (const_args[2]) {
/*
* cmp_vec dest, src, value
* Try vseqi/vslei/vslti
*/
int64_t value = sextract64(a2, 0, 8 << vece);
if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \
cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) {
tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \
a0, a1, value));
break;
} else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
(0x00 <= value && value <= 0x1f)) {
tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \
a0, a1, value));
break;
}
/*
* Fallback to:
* dupi_vec temp, a2
* cmp_vec a0, a1, temp, cond
*/
tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
a2 = temp_vec;
}
insn = cmp_vec_insn[cond][vece];
if (insn == 0) {
TCGArg t;
t = a1, a1 = a2, a2 = t;
cond = tcg_swap_cond(cond);
insn = cmp_vec_insn[cond][vece];
tcg_debug_assert(insn != 0);
}
tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
}
break;
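The immediate path above is only taken when the constant fits a 5-bit instruction field: signed for EQ/LE/LT (vseqi/vslei/vslti), unsigned for LEU/LTU. An illustrative restatement of that selection, not part of the patch and with a made-up helper name:

/* Illustration only: can a cmp_vec constant use the immediate encodings? */
static bool cmp_vec_imm_fits(TCGCond cond, int64_t value)
{
    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_LE:
    case TCG_COND_LT:
        return value >= -0x10 && value <= 0x0f;  /* signed 5-bit field */
    case TCG_COND_LEU:
    case TCG_COND_LTU:
        return value >= 0x00 && value <= 0x1f;   /* unsigned 5-bit field */
    default:
        return false;  /* otherwise: dup the constant and compare registers */
    }
}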
case INDEX_op_add_vec:
tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true);
break;
case INDEX_op_sub_vec:
tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false);
break;
case INDEX_op_neg_vec:
tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
break;
case INDEX_op_mul_vec:
tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_smin_vec:
tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_smax_vec:
tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_umin_vec:
tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_umax_vec:
tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_ssadd_vec:
tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_usadd_vec:
tcg_out32(s, encode_vdvjvk_insn(usadd_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_sssub_vec:
tcg_out32(s, encode_vdvjvk_insn(sssub_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_ussub_vec:
tcg_out32(s, encode_vdvjvk_insn(ussub_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_shlv_vec:
tcg_out32(s, encode_vdvjvk_insn(shlv_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_shrv_vec:
tcg_out32(s, encode_vdvjvk_insn(shrv_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_sarv_vec:
tcg_out32(s, encode_vdvjvk_insn(sarv_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_shli_vec:
tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_shri_vec:
tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_sari_vec:
tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_rotrv_vec:
tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1, a2));
break;
case INDEX_op_rotlv_vec:
/* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], temp_vec, a2));
tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1,
temp_vec));
break;
case INDEX_op_rotli_vec:
/* rotli_vec a1, a2 = rotri_vec a1, -a2 */
a2 = extract32(-a2, 0, 3 + vece);
switch (vece) {
case MO_8:
tcg_out_opc_vrotri_b(s, a0, a1, a2);
break;
case MO_16:
tcg_out_opc_vrotri_h(s, a0, a1, a2);
break;
case MO_32:
tcg_out_opc_vrotri_w(s, a0, a1, a2);
break;
case MO_64:
tcg_out_opc_vrotri_d(s, a0, a1, a2);
break;
default:
g_assert_not_reached();
}
break;
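Both rotate cases above rely on the identity that a left rotate by n equals a right rotate by (element_width - n) mod element_width, which is what negating the count (or immediate) achieves. A scalar 32-bit analogue, for illustration only:

/* Illustration only: rotate-left expressed through rotate-right. */
static uint32_t rotr32(uint32_t x, unsigned r)
{
    r &= 31;
    return r ? (x >> r) | (x << (32 - r)) : x;
}

static uint32_t rotl32(uint32_t x, unsigned n)
{
    return rotr32(x, -n & 31);  /* mirrors extract32(-a2, 0, 3 + vece) for MO_32 */
}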
case INDEX_op_bitsel_vec:
/* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
break;
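For reference, bitsel_vec computes d = (b & sel) | (c & ~sel) bit-wise, while LSX's vbitsel.v takes its select mask as the last register operand, hence the reordering above. A scalar sketch of the selection, illustration only:

/* Illustration only: bit-wise select as performed by bitsel_vec. */
static uint64_t bitsel64(uint64_t sel, uint64_t b, uint64_t c)
{
    return (b & sel) | (c & ~sel);
}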
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
default:
g_assert_not_reached();
}
}
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
switch (opc) {
case INDEX_op_ld_vec:
case INDEX_op_st_vec:
case INDEX_op_dup_vec:
case INDEX_op_dupm_vec:
case INDEX_op_cmp_vec:
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
case INDEX_op_and_vec:
case INDEX_op_andc_vec:
case INDEX_op_or_vec:
case INDEX_op_orc_vec:
case INDEX_op_xor_vec:
case INDEX_op_nor_vec:
case INDEX_op_not_vec:
case INDEX_op_neg_vec:
case INDEX_op_mul_vec:
case INDEX_op_smin_vec:
case INDEX_op_smax_vec:
case INDEX_op_umin_vec:
case INDEX_op_umax_vec:
case INDEX_op_ssadd_vec:
case INDEX_op_usadd_vec:
case INDEX_op_sssub_vec:
case INDEX_op_ussub_vec:
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
case INDEX_op_bitsel_vec:
return 1;
default:
return 0;
}
}
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
g_assert_not_reached();
}
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
switch (op) {
@ -1505,6 +2047,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(rZ, r);
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
return C_O2_I1(r, r, r);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return C_O0_I3(r, r, r);
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
return C_O0_I2(rZ, rZ);
@ -1627,6 +2177,54 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_movcond_i64:
return C_O1_I4(r, rZ, rJ, rZ, rZ);
case INDEX_op_ld_vec:
case INDEX_op_dupm_vec:
case INDEX_op_dup_vec:
return C_O1_I1(w, r);
case INDEX_op_st_vec:
return C_O0_I2(w, r);
case INDEX_op_cmp_vec:
return C_O1_I2(w, w, wM);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
return C_O1_I2(w, w, wA);
case INDEX_op_and_vec:
case INDEX_op_andc_vec:
case INDEX_op_or_vec:
case INDEX_op_orc_vec:
case INDEX_op_xor_vec:
case INDEX_op_nor_vec:
case INDEX_op_mul_vec:
case INDEX_op_smin_vec:
case INDEX_op_smax_vec:
case INDEX_op_umin_vec:
case INDEX_op_umax_vec:
case INDEX_op_ssadd_vec:
case INDEX_op_usadd_vec:
case INDEX_op_sssub_vec:
case INDEX_op_ussub_vec:
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
case INDEX_op_rotrv_vec:
case INDEX_op_rotlv_vec:
return C_O1_I2(w, w, w);
case INDEX_op_not_vec:
case INDEX_op_neg_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
case INDEX_op_rotli_vec:
return C_O1_I1(w, w);
case INDEX_op_bitsel_vec:
return C_O1_I3(w, w, w, w);
default:
g_assert_not_reached();
}
@ -1698,6 +2296,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
static void tcg_target_init(TCGContext *s)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
@ -1708,6 +2311,10 @@ static void tcg_target_init(TCGContext *s)
exit(EXIT_FAILURE);
}
if (hwcap & HWCAP_LOONGARCH_LSX) {
use_lsx_instructions = 1;
}
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
@ -1723,6 +2330,18 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
if (use_lsx_instructions) {
tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
}
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
@ -1731,6 +2350,7 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}
typedef struct {

View File

@ -30,7 +30,7 @@
#define LOONGARCH_TCG_TARGET_H
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_NB_REGS 32
#define TCG_TARGET_NB_REGS 64
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
@ -68,13 +68,25 @@ typedef enum {
TCG_REG_S7,
TCG_REG_S8,
TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
/* aliases */
TCG_AREG0 = TCG_REG_S0,
TCG_REG_TMP0 = TCG_REG_T8,
TCG_REG_TMP1 = TCG_REG_T7,
TCG_REG_TMP2 = TCG_REG_T6,
TCG_VEC_TMP0 = TCG_REG_V23,
} TCGReg;
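The 32 vector registers are appended directly after the general registers, which is why TCG_TARGET_NB_REGS grows from 32 to 64 above. An illustrative compile-time check of that numbering assumption (not part of the patch):

/* Illustration only. */
QEMU_BUILD_BUG_ON(TCG_REG_V0 != 32);
QEMU_BUILD_BUG_ON(TCG_REG_V31 != 63);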
extern bool use_lsx_instructions;
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN 16
@ -159,7 +171,31 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_qemu_ldst_i128 use_lsx_instructions
#define TCG_TARGET_HAS_v64 0
#define TCG_TARGET_HAS_v128 use_lsx_instructions
#define TCG_TARGET_HAS_v256 0
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 0
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec 1
#define TCG_TARGET_HAS_nand_vec 0
#define TCG_TARGET_HAS_nor_vec 1
#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1
#define TCG_TARGET_HAS_roti_vec 1
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec 1
#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_DEFAULT_MO (0)

View File

@ -0,0 +1,12 @@
/*
* Copyright (c) 2023 Jiajie Chen
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version.
*
* See the COPYING file in the top-level directory for details.
*
* Target-specific opcodes for host vector expansion. These will be
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
* consider these to be UNSPEC with names.
*/

View File

@ -190,7 +190,7 @@ static bool is_p2m1(tcg_target_long val)
}
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return 1;
@ -2628,6 +2628,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
static void tcg_target_init(TCGContext *s)
{
tcg_target_detect_isa();

View File

@ -261,7 +261,7 @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
}
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return 1;
@ -2527,6 +2527,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out32(s, BCLR | BO_ALWAYS);
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
{
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);

View File

@ -33,8 +33,19 @@
#include "tcg/tcg.h"
#include "exec/translation-block.h"
#include "tcg-internal.h"
#include "host/cpuinfo.h"
/*
* Local source-level compatibility with Unix.
* Used by tcg_region_init below.
*/
#if defined(_WIN32)
#define PROT_READ 1
#define PROT_WRITE 2
#define PROT_EXEC 4
#endif
struct tcg_region_tree {
QemuMutex lock;
QTree *tree;
@ -83,6 +94,18 @@ bool in_code_gen_buffer(const void *p)
return (size_t)(p - region.start_aligned) <= region.total_size;
}
#ifndef CONFIG_TCG_INTERPRETER
static int host_prot_read_exec(void)
{
#if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI)
if (cpuinfo & CPUINFO_BTI) {
return PROT_READ | PROT_EXEC | PROT_BTI;
}
#endif
return PROT_READ | PROT_EXEC;
}
#endif
#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
@ -505,14 +528,6 @@ static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
/*
* Local source-level compatibility with Unix.
* Used by tcg_region_init below.
*/
#define PROT_READ 1
#define PROT_WRITE 2
#define PROT_EXEC 4
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
void *buf;
@ -567,7 +582,7 @@ static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
goto fail;
}
buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0);
if (buf_rx == MAP_FAILED) {
goto fail_rx;
}
@ -642,7 +657,7 @@ static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
return -1;
}
if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
if (mprotect((void *)buf_rx, size, host_prot_read_exec()) != 0) {
error_setg_errno(errp, errno, "mprotect for jit splitwx");
munmap((void *)buf_rx, size);
munmap((void *)buf_rw, size);
@ -805,7 +820,7 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
need_prot = PROT_READ | PROT_WRITE;
#ifndef CONFIG_TCG_INTERPRETER
if (tcg_splitwx_diff == 0) {
need_prot |= PROT_EXEC;
need_prot |= host_prot_read_exec();
}
#endif
for (size_t i = 0, n = region.n; i < n; i++) {
@ -820,7 +835,11 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
} else if (need_prot == (PROT_READ | PROT_WRITE)) {
rc = qemu_mprotect_rw(start, end - start);
} else {
#ifdef CONFIG_POSIX
rc = mprotect(start, end - start, need_prot);
#else
g_assert_not_reached();
#endif
}
if (rc) {
error_setg_errno(&error_fatal, errno,

View File

@ -145,7 +145,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
#define sextreg sextract64
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return 1;
@ -2099,6 +2099,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
static volatile sig_atomic_t got_sigill;
static void sigill_handler(int signo, siginfo_t *si, void *data)

View File

@ -540,7 +540,7 @@ static bool risbg_mask(uint64_t c)
}
/* Test if a constant matches the constraint. */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return 1;
@ -3483,6 +3483,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
memset(p, 0x07, count * sizeof(tcg_insn_unit));

View File

@ -322,7 +322,7 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
}
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return 1;
@ -962,6 +962,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_movi_s13(s, TCG_REG_O0, 0);
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
int i;

View File

@ -3846,6 +3846,155 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
}
}
static void expand_cmps_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t tysz, TCGType type,
TCGCond cond, TCGv_vec c)
{
TCGv_vec t0 = tcg_temp_new_vec(type);
TCGv_vec t1 = tcg_temp_new_vec(type);
uint32_t i;
for (i = 0; i < oprsz; i += tysz) {
tcg_gen_ld_vec(t1, cpu_env, aofs + i);
tcg_gen_cmp_vec(cond, vece, t0, t1, c);
tcg_gen_st_vec(t0, cpu_env, dofs + i);
}
}
void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, TCGv_i64 c,
uint32_t oprsz, uint32_t maxsz)
{
static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
static gen_helper_gvec_2i * const eq_fn[4] = {
gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
};
static gen_helper_gvec_2i * const lt_fn[4] = {
gen_helper_gvec_lts8, gen_helper_gvec_lts16,
gen_helper_gvec_lts32, gen_helper_gvec_lts64
};
static gen_helper_gvec_2i * const le_fn[4] = {
gen_helper_gvec_les8, gen_helper_gvec_les16,
gen_helper_gvec_les32, gen_helper_gvec_les64
};
static gen_helper_gvec_2i * const ltu_fn[4] = {
gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
};
static gen_helper_gvec_2i * const leu_fn[4] = {
gen_helper_gvec_leus8, gen_helper_gvec_leus16,
gen_helper_gvec_leus32, gen_helper_gvec_leus64
};
static gen_helper_gvec_2i * const * const fns[16] = {
[TCG_COND_EQ] = eq_fn,
[TCG_COND_LT] = lt_fn,
[TCG_COND_LE] = le_fn,
[TCG_COND_LTU] = ltu_fn,
[TCG_COND_LEU] = leu_fn,
};
TCGType type;
check_size_align(oprsz, maxsz, dofs | aofs);
check_overlap_2(dofs, aofs, maxsz);
if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
do_dup(MO_8, dofs, oprsz, maxsz,
NULL, NULL, -(cond == TCG_COND_ALWAYS));
return;
}
/*
* Implement inline with a vector type, if possible.
* Prefer integer when 64-bit host and 64-bit comparison.
*/
type = choose_vector_type(cmp_list, vece, oprsz,
TCG_TARGET_REG_BITS == 64 && vece == MO_64);
if (type != 0) {
const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
TCGv_vec t_vec = tcg_temp_new_vec(type);
uint32_t some;
tcg_gen_dup_i64_vec(vece, t_vec, c);
switch (type) {
case TCG_TYPE_V256:
some = QEMU_ALIGN_DOWN(oprsz, 32);
expand_cmps_vec(vece, dofs, aofs, some, 32,
TCG_TYPE_V256, cond, t_vec);
aofs += some;
dofs += some;
oprsz -= some;
maxsz -= some;
/* fallthru */
case TCG_TYPE_V128:
some = QEMU_ALIGN_DOWN(oprsz, 16);
expand_cmps_vec(vece, dofs, aofs, some, 16,
TCG_TYPE_V128, cond, t_vec);
break;
case TCG_TYPE_V64:
some = QEMU_ALIGN_DOWN(oprsz, 8);
expand_cmps_vec(vece, dofs, aofs, some, 8,
TCG_TYPE_V64, cond, t_vec);
break;
default:
g_assert_not_reached();
}
tcg_temp_free_vec(t_vec);
tcg_swap_vecop_list(hold_list);
} else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
uint32_t i;
for (i = 0; i < oprsz; i += 8) {
tcg_gen_ld_i64(t0, cpu_env, aofs + i);
tcg_gen_negsetcond_i64(cond, t0, t0, c);
tcg_gen_st_i64(t0, cpu_env, dofs + i);
}
tcg_temp_free_i64(t0);
} else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
TCGv_i32 t0 = tcg_temp_ebb_new_i32();
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
uint32_t i;
tcg_gen_extrl_i64_i32(t1, c);
for (i = 0; i < oprsz; i += 4) {
tcg_gen_ld_i32(t0, cpu_env, aofs + i);
tcg_gen_negsetcond_i32(cond, t0, t0, t1);
tcg_gen_st_i32(t0, cpu_env, dofs + i);
}
tcg_temp_free_i32(t0);
tcg_temp_free_i32(t1);
} else {
gen_helper_gvec_2i * const *fn = fns[cond];
bool inv = false;
if (fn == NULL) {
cond = tcg_invert_cond(cond);
fn = fns[cond];
assert(fn != NULL);
inv = true;
}
tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
return;
}
if (oprsz < maxsz) {
expand_clr(dofs + oprsz, maxsz - oprsz);
}
}
void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, int64_t c,
uint32_t oprsz, uint32_t maxsz)
{
TCGv_i64 tmp = tcg_constant_i64(c);
tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
}
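As a usage sketch (the offsets and sizes here are hypothetical), a front end could set each byte of a 16-byte destination region to -1 where the corresponding source byte is negative and to 0 elsewhere:

/* Illustration only: byte-wise "x < 0" over a 16-byte vector in env. */
tcg_gen_gvec_cmpi(TCG_COND_LT, MO_8, dofs, aofs, 0, 16, 16);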
static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
TCGv_i64 t = tcg_temp_ebb_new_i64();

View File

@ -108,6 +108,7 @@ static void tcg_register_jit_int(const void *buf, size_t size,
__attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
@ -171,7 +172,7 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
@ -4689,7 +4690,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
ts = arg_temp(arg);
if (ts->val_type == TEMP_VAL_CONST
&& tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
&& tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
/* constant is OK for instruction */
const_args[i] = 1;
new_args[i] = ts->val;
@ -6014,6 +6015,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
s->gen_insn_data =
tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
tcg_out_tb_start(s);
num_insns = -1;
QTAILQ_FOREACH(op, &s->ops, link) {
TCGOpcode opc = op->opc;

View File

@ -913,7 +913,7 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
}
/* Test if a constant matches the constraint. */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
return ct & TCG_CT_CONST;
}
@ -955,6 +955,11 @@ static inline void tcg_target_qemu_prologue(TCGContext *s)
{
}
static void tcg_out_tb_start(TCGContext *s)
{
/* nothing to do */
}
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;

View File

@ -4,7 +4,7 @@
#
VPATH += $(SRC_PATH)/tests/tcg/m68k
TESTS += trap
TESTS += trap denormal
# On m68k Linux supports 4k and 8k pages (but 8k is currently broken)
EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-8192

tests/tcg/m68k/denormal.c (new file, 53 lines)
View File

@ -0,0 +1,53 @@
/*
* Test m68k extended double denormals.
*/
#include <stdio.h>
#include <stdint.h>
#define TEST(X, Y) { X, Y, X * Y }
static volatile long double test[][3] = {
TEST(0x1p+16383l, 0x1p-16446l),
TEST(0x1.1p-8223l, 0x1.1p-8224l),
TEST(1.0l, 0x1p-16383l),
};
#undef TEST
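Each TEST row records the two factors together with their product as evaluated by the compiler at build time, so any mismatch with the run-time product indicates a softfloat bug. For example, the last row expands to:

/* TEST(1.0l, 0x1p-16383l) becomes: */
{ 1.0l, 0x1p-16383l, 1.0l * 0x1p-16383l },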
static void dump_ld(const char *label, long double ld)
{
union {
long double d;
struct {
uint32_t exp:16;
uint32_t space:16;
uint32_t h;
uint32_t l;
};
} u;
u.d = ld;
printf("%12s: % -27La 0x%04x 0x%08x 0x%08x\n", label, u.d, u.exp, u.h, u.l);
}
int main(void)
{
int i, n = sizeof(test) / sizeof(test[0]), err = 0;
for (i = 0; i < n; ++i) {
long double x = test[i][0];
long double y = test[i][1];
long double build_mul = test[i][2];
long double runtime_mul = x * y;
if (runtime_mul != build_mul) {
dump_ld("x", x);
dump_ld("y", y);
dump_ld("build_mul", build_mul);
dump_ld("runtime_mul", runtime_mul);
err = 1;
}
}
return err;
}

View File

@ -13,6 +13,9 @@
# include <asm/hwcap.h>
# include "elf.h"
# endif
# ifndef HWCAP2_BTI
# define HWCAP2_BTI 0 /* added in glibc 2.32 */
# endif
#endif
#ifdef CONFIG_DARWIN
# include <sys/sysctl.h>
@ -58,12 +61,16 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
info |= (hwcap & HWCAP_AES ? CPUINFO_AES : 0);
info |= (hwcap & HWCAP_PMULL ? CPUINFO_PMULL : 0);
unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
info |= (hwcap2 & HWCAP2_BTI ? CPUINFO_BTI : 0);
#endif
#ifdef CONFIG_DARWIN
info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
info |= sysctl_for_bool("hw.optional.arm.FEAT_AES") * CPUINFO_AES;
info |= sysctl_for_bool("hw.optional.arm.FEAT_PMULL") * CPUINFO_PMULL;
info |= sysctl_for_bool("hw.optional.arm.FEAT_BTI") * CPUINFO_BTI;
#endif
cpuinfo = info;

View File

@ -585,7 +585,7 @@ char *qemu_get_pid_name(pid_t pid)
void *qemu_alloc_stack(size_t *sz)
{
void *ptr, *guardpage;
void *ptr;
int flags;
#ifdef CONFIG_DEBUG_STACK_USAGE
void *ptr2;
@ -618,17 +618,8 @@ void *qemu_alloc_stack(size_t *sz)
abort();
}
#if defined(HOST_IA64)
/* separate register stack */
guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
#elif defined(HOST_HPPA)
/* stack grows up */
guardpage = ptr + *sz - pagesz;
#else
/* stack grows down */
guardpage = ptr;
#endif
if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
/* Stack grows down -- guard page at the bottom. */
if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
perror("failed to set up stack guard page");
abort();
}
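With the old IA64 and HPPA special cases gone, the stack always grows down and the guard is simply the first page of the allocation. An illustrative layout, assuming 4 KiB pages:

/*
 * Illustration only:
 *
 *   ptr                ptr + pagesz                              ptr + *sz
 *    |  PROT_NONE guard  |  usable stack, grows down from ptr + *sz  |
 */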