cmpxchg emulation of atomics, v8

-----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJYEMv7AAoJEK0ScMxN0CebtzsIAJD3n9AlCnJoC0xVJDcacqlY
 nkUqJqgmV5FkXq+x8KA6t7G5jfxZTBxk6QY42nXBidfuquogXnk0TQ0LJxLqp316
 mbjdomF8NHZH/79wMg5cYP/Thu4FAtw4uqJbb7kUKPjvQiPJhkISAl/4Jg9y07WR
 n6KptKI09QpIs6cM8q9DQ+RXoYG/xg/hP3Wih5TL4N1hZSiJG78Mpr5nF2HgbFrs
 gKRCmUzNkrG/hBweOqWDRo3H/fHvxFUDMNOzceH7gl1OvANaXaIO2lMkEI3MleFF
 Jq5pEtJ4TefIeoqCw8Nd5WZyzqZPUucqlXWwz3TLP0x3AgzkTH4F5lWlumqb+WM=
 =jEq6
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-atomic-20161026' into staging

cmpxchg emulation of atomics, v8

# gpg: Signature made Wed 26 Oct 2016 16:30:03 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-atomic-20161026: (37 commits)
  target-alpha: Emulate LL/SC using cmpxchg helpers
  target-alpha: Introduce MMU_PHYS_IDX
  target-arm: remove EXCP_STREX + cpu_exclusive_{test, info}
  linux-user: remove handling of aarch64's EXCP_STREX
  linux-user: remove handling of ARM's EXCP_STREX
  target-arm: emulate aarch64's LL/SC using cmpxchg helpers
  target-arm: emulate SWP with atomic_xchg helper
  target-arm: emulate LL/SC using cmpxchg helpers
  target-arm: Rearrange aa32 load and store functions
  tests: add atomic_add-bench
  target-i386: remove helper_lock()
  target-i386: emulate XCHG using atomic helper
  target-i386: emulate LOCK'ed BTX ops using atomic helpers
  target-i386: emulate LOCK'ed XADD using atomic helper
  target-i386: emulate LOCK'ed NEG using cmpxchg helper
  target-i386: emulate LOCK'ed NOT using atomic helper
  target-i386: emulate LOCK'ed INC using atomic helper
  target-i386: emulate LOCK'ed OP instructions using atomic helpers
  target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers
  tcg: Emit barriers with parallel_cpus
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Branch: master
Peter Maydell, 2016-10-27 14:06:34 +01:00, commit 5929d7e8a0
43 changed files with 2368 additions and 1113 deletions
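As orientation before the per-file diffs: this series replaces QEMU's previous software paths for exclusive and locked guest operations (the EXCP_STREX handlers, x86's helper_lock(), Alpha's EXCP_STL_C/STQ_C cases, all removed below) with host compare-and-swap through new TCG atomic helpers. The sketch below is illustration only; the function names are invented and are not part of this pull request. The core idea is that a guest load-exclusive/store-exclusive pair becomes a host cmpxchg against the value the load observed.

    /* Illustration only: emulate an LL/SC pair with a host compare-and-swap.
     * Real QEMU keeps the monitor state in the CPU state (exclusive_addr,
     * exclusive_val) and goes through the TCG atomic helpers instead. */
    #include <stdbool.h>
    #include <stdint.h>

    static uintptr_t exclusive_addr = -1;
    static uint64_t exclusive_val;

    static uint64_t emulate_load_exclusive(uint64_t *haddr)
    {
        exclusive_addr = (uintptr_t)haddr;
        exclusive_val = __atomic_load_n(haddr, __ATOMIC_ACQUIRE);
        return exclusive_val;
    }

    /* Returns true on success, mirroring a store-exclusive status of 0. */
    static bool emulate_store_exclusive(uint64_t *haddr, uint64_t newval)
    {
        uint64_t expected = exclusive_val;
        if ((uintptr_t)haddr != exclusive_addr) {
            return false;   /* exclusive monitor was lost */
        }
        exclusive_addr = -1;
        /* Succeeds only if no other thread changed the value in between. */
        return __atomic_compare_exchange_n(haddr, &expected, newval, false,
                                           __ATOMIC_RELEASE, __ATOMIC_RELAXED);
    }

When the host cannot perform the required width atomically (for example a 16-byte access without cmpxchg16b), the new EXCP_ATOMIC machinery below runs the single instruction with every other vCPU stopped instead.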


@ -89,7 +89,7 @@ endif
#######################################################################
# Target-independent parts used in system and user emulation
common-obj-y += tcg-runtime.o cpus-common.o
common-obj-y += cpus-common.o
common-obj-y += hw/
common-obj-y += qom/
common-obj-y += disas/


@ -94,6 +94,7 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
obj-y += target-$(TARGET_BASE_ARCH)/
obj-y += disas.o
obj-y += tcg-runtime.o
obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o

atomic_template.h (new file)

@ -0,0 +1,215 @@
/*
* Atomic helper templates
* Included from tcg-runtime.c and cputlb.c.
*
* Copyright (c) 2016 Red Hat, Inc
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#if DATA_SIZE == 16
# define SUFFIX o
# define DATA_TYPE Int128
# define BSWAP bswap128
#elif DATA_SIZE == 8
# define SUFFIX q
# define DATA_TYPE uint64_t
# define BSWAP bswap64
#elif DATA_SIZE == 4
# define SUFFIX l
# define DATA_TYPE uint32_t
# define BSWAP bswap32
#elif DATA_SIZE == 2
# define SUFFIX w
# define DATA_TYPE uint16_t
# define BSWAP bswap16
#elif DATA_SIZE == 1
# define SUFFIX b
# define DATA_TYPE uint8_t
# define BSWAP
#else
# error unsupported data size
#endif
#if DATA_SIZE >= 4
# define ABI_TYPE DATA_TYPE
#else
# define ABI_TYPE uint32_t
#endif
/* Define host-endian atomic operations. Note that END is used within
the ATOMIC_NAME macro, and redefined below. */
#if DATA_SIZE == 1
# define END
#elif defined(HOST_WORDS_BIGENDIAN)
# define END _be
#else
# define END _le
#endif
ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
}
#if DATA_SIZE >= 16
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
{
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
__atomic_load(haddr, &val, __ATOMIC_RELAXED);
return val;
}
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
__atomic_store(haddr, &val, __ATOMIC_RELAXED);
}
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
return atomic_xchg__nocheck(haddr, val);
}
#define GEN_ATOMIC_HELPER(X) \
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val EXTRA_ARGS) \
{ \
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
return atomic_##X(haddr, val); \
} \
GEN_ATOMIC_HELPER(fetch_add)
GEN_ATOMIC_HELPER(fetch_and)
GEN_ATOMIC_HELPER(fetch_or)
GEN_ATOMIC_HELPER(fetch_xor)
GEN_ATOMIC_HELPER(add_fetch)
GEN_ATOMIC_HELPER(and_fetch)
GEN_ATOMIC_HELPER(or_fetch)
GEN_ATOMIC_HELPER(xor_fetch)
#undef GEN_ATOMIC_HELPER
#endif /* DATA_SIZE >= 16 */
#undef END
#if DATA_SIZE > 1
/* Define reverse-host-endian atomic operations. Note that END is used
within the ATOMIC_NAME macro. */
#ifdef HOST_WORDS_BIGENDIAN
# define END _le
#else
# define END _be
#endif
ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)));
}
#if DATA_SIZE >= 16
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
{
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
__atomic_load(haddr, &val, __ATOMIC_RELAXED);
return BSWAP(val);
}
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
val = BSWAP(val);
__atomic_store(haddr, &val, __ATOMIC_RELAXED);
}
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
return BSWAP(atomic_xchg__nocheck(haddr, BSWAP(val)));
}
#define GEN_ATOMIC_HELPER(X) \
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val EXTRA_ARGS) \
{ \
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
return BSWAP(atomic_##X(haddr, BSWAP(val))); \
}
GEN_ATOMIC_HELPER(fetch_and)
GEN_ATOMIC_HELPER(fetch_or)
GEN_ATOMIC_HELPER(fetch_xor)
GEN_ATOMIC_HELPER(and_fetch)
GEN_ATOMIC_HELPER(or_fetch)
GEN_ATOMIC_HELPER(xor_fetch)
#undef GEN_ATOMIC_HELPER
/* Note that for addition, we need to use a separate cmpxchg loop instead
of bswaps for the reverse-host-endian helpers. */
ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ldo, ldn, ret, sto;
ldo = atomic_read__nocheck(haddr);
while (1) {
ret = BSWAP(ldo);
sto = BSWAP(ret + val);
ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
if (ldn == ldo) {
return ret;
}
ldo = ldn;
}
}
ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ldo, ldn, ret, sto;
ldo = atomic_read__nocheck(haddr);
while (1) {
ret = BSWAP(ldo) + val;
sto = BSWAP(ret);
ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
if (ldn == ldo) {
return ret;
}
ldo = ldn;
}
}
#endif /* DATA_SIZE >= 16 */
#undef END
#endif /* DATA_SIZE > 1 */
#undef BSWAP
#undef ABI_TYPE
#undef DATA_TYPE
#undef SUFFIX
#undef DATA_SIZE
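The note above the reverse-host-endian fetch_add/add_fetch helpers in atomic_template.h says they need a cmpxchg retry loop rather than the byte-swap wrappers used for the bitwise ops. The reason: byte-swapping distributes over AND/OR/XOR but not over addition, since a carry that crosses a byte boundary ends up in the wrong lane once the operands are swapped. A tiny standalone check (not part of the patch) makes this concrete:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t a = 0x000000ff;
        uint32_t b = 0x00000001;

        /* Swapping operands before adding puts the carry in the wrong lane:
         * the old low byte is now the high byte, so its carry is simply lost. */
        uint32_t swapped_then_added = __builtin_bswap32(a) + __builtin_bswap32(b);
        uint32_t added_then_swapped = __builtin_bswap32(a + b);

        /* Prints 00000000 vs 00010000 (not equal). */
        printf("%08x vs %08x\n", swapped_then_added, added_then_swapped);
        return 0;
    }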

configure

@ -1216,7 +1216,10 @@ case "$cpu" in
cc_i386='$(CC) -m32'
;;
x86_64)
CPU_CFLAGS="-m64"
# ??? Only extremely old AMD cpus do not have cmpxchg16b.
# If we truly care, we should simply detect this case at
# runtime and generate the fallback to serial emulation.
CPU_CFLAGS="-m64 -mcx16"
LDFLAGS="-m64 $LDFLAGS"
cc_i386='$(CC) -m32'
;;
@ -4521,6 +4524,55 @@ if compile_prog "" "" ; then
int128=yes
fi
#########################################
# See if 128-bit atomic operations are supported.
atomic128=no
if test "$int128" = "yes"; then
cat > $TMPC << EOF
int main(void)
{
unsigned __int128 x = 0, y = 0;
y = __atomic_load_16(&x, 0);
__atomic_store_16(&x, y, 0);
__atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
return 0;
}
EOF
if compile_prog "" "" ; then
atomic128=yes
fi
fi
#########################################
# See if 64-bit atomic operations are supported.
# Note that without __atomic builtins, we can only
# assume atomic loads/stores max at pointer size.
cat > $TMPC << EOF
#include <stdint.h>
int main(void)
{
uint64_t x = 0, y = 0;
#ifdef __ATOMIC_RELAXED
y = __atomic_load_8(&x, 0);
__atomic_store_8(&x, y, 0);
__atomic_compare_exchange_8(&x, &y, x, 0, 0, 0);
__atomic_exchange_8(&x, y, 0);
__atomic_fetch_add_8(&x, y, 0);
#else
typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1];
__sync_lock_test_and_set(&x, y);
__sync_val_compare_and_swap(&x, y, 0);
__sync_fetch_and_add(&x, y);
#endif
return 0;
}
EOF
if compile_prog "" "" ; then
atomic64=yes
fi
########################################
# check if getauxval is available.
@ -5483,6 +5535,14 @@ if test "$int128" = "yes" ; then
echo "CONFIG_INT128=y" >> $config_host_mak
fi
if test "$atomic128" = "yes" ; then
echo "CONFIG_ATOMIC128=y" >> $config_host_mak
fi
if test "$atomic64" = "yes" ; then
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
fi
if test "$getauxval" = "yes" ; then
echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
fi


@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
}
siglongjmp(cpu->jmp_env, 1);
}
void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
{
cpu->exception_index = EXCP_ATOMIC;
cpu_loop_exit_restore(cpu, pc);
}


@ -216,6 +216,36 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
}
#endif
static void cpu_exec_step(CPUState *cpu)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
tb_phys_invalidate(tb, -1);
tb_free(tb);
}
void cpu_exec_step_atomic(CPUState *cpu)
{
start_exclusive();
/* Since we got here, we know that parallel_cpus must be true. */
parallel_cpus = false;
cpu_exec_step(cpu);
parallel_cpus = true;
end_exclusive();
}
struct tb_desc {
target_ulong pc;
target_ulong cs_base;

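cpu_exec_step_atomic() above is the stop-the-world fallback behind the new EXCP_ATOMIC exception: a helper that cannot perform a required atomic operation on the host raises the exception with cpu_loop_exit_atomic(), and the outer loops (the cpus.c and linux-user hunks below) re-execute that single instruction inside start_exclusive()/end_exclusive() with parallel_cpus cleared. The sketch below shows the shape of such a fallback; the helper name and its 16-byte operation are invented purely for illustration.

    /* Sketch only: a hypothetical helper that needs a 16-byte atomic store.
     * When the host cannot provide it, fall back to the exclusive path. */
    void helper_example_sto(CPUArchState *env, target_ulong addr,
                            uint64_t lo, uint64_t hi)
    {
    #ifdef CONFIG_ATOMIC128
        /* ... use the 16-byte helpers generated from atomic_template.h ... */
    #else
        /* Raise EXCP_ATOMIC; the instruction is rerun by cpu_exec_step_atomic()
         * with every other vCPU stopped and parallel_cpus == false. */
        cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
    #endif
    }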
cpus.c

@ -1497,6 +1497,8 @@ static void tcg_exec_all(void)
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
} else if (r == EXCP_ATOMIC) {
cpu_exec_step_atomic(cpu);
}
} else if (cpu->stop || cpu->stopped) {
if (cpu->unplug) {

cputlb.c

@ -23,15 +23,15 @@
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
@ -498,6 +498,43 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
return qemu_ram_addr_from_host_nofail(p);
}
static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
target_ulong addr, uintptr_t retaddr, int size)
{
CPUState *cpu = ENV_GET_CPU(env);
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
uint64_t val;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
cpu->mem_io_vaddr = addr;
memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
return val;
}
static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
uint64_t val, target_ulong addr,
uintptr_t retaddr, int size)
{
CPUState *cpu = ENV_GET_CPU(env);
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
cpu->mem_io_vaddr = addr;
cpu->mem_io_pc = retaddr;
memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
}
/* Return true if ADDR is present in the victim tlb, and has been copied
back to the main tlb. */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
@ -527,34 +564,178 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
(ADDR) & TARGET_PAGE_MASK)
/* Probe for whether the specified guest write access is permitted.
* If it is not permitted then an exception will be taken in the same
* way as if this were a real write access (and we will not return).
* Otherwise the function will return, and there will be a valid
* entry in the TLB for this access.
*/
void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
uintptr_t retaddr)
{
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
if ((addr & TARGET_PAGE_MASK)
!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
/* TLB entry is for a different page */
if (!VICTIM_TLB_HIT(addr_write, addr)) {
tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
}
}
}
/* Probe for a read-modify-write atomic operation. Do not allow unaligned
* operations, or io operations to proceed. Return the host address. */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
TCGMemOpIdx oi, uintptr_t retaddr)
{
size_t mmu_idx = get_mmuidx(oi);
size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index];
target_ulong tlb_addr = tlbe->addr_write;
TCGMemOp mop = get_memop(oi);
int a_bits = get_alignment_bits(mop);
int s_bits = mop & MO_SIZE;
/* Adjust the given return address. */
retaddr -= GETPC_ADJ;
/* Enforce guest required alignment. */
if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
/* ??? Maybe indicate atomic op to cpu_unaligned_access */
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
mmu_idx, retaddr);
}
/* Enforce qemu required alignment. */
if (unlikely(addr & ((1 << s_bits) - 1))) {
/* We get here if guest alignment was not requested,
or was not enforced by cpu_unaligned_access above.
We might widen the access and emulate, but for now
mark an exception and exit the cpu loop. */
goto stop_the_world;
}
/* Check TLB entry and enforce page permissions. */
if ((addr & TARGET_PAGE_MASK)
!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
if (!VICTIM_TLB_HIT(addr_write, addr)) {
tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
}
tlb_addr = tlbe->addr_write;
}
/* Notice an IO access, or a notdirty page. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
/* There's really nothing that can be done to
support this apart from stop-the-world. */
goto stop_the_world;
}
/* Let the guest notice RMW on a write-only page. */
if (unlikely(tlbe->addr_read != tlb_addr)) {
tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_LOAD, mmu_idx, retaddr);
/* Since we don't support reads and writes to different addresses,
and we do have the proper page loaded for write, this shouldn't
ever return. But just in case, handle via stop-the-world. */
goto stop_the_world;
}
return (void *)((uintptr_t)addr + tlbe->addend);
stop_the_world:
cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
}
#ifdef TARGET_WORDS_BIGENDIAN
# define TGT_BE(X) (X)
# define TGT_LE(X) BSWAP(X)
#else
# define TGT_BE(X) BSWAP(X)
# define TGT_LE(X) (X)
#endif
#define MMUSUFFIX _mmu
#define SHIFT 0
#define DATA_SIZE 1
#include "softmmu_template.h"
#define SHIFT 1
#define DATA_SIZE 2
#include "softmmu_template.h"
#define SHIFT 2
#define DATA_SIZE 4
#include "softmmu_template.h"
#define SHIFT 3
#define DATA_SIZE 8
#include "softmmu_template.h"
/* First set of helpers allows passing in of OI and RETADDR. This makes
them callable from other helpers. */
#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define DATA_SIZE 1
#include "atomic_template.h"
#define DATA_SIZE 2
#include "atomic_template.h"
#define DATA_SIZE 4
#include "atomic_template.h"
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#ifdef CONFIG_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
/* Second set of helpers are directly callable from TCG as helpers. */
#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
#define DATA_SIZE 1
#include "atomic_template.h"
#define DATA_SIZE 2
#include "atomic_template.h"
#define DATA_SIZE 4
#include "atomic_template.h"
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
/* Code access functions. */
#undef MMUSUFFIX
#define MMUSUFFIX _cmmu
#undef GETPC
#define GETPC() ((uintptr_t)0)
#define SOFTMMU_CODE_ACCESS
#define SHIFT 0
#define DATA_SIZE 1
#include "softmmu_template.h"
#define SHIFT 1
#define DATA_SIZE 2
#include "softmmu_template.h"
#define SHIFT 2
#define DATA_SIZE 4
#include "softmmu_template.h"
#define SHIFT 3
#define DATA_SIZE 8
#include "softmmu_template.h"
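For reference, the two instantiation blocks above generate two families of functions per access size and endianness from the same template. Expanding the ATOMIC_NAME/SUFFIX/END macros by hand for DATA_SIZE == 4 on a little-endian host, the cmpxchg entry points come out roughly as below: the first form takes the memop index and return address explicitly so other helpers can call it, the second derives the return address itself via GETPC() and is the one TCG-generated code calls.

    /* Expansion sketch for DATA_SIZE == 4, little-endian host ordering. */
    uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr,
                                           uint32_t cmpv, uint32_t newv,
                                           TCGMemOpIdx oi, uintptr_t retaddr);

    uint32_t helper_atomic_cmpxchgl_le(CPUArchState *env, target_ulong addr,
                                       uint32_t cmpv, uint32_t newv,
                                       TCGMemOpIdx oi);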

exec.c

@ -352,9 +352,9 @@ static inline bool section_covers_addr(const MemoryRegionSection *section,
/* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
* the section must cover the entire address space.
*/
return section->size.hi ||
return int128_gethi(section->size) ||
range_covers_byte(section->offset_within_address_space,
section->size.lo, addr);
int128_getlo(section->size), addr);
}
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,


@ -31,6 +31,7 @@
#define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */
#define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */
#define EXCP_YIELD 0x10004 /* cpu wants to yield timeslice to another */
#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */
/* some important defines:
*


@ -59,6 +59,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
#if !defined(CONFIG_USER_ONLY)
void cpu_reloading_memory_map(void);


@ -80,6 +80,7 @@ void tcg_exec_init(unsigned long tb_size);
bool tcg_enabled(void);
void cpu_exec_init_all(void);
void cpu_exec_step_atomic(CPUState *cpu);
/**
* set_preferred_target_page_bits:


@ -99,15 +99,21 @@
* no effect on the generated code but not using the atomic primitives
* will get flagged by sanitizers as a violation.
*/
#define atomic_read__nocheck(ptr) \
__atomic_load_n(ptr, __ATOMIC_RELAXED)
#define atomic_read(ptr) \
({ \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
__atomic_load_n(ptr, __ATOMIC_RELAXED); \
atomic_read__nocheck(ptr); \
})
#define atomic_set__nocheck(ptr, i) \
__atomic_store_n(ptr, i, __ATOMIC_RELAXED)
#define atomic_set(ptr, i) do { \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
__atomic_store_n(ptr, i, __ATOMIC_RELAXED); \
atomic_set__nocheck(ptr, i); \
} while(0)
/* See above: most compilers currently treat consume and acquire the
@ -151,20 +157,27 @@
/* All the remaining operations are fully sequentially consistent */
#define atomic_xchg__nocheck(ptr, i) ({ \
__atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST); \
})
#define atomic_xchg(ptr, i) ({ \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
__atomic_exchange_n(ptr, i, __ATOMIC_SEQ_CST); \
atomic_xchg__nocheck(ptr, i); \
})
/* Returns the eventual value, failed or not */
#define atomic_cmpxchg(ptr, old, new) \
({ \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
#define atomic_cmpxchg__nocheck(ptr, old, new) ({ \
typeof_strip_qual(*ptr) _old = (old); \
__atomic_compare_exchange_n(ptr, &_old, new, false, \
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
_old; \
})
})
#define atomic_cmpxchg(ptr, old, new) ({ \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
atomic_cmpxchg__nocheck(ptr, old, new); \
})
/* Provide shorter names for GCC atomic builtins, return old value */
#define atomic_fetch_inc(ptr) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)
@ -173,6 +186,15 @@
#define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_or(ptr, n) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_inc_fetch(ptr) __atomic_add_fetch(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_dec_fetch(ptr) __atomic_sub_fetch(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_or_fetch(ptr, n) __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST)
/* And even shorter names that return void. */
#define atomic_inc(ptr) ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST))
@ -181,6 +203,7 @@
#define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_or(ptr, n) ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_xor(ptr, n) ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST))
#else /* __ATOMIC_RELAXED */
@ -269,8 +292,11 @@
/* These will only be atomic if the processor does the fetch or store
* in a single issue memory operation
*/
#define atomic_read(ptr) (*(__typeof__(*ptr) volatile*) (ptr))
#define atomic_set(ptr, i) ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))
#define atomic_read__nocheck(p) (*(__typeof__(*(p)) volatile*) (p))
#define atomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i))
#define atomic_read(ptr) atomic_read__nocheck(ptr)
#define atomic_set(ptr, i) atomic_set__nocheck(ptr,i)
/**
* atomic_rcu_read - reads a RCU-protected pointer to a local variable
@ -331,15 +357,27 @@
#define atomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i))
#endif
#endif
#define atomic_xchg__nocheck atomic_xchg
/* Provide shorter names for GCC atomic builtins. */
#define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1)
#define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1)
#define atomic_fetch_add __sync_fetch_and_add
#define atomic_fetch_sub __sync_fetch_and_sub
#define atomic_fetch_and __sync_fetch_and_and
#define atomic_fetch_or __sync_fetch_and_or
#define atomic_cmpxchg __sync_val_compare_and_swap
#define atomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n)
#define atomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n)
#define atomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n)
#define atomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n)
#define atomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n)
#define atomic_inc_fetch(ptr) __sync_add_and_fetch(ptr, 1)
#define atomic_dec_fetch(ptr) __sync_add_and_fetch(ptr, -1)
#define atomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n)
#define atomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n)
#define atomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n)
#define atomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n)
#define atomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n)
#define atomic_cmpxchg(ptr, old, new) __sync_val_compare_and_swap(ptr, old, new)
#define atomic_cmpxchg__nocheck(ptr, old, new) atomic_cmpxchg(ptr, old, new)
/* And even shorter names that return void. */
#define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1))
@ -348,6 +386,7 @@
#define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n))
#define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n))
#define atomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n))
#define atomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n))
#endif /* __ATOMIC_RELAXED */
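The new __nocheck variants in qemu/atomic.h above let the atomic helpers operate on 8- and 16-byte values even when those are wider than a host pointer: the checked macros keep the QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)) guard for ordinary uses, while the __nocheck forms skip it and rely on configure's CONFIG_ATOMIC64/CONFIG_ATOMIC128 probes to ensure the host really can do the access atomically. A small usage sketch (QEMU-internal code, assuming a 32-bit host):

    #include "qemu/atomic.h"

    static uint64_t counter;

    static void bump(void)
    {
        /* atomic_read(&counter) would fail the build-time size check on a
         * 32-bit host; the __nocheck form compiles, and is only actually
         * atomic when the host provides 8-byte atomics (CONFIG_ATOMIC64). */
        uint64_t old = atomic_read__nocheck(&counter);
        atomic_cmpxchg__nocheck(&counter, old, old + 1);
    }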


@ -1,6 +1,149 @@
#ifndef INT128_H
#define INT128_H
#ifdef CONFIG_INT128
#include "qemu/bswap.h"
typedef __int128_t Int128;
static inline Int128 int128_make64(uint64_t a)
{
return a;
}
static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
{
return (__uint128_t)hi << 64 | lo;
}
static inline uint64_t int128_get64(Int128 a)
{
uint64_t r = a;
assert(r == a);
return r;
}
static inline uint64_t int128_getlo(Int128 a)
{
return a;
}
static inline int64_t int128_gethi(Int128 a)
{
return a >> 64;
}
static inline Int128 int128_zero(void)
{
return 0;
}
static inline Int128 int128_one(void)
{
return 1;
}
static inline Int128 int128_2_64(void)
{
return (Int128)1 << 64;
}
static inline Int128 int128_exts64(int64_t a)
{
return a;
}
static inline Int128 int128_and(Int128 a, Int128 b)
{
return a & b;
}
static inline Int128 int128_rshift(Int128 a, int n)
{
return a >> n;
}
static inline Int128 int128_add(Int128 a, Int128 b)
{
return a + b;
}
static inline Int128 int128_neg(Int128 a)
{
return -a;
}
static inline Int128 int128_sub(Int128 a, Int128 b)
{
return a - b;
}
static inline bool int128_nonneg(Int128 a)
{
return a >= 0;
}
static inline bool int128_eq(Int128 a, Int128 b)
{
return a == b;
}
static inline bool int128_ne(Int128 a, Int128 b)
{
return a != b;
}
static inline bool int128_ge(Int128 a, Int128 b)
{
return a >= b;
}
static inline bool int128_lt(Int128 a, Int128 b)
{
return a < b;
}
static inline bool int128_le(Int128 a, Int128 b)
{
return a <= b;
}
static inline bool int128_gt(Int128 a, Int128 b)
{
return a > b;
}
static inline bool int128_nz(Int128 a)
{
return a != 0;
}
static inline Int128 int128_min(Int128 a, Int128 b)
{
return a < b ? a : b;
}
static inline Int128 int128_max(Int128 a, Int128 b)
{
return a > b ? a : b;
}
static inline void int128_addto(Int128 *a, Int128 b)
{
*a += b;
}
static inline void int128_subfrom(Int128 *a, Int128 b)
{
*a -= b;
}
static inline Int128 bswap128(Int128 a)
{
return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a)));
}
#else /* !CONFIG_INT128 */
typedef struct Int128 Int128;
@ -14,12 +157,27 @@ static inline Int128 int128_make64(uint64_t a)
return (Int128) { a, 0 };
}
static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
{
return (Int128) { lo, hi };
}
static inline uint64_t int128_get64(Int128 a)
{
assert(!a.hi);
return a.lo;
}
static inline uint64_t int128_getlo(Int128 a)
{
return a.lo;
}
static inline int64_t int128_gethi(Int128 a)
{
return a.hi;
}
static inline Int128 int128_zero(void)
{
return int128_make64(0);
@ -53,9 +211,9 @@ static inline Int128 int128_rshift(Int128 a, int n)
}
h = a.hi >> (n & 63);
if (n >= 64) {
return (Int128) { h, h >> 63 };
return int128_make128(h, h >> 63);
} else {
return (Int128) { (a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h };
return int128_make128((a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h);
}
}
@ -69,18 +227,18 @@ static inline Int128 int128_add(Int128 a, Int128 b)
*
* So the carry is lo < a.lo.
*/
return (Int128) { lo, (uint64_t)a.hi + b.hi + (lo < a.lo) };
return int128_make128(lo, (uint64_t)a.hi + b.hi + (lo < a.lo));
}
static inline Int128 int128_neg(Int128 a)
{
uint64_t lo = -a.lo;
return (Int128) { lo, ~(uint64_t)a.hi + !lo };
return int128_make128(lo, ~(uint64_t)a.hi + !lo);
}
static inline Int128 int128_sub(Int128 a, Int128 b)
{
return (Int128){ a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo) };
return int128_make128(a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo));
}
static inline bool int128_nonneg(Int128 a)
@ -143,4 +301,5 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
*a = int128_sub(*a, b);
}
#endif
#endif /* CONFIG_INT128 */
#endif /* INT128_H */
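The Int128 additions above (int128_make128, int128_getlo/int128_gethi, bswap128) are the building blocks for the 16-byte atomic template and the ARM paired-cmpxchg helpers further down. A brief usage sketch, assuming CONFIG_INT128 so the native __int128 implementation is selected:

    #include "qemu/int128.h"

    static Int128 example(uint64_t lo, uint64_t hi)
    {
        Int128 v = int128_make128(lo, hi);      /* hi:lo as one 128-bit value */
        Int128 swapped = bswap128(v);           /* byte-reverse all 16 bytes  */

        /* After the swap the halves trade places and are themselves swapped:
         * int128_getlo(swapped) == bswap64(hi),
         * int128_gethi(swapped) == bswap64(lo). */
        return swapped;
    }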


@ -354,6 +354,9 @@ void cpu_loop(CPUX86State *env)
}
}
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
pc = env->segs[R_CS].base + env->eip;
EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
@ -550,94 +553,6 @@ do_kernel_trap(CPUARMState *env)
return 0;
}
/* Store exclusive handling for AArch32 */
static int do_strex(CPUARMState *env)
{
uint64_t val;
int size;
int rc = 1;
int segv = 0;
uint32_t addr;
start_exclusive();
if (env->exclusive_addr != env->exclusive_test) {
goto fail;
}
/* We know we're always AArch32 so the address is in uint32_t range
* unless it was the -1 exclusive-monitor-lost value (which won't
* match exclusive_test above).
*/
assert(extract64(env->exclusive_addr, 32, 32) == 0);
addr = env->exclusive_addr;
size = env->exclusive_info & 0xf;
switch (size) {
case 0:
segv = get_user_u8(val, addr);
break;
case 1:
segv = get_user_data_u16(val, addr, env);
break;
case 2:
case 3:
segv = get_user_data_u32(val, addr, env);
break;
default:
abort();
}
if (segv) {
env->exception.vaddress = addr;
goto done;
}
if (size == 3) {
uint32_t valhi;
segv = get_user_data_u32(valhi, addr + 4, env);
if (segv) {
env->exception.vaddress = addr + 4;
goto done;
}
if (arm_cpu_bswap_data(env)) {
val = deposit64((uint64_t)valhi, 32, 32, val);
} else {
val = deposit64(val, 32, 32, valhi);
}
}
if (val != env->exclusive_val) {
goto fail;
}
val = env->regs[(env->exclusive_info >> 8) & 0xf];
switch (size) {
case 0:
segv = put_user_u8(val, addr);
break;
case 1:
segv = put_user_data_u16(val, addr, env);
break;
case 2:
case 3:
segv = put_user_data_u32(val, addr, env);
break;
}
if (segv) {
env->exception.vaddress = addr;
goto done;
}
if (size == 3) {
val = env->regs[(env->exclusive_info >> 12) & 0xf];
segv = put_user_data_u32(val, addr + 4, env);
if (segv) {
env->exception.vaddress = addr + 4;
goto done;
}
}
rc = 0;
fail:
env->regs[15] += 4;
env->regs[(env->exclusive_info >> 4) & 0xf] = rc;
done:
end_exclusive();
return segv;
}
void cpu_loop(CPUARMState *env)
{
CPUState *cs = CPU(arm_env_get_cpu(env));
@ -812,11 +727,6 @@ void cpu_loop(CPUARMState *env)
case EXCP_INTERRUPT:
/* just indicate that signals should be handled asap */
break;
case EXCP_STREX:
if (!do_strex(env)) {
break;
}
/* fall through for segv */
case EXCP_PREFETCH_ABORT:
case EXCP_DATA_ABORT:
addr = env->exception.vaddress;
@ -851,6 +761,9 @@ void cpu_loop(CPUARMState *env)
case EXCP_YIELD:
/* nothing to do here for user-mode, just resume guest code */
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
error:
EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr);
@ -862,124 +775,6 @@ void cpu_loop(CPUARMState *env)
#else
/*
* Handle AArch64 store-release exclusive
*
* rs = gets the status result of store exclusive
* rt = is the register that is stored
* rt2 = is the second register store (in STP)
*
*/
static int do_strex_a64(CPUARMState *env)
{
uint64_t val;
int size;
bool is_pair;
int rc = 1;
int segv = 0;
uint64_t addr;
int rs, rt, rt2;
start_exclusive();
/* size | is_pair << 2 | (rs << 4) | (rt << 9) | (rt2 << 14)); */
size = extract32(env->exclusive_info, 0, 2);
is_pair = extract32(env->exclusive_info, 2, 1);
rs = extract32(env->exclusive_info, 4, 5);
rt = extract32(env->exclusive_info, 9, 5);
rt2 = extract32(env->exclusive_info, 14, 5);
addr = env->exclusive_addr;
if (addr != env->exclusive_test) {
goto finish;
}
switch (size) {
case 0:
segv = get_user_u8(val, addr);
break;
case 1:
segv = get_user_u16(val, addr);
break;
case 2:
segv = get_user_u32(val, addr);
break;
case 3:
segv = get_user_u64(val, addr);
break;
default:
abort();
}
if (segv) {
env->exception.vaddress = addr;
goto error;
}
if (val != env->exclusive_val) {
goto finish;
}
if (is_pair) {
if (size == 2) {
segv = get_user_u32(val, addr + 4);
} else {
segv = get_user_u64(val, addr + 8);
}
if (segv) {
env->exception.vaddress = addr + (size == 2 ? 4 : 8);
goto error;
}
if (val != env->exclusive_high) {
goto finish;
}
}
/* handle the zero register */
val = rt == 31 ? 0 : env->xregs[rt];
switch (size) {
case 0:
segv = put_user_u8(val, addr);
break;
case 1:
segv = put_user_u16(val, addr);
break;
case 2:
segv = put_user_u32(val, addr);
break;
case 3:
segv = put_user_u64(val, addr);
break;
}
if (segv) {
goto error;
}
if (is_pair) {
/* handle the zero register */
val = rt2 == 31 ? 0 : env->xregs[rt2];
if (size == 2) {
segv = put_user_u32(val, addr + 4);
} else {
segv = put_user_u64(val, addr + 8);
}
if (segv) {
env->exception.vaddress = addr + (size == 2 ? 4 : 8);
goto error;
}
}
rc = 0;
finish:
env->pc += 4;
/* rs == 31 encodes a write to the ZR, thus throwing away
* the status return. This is rather silly but valid.
*/
if (rs < 31) {
env->xregs[rs] = rc;
}
error:
/* instruction faulted, PC does not advance */
/* either way a strex releases any exclusive lock we have */
env->exclusive_addr = -1;
end_exclusive();
return segv;
}
/* AArch64 main loop */
void cpu_loop(CPUARMState *env)
{
@ -1021,11 +816,6 @@ void cpu_loop(CPUARMState *env)
info._sifields._sigfault._addr = env->pc;
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
break;
case EXCP_STREX:
if (!do_strex_a64(env)) {
break;
}
/* fall through for segv */
case EXCP_PREFETCH_ABORT:
case EXCP_DATA_ABORT:
info.si_signo = TARGET_SIGSEGV;
@ -1051,6 +841,9 @@ void cpu_loop(CPUARMState *env)
case EXCP_YIELD:
/* nothing to do here for user-mode, just resume guest code */
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr);
abort();
@ -1058,8 +851,6 @@ void cpu_loop(CPUARMState *env)
process_pending_signals(env);
/* Exception return on AArch64 always clears the exclusive monitor,
* so any return to running guest code implies this.
* A strex (successful or otherwise) also clears the monitor, so
* we don't need to specialcase EXCP_STREX.
*/
env->exclusive_addr = -1;
}
@ -1142,6 +933,9 @@ void cpu_loop(CPUUniCore32State *env)
}
}
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
goto error;
}
@ -1415,6 +1209,9 @@ void cpu_loop (CPUSPARCState *env)
}
}
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
printf ("Unhandled trap: 0x%x\n", trapnr);
cpu_dump_state(cs, stderr, fprintf, 0);
@ -1954,6 +1751,9 @@ void cpu_loop(CPUPPCState *env)
case EXCP_INTERRUPT:
/* just indicate that signals should be handled asap */
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
cpu_abort(cs, "Unknown exception 0x%x. Aborting\n", trapnr);
break;
@ -2649,6 +2449,9 @@ done_syscall:
}
}
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
error:
EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr);
@ -2736,6 +2539,9 @@ void cpu_loop(CPUOpenRISCState *env)
case EXCP_NR:
qemu_log_mask(CPU_LOG_INT, "\nNR\n");
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n",
trapnr);
@ -2812,6 +2618,9 @@ void cpu_loop(CPUSH4State *env)
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
printf ("Unhandled trap: 0x%x\n", trapnr);
cpu_dump_state(cs, stderr, fprintf, 0);
@ -2879,6 +2688,9 @@ void cpu_loop(CPUCRISState *env)
}
}
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
printf ("Unhandled trap: 0x%x\n", trapnr);
cpu_dump_state(cs, stderr, fprintf, 0);
@ -2995,6 +2807,9 @@ void cpu_loop(CPUMBState *env)
}
}
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
printf ("Unhandled trap: 0x%x\n", trapnr);
cpu_dump_state(cs, stderr, fprintf, 0);
@ -3098,6 +2913,9 @@ void cpu_loop(CPUM68KState *env)
}
}
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr);
abort();
@ -3108,51 +2926,6 @@ void cpu_loop(CPUM68KState *env)
#endif /* TARGET_M68K */
#ifdef TARGET_ALPHA
static void do_store_exclusive(CPUAlphaState *env, int reg, int quad)
{
target_ulong addr, val, tmp;
target_siginfo_t info;
int ret = 0;
addr = env->lock_addr;
tmp = env->lock_st_addr;
env->lock_addr = -1;
env->lock_st_addr = 0;
start_exclusive();
mmap_lock();
if (addr == tmp) {
if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) {
goto do_sigsegv;
}
if (val == env->lock_value) {
tmp = env->ir[reg];
if (quad ? put_user_u64(tmp, addr) : put_user_u32(tmp, addr)) {
goto do_sigsegv;
}
ret = 1;
}
}
env->ir[reg] = ret;
env->pc += 4;
mmap_unlock();
end_exclusive();
return;
do_sigsegv:
mmap_unlock();
end_exclusive();
info.si_signo = TARGET_SIGSEGV;
info.si_errno = 0;
info.si_code = TARGET_SEGV_MAPERR;
info._sifields._sigfault._addr = addr;
queue_signal(env, TARGET_SIGSEGV, QEMU_SI_FAULT, &info);
}
void cpu_loop(CPUAlphaState *env)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
@ -3327,13 +3100,12 @@ void cpu_loop(CPUAlphaState *env)
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
}
break;
case EXCP_STL_C:
case EXCP_STQ_C:
do_store_exclusive(env, env->error_code, trapnr - EXCP_STL_C);
break;
case EXCP_INTERRUPT:
/* Just indicate that signals should be handled asap. */
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
printf ("Unhandled trap: 0x%x\n", trapnr);
cpu_dump_state(cs, stderr, fprintf, 0);
@ -3463,6 +3235,9 @@ void cpu_loop(CPUS390XState *env)
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr);
cpu_dump_state(cs, stderr, fprintf, 0);
@ -3717,6 +3492,9 @@ void cpu_loop(CPUTLGState *env)
case TILEGX_EXCP_REG_UDN_ACCESS:
gen_sigill_reg(env);
break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
default:
fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr);
g_assert_not_reached();


@ -6164,6 +6164,14 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
sigfillset(&sigmask);
sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask);
/* If this is our first additional thread, we need to ensure we
* generate code for parallel execution and flush old translations.
*/
if (!parallel_cpus) {
parallel_cpus = true;
tb_flush(cpu);
}
ret = pthread_create(&info.thread, &attr, clone_func, &info);
/* TODO: Free new CPU state if thread creation failed. */


@ -21,12 +21,6 @@
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/timer.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
#define DATA_SIZE (1 << SHIFT)
#if DATA_SIZE == 8
#define SUFFIX q
#define LSUFFIX q
@ -84,14 +78,6 @@
# define BSWAP(X) (X)
#endif
#ifdef TARGET_WORDS_BIGENDIAN
# define TGT_BE(X) (X)
# define TGT_LE(X) BSWAP(X)
#else
# define TGT_BE(X) BSWAP(X)
# define TGT_LE(X) (X)
#endif
#if DATA_SIZE == 1
# define helper_le_ld_name glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
# define helper_be_ld_name helper_le_ld_name
@ -108,35 +94,14 @@
# define helper_be_st_name glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
#endif
#ifdef TARGET_WORDS_BIGENDIAN
# define helper_te_ld_name helper_be_ld_name
# define helper_te_st_name helper_be_st_name
#else
# define helper_te_ld_name helper_le_ld_name
# define helper_te_st_name helper_le_st_name
#endif
#ifndef SOFTMMU_CODE_ACCESS
static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
CPUIOTLBEntry *iotlbentry,
size_t mmu_idx, size_t index,
target_ulong addr,
uintptr_t retaddr)
{
uint64_t val;
CPUState *cpu = ENV_GET_CPU(env);
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
cpu->mem_io_vaddr = addr;
memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT,
iotlbentry->attrs);
return val;
CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
return io_readx(env, iotlbentry, addr, retaddr, DATA_SIZE);
}
#endif
@ -167,15 +132,13 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
/* Handle an IO access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
CPUIOTLBEntry *iotlbentry;
if ((addr & (DATA_SIZE - 1)) != 0) {
goto do_unaligned_access;
}
iotlbentry = &env->iotlb[mmu_idx][index];
/* ??? Note that the io helpers always read data in the target
byte ordering. We should push the LE/BE request down into io. */
res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr);
res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr);
res = TGT_LE(res);
return res;
}
@ -236,15 +199,13 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
/* Handle an IO access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
CPUIOTLBEntry *iotlbentry;
if ((addr & (DATA_SIZE - 1)) != 0) {
goto do_unaligned_access;
}
iotlbentry = &env->iotlb[mmu_idx][index];
/* ??? Note that the io helpers always read data in the target
byte ordering. We should push the LE/BE request down into io. */
res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr);
res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr);
res = TGT_BE(res);
return res;
}
@ -295,24 +256,13 @@ WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr,
#endif
static inline void glue(io_write, SUFFIX)(CPUArchState *env,
CPUIOTLBEntry *iotlbentry,
size_t mmu_idx, size_t index,
DATA_TYPE val,
target_ulong addr,
uintptr_t retaddr)
{
CPUState *cpu = ENV_GET_CPU(env);
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
cpu->mem_io_vaddr = addr;
cpu->mem_io_pc = retaddr;
memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT,
iotlbentry->attrs);
CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
return io_writex(env, iotlbentry, val, addr, retaddr, DATA_SIZE);
}
void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
@ -340,16 +290,14 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
/* Handle an IO access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
CPUIOTLBEntry *iotlbentry;
if ((addr & (DATA_SIZE - 1)) != 0) {
goto do_unaligned_access;
}
iotlbentry = &env->iotlb[mmu_idx][index];
/* ??? Note that the io helpers always read data in the target
byte ordering. We should push the LE/BE request down into io. */
val = TGT_LE(val);
glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr);
return;
}
@ -418,16 +366,14 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
/* Handle an IO access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
CPUIOTLBEntry *iotlbentry;
if ((addr & (DATA_SIZE - 1)) != 0) {
goto do_unaligned_access;
}
iotlbentry = &env->iotlb[mmu_idx][index];
/* ??? Note that the io helpers always read data in the target
byte ordering. We should push the LE/BE request down into io. */
val = TGT_BE(val);
glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr);
return;
}
@ -466,33 +412,9 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
}
#endif /* DATA_SIZE > 1 */
#if DATA_SIZE == 1
/* Probe for whether the specified guest write access is permitted.
* If it is not permitted then an exception will be taken in the same
* way as if this were a real write access (and we will not return).
* Otherwise the function will return, and there will be a valid
* entry in the TLB for this access.
*/
void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
uintptr_t retaddr)
{
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
if ((addr & TARGET_PAGE_MASK)
!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
/* TLB entry is for a different page */
if (!VICTIM_TLB_HIT(addr_write, addr)) {
tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
}
}
}
#endif
#endif /* !defined(SOFTMMU_CODE_ACCESS) */
#undef READ_ACCESS_TYPE
#undef SHIFT
#undef DATA_TYPE
#undef SUFFIX
#undef LSUFFIX
@ -503,15 +425,9 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
#undef USUFFIX
#undef SSUFFIX
#undef BSWAP
#undef TGT_BE
#undef TGT_LE
#undef CPU_BE
#undef CPU_LE
#undef helper_le_ld_name
#undef helper_be_ld_name
#undef helper_le_lds_name
#undef helper_be_lds_name
#undef helper_le_st_name
#undef helper_be_st_name
#undef helper_te_ld_name
#undef helper_te_st_name

View File

@ -201,7 +201,7 @@ enum {
/* MMU modes definitions */
/* Alpha has 5 MMU modes: PALcode, kernel, executive, supervisor, and user.
/* Alpha has 5 MMU modes: PALcode, Kernel, Executive, Supervisor, and User.
The Unix PALcode only exposes the kernel and user modes; presumably
executive and supervisor are used by VMS.
@ -209,22 +209,18 @@ enum {
there are PALmode instructions that can access data via physical mode
or via an os-installed "alternate mode", which is one of the 4 above.
QEMU does not currently properly distinguish between code/data when
looking up addresses. To avoid having to address this issue, our
emulated PALcode will cheat and use the KSEG mapping for its code+data
rather than physical addresses.
That said, we're only emulating Unix PALcode, and not attempting VMS,
so we don't need to implement Executive and Supervisor. QEMU's own
PALcode cheats and uses the KSEG mapping for its code+data rather than
physical addresses. */
Moreover, we're only emulating Unix PALcode, and not attempting VMS.
All of which allows us to drop all but kernel and user modes.
Elide the unused MMU modes to save space. */
#define NB_MMU_MODES 2
#define NB_MMU_MODES 3
#define MMU_MODE0_SUFFIX _kernel
#define MMU_MODE1_SUFFIX _user
#define MMU_KERNEL_IDX 0
#define MMU_USER_IDX 1
#define MMU_PHYS_IDX 2
typedef struct CPUAlphaState CPUAlphaState;
@ -234,7 +230,6 @@ struct CPUAlphaState {
uint64_t pc;
uint64_t unique;
uint64_t lock_addr;
uint64_t lock_st_addr;
uint64_t lock_value;
/* The FPCR, and disassembled portions thereof. */
@ -350,9 +345,6 @@ enum {
EXCP_ARITH,
EXCP_FEN,
EXCP_CALL_PAL,
/* For Usermode emulation. */
EXCP_STL_C,
EXCP_STQ_C,
};
/* Alpha-specific interrupt pending bits. */


@ -126,6 +126,14 @@ static int get_physical_address(CPUAlphaState *env, target_ulong addr,
int prot = 0;
int ret = MM_K_ACV;
/* Handle physical accesses. */
if (mmu_idx == MMU_PHYS_IDX) {
phys = addr;
prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
ret = -1;
goto exit;
}
/* Ensure that the virtual address is properly sign-extended from
the last implemented virtual address bit. */
if (saddr >> TARGET_VIRT_ADDR_SPACE_BITS != saddr >> 63) {
@ -298,12 +306,6 @@ void alpha_cpu_do_interrupt(CPUState *cs)
case EXCP_CALL_PAL:
name = "call_pal";
break;
case EXCP_STL_C:
name = "stl_c";
break;
case EXCP_STQ_C:
name = "stq_c";
break;
}
qemu_log("INT %6d: %s(%#x) pc=%016" PRIx64 " sp=%016" PRIx64 "\n",
++count, name, env->error_code, env->pc, env->ir[IR_SP]);


@ -92,15 +92,6 @@ DEF_HELPER_FLAGS_2(ieee_input_cmp, TCG_CALL_NO_WG, void, env, i64)
DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64)
#if !defined (CONFIG_USER_ONLY)
DEF_HELPER_2(ldl_phys, i64, env, i64)
DEF_HELPER_2(ldq_phys, i64, env, i64)
DEF_HELPER_2(ldl_l_phys, i64, env, i64)
DEF_HELPER_2(ldq_l_phys, i64, env, i64)
DEF_HELPER_3(stl_phys, void, env, i64, i64)
DEF_HELPER_3(stq_phys, void, env, i64, i64)
DEF_HELPER_3(stl_c_phys, i64, env, i64, i64)
DEF_HELPER_3(stq_c_phys, i64, env, i64, i64)
DEF_HELPER_FLAGS_1(tbia, TCG_CALL_NO_RWG, void, env)
DEF_HELPER_FLAGS_2(tbis, TCG_CALL_NO_RWG, void, env, i64)
DEF_HELPER_FLAGS_1(tb_flush, TCG_CALL_NO_RWG, void, env)


@ -45,8 +45,6 @@ static VMStateField vmstate_env_fields[] = {
VMSTATE_UINTTL(unique, CPUAlphaState),
VMSTATE_UINTTL(lock_addr, CPUAlphaState),
VMSTATE_UINTTL(lock_value, CPUAlphaState),
/* Note that lock_st_addr is not saved; it is a temporary
used during the execution of the st[lq]_c insns. */
VMSTATE_UINT8(ps, CPUAlphaState),
VMSTATE_UINT8(intr_flag, CPUAlphaState),


@ -25,79 +25,6 @@
/* Softmmu support */
#ifndef CONFIG_USER_ONLY
uint64_t helper_ldl_phys(CPUAlphaState *env, uint64_t p)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
return (int32_t)ldl_phys(cs->as, p);
}
uint64_t helper_ldq_phys(CPUAlphaState *env, uint64_t p)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
return ldq_phys(cs->as, p);
}
uint64_t helper_ldl_l_phys(CPUAlphaState *env, uint64_t p)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
env->lock_addr = p;
return env->lock_value = (int32_t)ldl_phys(cs->as, p);
}
uint64_t helper_ldq_l_phys(CPUAlphaState *env, uint64_t p)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
env->lock_addr = p;
return env->lock_value = ldq_phys(cs->as, p);
}
void helper_stl_phys(CPUAlphaState *env, uint64_t p, uint64_t v)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
stl_phys(cs->as, p, v);
}
void helper_stq_phys(CPUAlphaState *env, uint64_t p, uint64_t v)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
stq_phys(cs->as, p, v);
}
uint64_t helper_stl_c_phys(CPUAlphaState *env, uint64_t p, uint64_t v)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
uint64_t ret = 0;
if (p == env->lock_addr) {
int32_t old = ldl_phys(cs->as, p);
if (old == (int32_t)env->lock_value) {
stl_phys(cs->as, p, v);
ret = 1;
}
}
env->lock_addr = -1;
return ret;
}
uint64_t helper_stq_c_phys(CPUAlphaState *env, uint64_t p, uint64_t v)
{
CPUState *cs = CPU(alpha_env_get_cpu(env));
uint64_t ret = 0;
if (p == env->lock_addr) {
uint64_t old = ldq_phys(cs->as, p);
if (old == env->lock_value) {
stq_phys(cs->as, p, v);
ret = 1;
}
}
env->lock_addr = -1;
return ret;
}
void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr)


@ -99,7 +99,6 @@ static TCGv cpu_std_ir[31];
static TCGv cpu_fir[31];
static TCGv cpu_pc;
static TCGv cpu_lock_addr;
static TCGv cpu_lock_st_addr;
static TCGv cpu_lock_value;
#ifndef CONFIG_USER_ONLY
@ -116,7 +115,6 @@ void alpha_translate_init(void)
static const GlobalVar vars[] = {
DEF_VAR(pc),
DEF_VAR(lock_addr),
DEF_VAR(lock_st_addr),
DEF_VAR(lock_value),
};
@ -198,6 +196,23 @@ static TCGv dest_sink(DisasContext *ctx)
return ctx->sink;
}
static void free_context_temps(DisasContext *ctx)
{
if (!TCGV_IS_UNUSED_I64(ctx->sink)) {
tcg_gen_discard_i64(ctx->sink);
tcg_temp_free(ctx->sink);
TCGV_UNUSED_I64(ctx->sink);
}
if (!TCGV_IS_UNUSED_I64(ctx->zero)) {
tcg_temp_free(ctx->zero);
TCGV_UNUSED_I64(ctx->zero);
}
if (!TCGV_IS_UNUSED_I64(ctx->lit)) {
tcg_temp_free(ctx->lit);
TCGV_UNUSED_I64(ctx->lit);
}
}
static TCGv load_gpr(DisasContext *ctx, unsigned reg)
{
if (likely(reg < 31)) {
@ -392,59 +407,40 @@ static inline void gen_store_mem(DisasContext *ctx,
}
static ExitStatus gen_store_conditional(DisasContext *ctx, int ra, int rb,
int32_t disp16, int quad)
int32_t disp16, int mem_idx,
TCGMemOp op)
{
TCGv addr;
if (ra == 31) {
/* ??? Don't bother storing anything. The user can't tell
the difference, since the zero register always reads zero. */
return NO_EXIT;
}
#if defined(CONFIG_USER_ONLY)
addr = cpu_lock_st_addr;
#else
addr = tcg_temp_local_new();
#endif
TCGLabel *lab_fail, *lab_done;
TCGv addr, val;
addr = tcg_temp_new_i64();
tcg_gen_addi_i64(addr, load_gpr(ctx, rb), disp16);
free_context_temps(ctx);
#if defined(CONFIG_USER_ONLY)
/* ??? This is handled via a complicated version of compare-and-swap
in the cpu_loop. Hopefully one day we'll have a real CAS opcode
in TCG so that this isn't necessary. */
return gen_excp(ctx, quad ? EXCP_STQ_C : EXCP_STL_C, ra);
#else
/* ??? In system mode we are never multi-threaded, so CAS can be
implemented via a non-atomic load-compare-store sequence. */
{
TCGLabel *lab_fail, *lab_done;
TCGv val;
lab_fail = gen_new_label();
lab_done = gen_new_label();
tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail);
tcg_temp_free_i64(addr);
lab_fail = gen_new_label();
lab_done = gen_new_label();
tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail);
val = tcg_temp_new_i64();
tcg_gen_atomic_cmpxchg_i64(val, cpu_lock_addr, cpu_lock_value,
load_gpr(ctx, ra), mem_idx, op);
free_context_temps(ctx);
val = tcg_temp_new();
tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, quad ? MO_LEQ : MO_LESL);
tcg_gen_brcond_i64(TCG_COND_NE, val, cpu_lock_value, lab_fail);
tcg_gen_qemu_st_i64(ctx->ir[ra], addr, ctx->mem_idx,
quad ? MO_LEQ : MO_LEUL);
tcg_gen_movi_i64(ctx->ir[ra], 1);
tcg_gen_br(lab_done);
gen_set_label(lab_fail);
tcg_gen_movi_i64(ctx->ir[ra], 0);
gen_set_label(lab_done);
tcg_gen_movi_i64(cpu_lock_addr, -1);
tcg_temp_free(addr);
return NO_EXIT;
if (ra != 31) {
tcg_gen_setcond_i64(TCG_COND_EQ, ctx->ir[ra], val, cpu_lock_value);
}
#endif
tcg_temp_free_i64(val);
tcg_gen_br(lab_done);
gen_set_label(lab_fail);
if (ra != 31) {
tcg_gen_movi_i64(ctx->ir[ra], 0);
}
gen_set_label(lab_done);
tcg_gen_movi_i64(cpu_lock_addr, -1);
return NO_EXIT;
}
static bool in_superpage(DisasContext *ctx, int64_t addr)
@ -2423,19 +2419,19 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
switch ((insn >> 12) & 0xF) {
case 0x0:
/* Longword physical access (hw_ldl/p) */
gen_helper_ldl_phys(va, cpu_env, addr);
tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LESL);
break;
case 0x1:
/* Quadword physical access (hw_ldq/p) */
gen_helper_ldq_phys(va, cpu_env, addr);
tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LEQ);
break;
case 0x2:
/* Longword physical access with lock (hw_ldl_l/p) */
gen_helper_ldl_l_phys(va, cpu_env, addr);
gen_qemu_ldl_l(va, addr, MMU_PHYS_IDX);
break;
case 0x3:
/* Quadword physical access with lock (hw_ldq_l/p) */
gen_helper_ldq_l_phys(va, cpu_env, addr);
gen_qemu_ldq_l(va, addr, MMU_PHYS_IDX);
break;
case 0x4:
/* Longword virtual PTE fetch (hw_ldl/v) */
@ -2674,27 +2670,34 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
#ifndef CONFIG_USER_ONLY
REQUIRE_TB_FLAG(TB_FLAGS_PAL_MODE);
{
TCGv addr = tcg_temp_new();
va = load_gpr(ctx, ra);
vb = load_gpr(ctx, rb);
tcg_gen_addi_i64(addr, vb, disp12);
switch ((insn >> 12) & 0xF) {
case 0x0:
/* Longword physical access */
gen_helper_stl_phys(cpu_env, addr, va);
va = load_gpr(ctx, ra);
vb = load_gpr(ctx, rb);
tmp = tcg_temp_new();
tcg_gen_addi_i64(tmp, vb, disp12);
tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LESL);
tcg_temp_free(tmp);
break;
case 0x1:
/* Quadword physical access */
gen_helper_stq_phys(cpu_env, addr, va);
va = load_gpr(ctx, ra);
vb = load_gpr(ctx, rb);
tmp = tcg_temp_new();
tcg_gen_addi_i64(tmp, vb, disp12);
tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LEQ);
tcg_temp_free(tmp);
break;
case 0x2:
/* Longword physical access with lock */
gen_helper_stl_c_phys(dest_gpr(ctx, ra), cpu_env, addr, va);
ret = gen_store_conditional(ctx, ra, rb, disp12,
MMU_PHYS_IDX, MO_LESL);
break;
case 0x3:
/* Quadword physical access with lock */
gen_helper_stq_c_phys(dest_gpr(ctx, ra), cpu_env, addr, va);
ret = gen_store_conditional(ctx, ra, rb, disp12,
MMU_PHYS_IDX, MO_LEQ);
break;
case 0x4:
/* Longword virtual access */
@ -2733,7 +2736,6 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
/* Invalid */
goto invalid_opc;
}
tcg_temp_free(addr);
break;
}
#else
@ -2797,11 +2799,13 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
break;
case 0x2E:
/* STL_C */
ret = gen_store_conditional(ctx, ra, rb, disp16, 0);
ret = gen_store_conditional(ctx, ra, rb, disp16,
ctx->mem_idx, MO_LESL);
break;
case 0x2F:
/* STQ_C */
ret = gen_store_conditional(ctx, ra, rb, disp16, 1);
ret = gen_store_conditional(ctx, ra, rb, disp16,
ctx->mem_idx, MO_LEQ);
break;
case 0x30:
/* BR */
@ -2906,6 +2910,10 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
/* Similarly for flush-to-zero. */
ctx.tb_ftz = -1;
TCGV_UNUSED_I64(ctx.zero);
TCGV_UNUSED_I64(ctx.sink);
TCGV_UNUSED_I64(ctx.lit);
num_insns = 0;
max_insns = tb->cflags & CF_COUNT_MASK;
if (max_insns == 0) {
@ -2940,23 +2948,9 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
}
insn = cpu_ldl_code(env, ctx.pc);
TCGV_UNUSED_I64(ctx.zero);
TCGV_UNUSED_I64(ctx.sink);
TCGV_UNUSED_I64(ctx.lit);
ctx.pc += 4;
ret = translate_one(ctxp, insn);
if (!TCGV_IS_UNUSED_I64(ctx.sink)) {
tcg_gen_discard_i64(ctx.sink);
tcg_temp_free(ctx.sink);
}
if (!TCGV_IS_UNUSED_I64(ctx.zero)) {
tcg_temp_free(ctx.zero);
}
if (!TCGV_IS_UNUSED_I64(ctx.lit)) {
tcg_temp_free(ctx.lit);
}
free_context_temps(ctxp);
/* If we reach a page boundary, are single stepping,
or exhaust instruction count, stop generation. */


@ -46,7 +46,6 @@
#define EXCP_BKPT 7
#define EXCP_EXCEPTION_EXIT 8 /* Return from v7M exception. */
#define EXCP_KERNEL_TRAP 9 /* Jumped to kernel code page. */
#define EXCP_STREX 10
#define EXCP_HVC 11 /* HyperVisor Call */
#define EXCP_HYP_TRAP 12
#define EXCP_SMC 13 /* Secure Monitor Call */
@ -475,10 +474,6 @@ typedef struct CPUARMState {
uint64_t exclusive_addr;
uint64_t exclusive_val;
uint64_t exclusive_high;
#if defined(CONFIG_USER_ONLY)
uint64_t exclusive_test;
uint32_t exclusive_info;
#endif
/* iwMMXt coprocessor state. */
struct {


@ -27,6 +27,10 @@
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "tcg.h"
#include <zlib.h> /* For crc32 */
/* C2.4.7 Multiply and divide */
@ -444,3 +448,112 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
/* Linux crc32c converts the output to one's complement. */
return crc32c(acc, buf, bytes) ^ 0xffffffff;
}
/* Returns 0 on success; 1 otherwise. */
uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
uint64_t new_lo, uint64_t new_hi)
{
uintptr_t ra = GETPC();
Int128 oldv, cmpv, newv;
bool success;
cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
newv = int128_make128(new_lo, new_hi);
if (parallel_cpus) {
#ifndef CONFIG_ATOMIC128
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
int mem_idx = cpu_mmu_index(env, false);
TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
success = int128_eq(oldv, cmpv);
#endif
} else {
uint64_t o0, o1;
#ifdef CONFIG_USER_ONLY
/* ??? Enforce alignment. */
uint64_t *haddr = g2h(addr);
o0 = ldq_le_p(haddr + 0);
o1 = ldq_le_p(haddr + 1);
oldv = int128_make128(o0, o1);
success = int128_eq(oldv, cmpv);
if (success) {
stq_le_p(haddr + 0, int128_getlo(newv));
stq_le_p(haddr + 1, int128_gethi(newv));
}
#else
int mem_idx = cpu_mmu_index(env, false);
TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
oldv = int128_make128(o0, o1);
success = int128_eq(oldv, cmpv);
if (success) {
helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
}
#endif
}
return !success;
}
uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
uint64_t new_lo, uint64_t new_hi)
{
uintptr_t ra = GETPC();
Int128 oldv, cmpv, newv;
bool success;
cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
newv = int128_make128(new_lo, new_hi);
if (parallel_cpus) {
#ifndef CONFIG_ATOMIC128
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
int mem_idx = cpu_mmu_index(env, false);
TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
success = int128_eq(oldv, cmpv);
#endif
} else {
uint64_t o0, o1;
#ifdef CONFIG_USER_ONLY
/* ??? Enforce alignment. */
uint64_t *haddr = g2h(addr);
o1 = ldq_be_p(haddr + 0);
o0 = ldq_be_p(haddr + 1);
oldv = int128_make128(o0, o1);
success = int128_eq(oldv, cmpv);
if (success) {
stq_be_p(haddr + 0, int128_gethi(newv));
stq_be_p(haddr + 1, int128_getlo(newv));
}
#else
int mem_idx = cpu_mmu_index(env, false);
TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
oldv = int128_make128(o0, o1);
success = int128_eq(oldv, cmpv);
if (success) {
helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
}
#endif
}
return !success;
}
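/* Illustrative sketch, not part of the patch: the CONFIG_ATOMIC128 branch of
 * the paired compare-and-swap above, reduced to plain host C.  It assumes a
 * compiler and target that provide 16-byte atomics (e.g. x86-64 with -mcx16
 * or a libatomic fallback); guest endianness handling, the MMU lookup and the
 * QEMU helper plumbing are all omitted. */

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;

static u128 make128(uint64_t lo, uint64_t hi)
{
    return ((u128)hi << 64) | lo;
}

/* Returns 0 on success, 1 on failure, matching the helper's convention. */
static uint64_t paired_cas(u128 *mem, uint64_t cmp_lo, uint64_t cmp_hi,
                           uint64_t new_lo, uint64_t new_hi)
{
    u128 expected = make128(cmp_lo, cmp_hi);
    u128 desired = make128(new_lo, new_hi);
    bool ok = __atomic_compare_exchange_n(mem, &expected, desired, false,
                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return !ok;
}

int main(void)
{
    u128 mem = make128(1, 2);
    printf("first attempt failed=%d\n", (int)paired_cas(&mem, 1, 2, 3, 4));
    printf("stale attempt failed=%d\n", (int)paired_cas(&mem, 1, 2, 5, 6));
    return 0;
}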


@ -46,3 +46,5 @@ DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)


@ -46,8 +46,7 @@ static inline bool excp_is_internal(int excp)
|| excp == EXCP_HALTED
|| excp == EXCP_EXCEPTION_EXIT
|| excp == EXCP_KERNEL_TRAP
|| excp == EXCP_SEMIHOST
|| excp == EXCP_STREX;
|| excp == EXCP_SEMIHOST;
}
/* Exception names for debug logging; note that not all of these
@ -63,7 +62,6 @@ static const char * const excnames[] = {
[EXCP_BKPT] = "Breakpoint",
[EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
[EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
[EXCP_STREX] = "QEMU intercept of STREX",
[EXCP_HVC] = "Hypervisor Call",
[EXCP_HYP_TRAP] = "Hypervisor Trap",
[EXCP_SMC] = "Secure Monitor Call",


@ -1839,37 +1839,41 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
}
}
/*
* Load/Store exclusive instructions are implemented by remembering
* the value/address loaded, and seeing if these are the same
* when the store is performed. This is not actually the architecturally
* mandated semantics, but it works for typical guest code sequences
* and avoids having to monitor regular stores.
*
* In system emulation mode only one CPU will be running at once, so
* this sequence is effectively atomic. In user emulation mode we
* throw an exception and handle the atomic operation elsewhere.
*/
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
TCGv_i64 addr, int size, bool is_pair)
{
TCGv_i64 tmp = tcg_temp_new_i64();
TCGMemOp memop = s->be_data + size;
TCGMemOp be = s->be_data;
g_assert(size <= 3);
tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
if (is_pair) {
TCGv_i64 addr2 = tcg_temp_new_i64();
TCGv_i64 hitmp = tcg_temp_new_i64();
g_assert(size >= 2);
tcg_gen_addi_i64(addr2, addr, 1 << size);
tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
tcg_temp_free_i64(addr2);
if (size == 3) {
TCGv_i64 addr2 = tcg_temp_new_i64();
tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s),
MO_64 | MO_ALIGN_16 | be);
tcg_gen_addi_i64(addr2, addr, 8);
tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s),
MO_64 | MO_ALIGN | be);
tcg_temp_free_i64(addr2);
} else {
g_assert(size == 2);
tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s),
MO_64 | MO_ALIGN | be);
if (be == MO_LE) {
tcg_gen_extr32_i64(tmp, hitmp, tmp);
} else {
tcg_gen_extr32_i64(hitmp, tmp, tmp);
}
}
tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
tcg_temp_free_i64(hitmp);
} else {
tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), size | MO_ALIGN | be);
}
tcg_gen_mov_i64(cpu_exclusive_val, tmp);
@ -1879,16 +1883,6 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
#ifdef CONFIG_USER_ONLY
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
TCGv_i64 addr, int size, int is_pair)
{
tcg_gen_mov_i64(cpu_exclusive_test, addr);
tcg_gen_movi_i32(cpu_exclusive_info,
size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
gen_exception_internal_insn(s, 4, EXCP_STREX);
}
#else
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
TCGv_i64 inaddr, int size, int is_pair)
{
@ -1916,46 +1910,42 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
tmp = tcg_temp_new_i64();
tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), s->be_data + size);
tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
tcg_temp_free_i64(tmp);
if (is_pair) {
TCGv_i64 addrhi = tcg_temp_new_i64();
TCGv_i64 tmphi = tcg_temp_new_i64();
tcg_gen_addi_i64(addrhi, addr, 1 << size);
tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s),
s->be_data + size);
tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
tcg_temp_free_i64(tmphi);
tcg_temp_free_i64(addrhi);
}
/* We seem to still have the exclusive monitor, so do the store */
tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s),
s->be_data + size);
if (is_pair) {
TCGv_i64 addrhi = tcg_temp_new_i64();
tcg_gen_addi_i64(addrhi, addr, 1 << size);
tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
get_mem_index(s), s->be_data + size);
tcg_temp_free_i64(addrhi);
if (size == 2) {
TCGv_i64 val = tcg_temp_new_i64();
tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high);
tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp,
get_mem_index(s),
size | MO_ALIGN | s->be_data);
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
tcg_temp_free_i64(val);
} else if (s->be_data == MO_LE) {
gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt),
cpu_reg(s, rt2));
} else {
gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, cpu_reg(s, rt),
cpu_reg(s, rt2));
}
} else {
TCGv_i64 val = cpu_reg(s, rt);
tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, val,
get_mem_index(s),
size | MO_ALIGN | s->be_data);
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
}
tcg_temp_free_i64(addr);
tcg_gen_movi_i64(cpu_reg(s, rd), 0);
tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
tcg_temp_free_i64(tmp);
tcg_gen_br(done_label);
gen_set_label(fail_label);
tcg_gen_movi_i64(cpu_reg(s, rd), 1);
gen_set_label(done_label);
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
#endif
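/* Illustrative sketch, not part of the patch: the LL/SC-to-cmpxchg scheme
 * described in the block comment before gen_load_exclusive above, written as
 * plain C11.  The two fields stand in for env->exclusive_addr/exclusive_val;
 * every name here is made up for the example.  The usual caveat applies: a
 * location that is modified and then restored to the remembered value (ABA)
 * still lets the store succeed, which is the accepted trade-off. */

#include <inttypes.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    _Atomic uint64_t *exclusive_addr;
    uint64_t exclusive_val;
} excl_state;

static uint64_t load_exclusive(excl_state *s, _Atomic uint64_t *addr)
{
    s->exclusive_addr = addr;
    s->exclusive_val = atomic_load(addr);
    return s->exclusive_val;
}

/* Returns 0 on success and 1 on failure, like the status register written
 * by a store-exclusive instruction. */
static int store_exclusive(excl_state *s, _Atomic uint64_t *addr, uint64_t val)
{
    uint64_t expected = s->exclusive_val;

    if (addr != s->exclusive_addr) {
        return 1;
    }
    s->exclusive_addr = NULL;   /* clear the monitor either way */
    return !atomic_compare_exchange_strong(addr, &expected, val);
}

int main(void)
{
    _Atomic uint64_t word = 41;
    excl_state s = { NULL, 0 };
    uint64_t v = load_exclusive(&s, &word);
    int failed = store_exclusive(&s, &word, v + 1);

    printf("failed=%d word=%" PRIu64 "\n", failed, atomic_load(&word));
    return 0;
}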
/* Update the Sixty-Four bit (SF) register size. This logic is derived
* from the ARMv8 specs for LDR (Shared decode for all encodings).


@ -65,10 +65,6 @@ static TCGv_i32 cpu_R[16];
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;
#ifdef CONFIG_USER_ONLY
TCGv_i64 cpu_exclusive_test;
TCGv_i32 cpu_exclusive_info;
#endif
/* FIXME: These should be removed. */
static TCGv_i32 cpu_F0s, cpu_F1s;
@ -102,12 +98,6 @@ void arm_translate_init(void)
offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
offsetof(CPUARMState, exclusive_val), "exclusive_val");
#ifdef CONFIG_USER_ONLY
cpu_exclusive_test = tcg_global_mem_new_i64(cpu_env,
offsetof(CPUARMState, exclusive_test), "exclusive_test");
cpu_exclusive_info = tcg_global_mem_new_i32(cpu_env,
offsetof(CPUARMState, exclusive_info), "exclusive_info");
#endif
a64_translate_init();
}
@ -932,145 +922,103 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
* These functions work like tcg_gen_qemu_{ld,st}* except
* that the address argument is TCGv_i32 rather than TCGv.
*/
#if TARGET_LONG_BITS == 32
#define DO_GEN_LD(SUFF, OPC, BE32_XOR) \
static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
TCGv_i32 addr, int index) \
{ \
TCGMemOp opc = (OPC) | s->be_data; \
/* Not needed for user-mode BE32, where we use MO_BE instead. */ \
if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \
TCGv addr_be = tcg_temp_new(); \
tcg_gen_xori_i32(addr_be, addr, BE32_XOR); \
tcg_gen_qemu_ld_i32(val, addr_be, index, opc); \
tcg_temp_free(addr_be); \
return; \
} \
tcg_gen_qemu_ld_i32(val, addr, index, opc); \
}
#define DO_GEN_ST(SUFF, OPC, BE32_XOR) \
static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
TCGv_i32 addr, int index) \
{ \
TCGMemOp opc = (OPC) | s->be_data; \
/* Not needed for user-mode BE32, where we use MO_BE instead. */ \
if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \
TCGv addr_be = tcg_temp_new(); \
tcg_gen_xori_i32(addr_be, addr, BE32_XOR); \
tcg_gen_qemu_st_i32(val, addr_be, index, opc); \
tcg_temp_free(addr_be); \
return; \
} \
tcg_gen_qemu_st_i32(val, addr, index, opc); \
}
static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
TCGv_i32 addr, int index)
static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op)
{
TCGv addr = tcg_temp_new();
tcg_gen_extu_i32_tl(addr, a32);
/* Not needed for user-mode BE32, where we use MO_BE instead. */
if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
}
return addr;
}
static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
int index, TCGMemOp opc)
{
TCGv addr = gen_aa32_addr(s, a32, opc);
tcg_gen_qemu_ld_i32(val, addr, index, opc);
tcg_temp_free(addr);
}
static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
int index, TCGMemOp opc)
{
TCGv addr = gen_aa32_addr(s, a32, opc);
tcg_gen_qemu_st_i32(val, addr, index, opc);
tcg_temp_free(addr);
}
#define DO_GEN_LD(SUFF, OPC) \
static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
TCGv_i32 a32, int index) \
{ \
gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
}
#define DO_GEN_ST(SUFF, OPC) \
static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
TCGv_i32 a32, int index) \
{ \
gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
}
static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
{
TCGMemOp opc = MO_Q | s->be_data;
tcg_gen_qemu_ld_i64(val, addr, index, opc);
/* Not needed for user-mode BE32, where we use MO_BE instead. */
if (!IS_USER_ONLY && s->sctlr_b) {
tcg_gen_rotri_i64(val, val, 32);
}
}
static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
TCGv_i32 addr, int index)
static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
int index, TCGMemOp opc)
{
TCGMemOp opc = MO_Q | s->be_data;
TCGv addr = gen_aa32_addr(s, a32, opc);
tcg_gen_qemu_ld_i64(val, addr, index, opc);
gen_aa32_frob64(s, val);
tcg_temp_free(addr);
}
static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
TCGv_i32 a32, int index)
{
gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
}
static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
int index, TCGMemOp opc)
{
TCGv addr = gen_aa32_addr(s, a32, opc);
/* Not needed for user-mode BE32, where we use MO_BE instead. */
if (!IS_USER_ONLY && s->sctlr_b) {
TCGv_i64 tmp = tcg_temp_new_i64();
tcg_gen_rotri_i64(tmp, val, 32);
tcg_gen_qemu_st_i64(tmp, addr, index, opc);
tcg_temp_free_i64(tmp);
return;
} else {
tcg_gen_qemu_st_i64(val, addr, index, opc);
}
tcg_gen_qemu_st_i64(val, addr, index, opc);
}
#else
#define DO_GEN_LD(SUFF, OPC, BE32_XOR) \
static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
TCGv_i32 addr, int index) \
{ \
TCGMemOp opc = (OPC) | s->be_data; \
TCGv addr64 = tcg_temp_new(); \
tcg_gen_extu_i32_i64(addr64, addr); \
/* Not needed for user-mode BE32, where we use MO_BE instead. */ \
if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \
tcg_gen_xori_i64(addr64, addr64, BE32_XOR); \
} \
tcg_gen_qemu_ld_i32(val, addr64, index, opc); \
tcg_temp_free(addr64); \
}
#define DO_GEN_ST(SUFF, OPC, BE32_XOR) \
static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
TCGv_i32 addr, int index) \
{ \
TCGMemOp opc = (OPC) | s->be_data; \
TCGv addr64 = tcg_temp_new(); \
tcg_gen_extu_i32_i64(addr64, addr); \
/* Not needed for user-mode BE32, where we use MO_BE instead. */ \
if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \
tcg_gen_xori_i64(addr64, addr64, BE32_XOR); \
} \
tcg_gen_qemu_st_i32(val, addr64, index, opc); \
tcg_temp_free(addr64); \
}
static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
TCGv_i32 addr, int index)
{
TCGMemOp opc = MO_Q | s->be_data;
TCGv addr64 = tcg_temp_new();
tcg_gen_extu_i32_i64(addr64, addr);
tcg_gen_qemu_ld_i64(val, addr64, index, opc);
/* Not needed for user-mode BE32, where we use MO_BE instead. */
if (!IS_USER_ONLY && s->sctlr_b) {
tcg_gen_rotri_i64(val, val, 32);
}
tcg_temp_free(addr64);
tcg_temp_free(addr);
}
static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
TCGv_i32 addr, int index)
TCGv_i32 a32, int index)
{
TCGMemOp opc = MO_Q | s->be_data;
TCGv addr64 = tcg_temp_new();
tcg_gen_extu_i32_i64(addr64, addr);
/* Not needed for user-mode BE32, where we use MO_BE instead. */
if (!IS_USER_ONLY && s->sctlr_b) {
TCGv tmp = tcg_temp_new();
tcg_gen_rotri_i64(tmp, val, 32);
tcg_gen_qemu_st_i64(tmp, addr64, index, opc);
tcg_temp_free(tmp);
} else {
tcg_gen_qemu_st_i64(val, addr64, index, opc);
}
tcg_temp_free(addr64);
gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
}
#endif
DO_GEN_LD(8s, MO_SB, 3)
DO_GEN_LD(8u, MO_UB, 3)
DO_GEN_LD(16s, MO_SW, 2)
DO_GEN_LD(16u, MO_UW, 2)
DO_GEN_LD(32u, MO_UL, 0)
/* 'a' variants include an alignment check */
DO_GEN_LD(16ua, MO_UW | MO_ALIGN, 2)
DO_GEN_LD(32ua, MO_UL | MO_ALIGN, 0)
DO_GEN_ST(8, MO_UB, 3)
DO_GEN_ST(16, MO_UW, 2)
DO_GEN_ST(32, MO_UL, 0)
DO_GEN_LD(8s, MO_SB)
DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(16s, MO_SW)
DO_GEN_LD(16u, MO_UW)
DO_GEN_LD(32u, MO_UL)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_UW)
DO_GEN_ST(32, MO_UL)
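/* Worked example, not part of the patch: the address XOR that gen_aa32_addr
 * applies for legacy BE32 (SCTLR.B) system mode.  An access of 1 << sz bytes
 * has its address XORed with 4 - (1 << sz), which reverses byte and halfword
 * lanes within each aligned 32-bit word and leaves 32-bit accesses alone. */

#include <stdint.h>
#include <stdio.h>

static uint32_t be32_adjust(uint32_t addr, unsigned sz /* log2 of size */)
{
    return sz < 2 ? addr ^ (4u - (1u << sz)) : addr;
}

int main(void)
{
    for (uint32_t a = 0; a < 4; a++) {
        printf("byte     %u -> %u\n", a, be32_adjust(a, 0));    /* 3,2,1,0 */
    }
    printf("halfword 0 -> %u, 2 -> %u\n", be32_adjust(0, 1), be32_adjust(2, 1));
    printf("word     0 -> %u\n", be32_adjust(0, 2));
    return 0;
}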
static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
@ -7759,45 +7707,30 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
/* Load/Store exclusive instructions are implemented by remembering
the value/address loaded, and seeing if these are the same
when the store is performed. This should be sufficient to implement
when the store is performed. This should be sufficient to implement
the architecturally mandated semantics, and avoids having to monitor
regular stores.
In system emulation mode only one CPU will be running at once, so
this sequence is effectively atomic. In user emulation mode we
throw an exception and handle the atomic operation elsewhere. */
regular stores. The compare vs the remembered value is done during
the cmpxchg operation, but we must compare the addresses manually. */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
TCGv_i32 addr, int size)
{
TCGv_i32 tmp = tcg_temp_new_i32();
TCGMemOp opc = size | MO_ALIGN | s->be_data;
s->is_ldex = true;
switch (size) {
case 0:
gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
break;
case 1:
gen_aa32_ld16ua(s, tmp, addr, get_mem_index(s));
break;
case 2:
case 3:
gen_aa32_ld32ua(s, tmp, addr, get_mem_index(s));
break;
default:
abort();
}
if (size == 3) {
TCGv_i32 tmp2 = tcg_temp_new_i32();
TCGv_i32 tmp3 = tcg_temp_new_i32();
TCGv_i64 t64 = tcg_temp_new_i64();
tcg_gen_addi_i32(tmp2, addr, 4);
gen_aa32_ld32u(s, tmp3, tmp2, get_mem_index(s));
tcg_temp_free_i32(tmp2);
tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
store_reg(s, rt2, tmp3);
gen_aa32_ld_i64(s, t64, addr, get_mem_index(s), opc);
tcg_gen_mov_i64(cpu_exclusive_val, t64);
tcg_gen_extr_i64_i32(tmp, tmp2, t64);
tcg_temp_free_i64(t64);
store_reg(s, rt2, tmp2);
} else {
gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
}
@ -7810,23 +7743,15 @@ static void gen_clrex(DisasContext *s)
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
#ifdef CONFIG_USER_ONLY
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
TCGv_i32 addr, int size)
{
tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
tcg_gen_movi_i32(cpu_exclusive_info,
size | (rd << 4) | (rt << 8) | (rt2 << 12));
gen_exception_internal_insn(s, 4, EXCP_STREX);
}
#else
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
TCGv_i32 addr, int size)
{
TCGv_i32 tmp;
TCGv_i64 val64, extaddr;
TCGv_i32 t0, t1, t2;
TCGv_i64 extaddr;
TCGv taddr;
TCGLabel *done_label;
TCGLabel *fail_label;
TCGMemOp opc = size | MO_ALIGN | s->be_data;
/* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
[addr] = {Rt};
@ -7841,69 +7766,45 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
tcg_temp_free_i64(extaddr);
tmp = tcg_temp_new_i32();
switch (size) {
case 0:
gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
break;
case 1:
gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
break;
case 2:
case 3:
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
break;
default:
abort();
}
val64 = tcg_temp_new_i64();
taddr = gen_aa32_addr(s, addr, opc);
t0 = tcg_temp_new_i32();
t1 = load_reg(s, rt);
if (size == 3) {
TCGv_i32 tmp2 = tcg_temp_new_i32();
TCGv_i32 tmp3 = tcg_temp_new_i32();
tcg_gen_addi_i32(tmp2, addr, 4);
gen_aa32_ld32u(s, tmp3, tmp2, get_mem_index(s));
tcg_temp_free_i32(tmp2);
tcg_gen_concat_i32_i64(val64, tmp, tmp3);
tcg_temp_free_i32(tmp3);
TCGv_i64 o64 = tcg_temp_new_i64();
TCGv_i64 n64 = tcg_temp_new_i64();
t2 = load_reg(s, rt2);
tcg_gen_concat_i32_i64(n64, t1, t2);
tcg_temp_free_i32(t2);
gen_aa32_frob64(s, n64);
tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
get_mem_index(s), opc);
tcg_temp_free_i64(n64);
gen_aa32_frob64(s, o64);
tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
tcg_gen_extrl_i64_i32(t0, o64);
tcg_temp_free_i64(o64);
} else {
tcg_gen_extu_i32_i64(val64, tmp);
t2 = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
tcg_temp_free_i32(t2);
}
tcg_temp_free_i32(tmp);
tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
tcg_temp_free_i64(val64);
tmp = load_reg(s, rt);
switch (size) {
case 0:
gen_aa32_st8(s, tmp, addr, get_mem_index(s));
break;
case 1:
gen_aa32_st16(s, tmp, addr, get_mem_index(s));
break;
case 2:
case 3:
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
break;
default:
abort();
}
tcg_temp_free_i32(tmp);
if (size == 3) {
tcg_gen_addi_i32(addr, addr, 4);
tmp = load_reg(s, rt2);
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
tcg_temp_free_i32(tmp);
}
tcg_gen_movi_i32(cpu_R[rd], 0);
tcg_temp_free_i32(t1);
tcg_temp_free(taddr);
tcg_gen_mov_i32(cpu_R[rd], t0);
tcg_temp_free_i32(t0);
tcg_gen_br(done_label);
gen_set_label(fail_label);
tcg_gen_movi_i32(cpu_R[rd], 1);
gen_set_label(done_label);
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
#endif
/* gen_srs:
* @env: CPUARMState
@ -8878,25 +8779,27 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
}
tcg_temp_free_i32(addr);
} else {
TCGv taddr;
TCGMemOp opc = s->be_data;
/* SWP instruction */
rm = (insn) & 0xf;
/* ??? This is not really atomic. However we know
we never have multiple CPUs running in parallel,
so it is good enough. */
addr = load_reg(s, rn);
tmp = load_reg(s, rm);
tmp2 = tcg_temp_new_i32();
if (insn & (1 << 22)) {
gen_aa32_ld8u(s, tmp2, addr, get_mem_index(s));
gen_aa32_st8(s, tmp, addr, get_mem_index(s));
opc |= MO_UB;
} else {
gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
opc |= MO_UL | MO_ALIGN;
}
tcg_temp_free_i32(tmp);
addr = load_reg(s, rn);
taddr = gen_aa32_addr(s, addr, opc);
tcg_temp_free_i32(addr);
store_reg(s, rd, tmp2);
tmp = load_reg(s, rm);
tcg_gen_atomic_xchg_i32(tmp, taddr, tmp,
get_mem_index(s), opc);
tcg_temp_free(taddr);
store_reg(s, rd, tmp);
}
}
} else {


@ -79,10 +79,6 @@ extern TCGv_env cpu_env;
extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
extern TCGv_i64 cpu_exclusive_addr;
extern TCGv_i64 cpu_exclusive_val;
#ifdef CONFIG_USER_ONLY
extern TCGv_i64 cpu_exclusive_test;
extern TCGv_i32 cpu_exclusive_info;
#endif
static inline int arm_dc_feature(DisasContext *dc, int feature)
{


@ -1,8 +1,6 @@
DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
DEF_HELPER_0(lock, void)
DEF_HELPER_0(unlock, void)
DEF_HELPER_3(write_eflags, void, env, tl, i32)
DEF_HELPER_1(read_eflags, tl, env)
DEF_HELPER_2(divb_AL, void, env, tl)
@ -74,8 +72,10 @@ DEF_HELPER_3(boundw, void, env, tl, int)
DEF_HELPER_3(boundl, void, env, tl, int)
DEF_HELPER_1(rsm, void, env)
DEF_HELPER_2(into, void, env, int)
DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl)
DEF_HELPER_2(cmpxchg8b, void, env, tl)
#ifdef TARGET_X86_64
DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
DEF_HELPER_2(cmpxchg16b, void, env, tl)
#endif
DEF_HELPER_1(single_step, void, env)


@ -22,87 +22,146 @@
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "tcg.h"
/* broken thread support */
#if defined(CONFIG_USER_ONLY)
QemuMutex global_cpu_lock;
void helper_lock(void)
void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
{
qemu_mutex_lock(&global_cpu_lock);
}
void helper_unlock(void)
{
qemu_mutex_unlock(&global_cpu_lock);
}
void helper_lock_init(void)
{
qemu_mutex_init(&global_cpu_lock);
}
#else
void helper_lock(void)
{
}
void helper_unlock(void)
{
}
void helper_lock_init(void)
{
}
#endif
void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
{
uint64_t d;
uintptr_t ra = GETPC();
uint64_t oldv, cmpv, newv;
int eflags;
eflags = cpu_cc_compute_all(env, CC_OP);
d = cpu_ldq_data_ra(env, a0, GETPC());
if (d == (((uint64_t)env->regs[R_EDX] << 32) | (uint32_t)env->regs[R_EAX])) {
cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32)
| (uint32_t)env->regs[R_EBX], GETPC());
cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
oldv = cpu_ldq_data_ra(env, a0, ra);
newv = (cmpv == oldv ? newv : oldv);
/* always do the store */
cpu_stq_data_ra(env, a0, newv, ra);
if (oldv == cmpv) {
eflags |= CC_Z;
} else {
/* always do the store */
cpu_stq_data_ra(env, a0, d, GETPC());
env->regs[R_EDX] = (uint32_t)(d >> 32);
env->regs[R_EAX] = (uint32_t)d;
env->regs[R_EAX] = (uint32_t)oldv;
env->regs[R_EDX] = (uint32_t)(oldv >> 32);
eflags &= ~CC_Z;
}
CC_SRC = eflags;
}
#ifdef TARGET_X86_64
void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
{
uint64_t d0, d1;
#ifdef CONFIG_ATOMIC64
uint64_t oldv, cmpv, newv;
int eflags;
eflags = cpu_cc_compute_all(env, CC_OP);
cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
#ifdef CONFIG_USER_ONLY
{
uint64_t *haddr = g2h(a0);
cmpv = cpu_to_le64(cmpv);
newv = cpu_to_le64(newv);
oldv = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
oldv = le64_to_cpu(oldv);
}
#else
{
uintptr_t ra = GETPC();
int mem_idx = cpu_mmu_index(env, false);
TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);
oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
}
#endif
if (oldv == cmpv) {
eflags |= CC_Z;
} else {
env->regs[R_EAX] = (uint32_t)oldv;
env->regs[R_EDX] = (uint32_t)(oldv >> 32);
eflags &= ~CC_Z;
}
CC_SRC = eflags;
#else
cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
#endif /* CONFIG_ATOMIC64 */
}
#ifdef TARGET_X86_64
void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
{
uintptr_t ra = GETPC();
Int128 oldv, cmpv, newv;
uint64_t o0, o1;
int eflags;
bool success;
if ((a0 & 0xf) != 0) {
raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
eflags = cpu_cc_compute_all(env, CC_OP);
d0 = cpu_ldq_data_ra(env, a0, GETPC());
d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC());
if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) {
cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC());
cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC());
cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
oldv = int128_make128(o0, o1);
success = int128_eq(oldv, cmpv);
if (!success) {
newv = oldv;
}
cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra);
cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra);
if (success) {
eflags |= CC_Z;
} else {
/* always do the store */
cpu_stq_data_ra(env, a0, d0, GETPC());
cpu_stq_data_ra(env, a0 + 8, d1, GETPC());
env->regs[R_EDX] = d1;
env->regs[R_EAX] = d0;
env->regs[R_EAX] = int128_getlo(oldv);
env->regs[R_EDX] = int128_gethi(oldv);
eflags &= ~CC_Z;
}
CC_SRC = eflags;
}
void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
{
uintptr_t ra = GETPC();
if ((a0 & 0xf) != 0) {
raise_exception_ra(env, EXCP0D_GPF, ra);
} else {
#ifndef CONFIG_ATOMIC128
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
int eflags = cpu_cc_compute_all(env, CC_OP);
Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
int mem_idx = cpu_mmu_index(env, false);
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
Int128 oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv,
newv, oi, ra);
if (int128_eq(oldv, cmpv)) {
eflags |= CC_Z;
} else {
env->regs[R_EAX] = int128_getlo(oldv);
env->regs[R_EDX] = int128_gethi(oldv);
eflags &= ~CC_Z;
}
CC_SRC = eflags;
#endif
}
}
#endif
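/* Illustrative sketch, not part of the patch: the CMPXCHG8B data flow that
 * helper_cmpxchg8b implements with a single 64-bit compare-and-swap.  The
 * struct is only a stand-in for the guest registers; #GP/alignment checks,
 * flags other than ZF and the 16-byte CMPXCHG16B variant are omitted. */

#include <inttypes.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct regs { uint32_t eax, edx, ebx, ecx; };

/* Returns the resulting ZF. */
static bool cmpxchg8b(struct regs *r, _Atomic uint64_t *mem)
{
    uint64_t cmpv = (uint64_t)r->edx << 32 | r->eax;
    uint64_t newv = (uint64_t)r->ecx << 32 | r->ebx;

    if (atomic_compare_exchange_strong(mem, &cmpv, newv)) {
        return true;                    /* memory now holds ECX:EBX */
    }
    r->eax = (uint32_t)cmpv;            /* EDX:EAX get the value seen */
    r->edx = (uint32_t)(cmpv >> 32);
    return false;
}

int main(void)
{
    _Atomic uint64_t mem = 0x0000000200000001ull;
    struct regs r = { .eax = 1, .edx = 2, .ebx = 3, .ecx = 4 };
    bool zf = cmpxchg8b(&r, &mem);

    printf("ZF=%d mem=%#" PRIx64 "\n", zf, atomic_load(&mem));
    return 0;
}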
void helper_boundw(CPUX86State *env, target_ulong a0, int v)


@ -1257,55 +1257,95 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
if (d != OR_TMP0) {
gen_op_mov_v_reg(ot, cpu_T0, d);
} else {
} else if (!(s1->prefix & PREFIX_LOCK)) {
gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
}
switch(op) {
case OP_ADCL:
gen_compute_eflags_c(s1, cpu_tmp4);
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
gen_op_st_rm_T0_A0(s1, ot, d);
if (s1->prefix & PREFIX_LOCK) {
tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
s1->mem_index, ot | MO_LE);
} else {
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
gen_op_st_rm_T0_A0(s1, ot, d);
}
gen_op_update3_cc(cpu_tmp4);
set_cc_op(s1, CC_OP_ADCB + ot);
break;
case OP_SBBL:
gen_compute_eflags_c(s1, cpu_tmp4);
tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
gen_op_st_rm_T0_A0(s1, ot, d);
if (s1->prefix & PREFIX_LOCK) {
tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
tcg_gen_neg_tl(cpu_T0, cpu_T0);
tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
s1->mem_index, ot | MO_LE);
} else {
tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
gen_op_st_rm_T0_A0(s1, ot, d);
}
gen_op_update3_cc(cpu_tmp4);
set_cc_op(s1, CC_OP_SBBB + ot);
break;
case OP_ADDL:
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
if (s1->prefix & PREFIX_LOCK) {
tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
s1->mem_index, ot | MO_LE);
} else {
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
}
gen_op_update2_cc();
set_cc_op(s1, CC_OP_ADDB + ot);
break;
case OP_SUBL:
tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
if (s1->prefix & PREFIX_LOCK) {
tcg_gen_neg_tl(cpu_T0, cpu_T1);
tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
s1->mem_index, ot | MO_LE);
tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
} else {
tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
}
gen_op_update2_cc();
set_cc_op(s1, CC_OP_SUBB + ot);
break;
default:
case OP_ANDL:
tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
if (s1->prefix & PREFIX_LOCK) {
tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
s1->mem_index, ot | MO_LE);
} else {
tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
}
gen_op_update1_cc();
set_cc_op(s1, CC_OP_LOGICB + ot);
break;
case OP_ORL:
tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
if (s1->prefix & PREFIX_LOCK) {
tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
s1->mem_index, ot | MO_LE);
} else {
tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
}
gen_op_update1_cc();
set_cc_op(s1, CC_OP_LOGICB + ot);
break;
case OP_XORL:
tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
if (s1->prefix & PREFIX_LOCK) {
tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
s1->mem_index, ot | MO_LE);
} else {
tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_rm_T0_A0(s1, ot, d);
}
gen_op_update1_cc();
set_cc_op(s1, CC_OP_LOGICB + ot);
break;
@ -1321,21 +1361,23 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
if (d != OR_TMP0) {
gen_op_mov_v_reg(ot, cpu_T0, d);
if (s1->prefix & PREFIX_LOCK) {
tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
s1->mem_index, ot | MO_LE);
} else {
gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
if (d != OR_TMP0) {
gen_op_mov_v_reg(ot, cpu_T0, d);
} else {
gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
}
tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
gen_op_st_rm_T0_A0(s1, ot, d);
}
gen_compute_eflags_c(s1, cpu_cc_src);
if (c > 0) {
tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
set_cc_op(s1, CC_OP_INCB + ot);
} else {
tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
set_cc_op(s1, CC_OP_DECB + ot);
}
gen_op_st_rm_T0_A0(s1, ot, d);
tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}
static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
@ -4494,10 +4536,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
s->aflag = aflag;
s->dflag = dflag;
/* lock generation */
if (prefixes & PREFIX_LOCK)
gen_helper_lock();
/* now check op code */
reswitch:
switch(b) {
@ -4632,10 +4670,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
rm = (modrm & 7) | REX_B(s);
op = (modrm >> 3) & 7;
if (mod != 3) {
if (op == 0)
if (op == 0) {
s->rip_offset = insn_const_size(ot);
}
gen_lea_modrm(env, s, modrm);
gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
/* For those below that handle locked memory, don't load here. */
if (!(s->prefix & PREFIX_LOCK)
|| op != 2) {
gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
}
} else {
gen_op_mov_v_reg(ot, cpu_T0, rm);
}
@ -4648,19 +4691,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
set_cc_op(s, CC_OP_LOGICB + ot);
break;
case 2: /* not */
tcg_gen_not_tl(cpu_T0, cpu_T0);
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
if (s->prefix & PREFIX_LOCK) {
if (mod == 3) {
goto illegal_op;
}
tcg_gen_movi_tl(cpu_T0, ~0);
tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
s->mem_index, ot | MO_LE);
} else {
gen_op_mov_reg_v(ot, rm, cpu_T0);
tcg_gen_not_tl(cpu_T0, cpu_T0);
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
} else {
gen_op_mov_reg_v(ot, rm, cpu_T0);
}
}
break;
case 3: /* neg */
tcg_gen_neg_tl(cpu_T0, cpu_T0);
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
if (s->prefix & PREFIX_LOCK) {
TCGLabel *label1;
TCGv a0, t0, t1, t2;
if (mod == 3) {
goto illegal_op;
}
a0 = tcg_temp_local_new();
t0 = tcg_temp_local_new();
label1 = gen_new_label();
tcg_gen_mov_tl(a0, cpu_A0);
tcg_gen_mov_tl(t0, cpu_T0);
gen_set_label(label1);
t1 = tcg_temp_new();
t2 = tcg_temp_new();
tcg_gen_mov_tl(t2, t0);
tcg_gen_neg_tl(t1, t0);
tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
s->mem_index, ot | MO_LE);
tcg_temp_free(t1);
tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
tcg_temp_free(t2);
tcg_temp_free(a0);
tcg_gen_mov_tl(cpu_T0, t0);
tcg_temp_free(t0);
} else {
gen_op_mov_reg_v(ot, rm, cpu_T0);
tcg_gen_neg_tl(cpu_T0, cpu_T0);
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
} else {
gen_op_mov_reg_v(ot, rm, cpu_T0);
}
}
gen_op_update_neg_cc();
set_cc_op(s, CC_OP_SUBB + ot);
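/* Illustrative sketch, not part of the patch: the compare-and-swap retry
 * loop used above for LOCK NEG, in plain C11.  There is no fetch-and-negate
 * primitive, so the current value is read, its negation proposed, and the
 * exchange retried until no other writer has intervened. */

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t neg_fetch_atomic(_Atomic uint32_t *mem)
{
    uint32_t old = atomic_load(mem);

    while (!atomic_compare_exchange_weak(mem, &old, (uint32_t)-old)) {
        /* "old" was refreshed with the value that beat us; retry. */
    }
    return (uint32_t)-old;              /* the value actually stored */
}

int main(void)
{
    _Atomic uint32_t x = 5;

    printf("stored %u\n", neg_fetch_atomic(&x));
    return 0;
}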
@ -5048,19 +5130,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
modrm = cpu_ldub_code(env, s->pc++);
reg = ((modrm >> 3) & 7) | rex_r;
mod = (modrm >> 6) & 3;
gen_op_mov_v_reg(ot, cpu_T0, reg);
if (mod == 3) {
rm = (modrm & 7) | REX_B(s);
gen_op_mov_v_reg(ot, cpu_T0, reg);
gen_op_mov_v_reg(ot, cpu_T1, rm);
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_mov_reg_v(ot, reg, cpu_T1);
gen_op_mov_reg_v(ot, rm, cpu_T0);
} else {
gen_lea_modrm(env, s, modrm);
gen_op_mov_v_reg(ot, cpu_T0, reg);
gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
if (s->prefix & PREFIX_LOCK) {
tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
s->mem_index, ot | MO_LE);
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
} else {
gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
}
gen_op_mov_reg_v(ot, reg, cpu_T1);
}
gen_op_update2_cc();
@ -5069,57 +5156,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 0x1b0:
case 0x1b1: /* cmpxchg Ev, Gv */
{
TCGLabel *label1, *label2;
TCGv t0, t1, t2, a0;
TCGv oldv, newv, cmpv;
ot = mo_b_d(b, dflag);
modrm = cpu_ldub_code(env, s->pc++);
reg = ((modrm >> 3) & 7) | rex_r;
mod = (modrm >> 6) & 3;
t0 = tcg_temp_local_new();
t1 = tcg_temp_local_new();
t2 = tcg_temp_local_new();
a0 = tcg_temp_local_new();
gen_op_mov_v_reg(ot, t1, reg);
if (mod == 3) {
rm = (modrm & 7) | REX_B(s);
gen_op_mov_v_reg(ot, t0, rm);
} else {
oldv = tcg_temp_new();
newv = tcg_temp_new();
cmpv = tcg_temp_new();
gen_op_mov_v_reg(ot, newv, reg);
tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
if (s->prefix & PREFIX_LOCK) {
if (mod == 3) {
goto illegal_op;
}
gen_lea_modrm(env, s, modrm);
tcg_gen_mov_tl(a0, cpu_A0);
gen_op_ld_v(s, ot, t0, a0);
rm = 0; /* avoid warning */
}
label1 = gen_new_label();
tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
gen_extu(ot, t0);
gen_extu(ot, t2);
tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
label2 = gen_new_label();
if (mod == 3) {
gen_op_mov_reg_v(ot, R_EAX, t0);
tcg_gen_br(label2);
gen_set_label(label1);
gen_op_mov_reg_v(ot, rm, t1);
tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
s->mem_index, ot | MO_LE);
gen_op_mov_reg_v(ot, R_EAX, oldv);
} else {
/* perform no-op store cycle like physical cpu; must be
before changing accumulator to ensure idempotency if
the store faults and the instruction is restarted */
gen_op_st_v(s, ot, t0, a0);
gen_op_mov_reg_v(ot, R_EAX, t0);
tcg_gen_br(label2);
gen_set_label(label1);
gen_op_st_v(s, ot, t1, a0);
if (mod == 3) {
rm = (modrm & 7) | REX_B(s);
gen_op_mov_v_reg(ot, oldv, rm);
} else {
gen_lea_modrm(env, s, modrm);
gen_op_ld_v(s, ot, oldv, cpu_A0);
rm = 0; /* avoid warning */
}
gen_extu(ot, oldv);
gen_extu(ot, cmpv);
/* store value = (old == cmp ? new : old); */
tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
if (mod == 3) {
gen_op_mov_reg_v(ot, R_EAX, oldv);
gen_op_mov_reg_v(ot, rm, newv);
} else {
/* Perform an unconditional store cycle like physical cpu;
must be before changing accumulator to ensure
idempotency if the store faults and the instruction
is restarted */
gen_op_st_v(s, ot, newv, cpu_A0);
gen_op_mov_reg_v(ot, R_EAX, oldv);
}
}
gen_set_label(label2);
tcg_gen_mov_tl(cpu_cc_src, t0);
tcg_gen_mov_tl(cpu_cc_srcT, t2);
tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
tcg_gen_mov_tl(cpu_cc_src, oldv);
tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
set_cc_op(s, CC_OP_SUBB + ot);
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
tcg_temp_free(a0);
tcg_temp_free(oldv);
tcg_temp_free(newv);
tcg_temp_free(cmpv);
}
break;
case 0x1c7: /* cmpxchg8b */
@ -5132,14 +5220,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
goto illegal_op;
gen_lea_modrm(env, s, modrm);
gen_helper_cmpxchg16b(cpu_env, cpu_A0);
if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
gen_helper_cmpxchg16b(cpu_env, cpu_A0);
} else {
gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
}
} else
#endif
{
if (!(s->cpuid_features & CPUID_CX8))
goto illegal_op;
gen_lea_modrm(env, s, modrm);
gen_helper_cmpxchg8b(cpu_env, cpu_A0);
if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
gen_helper_cmpxchg8b(cpu_env, cpu_A0);
} else {
gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
}
}
set_cc_op(s, CC_OP_EFLAGS);
break;
@ -5464,12 +5560,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_lea_modrm(env, s, modrm);
gen_op_mov_v_reg(ot, cpu_T0, reg);
/* for xchg, lock is implicit */
if (!(prefixes & PREFIX_LOCK))
gen_helper_lock();
gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
if (!(prefixes & PREFIX_LOCK))
gen_helper_unlock();
tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
s->mem_index, ot | MO_LE);
gen_op_mov_reg_v(ot, reg, cpu_T1);
}
break;
@ -6555,7 +6647,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (mod != 3) {
s->rip_offset = 1;
gen_lea_modrm(env, s, modrm);
gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
if (!(s->prefix & PREFIX_LOCK)) {
gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
}
} else {
gen_op_mov_v_reg(ot, cpu_T0, rm);
}
@ -6585,44 +6679,69 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
rm = (modrm & 7) | REX_B(s);
gen_op_mov_v_reg(MO_32, cpu_T1, reg);
if (mod != 3) {
gen_lea_modrm(env, s, modrm);
AddressParts a = gen_lea_modrm_0(env, s, modrm);
/* specific case: we need to add a displacement */
gen_exts(ot, cpu_T1);
tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
if (!(s->prefix & PREFIX_LOCK)) {
gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
}
} else {
gen_op_mov_v_reg(ot, cpu_T0, rm);
}
bt_op:
tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
switch(op) {
case 0:
break;
case 1:
tcg_gen_movi_tl(cpu_tmp0, 1);
tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
break;
case 2:
tcg_gen_movi_tl(cpu_tmp0, 1);
tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
break;
default:
case 3:
tcg_gen_movi_tl(cpu_tmp0, 1);
tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
break;
}
if (op != 0) {
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
} else {
gen_op_mov_reg_v(ot, rm, cpu_T0);
tcg_gen_movi_tl(cpu_tmp0, 1);
tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
if (s->prefix & PREFIX_LOCK) {
switch (op) {
case 0: /* bt */
/* Needs no atomic ops; we suppressed the normal
memory load for LOCK above so do it now. */
gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
break;
case 1: /* bts */
tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
s->mem_index, ot | MO_LE);
break;
case 2: /* btr */
tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
s->mem_index, ot | MO_LE);
break;
default:
case 3: /* btc */
tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
s->mem_index, ot | MO_LE);
break;
}
tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
} else {
tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
switch (op) {
case 0: /* bt */
/* Data already loaded; nothing to do. */
break;
case 1: /* bts */
tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
break;
case 2: /* btr */
tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
break;
default:
case 3: /* btc */
tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
break;
}
if (op != 0) {
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T0, cpu_A0);
} else {
gen_op_mov_reg_v(ot, rm, cpu_T0);
}
}
}
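/* Illustrative sketch, not part of the patch: the LOCKed BTS/BTR/BTC mapping
 * used above, with host fetch-and-op builtins.  CF is derived from the value
 * returned by the fetch, i.e. the bit as it was before the update; plain BT
 * needs no read-modify-write at all. */

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum btx_op { OP_BT, OP_BTS, OP_BTR, OP_BTC };

static bool locked_btx(_Atomic uint32_t *word, unsigned bit, enum btx_op op)
{
    uint32_t mask = 1u << (bit & 31);
    uint32_t old;

    switch (op) {
    case OP_BTS: old = atomic_fetch_or(word, mask); break;
    case OP_BTR: old = atomic_fetch_and(word, ~mask); break;
    case OP_BTC: old = atomic_fetch_xor(word, mask); break;
    default:     old = atomic_load(word); break;
    }
    return (old >> (bit & 31)) & 1;     /* new CF */
}

int main(void)
{
    _Atomic uint32_t w = 0;
    bool cf = locked_btx(&w, 3, OP_BTS);

    printf("CF=%d word=%u\n", cf, (unsigned)atomic_load(&w));
    cf = locked_btx(&w, 3, OP_BTR);
    printf("CF=%d word=%u\n", cf, (unsigned)atomic_load(&w));
    return 0;
}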
@ -8088,20 +8207,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
default:
goto unknown_op;
}
/* lock generation */
if (s->prefix & PREFIX_LOCK)
gen_helper_unlock();
return s->pc;
illegal_op:
if (s->prefix & PREFIX_LOCK)
gen_helper_unlock();
/* XXX: ensure that no lock was generated */
gen_illegal_opcode(s);
return s->pc;
unknown_op:
if (s->prefix & PREFIX_LOCK)
gen_helper_unlock();
/* XXX: ensure that no lock was generated */
gen_unknown_opcode(env, s);
return s->pc;
}
@ -8193,8 +8303,6 @@ void tcg_x86_init(void)
offsetof(CPUX86State, bnd_regs[i].ub),
bnd_regu_names[i]);
}
helper_lock_init();
}
/* generate intermediate code for basic block 'tb'. */


@ -23,17 +23,10 @@
*/
#include "qemu/osdep.h"
#include "qemu/host-utils.h"
/* This file is compiled once, and thus we can't include the standard
"exec/helper-proto.h", which has includes that are target specific. */
#include "exec/helper-head.h"
#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2));
#include "tcg-runtime.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
/* 32-bit helpers */
@ -107,3 +100,62 @@ int64_t HELPER(mulsh_i64)(int64_t arg1, int64_t arg2)
muls64(&l, &h, arg1, arg2);
return h;
}
void HELPER(exit_atomic)(CPUArchState *env)
{
cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
}
#ifndef CONFIG_SOFTMMU
/* The softmmu versions of these helpers are in cputlb.c. */
/* Do not allow unaligned operations to proceed. Return the host address. */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
int size, uintptr_t retaddr)
{
/* Enforce qemu required alignment. */
if (unlikely(addr & (size - 1))) {
cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
}
return g2h(addr);
}
/* Macro to call the above, with local variables from the use context. */
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define EXTRA_ARGS
#define DATA_SIZE 1
#include "atomic_template.h"
#define DATA_SIZE 2
#include "atomic_template.h"
#define DATA_SIZE 4
#include "atomic_template.h"
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
/* The following is only callable from other helpers, and matches up
with the softmmu version. */
#ifdef CONFIG_ATOMIC128
#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr)
#define DATA_SIZE 16
#include "atomic_template.h"
#endif /* CONFIG_ATOMIC128 */
#endif /* !CONFIG_SOFTMMU */
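/* Two notes on the block above, with a small sketch that is not part of the
 * patch.  First, atomic_template.h is included once per DATA_SIZE and stamps
 * out one set of helpers per access width, so a single source describes the
 * b/w/l/q (and, with CONFIG_ATOMIC128, 16-byte) variants.  Second, the
 * user-mode lookup only accepts naturally aligned addresses: for a
 * power-of-two size, addr & (size - 1) is non-zero exactly when the access
 * crosses an alignment boundary, and such accesses are punted to the
 * exclusive slow path via cpu_loop_exit_atomic. */

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool naturally_aligned(uint64_t addr, unsigned size /* power of two */)
{
    return (addr & (size - 1)) == 0;
}

int main(void)
{
    printf("%d %d %d\n", naturally_aligned(0x1000, 8),
           naturally_aligned(0x1004, 8), naturally_aligned(0x1004, 4));
    return 0;
}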


@ -150,17 +150,7 @@ void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2,
void tcg_gen_mb(TCGBar mb_type)
{
bool emit_barriers = true;
#ifndef CONFIG_USER_ONLY
/* TODO: When MTTCG is available for system mode, we will check
* the following condition and enable emit_barriers
* (qemu_tcg_mttcg_enabled() && smp_cpus > 1)
*/
emit_barriers = false;
#endif
if (emit_barriers) {
if (parallel_cpus) {
tcg_gen_op1(&tcg_ctx, INDEX_op_mb, mb_type);
}
}
@ -1975,3 +1965,345 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
addr, trace_mem_get_info(memop, 1));
gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
}
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc)
{
switch (opc & MO_SSIZE) {
case MO_SB:
tcg_gen_ext8s_i32(ret, val);
break;
case MO_UB:
tcg_gen_ext8u_i32(ret, val);
break;
case MO_SW:
tcg_gen_ext16s_i32(ret, val);
break;
case MO_UW:
tcg_gen_ext16u_i32(ret, val);
break;
default:
tcg_gen_mov_i32(ret, val);
break;
}
}
static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, TCGMemOp opc)
{
switch (opc & MO_SSIZE) {
case MO_SB:
tcg_gen_ext8s_i64(ret, val);
break;
case MO_UB:
tcg_gen_ext8u_i64(ret, val);
break;
case MO_SW:
tcg_gen_ext16s_i64(ret, val);
break;
case MO_UW:
tcg_gen_ext16u_i64(ret, val);
break;
case MO_SL:
tcg_gen_ext32s_i64(ret, val);
break;
case MO_UL:
tcg_gen_ext32u_i64(ret, val);
break;
default:
tcg_gen_mov_i64(ret, val);
break;
}
}
#ifdef CONFIG_SOFTMMU
typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
TCGv_i64, TCGv_i32);
#else
typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64, TCGv_i64);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64);
#endif
#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
static void * const table_cmpxchg[16] = {
[MO_8] = gen_helper_atomic_cmpxchgb,
[MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
[MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
[MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
[MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
};
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
TCGv_i32 newv, TCGArg idx, TCGMemOp memop)
{
memop = tcg_canonicalize_memop(memop, 0, 0);
if (!parallel_cpus) {
TCGv_i32 t1 = tcg_temp_new_i32();
TCGv_i32 t2 = tcg_temp_new_i32();
tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
tcg_gen_qemu_st_i32(t2, addr, idx, memop);
tcg_temp_free_i32(t2);
if (memop & MO_SIGN) {
tcg_gen_ext_i32(retv, t1, memop);
} else {
tcg_gen_mov_i32(retv, t1);
}
tcg_temp_free_i32(t1);
} else {
gen_atomic_cx_i32 gen;
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
tcg_debug_assert(gen != NULL);
#ifdef CONFIG_SOFTMMU
{
TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi);
tcg_temp_free_i32(oi);
}
#else
gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv);
#endif
if (memop & MO_SIGN) {
tcg_gen_ext_i32(retv, retv, memop);
}
}
}
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
TCGv_i64 newv, TCGArg idx, TCGMemOp memop)
{
memop = tcg_canonicalize_memop(memop, 1, 0);
if (!parallel_cpus) {
TCGv_i64 t1 = tcg_temp_new_i64();
TCGv_i64 t2 = tcg_temp_new_i64();
tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
tcg_gen_qemu_st_i64(t2, addr, idx, memop);
tcg_temp_free_i64(t2);
if (memop & MO_SIGN) {
tcg_gen_ext_i64(retv, t1, memop);
} else {
tcg_gen_mov_i64(retv, t1);
}
tcg_temp_free_i64(t1);
} else if ((memop & MO_SIZE) == MO_64) {
#ifdef CONFIG_ATOMIC64
gen_atomic_cx_i64 gen;
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
tcg_debug_assert(gen != NULL);
#ifdef CONFIG_SOFTMMU
{
TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx));
gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi);
tcg_temp_free_i32(oi);
}
#else
gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv);
#endif
#else
gen_helper_exit_atomic(tcg_ctx.tcg_env);
#endif /* CONFIG_ATOMIC64 */
} else {
TCGv_i32 c32 = tcg_temp_new_i32();
TCGv_i32 n32 = tcg_temp_new_i32();
TCGv_i32 r32 = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(c32, cmpv);
tcg_gen_extrl_i64_i32(n32, newv);
tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
tcg_temp_free_i32(c32);
tcg_temp_free_i32(n32);
tcg_gen_extu_i32_i64(retv, r32);
tcg_temp_free_i32(r32);
if (memop & MO_SIGN) {
tcg_gen_ext_i64(retv, retv, memop);
}
}
}
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
TCGArg idx, TCGMemOp memop, bool new_val,
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
TCGv_i32 t1 = tcg_temp_new_i32();
TCGv_i32 t2 = tcg_temp_new_i32();
memop = tcg_canonicalize_memop(memop, 0, 0);
tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
gen(t2, t1, val);
tcg_gen_qemu_st_i32(t2, addr, idx, memop);
tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
tcg_temp_free_i32(t1);
tcg_temp_free_i32(t2);
}
static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
TCGArg idx, TCGMemOp memop, void * const table[])
{
gen_atomic_op_i32 gen;
memop = tcg_canonicalize_memop(memop, 0, 0);
gen = table[memop & (MO_SIZE | MO_BSWAP)];
tcg_debug_assert(gen != NULL);
#ifdef CONFIG_SOFTMMU
{
TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
gen(ret, tcg_ctx.tcg_env, addr, val, oi);
tcg_temp_free_i32(oi);
}
#else
gen(ret, tcg_ctx.tcg_env, addr, val);
#endif
if (memop & MO_SIGN) {
tcg_gen_ext_i32(ret, ret, memop);
}
}
static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
TCGArg idx, TCGMemOp memop, bool new_val,
void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
TCGv_i64 t1 = tcg_temp_new_i64();
TCGv_i64 t2 = tcg_temp_new_i64();
memop = tcg_canonicalize_memop(memop, 1, 0);
tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
gen(t2, t1, val);
tcg_gen_qemu_st_i64(t2, addr, idx, memop);
tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
tcg_temp_free_i64(t1);
tcg_temp_free_i64(t2);
}
static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
TCGArg idx, TCGMemOp memop, void * const table[])
{
memop = tcg_canonicalize_memop(memop, 1, 0);
if ((memop & MO_SIZE) == MO_64) {
#ifdef CONFIG_ATOMIC64
gen_atomic_op_i64 gen;
gen = table[memop & (MO_SIZE | MO_BSWAP)];
tcg_debug_assert(gen != NULL);
#ifdef CONFIG_SOFTMMU
{
TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
gen(ret, tcg_ctx.tcg_env, addr, val, oi);
tcg_temp_free_i32(oi);
}
#else
gen(ret, tcg_ctx.tcg_env, addr, val);
#endif
#else
gen_helper_exit_atomic(tcg_ctx.tcg_env);
#endif /* CONFIG_ATOMIC64 */
} else {
TCGv_i32 v32 = tcg_temp_new_i32();
TCGv_i32 r32 = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(v32, val);
do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
tcg_temp_free_i32(v32);
tcg_gen_extu_i32_i64(ret, r32);
tcg_temp_free_i32(r32);
if (memop & MO_SIGN) {
tcg_gen_ext_i64(ret, ret, memop);
}
}
}
#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
static void * const table_##NAME[16] = { \
[MO_8] = gen_helper_atomic_##NAME##b, \
[MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
[MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
[MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
[MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
}; \
void tcg_gen_atomic_##NAME##_i32 \
(TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, TCGMemOp memop) \
{ \
if (parallel_cpus) { \
do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
} else { \
do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
tcg_gen_##OP##_i32); \
} \
} \
void tcg_gen_atomic_##NAME##_i64 \
(TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, TCGMemOp memop) \
{ \
if (parallel_cpus) { \
do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
} else { \
do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
tcg_gen_##OP##_i64); \
} \
}
GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
tcg_gen_mov_i32(r, b);
}
static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
tcg_gen_mov_i64(r, b);
}
GEN_ATOMIC_HELPER(xchg, mov2, 0)
#undef GEN_ATOMIC_HELPER


@ -854,6 +854,30 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ);
}
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
TCGArg, TCGMemOp);
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
TCGArg, TCGMemOp);
void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
#if TARGET_LONG_BITS == 64
#define tcg_gen_movi_tl tcg_gen_movi_i64
#define tcg_gen_mov_tl tcg_gen_mov_i64
@@ -932,6 +956,16 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_sub2_tl tcg_gen_sub2_i64
#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
#define tcg_gen_muls2_tl tcg_gen_muls2_i64
#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64
#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64
#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64
#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64
#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64
#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
#else
#define tcg_gen_movi_tl tcg_gen_movi_i32
#define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -1009,6 +1043,16 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_sub2_tl tcg_gen_sub2_i32
#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
#define tcg_gen_muls2_tl tcg_gen_muls2_i32
#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32
#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32
#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32
#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32
#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32
#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
#endif
#if UINTPTR_MAX == UINT32_MAX
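As a hedged usage sketch (the cpu_regs array, rd/rs and mem_idx below are hypothetical front-end names, not from this patch): a translator written against target_ulong emits one call and the _tl alias resolves to the _i64 or _i32 entry point according to TARGET_LONG_BITS.

/* Hypothetical front-end fragment: atomically exchange a 32-bit
   target-endian word at guest address 'addr'.  */
tcg_gen_atomic_xchg_tl(cpu_regs[rd], addr, cpu_regs[rs], mem_idx, MO_TEUL);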


@@ -14,3 +14,112 @@ DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
#ifdef CONFIG_SOFTMMU
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
#ifdef CONFIG_ATOMIC64
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
i64, env, tl, i64, i64, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
i64, env, tl, i64, i64, i32)
#endif
#ifdef CONFIG_ATOMIC64
#define GEN_ATOMIC_HELPERS(NAME) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \
TCG_CALL_NO_WG, i64, env, tl, i64, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \
TCG_CALL_NO_WG, i64, env, tl, i64, i32)
#else
#define GEN_ATOMIC_HELPERS(NAME) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32)
#endif /* CONFIG_ATOMIC64 */
#else
DEF_HELPER_FLAGS_4(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
DEF_HELPER_FLAGS_4(atomic_cmpxchgw_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
DEF_HELPER_FLAGS_4(atomic_cmpxchgw_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
DEF_HELPER_FLAGS_4(atomic_cmpxchgl_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
DEF_HELPER_FLAGS_4(atomic_cmpxchgl_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
#ifdef CONFIG_ATOMIC64
DEF_HELPER_FLAGS_4(atomic_cmpxchgq_be, TCG_CALL_NO_WG, i64, env, tl, i64, i64)
DEF_HELPER_FLAGS_4(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, tl, i64, i64)
#endif
#ifdef CONFIG_ATOMIC64
#define GEN_ATOMIC_HELPERS(NAME) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_le), \
TCG_CALL_NO_WG, i64, env, tl, i64) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_be), \
TCG_CALL_NO_WG, i64, env, tl, i64)
#else
#define GEN_ATOMIC_HELPERS(NAME) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \
TCG_CALL_NO_WG, i32, env, tl, i32) \
DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \
TCG_CALL_NO_WG, i32, env, tl, i32)
#endif /* CONFIG_ATOMIC64 */
#endif /* CONFIG_SOFTMMU */
GEN_ATOMIC_HELPERS(fetch_add)
GEN_ATOMIC_HELPERS(fetch_and)
GEN_ATOMIC_HELPERS(fetch_or)
GEN_ATOMIC_HELPERS(fetch_xor)
GEN_ATOMIC_HELPERS(add_fetch)
GEN_ATOMIC_HELPERS(and_fetch)
GEN_ATOMIC_HELPERS(or_fetch)
GEN_ATOMIC_HELPERS(xor_fetch)
GEN_ATOMIC_HELPERS(xchg)
#undef GEN_ATOMIC_HELPERS
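For reference, a hand-expanded sketch of one instantiation: in the user-mode (!CONFIG_SOFTMMU), no-CONFIG_ATOMIC64 configuration, GEN_ATOMIC_HELPERS(fetch_add) declares the following helpers.

DEF_HELPER_FLAGS_3(atomic_fetch_addb,    TCG_CALL_NO_WG, i32, env, tl, i32)
DEF_HELPER_FLAGS_3(atomic_fetch_addw_le, TCG_CALL_NO_WG, i32, env, tl, i32)
DEF_HELPER_FLAGS_3(atomic_fetch_addw_be, TCG_CALL_NO_WG, i32, env, tl, i32)
DEF_HELPER_FLAGS_3(atomic_fetch_addl_le, TCG_CALL_NO_WG, i32, env, tl, i32)
DEF_HELPER_FLAGS_3(atomic_fetch_addl_be, TCG_CALL_NO_WG, i32, env, tl, i32)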


@@ -704,6 +704,7 @@ struct TCGContext {
};
extern TCGContext tcg_ctx;
extern bool parallel_cpus;
static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v)
{
@@ -1176,6 +1177,90 @@ uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
# define helper_ret_ldq_cmmu helper_le_ldq_cmmu
#endif
uint32_t helper_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv,
TCGMemOpIdx oi, uintptr_t retaddr);
uint32_t helper_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv,
TCGMemOpIdx oi, uintptr_t retaddr);
uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv,
TCGMemOpIdx oi, uintptr_t retaddr);
uint64_t helper_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr,
uint64_t cmpv, uint64_t newv,
TCGMemOpIdx oi, uintptr_t retaddr);
uint32_t helper_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv,
TCGMemOpIdx oi, uintptr_t retaddr);
uint32_t helper_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv,
TCGMemOpIdx oi, uintptr_t retaddr);
uint64_t helper_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr,
uint64_t cmpv, uint64_t newv,
TCGMemOpIdx oi, uintptr_t retaddr);
#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \
TYPE helper_atomic_ ## NAME ## SUFFIX ## _mmu \
(CPUArchState *env, target_ulong addr, TYPE val, \
TCGMemOpIdx oi, uintptr_t retaddr);
#ifdef CONFIG_ATOMIC64
#define GEN_ATOMIC_HELPER_ALL(NAME) \
GEN_ATOMIC_HELPER(NAME, uint32_t, b) \
GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \
GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \
GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \
GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \
GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \
GEN_ATOMIC_HELPER(NAME, uint64_t, q_be)
#else
#define GEN_ATOMIC_HELPER_ALL(NAME) \
GEN_ATOMIC_HELPER(NAME, uint32_t, b) \
GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \
GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \
GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \
GEN_ATOMIC_HELPER(NAME, uint32_t, l_be)
#endif
GEN_ATOMIC_HELPER_ALL(fetch_add)
GEN_ATOMIC_HELPER_ALL(fetch_sub)
GEN_ATOMIC_HELPER_ALL(fetch_and)
GEN_ATOMIC_HELPER_ALL(fetch_or)
GEN_ATOMIC_HELPER_ALL(fetch_xor)
GEN_ATOMIC_HELPER_ALL(add_fetch)
GEN_ATOMIC_HELPER_ALL(sub_fetch)
GEN_ATOMIC_HELPER_ALL(and_fetch)
GEN_ATOMIC_HELPER_ALL(or_fetch)
GEN_ATOMIC_HELPER_ALL(xor_fetch)
GEN_ATOMIC_HELPER_ALL(xchg)
#undef GEN_ATOMIC_HELPER_ALL
#undef GEN_ATOMIC_HELPER
#endif /* CONFIG_SOFTMMU */
#ifdef CONFIG_ATOMIC128
#include "qemu/int128.h"
/* These aren't really "proper" helpers because TCG cannot manage Int128.
   However, use the same format as the others, for use by the backends. */
Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
Int128 cmpv, Int128 newv,
TCGMemOpIdx oi, uintptr_t retaddr);
Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
Int128 cmpv, Int128 newv,
TCGMemOpIdx oi, uintptr_t retaddr);
Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
TCGMemOpIdx oi, uintptr_t retaddr);
Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
TCGMemOpIdx oi, uintptr_t retaddr);
void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
TCGMemOpIdx oi, uintptr_t retaddr);
void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
TCGMemOpIdx oi, uintptr_t retaddr);
#endif /* CONFIG_ATOMIC128 */
#endif /* TCG_H */
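Again purely as an illustration of the GEN_ATOMIC_HELPER_ALL block above (hand-expanded, not part of the patch), each NAME produces per-size, per-endianness prototypes of this shape, shown here for fetch_add on bytes.

uint32_t helper_atomic_fetch_addb_mmu(CPUArchState *env, target_ulong addr,
                                      uint32_t val,
                                      TCGMemOpIdx oi, uintptr_t retaddr);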

tests/.gitignore

@@ -1,3 +1,4 @@
atomic_add-bench
check-qdict
check-qfloat
check-qint


@@ -460,7 +460,8 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
tests/test-opts-visitor.o tests/test-qmp-event.o \
tests/rcutorture.o tests/test-rcu-list.o \
tests/test-qdist.o \
tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o
tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \
tests/atomic_add-bench.o
$(test-obj-y): QEMU_INCLUDES += -Itests
QEMU_CFLAGS += -I$(SRC_PATH)/tests
@@ -507,6 +508,7 @@ tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y)
tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
tests/test-bufferiszero$(EXESUF): tests/test-bufferiszero.o $(test-util-obj-y)
tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y)
tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\

tests/atomic_add-bench.c

@@ -0,0 +1,163 @@
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/host-utils.h"
#include "qemu/processor.h"
struct thread_info {
uint64_t r;
} QEMU_ALIGNED(64);
struct count {
unsigned long val;
} QEMU_ALIGNED(64);
static QemuThread *threads;
static struct thread_info *th_info;
static unsigned int n_threads = 1;
static unsigned int n_ready_threads;
static struct count *counts;
static unsigned int duration = 1;
static unsigned int range = 1024;
static bool test_start;
static bool test_stop;
static const char commands_string[] =
" -n = number of threads\n"
" -d = duration in seconds\n"
" -r = range (will be rounded up to pow2)";
static void usage_complete(char *argv[])
{
fprintf(stderr, "Usage: %s [options]\n", argv[0]);
fprintf(stderr, "options:\n%s\n", commands_string);
}
/*
* From: https://en.wikipedia.org/wiki/Xorshift
 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
 * guaranteed to be >= 32767).
*/
static uint64_t xorshift64star(uint64_t x)
{
x ^= x >> 12; /* a */
x ^= x << 25; /* b */
x ^= x >> 27; /* c */
return x * UINT64_C(2685821657736338717);
}
static void *thread_func(void *arg)
{
struct thread_info *info = arg;
atomic_inc(&n_ready_threads);
while (!atomic_read(&test_start)) {
cpu_relax();
}
while (!atomic_read(&test_stop)) {
unsigned int index;
info->r = xorshift64star(info->r);
index = info->r & (range - 1);
atomic_inc(&counts[index].val);
}
return NULL;
}
static void run_test(void)
{
unsigned int remaining;
unsigned int i;
while (atomic_read(&n_ready_threads) != n_threads) {
cpu_relax();
}
atomic_set(&test_start, true);
do {
remaining = sleep(duration);
} while (remaining);
atomic_set(&test_stop, true);
for (i = 0; i < n_threads; i++) {
qemu_thread_join(&threads[i]);
}
}
static void create_threads(void)
{
unsigned int i;
threads = g_new(QemuThread, n_threads);
th_info = g_new(struct thread_info, n_threads);
counts = qemu_memalign(64, sizeof(*counts) * range);
memset(counts, 0, sizeof(*counts) * range);
for (i = 0; i < n_threads; i++) {
struct thread_info *info = &th_info[i];
info->r = (i + 1) ^ time(NULL);
qemu_thread_create(&threads[i], NULL, thread_func, info,
QEMU_THREAD_JOINABLE);
}
}
static void pr_params(void)
{
printf("Parameters:\n");
printf(" # of threads: %u\n", n_threads);
printf(" duration: %u\n", duration);
printf(" ops' range: %u\n", range);
}
static void pr_stats(void)
{
unsigned long long val = 0;
unsigned int i;
double tx;
for (i = 0; i < range; i++) {
val += counts[i].val;
}
tx = val / duration / 1e6;
printf("Results:\n");
printf("Duration: %u s\n", duration);
printf(" Throughput: %.2f Mops/s\n", tx);
printf(" Throughput/thread: %.2f Mops/s/thread\n", tx / n_threads);
}
static void parse_args(int argc, char *argv[])
{
int c;
for (;;) {
c = getopt(argc, argv, "hd:n:r:");
if (c < 0) {
break;
}
switch (c) {
case 'h':
usage_complete(argv);
exit(0);
case 'd':
duration = atoi(optarg);
break;
case 'n':
n_threads = atoi(optarg);
break;
case 'r':
range = pow2ceil(atoi(optarg));
break;
}
}
}
int main(int argc, char *argv[])
{
parse_args(argc, argv);
pr_params();
create_threads();
run_test();
pr_stats();
return 0;
}
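A standalone sanity check of the generator used above (a minimal sketch, independent of QEMU; the seed and range are arbitrary). It mirrors how thread_func() turns successive xorshift64* outputs into indices within a power-of-two range.

#include <stdint.h>
#include <stdio.h>

static uint64_t xorshift64star(uint64_t x)
{
    x ^= x >> 12;
    x ^= x << 25;
    x ^= x >> 27;
    return x * UINT64_C(2685821657736338717);
}

int main(void)
{
    uint64_t r = 1;                /* arbitrary non-zero seed */
    unsigned int range = 1024;     /* must be a power of two */
    int i;

    for (i = 0; i < 4; i++) {
        r = xorshift64star(r);
        printf("index = %u\n", (unsigned int)(r & (range - 1)));
    }
    return 0;
}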


@@ -41,7 +41,7 @@ static Int128 expand(uint32_t x)
uint64_t l, h;
l = expand16(x & 65535);
h = expand16(x >> 16);
return (Int128) {l, h};
    return int128_make128(l, h);
};
static void test_and(void)
@@ -54,8 +54,8 @@ static void test_and(void)
Int128 b = expand(tests[j]);
Int128 r = expand(tests[i] & tests[j]);
Int128 s = int128_and(a, b);
g_assert_cmpuint(r.lo, ==, s.lo);
g_assert_cmpuint(r.hi, ==, s.hi);
g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s));
g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s));
}
}
}
@@ -70,8 +70,8 @@ static void test_add(void)
Int128 b = expand(tests[j]);
Int128 r = expand(tests[i] + tests[j]);
Int128 s = int128_add(a, b);
g_assert_cmpuint(r.lo, ==, s.lo);
g_assert_cmpuint(r.hi, ==, s.hi);
g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s));
g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s));
}
}
}
@@ -86,8 +86,8 @@ static void test_sub(void)
Int128 b = expand(tests[j]);
Int128 r = expand(tests[i] - tests[j]);
Int128 s = int128_sub(a, b);
g_assert_cmpuint(r.lo, ==, s.lo);
g_assert_cmpuint(r.hi, ==, s.hi);
g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s));
g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s));
}
}
}
@@ -100,8 +100,8 @@ static void test_neg(void)
Int128 a = expand(tests[i]);
Int128 r = expand(-tests[i]);
Int128 s = int128_neg(a);
g_assert_cmpuint(r.lo, ==, s.lo);
g_assert_cmpuint(r.hi, ==, s.hi);
g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s));
g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s));
}
}
@@ -180,8 +180,8 @@ test_rshift_one(uint32_t x, int n, uint64_t h, uint64_t l)
{
Int128 a = expand(x);
Int128 r = int128_rshift(a, n);
g_assert_cmpuint(r.lo, ==, l);
g_assert_cmpuint(r.hi, ==, h);
g_assert_cmpuint(int128_getlo(r), ==, l);
g_assert_cmpuint(int128_gethi(r), ==, h);
}
static void test_rshift(void)
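The assertions above now go through the Int128 accessors instead of poking at struct fields, so they keep working whether Int128 is a plain struct or a native __int128. An illustrative fragment of the accessor API as used here (g_assert_cmpuint from glib and qemu/int128.h assumed, just as in the test itself):

/* Build an Int128 from two 64-bit halves and read them back.  */
Int128 v = int128_make128(0x12345678ULL, 0x9abcdef0ULL);   /* lo, hi */
g_assert_cmpuint(int128_getlo(v), ==, 0x12345678ULL);
g_assert_cmpuint(int128_gethi(v), ==, 0x9abcdef0ULL);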


@@ -118,6 +118,7 @@ static void *l1_map[V_L1_MAX_SIZE];
/* code generation context */
TCGContext tcg_ctx;
bool parallel_cpus;
/* translation block context */
#ifdef CONFIG_USER_ONLY