accel/tcg: Reorg system mode store helpers

Instead of trying to unify all operations on uint64_t, use
mmu_lookup() to perform the basic tlb hit and resolution.
Create individual functions to handle access by size.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
master
Richard Henderson 2022-10-30 12:07:32 +11:00
parent 8cfdacaa16
commit 592134617c
1 changed files with 193 additions and 215 deletions

View File

@ -2532,322 +2532,300 @@ store_memop(void *haddr, uint64_t val, MemOp op)
}
}
static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr);
static void __attribute__((noinline))
store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
bool big_endian)
/**
* do_st_mmio_leN:
* @env: cpu context
* @p: translation parameters
* @val_le: data to store
* @mmu_idx: virtual address context
* @ra: return address into tcg generated code, or 0
*
* Store @p->size bytes at @p->addr, which is memory-mapped i/o.
* The bytes to store are extracted in little-endian order from @val_le;
* return the bytes of @val_le beyond @p->size that have not been stored.
*/
static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
uint64_t val_le, int mmu_idx, uintptr_t ra)
{
uintptr_t index, index2;
CPUTLBEntry *entry, *entry2;
target_ulong page1, page2, tlb_addr, tlb_addr2;
MemOpIdx oi;
size_t size2;
int i;
CPUTLBEntryFull *full = p->full;
target_ulong addr = p->addr;
int i, size = p->size;
/*
* Ensure the second page is in the TLB. Note that the first page
* is already guaranteed to be filled, and that the second page
* cannot evict the first. An exception to this rule is PAGE_WRITE_INV
* handling: the first page could have evicted itself.
*/
page1 = addr & TARGET_PAGE_MASK;
page2 = (addr + size) & TARGET_PAGE_MASK;
size2 = (addr + size) & ~TARGET_PAGE_MASK;
index2 = tlb_index(env, mmu_idx, page2);
entry2 = tlb_entry(env, mmu_idx, page2);
tlb_addr2 = tlb_addr_write(entry2);
if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) {
tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
mmu_idx, retaddr);
index2 = tlb_index(env, mmu_idx, page2);
entry2 = tlb_entry(env, mmu_idx, page2);
}
tlb_addr2 = tlb_addr_write(entry2);
QEMU_IOTHREAD_LOCK_GUARD();
for (i = 0; i < size; i++, val_le >>= 8) {
io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
}
return val_le;
}
index = tlb_index(env, mmu_idx, addr);
entry = tlb_entry(env, mmu_idx, addr);
tlb_addr = tlb_addr_write(entry);
/**
* do_st_bytes_leN:
* @p: translation parameters
* @val_le: data to store
*
* Store @p->size bytes at @p->haddr, which is RAM.
* The bytes to store are extracted in little-endian order from @val_le;
* return the bytes of @val_le beyond @p->size that have not been stored.
*/
static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
{
uint8_t *haddr = p->haddr;
int i, size = p->size;
/*
* Handle watchpoints. Since this may trap, all checks
* must happen before any store.
*/
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
cpu_check_watchpoint(env_cpu(env), addr, size - size2,
env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
BP_MEM_WRITE, retaddr);
}
if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
cpu_check_watchpoint(env_cpu(env), page2, size2,
env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
BP_MEM_WRITE, retaddr);
for (i = 0; i < size; i++, val_le >>= 8) {
haddr[i] = val_le;
}
return val_le;
}
/*
* XXX: not efficient, but simple.
* This loop must go in the forward direction to avoid issues
* with self-modifying code in Windows 64-bit.
*/
oi = make_memop_idx(MO_UB, mmu_idx);
if (big_endian) {
for (i = 0; i < size; ++i) {
/* Big-endian extract. */
uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
full_stb_mmu(env, addr + i, val8, oi, retaddr);
}
/*
* Wrapper for the above.
*/
static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
uint64_t val_le, int mmu_idx, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
return val_le >> (p->size * 8);
} else {
for (i = 0; i < size; ++i) {
/* Little-endian extract. */
uint8_t val8 = val >> (i * 8);
full_stb_mmu(env, addr + i, val8, oi, retaddr);
}
return do_st_bytes_leN(p, val_le);
}
}
static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr, MemOp op)
static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
int mmu_idx, uintptr_t ra)
{
const unsigned a_bits = get_alignment_bits(get_memop(oi));
const size_t size = memop_size(op);
uintptr_t mmu_idx = get_mmuidx(oi);
uintptr_t index;
CPUTLBEntry *entry;
target_ulong tlb_addr;
void *haddr;
tcg_debug_assert(mmu_idx < NB_MMU_MODES);
/* Handle CPU specific unaligned behaviour */
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
mmu_idx, retaddr);
if (unlikely(p->flags & TLB_MMIO)) {
io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
*(uint8_t *)p->haddr = val;
}
index = tlb_index(env, mmu_idx, addr);
entry = tlb_entry(env, mmu_idx, addr);
tlb_addr = tlb_addr_write(entry);
/* If the TLB entry is for a different page, reload and try again. */
if (!tlb_hit(tlb_addr, addr)) {
if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
addr & TARGET_PAGE_MASK)) {
tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
mmu_idx, retaddr);
index = tlb_index(env, mmu_idx, addr);
entry = tlb_entry(env, mmu_idx, addr);
}
tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
}
/* Handle anything that isn't just a straight memory access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
CPUTLBEntryFull *full;
bool need_swap;
/* For anything that is unaligned, recurse through byte stores. */
if ((addr & (size - 1)) != 0) {
goto do_unaligned_access;
}
full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
/* Handle watchpoints. */
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
/* On watchpoint hit, this will longjmp out. */
cpu_check_watchpoint(env_cpu(env), addr, size,
full->attrs, BP_MEM_WRITE, retaddr);
}
need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
/* Handle I/O access. */
if (tlb_addr & TLB_MMIO) {
io_writex(env, full, mmu_idx, val, addr, retaddr,
op ^ (need_swap * MO_BSWAP));
return;
}
/* Ignore writes to ROM. */
if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
return;
}
/* Handle clean RAM pages. */
if (tlb_addr & TLB_NOTDIRTY) {
notdirty_write(env_cpu(env), addr, size, full, retaddr);
}
haddr = (void *)((uintptr_t)addr + entry->addend);
/*
* Keep these two store_memop separate to ensure that the compiler
* is able to fold the entire function to a single instruction.
* There is a build-time assert inside to remind you of this. ;-)
*/
if (unlikely(need_swap)) {
store_memop(haddr, val, op ^ MO_BSWAP);
} else {
store_memop(haddr, val, op);
}
return;
}
/* Handle slow unaligned access (it spans two pages or IO). */
if (size > 1
&& unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
>= TARGET_PAGE_SIZE)) {
do_unaligned_access:
store_helper_unaligned(env, addr, val, retaddr, size,
mmu_idx, memop_big_endian(op));
return;
}
haddr = (void *)((uintptr_t)addr + entry->addend);
store_memop(haddr, val, op);
}
static void __attribute__((noinline))
full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
int mmu_idx, MemOp memop, uintptr_t ra)
{
validate_memop(oi, MO_UB);
store_helper(env, addr, val, oi, retaddr, MO_UB);
if (unlikely(p->flags & TLB_MMIO)) {
io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
/* Swap to host endian if necessary, then store. */
if (memop & MO_BSWAP) {
val = bswap16(val);
}
store_memop(p->haddr, val, MO_UW);
}
}
/*
 * do_st_4:
 * @env: cpu context
 * @p: translation parameters for the page being written
 * @val: 32-bit value to store
 * @mmu_idx: virtual address context
 * @memop: memory operation, consulted for MO_BSWAP
 * @ra: return address into tcg generated code, or 0
 *
 * Store @val according to @p->flags: TLB_MMIO pages are written with
 * io_writex() at @p->addr, TLB_DISCARD_WRITE pages ignore the store,
 * and everything else is written through @p->haddr with store_memop().
 */
static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
                    int mmu_idx, MemOp memop, uintptr_t ra)
{
    /* Memory-mapped i/o: pass the value and memop on unmodified. */
    if (unlikely(p->flags & TLB_MMIO)) {
        io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
        return;
    }

    /* The page discards writes: nothing to do. */
    if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
        return;
    }

    /* Swap to host endian if necessary, then store. */
    if (memop & MO_BSWAP) {
        val = bswap32(val);
    }
    store_memop(p->haddr, val, MO_UL);
}
/*
 * do_st_8:
 * @env: cpu context
 * @p: translation parameters for the page being written
 * @val: 64-bit value to store
 * @mmu_idx: virtual address context
 * @memop: memory operation, consulted for MO_BSWAP
 * @ra: return address into tcg generated code, or 0
 *
 * Store @val according to @p->flags: TLB_MMIO pages are written with
 * io_writex() at @p->addr, TLB_DISCARD_WRITE pages ignore the store,
 * and everything else is written through @p->haddr with store_memop().
 */
static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
                    int mmu_idx, MemOp memop, uintptr_t ra)
{
    /* Memory-mapped i/o: pass the value and memop on unmodified. */
    if (unlikely(p->flags & TLB_MMIO)) {
        io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
        return;
    }

    /* The page discards writes: nothing to do. */
    if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
        return;
    }

    /* Swap to host endian if necessary, then store. */
    if (memop & MO_BSWAP) {
        val = bswap64(val);
    }
    store_memop(p->haddr, val, MO_UQ);
}
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
MemOpIdx oi, uintptr_t ra)
{
full_stb_mmu(env, addr, val, oi, retaddr);
MMULookupLocals l;
bool crosspage;
validate_memop(oi, MO_UB);
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
tcg_debug_assert(!crosspage);
do_st_1(env, &l.page[0], val, l.mmu_idx, ra);
}
static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
MemOpIdx oi, uintptr_t ra)
{
validate_memop(oi, MO_LEUW);
store_helper(env, addr, val, oi, retaddr, MO_LEUW);
MMULookupLocals l;
bool crosspage;
uint8_t a, b;
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
if (likely(!crosspage)) {
do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
return;
}
if ((l.memop & MO_BSWAP) == MO_LE) {
a = val, b = val >> 8;
} else {
b = val, a = val >> 8;
}
do_st_1(env, &l.page[0], a, l.mmu_idx, ra);
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
}
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
full_le_stw_mmu(env, addr, val, oi, retaddr);
}
static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUW);
store_helper(env, addr, val, oi, retaddr, MO_BEUW);
validate_memop(oi, MO_LEUW);
do_st2_mmu(env, addr, val, oi, retaddr);
}
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
full_be_stw_mmu(env, addr, val, oi, retaddr);
validate_memop(oi, MO_BEUW);
do_st2_mmu(env, addr, val, oi, retaddr);
}
static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
validate_memop(oi, MO_LEUL);
store_helper(env, addr, val, oi, retaddr, MO_LEUL);
MMULookupLocals l;
bool crosspage;
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
if (likely(!crosspage)) {
do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
return;
}
/* Swap to little endian for simplicity, then store by bytes. */
if ((l.memop & MO_BSWAP) != MO_LE) {
val = bswap32(val);
}
val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
}
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
full_le_stl_mmu(env, addr, val, oi, retaddr);
}
static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUL);
store_helper(env, addr, val, oi, retaddr, MO_BEUL);
validate_memop(oi, MO_LEUL);
do_st4_mmu(env, addr, val, oi, retaddr);
}
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
full_be_stl_mmu(env, addr, val, oi, retaddr);
validate_memop(oi, MO_BEUL);
do_st4_mmu(env, addr, val, oi, retaddr);
}
/*
 * do_st8_mmu:
 * @env: cpu context
 * @addr: guest virtual address of the store
 * @val: 64-bit value to store
 * @oi: combined memory operation and mmu index
 * @ra: return address into tcg generated code, or 0
 *
 * Resolve @addr with mmu_lookup() and perform the store.  When the
 * lookup reports a single page, the whole value goes through do_st_8().
 * Otherwise the value is put into little-endian order and written in
 * two pieces with do_st_leN(): the first call handles page[0] and
 * returns the bytes it did not store, which are then written to page[1].
 */
static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       MemOpIdx oi, uintptr_t ra)
{
    MMULookupLocals lookup;
    const bool crosses_page =
        mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &lookup);

    if (likely(!crosses_page)) {
        do_st_8(env, &lookup.page[0], val, lookup.mmu_idx, lookup.memop, ra);
        return;
    }

    /* Swap to little endian for simplicity, then store by bytes. */
    if ((lookup.memop & MO_BSWAP) != MO_LE) {
        val = bswap64(val);
    }

    /* First page consumes its share; the remainder goes to the second. */
    val = do_st_leN(env, &lookup.page[0], val, lookup.mmu_idx, ra);
    (void) do_st_leN(env, &lookup.page[1], val, lookup.mmu_idx, ra);
}
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_LEUQ);
store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
do_st8_mmu(env, addr, val, oi, retaddr);
}
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUQ);
store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
do_st8_mmu(env, addr, val, oi, retaddr);
}
/*
* Store Helpers for cpu_ldst.h
*/
typedef void FullStoreHelper(CPUArchState *env, target_ulong addr,
uint64_t val, MemOpIdx oi, uintptr_t retaddr);
static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
uint64_t val, MemOpIdx oi, uintptr_t ra,
FullStoreHelper *full_store)
static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
{
full_store(env, addr, val, oi, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
MemOpIdx oi, uintptr_t retaddr)
{
cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu);
helper_ret_stb_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
MemOpIdx oi, uintptr_t retaddr)
{
cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu);
helper_be_stw_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu);
helper_be_stl_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu);
helper_be_stq_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
MemOpIdx oi, uintptr_t retaddr)
{
cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu);
helper_le_stw_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu);
helper_le_stl_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
helper_le_stq_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,