tcg: Add guest load/store primitives for TCGv_i128

These are not yet considering atomicity of the 16-byte value;
this is a direct replacement for the current target code which
uses a pair of 8-byte operations.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
master
Richard Henderson 2022-11-07 19:48:14 +11:00
parent 4771e71c28
commit cb48f3654e
5 changed files with 324 additions and 0 deletions

View File

@ -2192,6 +2192,64 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
}
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
int mmu_idx = get_mmuidx(oi);
MemOpIdx new_oi;
unsigned a_bits;
uint64_t h, l;
tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
a_bits = get_alignment_bits(mop);
/* Handle CPU specific unaligned behaviour */
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
mmu_idx, ra);
}
/* Construct an unaligned 64-bit replacement MemOpIdx. */
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
new_oi = make_memop_idx(mop, mmu_idx);
h = helper_be_ldq_mmu(env, addr, new_oi, ra);
l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return int128_make128(l, h);
}
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
int mmu_idx = get_mmuidx(oi);
MemOpIdx new_oi;
unsigned a_bits;
uint64_t h, l;
tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
a_bits = get_alignment_bits(mop);
/* Handle CPU specific unaligned behaviour */
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
mmu_idx, ra);
}
/* Construct an unaligned 64-bit replacement MemOpIdx. */
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
new_oi = make_memop_idx(mop, mmu_idx);
l = helper_le_ldq_mmu(env, addr, new_oi, ra);
h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return int128_make128(l, h);
}
/*
* Store Helpers
*/
@ -2546,6 +2604,60 @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
}
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
int mmu_idx = get_mmuidx(oi);
MemOpIdx new_oi;
unsigned a_bits;
tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
a_bits = get_alignment_bits(mop);
/* Handle CPU specific unaligned behaviour */
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
mmu_idx, ra);
}
/* Construct an unaligned 64-bit replacement MemOpIdx. */
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
new_oi = make_memop_idx(mop, mmu_idx);
helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
int mmu_idx = get_mmuidx(oi);
MemOpIdx new_oi;
unsigned a_bits;
tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
a_bits = get_alignment_bits(mop);
/* Handle CPU specific unaligned behaviour */
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
mmu_idx, ra);
}
/* Construct an unaligned 64-bit replacement MemOpIdx. */
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
new_oi = make_memop_idx(mop, mmu_idx);
helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
#include "ldst_common.c.inc"
/*

View File

@ -1031,6 +1031,42 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
return ret;
}
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
Int128 ret;
validate_memop(oi, MO_128 | MO_BE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
memcpy(&ret, haddr, 16);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
if (!HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
return ret;
}
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
Int128 ret;
validate_memop(oi, MO_128 | MO_LE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
memcpy(&ret, haddr, 16);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
if (HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
return ret;
}
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
MemOpIdx oi, uintptr_t ra)
{
@ -1115,6 +1151,36 @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_128 | MO_BE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
if (!HOST_BIG_ENDIAN) {
val = bswap128(val);
}
memcpy(haddr, &val, 16);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_128 | MO_LE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
if (HOST_BIG_ENDIAN) {
val = bswap128(val);
}
memcpy(haddr, &val, 16);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
{
uint32_t ret;

View File

@ -220,6 +220,11 @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
MemOpIdx oi, uintptr_t ra);
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra);
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra);
void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
MemOpIdx oi, uintptr_t ra);
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
@ -235,6 +240,11 @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
MemOpIdx oi, uintptr_t ra);
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOpIdx oi, uintptr_t ra);
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOpIdx oi, uintptr_t ra);
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv,
MemOpIdx oi, uintptr_t retaddr);

View File

@ -845,6 +845,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
{

View File

@ -3109,6 +3109,140 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
}
}
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
MemOp mop_1 = orig, mop_2;
tcg_debug_assert((orig & MO_SIZE) == MO_128);
tcg_debug_assert((orig & MO_SIGN) == 0);
/* Use a memory ordering implemented by the host. */
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
mop_1 &= ~MO_BSWAP;
}
/* Reduce the size to 64-bit. */
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
/* Retain the alignment constraints of the original. */
switch (orig & MO_AMASK) {
case MO_UNALN:
case MO_ALIGN_2:
case MO_ALIGN_4:
mop_2 = mop_1;
break;
case MO_ALIGN_8:
/* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
mop_2 = mop_1;
break;
case MO_ALIGN:
/* Second has 8-byte alignment; first has 16-byte alignment. */
mop_2 = mop_1;
mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
break;
case MO_ALIGN_16:
case MO_ALIGN_32:
case MO_ALIGN_64:
/* Second has 8-byte alignment; first retains original. */
mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
break;
default:
g_assert_not_reached();
}
ret[0] = mop_1;
ret[1] = mop_2;
}
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp mop[2];
TCGv addr_p8;
TCGv_i64 x, y;
canonicalize_memop_i128_as_i64(mop, memop);
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
addr = plugin_prep_mem_callbacks(addr);
/* TODO: respect atomicity of the operation. */
/* TODO: allow the tcg backend to see the whole operation. */
/*
* Since there are no global TCGv_i128, there is no visible state
* changed if the second load faults. Load directly into the two
* subwords.
*/
if ((memop & MO_BSWAP) == MO_LE) {
x = TCGV128_LOW(val);
y = TCGV128_HIGH(val);
} else {
x = TCGV128_HIGH(val);
y = TCGV128_LOW(val);
}
gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
if ((mop[0] ^ memop) & MO_BSWAP) {
tcg_gen_bswap64_i64(x, x);
}
addr_p8 = tcg_temp_new();
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
tcg_temp_free(addr_p8);
if ((mop[0] ^ memop) & MO_BSWAP) {
tcg_gen_bswap64_i64(y, y);
}
plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
QEMU_PLUGIN_MEM_R);
}
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp mop[2];
TCGv addr_p8;
TCGv_i64 x, y;
canonicalize_memop_i128_as_i64(mop, memop);
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
addr = plugin_prep_mem_callbacks(addr);
/* TODO: respect atomicity of the operation. */
/* TODO: allow the tcg backend to see the whole operation. */
if ((memop & MO_BSWAP) == MO_LE) {
x = TCGV128_LOW(val);
y = TCGV128_HIGH(val);
} else {
x = TCGV128_HIGH(val);
y = TCGV128_LOW(val);
}
addr_p8 = tcg_temp_new();
if ((mop[0] ^ memop) & MO_BSWAP) {
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_bswap64_i64(t, x);
gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
tcg_gen_bswap64_i64(t, y);
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
tcg_temp_free_i64(t);
} else {
gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
}
tcg_temp_free(addr_p8);
plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
QEMU_PLUGIN_MEM_W);
}
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
switch (opc & MO_SSIZE) {