tcg/arm: Use register pair allocation for qemu_{ld,st}_i64

Although we still can't use ldrd and strd for all operations,
increase the chances by getting the register allocation correct.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
master
Richard Henderson 2022-10-14 11:24:52 +11:00
parent 9fd86b518e
commit 1b18d1fa05
3 changed files with 24 additions and 13 deletions

View File

@ -15,8 +15,9 @@ C_O0_I2(r, rIN)
C_O0_I2(s, s)
C_O0_I2(w, r)
C_O0_I3(s, s, s)
C_O0_I3(S, p, s)
C_O0_I4(r, r, rI, rI)
C_O0_I4(s, s, s, s)
C_O0_I4(S, p, s, s)
C_O1_I1(r, l)
C_O1_I1(r, r)
C_O1_I1(w, r)
@ -38,8 +39,8 @@ C_O1_I2(w, w, wZ)
C_O1_I3(w, w, w, w)
C_O1_I4(r, r, r, rI, rI)
C_O1_I4(r, r, rIN, rIK, 0)
C_O2_I1(r, r, l)
C_O2_I2(r, r, l, l)
C_O2_I1(e, p, l)
C_O2_I2(e, p, l, l)
C_O2_I2(r, r, r, r)
C_O2_I4(r, r, r, r, rIN, rIK)
C_O2_I4(r, r, rI, rI, rIN, rIK)

View File

@ -8,9 +8,11 @@
* Define constraint letters for register sets:
* REGS(letter, register_mask)
*/
REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
REGS('r', ALL_GENERAL_REGS)
REGS('l', ALL_QLOAD_REGS)
REGS('s', ALL_QSTORE_REGS)
REGS('S', ALL_QSTORE_REGS & 0x5555) /* even qstore */
REGS('w', ALL_VECTOR_REGS)
/*

View File

@ -1694,9 +1694,11 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
break;
case MO_UQ:
/* We used pair allocation for datalo, so already should be aligned. */
tcg_debug_assert((datalo & 1) == 0);
tcg_debug_assert(datahi == datalo + 1);
/* LDRD requires alignment; double-check that. */
if (get_alignment_bits(opc) >= MO_64
&& (datalo & 1) == 0 && datahi == datalo + 1) {
if (get_alignment_bits(opc) >= MO_64) {
/*
* Rm (the second address op) must not overlap Rt or Rt + 1.
* Since datalo is aligned, we can simplify the test via alignment.
@ -1750,9 +1752,11 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
break;
case MO_UQ:
/* We used pair allocation for datalo, so already should be aligned. */
tcg_debug_assert((datalo & 1) == 0);
tcg_debug_assert(datahi == datalo + 1);
/* LDRD requires alignment; double-check that. */
if (get_alignment_bits(opc) >= MO_64
&& (datalo & 1) == 0 && datahi == datalo + 1) {
if (get_alignment_bits(opc) >= MO_64) {
tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
} else if (datalo == addrlo) {
tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
@ -1834,9 +1838,11 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
tcg_out_st32_r(s, cond, datalo, addrlo, addend);
break;
case MO_64:
/* We used pair allocation for datalo, so already should be aligned. */
tcg_debug_assert((datalo & 1) == 0);
tcg_debug_assert(datahi == datalo + 1);
/* STRD requires alignment; double-check that. */
if (get_alignment_bits(opc) >= MO_64
&& (datalo & 1) == 0 && datahi == datalo + 1) {
if (get_alignment_bits(opc) >= MO_64) {
tcg_out_strd_r(s, cond, datalo, addrlo, addend);
} else if (scratch_addend) {
tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
@ -1871,9 +1877,11 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
break;
case MO_64:
/* We used pair allocation for datalo, so already should be aligned. */
tcg_debug_assert((datalo & 1) == 0);
tcg_debug_assert(datahi == datalo + 1);
/* STRD requires alignment; double-check that. */
if (get_alignment_bits(opc) >= MO_64
&& (datalo & 1) == 0 && datahi == datalo + 1) {
if (get_alignment_bits(opc) >= MO_64) {
tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
} else {
tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
@ -2372,11 +2380,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_ld_i32:
return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
case INDEX_op_qemu_ld_i64:
return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
case INDEX_op_qemu_st_i32:
return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
case INDEX_op_qemu_st_i64:
return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
case INDEX_op_st_vec:
return C_O0_I2(w, r);