From 2089fcc9e7b4174d1c351eaa7d277c02188a6dd2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 25 Feb 2019 16:42:04 +0100 Subject: [PATCH 01/15] tcg: Implement tcg_gen_extract2_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be helpful for s390x. Input 128 bit and output 64 bit only, which is sufficient for now. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand Message-Id: <20190225154204.26751-1-david@redhat.com> [rth: Add matching tcg_gen_extract2_i32.] Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ tcg/tcg-op.h | 6 ++++++ 2 files changed, 50 insertions(+) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 1bd7ef24af..7c56c92c8e 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -809,6 +809,28 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, tcg_gen_sari_i32(ret, ret, 32 - len); } +/* + * Extract 32-bits from a 64-bit input, ah:al, starting from ofs. + * Unlike tcg_gen_extract_i32 above, len is fixed at 32. + */ +void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, + unsigned int ofs) +{ + tcg_debug_assert(ofs <= 32); + if (ofs == 0) { + tcg_gen_mov_i32(ret, al); + } else if (ofs == 32) { + tcg_gen_mov_i32(ret, ah); + } else if (al == ah) { + tcg_gen_rotri_i32(ret, al, ofs); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + tcg_gen_shri_i32(t0, al, ofs); + tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs); + tcg_temp_free_i32(t0); + } +} + void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2) { @@ -2297,6 +2319,28 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_sari_i64(ret, ret, 64 - len); } +/* + * Extract 64 bits from a 128-bit input, ah:al, starting from ofs. + * Unlike tcg_gen_extract_i64 above, len is fixed at 64. + */ +void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, + unsigned int ofs) +{ + tcg_debug_assert(ofs <= 64); + if (ofs == 0) { + tcg_gen_mov_i64(ret, al); + } else if (ofs == 64) { + tcg_gen_mov_i64(ret, ah); + } else if (al == ah) { + tcg_gen_rotri_i64(ret, al, ofs); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t0, al, ofs); + tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs); + tcg_temp_free_i64(t0); + } +} + void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2) { diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index d3e51b15af..1f1824c30a 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -308,6 +308,8 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, unsigned int ofs, unsigned int len); void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, unsigned int ofs, unsigned int len); +void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, + unsigned int ofs); void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *); void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *); void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, @@ -501,6 +503,8 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned int len); void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned int len); +void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, + unsigned int ofs); void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *); void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *); void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, @@ -1068,6 +1072,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i64 #define tcg_gen_extract_tl tcg_gen_extract_i64 #define tcg_gen_sextract_tl tcg_gen_sextract_i64 +#define tcg_gen_extract2_tl tcg_gen_extract2_i64 #define tcg_const_tl tcg_const_i64 #define tcg_const_local_tl tcg_const_local_i64 #define tcg_gen_movcond_tl tcg_gen_movcond_i64 @@ -1178,6 +1183,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i32 #define tcg_gen_extract_tl tcg_gen_extract_i32 #define tcg_gen_sextract_tl tcg_gen_sextract_i32 +#define tcg_gen_extract2_tl tcg_gen_extract2_i32 #define tcg_const_tl tcg_const_i32 #define tcg_const_local_tl tcg_const_local_i32 #define tcg_gen_movcond_tl tcg_gen_movcond_i32 From fce1296f135669eca85dc42154a2a352c818ad76 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 25 Feb 2019 10:29:25 -0800 Subject: [PATCH 02/15] tcg: Add INDEX_op_extract2_{i32,i64} This will let backends implement the double-word shift operation. Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- tcg/README | 7 +++++++ tcg/aarch64/tcg-target.h | 2 ++ tcg/arm/tcg-target.h | 1 + tcg/i386/tcg-target.h | 2 ++ tcg/mips/tcg-target.h | 2 ++ tcg/optimize.c | 16 ++++++++++++++++ tcg/ppc/tcg-target.h | 2 ++ tcg/riscv/tcg-target.h | 2 ++ tcg/s390/tcg-target.h | 2 ++ tcg/sparc/tcg-target.h | 2 ++ tcg/tcg-op.c | 4 ++++ tcg/tcg-opc.h | 2 ++ tcg/tcg.c | 4 ++++ tcg/tcg.h | 1 + tcg/tci/tcg-target.h | 2 ++ 15 files changed, 51 insertions(+) diff --git a/tcg/README b/tcg/README index 603f4df659..c30e5418a6 100644 --- a/tcg/README +++ b/tcg/README @@ -343,6 +343,13 @@ at bit 8. This operation would be equivalent to (using an arithmetic right shift). +* extract2_i32/i64 dest, t1, t2, pos + +For N = {32,64}, extract an N-bit quantity from the concatenation +of t2:t1, beginning at pos. The tcg_gen_extract2_{i32,i64} expander +accepts 0 <= pos <= N as inputs. The backend code generator will +not see either 0 or N as inputs for these opcodes. + * extrl_i64_i32 t0, t1 For 64-bit hosts only, extract the low 32-bits of input T1 and place it diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 2d93cf404e..6600a54a02 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -77,6 +77,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -113,6 +114,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 1 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 16172f73a3..4ee6c98958 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -116,6 +116,7 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 7995fe3eab..2c58eaa9ed 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -124,6 +124,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -162,6 +163,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 5cb8672470..c6b091d849 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -162,6 +162,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions @@ -177,6 +178,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions diff --git a/tcg/optimize.c b/tcg/optimize.c index 01e80c3e46..5150c38a25 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1202,6 +1202,22 @@ void tcg_optimize(TCGContext *s) } goto do_default; + CASE_OP_32_64(extract2): + if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { + TCGArg v1 = arg_info(op->args[1])->val; + TCGArg v2 = arg_info(op->args[2])->val; + + if (opc == INDEX_op_extract2_i64) { + tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3])); + } else { + tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3])); + tmp = (int32_t)tmp; + } + tcg_opt_gen_movi(s, op, op->args[0], tmp); + break; + } + goto do_default; + CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(opc, op->args[1], op->args[2], op->args[3]); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 52c1bb04b1..7627fb62d3 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -77,6 +77,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 @@ -115,6 +116,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 60918cacb4..032439d806 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -93,6 +93,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 @@ -128,6 +129,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extrl_i64_i32 1 #define TCG_TARGET_HAS_extrh_i64_i32 1 #define TCG_TARGET_HAS_ext8s_i64 1 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 853ed6e7aa..07accabbd1 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -85,6 +85,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -121,6 +122,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index a0ed2a3342..633841ebf2 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -116,6 +116,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -153,6 +154,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 7c56c92c8e..deacc63e3b 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -823,6 +823,8 @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, tcg_gen_mov_i32(ret, ah); } else if (al == ah) { tcg_gen_rotri_i32(ret, al, ofs); + } else if (TCG_TARGET_HAS_extract2_i32) { + tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs); } else { TCGv_i32 t0 = tcg_temp_new_i32(); tcg_gen_shri_i32(t0, al, ofs); @@ -2333,6 +2335,8 @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, tcg_gen_mov_i64(ret, ah); } else if (al == ah) { tcg_gen_rotri_i64(ret, al, ofs); + } else if (TCG_TARGET_HAS_extract2_i64) { + tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs); } else { TCGv_i64 t0 = tcg_temp_new_i64(); tcg_gen_shri_i64(t0, al, ofs); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 4e0238ad1a..1bad6e4208 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -79,6 +79,7 @@ DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32)) DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32)) +DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32)) DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) @@ -146,6 +147,7 @@ DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64)) DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64)) +DEF(extract2_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_extract2_i64)) /* size changing ops */ DEF(ext_i32_i64, 1, 1, 0, IMPL64) diff --git a/tcg/tcg.c b/tcg/tcg.c index 6a22c8746c..c0730f119f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1426,6 +1426,8 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_extract_i32; case INDEX_op_sextract_i32: return TCG_TARGET_HAS_sextract_i32; + case INDEX_op_extract2_i32: + return TCG_TARGET_HAS_extract2_i32; case INDEX_op_add2_i32: return TCG_TARGET_HAS_add2_i32; case INDEX_op_sub2_i32: @@ -1523,6 +1525,8 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_extract_i64; case INDEX_op_sextract_i64: return TCG_TARGET_HAS_sextract_i64; + case INDEX_op_extract2_i64: + return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: return TCG_TARGET_HAS_extrl_i64_i32; case INDEX_op_extrh_i64_i32: diff --git a/tcg/tcg.h b/tcg/tcg.h index a394d78237..50de1cdda3 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -125,6 +125,7 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 086f34e69a..8b90ab71cb 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -71,6 +71,7 @@ #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 @@ -97,6 +98,7 @@ #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_ext8s_i64 1 From 02616bad6f0788652deaca9a48d0dfa7716ff87a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 25 Feb 2019 10:58:15 -0800 Subject: [PATCH 03/15] tcg: Use deposit and extract2 in tcg_gen_shifti_i64 Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index deacc63e3b..fbc70649dd 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1355,31 +1355,32 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c); tcg_gen_movi_i32(TCGV_LOW(ret), 0); } - } else { - TCGv_i32 t0, t1; - - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); - if (right) { - tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c); - if (arith) { - tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c); - } else { - tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c); - } - tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); - tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0); - tcg_gen_mov_i32(TCGV_HIGH(ret), t1); + } else if (right) { + if (TCG_TARGET_HAS_extract2_i32) { + tcg_gen_extract2_i32(TCGV_LOW(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), c); } else { - tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c); - /* Note: ret can be the same as arg1, so we use t1 */ - tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c); - tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); - tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0); - tcg_gen_mov_i32(TCGV_LOW(ret), t1); + tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); + tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret), + TCGV_HIGH(arg1), 32 - c, c); } - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); + if (arith) { + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); + } else { + tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); + } + } else { + if (TCG_TARGET_HAS_extract2_i32) { + tcg_gen_extract2_i32(TCGV_HIGH(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c); + tcg_gen_deposit_i32(TCGV_HIGH(ret), t0, + TCGV_HIGH(arg1), c, 32 - c); + tcg_temp_free_i32(t0); + } + tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); } } From b0a6056719b4a409a5699d11bbfdf79301417221 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 25 Feb 2019 11:10:53 -0800 Subject: [PATCH 04/15] tcg: Use extract2 in tcg_gen_deposit_{i32,i64} Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index fbc70649dd..a00d1df37e 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -611,9 +611,22 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, return; } - mask = (1u << len) - 1; t1 = tcg_temp_new_i32(); + if (TCG_TARGET_HAS_extract2_i32) { + if (ofs + len == 32) { + tcg_gen_shli_i32(t1, arg1, len); + tcg_gen_extract2_i32(ret, t1, arg2, len); + goto done; + } + if (ofs == 0) { + tcg_gen_extract2_i32(ret, arg1, arg2, len); + tcg_gen_rotli_i32(ret, ret, len); + goto done; + } + } + + mask = (1u << len) - 1; if (ofs + len < 32) { tcg_gen_andi_i32(t1, arg2, mask); tcg_gen_shli_i32(t1, t1, ofs); @@ -622,7 +635,7 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, } tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); tcg_gen_or_i32(ret, ret, t1); - + done: tcg_temp_free_i32(t1); } @@ -2024,9 +2037,22 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, } } - mask = (1ull << len) - 1; t1 = tcg_temp_new_i64(); + if (TCG_TARGET_HAS_extract2_i64) { + if (ofs + len == 64) { + tcg_gen_shli_i64(t1, arg1, len); + tcg_gen_extract2_i64(ret, t1, arg2, len); + goto done; + } + if (ofs == 0) { + tcg_gen_extract2_i64(ret, arg1, arg2, len); + tcg_gen_rotli_i64(ret, ret, len); + goto done; + } + } + + mask = (1ull << len) - 1; if (ofs + len < 64) { tcg_gen_andi_i64(t1, arg2, mask); tcg_gen_shli_i64(t1, t1, ofs); @@ -2035,7 +2061,7 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, } tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); tcg_gen_or_i64(ret, ret, t1); - + done: tcg_temp_free_i64(t1); } From c6fb8c0cf704c4a1a48c3e99e995ad4c58150dab Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 25 Feb 2019 11:42:35 -0800 Subject: [PATCH 05/15] tcg/i386: Support INDEX_op_extract2_{i32,i64} Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.h | 4 ++-- tcg/i386/tcg-target.inc.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 2c58eaa9ed..241bf19413 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -124,7 +124,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 -#define TCG_TARGET_HAS_extract2_i32 0 +#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -163,7 +163,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 -#define TCG_TARGET_HAS_extract2_i64 0 +#define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index e0670e5098..1fa833840e 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -452,6 +452,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_SHUFPS (0xc6 | P_EXT) #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) +#define OPC_SHRD_Ib (0xac | P_EXT) #define OPC_TESTL (0x85) #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) #define OPC_UD2 (0x0b | P_EXT) @@ -2587,6 +2588,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + OP_32_64(extract2): + /* Note that SHRD outputs to the r/m operand. */ + tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); + tcg_out8(s, args[3]); + break; + case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -2845,6 +2852,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } }; static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } }; + static const TCGTargetOpDef r_0_r = { .args_ct_str = { "r", "0", "r" } }; static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } }; static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } }; static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } }; @@ -2970,6 +2978,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_ctpop_i32: case INDEX_op_ctpop_i64: return &r_r; + case INDEX_op_extract2_i32: + case INDEX_op_extract2_i64: + return &r_0_r; case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: From 3b832d67a993968868f4087a9720a5c911e23f7a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 25 Feb 2019 13:20:01 -0800 Subject: [PATCH 06/15] tcg/arm: Support INDEX_op_extract2_i32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.h | 2 +- tcg/arm/tcg-target.inc.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 4ee6c98958..17e771374d 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -116,7 +116,7 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions -#define TCG_TARGET_HAS_extract2_i32 0 +#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 2245a8aeb9..6873b0cf95 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -2064,6 +2064,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sextract_i32: tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]); break; + case INDEX_op_extract2_i32: + /* ??? These optimization vs zero should be generic. */ + /* ??? But we can't substitute 2 for 1 in the opcode stream yet. */ + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_REG, args[0], 0); + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, + args[2], SHIFT_IMM_LSL(32 - args[3])); + } + } else if (const_args[2]) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, + args[1], SHIFT_IMM_LSR(args[3])); + } else { + /* We can do extract2 in 2 insns, vs the 3 required otherwise. */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, + args[2], SHIFT_IMM_LSL(32 - args[3])); + tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP, + args[1], SHIFT_IMM_LSR(args[3])); + } + break; case INDEX_op_div_i32: tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); @@ -2108,6 +2129,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "s", "s", "s", "s" } }; static const TCGTargetOpDef br = { .args_ct_str = { "r", "rIN" } }; + static const TCGTargetOpDef ext2 + = { .args_ct_str = { "r", "rZ", "rZ" } }; static const TCGTargetOpDef dep = { .args_ct_str = { "r", "0", "rZ" } }; static const TCGTargetOpDef movc @@ -2174,6 +2197,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return &br; case INDEX_op_deposit_i32: return &dep; + case INDEX_op_extract2_i32: + return &ext2; case INDEX_op_movcond_i32: return &movc; case INDEX_op_add2_i32: From 464c2969d5d7a0a5d38d2aa5d930986df876d3fb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 25 Feb 2019 13:25:46 -0800 Subject: [PATCH 07/15] tcg/aarch64: Support INDEX_op_extract2_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 4 ++-- tcg/aarch64/tcg-target.inc.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 6600a54a02..ce2bb1f90b 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -77,7 +77,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 -#define TCG_TARGET_HAS_extract2_i32 0 +#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -114,7 +114,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 1 -#define TCG_TARGET_HAS_extract2_i64 0 +#define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index d57f9e500f..8b93598bce 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -2058,6 +2058,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); break; + case INDEX_op_extract2_i64: + case INDEX_op_extract2_i32: + tcg_out_extr(s, ext, a0, a1, a2, args[3]); + break; + case INDEX_op_add2_i32: tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), (int32_t)args[4], args[5], const_args[4], @@ -2300,6 +2305,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "r", "r", "rAL" } }; static const TCGTargetOpDef dep = { .args_ct_str = { "r", "0", "rZ" } }; + static const TCGTargetOpDef ext2 + = { .args_ct_str = { "r", "rZ", "rZ" } }; static const TCGTargetOpDef movc = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } }; static const TCGTargetOpDef add2 @@ -2430,6 +2437,10 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_deposit_i64: return &dep; + case INDEX_op_extract2_i32: + case INDEX_op_extract2_i64: + return &ext2; + case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: From 8b86d6d25807e13a63ab6ea879f976b9f18cc45a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 15 Apr 2019 20:54:54 -1000 Subject: [PATCH 08/15] tcg: Hoist max_insns computation to tb_gen_code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to handle TB's that translate to too much code, we need to place the control of the length of the translation in the hands of the code gen master loop. Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 15 +++++++++++++-- accel/tcg/translator.c | 15 ++------------- include/exec/exec-all.h | 4 ++-- include/exec/translator.h | 3 ++- target/alpha/translate.c | 4 ++-- target/arm/translate.c | 4 ++-- target/cris/translate.c | 10 +--------- target/hppa/translate.c | 5 ++--- target/i386/translate.c | 4 ++-- target/lm32/translate.c | 10 +--------- target/m68k/translate.c | 4 ++-- target/microblaze/translate.c | 10 +--------- target/mips/translate.c | 4 ++-- target/moxie/translate.c | 11 ++--------- target/nios2/translate.c | 14 ++------------ target/openrisc/translate.c | 4 ++-- target/ppc/translate.c | 4 ++-- target/riscv/translate.c | 4 ++-- target/s390x/translate.c | 4 ++-- target/sh4/translate.c | 4 ++-- target/sparc/translate.c | 4 ++-- target/tilegx/translate.c | 12 +----------- target/tricore/translate.c | 16 ++-------------- target/unicore32/translate.c | 10 +--------- target/xtensa/translate.c | 4 ++-- 25 files changed, 56 insertions(+), 127 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 75a6cf49f1..39532fd44c 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1674,7 +1674,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb_page_addr_t phys_pc, phys_page2; target_ulong virt_page2; tcg_insn_unit *gen_code_buf; - int gen_code_size, search_size; + int gen_code_size, search_size, max_insns; #ifdef CONFIG_PROFILER TCGProfile *prof = &tcg_ctx->prof; int64_t ti; @@ -1692,6 +1692,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, cflags &= ~CF_CLUSTER_MASK; cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT; + max_insns = cflags & CF_COUNT_MASK; + if (max_insns == 0) { + max_insns = CF_COUNT_MASK; + } + if (max_insns > TCG_MAX_INSNS) { + max_insns = TCG_MAX_INSNS; + } + if (cpu->singlestep_enabled || singlestep) { + max_insns = 1; + } + buffer_overflow: tb = tb_alloc(pc); if (unlikely(!tb)) { @@ -1721,7 +1732,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_func_start(tcg_ctx); tcg_ctx->cpu = ENV_GET_CPU(env); - gen_intermediate_code(cpu, tb); + gen_intermediate_code(cpu, tb, max_insns); tcg_ctx->cpu = NULL; trace_translate_block(tb, tb->pc, tb->tc.ptr); diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index afd0a49ea6..8d65ead708 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -32,7 +32,7 @@ void translator_loop_temp_check(DisasContextBase *db) } void translator_loop(const TranslatorOps *ops, DisasContextBase *db, - CPUState *cpu, TranslationBlock *tb) + CPUState *cpu, TranslationBlock *tb, int max_insns) { int bp_insn = 0; @@ -42,20 +42,9 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, db->pc_next = db->pc_first; db->is_jmp = DISAS_NEXT; db->num_insns = 0; + db->max_insns = max_insns; db->singlestep_enabled = cpu->singlestep_enabled; - /* Instruction counting */ - db->max_insns = tb_cflags(db->tb) & CF_COUNT_MASK; - if (db->max_insns == 0) { - db->max_insns = CF_COUNT_MASK; - } - if (db->max_insns > TCG_MAX_INSNS) { - db->max_insns = TCG_MAX_INSNS; - } - if (db->singlestep_enabled || singlestep) { - db->max_insns = 1; - } - ops->init_disas_context(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 97b90cb0db..58e988b3b1 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -40,8 +40,8 @@ typedef ram_addr_t tb_page_addr_t; #include "qemu/log.h" -void gen_intermediate_code(CPUState *cpu, struct TranslationBlock *tb); -void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb, +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns); +void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb, target_ulong *data); void cpu_gen_init(void); diff --git a/include/exec/translator.h b/include/exec/translator.h index 71e7b2c347..66dfe906c4 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -123,6 +123,7 @@ typedef struct TranslatorOps { * @db: Disassembly context. * @cpu: Target vCPU. * @tb: Translation block. + * @max_insns: Maximum number of insns to translate. * * Generic translator loop. * @@ -137,7 +138,7 @@ typedef struct TranslatorOps { * - When too many instructions have been translated. */ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, - CPUState *cpu, TranslationBlock *tb); + CPUState *cpu, TranslationBlock *tb, int max_insns); void translator_loop_temp_check(DisasContextBase *db); diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 9d8f9b3eea..2c9cccf6c1 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -3049,10 +3049,10 @@ static const TranslatorOps alpha_tr_ops = { .disas_log = alpha_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; - translator_loop(&alpha_tr_ops, &dc.base, cpu, tb); + translator_loop(&alpha_tr_ops, &dc.base, cpu, tb, max_insns); } void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb, diff --git a/target/arm/translate.c b/target/arm/translate.c index d9e7bb737a..4ea4018e2b 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -13756,7 +13756,7 @@ static const TranslatorOps thumb_translator_ops = { }; /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; const TranslatorOps *ops = &arm_translator_ops; @@ -13770,7 +13770,7 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) } #endif - translator_loop(ops, &dc.base, cpu, tb); + translator_loop(ops, &dc.base, cpu, tb, max_insns); } void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/cris/translate.c b/target/cris/translate.c index 96359c0d7d..b005a5c20e 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -3081,7 +3081,7 @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc) */ /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUCRISState *env = cs->env_ptr; uint32_t pc_start; @@ -3091,7 +3091,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) uint32_t page_start; target_ulong npc; int num_insns; - int max_insns; if (env->pregs[PR_VR] == 32) { dc->decoder = crisv32_decoder; @@ -3137,13 +3136,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) page_start = pc_start & TARGET_PAGE_MASK; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); do { diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 43b74367ea..7c03c62768 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -4312,11 +4312,10 @@ static const TranslatorOps hppa_tr_ops = { .disas_log = hppa_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) - +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&hppa_tr_ops, &ctx.base, cs, tb); + translator_loop(&hppa_tr_ops, &ctx.base, cs, tb, max_insns); } void restore_state_to_opc(CPUHPPAState *env, TranslationBlock *tb, diff --git a/target/i386/translate.c b/target/i386/translate.c index b725bec37c..77d6b73e42 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -8590,11 +8590,11 @@ static const TranslatorOps i386_tr_ops = { }; /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; - translator_loop(&i386_tr_ops, &dc.base, cpu, tb); + translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns); } void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, diff --git a/target/lm32/translate.c b/target/lm32/translate.c index b8b5e12e63..f0e0e7058e 100644 --- a/target/lm32/translate.c +++ b/target/lm32/translate.c @@ -1050,7 +1050,7 @@ static inline void decode(DisasContext *dc, uint32_t ir) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPULM32State *env = cs->env_ptr; LM32CPU *cpu = lm32_env_get_cpu(env); @@ -1058,7 +1058,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) uint32_t pc_start; uint32_t page_start; int num_insns; - int max_insns; pc_start = tb->pc; dc->features = cpu->features; @@ -1078,13 +1077,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) page_start = pc_start & TARGET_PAGE_MASK; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); do { diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 3b2280b48b..58596278c2 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -6170,10 +6170,10 @@ static const TranslatorOps m68k_tr_ops = { .disas_log = m68k_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; - translator_loop(&m68k_tr_ops, &dc.base, cpu, tb); + translator_loop(&m68k_tr_ops, &dc.base, cpu, tb, max_insns); } static double floatx80_to_double(CPUM68KState *env, uint16_t high, uint64_t low) diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index bc2712ddbd..885fc44b51 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -1601,7 +1601,7 @@ static inline void decode(DisasContext *dc, uint32_t ir) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUMBState *env = cs->env_ptr; MicroBlazeCPU *cpu = mb_env_get_cpu(env); @@ -1611,7 +1611,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) uint32_t page_start, org_flags; uint32_t npc; int num_insns; - int max_insns; pc_start = tb->pc; dc->cpu = cpu; @@ -1635,13 +1634,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) page_start = pc_start & TARGET_PAGE_MASK; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); do diff --git a/target/mips/translate.c b/target/mips/translate.c index 7849d53977..f96c0d01ef 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -29721,11 +29721,11 @@ static const TranslatorOps mips_tr_ops = { .disas_log = mips_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&mips_tr_ops, &ctx.base, cs, tb); + translator_loop(&mips_tr_ops, &ctx.base, cs, tb, max_insns); } static void fpu_dump_state(CPUMIPSState *env, FILE *f, int flags) diff --git a/target/moxie/translate.c b/target/moxie/translate.c index dd055c4ca5..c668178f2c 100644 --- a/target/moxie/translate.c +++ b/target/moxie/translate.c @@ -813,13 +813,13 @@ static int decode_opc(MoxieCPU *cpu, DisasContext *ctx) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUMoxieState *env = cs->env_ptr; MoxieCPU *cpu = moxie_env_get_cpu(env); DisasContext ctx; target_ulong pc_start; - int num_insns, max_insns; + int num_insns; pc_start = tb->pc; ctx.pc = pc_start; @@ -829,13 +829,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) ctx.singlestep_enabled = 0; ctx.bstate = BS_NONE; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); do { diff --git a/target/nios2/translate.c b/target/nios2/translate.c index f0bbf78a32..17d8f1877c 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -806,12 +806,11 @@ static void gen_exception(DisasContext *dc, uint32_t excp) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUNios2State *env = cs->env_ptr; DisasContext dc1, *dc = &dc1; int num_insns; - int max_insns; /* Initialize DC */ dc->cpu_env = cpu_env; @@ -824,20 +823,11 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) /* Set up instruction counts */ num_insns = 0; - if (cs->singlestep_enabled || singlestep) { - max_insns = 1; - } else { + if (max_insns > 1) { int page_insns = (TARGET_PAGE_SIZE - (tb->pc & TARGET_PAGE_MASK)) / 4; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } if (max_insns > page_insns) { max_insns = page_insns; } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } } gen_tb_start(tb); diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index a88502fdc1..36821948c0 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -1409,11 +1409,11 @@ static const TranslatorOps openrisc_tr_ops = { .disas_log = openrisc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb); + translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb, max_insns); } void openrisc_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93d77a2626..dee2bb6910 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -7862,11 +7862,11 @@ static const TranslatorOps ppc_tr_ops = { .disas_log = ppc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&ppc_tr_ops, &ctx.base, cs, tb); + translator_loop(&ppc_tr_ops, &ctx.base, cs, tb, max_insns); } void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb, diff --git a/target/riscv/translate.c b/target/riscv/translate.c index dd763647ea..967eac7bc3 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -783,11 +783,11 @@ static const TranslatorOps riscv_tr_ops = { .disas_log = riscv_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&riscv_tr_ops, &ctx.base, cs, tb); + translator_loop(&riscv_tr_ops, &ctx.base, cs, tb, max_insns); } void riscv_translate_init(void) diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 0afa8f7ca5..d4951836ad 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -6552,11 +6552,11 @@ static const TranslatorOps s390x_tr_ops = { .disas_log = s390x_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext dc; - translator_loop(&s390x_tr_ops, &dc.base, cs, tb); + translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns); } void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb, diff --git a/target/sh4/translate.c b/target/sh4/translate.c index cffc6919d0..cdf0888490 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -2383,11 +2383,11 @@ static const TranslatorOps sh4_tr_ops = { .disas_log = sh4_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&sh4_tr_ops, &ctx.base, cs, tb); + translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns); } void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb, diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 74315cdf09..091bab53af 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -5962,11 +5962,11 @@ static const TranslatorOps sparc_tr_ops = { .disas_log = sparc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext dc = {}; - translator_loop(&sparc_tr_ops, &dc.base, cs, tb); + translator_loop(&sparc_tr_ops, &dc.base, cs, tb, max_insns); } void sparc_tcg_init(void) diff --git a/target/tilegx/translate.c b/target/tilegx/translate.c index df1e4d0fef..c46a4ab151 100644 --- a/target/tilegx/translate.c +++ b/target/tilegx/translate.c @@ -2369,7 +2369,7 @@ static void translate_one_bundle(DisasContext *dc, uint64_t bundle) } } -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUTLGState *env = cs->env_ptr; DisasContext ctx; @@ -2377,7 +2377,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) uint64_t pc_start = tb->pc; uint64_t page_start = pc_start & TARGET_PAGE_MASK; int num_insns = 0; - int max_insns = tb_cflags(tb) & CF_COUNT_MASK; dc->pc = pc_start; dc->mmuidx = 0; @@ -2392,15 +2391,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) qemu_log_lock(); qemu_log("IN: %s\n", lookup_symbol(pc_start)); } - if (!max_insns) { - max_insns = CF_COUNT_MASK; - } - if (cs->singlestep_enabled || singlestep) { - max_insns = 1; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); while (1) { diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 352f52bb4a..8f6416144e 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -8807,24 +8807,12 @@ static void decode_opc(CPUTriCoreState *env, DisasContext *ctx, int *is_branch) } } -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUTriCoreState *env = cs->env_ptr; DisasContext ctx; target_ulong pc_start; - int num_insns, max_insns; - - num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (singlestep) { - max_insns = 1; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } + int num_insns = 0; pc_start = tb->pc; ctx.pc = pc_start; diff --git a/target/unicore32/translate.c b/target/unicore32/translate.c index dfe41c9069..89b02d1c3c 100644 --- a/target/unicore32/translate.c +++ b/target/unicore32/translate.c @@ -1871,14 +1871,13 @@ static void disas_uc32_insn(CPUUniCore32State *env, DisasContext *s) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUUniCore32State *env = cs->env_ptr; DisasContext dc1, *dc = &dc1; target_ulong pc_start; uint32_t page_start; int num_insns; - int max_insns; /* generate intermediate code */ num_temps = 0; @@ -1897,13 +1896,6 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) cpu_F1d = tcg_temp_new_i64(); page_start = pc_start & TARGET_PAGE_MASK; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } #ifndef CONFIG_USER_ONLY if ((env->uncached_asr & ASR_M) == ASR_MODE_USER) { diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index 43a5e94daa..301c8e3161 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -1635,10 +1635,10 @@ static const TranslatorOps xtensa_translator_ops = { .disas_log = xtensa_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc = {}; - translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb); + translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb, max_insns); } void xtensa_cpu_dump_state(CPUState *cs, FILE *f, int flags) From 6e6c4efed995d9eca6ae0cfdb2252df830262f50 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 15 Apr 2019 22:06:39 -1000 Subject: [PATCH 09/15] tcg: Restart after TB code generation overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a TB generates too much code, try again with fewer insns. Fixes: https://bugs.launchpad.net/bugs/1824853 Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 38 ++++++++++++++++++++++++++++++++------ tcg/tcg.c | 4 ++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 39532fd44c..20b59f93f4 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1722,6 +1722,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->cflags = cflags; tb->trace_vcpu_dstate = *cpu->trace_dstate; tcg_ctx->tb_cflags = cflags; + tb_overflow: #ifdef CONFIG_PROFILER /* includes aborted translations because of exceptions */ @@ -1755,14 +1756,39 @@ TranslationBlock *tb_gen_code(CPUState *cpu, ti = profile_getclock(); #endif - /* ??? Overflow could be handled better here. In particular, we - don't need to re-do gen_intermediate_code, nor should we re-do - the tcg optimization currently hidden inside tcg_gen_code. All - that should be required is to flush the TBs, allocate a new TB, - re-initialize it per above, and re-do the actual code generation. */ gen_code_size = tcg_gen_code(tcg_ctx, tb); if (unlikely(gen_code_size < 0)) { - goto buffer_overflow; + switch (gen_code_size) { + case -1: + /* + * Overflow of code_gen_buffer, or the current slice of it. + * + * TODO: We don't need to re-do gen_intermediate_code, nor + * should we re-do the tcg optimization currently hidden + * inside tcg_gen_code. All that should be required is to + * flush the TBs, allocate a new TB, re-initialize it per + * above, and re-do the actual code generation. + */ + goto buffer_overflow; + + case -2: + /* + * The code generated for the TranslationBlock is too large. + * The maximum size allowed by the unwind info is 64k. + * There may be stricter constraints from relocations + * in the tcg backend. + * + * Try again with half as many insns as we attempted this time. + * If a single insn overflows, there's a bug somewhere... + */ + max_insns = tb->icount; + assert(max_insns > 1); + max_insns /= 2; + goto tb_overflow; + + default: + g_assert_not_reached(); + } } search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); if (unlikely(search_size < 0)) { diff --git a/tcg/tcg.c b/tcg/tcg.c index c0730f119f..5d255166c0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3996,6 +3996,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { return -1; } + /* Test for TB overflow, as seen by gen_insn_end_off. */ + if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { + return -2; + } } tcg_debug_assert(num_insns >= 0); s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); From 7ecd02a06f8f4c0bbf872ecc15e37035b7e1df5f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 21 Apr 2019 13:34:35 -0700 Subject: [PATCH 10/15] tcg: Restart TB generation after relocation overflow If the TB generates too much code, such that backend relocations overflow, try again with a smaller TB. In support of this, move relocation processing from a random place within tcg_out_op, in the handling of branch opcodes, to a new function at the end of tcg_gen_code. This is not a complete solution, as there are additional relocs generated for out-of-line ldst handling and constant pools. Signed-off-by: Richard Henderson --- tcg/tcg.c | 61 ++++++++++++++++++++++++++----------------------------- tcg/tcg.h | 15 +++++++------- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 5d255166c0..752c45a0ec 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -263,37 +263,17 @@ static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, TCGLabel *l, intptr_t addend) { - TCGRelocation *r; + TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); - if (l->has_value) { - /* FIXME: This may break relocations on RISC targets that - modify instruction fields in place. The caller may not have - written the initial value. */ - bool ok = patch_reloc(code_ptr, type, l->u.value, addend); - tcg_debug_assert(ok); - } else { - /* add a new relocation entry */ - r = tcg_malloc(sizeof(TCGRelocation)); - r->type = type; - r->ptr = code_ptr; - r->addend = addend; - r->next = l->u.first_reloc; - l->u.first_reloc = r; - } + r->type = type; + r->ptr = code_ptr; + r->addend = addend; + QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); } static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) { - intptr_t value = (intptr_t)ptr; - TCGRelocation *r; - tcg_debug_assert(!l->has_value); - - for (r = l->u.first_reloc; r != NULL; r = r->next) { - bool ok = patch_reloc(r->ptr, r->type, value, r->addend); - tcg_debug_assert(ok); - } - l->has_value = 1; l->u.value_ptr = ptr; } @@ -303,16 +283,32 @@ TCGLabel *gen_new_label(void) TCGContext *s = tcg_ctx; TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); - *l = (TCGLabel){ - .id = s->nb_labels++ - }; -#ifdef CONFIG_DEBUG_TCG + memset(l, 0, sizeof(TCGLabel)); + l->id = s->nb_labels++; + QSIMPLEQ_INIT(&l->relocs); + QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); -#endif return l; } +static bool tcg_resolve_relocs(TCGContext *s) +{ + TCGLabel *l; + + QSIMPLEQ_FOREACH(l, &s->labels, next) { + TCGRelocation *r; + uintptr_t value = l->u.value; + + QSIMPLEQ_FOREACH(r, &l->relocs, next) { + if (!patch_reloc(r->ptr, r->type, value, r->addend)) { + return false; + } + } + } + return true; +} + static void set_jmp_reset_offset(TCGContext *s, int which) { size_t off = tcg_current_code_size(s); @@ -1096,9 +1092,7 @@ void tcg_func_start(TCGContext *s) QTAILQ_INIT(&s->ops); QTAILQ_INIT(&s->free_ops); -#ifdef CONFIG_DEBUG_TCG QSIMPLEQ_INIT(&s->labels); -#endif } static inline TCGTemp *tcg_temp_alloc(TCGContext *s) @@ -4015,6 +4009,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) return -1; } #endif + if (!tcg_resolve_relocs(s)) { + return -2; + } /* flush instruction cache */ flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); diff --git a/tcg/tcg.h b/tcg/tcg.h index 50de1cdda3..cfc57110a1 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -238,12 +238,13 @@ typedef uint64_t tcg_insn_unit; do { if (!(X)) { __builtin_unreachable(); } } while (0) #endif -typedef struct TCGRelocation { - struct TCGRelocation *next; - int type; +typedef struct TCGRelocation TCGRelocation; +struct TCGRelocation { + QSIMPLEQ_ENTRY(TCGRelocation) next; tcg_insn_unit *ptr; intptr_t addend; -} TCGRelocation; + int type; +}; typedef struct TCGLabel TCGLabel; struct TCGLabel { @@ -254,11 +255,9 @@ struct TCGLabel { union { uintptr_t value; tcg_insn_unit *value_ptr; - TCGRelocation *first_reloc; } u; -#ifdef CONFIG_DEBUG_TCG + QSIMPLEQ_HEAD(, TCGRelocation) relocs; QSIMPLEQ_ENTRY(TCGLabel) next; -#endif }; typedef struct TCGPool { @@ -691,7 +690,6 @@ struct TCGContext { #endif #ifdef CONFIG_DEBUG_TCG - QSIMPLEQ_HEAD(, TCGLabel) labels; int temps_in_use; int goto_tb_issue_mask; #endif @@ -729,6 +727,7 @@ struct TCGContext { TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ QTAILQ_HEAD(, TCGOp) ops, free_ops; + QSIMPLEQ_HEAD(, TCGLabel) labels; /* Tells which temporary holds a given register. It does not take into account fixed registers */ From 1768987b73fa7e23e58b7844abe5882490ff8e42 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 21 Apr 2019 13:51:56 -0700 Subject: [PATCH 11/15] tcg: Restart TB generation after constant pool overflow This is part b of relocation overflow handling. Signed-off-by: Richard Henderson --- tcg/tcg-pool.inc.c | 12 +++++++----- tcg/tcg.c | 9 +++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tcg/tcg-pool.inc.c b/tcg/tcg-pool.inc.c index 7af5513ff3..4eaa84b631 100644 --- a/tcg/tcg-pool.inc.c +++ b/tcg/tcg-pool.inc.c @@ -121,14 +121,14 @@ static inline void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, /* To be provided by cpu/tcg-target.inc.c. */ static void tcg_out_nop_fill(tcg_insn_unit *p, int count); -static bool tcg_out_pool_finalize(TCGContext *s) +static int tcg_out_pool_finalize(TCGContext *s) { TCGLabelPoolData *p = s->pool_labels; TCGLabelPoolData *l = NULL; void *a; if (p == NULL) { - return true; + return 0; } /* ??? Round up to qemu_icache_linesize, but then do not round @@ -142,15 +142,17 @@ static bool tcg_out_pool_finalize(TCGContext *s) size_t size = sizeof(tcg_target_ulong) * p->nlong; if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { if (unlikely(a > s->code_gen_highwater)) { - return false; + return -1; } memcpy(a, p->data, size); a += size; l = p; } - patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend); + if (!patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend)) { + return -2; + } } s->code_ptr = a; - return true; + return 0; } diff --git a/tcg/tcg.c b/tcg/tcg.c index 752c45a0ec..0394e8ab34 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1019,8 +1019,8 @@ void tcg_prologue_init(TCGContext *s) #ifdef TCG_TARGET_NEED_POOL_LABELS /* Allow the prologue to put e.g. guest_base into a pool entry. */ { - bool ok = tcg_out_pool_finalize(s); - tcg_debug_assert(ok); + int result = tcg_out_pool_finalize(s); + tcg_debug_assert(result == 0); } #endif @@ -4005,8 +4005,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) } #endif #ifdef TCG_TARGET_NEED_POOL_LABELS - if (!tcg_out_pool_finalize(s)) { - return -1; + i = tcg_out_pool_finalize(s); + if (i < 0) { + return i; } #endif if (!tcg_resolve_relocs(s)) { From aeee05f53a5d67304a521d2644dc0a607e3c8b28 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 21 Apr 2019 14:51:00 -0700 Subject: [PATCH 12/15] tcg: Restart TB generation after out-of-line ldst overflow This is part c of relocation overflow handling. Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.inc.c | 16 ++++++++++------ tcg/arm/tcg-target.inc.c | 16 ++++++++++------ tcg/i386/tcg-target.inc.c | 6 ++++-- tcg/mips/tcg-target.inc.c | 6 ++++-- tcg/ppc/tcg-target.inc.c | 14 ++++++++++---- tcg/riscv/tcg-target.inc.c | 16 ++++++++++++---- tcg/s390/tcg-target.inc.c | 20 ++++++++++++-------- tcg/tcg-ldst.inc.c | 18 +++++++++--------- tcg/tcg.c | 7 ++++--- 9 files changed, 75 insertions(+), 44 deletions(-) diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 8b93598bce..eefa929948 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -1395,14 +1395,15 @@ static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) tcg_out_insn(s, 3406, ADR, rd, offset); } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); TCGMemOp size = opc & MO_SIZE; - bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); - tcg_debug_assert(ok); + if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); @@ -1416,16 +1417,18 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_goto(s, lb->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); TCGMemOp size = opc & MO_SIZE; - bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); - tcg_debug_assert(ok); + if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); @@ -1434,6 +1437,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_adr(s, TCG_REG_X4, lb->raddr); tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_goto(s, lb->raddr); + return true; } static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 6873b0cf95..12a65cc9a6 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -1372,15 +1372,16 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->label_ptr[0] = label_ptr; } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg argreg, datalo, datahi; TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); void *func; - bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); - tcg_debug_assert(ok); + if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) { + return false; + } argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1432,16 +1433,18 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_goto(s, COND_AL, lb->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg argreg, datalo, datahi; TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); - tcg_debug_assert(ok); + if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) { + return false; + } argreg = TCG_REG_R0; argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); @@ -1474,6 +1477,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) /* Tail-call to the helper, which will return to the fast path. */ tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + return true; } #endif /* SOFTMMU */ diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 1fa833840e..d5ed9f1ffd 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1730,7 +1730,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64, /* * Generate code for the slow path for a load at the end of block */ -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1809,12 +1809,13 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) /* Jump to the code corresponding to next IR of qemu_st */ tcg_out_jmp(s, l->raddr); + return true; } /* * Generate code for the slow path for a store at the end of block */ -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1877,6 +1878,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) /* "Tail call" to the helper, with the return address back inline. */ tcg_out_push(s, retaddr); tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + return true; } #elif TCG_TARGET_REG_BITS == 32 # define x86_guest_base_seg 0 diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index 8a92e916dd..412cacdcb9 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -1338,7 +1338,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, } } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1385,9 +1385,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } else { tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO); } + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1435,6 +1436,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); /* delay slot */ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + return true; } #endif diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 773690f1d9..c0923ced4f 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -1653,13 +1653,15 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->label_ptr[0] = lptr; } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); TCGReg hi, lo, arg = TCG_REG_R3; - **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); + if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); @@ -1695,16 +1697,19 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_b(s, 0, lb->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); TCGMemOp s_bits = opc & MO_SIZE; TCGReg hi, lo, arg = TCG_REG_R3; - **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); + if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); @@ -1753,6 +1758,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_b(s, 0, lb->raddr); + return true; } #endif /* SOFTMMU */ diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c index b785f4acb7..2932505094 100644 --- a/tcg/riscv/tcg-target.inc.c +++ b/tcg/riscv/tcg-target.inc.c @@ -1065,7 +1065,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, label->label_ptr[0] = label_ptr[0]; } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1080,7 +1080,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* resolve label address */ - patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); + if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, + (intptr_t) s->code_ptr, 0)) { + return false; + } /* call load helper */ tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); @@ -1092,9 +1095,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); tcg_out_goto(s, l->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1111,7 +1115,10 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* resolve label address */ - patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); + if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, + (intptr_t) s->code_ptr, 0)) { + return false; + } /* call store helper */ tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); @@ -1133,6 +1140,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]); tcg_out_goto(s, l->raddr); + return true; } #endif /* CONFIG_SOFTMMU */ diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 7db90b3bae..3d6150b10e 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -1609,16 +1609,17 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->label_ptr[0] = label_ptr; } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg addr_reg = lb->addrlo_reg; TCGReg data_reg = lb->datalo_reg; TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, - (intptr_t)s->code_ptr, 2); - tcg_debug_assert(ok); + if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, + (intptr_t)s->code_ptr, 2)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1630,18 +1631,20 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg addr_reg = lb->addrlo_reg; TCGReg data_reg = lb->datalo_reg; TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, - (intptr_t)s->code_ptr, 2); - tcg_debug_assert(ok); + if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, + (intptr_t)s->code_ptr, 2)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1668,6 +1671,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); + return true; } #else static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, diff --git a/tcg/tcg-ldst.inc.c b/tcg/tcg-ldst.inc.c index 47f41b921b..05f9b3ccd6 100644 --- a/tcg/tcg-ldst.inc.c +++ b/tcg/tcg-ldst.inc.c @@ -38,19 +38,19 @@ typedef struct TCGLabelQemuLdst { * Generate TB finalization at the end of block */ -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); -static bool tcg_out_ldst_finalize(TCGContext *s) +static int tcg_out_ldst_finalize(TCGContext *s) { TCGLabelQemuLdst *lb; /* qemu_ld/st slow paths */ QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { - if (lb->is_ld) { - tcg_out_qemu_ld_slow_path(s, lb); - } else { - tcg_out_qemu_st_slow_path(s, lb); + if (lb->is_ld + ? !tcg_out_qemu_ld_slow_path(s, lb) + : !tcg_out_qemu_st_slow_path(s, lb)) { + return -2; } /* Test for (pending) buffer overflow. The assumption is that any @@ -58,10 +58,10 @@ static bool tcg_out_ldst_finalize(TCGContext *s) the buffer completely. Thus we can test for overflow after generating code without having to check during generation. */ if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { - return false; + return -1; } } - return true; + return 0; } /* diff --git a/tcg/tcg.c b/tcg/tcg.c index 0394e8ab34..f7bef51de8 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -128,7 +128,7 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct); #ifdef TCG_TARGET_NEED_LDST_LABELS -static bool tcg_out_ldst_finalize(TCGContext *s); +static int tcg_out_ldst_finalize(TCGContext *s); #endif #define TCG_HIGHWATER 1024 @@ -4000,8 +4000,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) /* Generate TB finalization at the end of block */ #ifdef TCG_TARGET_NEED_LDST_LABELS - if (!tcg_out_ldst_finalize(s)) { - return -1; + i = tcg_out_ldst_finalize(s); + if (i < 0) { + return i; } #endif #ifdef TCG_TARGET_NEED_POOL_LABELS From a7cdaf710f2aaaf0be855a338dd67463d4bb99e2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 22 Apr 2019 17:12:33 +0000 Subject: [PATCH 13/15] tcg/ppc: Allow the constant pool to overflow at 32k There is no point in coding for a 2GB offset when the max TB size is already limited to 64k. If we further restrict to 32k then we can eliminate the extra ADDIS instruction. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.inc.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index c0923ced4f..36b4791707 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -529,7 +529,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { tcg_insn_unit *target; - tcg_insn_unit old; value += addend; target = (tcg_insn_unit *)value; @@ -540,22 +539,16 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, case R_PPC_REL24: return reloc_pc24(code_ptr, target); case R_PPC_ADDR16: - /* We are abusing this relocation type. This points to a pair - of insns, addis + load. If the displacement is small, we - can nop out the addis. */ - if (value == (int16_t)value) { - code_ptr[0] = NOP; - old = deposit32(code_ptr[1], 0, 16, value); - code_ptr[1] = deposit32(old, 16, 5, TCG_REG_TB); - } else { - int16_t lo = value; - int hi = value - lo; - if (hi + lo != value) { - return false; - } - code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); - code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); + /* + * We are (slightly) abusing this relocation type. In particular, + * assert that the low 2 bits are zero, and do not modify them. + * That way we can use this with LD et al that have opcode bits + * in the low 2 bits of the insn. + */ + if ((value & 3) || value != (int16_t)value) { + return false; } + *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); break; default: g_assert_not_reached(); @@ -701,8 +694,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, if (!in_prologue && USE_REG_TB) { new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr, -(intptr_t)s->code_gen_ptr); - tcg_out32(s, ADDIS | TAI(ret, TCG_REG_TB, 0)); - tcg_out32(s, LD | TAI(ret, ret, 0)); + tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0)); return; } From b4b82d7e9caff7ccca5c621817b5a4b8e95eb9b1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Apr 2019 10:39:39 -0700 Subject: [PATCH 14/15] tcg/arm: Restrict constant pool displacement to 12 bits This will not necessarily restrict the size of the TB, since for v7 the majority of constant pool usage is for calls from the out-of-line ldst code, which is already at the end of the TB. But this does allow us to save one insn per reference on the off-chance. Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.inc.c | 57 +++++++++++++++------------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 12a65cc9a6..abf0c444b4 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -197,6 +197,24 @@ static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) return false; } +static inline bool reloc_pc13(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = tcg_ptr_byte_diff(target, code_ptr) - 8; + + if (offset >= -0xfff && offset <= 0xfff) { + tcg_insn_unit insn = *code_ptr; + bool u = (offset >= 0); + if (!u) { + offset = -offset; + } + insn = deposit32(insn, 23, 1, u); + insn = deposit32(insn, 0, 12, offset); + *code_ptr = insn; + return true; + } + return false; +} + static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { @@ -205,39 +223,10 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, if (type == R_ARM_PC24) { return reloc_pc24(code_ptr, (tcg_insn_unit *)value); } else if (type == R_ARM_PC13) { - intptr_t diff = value - (uintptr_t)(code_ptr + 2); - tcg_insn_unit insn = *code_ptr; - bool u; - - if (diff >= -0xfff && diff <= 0xfff) { - u = (diff >= 0); - if (!u) { - diff = -diff; - } - } else { - int rd = extract32(insn, 12, 4); - int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd; - - if (diff < 0x1000 || diff >= 0x100000) { - return false; - } - - /* add rt, pc, #high */ - *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD - | (TCG_REG_PC << 16) | (rt << 12) - | (20 << 7) | (diff >> 12)); - /* ldr rd, [rt, #low] */ - insn = deposit32(insn, 12, 4, rt); - diff &= 0xfff; - u = 1; - } - insn = deposit32(insn, 23, 1, u); - insn = deposit32(insn, 0, 12, diff); - *code_ptr = insn; + return reloc_pc13(code_ptr, (tcg_insn_unit *)value); } else { g_assert_not_reached(); } - return true; } #define TCG_CT_CONST_ARM 0x100 @@ -605,12 +594,8 @@ static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, static void tcg_out_movi_pool(TCGContext *s, int cond, int rd, uint32_t arg) { - /* The 12-bit range on the ldr insn is sometimes a bit too small. - In order to get around that we require two insns, one of which - will usually be a nop, but may be replaced in patch_reloc. */ new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0); tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0); - tcg_out_nop(s); } static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) @@ -1069,8 +1054,8 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); tcg_out_blx(s, COND_AL, TCG_REG_TMP); } else { - /* ??? Know that movi_pool emits exactly 2 insns. */ - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); + /* ??? Know that movi_pool emits exactly 1 insn. */ + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 0); tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri); } } From ef5dae6805cce7b59d129d801bdc5db71bcbd60d Mon Sep 17 00:00:00 2001 From: Shahab Vahedi Date: Sat, 20 Apr 2019 09:22:37 +0200 Subject: [PATCH 15/15] cputlb: Fix io_readx() to respect the access_type This change adapts io_readx() to its input access_type. Currently io_readx() treats any memory access as a read, although it has an input argument "MMUAccessType access_type". This results in: 1) Calling the tlb_fill() only with MMU_DATA_LOAD 2) Considering only entry->addr_read as the tlb_addr Buglink: https://bugs.launchpad.net/qemu/+bug/1825359 Reviewed-by: Richard Henderson Signed-off-by: Shahab Vahedi Message-Id: <20190420072236.12347-1-shahab.vahedi@gmail.com> [rth: Remove assert; fix expression formatting.] Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 88cc8389e9..f2f618217d 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -878,10 +878,11 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, CPUTLBEntry *entry; target_ulong tlb_addr; - tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); + tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr); entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = entry->addr_read; + tlb_addr = (access_type == MMU_DATA_LOAD ? + entry->addr_read : entry->addr_code); if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ uintptr_t haddr = addr + entry->addend;