From 0fdf05d774a5f6b701459d61e7875229667cacbd Mon Sep 17 00:00:00 2001 From: Shawn Anastasio Date: Wed, 12 Jul 2023 11:13:22 -0500 Subject: [PATCH 01/35] target/ppc: Generate storage interrupts for radix RC changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change radix model to always generate a storage interrupt when the R/C bits are not set appropriately in a PTE instead of setting the bits itself. According to the ISA both behaviors are valid, but in practice this change more closely matches behavior observed on the POWER9 CPU. From the POWER9 Processor User's Manual, Section 4.10.13.1: "When performing Radix translation, the POWER9 hardware triggers the appropriate interrupt ... for the mode and type of access whenever Reference (R) and Change (C) bits require setting in either the guest or host page-table entry (PTE)." Signed-off-by: Shawn Anastasio Reviewed-by: Cédric Le Goater Reviewed-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/mmu-radix64.c | 74 ++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 920084bd8f..5823e039e6 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -219,27 +219,25 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, return false; } -static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type, - uint64_t pte, hwaddr pte_addr, int *prot) +static int ppc_radix64_check_rc(MMUAccessType access_type, uint64_t pte) { - CPUState *cs = CPU(cpu); - uint64_t npte; + switch (access_type) { + case MMU_DATA_STORE: + if (!(pte & R_PTE_C)) { + break; + } + /* fall through */ + case MMU_INST_FETCH: + case MMU_DATA_LOAD: + if (!(pte & R_PTE_R)) { + break; + } - npte = pte | R_PTE_R; /* Always set reference bit */ - - if (access_type == MMU_DATA_STORE) { /* Store/Write */ - npte |= R_PTE_C; /* Set change bit */ - } else { - /* - * Treat the page as read-only for now, so that a later write - * will pass through this function again to set the C bit. - */ - *prot &= ~PAGE_WRITE; + /* R/C bits are already set appropriately for this access */ + return 0; } - if (pte ^ npte) { /* If pte has changed then write it back */ - stq_phys(cs->as, pte_addr, npte); - } + return 1; } static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls) @@ -380,7 +378,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, ppc_v3_pate_t pate, hwaddr *h_raddr, int *h_prot, int *h_page_size, bool pde_addr, - int mmu_idx, bool guest_visible) + int mmu_idx, uint64_t lpid, + bool guest_visible) { MMUAccessType access_type = orig_access_type; int fault_cause = 0; @@ -418,7 +417,24 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, } if (guest_visible) { - ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot); + if (ppc_radix64_check_rc(access_type, pte)) { + /* + * Per ISA 3.1 Book III, 7.5.3 and 7.5.5, failure to set R/C during + * partition-scoped translation when effLPID = 0 results in normal + * (non-Hypervisor) Data and Instruction Storage Interrupts + * respectively. + * + * ISA 3.0 is ambiguous about this, but tests on POWER9 hardware + * seem to exhibit the same behavior. + */ + if (lpid > 0) { + ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr, + DSISR_ATOMIC_RC); + } else { + ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC); + } + return 1; + } } return 0; @@ -447,7 +463,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, vaddr eaddr, uint64_t pid, ppc_v3_pate_t pate, hwaddr *g_raddr, int *g_prot, int *g_page_size, - int mmu_idx, bool guest_visible) + int mmu_idx, uint64_t lpid, + bool guest_visible) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -497,7 +514,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, prtbe_addr, pate, &h_raddr, &h_prot, &h_page_size, true, - 5, guest_visible); + 5, lpid, guest_visible); if (ret) { return ret; } @@ -539,7 +556,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, pte_addr, pate, &h_raddr, &h_prot, &h_page_size, - true, 5, guest_visible); + true, 5, lpid, + guest_visible); if (ret) { return ret; } @@ -580,7 +598,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, } if (guest_visible) { - ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, g_prot); + /* R/C bits not appropriately set for access */ + if (ppc_radix64_check_rc(access_type, pte)) { + ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC); + return 1; + } } return 0; @@ -695,7 +717,8 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, if (relocation) { int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid, pate, &g_raddr, &prot, - &psize, mmu_idx, guest_visible); + &psize, mmu_idx, lpid, + guest_visible); if (ret) { return false; } @@ -719,7 +742,8 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, g_raddr, pate, raddr, &prot, &psize, false, - mmu_idx, guest_visible); + mmu_idx, lpid, + guest_visible); if (ret) { return false; } From 99837aa88ce0494f8adb0ebf6bc7ce951f048a8d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 19 Jul 2023 14:59:20 +0930 Subject: [PATCH 02/35] ppc: Add stub implementation of TRIG SPRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux sets these to control cache flush behaviour on Power9. Supervisor and hypervisor are allowed to write, and reads are noops. Add implementations to avoid noisy messages when booting Linux under the pseries machine with guest_errors enabled. Reviewed-by: Nicholas Piggin Signed-off-by: Joel Stanley Signed-off-by: Cédric Le Goater --- target/ppc/cpu.h | 2 ++ target/ppc/cpu_init.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 25fac9577a..6826702ea6 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1897,7 +1897,9 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_PSSCR (0x357) #define SPR_440_INV0 (0x370) #define SPR_440_INV1 (0x371) +#define SPR_TRIG1 (0x371) #define SPR_440_INV2 (0x372) +#define SPR_TRIG2 (0x372) #define SPR_440_INV3 (0x373) #define SPR_440_ITV0 (0x374) #define SPR_440_ITV1 (0x375) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 02b7aad9b0..3b6ccb5ea4 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5660,6 +5660,16 @@ static void register_power_common_book4_sprs(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_tfmr, &spr_write_tfmr, 0x00000000); + spr_register_hv(env, SPR_TRIG1, "TRIG1", + SPR_NOACCESS, SPR_NOACCESS, + &spr_access_nop, &spr_write_generic, + &spr_access_nop, &spr_write_generic, + 0x00000000); + spr_register_hv(env, SPR_TRIG2, "TRIG2", + SPR_NOACCESS, SPR_NOACCESS, + &spr_access_nop, &spr_write_generic, + &spr_access_nop, &spr_write_generic, + 0x00000000); #endif } From 98a18f4d1189886921f563bbff86c30090915dfd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 13:11:11 +1000 Subject: [PATCH 03/35] target/ppc: Remove single-step suppression inside 0x100-0xf00 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single-step interrupts are suppressed if the nip is between 0x100 and 0xf00. This has been the case for a long time and it's not clear what the intention is. Likely either an attempt to suppress trace interrupts for instructions that cause an interrupt on completion, or a workaround to prevent software tripping over itself single stepping its interrupt handlers. BookE interrupt vectors are set by IVOR registers, and BookS has AIL modes and new interrupt types, so there are many interrupts including the debug interrupt which can be outside this range. So any effect it might have had does not cover most cases (including Linux on recent BookS CPUs). Remove this special case. Signed-off-by: Nicholas Piggin [ clg : fixed typo in commit logs ] Signed-off-by: Cédric Le Goater --- target/ppc/translate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 7111b34030..d71811a1b4 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -7408,8 +7408,7 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } /* Honor single stepping. */ - if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP) - && (nip <= 0x100 || nip > 0xf00)) { + if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP)) { switch (is_jmp) { case DISAS_TOO_MANY: case DISAS_EXIT_UPDATE: From 148953849d25f3e4a9a88031ac88921147548cd0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 13:11:12 +1000 Subject: [PATCH 04/35] target/ppc: Improve book3s branch trace interrupt for v2.07S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve the emulation accuracy of the single step and branch trace interrupts for v2.07S. Set SRR1[33]=1, and set SIAR to completed instruction address. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/excp_helper.c | 16 +++++++++++++++- target/ppc/helper.h | 1 + target/ppc/translate.c | 21 +++++++++++---------- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 9aa8e46566..2d6aef5e66 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -1571,9 +1571,11 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) } } break; + case POWERPC_EXCP_TRACE: /* Trace exception */ + msr |= env->error_code; + /* fall through */ case POWERPC_EXCP_DSEG: /* Data segment exception */ case POWERPC_EXCP_ISEG: /* Instruction segment exception */ - case POWERPC_EXCP_TRACE: /* Trace exception */ case POWERPC_EXCP_SDOOR: /* Doorbell interrupt */ case POWERPC_EXCP_PERFM: /* Performance monitor interrupt */ break; @@ -3168,6 +3170,18 @@ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) } #endif /* TARGET_PPC64 */ +/* Single-step tracing */ +void helper_book3s_trace(CPUPPCState *env, target_ulong prev_ip) +{ + uint32_t error_code = 0; + if (env->insns_flags2 & PPC2_ISA207S) { + /* Load/store reporting, SRR1[35, 36] and SDAR, are not implemented. */ + env->spr[SPR_POWER_SIAR] = prev_ip; + error_code = PPC_BIT(33); + } + raise_exception_err(env, POWERPC_EXCP_TRACE, error_code); +} + void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index abec6fe341..f4db32ee1a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -32,6 +32,7 @@ DEF_HELPER_2(read_pmc, tl, env, i32) DEF_HELPER_2(insns_inc, void, env, i32) DEF_HELPER_1(handle_pmc5_overflow, void, env) #endif +DEF_HELPER_2(book3s_trace, void, env, tl) DEF_HELPER_1(check_tlb_flush_local, void, env) DEF_HELPER_1(check_tlb_flush_global, void, env) #endif diff --git a/target/ppc/translate.c b/target/ppc/translate.c index d71811a1b4..02aec093eb 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -336,8 +336,9 @@ static void gen_ppc_maybe_interrupt(DisasContext *ctx) * The exception can be either POWERPC_EXCP_TRACE (on most PowerPCs) or * POWERPC_EXCP_DEBUG (on BookE). */ -static uint32_t gen_prep_dbgex(DisasContext *ctx) +static void gen_debug_exception(DisasContext *ctx) { +#if !defined(CONFIG_USER_ONLY) if (ctx->flags & POWERPC_FLAG_DE) { target_ulong dbsr = 0; if (ctx->singlestep_enabled & CPU_SINGLE_STEP) { @@ -350,16 +351,16 @@ static uint32_t gen_prep_dbgex(DisasContext *ctx) gen_load_spr(t0, SPR_BOOKE_DBSR); tcg_gen_ori_tl(t0, t0, dbsr); gen_store_spr(SPR_BOOKE_DBSR, t0); - return POWERPC_EXCP_DEBUG; + gen_helper_raise_exception(cpu_env, + tcg_constant_i32(POWERPC_EXCP_DEBUG)); + ctx->base.is_jmp = DISAS_NORETURN; } else { - return POWERPC_EXCP_TRACE; + TCGv t0 = tcg_temp_new(); + tcg_gen_movi_tl(t0, ctx->cia); + gen_helper_book3s_trace(cpu_env, t0); + ctx->base.is_jmp = DISAS_NORETURN; } -} - -static void gen_debug_exception(DisasContext *ctx) -{ - gen_helper_raise_exception(cpu_env, tcg_constant_i32(gen_prep_dbgex(ctx))); - ctx->base.is_jmp = DISAS_NORETURN; +#endif } static inline void gen_inval_exception(DisasContext *ctx, uint32_t error) @@ -4182,7 +4183,7 @@ static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) static void gen_lookup_and_goto_ptr(DisasContext *ctx) { if (unlikely(ctx->singlestep_enabled)) { - gen_debug_exception(ctx); + gen_debug_exception(ctx, false); } else { /* * tcg_gen_lookup_and_goto_ptr will exit the TB if From a11e3a1582b8c0d62ae3ef0323526baf2303e44a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 13:11:13 +1000 Subject: [PATCH 05/35] target/ppc: Suppress single step interrupts on rfi-type instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BookS does not take single step interrupts on completion of rfi and similar (rfid, hrfid, rfscv). This is not a completely clean way to do it, but in general non-branch instructions that change NIP on completion are excluded. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/translate.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 02aec093eb..ed6fc8f29e 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -336,7 +336,7 @@ static void gen_ppc_maybe_interrupt(DisasContext *ctx) * The exception can be either POWERPC_EXCP_TRACE (on most PowerPCs) or * POWERPC_EXCP_DEBUG (on BookE). */ -static void gen_debug_exception(DisasContext *ctx) +static void gen_debug_exception(DisasContext *ctx, bool rfi_type) { #if !defined(CONFIG_USER_ONLY) if (ctx->flags & POWERPC_FLAG_DE) { @@ -355,10 +355,12 @@ static void gen_debug_exception(DisasContext *ctx) tcg_constant_i32(POWERPC_EXCP_DEBUG)); ctx->base.is_jmp = DISAS_NORETURN; } else { - TCGv t0 = tcg_temp_new(); - tcg_gen_movi_tl(t0, ctx->cia); - gen_helper_book3s_trace(cpu_env, t0); - ctx->base.is_jmp = DISAS_NORETURN; + if (!rfi_type) { /* BookS does not single step rfi type instructions */ + TCGv t0 = tcg_temp_new(); + tcg_gen_movi_tl(t0, ctx->cia); + gen_helper_book3s_trace(cpu_env, t0); + ctx->base.is_jmp = DISAS_NORETURN; + } } #endif } @@ -7410,6 +7412,8 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) /* Honor single stepping. */ if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP)) { + bool rfi_type = false; + switch (is_jmp) { case DISAS_TOO_MANY: case DISAS_EXIT_UPDATE: @@ -7418,12 +7422,19 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) break; case DISAS_EXIT: case DISAS_CHAIN: + /* + * This is a heuristic, to put it kindly. The rfi class of + * instructions are among the few outside branches that change + * NIP without taking an interrupt. Single step trace interrupts + * do not fire on completion of these instructions. + */ + rfi_type = true; break; default: g_assert_not_reached(); } - gen_debug_exception(ctx); + gen_debug_exception(ctx, rfi_type); return; } From 14192307ef6e63c9a0f3c7fe937e26bee95bc6a9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 13:11:14 +1000 Subject: [PATCH 06/35] target/ppc: Implement breakpoint debug facility for v2.07S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ISA v2.07S introduced the breakpoint facility based on the CIABR SPR. Implement this in TCG. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/cpu.c | 27 ++++++++++++++++++++++++++ target/ppc/cpu.h | 3 +++ target/ppc/cpu_init.c | 5 ++++- target/ppc/excp_helper.c | 42 ++++++++++++++++++++++++++++++++++++++++ target/ppc/helper.h | 1 + target/ppc/internal.h | 2 ++ target/ppc/machine.c | 4 ++++ target/ppc/misc_helper.c | 5 +++++ target/ppc/spr_common.h | 1 + target/ppc/translate.c | 10 +++++++++- 10 files changed, 98 insertions(+), 2 deletions(-) diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c index 424f2e1741..d9c665ce18 100644 --- a/target/ppc/cpu.c +++ b/target/ppc/cpu.c @@ -102,6 +102,33 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val) ppc_maybe_interrupt(env); } + +#if defined(TARGET_PPC64) +void ppc_update_ciabr(CPUPPCState *env) +{ + CPUState *cs = env_cpu(env); + target_ulong ciabr = env->spr[SPR_CIABR]; + target_ulong ciea, priv; + + ciea = ciabr & PPC_BITMASK(0, 61); + priv = ciabr & PPC_BITMASK(62, 63); + + if (env->ciabr_breakpoint) { + cpu_breakpoint_remove_by_ref(cs, env->ciabr_breakpoint); + env->ciabr_breakpoint = NULL; + } + + if (priv) { + cpu_breakpoint_insert(cs, ciea, BP_CPU, &env->ciabr_breakpoint); + } +} + +void ppc_store_ciabr(CPUPPCState *env, target_ulong val) +{ + env->spr[SPR_CIABR] = val; + ppc_update_ciabr(env); +} +#endif #endif static inline void fpscr_set_rounding_mode(CPUPPCState *env) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 6826702ea6..264a915ad9 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1137,6 +1137,7 @@ struct CPUArchState { /* MMU context, only relevant for full system emulation */ #if defined(TARGET_PPC64) ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */ + struct CPUBreakpoint *ciabr_breakpoint; #endif target_ulong sr[32]; /* segment registers */ uint32_t nb_BATs; /* number of BATs */ @@ -1403,6 +1404,8 @@ void ppc_translate_init(void); #if !defined(CONFIG_USER_ONLY) void ppc_store_sdr1(CPUPPCState *env, target_ulong value); void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val); +void ppc_update_ciabr(CPUPPCState *env); +void ppc_store_ciabr(CPUPPCState *env, target_ulong value); #endif /* !defined(CONFIG_USER_ONLY) */ void ppc_store_msr(CPUPPCState *env, target_ulong value); diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 3b6ccb5ea4..18b4757faa 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5127,7 +5127,7 @@ static void register_book3s_207_dbg_sprs(CPUPPCState *env) spr_register_kvm_hv(env, SPR_CIABR, "CIABR", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_write_ciabr, KVM_REG_PPC_CIABR, 0x00000000); } @@ -7159,6 +7159,7 @@ static void ppc_cpu_reset_hold(Object *obj) env->nip = env->hreset_vector | env->excp_prefix; if (tcg_enabled()) { + cpu_breakpoint_remove_all(s, BP_CPU); if (env->mmu_model != POWERPC_MMU_REAL) { ppc_tlb_invalidate_all(env); } @@ -7346,6 +7347,8 @@ static const struct TCGCPUOps ppc_tcg_ops = { .cpu_exec_exit = ppc_cpu_exec_exit, .do_unaligned_access = ppc_cpu_do_unaligned_access, .do_transaction_failed = ppc_cpu_do_transaction_failed, + .debug_excp_handler = ppc_cpu_debug_excp_handler, + .debug_check_breakpoint = ppc_cpu_debug_check_breakpoint, #endif /* !CONFIG_USER_ONLY */ }; #endif /* CONFIG_TCG */ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 2d6aef5e66..9c9881ae19 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -3257,5 +3257,47 @@ void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, cs->exception_index = POWERPC_EXCP_MCHECK; cpu_loop_exit_restore(cs, retaddr); } + +void ppc_cpu_debug_excp_handler(CPUState *cs) +{ +#if defined(TARGET_PPC64) + CPUPPCState *env = cs->env_ptr; + + if (env->insns_flags2 & PPC2_ISA207S) { + if (cpu_breakpoint_test(cs, env->nip, BP_CPU)) { + raise_exception_err(env, POWERPC_EXCP_TRACE, + PPC_BIT(33) | PPC_BIT(43)); + } + } +#endif +} + +bool ppc_cpu_debug_check_breakpoint(CPUState *cs) +{ +#if defined(TARGET_PPC64) + CPUPPCState *env = cs->env_ptr; + + if (env->insns_flags2 & PPC2_ISA207S) { + target_ulong priv; + + priv = env->spr[SPR_CIABR] & PPC_BITMASK(62, 63); + switch (priv) { + case 0x1: /* problem */ + return env->msr & ((target_ulong)1 << MSR_PR); + case 0x2: /* supervisor */ + return (!(env->msr & ((target_ulong)1 << MSR_PR)) && + !(env->msr & ((target_ulong)1 << MSR_HV))); + case 0x3: /* hypervisor */ + return (!(env->msr & ((target_ulong)1 << MSR_PR)) && + (env->msr & ((target_ulong)1 << MSR_HV))); + default: + g_assert_not_reached(); + } + } +#endif + + return false; +} + #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ diff --git a/target/ppc/helper.h b/target/ppc/helper.h index f4db32ee1a..83d5deec07 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -25,6 +25,7 @@ DEF_HELPER_1(hrfid, void, env) DEF_HELPER_2(rfebb, void, env, tl) DEF_HELPER_2(store_lpcr, void, env, tl) DEF_HELPER_2(store_pcr, void, env, tl) +DEF_HELPER_2(store_ciabr, void, env, tl) DEF_HELPER_2(store_mmcr0, void, env, tl) DEF_HELPER_2(store_mmcr1, void, env, tl) DEF_HELPER_3(store_pmc, void, env, i32, i64) diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 57acb3212c..16f02fd9c4 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -301,6 +301,8 @@ void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, MMUAccessType access_type, int mmu_idx, MemTxAttrs attrs, MemTxResult response, uintptr_t retaddr); +void ppc_cpu_debug_excp_handler(CPUState *cs); +bool ppc_cpu_debug_check_breakpoint(CPUState *cs); #endif FIELD(GER_MSK, XMSK, 0, 4) diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 1270a1f7fc..890b7ea7af 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -313,6 +313,10 @@ static int cpu_post_load(void *opaque, int version_id) post_load_update_msr(env); if (tcg_enabled()) { + /* Re-set breaks based on regs */ +#if defined(TARGET_PPC64) + ppc_update_ciabr(env); +#endif pmu_mmcr01_updated(env); } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 692d058665..0b0f2e59a7 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -199,6 +199,11 @@ void helper_store_pcr(CPUPPCState *env, target_ulong value) env->spr[SPR_PCR] = value & pcc->pcr_mask; } +void helper_store_ciabr(CPUPPCState *env, target_ulong value) +{ + ppc_store_ciabr(env, value); +} + /* * DPDES register is shared. Each bit reflects the state of the * doorbell interrupt of a thread of the same core. diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h index 5995070eaf..b7bedd9ef1 100644 --- a/target/ppc/spr_common.h +++ b/target/ppc/spr_common.h @@ -159,6 +159,7 @@ void spr_read_mas73(DisasContext *ctx, int gprn, int sprn); #ifdef TARGET_PPC64 void spr_read_cfar(DisasContext *ctx, int gprn, int sprn); void spr_write_cfar(DisasContext *ctx, int sprn, int gprn); +void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn); void spr_write_ureg(DisasContext *ctx, int sprn, int gprn); void spr_read_purr(DisasContext *ctx, int gprn, int sprn); void spr_write_purr(DisasContext *ctx, int sprn, int gprn); diff --git a/target/ppc/translate.c b/target/ppc/translate.c index ed6fc8f29e..8b22f7accd 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -557,8 +557,9 @@ void spr_write_lr(DisasContext *ctx, int sprn, int gprn) tcg_gen_mov_tl(cpu_lr, cpu_gpr[gprn]); } -/* CFAR */ #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) +/* Debug facilities */ +/* CFAR */ void spr_read_cfar(DisasContext *ctx, int gprn, int sprn) { tcg_gen_mov_tl(cpu_gpr[gprn], cpu_cfar); @@ -568,6 +569,13 @@ void spr_write_cfar(DisasContext *ctx, int sprn, int gprn) { tcg_gen_mov_tl(cpu_cfar, cpu_gpr[gprn]); } + +/* Breakpoint */ +void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn) +{ + translator_io_start(&ctx->base); + gen_helper_store_ciabr(cpu_env, cpu_gpr[gprn]); +} #endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */ /* CTR */ From d5ee641cfc5c3cbd51282d0c6e996f990b9d62a3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 13:11:15 +1000 Subject: [PATCH 07/35] target/ppc: Implement watchpoint debug facility for v2.07S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ISA v2.07S introduced the watchpoint facility based on the DAWR0 and DAWRX0 SPRs. Implement this in TCG. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/cpu.c | 59 ++++++++++++++++++++++++++++++++++++++++ target/ppc/cpu.h | 4 +++ target/ppc/cpu_init.c | 6 ++-- target/ppc/excp_helper.c | 52 ++++++++++++++++++++++++++++++++++- target/ppc/helper.h | 2 ++ target/ppc/internal.h | 1 + target/ppc/machine.c | 1 + target/ppc/misc_helper.c | 10 +++++++ target/ppc/spr_common.h | 2 ++ target/ppc/translate.c | 13 +++++++++ 10 files changed, 147 insertions(+), 3 deletions(-) diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c index d9c665ce18..62e1c15e3d 100644 --- a/target/ppc/cpu.c +++ b/target/ppc/cpu.c @@ -128,6 +128,65 @@ void ppc_store_ciabr(CPUPPCState *env, target_ulong val) env->spr[SPR_CIABR] = val; ppc_update_ciabr(env); } + +void ppc_update_daw0(CPUPPCState *env) +{ + CPUState *cs = env_cpu(env); + target_ulong deaw = env->spr[SPR_DAWR0] & PPC_BITMASK(0, 60); + uint32_t dawrx = env->spr[SPR_DAWRX0]; + int mrd = extract32(dawrx, PPC_BIT_NR(48), 54 - 48); + bool dw = extract32(dawrx, PPC_BIT_NR(57), 1); + bool dr = extract32(dawrx, PPC_BIT_NR(58), 1); + bool hv = extract32(dawrx, PPC_BIT_NR(61), 1); + bool sv = extract32(dawrx, PPC_BIT_NR(62), 1); + bool pr = extract32(dawrx, PPC_BIT_NR(62), 1); + vaddr len; + int flags; + + if (env->dawr0_watchpoint) { + cpu_watchpoint_remove_by_ref(cs, env->dawr0_watchpoint); + env->dawr0_watchpoint = NULL; + } + + if (!dr && !dw) { + return; + } + + if (!hv && !sv && !pr) { + return; + } + + len = (mrd + 1) * 8; + flags = BP_CPU | BP_STOP_BEFORE_ACCESS; + if (dr) { + flags |= BP_MEM_READ; + } + if (dw) { + flags |= BP_MEM_WRITE; + } + + cpu_watchpoint_insert(cs, deaw, len, flags, &env->dawr0_watchpoint); +} + +void ppc_store_dawr0(CPUPPCState *env, target_ulong val) +{ + env->spr[SPR_DAWR0] = val; + ppc_update_daw0(env); +} + +void ppc_store_dawrx0(CPUPPCState *env, uint32_t val) +{ + int hrammc = extract32(val, PPC_BIT_NR(56), 1); + + if (hrammc) { + /* This might be done with a second watchpoint at the xor of DEAW[0] */ + qemu_log_mask(LOG_UNIMP, "%s: DAWRX0[HRAMMC] is unimplemented\n", + __func__); + } + + env->spr[SPR_DAWRX0] = val; + ppc_update_daw0(env); +} #endif #endif diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 264a915ad9..7e7a60f68f 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1138,6 +1138,7 @@ struct CPUArchState { #if defined(TARGET_PPC64) ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */ struct CPUBreakpoint *ciabr_breakpoint; + struct CPUWatchpoint *dawr0_watchpoint; #endif target_ulong sr[32]; /* segment registers */ uint32_t nb_BATs; /* number of BATs */ @@ -1406,6 +1407,9 @@ void ppc_store_sdr1(CPUPPCState *env, target_ulong value); void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val); void ppc_update_ciabr(CPUPPCState *env); void ppc_store_ciabr(CPUPPCState *env, target_ulong value); +void ppc_update_daw0(CPUPPCState *env); +void ppc_store_dawr0(CPUPPCState *env, target_ulong value); +void ppc_store_dawrx0(CPUPPCState *env, uint32_t value); #endif /* !defined(CONFIG_USER_ONLY) */ void ppc_store_msr(CPUPPCState *env, target_ulong value); diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 18b4757faa..7ab5ee92d9 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5117,12 +5117,12 @@ static void register_book3s_207_dbg_sprs(CPUPPCState *env) spr_register_kvm_hv(env, SPR_DAWR0, "DAWR0", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_write_dawr0, KVM_REG_PPC_DAWR, 0x00000000); spr_register_kvm_hv(env, SPR_DAWRX0, "DAWRX0", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic32, + &spr_read_generic, &spr_write_dawrx0, KVM_REG_PPC_DAWRX, 0x00000000); spr_register_kvm_hv(env, SPR_CIABR, "CIABR", SPR_NOACCESS, SPR_NOACCESS, @@ -7160,6 +7160,7 @@ static void ppc_cpu_reset_hold(Object *obj) if (tcg_enabled()) { cpu_breakpoint_remove_all(s, BP_CPU); + cpu_watchpoint_remove_all(s, BP_CPU); if (env->mmu_model != POWERPC_MMU_REAL) { ppc_tlb_invalidate_all(env); } @@ -7349,6 +7350,7 @@ static const struct TCGCPUOps ppc_tcg_ops = { .do_transaction_failed = ppc_cpu_do_transaction_failed, .debug_excp_handler = ppc_cpu_debug_excp_handler, .debug_check_breakpoint = ppc_cpu_debug_check_breakpoint, + .debug_check_watchpoint = ppc_cpu_debug_check_watchpoint, #endif /* !CONFIG_USER_ONLY */ }; #endif /* CONFIG_TCG */ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 9c9881ae19..32e46e56b3 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -3264,7 +3264,15 @@ void ppc_cpu_debug_excp_handler(CPUState *cs) CPUPPCState *env = cs->env_ptr; if (env->insns_flags2 & PPC2_ISA207S) { - if (cpu_breakpoint_test(cs, env->nip, BP_CPU)) { + if (cs->watchpoint_hit) { + if (cs->watchpoint_hit->flags & BP_CPU) { + env->spr[SPR_DAR] = cs->watchpoint_hit->hitaddr; + env->spr[SPR_DSISR] = PPC_BIT(41); + cs->watchpoint_hit = NULL; + raise_exception(env, POWERPC_EXCP_DSI); + } + cs->watchpoint_hit = NULL; + } else if (cpu_breakpoint_test(cs, env->nip, BP_CPU)) { raise_exception_err(env, POWERPC_EXCP_TRACE, PPC_BIT(33) | PPC_BIT(43)); } @@ -3299,5 +3307,47 @@ bool ppc_cpu_debug_check_breakpoint(CPUState *cs) return false; } +bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) +{ +#if defined(TARGET_PPC64) + CPUPPCState *env = cs->env_ptr; + + if (env->insns_flags2 & PPC2_ISA207S) { + if (wp == env->dawr0_watchpoint) { + uint32_t dawrx = env->spr[SPR_DAWRX0]; + bool wt = extract32(dawrx, PPC_BIT_NR(59), 1); + bool wti = extract32(dawrx, PPC_BIT_NR(60), 1); + bool hv = extract32(dawrx, PPC_BIT_NR(61), 1); + bool sv = extract32(dawrx, PPC_BIT_NR(62), 1); + bool pr = extract32(dawrx, PPC_BIT_NR(62), 1); + + if ((env->msr & ((target_ulong)1 << MSR_PR)) && !pr) { + return false; + } else if ((env->msr & ((target_ulong)1 << MSR_HV)) && !hv) { + return false; + } else if (!sv) { + return false; + } + + if (!wti) { + if (env->msr & ((target_ulong)1 << MSR_DR)) { + if (!wt) { + return false; + } + } else { + if (wt) { + return false; + } + } + } + + return true; + } + } +#endif + + return false; +} + #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 83d5deec07..86f97ee1e7 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -26,6 +26,8 @@ DEF_HELPER_2(rfebb, void, env, tl) DEF_HELPER_2(store_lpcr, void, env, tl) DEF_HELPER_2(store_pcr, void, env, tl) DEF_HELPER_2(store_ciabr, void, env, tl) +DEF_HELPER_2(store_dawr0, void, env, tl) +DEF_HELPER_2(store_dawrx0, void, env, tl) DEF_HELPER_2(store_mmcr0, void, env, tl) DEF_HELPER_2(store_mmcr1, void, env, tl) DEF_HELPER_3(store_pmc, void, env, i32, i64) diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 16f02fd9c4..15803bc313 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -303,6 +303,7 @@ void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, MemTxResult response, uintptr_t retaddr); void ppc_cpu_debug_excp_handler(CPUState *cs); bool ppc_cpu_debug_check_breakpoint(CPUState *cs); +bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp); #endif FIELD(GER_MSK, XMSK, 0, 4) diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 890b7ea7af..07ca18437f 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -316,6 +316,7 @@ static int cpu_post_load(void *opaque, int version_id) /* Re-set breaks based on regs */ #if defined(TARGET_PPC64) ppc_update_ciabr(env); + ppc_update_daw0(env); #endif pmu_mmcr01_updated(env); } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 0b0f2e59a7..a05bdf78c9 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -204,6 +204,16 @@ void helper_store_ciabr(CPUPPCState *env, target_ulong value) ppc_store_ciabr(env, value); } +void helper_store_dawr0(CPUPPCState *env, target_ulong value) +{ + ppc_store_dawr0(env, value); +} + +void helper_store_dawrx0(CPUPPCState *env, target_ulong value) +{ + ppc_store_dawrx0(env, value); +} + /* * DPDES register is shared. Each bit reflects the state of the * doorbell interrupt of a thread of the same core. diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h index b7bedd9ef1..8a9d6cd994 100644 --- a/target/ppc/spr_common.h +++ b/target/ppc/spr_common.h @@ -160,6 +160,8 @@ void spr_read_mas73(DisasContext *ctx, int gprn, int sprn); void spr_read_cfar(DisasContext *ctx, int gprn, int sprn); void spr_write_cfar(DisasContext *ctx, int sprn, int gprn); void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn); +void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn); +void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn); void spr_write_ureg(DisasContext *ctx, int sprn, int gprn); void spr_read_purr(DisasContext *ctx, int gprn, int sprn); void spr_write_purr(DisasContext *ctx, int sprn, int gprn); diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8b22f7accd..16592194e2 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -576,6 +576,19 @@ void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn) translator_io_start(&ctx->base); gen_helper_store_ciabr(cpu_env, cpu_gpr[gprn]); } + +/* Watchpoint */ +void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn) +{ + translator_io_start(&ctx->base); + gen_helper_store_dawr0(cpu_env, cpu_gpr[gprn]); +} + +void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn) +{ + translator_io_start(&ctx->base); + gen_helper_store_dawrx0(cpu_env, cpu_gpr[gprn]); +} #endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */ /* CTR */ From 17f826af864a253a388ad1f2e21bdc82b67dd83c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 13:11:16 +1000 Subject: [PATCH 08/35] spapr: implement H_SET_MODE debug facilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire up the H_SET_MODE debug resources to the CIABR and DAWR0 debug facilities in TCG. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/spapr_hcall.c | 57 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 9b1f225d4a..b7dc388f2f 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -3,6 +3,7 @@ #include "qapi/error.h" #include "sysemu/hw_accel.h" #include "sysemu/runstate.h" +#include "sysemu/tcg.h" #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" @@ -789,6 +790,54 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, SpaprMachineState *spapr, return H_SUCCESS; } +static target_ulong h_set_mode_resource_set_ciabr(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + CPUPPCState *env = &cpu->env; + + assert(tcg_enabled()); /* KVM will have handled this */ + + if (mflags) { + return H_UNSUPPORTED_FLAG; + } + if (value2) { + return H_P4; + } + if ((value1 & PPC_BITMASK(62, 63)) == 0x3) { + return H_P3; + } + + ppc_store_ciabr(env, value1); + + return H_SUCCESS; +} + +static target_ulong h_set_mode_resource_set_dawr0(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + CPUPPCState *env = &cpu->env; + + assert(tcg_enabled()); /* KVM will have handled this */ + + if (mflags) { + return H_UNSUPPORTED_FLAG; + } + if (value2 & PPC_BIT(61)) { + return H_P4; + } + + ppc_store_dawr0(env, value1); + ppc_store_dawrx0(env, value2); + + return H_SUCCESS; +} + static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong mflags, @@ -858,6 +907,14 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong ret = H_P2; switch (resource) { + case H_SET_MODE_RESOURCE_SET_CIABR: + ret = h_set_mode_resource_set_ciabr(cpu, spapr, args[0], args[2], + args[3]); + break; + case H_SET_MODE_RESOURCE_SET_DAWR0: + ret = h_set_mode_resource_set_dawr0(cpu, spapr, args[0], args[2], + args[3]); + break; case H_SET_MODE_RESOURCE_LE: ret = h_set_mode_resource_le(cpu, spapr, args[0], args[2], args[3]); break; From 2c71b4f6049ef1ed8c75bce7091102be7209a473 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:43 +1000 Subject: [PATCH 09/35] ppc/vhyp: reset exception state when handling vhyp hcall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convention is to reset the exception_index and error_code after handling an interrupt. The vhyp hcall handler fails to do this. This does not appear to have ill effects because cpu_handle_exception() clears exception_index later, but it is fragile and inconsistent. Reset the exception state after handling vhyp hcall like other handlers. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/excp_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 32e46e56b3..72ec2be92e 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -843,6 +843,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp) PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); vhc->hypercall(cpu->vhyp, cpu); + powerpc_reset_excp_state(cpu); return; } @@ -1014,6 +1015,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); vhc->hypercall(cpu->vhyp, cpu); + powerpc_reset_excp_state(cpu); return; } @@ -1526,6 +1528,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); vhc->hypercall(cpu->vhyp, cpu); + powerpc_reset_excp_state(cpu); return; } if (env->insns_flags2 & PPC2_ISA310) { From 7b8589d7ce7e23f26ff53338d575a5cbd7818e28 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:44 +1000 Subject: [PATCH 10/35] ppc/vof: Fix missed fields in VOF cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Failing to reset the of_instance_last makes ihandle allocation continue to increase, which causes record-replay replay fail to match the recorded trace. Not resetting claimed_base makes VOF eventually run out of memory after some resets. Cc: Alexey Kardashevskiy Fixes: fc8c745d501 ("spapr: Implement Open Firmware client interface") Signed-off-by: Nicholas Piggin Reviewed-by: Alexey Kardashevskiy Signed-off-by: Cédric Le Goater --- hw/ppc/vof.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c index 18c3f92317..e3b430a81f 100644 --- a/hw/ppc/vof.c +++ b/hw/ppc/vof.c @@ -1024,6 +1024,8 @@ void vof_cleanup(Vof *vof) } vof->claimed = NULL; vof->of_instances = NULL; + vof->of_instance_last = 0; + vof->claimed_base = 0; } void vof_build_dt(void *fdt, Vof *vof) From eaf832fc3b058ab62e9f2b677f1cba012eb728c2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:45 +1000 Subject: [PATCH 11/35] hw/ppc/ppc.c: Tidy over-long lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 0e0a3d93c3..09b82f68a8 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -497,7 +497,8 @@ uint64_t cpu_ppc_load_tbl (CPUPPCState *env) return env->spr[SPR_TBL]; } - tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->tb_offset); trace_ppc_tb_load(tb); return tb; @@ -508,7 +509,8 @@ static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env) ppc_tb_t *tb_env = env->tb_env; uint64_t tb; - tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->tb_offset); trace_ppc_tb_load(tb); return tb >> 32; @@ -565,7 +567,8 @@ uint64_t cpu_ppc_load_atbl (CPUPPCState *env) ppc_tb_t *tb_env = env->tb_env; uint64_t tb; - tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->atb_offset); trace_ppc_tb_load(tb); return tb; @@ -576,7 +579,8 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env) ppc_tb_t *tb_env = env->tb_env; uint64_t tb; - tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->atb_offset); trace_ppc_tb_load(tb); return tb >> 32; @@ -1040,10 +1044,11 @@ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) tb_env->flags |= PPC_DECR_UNDERFLOW_LEVEL; } /* Create new timer */ - tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu); + tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &cpu_ppc_decr_cb, cpu); if (env->has_hv_mode && !cpu->vhyp) { - tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb, - cpu); + tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &cpu_ppc_hdecr_cb, cpu); } else { tb_env->hdecr_timer = NULL; } From 7798f5c576d898e7e10c4a2518f3f16411dedeb9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:46 +1000 Subject: [PATCH 12/35] hw/ppc: Introduce functions for conversion between timebase and nanoseconds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These calculations are repeated several times, and they will become a little more complicated with subsequent changes. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 09b82f68a8..423a3a117a 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -482,10 +482,20 @@ void ppce500_set_mpic_proxy(bool enabled) /*****************************************************************************/ /* PowerPC time base and decrementer emulation */ +static uint64_t ns_to_tb(uint32_t freq, int64_t clock) +{ + return muldiv64(clock, freq, NANOSECONDS_PER_SECOND); +} + +static int64_t tb_to_ns(uint32_t freq, uint64_t tb) +{ + return muldiv64(tb, NANOSECONDS_PER_SECOND, freq); +} + uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset) { /* TB time in tb periods */ - return muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND) + tb_offset; + return ns_to_tb(tb_env->tb_freq, vmclk) + tb_offset; } uint64_t cpu_ppc_load_tbl (CPUPPCState *env) @@ -528,8 +538,7 @@ uint32_t cpu_ppc_load_tbu (CPUPPCState *env) static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t *tb_offsetp, uint64_t value) { - *tb_offsetp = value - - muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND); + *tb_offsetp = value - ns_to_tb(tb_env->tb_freq, vmclk); trace_ppc_tb_store(value, *tb_offsetp); } @@ -694,11 +703,11 @@ static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); if (diff >= 0) { - decr = muldiv64(diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); + decr = ns_to_tb(tb_env->decr_freq, diff); } else if (tb_env->flags & PPC_TIMER_BOOKE) { decr = 0; } else { - decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); + decr = -ns_to_tb(tb_env->decr_freq, -diff); } trace_ppc_decr_load(decr); @@ -838,7 +847,7 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, /* Calculate the next timer event */ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq); + next = now + tb_to_ns(tb_env->decr_freq, value); *nextp = next; /* Adjust timer */ @@ -1130,7 +1139,7 @@ static void cpu_4xx_fit_cb (void *opaque) /* Cannot occur, but makes gcc happy */ return; } - next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->tb_freq); + next = now + tb_to_ns(tb_env->tb_freq, next); if (next == now) next++; timer_mod(ppc40x_timer->fit_timer, next); @@ -1158,8 +1167,7 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) } else { trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = now + muldiv64(ppc40x_timer->pit_reload, - NANOSECONDS_PER_SECOND, tb_env->decr_freq); + next = now + tb_to_ns(tb_env->decr_freq, ppc40x_timer->pit_reload); if (is_excp) next += tb_env->decr_next - now; if (next == now) @@ -1218,7 +1226,7 @@ static void cpu_4xx_wdt_cb (void *opaque) /* Cannot occur, but makes gcc happy */ return; } - next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq); + next = now + tb_to_ns(tb_env->decr_freq, next); if (next == now) next++; trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); From 47de6c4c287079744ceb96f606b3c0457addf380 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:47 +1000 Subject: [PATCH 13/35] host-utils: Add muldiv64_round_up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used for converting time intervals in different base units to host units, for the purpose of scheduling timers to emulate target timers. Timers typically must not fire before their requested expiry time but may fire some time afterward, so rounding up is the right way to implement these. Signed-off-by: Nicholas Piggin Reviewed-by: Richard Henderson [ clg: renamed __muldiv64() to muldiv64_rounding() ] Signed-off-by: Cédric Le Goater --- include/qemu/host-utils.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 011618373e..ead97d354d 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -56,6 +56,11 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) return (__int128_t)a * b / c; } +static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c) +{ + return ((__int128_t)a * b + c - 1) / c; +} + static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor) { @@ -83,7 +88,8 @@ void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor); -static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +static inline uint64_t muldiv64_rounding(uint64_t a, uint32_t b, uint32_t c, + bool round_up) { union { uint64_t ll; @@ -99,12 +105,25 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) u.ll = a; rl = (uint64_t)u.l.low * (uint64_t)b; + if (round_up) { + rl += c - 1; + } rh = (uint64_t)u.l.high * (uint64_t)b; rh += (rl >> 32); res.l.high = rh / c; res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; return res.ll; } + +static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +{ + return muldiv64_rounding(a, b, c, false); +} + +static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c) +{ + return muldiv64_rounding(a, b, c, true); +} #endif /** From eab0888418ab44344864965193cf6cd194ab6858 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:48 +1000 Subject: [PATCH 14/35] hw/ppc: Round up the decrementer interval when converting to ns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rule of timers is typically that they should never expire before the timeout, but some time afterward. Rounding timer intervals up when doing conversion is the right thing to do. Under most circumstances it is impossible observe the decrementer interrupt before the dec register has triggered. However with icount timing, problems can arise. For example setting DEC to 0 can schedule the timer for now, causing it to fire before any more instructions have been executed and DEC is still 0. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 423a3a117a..13eb45f4b7 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -482,14 +482,26 @@ void ppce500_set_mpic_proxy(bool enabled) /*****************************************************************************/ /* PowerPC time base and decrementer emulation */ +/* + * Conversion between QEMU_CLOCK_VIRTUAL ns and timebase (TB) ticks: + * TB ticks are arrived at by multiplying tb_freq then dividing by + * ns per second, and rounding down. TB ticks drive all clocks and + * timers in the target machine. + * + * Converting TB intervals to ns for the purpose of setting a + * QEMU_CLOCK_VIRTUAL timer should go the other way, but rounding + * up. Rounding down could cause the timer to fire before the TB + * value has been reached. + */ static uint64_t ns_to_tb(uint32_t freq, int64_t clock) { return muldiv64(clock, freq, NANOSECONDS_PER_SECOND); } -static int64_t tb_to_ns(uint32_t freq, uint64_t tb) +/* virtual clock in TB ticks, not adjusted by TB offset */ +static int64_t tb_to_ns_round_up(uint32_t freq, uint64_t tb) { - return muldiv64(tb, NANOSECONDS_PER_SECOND, freq); + return muldiv64_round_up(tb, NANOSECONDS_PER_SECOND, freq); } uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset) @@ -847,7 +859,7 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, /* Calculate the next timer event */ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = now + tb_to_ns(tb_env->decr_freq, value); + next = now + tb_to_ns_round_up(tb_env->decr_freq, value); *nextp = next; /* Adjust timer */ @@ -1139,9 +1151,7 @@ static void cpu_4xx_fit_cb (void *opaque) /* Cannot occur, but makes gcc happy */ return; } - next = now + tb_to_ns(tb_env->tb_freq, next); - if (next == now) - next++; + next = now + tb_to_ns_round_up(tb_env->tb_freq, next); timer_mod(ppc40x_timer->fit_timer, next); env->spr[SPR_40x_TSR] |= 1 << 26; if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) { @@ -1167,11 +1177,10 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) } else { trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = now + tb_to_ns(tb_env->decr_freq, ppc40x_timer->pit_reload); + next = now + tb_to_ns_round_up(tb_env->decr_freq, + ppc40x_timer->pit_reload); if (is_excp) next += tb_env->decr_next - now; - if (next == now) - next++; timer_mod(tb_env->decr_timer, next); tb_env->decr_next = next; } @@ -1226,9 +1235,7 @@ static void cpu_4xx_wdt_cb (void *opaque) /* Cannot occur, but makes gcc happy */ return; } - next = now + tb_to_ns(tb_env->decr_freq, next); - if (next == now) - next++; + next = now + tb_to_ns_round_up(tb_env->decr_freq, next); trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) { case 0x0: From 8e0a5ac87800ccc6dd5013f89f27652f4480ab33 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:49 +1000 Subject: [PATCH 15/35] hw/ppc: Avoid decrementer rounding errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The decrementer register contains a relative time in timebase units. When writing to DECR this is converted and stored as an absolute value in nanosecond units, reading DECR converts back to relative timebase. The tb<->ns conversion of the relative part can cause rounding such that a value writen to the decrementer can read back a different, with time held constant. This is a particular problem for a deterministic icount and record-replay trace. Fix this by storing the absolute value in timebase units rather than nanoseconds. The math before: store: decr_next = now_ns + decr * ns_per_sec / tb_per_sec load: decr = (decr_next - now_ns) * tb_per_sec / ns_per_sec load(store): decr = decr * ns_per_sec / tb_per_sec * tb_per_sec / ns_per_sec After: store: decr_next = now_ns * tb_per_sec / ns_per_sec + decr load: decr = decr_next - now_ns * tb_per_sec / ns_per_sec load(store): decr = decr Fixes: 9fddaa0c0cab ("PowerPC merge: real time TB and decrementer - faster and simpler exception handling (Jocelyn Mayer)") Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 13eb45f4b7..a397820d9c 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -711,16 +711,17 @@ bool ppc_decr_clear_on_delivery(CPUPPCState *env) static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) { ppc_tb_t *tb_env = env->tb_env; - int64_t decr, diff; + uint64_t now, n; + int64_t decr; - diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (diff >= 0) { - decr = ns_to_tb(tb_env->decr_freq, diff); - } else if (tb_env->flags & PPC_TIMER_BOOKE) { + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + n = ns_to_tb(tb_env->decr_freq, now); + if (next > n && tb_env->flags & PPC_TIMER_BOOKE) { decr = 0; - } else { - decr = -ns_to_tb(tb_env->decr_freq, -diff); + } else { + decr = next - n; } + trace_ppc_decr_load(decr); return decr; @@ -857,13 +858,18 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, (*lower_excp)(cpu); } - /* Calculate the next timer event */ + /* + * Calculate the next decrementer event and set a timer. + * decr_next is in timebase units to keep rounding simple. Note it is + * not adjusted by tb_offset because if TB changes via tb_offset changing, + * decrementer does not change, so not directly comparable with TB. + */ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = now + tb_to_ns_round_up(tb_env->decr_freq, value); + next = ns_to_tb(tb_env->decr_freq, now) + value; *nextp = next; /* Adjust timer */ - timer_mod(timer, next); + timer_mod(timer, tb_to_ns_round_up(tb_env->decr_freq, next)); } static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr, @@ -1177,12 +1183,15 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) } else { trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = now + tb_to_ns_round_up(tb_env->decr_freq, - ppc40x_timer->pit_reload); - if (is_excp) - next += tb_env->decr_next - now; + + if (is_excp) { + tb_env->decr_next += ppc40x_timer->pit_reload; + } else { + tb_env->decr_next = ns_to_tb(tb_env->decr_freq, now) + + ppc40x_timer->pit_reload; + } + next = tb_to_ns_round_up(tb_env->decr_freq, tb_env->decr_next); timer_mod(tb_env->decr_timer, next); - tb_env->decr_next = next; } } From c8fbc6b9f2f3c732ee3307093c1c5c367eaa64ae Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:50 +1000 Subject: [PATCH 16/35] target/ppc: Sign-extend large decrementer to 64-bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When storing a large decrementer value with the most significant implemented bit set, it is to be treated as a negative and sign extended. This isn't hit for book3s DEC because of another bug, fixing it in the next patch exposes this one and can cause additional problems, so fix this first. It can be hit with HDECR and other edge triggered types. Fixes: a8dafa52518 ("target/ppc: Implement large decrementer support for TCG") Signed-off-by: Nicholas Piggin [ clg: removed extra cpu and pcc variables shadowing local variables ] Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index a397820d9c..51f2bec0a1 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -743,7 +743,9 @@ target_ulong cpu_ppc_load_decr(CPUPPCState *env) * to 64 bits, otherwise it is a 32 bit value. */ if (env->spr[SPR_LPCR] & LPCR_LD) { - return decr; + PowerPCCPU *cpu = env_archcpu(env); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + return sextract64(decr, 0, pcc->lrg_decr_bits); } return (uint32_t) decr; } @@ -762,7 +764,7 @@ target_ulong cpu_ppc_load_hdecr(CPUPPCState *env) * extended to 64 bits, otherwise it is 32 bits. */ if (pcc->lrg_decr_bits > 32) { - return hdecr; + return sextract64(hdecr, 0, pcc->lrg_decr_bits); } return (uint32_t) hdecr; } From febb71d543a8f747b2f8aaf0182d0a385c6a02c3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:51 +1000 Subject: [PATCH 17/35] hw/ppc: Always store the decrementer value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When writing a value to the decrementer that raises an exception, the irq is raised, but the value is not stored so the store doesn't appear to have changed the register when it is read again. Always store the write value to the register. Fixes: e81a982aa53 ("PPC: Clean up DECR implementation") Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 51f2bec0a1..e87a65fa23 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -839,6 +839,16 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, return; } + /* + * Calculate the next decrementer event and set a timer. + * decr_next is in timebase units to keep rounding simple. Note it is + * not adjusted by tb_offset because if TB changes via tb_offset changing, + * decrementer does not change, so not directly comparable with TB. + */ + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + next = ns_to_tb(tb_env->decr_freq, now) + value; + *nextp = next; /* nextp is in timebase units */ + /* * Going from 1 -> 0 or 0 -> -1 is the event to generate a DEC interrupt. * @@ -860,16 +870,6 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, (*lower_excp)(cpu); } - /* - * Calculate the next decrementer event and set a timer. - * decr_next is in timebase units to keep rounding simple. Note it is - * not adjusted by tb_offset because if TB changes via tb_offset changing, - * decrementer does not change, so not directly comparable with TB. - */ - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = ns_to_tb(tb_env->decr_freq, now) + value; - *nextp = next; - /* Adjust timer */ timer_mod(timer, tb_to_ns_round_up(tb_env->decr_freq, next)); } From 578912ad7312ececb9a88b4c38d406dda640346d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:52 +1000 Subject: [PATCH 18/35] target/ppc: Migrate DECR SPR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TCG does not maintain the DEC reigster in the SPR array, so it does get migrated. TCG also needs to re-start the decrementer timer on the destination machine. Load and store the decrementer into the SPR when migrating. This works for the level-triggered (book3s) decrementer, and should be compatible with existing KVM machines that do keep the DEC value there. This fixes lost decrementer interrupt on migration that can cause hangs, as well as other problems including record-replay bugs. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/machine.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 07ca18437f..3265338e16 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -208,6 +208,14 @@ static int cpu_pre_save(void *opaque) /* Used to retain migration compatibility for pre 6.0 for 601 machines. */ env->hflags_compat_nmsr = 0; + if (tcg_enabled()) { + /* + * TCG does not maintain the DECR spr (unlike KVM) so have to save + * it here. + */ + env->spr[SPR_DECR] = cpu_ppc_load_decr(env); + } + return 0; } @@ -318,6 +326,12 @@ static int cpu_post_load(void *opaque, int version_id) ppc_update_ciabr(env); ppc_update_daw0(env); #endif + /* + * TCG needs to re-start the decrementer timer and/or raise the + * interrupt. This works for level-triggered decrementer. Edge + * triggered types (including HDEC) would need to carry more state. + */ + cpu_ppc_store_decr(env, env->spr[SPR_DECR]); pmu_mmcr01_updated(env); } From 30d0647bcfa99d4a141eaa843a9fb5b091ddbb76 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:53 +1000 Subject: [PATCH 19/35] hw/ppc: Reset timebase facilities on machine reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lower interrupts, delete timers, and set time facility registers back to initial state on machine reset. This is not so important for record-replay since timebase and decrementer are migrated, but it gives a cleaner reset state. Cc: Mark Cave-Ayland Cc: BALATON Zoltan Signed-off-by: Nicholas Piggin [ clg: checkpatch.pl fixes ] Signed-off-by: Cédric Le Goater --- hw/ppc/mac_oldworld.c | 1 + hw/ppc/pegasos2.c | 1 + hw/ppc/pnv_core.c | 2 ++ hw/ppc/ppc.c | 47 +++++++++++++++++++++++------------------ hw/ppc/prep.c | 1 + hw/ppc/spapr_cpu_core.c | 2 ++ include/hw/ppc/ppc.h | 3 ++- 7 files changed, 36 insertions(+), 21 deletions(-) diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 510ff0eaaf..9acc7adfc9 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -81,6 +81,7 @@ static void ppc_heathrow_reset(void *opaque) { PowerPCCPU *cpu = opaque; + cpu_ppc_tb_reset(&cpu->env); cpu_reset(CPU(cpu)); } diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 075367d94d..bd397cf2b5 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -99,6 +99,7 @@ static void pegasos2_cpu_reset(void *opaque) cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20; cpu->env.nip = 0x100; } + cpu_ppc_tb_reset(&cpu->env); } static void pegasos2_pci_irq(void *opaque, int n, int level) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 9b39d527de..8c7afe037f 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -61,6 +61,8 @@ static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) hreg_compute_hflags(env); ppc_maybe_interrupt(env); + cpu_ppc_tb_reset(env); + pcc->intc_reset(pc->chip, cpu); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index e87a65fa23..71559ed51d 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -942,23 +942,6 @@ void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value) &tb_env->purr_offset, value); } -static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq) -{ - CPUPPCState *env = opaque; - PowerPCCPU *cpu = env_archcpu(env); - ppc_tb_t *tb_env = env->tb_env; - - tb_env->tb_freq = freq; - tb_env->decr_freq = freq; - /* There is a bug in Linux 2.4 kernels: - * if a decrementer exception is pending when it enables msr_ee at startup, - * it's not ready to handle it... - */ - _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32); - _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32); - cpu_ppc_store_purr(env, 0x0000000000000000ULL); -} - static void timebase_save(PPCTimebase *tb) { uint64_t ticks = cpu_get_host_ticks(); @@ -1060,7 +1043,7 @@ const VMStateDescription vmstate_ppc_timebase = { }; /* Set up (once) timebase frequency (in Hz) */ -clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) +void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq) { PowerPCCPU *cpu = env_archcpu(env); ppc_tb_t *tb_env; @@ -1081,9 +1064,33 @@ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) } else { tb_env->hdecr_timer = NULL; } - cpu_ppc_set_tb_clk(env, freq); - return &cpu_ppc_set_tb_clk; + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; +} + +void cpu_ppc_tb_reset(CPUPPCState *env) +{ + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env = env->tb_env; + + timer_del(tb_env->decr_timer); + ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 0); + tb_env->decr_next = 0; + if (tb_env->hdecr_timer != NULL) { + timer_del(tb_env->hdecr_timer); + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0); + tb_env->hdecr_next = 0; + } + + /* + * There is a bug in Linux 2.4 kernels: + * if a decrementer exception is pending when it enables msr_ee at startup, + * it's not ready to handle it... + */ + cpu_ppc_store_decr(env, -1); + cpu_ppc_store_hdecr(env, -1); + cpu_ppc_store_purr(env, 0x0000000000000000ULL); } void cpu_ppc_tb_free(CPUPPCState *env) diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index d9231c7317..f6fd35fcb9 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -67,6 +67,7 @@ static void ppc_prep_reset(void *opaque) PowerPCCPU *cpu = opaque; cpu_reset(CPU(cpu)); + cpu_ppc_tb_reset(&cpu->env); } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index b482d9754a..91fae56573 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -74,6 +74,8 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu) kvm_check_mmu(cpu, &error_fatal); + cpu_ppc_tb_reset(env); + spapr_irq_cpu_intc_reset(spapr, cpu); } diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h index e095c002dc..17a8dfc107 100644 --- a/include/hw/ppc/ppc.h +++ b/include/hw/ppc/ppc.h @@ -54,7 +54,8 @@ struct ppc_tb_t { */ uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset); -clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq); +void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq); +void cpu_ppc_tb_reset(CPUPPCState *env); void cpu_ppc_tb_free(CPUPPCState *env); void cpu_ppc_hdecr_init(CPUPPCState *env); void cpu_ppc_hdecr_exit(CPUPPCState *env); From ea62f8a5172cf5fcd97df143b758730f6865a625 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:54 +1000 Subject: [PATCH 20/35] hw/ppc: Read time only once to perform decrementer write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading the time more than once to perform an operation always increases complexity and fragility due to introduced deltas. Simplify the decrementer write by reading the clock once for the operation. Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 84 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 31 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 71559ed51d..1a507043ec 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -708,13 +708,13 @@ bool ppc_decr_clear_on_delivery(CPUPPCState *env) return ((tb_env->flags & flags) == PPC_DECR_UNDERFLOW_TRIGGERED); } -static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) +static inline int64_t __cpu_ppc_load_decr(CPUPPCState *env, int64_t now, + uint64_t next) { ppc_tb_t *tb_env = env->tb_env; - uint64_t now, n; + uint64_t n; int64_t decr; - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); n = ns_to_tb(tb_env->decr_freq, now); if (next > n && tb_env->flags & PPC_TIMER_BOOKE) { decr = 0; @@ -727,16 +727,12 @@ static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) return decr; } -target_ulong cpu_ppc_load_decr(CPUPPCState *env) +static target_ulong _cpu_ppc_load_decr(CPUPPCState *env, int64_t now) { ppc_tb_t *tb_env = env->tb_env; uint64_t decr; - if (kvm_enabled()) { - return env->spr[SPR_DECR]; - } - - decr = _cpu_ppc_load_decr(env, tb_env->decr_next); + decr = __cpu_ppc_load_decr(env, now, tb_env->decr_next); /* * If large decrementer is enabled then the decrementer is signed extened @@ -750,14 +746,23 @@ target_ulong cpu_ppc_load_decr(CPUPPCState *env) return (uint32_t) decr; } -target_ulong cpu_ppc_load_hdecr(CPUPPCState *env) +target_ulong cpu_ppc_load_decr(CPUPPCState *env) +{ + if (kvm_enabled()) { + return env->spr[SPR_DECR]; + } else { + return _cpu_ppc_load_decr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + } +} + +static target_ulong _cpu_ppc_load_hdecr(CPUPPCState *env, int64_t now) { PowerPCCPU *cpu = env_archcpu(env); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); ppc_tb_t *tb_env = env->tb_env; uint64_t hdecr; - hdecr = _cpu_ppc_load_decr(env, tb_env->hdecr_next); + hdecr = __cpu_ppc_load_decr(env, now, tb_env->hdecr_next); /* * If we have a large decrementer (POWER9 or later) then hdecr is sign @@ -769,6 +774,11 @@ target_ulong cpu_ppc_load_hdecr(CPUPPCState *env) return (uint32_t) hdecr; } +target_ulong cpu_ppc_load_hdecr(CPUPPCState *env) +{ + return _cpu_ppc_load_hdecr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); +} + uint64_t cpu_ppc_load_purr (CPUPPCState *env) { ppc_tb_t *tb_env = env->tb_env; @@ -813,7 +823,7 @@ static inline void cpu_ppc_hdecr_lower(PowerPCCPU *cpu) ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0); } -static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, +static void __cpu_ppc_store_decr(PowerPCCPU *cpu, int64_t now, uint64_t *nextp, QEMUTimer *timer, void (*raise_excp)(void *), void (*lower_excp)(PowerPCCPU *), @@ -822,7 +832,7 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, { CPUPPCState *env = &cpu->env; ppc_tb_t *tb_env = env->tb_env; - uint64_t now, next; + uint64_t next; int64_t signed_value; int64_t signed_decr; @@ -834,18 +844,12 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, trace_ppc_decr_store(nr_bits, decr, value); - if (kvm_enabled()) { - /* KVM handles decrementer exceptions, we don't need our own timer */ - return; - } - /* * Calculate the next decrementer event and set a timer. * decr_next is in timebase units to keep rounding simple. Note it is * not adjusted by tb_offset because if TB changes via tb_offset changing, * decrementer does not change, so not directly comparable with TB. */ - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); next = ns_to_tb(tb_env->decr_freq, now) + value; *nextp = next; /* nextp is in timebase units */ @@ -874,12 +878,13 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, timer_mod(timer, tb_to_ns_round_up(tb_env->decr_freq, next)); } -static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr, - target_ulong value, int nr_bits) +static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, int64_t now, + target_ulong decr, target_ulong value, + int nr_bits) { ppc_tb_t *tb_env = cpu->env.tb_env; - __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer, + __cpu_ppc_store_decr(cpu, now, &tb_env->decr_next, tb_env->decr_timer, tb_env->decr_timer->cb, &cpu_ppc_decr_lower, tb_env->flags, decr, value, nr_bits); } @@ -888,13 +893,22 @@ void cpu_ppc_store_decr(CPUPPCState *env, target_ulong value) { PowerPCCPU *cpu = env_archcpu(env); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + int64_t now; + target_ulong decr; int nr_bits = 32; + if (kvm_enabled()) { + /* KVM handles decrementer exceptions, we don't need our own timer */ + return; + } + if (env->spr[SPR_LPCR] & LPCR_LD) { nr_bits = pcc->lrg_decr_bits; } - _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, nr_bits); + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + decr = _cpu_ppc_load_decr(env, now); + _cpu_ppc_store_decr(cpu, now, decr, value, nr_bits); } static void cpu_ppc_decr_cb(void *opaque) @@ -904,14 +918,15 @@ static void cpu_ppc_decr_cb(void *opaque) cpu_ppc_decr_excp(cpu); } -static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr, - target_ulong value, int nr_bits) +static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, int64_t now, + target_ulong hdecr, target_ulong value, + int nr_bits) { ppc_tb_t *tb_env = cpu->env.tb_env; if (tb_env->hdecr_timer != NULL) { /* HDECR (Book3S 64bit) is edge-based, not level like DECR */ - __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer, + __cpu_ppc_store_decr(cpu, now, &tb_env->hdecr_next, tb_env->hdecr_timer, tb_env->hdecr_timer->cb, &cpu_ppc_hdecr_lower, PPC_DECR_UNDERFLOW_TRIGGERED, hdecr, value, nr_bits); @@ -922,9 +937,12 @@ void cpu_ppc_store_hdecr(CPUPPCState *env, target_ulong value) { PowerPCCPU *cpu = env_archcpu(env); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + int64_t now; + target_ulong hdecr; - _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value, - pcc->lrg_decr_bits); + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + hdecr = _cpu_ppc_load_hdecr(env, now); + _cpu_ppc_store_hdecr(cpu, now, hdecr, value, pcc->lrg_decr_bits); } static void cpu_ppc_hdecr_cb(void *opaque) @@ -934,12 +952,16 @@ static void cpu_ppc_hdecr_cb(void *opaque) cpu_ppc_hdecr_excp(cpu); } -void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value) +static void _cpu_ppc_store_purr(CPUPPCState *env, int64_t now, uint64_t value) { ppc_tb_t *tb_env = env->tb_env; - cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->purr_offset, value); + cpu_ppc_store_tb(tb_env, now, &tb_env->purr_offset, value); +} + +void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value) +{ + _cpu_ppc_store_purr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), value); } static void timebase_save(PPCTimebase *tb) From cdab53dd223ca5417a70feedb6f8692e5c080aba Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:55 +1000 Subject: [PATCH 21/35] target/ppc: Fix CPU reservation migration for record-replay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ppc only migrates reserve_addr, so the destination machine can get a valid reservation with an incorrect reservation value of 0. Prior to commit 392d328abe753 ("target/ppc: Ensure stcx size matches larx"), this could permit a stcx. to incorrectly succeed. That commit inadvertently fixed that bug because the target machine starts with an impossible reservation size of 0, so any stcx. will fail. This behaviour is permitted by the ISA because reservation loss may have implementation-dependent cause. What's more, with KVM machines it is impossible save or reasonably restore reservation state. However if the vmstate is being used for record-replay, the reservation must be saved and restored exactly in order for execution from snapshot to match the record. This patch deprecates the existing incomplete reserve_addr vmstate, and adds a new vmstate subsection with complete reservation state. The new vmstate is needed only when record-replay mode is active. Acked-by: Pavel Dovgalyuk Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/cpu.h | 2 ++ target/ppc/machine.c | 26 ++++++++++++++++++++++++-- target/ppc/translate.c | 4 ++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 7e7a60f68f..77113521ac 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1121,7 +1121,9 @@ struct CPUArchState { target_ulong reserve_addr; /* Reservation address */ target_ulong reserve_length; /* Reservation larx op size (bytes) */ target_ulong reserve_val; /* Reservation value */ +#if defined(TARGET_PPC64) target_ulong reserve_val2; +#endif /* These are used in supervisor mode only */ target_ulong msr; /* machine state register */ diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 3265338e16..68cbdffecd 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -9,6 +9,7 @@ #include "qapi/error.h" #include "kvm_ppc.h" #include "power8-pmu.h" +#include "sysemu/replay.h" static void post_load_update_msr(CPUPPCState *env) { @@ -689,6 +690,27 @@ static const VMStateDescription vmstate_compat = { } }; +static bool reservation_needed(void *opaque) +{ + return (replay_mode != REPLAY_MODE_NONE); +} + +static const VMStateDescription vmstate_reservation = { + .name = "cpu/reservation", + .version_id = 1, + .minimum_version_id = 1, + .needed = reservation_needed, + .fields = (VMStateField[]) { + VMSTATE_UINTTL(env.reserve_addr, PowerPCCPU), + VMSTATE_UINTTL(env.reserve_length, PowerPCCPU), + VMSTATE_UINTTL(env.reserve_val, PowerPCCPU), +#if defined(TARGET_PPC64) + VMSTATE_UINTTL(env.reserve_val2, PowerPCCPU), +#endif + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ppc_cpu = { .name = "cpu", .version_id = 5, @@ -710,8 +732,7 @@ const VMStateDescription vmstate_ppc_cpu = { VMSTATE_UINTTL_ARRAY(env.spr, PowerPCCPU, 1024), VMSTATE_UINT64(env.spe_acc, PowerPCCPU), - /* Reservation */ - VMSTATE_UINTTL(env.reserve_addr, PowerPCCPU), + VMSTATE_UNUSED(sizeof(target_ulong)), /* was env.reserve_addr */ /* Supervisor mode architected state */ VMSTATE_UINTTL(env.msr, PowerPCCPU), @@ -740,6 +761,7 @@ const VMStateDescription vmstate_ppc_cpu = { &vmstate_tlbemb, &vmstate_tlbmas, &vmstate_compat, + &vmstate_reservation, NULL } }; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 16592194e2..6b242ae0a6 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -75,7 +75,9 @@ static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; static TCGv cpu_reserve; static TCGv cpu_reserve_length; static TCGv cpu_reserve_val; +#if defined(TARGET_PPC64) static TCGv cpu_reserve_val2; +#endif static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; @@ -149,9 +151,11 @@ void ppc_translate_init(void) cpu_reserve_val = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, reserve_val), "reserve_val"); +#if defined(TARGET_PPC64) cpu_reserve_val2 = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, reserve_val2), "reserve_val2"); +#endif cpu_fpscr = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, fpscr), "fpscr"); From 9db680f8fd483521597564b52843151991b3ed55 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:56 +1000 Subject: [PATCH 22/35] target/ppc: Fix timebase reset with record-replay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Timebase save uses a random number for a legacy vmstate field, which makes rr snapshot loading unbalanced. The easiest way to deal with this is just to skip the rng if record-replay is active. Reviewed-by: Pavel Dovgalyuk Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 1a507043ec..ace8f4e725 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -32,6 +32,7 @@ #include "qemu/main-loop.h" #include "qemu/error-report.h" #include "sysemu/kvm.h" +#include "sysemu/replay.h" #include "sysemu/runstate.h" #include "kvm_ppc.h" #include "migration/vmstate.h" @@ -974,8 +975,14 @@ static void timebase_save(PPCTimebase *tb) return; } - /* not used anymore, we keep it for compatibility */ - tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); + if (replay_mode == REPLAY_MODE_NONE) { + /* not used anymore, we keep it for compatibility */ + tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); + } else { + /* simpler for record-replay to avoid this event, compat not needed */ + tb->time_of_the_day_ns = 0; + } + /* * tb_offset is only expected to be changed by QEMU so * there is no need to update it from KVM here From 9c7b7f01f982a22b06c0d2f49a0d2466b5f34485 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:57 +1000 Subject: [PATCH 23/35] spapr: Fix machine reset deadlock from replay-record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the machine is reset to load a new snapshot while being debugged with replay-record, it is done from another thread, so the CPU does not run the register setting operations. Set CPU registers directly in machine reset. Cc: Pavel Dovgalyuk Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/spapr.c | 20 ++++++++++++++++++-- include/hw/ppc/spapr.h | 1 + target/ppc/compat.c | 19 +++++++++++++++++++ target/ppc/cpu.h | 1 + 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 07e91e3800..c0b0ada121 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1322,6 +1322,22 @@ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask) } } +/* May be used when the machine is not running */ +void spapr_init_all_lpcrs(target_ulong value, target_ulong mask) +{ + CPUState *cs; + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + target_ulong lpcr; + + lpcr = env->spr[SPR_LPCR]; + lpcr &= ~(LPCR_HR | LPCR_UPRT); + ppc_store_lpcr(cpu, lpcr); + } +} + + static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry) { @@ -1583,7 +1599,7 @@ int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp) } /* We're setting up a hash table, so that means we're not radix */ spapr->patb_entry = 0; - spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT); + spapr_init_all_lpcrs(0, LPCR_HR | LPCR_UPRT); return 0; } @@ -1661,7 +1677,7 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) spapr_ovec_cleanup(spapr->ov5_cas); spapr->ov5_cas = spapr_ovec_new(); - ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); + ppc_init_compat_all(spapr->max_compat_pvr, &error_fatal); /* * This is fixing some of the default configuration of the XIVE diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 538b2dfb89..f47e8419a5 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -1012,6 +1012,7 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); +void spapr_init_all_lpcrs(target_ulong value, target_ulong mask); hwaddr spapr_get_rtas_addr(void); bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); diff --git a/target/ppc/compat.c b/target/ppc/compat.c index 7949a24f5a..ebef2cccec 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -229,6 +229,25 @@ int ppc_set_compat_all(uint32_t compat_pvr, Error **errp) return 0; } +/* To be used when the machine is not running */ +int ppc_init_compat_all(uint32_t compat_pvr, Error **errp) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + int ret; + + ret = ppc_set_compat(cpu, compat_pvr, errp); + + if (ret < 0) { + return ret; + } + } + + return 0; +} + int ppc_compat_max_vthreads(PowerPCCPU *cpu) { const CompatInfo *compat = compat_by_pvr(cpu->compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 77113521ac..173e4c351a 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1504,6 +1504,7 @@ int ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp); #if !defined(CONFIG_USER_ONLY) int ppc_set_compat_all(uint32_t compat_pvr, Error **errp); +int ppc_init_compat_all(uint32_t compat_pvr, Error **errp); #endif int ppc_compat_max_vthreads(PowerPCCPU *cpu); void ppc_compat_add_property(Object *obj, const char *name, From b27fcb288bbdb9e2d89ce9ee578a8869f14c579c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:58 +1000 Subject: [PATCH 24/35] spapr: Fix record-replay machine reset consuming too many events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit spapr_machine_reset gets a random number to populate the device-tree rng seed with. When loading a snapshot for record-replay, the machine is reset again, and that tries to consume the random event record again, crashing due to inconsistent record Fix this by saving the seed to populate the device tree with, and skipping the rng on snapshot load. Acked-by: Pavel Dovgalyuk Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/spapr.c | 12 +++++++++--- include/hw/ppc/spapr.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index c0b0ada121..f7cc6a890f 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1022,7 +1022,6 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) { MachineState *machine = MACHINE(spapr); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); - uint8_t rng_seed[32]; int chosen; _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); @@ -1100,8 +1099,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) spapr_dt_ov5_platform_support(spapr, fdt, chosen); } - qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); - _FDT(fdt_setprop(fdt, chosen, "rng-seed", rng_seed, sizeof(rng_seed))); + _FDT(fdt_setprop(fdt, chosen, "rng-seed", spapr->fdt_rng_seed, 32)); _FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5")); } @@ -1654,6 +1652,14 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) void *fdt; int rc; + if (reason != SHUTDOWN_CAUSE_SNAPSHOT_LOAD) { + /* + * Record-replay snapshot load must not consume random, this was + * already replayed from initial machine reset. + */ + qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); + } + pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index f47e8419a5..f4bd204d86 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -204,6 +204,7 @@ struct SpaprMachineState { uint32_t fdt_size; uint32_t fdt_initial_size; void *fdt_blob; + uint8_t fdt_rng_seed[32]; long kernel_size; bool kernel_le; uint64_t kernel_addr; From d08c825c80176b5318e9c2b2a3667f8e59f50ffc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:19:59 +1000 Subject: [PATCH 25/35] tests/avocado: boot ppc64 pseries replay-record test to Linux VFS mount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This the ppc64 record-replay test is able to replay the full kernel boot so try enabling it. Acked-by: Pavel Dovgalyuk Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- tests/avocado/replay_kernel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py index 79c607b0e7..a18610542e 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py @@ -255,8 +255,7 @@ class ReplayKernelNormal(ReplayKernelBase): kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0' - # icount is not good enough for PPC64 for complete boot yet - console_pattern = 'Kernel command line: %s' % kernel_command_line + console_pattern = 'VFS: Cannot open root device' self.run_rr(kernel_path, kernel_command_line, console_pattern) def test_ppc64_powernv(self): From 76e9c1dfb9634be2a3ae35c7b663753c6151a526 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:20:00 +1000 Subject: [PATCH 26/35] tests/avocado: reverse-debugging cope with re-executing breakpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reverse-debugging test creates a trace, then replays it and: 1. Steps the first 10 instructions and records their addresses. 2. Steps backward and verifies their addresses match. 3. Runs to (near) the end of the trace. 4. Sets breakpoints on the first 10 instructions. 5. Continues backward and verifies execution stops at the last breakpoint. Step 5 breaks if any of the other 9 breakpoints are re-executed in the trace after the 10th instruction is run, because those will be unexpectedly hit when reverse continuing. This situation does arise with the ppc pseries machine, the SLOF bios branches to its own entry point. Deal with this by switching steps 3 and 4, so the trace will be run to the end *or* one of the breakpoints being re-executed. Step 5 then reverses from there to the 10th instruction will not hit a breakpoint in between, by definition. Another step is added between steps 2 and 3, which steps forward over the first 10 instructions and verifies their addresses, to support this. Reviewed-by: Pavel Dovgalyuk Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- tests/avocado/reverse_debugging.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py index 680c314cfc..7d1a478df1 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py @@ -150,16 +150,33 @@ class ReverseDebugging(LinuxKernelTest): self.check_pc(g, addr) logger.info('found position %x' % addr) - logger.info('seeking to the end (icount %s)' % (last_icount - 1)) - vm.qmp('replay-break', icount=last_icount - 1) - # continue - will return after pausing - g.cmd(b'c', b'T02thread:01;') + # visit the recorded instruction in forward order + logger.info('stepping forward') + for addr in steps: + self.check_pc(g, addr) + self.gdb_step(g) + logger.info('found position %x' % addr) + # set breakpoints for the instructions just stepped over logger.info('setting breakpoints') for addr in steps: # hardware breakpoint at addr with len=1 g.cmd(b'Z1,%x,1' % addr, b'OK') + # this may hit a breakpoint if first instructions are executed + # again + logger.info('continuing execution') + vm.qmp('replay-break', icount=last_icount - 1) + # continue - will return after pausing + # This could stop at the end and get a T02 return, or by + # re-executing one of the breakpoints and get a T05 return. + g.cmd(b'c') + if self.vm_get_icount(vm) == last_icount - 1: + logger.info('reached the end (icount %s)' % (last_icount - 1)) + else: + logger.info('hit a breakpoint again at %x (icount %s)' % + (self.get_pc(g), self.vm_get_icount(vm))) + logger.info('running reverse continue to reach %x' % steps[-1]) # reverse continue - will return after stopping at the breakpoint g.cmd(b'bc', b'T05thread:01;') From 761a13b239468c1dd175c2d93fc09c1693a937e7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Aug 2023 14:20:01 +1000 Subject: [PATCH 27/35] tests/avocado: ppc64 reverse debugging tests for pseries and powernv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These machines run reverse-debugging well enough to pass basic tests. Wire them up. Reviewed-by: Pavel Dovgalyuk Signed-off-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- tests/avocado/reverse_debugging.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py index 7d1a478df1..fc47874eda 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py @@ -233,3 +233,32 @@ class ReverseDebugging_AArch64(ReverseDebugging): self.reverse_debugging( args=('-kernel', kernel_path)) + +class ReverseDebugging_ppc64(ReverseDebugging): + """ + :avocado: tags=accel:tcg + """ + + REG_PC = 0x40 + + # unidentified gitlab timeout problem + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') + def test_ppc64_pseries(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + # SLOF branches back to its entry point, which causes this test + # to take the 'hit a breakpoint again' path. That's not a problem, + # just slightly different than the other machines. + self.endian_is_le = False + self.reverse_debugging() + + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') + def test_ppc64_powernv(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:powernv + """ + self.endian_is_le = False + self.reverse_debugging() From 718209358f2e4f231cbacf974c3299c4fe7beb83 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Aug 2023 01:30:51 +1000 Subject: [PATCH 28/35] target/ppc: Fix LQ, STQ register-pair order for big-endian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LQ, STQ have the same register-pair ordering as LQARX/STQARX., which is the even (lower) register contains the most significant bits. This is not implemented correctly for big-endian. do_ldst_quad() has variables low_addr_gpr and high_addr_gpr which is confusing because they are low and high addresses, whereas LQARX/STQARX. and most such things use the low and high values for lo/hi variables. The conversion to native 128-bit memory access functions missed this strangeness. Fix this by changing the if condition, and change the variable names to hi/lo to match convention. Cc: qemu-stable@nongnu.org Reported-by: Ivan Warren Fixes: 57b38ffd0c6f ("target/ppc: Use tcg_gen_qemu_{ld,st}_i128 for LQARX, LQ, STQ") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1836 Signed-off-by: Nicholas Piggin Reviewed-by: Richard Henderson Signed-off-by: Cédric Le Goater --- target/ppc/translate/fixedpoint-impl.c.inc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc index 4ce02fd3a4..7ff7e1ec46 100644 --- a/target/ppc/translate/fixedpoint-impl.c.inc +++ b/target/ppc/translate/fixedpoint-impl.c.inc @@ -71,7 +71,7 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed) { #if defined(TARGET_PPC64) TCGv ea; - TCGv_i64 low_addr_gpr, high_addr_gpr; + TCGv_i64 lo, hi; TCGv_i128 t16; REQUIRE_INSNS_FLAGS(ctx, 64BX); @@ -94,21 +94,21 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed) gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, a->ra, tcg_constant_tl(a->si)); - if (prefixed || !ctx->le_mode) { - low_addr_gpr = cpu_gpr[a->rt]; - high_addr_gpr = cpu_gpr[a->rt + 1]; + if (ctx->le_mode && prefixed) { + lo = cpu_gpr[a->rt]; + hi = cpu_gpr[a->rt + 1]; } else { - low_addr_gpr = cpu_gpr[a->rt + 1]; - high_addr_gpr = cpu_gpr[a->rt]; + lo = cpu_gpr[a->rt + 1]; + hi = cpu_gpr[a->rt]; } t16 = tcg_temp_new_i128(); if (store) { - tcg_gen_concat_i64_i128(t16, low_addr_gpr, high_addr_gpr); + tcg_gen_concat_i64_i128(t16, lo, hi); tcg_gen_qemu_st_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128)); } else { tcg_gen_qemu_ld_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128)); - tcg_gen_extr_i128_i64(low_addr_gpr, high_addr_gpr, t16); + tcg_gen_extr_i128_i64(lo, hi, t16); } #else qemu_build_not_reached(); From af03aeb631eeb81a44d2c0ff5b429cd4b5dc2799 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 20 Aug 2023 19:59:35 -0700 Subject: [PATCH 29/35] target/ppc: Flush inputs to zero with NJ in ppc_store_vscr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1779 Signed-off-by: Richard Henderson Reviewed-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- target/ppc/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c index 62e1c15e3d..e3ad8e0c27 100644 --- a/target/ppc/cpu.c +++ b/target/ppc/cpu.c @@ -59,6 +59,7 @@ void ppc_store_vscr(CPUPPCState *env, uint32_t vscr) env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT); env->vscr_sat.u64[1] = 0; set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status); + set_flush_inputs_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status); } uint32_t ppc_get_vscr(CPUPPCState *env) From 6ec65b69ba17c954414fa23a397fb8a3fcfb4a43 Mon Sep 17 00:00:00 2001 From: Maksim Kostin Date: Wed, 9 Aug 2023 13:07:33 +0300 Subject: [PATCH 30/35] hw/ppc/e500: fix broken snapshot replay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ppce500_reset_device_tree is registered for system reset, but after c4b075318eb1 this function rerandomizes rng-seed via qemu_guest_getrandom_nofail. And when loading a snapshot, it tries to read EVENT_RANDOM that doesn't exist, so we have an error: qemu-system-ppc: Missing random event in the replay log To fix this, use qemu_register_reset_nosnapshotload instead of qemu_register_reset. Reported-by: Vitaly Cheptsov Fixes: c4b075318eb1 ("hw/ppc: pass random seed to fdt ") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1634 Signed-off-by: Maksim Kostin Reviewed-by: Nicholas Piggin Signed-off-by: Cédric Le Goater --- hw/ppc/e500.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 67793a86f1..d5b6820d1d 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -712,7 +712,7 @@ static int ppce500_prep_device_tree(PPCE500MachineState *machine, p->kernel_base = kernel_base; p->kernel_size = kernel_size; - qemu_register_reset(ppce500_reset_device_tree, p); + qemu_register_reset_nosnapshotload(ppce500_reset_device_tree, p); p->notifier.notify = ppce500_init_notify; qemu_add_machine_init_done_notifier(&p->notifier); From 76d93e146768dde7e38b6e5e43c27e478ccb580e Mon Sep 17 00:00:00 2001 From: jianchunfu Date: Fri, 21 Jul 2023 15:37:34 +0800 Subject: [PATCH 31/35] target/ppc: Fix the order of kvm_enable judgment about kvmppc_set_interrupt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's unnecessary for non-KVM accelerators(TCG, for example), to call this function, so change the order of kvm_enable() judgment. The static inline function that returns -1 directly does not work in TCG's situation. Signed-off-by: jianchunfu Tested-by: Gautam Menghani Signed-off-by: Cédric Le Goater --- hw/ppc/ppc.c | 8 ++++++-- target/ppc/kvm.c | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index ace8f4e725..aeb116d919 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -59,7 +59,9 @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) if (old_pending != env->pending_interrupts) { ppc_maybe_interrupt(env); - kvmppc_set_interrupt(cpu, irq, level); + if (kvm_enabled()) { + kvmppc_set_interrupt(cpu, irq, level); + } } trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts, @@ -1532,5 +1534,7 @@ void ppc_irq_reset(PowerPCCPU *cpu) CPUPPCState *env = &cpu->env; env->irq_input_state = 0; - kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); + if (kvm_enabled()) { + kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); + } } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 7698501743..51112bd367 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -1320,7 +1320,7 @@ int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) return 0; } - if (!kvm_enabled() || !cap_interrupt_unset) { + if (!cap_interrupt_unset) { return 0; } From ed409be14c00a4d818e63fed0f4537c845bf319c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 29 Aug 2023 16:32:33 +0200 Subject: [PATCH 32/35] ppc/xive: Use address_space routines to access the machine RAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit to log an error in case of bad configuration of the XIVE tables by the FW. Reviewed-by: Frederic Barrat Signed-off-by: Cédric Le Goater --- hw/intc/pnv_xive.c | 27 +++++++++++++++++++++++---- hw/intc/pnv_xive2.c | 27 +++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index e536b3ec26..b2bafd61b1 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -242,12 +242,20 @@ static int pnv_xive_vst_read(PnvXive *xive, uint32_t type, uint8_t blk, { const XiveVstInfo *info = &vst_infos[type]; uint64_t addr = pnv_xive_vst_addr(xive, type, blk, idx); + MemTxResult result; if (!addr) { return -1; } - cpu_physical_memory_read(addr, data, info->size); + result = address_space_read(&address_space_memory, addr, + MEMTXATTRS_UNSPECIFIED, data, + info->size); + if (result != MEMTX_OK) { + xive_error(xive, "VST: read failed at @0x%" HWADDR_PRIx + " for VST %s %x/%x\n", addr, info->name, blk, idx); + return -1; + } return 0; } @@ -258,16 +266,27 @@ static int pnv_xive_vst_write(PnvXive *xive, uint32_t type, uint8_t blk, { const XiveVstInfo *info = &vst_infos[type]; uint64_t addr = pnv_xive_vst_addr(xive, type, blk, idx); + MemTxResult result; if (!addr) { return -1; } if (word_number == XIVE_VST_WORD_ALL) { - cpu_physical_memory_write(addr, data, info->size); + result = address_space_write(&address_space_memory, addr, + MEMTXATTRS_UNSPECIFIED, data, + info->size); } else { - cpu_physical_memory_write(addr + word_number * 4, - data + word_number * 4, 4); + result = address_space_write(&address_space_memory, + addr + word_number * 4, + MEMTXATTRS_UNSPECIFIED, + data + word_number * 4, 4); + } + + if (result != MEMTX_OK) { + xive_error(xive, "VST: write failed at @0x%" HWADDR_PRIx + "for VST %s %x/%x\n", addr, info->name, blk, idx); + return -1; } return 0; } diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index bbb44a533c..4b8d0a5d81 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -240,12 +240,20 @@ static int pnv_xive2_vst_read(PnvXive2 *xive, uint32_t type, uint8_t blk, { const XiveVstInfo *info = &vst_infos[type]; uint64_t addr = pnv_xive2_vst_addr(xive, type, blk, idx); + MemTxResult result; if (!addr) { return -1; } - cpu_physical_memory_read(addr, data, info->size); + result = address_space_read(&address_space_memory, addr, + MEMTXATTRS_UNSPECIFIED, data, + info->size); + if (result != MEMTX_OK) { + xive2_error(xive, "VST: read failed at @0x%" HWADDR_PRIx + " for VST %s %x/%x\n", addr, info->name, blk, idx); + return -1; + } return 0; } @@ -256,16 +264,27 @@ static int pnv_xive2_vst_write(PnvXive2 *xive, uint32_t type, uint8_t blk, { const XiveVstInfo *info = &vst_infos[type]; uint64_t addr = pnv_xive2_vst_addr(xive, type, blk, idx); + MemTxResult result; if (!addr) { return -1; } if (word_number == XIVE_VST_WORD_ALL) { - cpu_physical_memory_write(addr, data, info->size); + result = address_space_write(&address_space_memory, addr, + MEMTXATTRS_UNSPECIFIED, data, + info->size); } else { - cpu_physical_memory_write(addr + word_number * 4, - data + word_number * 4, 4); + result = address_space_write(&address_space_memory, + addr + word_number * 4, + MEMTXATTRS_UNSPECIFIED, + data + word_number * 4, 4); + } + + if (result != MEMTX_OK) { + xive2_error(xive, "VST: write failed at @0x%" HWADDR_PRIx + "for VST %s %x/%x\n", addr, info->name, blk, idx); + return -1; } return 0; } From 56e08e77dee92269e80727ff4692fb1931d3dab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 29 Aug 2023 16:32:34 +0200 Subject: [PATCH 33/35] ppc/xive: Introduce a new XiveRouter end_notify() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will help us model the END triggers on the PowerNV machine, which can be rerouted to another interrupt controller. Reviewed-by: Frederic Barrat Signed-off-by: Cédric Le Goater --- hw/intc/xive.c | 28 ++++++++++++++++++---------- include/hw/ppc/xive.h | 2 ++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 56670b2cac..df3ee0496f 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -1518,6 +1518,13 @@ static void xive_router_realize(DeviceState *dev, Error **errp) assert(xrtr->xfb); } +static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas) +{ + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); + + return xrc->end_notify(xrtr, eas); +} + /* * Encode the HW CAM line in the block group mode format : * @@ -1664,8 +1671,7 @@ static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk, * another chip. We don't model the PowerBus but the END trigger * message has the same parameters than in the function below. */ -static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, - uint32_t end_idx, uint32_t end_data) +void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas) { XiveEND end; uint8_t priority; @@ -1675,6 +1681,10 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, XiveNVT nvt; bool found; + uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); + uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); + uint32_t end_data = xive_get_field64(EAS_END_DATA, eas->w); + /* END cache lookup */ if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, @@ -1817,10 +1827,7 @@ do_escalation: /* * The END trigger becomes an Escalation trigger */ - xive_router_end_notify(xrtr, - xive_get_field32(END_W4_ESC_END_BLOCK, end.w4), - xive_get_field32(END_W4_ESC_END_INDEX, end.w4), - xive_get_field32(END_W5_ESC_END_DATA, end.w5)); + xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4); } void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) @@ -1871,10 +1878,7 @@ void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) /* * The event trigger becomes an END trigger */ - xive_router_end_notify(xrtr, - xive_get_field64(EAS_END_BLOCK, eas.w), - xive_get_field64(EAS_END_INDEX, eas.w), - xive_get_field64(EAS_END_DATA, eas.w)); + xive_router_end_notify_handler(xrtr, &eas); } static Property xive_router_properties[] = { @@ -1887,12 +1891,16 @@ static void xive_router_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass); + XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass); dc->desc = "XIVE Router Engine"; device_class_set_props(dc, xive_router_properties); /* Parent is SysBusDeviceClass. No need to call its realize hook */ dc->realize = xive_router_realize; xnc->notify = xive_router_notify; + + /* By default, the router handles END triggers locally */ + xrc->end_notify = xive_router_end_notify; } static const TypeInfo xive_router_info = { diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 9f580a2699..f120874e0f 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -401,6 +401,7 @@ struct XiveRouterClass { int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt, uint8_t word_number); uint8_t (*get_block_id)(XiveRouter *xrtr); + void (*end_notify)(XiveRouter *xrtr, XiveEAS *eas); }; int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx, @@ -414,6 +415,7 @@ int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt, uint8_t word_number); void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked); +void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas); /* * XIVE Presenter From f2c1e591fa3adb964337daa85be1f86cd7a20a0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 29 Aug 2023 16:32:35 +0200 Subject: [PATCH 34/35] ppc/xive: Handle END triggers between chips with MMIOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The notify page of the interrupt controller can either be used to receive trigger events from the HW controllers (PHB, PSI) or to reroute interrupts between Interrupt Controllers. In which case, the VSD table is used to determine the address of the notify page of the remote IC and the store data is forwarded. Today, our model grabs the remote VSD (EAS, END, NVT) address using pnv_xive_get_remote() helper. Be more precise and implement remote END triggers using a store on the remote IC notify page. We still have a shortcut in the model for the NVT accesses which we will address later. Reviewed-by: Frederic Barrat Signed-off-by: Cédric Le Goater --- hw/intc/pnv_xive.c | 69 +++++++++++++++++++++++++++++++++++++++-- hw/intc/pnv_xive_regs.h | 1 + 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index b2bafd61b1..aae5cb6f60 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -225,6 +225,11 @@ static uint64_t pnv_xive_vst_addr(PnvXive *xive, uint32_t type, uint8_t blk, /* Remote VST access */ if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { + if (type != VST_TSEL_VPDT) { + xive_error(xive, "VST: invalid access on remote VST %s %x/%x !?", + info->name, blk, idx); + return 0; + } xive = pnv_xive_get_remote(blk); return xive ? pnv_xive_vst_addr(xive, type, blk, idx) : 0; @@ -294,12 +299,26 @@ static int pnv_xive_vst_write(PnvXive *xive, uint32_t type, uint8_t blk, static int pnv_xive_get_end(XiveRouter *xrtr, uint8_t blk, uint32_t idx, XiveEND *end) { + PnvXive *xive = PNV_XIVE(xrtr); + + if (pnv_xive_block_id(xive) != blk) { + xive_error(xive, "VST: END %x/%x is remote !?", blk, idx); + return -1; + } + return pnv_xive_vst_read(PNV_XIVE(xrtr), VST_TSEL_EQDT, blk, idx, end); } static int pnv_xive_write_end(XiveRouter *xrtr, uint8_t blk, uint32_t idx, XiveEND *end, uint8_t word_number) { + PnvXive *xive = PNV_XIVE(xrtr); + + if (pnv_xive_block_id(xive) != blk) { + xive_error(xive, "VST: END %x/%x is remote !?", blk, idx); + return -1; + } + return pnv_xive_vst_write(PNV_XIVE(xrtr), VST_TSEL_EQDT, blk, idx, end, word_number); } @@ -1368,6 +1387,50 @@ static const MemoryRegionOps pnv_xive_ic_reg_ops = { #define PNV_XIVE_SYNC_PUSH 0xf00 /* Sync push context */ #define PNV_XIVE_SYNC_VPC 0xf80 /* Sync remove VPC store */ +static void pnv_xive_end_notify(XiveRouter *xrtr, XiveEAS *eas) +{ + PnvXive *xive = PNV_XIVE(xrtr); + uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); + uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); + uint32_t end_data = xive_get_field64(EAS_END_DATA, eas->w); + uint64_t end_vsd = xive->vsds[VST_TSEL_EQDT][end_blk]; + + switch (GETFIELD(VSD_MODE, end_vsd)) { + case VSD_MODE_EXCLUSIVE: + /* Perform the END notification on the local IC. */ + xive_router_end_notify(xrtr, eas); + break; + + case VSD_MODE_FORWARD: { + MemTxResult result; + uint64_t notif_port = end_vsd & VSD_ADDRESS_MASK; + uint64_t data = XIVE_TRIGGER_END | XIVE_TRIGGER_PQ | + be64_to_cpu(eas->w); + + /* Forward the store on the remote IC notify page. */ + address_space_stq_be(&address_space_memory, notif_port, data, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + xive_error(xive, "IC: Forward notif END %x/%x [%x] failed @%" + HWADDR_PRIx, end_blk, end_idx, end_data, notif_port); + return; + } + break; + } + + case VSD_MODE_INVALID: + default: + /* Set FIR */ + xive_error(xive, "IC: Invalid END VSD for block %x", end_blk); + return; + } +} + +/* + * The notify page can either be used to receive trigger events from + * the HW controllers (PHB, PSI) or to reroute interrupts between + * Interrupt controllers. + */ static void pnv_xive_ic_hw_trigger(PnvXive *xive, hwaddr addr, uint64_t val) { uint8_t blk; @@ -1376,8 +1439,8 @@ static void pnv_xive_ic_hw_trigger(PnvXive *xive, hwaddr addr, uint64_t val) trace_pnv_xive_ic_hw_trigger(addr, val); if (val & XIVE_TRIGGER_END) { - xive_error(xive, "IC: END trigger at @0x%"HWADDR_PRIx" data 0x%"PRIx64, - addr, val); + val = cpu_to_be64(val); + pnv_xive_end_notify(XIVE_ROUTER(xive), (XiveEAS *) &val); return; } @@ -1917,6 +1980,7 @@ static void pnv_xive_realize(DeviceState *dev, Error **errp) memory_region_init_io(&xive->ic_notify_mmio, OBJECT(dev), &pnv_xive_ic_notify_ops, xive, "xive-ic-notify", 1 << xive->ic_shift); + xive->ic_notify_mmio.disable_reentrancy_guard = true; /* The Pervasive LSI trigger and EOI pages (not modeled) */ memory_region_init_io(&xive->ic_lsi_mmio, OBJECT(dev), &pnv_xive_ic_lsi_ops, @@ -2017,6 +2081,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data) xrc->get_nvt = pnv_xive_get_nvt; xrc->write_nvt = pnv_xive_write_nvt; xrc->get_block_id = pnv_xive_get_block_id; + xrc->end_notify = pnv_xive_end_notify; xnc->notify = pnv_xive_notify; xpc->match_nvt = pnv_xive_match_nvt; diff --git a/hw/intc/pnv_xive_regs.h b/hw/intc/pnv_xive_regs.h index c78f030c02..793847638b 100644 --- a/hw/intc/pnv_xive_regs.h +++ b/hw/intc/pnv_xive_regs.h @@ -228,6 +228,7 @@ * VSD and is only meant to be used in indirect mode ! */ #define VSD_MODE PPC_BITMASK(0, 1) +#define VSD_MODE_INVALID 0 #define VSD_MODE_SHARED 1 #define VSD_MODE_EXCLUSIVE 2 #define VSD_MODE_FORWARD 3 From b68147b7a5bf6ea2c2b8a8830465e7e90bb2a77c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 29 Aug 2023 16:32:36 +0200 Subject: [PATCH 35/35] ppc/xive: Add support for the PC MMIOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XIVE interrupt contoller maintains various fields on interrupt targets in a structure called NVT. Each unit has a NVT cache, backed by RAM. When the NVT structure is not local (in RAM) to the chip, the XIVE interrupt controller forwards the memory operation to the owning chip using the PC MMIO region configured for this purpose. QEMU does not need to be so precise since software shouldn't perform any of these operations. The model implementation is simplified to return the RAM address of the NVT structure which is then used by pnv_xive_vst_write or read to perform the operation in RAM. Remove the last use of pnv_xive_get_remote(). Reviewed-by: Frederic Barrat Signed-off-by: Cédric Le Goater --- hw/intc/pnv_xive.c | 84 ++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index aae5cb6f60..9b10e90519 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -84,28 +84,6 @@ static uint8_t pnv_xive_block_id(PnvXive *xive) return blk; } -/* - * Remote access to controllers. HW uses MMIOs. For now, a simple scan - * of the chips is good enough. - * - * TODO: Block scope support - */ -static PnvXive *pnv_xive_get_remote(uint8_t blk) -{ - PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); - int i; - - for (i = 0; i < pnv->num_chips; i++) { - Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]); - PnvXive *xive = &chip9->xive; - - if (pnv_xive_block_id(xive) == blk) { - return xive; - } - } - return NULL; -} - /* * VST accessors for SBE, EAT, ENDT, NVT * @@ -209,6 +187,42 @@ static uint64_t pnv_xive_vst_addr_indirect(PnvXive *xive, uint32_t type, return pnv_xive_vst_addr_direct(xive, type, vsd, (idx % vst_per_page)); } +/* + * This is a simplified model of operation forwarding on a remote IC. + * + * A PC MMIO address is built to identify the NVT structure. The load + * on the remote IC will return the address of the structure in RAM, + * which will then be used by pnv_xive_vst_write/read to perform the + * RAM operation. + */ +static uint64_t pnv_xive_vst_addr_remote(PnvXive *xive, uint32_t type, + uint64_t vsd, uint8_t blk, + uint32_t idx) +{ + const XiveVstInfo *info = &vst_infos[type]; + uint64_t remote_addr = vsd & VSD_ADDRESS_MASK; + uint64_t vst_addr; + MemTxResult result; + + if (type != VST_TSEL_VPDT) { + xive_error(xive, "VST: invalid access on remote VST %s %x/%x !?", + info->name, blk, idx); + return 0; + } + + remote_addr |= idx << xive->pc_shift; + + vst_addr = address_space_ldq_be(&address_space_memory, remote_addr, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + xive_error(xive, "VST: read failed at @0x%" HWADDR_PRIx + " for NVT %x/%x\n", remote_addr, blk, idx); + return 0; + } + + return vst_addr; +} + static uint64_t pnv_xive_vst_addr(PnvXive *xive, uint32_t type, uint8_t blk, uint32_t idx) { @@ -225,14 +239,7 @@ static uint64_t pnv_xive_vst_addr(PnvXive *xive, uint32_t type, uint8_t blk, /* Remote VST access */ if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { - if (type != VST_TSEL_VPDT) { - xive_error(xive, "VST: invalid access on remote VST %s %x/%x !?", - info->name, blk, idx); - return 0; - } - xive = pnv_xive_get_remote(blk); - - return xive ? pnv_xive_vst_addr(xive, type, blk, idx) : 0; + return pnv_xive_vst_addr_remote(xive, type, vsd, blk, idx); } if (VSD_INDIRECT & vsd) { @@ -1785,16 +1792,20 @@ static const MemoryRegionOps pnv_xive_vc_ops = { }; /* - * Presenter Controller MMIO region. The Virtualization Controller - * updates the IPB in the NVT table when required. Not modeled. + * Presenter Controller MMIO region. Points to the NVT sets. + * + * HW implements all possible mem ops to the underlying NVT structure + * but QEMU does not need to be so precise. The model implementation + * simply returns the RAM address of the NVT structure which is then + * used by pnv_xive_vst_write/read to perform the RAM operation. */ -static uint64_t pnv_xive_pc_read(void *opaque, hwaddr addr, - unsigned size) +static uint64_t pnv_xive_pc_read(void *opaque, hwaddr offset, unsigned size) { PnvXive *xive = PNV_XIVE(opaque); + uint32_t nvt_idx = offset >> xive->pc_shift; + uint8_t blk = pnv_xive_block_id(xive); /* TODO: VDT -> block xlate */ - xive_error(xive, "PC: invalid read @%"HWADDR_PRIx, addr); - return -1; + return pnv_xive_vst_addr(xive, VST_TSEL_VPDT, blk, nvt_idx); } static void pnv_xive_pc_write(void *opaque, hwaddr addr, @@ -2016,6 +2027,7 @@ static void pnv_xive_realize(DeviceState *dev, Error **errp) /* Presenter Controller MMIO region (not modeled) */ memory_region_init_io(&xive->pc_mmio, OBJECT(xive), &pnv_xive_pc_ops, xive, "xive-pc", PNV9_XIVE_PC_SIZE); + xive->pc_mmio.disable_reentrancy_guard = true; /* Thread Interrupt Management Area (Direct) */ memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &pnv_xive_tm_ops,