From d6f1445fafa8dcb578c90e7b475fd4728abd99da Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 2 Mar 2016 21:19:20 +0100 Subject: [PATCH 01/16] ppc: Define the PSPB register on POWER8 POWER8 / PowerISA 2.07 has a new special purpose register called PSPB ("Problem State Priority Boost Register"). The contents of this register are currently lost during migration. To be able to migrate this register, too, we've got to define this SPR along with the other SPRs of POWER8. Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- target-ppc/cpu.h | 1 + target-ppc/translate_init.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 2b10597fa9..8fc0fb4ce8 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1380,6 +1380,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_UAMOR (0x09D) #define SPR_MPC_ICTRL (0x09E) #define SPR_MPC_BAR (0x09F) +#define SPR_PSPB (0x09F) #define SPR_VRSAVE (0x100) #define SPR_USPRG0 (0x100) #define SPR_USPRG1 (0x101) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index bd0cffca23..f72148cc22 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7842,6 +7842,14 @@ static void gen_spr_power8_fscr(CPUPPCState *env) KVM_REG_PPC_FSCR, initval); } +static void gen_spr_power8_pspb(CPUPPCState *env) +{ + spr_register_kvm(env, SPR_PSPB, "PSPB", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic32, + KVM_REG_PPC_PSPB, 0); +} + static void init_proc_book3s_64(CPUPPCState *env, int version) { gen_spr_ne_601(env); @@ -7892,6 +7900,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version) gen_spr_power8_pmu_sup(env); gen_spr_power8_pmu_user(env); gen_spr_power8_tm(env); + gen_spr_power8_pspb(env); gen_spr_vtb(env); } if (version < BOOK3S_CPU_POWER8) { From 1e440cbc99d810ac8153619e6fbee3d7dfaf1893 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 2 Mar 2016 21:19:21 +0100 Subject: [PATCH 02/16] ppc: Fix migration of the TAR SPR The TAR special purpose register currently does not get migrated under KVM because it does not get synchronized with the kernel. Use spr_register_kvm() instead of spr_register() to fix this issue. Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- target-ppc/translate_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index f72148cc22..48a163511c 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7714,10 +7714,10 @@ static void spr_write_tar(DisasContext *ctx, int sprn, int gprn) static void gen_spr_power8_tce_address_control(CPUPPCState *env) { - spr_register(env, SPR_TAR, "TAR", - &spr_read_tar, &spr_write_tar, - &spr_read_generic, &spr_write_generic, - 0x00000000); + spr_register_kvm(env, SPR_TAR, "TAR", + &spr_read_tar, &spr_write_tar, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_TAR, 0x00000000); } static void spr_read_tm(DisasContext *ctx, int gprn, int sprn) From 14646457ae79628b797ff8da8842e7d4bad92458 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 2 Mar 2016 21:19:22 +0100 Subject: [PATCH 03/16] ppc: Add a few more P8 PMU SPRs Signed-off-by: Benjamin Herrenschmidt Signed-off-by: David Gibson --- target-ppc/cpu.h | 7 +++++++ target-ppc/translate_init.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 8fc0fb4ce8..8d90d862de 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1564,6 +1564,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_PERF0 (0x300) #define SPR_RCPU_MI_RBA0 (0x300) #define SPR_MPC_MI_CTR (0x300) +#define SPR_POWER_USIER (0x300) #define SPR_PERF1 (0x301) #define SPR_RCPU_MI_RBA1 (0x301) #define SPR_POWER_UMMCR2 (0x301) @@ -1613,6 +1614,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_PERFF (0x30F) #define SPR_MPC_MD_TW (0x30F) #define SPR_UPERF0 (0x310) +#define SPR_POWER_SIER (0x310) #define SPR_UPERF1 (0x311) #define SPR_POWER_MMCR2 (0x311) #define SPR_UPERF2 (0x312) @@ -1674,7 +1676,12 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_440_ITV2 (0x376) #define SPR_440_ITV3 (0x377) #define SPR_440_CCR1 (0x378) +#define SPR_TACR (0x378) +#define SPR_TCSCR (0x379) +#define SPR_CSIGR (0x37a) #define SPR_DCRIPR (0x37B) +#define SPR_POWER_SPMC1 (0x37C) +#define SPR_POWER_SPMC2 (0x37D) #define SPR_POWER_MMCRS (0x37E) #define SPR_PPR (0x380) #define SPR_750_GQR0 (0x390) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 48a163511c..06b008de96 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7603,6 +7603,30 @@ static void gen_spr_power8_pmu_sup(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_MMCRS, 0x00000000); + spr_register_kvm(env, SPR_POWER_SIER, "SIER", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_SIER, 0x00000000); + spr_register_kvm(env, SPR_POWER_SPMC1, "SPMC1", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_SPMC1, 0x00000000); + spr_register_kvm(env, SPR_POWER_SPMC2, "SPMC2", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_SPMC2, 0x00000000); + spr_register_kvm(env, SPR_TACR, "TACR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_TACR, 0x00000000); + spr_register_kvm(env, SPR_TCSCR, "TCSCR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_TCSCR, 0x00000000); + spr_register_kvm(env, SPR_CSIGR, "CSIGR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_CSIGR, 0x00000000); } static void gen_spr_power8_pmu_user(CPUPPCState *env) @@ -7611,6 +7635,10 @@ static void gen_spr_power8_pmu_user(CPUPPCState *env) &spr_read_ureg, SPR_NOACCESS, &spr_read_ureg, &spr_write_ureg, 0x00000000); + spr_register(env, SPR_POWER_USIER, "USIER", + &spr_read_generic, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); } static void gen_spr_power5p_ear(CPUPPCState *env) From a88dced8eb69730df39cb04bb3e262e5b98d5f5c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 3 Mar 2016 11:08:19 +1100 Subject: [PATCH 04/16] target-ppc: Add PVR for POWER8NVL processor This adds a new POWER8+NVLink CPU PVR which core is identical to POWER8 but has a different PVR. The only available machine now has PVR pvr 004c 0100 so this defines "POWER8NVL" alias as v1.0. The corresponding kernel commit is https://github.com/torvalds/linux/commit/ddee09c099c3 "powerpc: Add PVR for POWER8NVL processor" Signed-off-by: Alexey Kardashevskiy Signed-off-by: David Gibson --- target-ppc/cpu-models.c | 3 +++ target-ppc/cpu-models.h | 2 ++ target-ppc/translate_init.c | 3 +++ 3 files changed, 8 insertions(+) diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c index ed005d79c3..5209e63a72 100644 --- a/target-ppc/cpu-models.c +++ b/target-ppc/cpu-models.c @@ -1143,6 +1143,8 @@ "POWER8E v2.1") POWERPC_DEF("POWER8_v2.0", CPU_POWERPC_POWER8_v20, POWER8, "POWER8 v2.0") + POWERPC_DEF("POWER8NVL_v1.0",CPU_POWERPC_POWER8NVL_v10, POWER8, + "POWER8NVL v1.0") POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, "PowerPC 970 v2.2") POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, @@ -1392,6 +1394,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "POWER7+", "POWER7+_v2.1" }, { "POWER8E", "POWER8E_v2.1" }, { "POWER8", "POWER8_v2.0" }, + { "POWER8NVL", "POWER8NVL_v1.0" }, { "970", "970_v2.2" }, { "970fx", "970fx_v3.1" }, { "970mp", "970mp_v1.1" }, diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h index 2992427e31..f21a44c830 100644 --- a/target-ppc/cpu-models.h +++ b/target-ppc/cpu-models.h @@ -560,6 +560,8 @@ enum { CPU_POWERPC_POWER8E_v21 = 0x004B0201, CPU_POWERPC_POWER8_BASE = 0x004D0000, CPU_POWERPC_POWER8_v20 = 0x004D0200, + CPU_POWERPC_POWER8NVL_BASE = 0x004C0000, + CPU_POWERPC_POWER8NVL_v10 = 0x004C0100, CPU_POWERPC_970_v22 = 0x00390202, CPU_POWERPC_970FX_v10 = 0x00391100, CPU_POWERPC_970FX_v20 = 0x003C0200, diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 06b008de96..fb206aff29 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8256,6 +8256,9 @@ static void init_proc_POWER8(CPUPPCState *env) static bool ppc_pvr_match_power8(PowerPCCPUClass *pcc, uint32_t pvr) { + if ((pvr & CPU_POWERPC_POWER_SERVER_MASK) == CPU_POWERPC_POWER8NVL_BASE) { + return true; + } if ((pvr & CPU_POWERPC_POWER_SERVER_MASK) == CPU_POWERPC_POWER8E_BASE) { return true; } From 788d2599def0e26d92802593b07ec76e8701ccce Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 3 Mar 2016 15:55:36 -0600 Subject: [PATCH 05/16] spapr_pci: fix multifunction hotplug Since 3f1e147, QEMU has adopted a convention of supporting function hotplug by deferring hotplug events until func 0 is hotplugged. This is likely how management tools like libvirt would expose such support going forward. Since sPAPR guests rely on per-func events rather than slot-based, our protocol has been to hotplug func 0 *first* to avoid cases where devices appear within guests without func 0 present to avoid undefined behavior. To remain compatible with new convention, defer hotplug in a similar manner, but then generate events in 0-first order as we did in the past. Once func 0 present, fail any attempts to plug additional functions (as we do with PCIe). For unplug, defer unplug operations in a similar manner, but generate unplug events such that function 0 is removed last in guest. Signed-off-by: Michael Roth Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 93 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 7 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 3fc78955ec..9f40db2be8 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1142,14 +1142,21 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc, drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp); } +static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb, + uint32_t busnr, + int32_t devfn) +{ + return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI, + (phb->index << 16) | + (busnr << 8) | + devfn); +} + static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb, PCIDevice *pdev) { uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); - return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI, - (phb->index << 16) | - (busnr << 8) | - pdev->devfn); + return spapr_phb_get_pci_func_drc(phb, busnr, pdev->devfn); } static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, @@ -1173,6 +1180,8 @@ static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, PCIDevice *pdev = PCI_DEVICE(plugged_dev); sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); Error *local_err = NULL; + PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); + uint32_t slotnr = PCI_SLOT(pdev->devfn); /* if DR is disabled we don't need to do anything in the case of * hotplug or coldplug callbacks @@ -1190,13 +1199,44 @@ static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, g_assert(drc); + /* Following the QEMU convention used for PCIe multifunction + * hotplug, we do not allow functions to be hotplugged to a + * slot that already has function 0 present + */ + if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] && + PCI_FUNC(pdev->devfn) != 0) { + error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s," + " additional functions can no longer be exposed to guest.", + slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name); + return; + } + spapr_phb_add_pci_device(drc, phb, pdev, &local_err); if (local_err) { error_propagate(errp, local_err); return; } - if (plugged_dev->hotplugged) { - spapr_hotplug_req_add_by_index(drc); + + /* If this is function 0, signal hotplug for all the device functions. + * Otherwise defer sending the hotplug event. + */ + if (plugged_dev->hotplugged && PCI_FUNC(pdev->devfn) == 0) { + int i; + + for (i = 0; i < 8; i++) { + sPAPRDRConnector *func_drc; + sPAPRDRConnectorClass *func_drck; + sPAPRDREntitySense state; + + func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus), + PCI_DEVFN(slotnr, i)); + func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); + func_drck->entity_sense(func_drc, &state); + + if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) { + spapr_hotplug_req_add_by_index(func_drc); + } + } } } @@ -1219,12 +1259,51 @@ static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); if (!drck->release_pending(drc)) { + PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); + uint32_t slotnr = PCI_SLOT(pdev->devfn); + sPAPRDRConnector *func_drc; + sPAPRDRConnectorClass *func_drck; + sPAPRDREntitySense state; + int i; + + /* ensure any other present functions are pending unplug */ + if (PCI_FUNC(pdev->devfn) == 0) { + for (i = 1; i < 8; i++) { + func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus), + PCI_DEVFN(slotnr, i)); + func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); + func_drck->entity_sense(func_drc, &state); + if (state == SPAPR_DR_ENTITY_SENSE_PRESENT + && !func_drck->release_pending(func_drc)) { + error_setg(errp, + "PCI: slot %d, function %d still present. " + "Must unplug all non-0 functions first.", + slotnr, i); + return; + } + } + } + spapr_phb_remove_pci_device(drc, phb, pdev, &local_err); if (local_err) { error_propagate(errp, local_err); return; } - spapr_hotplug_req_remove_by_index(drc); + + /* if this isn't func 0, defer unplug event. otherwise signal removal + * for all present functions + */ + if (PCI_FUNC(pdev->devfn) == 0) { + for (i = 7; i >= 0; i--) { + func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus), + PCI_DEVFN(slotnr, i)); + func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); + func_drck->entity_sense(func_drc, &state); + if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) { + spapr_hotplug_req_remove_by_index(func_drc); + } + } + } } } From a7a00a729a98e41501904dd26e8535571b3e9579 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 9 Mar 2016 11:58:33 +1100 Subject: [PATCH 06/16] target-ppc: Split out SREGS get/put functions Currently the getting and setting of Power MMU registers (sregs) take up large inline chunks of the kvm_arch_get_registers() and kvm_arch_put_registers() functions. Especially since there are two variants (for Book-E and Book-S CPUs), only one of which will be used in practice, this is pretty hard to read. This patch splits these out into helper functions for clarity. No functional change is expected. Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Alexey Kardashevskiy Reviewed-by: Greg Kurz --- target-ppc/kvm.c | 421 +++++++++++++++++++++++++---------------------- 1 file changed, 228 insertions(+), 193 deletions(-) diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index d67c169ba3..4161f646c7 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -867,6 +867,44 @@ static int kvm_put_vpa(CPUState *cs) } #endif /* TARGET_PPC64 */ +static int kvmppc_put_books_sregs(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + struct kvm_sregs sregs; + int i; + + sregs.pvr = env->spr[SPR_PVR]; + + sregs.u.s.sdr1 = env->spr[SPR_SDR1]; + + /* Sync SLB */ +#ifdef TARGET_PPC64 + for (i = 0; i < ARRAY_SIZE(env->slb); i++) { + sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; + if (env->slb[i].esid & SLB_ESID_V) { + sregs.u.s.ppc64.slb[i].slbe |= i; + } + sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; + } +#endif + + /* Sync SRs */ + for (i = 0; i < 16; i++) { + sregs.u.s.ppc32.sr[i] = env->sr[i]; + } + + /* Sync BATs */ + for (i = 0; i < 8; i++) { + /* Beware. We have to swap upper and lower bits here */ + sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) + | env->DBAT[1][i]; + sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) + | env->IBAT[1][i]; + } + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); +} + int kvm_arch_put_registers(CPUState *cs, int level) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -920,39 +958,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) } if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { - struct kvm_sregs sregs; - - sregs.pvr = env->spr[SPR_PVR]; - - sregs.u.s.sdr1 = env->spr[SPR_SDR1]; - - /* Sync SLB */ -#ifdef TARGET_PPC64 - for (i = 0; i < ARRAY_SIZE(env->slb); i++) { - sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; - if (env->slb[i].esid & SLB_ESID_V) { - sregs.u.s.ppc64.slb[i].slbe |= i; - } - sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; - } -#endif - - /* Sync SRs */ - for (i = 0; i < 16; i++) { - sregs.u.s.ppc32.sr[i] = env->sr[i]; - } - - /* Sync BATs */ - for (i = 0; i < 8; i++) { - /* Beware. We have to swap upper and lower bits here */ - sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) - | env->DBAT[1][i]; - sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) - | env->IBAT[1][i]; - } - - ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); - if (ret) { + ret = kvmppc_put_books_sregs(cpu); + if (ret < 0) { return ret; } } @@ -1014,12 +1021,197 @@ static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; } +static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + struct kvm_sregs sregs; + int ret; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); + if (ret < 0) { + return ret; + } + + if (sregs.u.e.features & KVM_SREGS_E_BASE) { + env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; + env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; + env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; + env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; + env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; + env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; + env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; + env->spr[SPR_DECR] = sregs.u.e.dec; + env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; + env->spr[SPR_TBU] = sregs.u.e.tb >> 32; + env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; + } + + if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { + env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; + env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; + env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; + env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; + env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; + } + + if (sregs.u.e.features & KVM_SREGS_E_64) { + env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; + } + + if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { + env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; + } + + if (sregs.u.e.features & KVM_SREGS_E_IVOR) { + env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; + kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); + env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; + kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); + env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; + kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); + env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; + kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); + env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; + kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); + env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; + kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); + env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; + kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); + env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; + kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); + env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; + kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); + env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; + kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); + env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; + kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); + env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; + kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); + env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; + kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); + env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; + kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); + env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; + kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); + env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; + kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); + + if (sregs.u.e.features & KVM_SREGS_E_SPE) { + env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; + kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); + env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; + kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); + env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; + kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); + } + + if (sregs.u.e.features & KVM_SREGS_E_PM) { + env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; + kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); + } + + if (sregs.u.e.features & KVM_SREGS_E_PC) { + env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; + kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); + env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; + kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); + } + } + + if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { + env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; + env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; + env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; + env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; + env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; + env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; + env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; + env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; + env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; + env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; + } + + if (sregs.u.e.features & KVM_SREGS_EXP) { + env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; + } + + if (sregs.u.e.features & KVM_SREGS_E_PD) { + env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; + env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; + } + + if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; + env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; + env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; + + if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { + env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; + env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; + } + } + + return 0; +} + +static int kvmppc_get_books_sregs(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + struct kvm_sregs sregs; + int ret; + int i; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); + if (ret < 0) { + return ret; + } + + if (!env->external_htab) { + ppc_store_sdr1(env, sregs.u.s.sdr1); + } + + /* Sync SLB */ +#ifdef TARGET_PPC64 + /* + * The packed SLB array we get from KVM_GET_SREGS only contains + * information about valid entries. So we flush our internal copy + * to get rid of stale ones, then put all valid SLB entries back + * in. + */ + memset(env->slb, 0, sizeof(env->slb)); + for (i = 0; i < ARRAY_SIZE(env->slb); i++) { + target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; + target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; + /* + * Only restore valid entries + */ + if (rb & SLB_ESID_V) { + ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); + } + } +#endif + + /* Sync SRs */ + for (i = 0; i < 16; i++) { + env->sr[i] = sregs.u.s.ppc32.sr[i]; + } + + /* Sync BATs */ + for (i = 0; i < 8; i++) { + env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; + env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; + env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; + env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; + } + + return 0; +} + int kvm_arch_get_registers(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; struct kvm_regs regs; - struct kvm_sregs sregs; uint32_t cr; int i, ret; @@ -1059,174 +1251,17 @@ int kvm_arch_get_registers(CPUState *cs) kvm_get_fp(cs); if (cap_booke_sregs) { - ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); + ret = kvmppc_get_booke_sregs(cpu); if (ret < 0) { return ret; } - - if (sregs.u.e.features & KVM_SREGS_E_BASE) { - env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; - env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; - env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; - env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; - env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; - env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; - env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; - env->spr[SPR_DECR] = sregs.u.e.dec; - env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; - env->spr[SPR_TBU] = sregs.u.e.tb >> 32; - env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; - } - - if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { - env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; - env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; - env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; - env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; - env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; - } - - if (sregs.u.e.features & KVM_SREGS_E_64) { - env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; - } - - if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { - env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; - } - - if (sregs.u.e.features & KVM_SREGS_E_IVOR) { - env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; - kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); - env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; - kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); - env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; - kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); - env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; - kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); - env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; - kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); - env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; - kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); - env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; - kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); - env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; - kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); - env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; - kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); - env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; - kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); - env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; - kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); - env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; - kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); - env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; - kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); - env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; - kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); - env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; - kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); - env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; - kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); - - if (sregs.u.e.features & KVM_SREGS_E_SPE) { - env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; - kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); - env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; - kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); - env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; - kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); - } - - if (sregs.u.e.features & KVM_SREGS_E_PM) { - env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; - kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); - } - - if (sregs.u.e.features & KVM_SREGS_E_PC) { - env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; - kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); - env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; - kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); - } - } - - if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { - env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; - env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; - env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; - env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; - env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; - env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; - env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; - env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; - env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; - env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; - } - - if (sregs.u.e.features & KVM_SREGS_EXP) { - env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; - } - - if (sregs.u.e.features & KVM_SREGS_E_PD) { - env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; - env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; - } - - if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { - env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; - env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; - env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; - - if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { - env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; - env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; - } - } } if (cap_segstate) { - ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); + ret = kvmppc_get_books_sregs(cpu); if (ret < 0) { return ret; } - - if (!env->external_htab) { - ppc_store_sdr1(env, sregs.u.s.sdr1); - } - - /* Sync SLB */ -#ifdef TARGET_PPC64 - /* - * The packed SLB array we get from KVM_GET_SREGS only contains - * information about valid entries. So we flush our internal - * copy to get rid of stale ones, then put all valid SLB entries - * back in. - */ - memset(env->slb, 0, sizeof(env->slb)); - for (i = 0; i < ARRAY_SIZE(env->slb); i++) { - target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; - target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; - /* - * Only restore valid entries - */ - if (rb & SLB_ESID_V) { - ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); - } - } -#endif - - /* Sync SRs */ - for (i = 0; i < 16; i++) { - env->sr[i] = sregs.u.s.ppc32.sr[i]; - } - - /* Sync BATs */ - for (i = 0; i < 8; i++) { - env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; - env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; - env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; - env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; - } } if (cap_hior) { From e5c0d3ce40e40c903a7e65ded61d8742af947655 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 8 Mar 2016 11:33:46 +1100 Subject: [PATCH 07/16] target-ppc: Add helpers for updating a CPU's SDR1 and external HPT When a Power cpu with 64-bit hash MMU has it's hash page table (HPT) pointer updated by a write to the SDR1 register we need to update some derived variables. Likewise, when the cpu is configured for an external HPT (one not in the guest memory space) some derived variables need to be updated. Currently the logic for this is (partially) duplicated in ppc_store_sdr1() and in spapr_cpu_reset(). In future we're going to need it in some other places, so make some common helpers for this update. In addition the new ppc_hash64_set_external_hpt() helper also updates SDR1 in KVM - it's not updated by the normal runtime KVM <-> qemu CPU synchronization. In a sense this belongs logically in the ppc_hash64_set_sdr1() helper, but that is called from kvm_arch_get_registers() so can't itself call cpu_synchronize_state() without infinite recursion. In practice this doesn't matter because the only other caller is TCG specific. Currently there aren't situations where updating SDR1 at runtime in KVM matters, but there are going to be in future. Signed-off-by: David Gibson Reviewed-by: Greg Kurz Reviewed-by: Thomas Huth --- hw/ppc/spapr.c | 13 ++----------- target-ppc/kvm.c | 2 +- target-ppc/kvm_ppc.h | 6 ++++++ target-ppc/mmu-hash64.c | 43 +++++++++++++++++++++++++++++++++++++++++ target-ppc/mmu-hash64.h | 6 ++++++ target-ppc/mmu_helper.c | 13 ++++++------- 6 files changed, 64 insertions(+), 19 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 298171a205..eb66a87136 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1196,17 +1196,8 @@ static void spapr_cpu_reset(void *opaque) env->spr[SPR_HIOR] = 0; - env->external_htab = (uint8_t *)spapr->htab; - env->htab_base = -1; - /* - * htab_mask is the mask used to normalize hash value to PTEG index. - * htab_shift is log2 of hash table size. - * We have 8 hpte per group, and each hpte is 16 bytes. - * ie have 128 bytes per hpte entry. - */ - env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1; - env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab | - (spapr->htab_shift - 18); + ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift, + &error_fatal); } static void spapr_create_nvram(sPAPRMachineState *spapr) diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 4161f646c7..776336b8b4 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -867,7 +867,7 @@ static int kvm_put_vpa(CPUState *cs) } #endif /* TARGET_PPC64 */ -static int kvmppc_put_books_sregs(PowerPCCPU *cpu) +int kvmppc_put_books_sregs(PowerPCCPU *cpu) { CPUPPCState *env = &cpu->env; struct kvm_sregs sregs; diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index fd64c44f4d..fc7931227d 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -55,6 +55,7 @@ void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index, target_ulong pte0, target_ulong pte1); bool kvmppc_has_cap_fixup_hcalls(void); int kvmppc_enable_hwrng(void); +int kvmppc_put_books_sregs(PowerPCCPU *cpu); #else @@ -246,6 +247,11 @@ static inline int kvmppc_enable_hwrng(void) { return -1; } + +static inline int kvmppc_put_books_sregs(PowerPCCPU *cpu) +{ + abort(); +} #endif #ifndef CONFIG_KVM diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index 9c58fbf009..7b5200bdc6 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -258,6 +258,49 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb) /* * 64-bit hash table MMU handling */ +void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value, + Error **errp) +{ + CPUPPCState *env = &cpu->env; + target_ulong htabsize = value & SDR_64_HTABSIZE; + + env->spr[SPR_SDR1] = value; + if (htabsize > 28) { + error_setg(errp, + "Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1", + htabsize); + htabsize = 28; + } + env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1; + env->htab_base = value & SDR_64_HTABORG; +} + +void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, + Error **errp) +{ + CPUPPCState *env = &cpu->env; + Error *local_err = NULL; + + cpu_synchronize_state(CPU(cpu)); + + env->external_htab = hpt; + ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18), + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Not strictly necessary, but makes it clearer that an external + * htab is in use when debugging */ + env->htab_base = -1; + + if (kvm_enabled()) { + if (kvmppc_put_books_sregs(cpu) < 0) { + error_setg(errp, "Unable to update SDR1 in KVM"); + } + } +} static int ppc_hash64_pte_prot(PowerPCCPU *cpu, ppc_slb_t *slb, ppc_hash_pte64_t pte) diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h index e7d9925411..138cd7e3ad 100644 --- a/target-ppc/mmu-hash64.h +++ b/target-ppc/mmu-hash64.h @@ -92,6 +92,12 @@ unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu, extern bool kvmppc_kern_htab; + +void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value, + Error **errp); +void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, + Error **errp); + uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index); void ppc_hash64_stop_access(uint64_t token); diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c index e5ec8d6169..fcb2cc5c79 100644 --- a/target-ppc/mmu_helper.c +++ b/target-ppc/mmu_helper.c @@ -2005,15 +2005,14 @@ void ppc_store_sdr1(CPUPPCState *env, target_ulong value) env->spr[SPR_SDR1] = value; #if defined(TARGET_PPC64) if (env->mmu_model & POWERPC_MMU_64) { - target_ulong htabsize = value & SDR_64_HTABSIZE; + PowerPCCPU *cpu = ppc_env_get_cpu(env); + Error *local_err = NULL; - if (htabsize > 28) { - fprintf(stderr, "Invalid HTABSIZE 0x" TARGET_FMT_lx - " stored in SDR1\n", htabsize); - htabsize = 28; + ppc_hash64_set_sdr1(cpu, value, &local_err); + if (local_err) { + error_report_err(local_err); + error_free(local_err); } - env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1; - env->htab_base = value & SDR_64_HTABORG; } else #endif /* defined(TARGET_PPC64) */ { From c18ad9a54b75495ce61e8b28d353f8eec51768fc Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 8 Mar 2016 11:35:15 +1100 Subject: [PATCH 08/16] target-ppc: Eliminate kvmppc_kern_htab global fa48b43 "target-ppc: Remove hack for ppc_hash64_load_hpte*() with HV KVM" purports to remove a hack in the handling of hash page tables (HPTs) managed by KVM instead of qemu. However, it actually went in the wrong direction. That patch requires anything looking for an external HPT (that is one not managed by the guest itself) to check both env->external_htab (for a qemu managed HPT) and kvmppc_kern_htab (for a KVM managed HPT). That's a problem because kvmppc_kern_htab is local to mmu-hash64.c, but some places which need to check for an external HPT are outside that, such as kvm_arch_get_registers(). The latter was subtly broken by the earlier patch such that gdbstub can no longer access memory. Basically a KVM managed HPT is much more like a qemu managed HPT than it is like a guest managed HPT, so the original "hack" was actually on the right track. This partially reverts fa48b43, so we again mark a KVM managed external HPT by putting a special but non-NULL value in env->external_htab. It then goes further, using that marker to eliminate the kvmppc_kern_htab global entirely. The ppc_hash64_set_external_hpt() helper function is extended to set that marker if passed a NULL value (if you're setting an external HPT, but don't have an actual HPT to set, the assumption is that it must be a KVM managed HPT). This also has some flow-on changes to the HPT access helpers, required by the above changes. Reported-by: Greg Kurz Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Greg Kurz Tested-by: Greg Kurz --- hw/ppc/spapr.c | 3 +-- hw/ppc/spapr_hcall.c | 10 +++++----- target-ppc/mmu-hash64.c | 40 ++++++++++++++++++---------------------- target-ppc/mmu-hash64.h | 9 +++------ 4 files changed, 27 insertions(+), 35 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index eb66a87136..79a70a9c0f 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1091,7 +1091,7 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, } spapr->htab_shift = shift; - kvmppc_kern_htab = true; + spapr->htab = NULL; } else { /* kernel-side HPT not needed, allocate in userspace instead */ size_t size = 1ULL << shift; @@ -1106,7 +1106,6 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, memset(spapr->htab, 0, size); spapr->htab_shift = shift; - kvmppc_kern_htab = false; for (i = 0; i < size / HASH_PTE_SIZE_64; i++) { DIRTY_HPTE(HPTE(spapr->htab, i)); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 1733482de6..b2b1b93cfd 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -122,17 +122,17 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, break; } } - ppc_hash64_stop_access(token); + ppc_hash64_stop_access(cpu, token); if (index == 8) { return H_PTEG_FULL; } } else { token = ppc_hash64_start_access(cpu, pte_index); if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) { - ppc_hash64_stop_access(token); + ppc_hash64_stop_access(cpu, token); return H_PTEG_FULL; } - ppc_hash64_stop_access(token); + ppc_hash64_stop_access(cpu, token); } ppc_hash64_store_hpte(cpu, pte_index + index, @@ -165,7 +165,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex, token = ppc_hash64_start_access(cpu, ptex); v = ppc_hash64_load_hpte0(cpu, token, 0); r = ppc_hash64_load_hpte1(cpu, token, 0); - ppc_hash64_stop_access(token); + ppc_hash64_stop_access(cpu, token); if ((v & HPTE64_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || @@ -288,7 +288,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, token = ppc_hash64_start_access(cpu, pte_index); v = ppc_hash64_load_hpte0(cpu, token, 0); r = ppc_hash64_load_hpte1(cpu, token, 0); - ppc_hash64_stop_access(token); + ppc_hash64_stop_access(cpu, token); if ((v & HPTE64_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index 7b5200bdc6..d175fdad45 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -36,10 +36,11 @@ #endif /* - * Used to indicate whether we have allocated htab in the - * host kernel + * Used to indicate that a CPU has its hash page table (HPT) managed + * within the host kernel */ -bool kvmppc_kern_htab; +#define MMU_HASH64_KVM_MANAGED_HPT ((void *)-1) + /* * SLB handling */ @@ -283,7 +284,11 @@ void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, cpu_synchronize_state(CPU(cpu)); - env->external_htab = hpt; + if (hpt) { + env->external_htab = hpt; + } else { + env->external_htab = MMU_HASH64_KVM_MANAGED_HPT; + } ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18), &local_err); if (local_err) { @@ -396,25 +401,16 @@ uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index) hwaddr pte_offset; pte_offset = pte_index * HASH_PTE_SIZE_64; - if (kvmppc_kern_htab) { + if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) { /* * HTAB is controlled by KVM. Fetch the PTEG into a new buffer. */ token = kvmppc_hash64_read_pteg(cpu, pte_index); - if (token) { - return token; - } + } else if (cpu->env.external_htab) { /* - * pteg read failed, even though we have allocated htab via - * kvmppc_reset_htab. + * HTAB is controlled by QEMU. Just point to the internally + * accessible PTEG. */ - return 0; - } - /* - * HTAB is controlled by QEMU. Just point to the internally - * accessible PTEG. - */ - if (cpu->env.external_htab) { token = (uint64_t)(uintptr_t) cpu->env.external_htab + pte_offset; } else if (cpu->env.htab_base) { token = cpu->env.htab_base + pte_offset; @@ -422,9 +418,9 @@ uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index) return token; } -void ppc_hash64_stop_access(uint64_t token) +void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token) { - if (kvmppc_kern_htab) { + if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) { kvmppc_hash64_free_pteg(token); } } @@ -453,11 +449,11 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, && HPTE64_V_COMPARE(pte0, ptem)) { pte->pte0 = pte0; pte->pte1 = pte1; - ppc_hash64_stop_access(token); + ppc_hash64_stop_access(cpu, token); return (pte_index + i) * HASH_PTE_SIZE_64; } } - ppc_hash64_stop_access(token); + ppc_hash64_stop_access(cpu, token); /* * We didn't find a valid entry. */ @@ -772,7 +768,7 @@ void ppc_hash64_store_hpte(PowerPCCPU *cpu, { CPUPPCState *env = &cpu->env; - if (kvmppc_kern_htab) { + if (env->external_htab == MMU_HASH64_KVM_MANAGED_HPT) { kvmppc_hash64_write_pte(env, pte_index, pte0, pte1); return; } diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h index 138cd7e3ad..9bf8b9b267 100644 --- a/target-ppc/mmu-hash64.h +++ b/target-ppc/mmu-hash64.h @@ -90,16 +90,13 @@ unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu, #define HPTE64_V_1TB_SEG 0x4000000000000000ULL #define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL - -extern bool kvmppc_kern_htab; - void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value, Error **errp); void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, Error **errp); uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index); -void ppc_hash64_stop_access(uint64_t token); +void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token); static inline target_ulong ppc_hash64_load_hpte0(PowerPCCPU *cpu, uint64_t token, int index) @@ -108,7 +105,7 @@ static inline target_ulong ppc_hash64_load_hpte0(PowerPCCPU *cpu, uint64_t addr; addr = token + (index * HASH_PTE_SIZE_64); - if (kvmppc_kern_htab || env->external_htab) { + if (env->external_htab) { return ldq_p((const void *)(uintptr_t)addr); } else { return ldq_phys(CPU(cpu)->as, addr); @@ -122,7 +119,7 @@ static inline target_ulong ppc_hash64_load_hpte1(PowerPCCPU *cpu, uint64_t addr; addr = token + (index * HASH_PTE_SIZE_64) + HASH_PTE_SIZE_64/2; - if (kvmppc_kern_htab || env->external_htab) { + if (env->external_htab) { return ldq_p((const void *)(uintptr_t)addr); } else { return ldq_phys(CPU(cpu)->as, addr); From f1a6cf3ef734aab142d5f7ce52e219474ababf6b Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 11 Mar 2016 19:48:47 +0100 Subject: [PATCH 09/16] spapr_rng: fix race with main loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit "60253ed1e6ec rng: add request queue support to rng-random", the use of a spapr_rng device may hang vCPU threads. The following path is taken without holding the lock to the main loop mutex: h_random() rng_backend_request_entropy() rng_random_request_entropy() qemu_set_fd_handler() The consequence is that entropy_available() may be called before the vCPU thread could even queue the request: depending on the scheduling, it may happen that entropy_available() does not call random_recv()->qemu_sem_post(). The vCPU thread will then sleep forever in h_random()->qemu_sem_wait(). This could not happen before 60253ed1e6ec because entropy_available() used to call random_recv() unconditionally. This patch ensures the lock is held to avoid the race. Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Reviewed-by: Thomas Huth Signed-off-by: David Gibson --- hw/ppc/spapr_rng.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c index a39d472b66..02d6be49f5 100644 --- a/hw/ppc/spapr_rng.c +++ b/hw/ppc/spapr_rng.c @@ -77,13 +77,13 @@ static target_ulong h_random(PowerPCCPU *cpu, sPAPRMachineState *spapr, hrdata.val.v64 = 0; hrdata.received = 0; - qemu_mutex_unlock_iothread(); while (hrdata.received < 8) { rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received, random_recv, &hrdata); + qemu_mutex_unlock_iothread(); qemu_sem_wait(&hrdata.sem); + qemu_mutex_lock_iothread(); } - qemu_mutex_lock_iothread(); qemu_sem_destroy(&hrdata.sem); args[0] = hrdata.val.v64; From 3153119e9bb79ebd82e08d2fde80d1c3a039b6b2 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 9 Mar 2016 11:56:06 +1100 Subject: [PATCH 10/16] vfio: Start improving VFIO/EEH interface At present the code handling IBM's Enhanced Error Handling (EEH) interface on VFIO devices operates by bypassing the usual VFIO logic with vfio_container_ioctl(). That's a poorly designed interface with unclear semantics about exactly what can be operated on. In particular it operates on a single vfio container internally (hence the name), but takes an address space and group id, from which it deduces the container in a rather roundabout way. groupids are something that code outside vfio shouldn't even be aware of. This patch creates new interfaces for EEH operations. Internally we have vfio_eeh_container_op() which takes a VFIOContainer object directly. For external use we have vfio_eeh_as_ok() which determines if an AddressSpace is usable for EEH (at present this means it has a single container with exactly one group attached), and vfio_eeh_as_op() which will perform an operation on an AddressSpace in the unambiguous case, and otherwise returns an error. This interface still isn't great, but it's enough of an improvement to allow a number of cleanups in other places. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy Acked-by: Alex Williamson --- hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++ include/hw/vfio/vfio.h | 2 + 2 files changed, 97 insertions(+) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 96ccb797fe..0636bb176e 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1137,3 +1137,98 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, return vfio_container_do_ioctl(as, groupid, req, param); } + +/* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ +static bool vfio_eeh_container_ok(VFIOContainer *container) +{ + /* + * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO + * implementation is broken if there are multiple groups in a + * container. The hardware works in units of Partitionable + * Endpoints (== IOMMU groups) and the EEH operations naively + * iterate across all groups in the container, without any logic + * to make sure the groups have their state synchronized. For + * certain operations (ENABLE) that might be ok, until an error + * occurs, but for others (GET_STATE) it's clearly broken. + */ + + /* + * XXX Once fixed kernels exist, test for them here + */ + + if (QLIST_EMPTY(&container->group_list)) { + return false; + } + + if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { + return false; + } + + return true; +} + +static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) +{ + struct vfio_eeh_pe_op pe_op = { + .argsz = sizeof(pe_op), + .op = op, + }; + int ret; + + if (!vfio_eeh_container_ok(container)) { + error_report("vfio/eeh: EEH_PE_OP 0x%x: " + "kernel requires a container with exactly one group", op); + return -EPERM; + } + + ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); + if (ret < 0) { + error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); + return -errno; + } + + return 0; +} + +static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) +{ + VFIOAddressSpace *space = vfio_get_address_space(as); + VFIOContainer *container = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + + container = QLIST_FIRST(&space->containers); + + if (QLIST_NEXT(container, next)) { + /* We don't yet have logic to synchronize EEH state across + * multiple containers */ + container = NULL; + goto out; + } + +out: + vfio_put_address_space(space); + return container; +} + +bool vfio_eeh_as_ok(AddressSpace *as) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + return (container != NULL) && vfio_eeh_container_ok(container); +} + +int vfio_eeh_as_op(AddressSpace *as, uint32_t op) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + if (!container) { + return -ENODEV; + } + return vfio_eeh_container_op(container, op); +} diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h index 0b26cd8e11..fd3933be62 100644 --- a/include/hw/vfio/vfio.h +++ b/include/hw/vfio/vfio.h @@ -5,5 +5,7 @@ extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, int req, void *param); +bool vfio_eeh_as_ok(AddressSpace *as); +int vfio_eeh_as_op(AddressSpace *as, uint32_t op); #endif From 76a9e9f6803f097d8e37afae239c9638ee95dd39 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 29 Feb 2016 14:00:34 +1100 Subject: [PATCH 11/16] spapr_pci: Switch to vfio_eeh_as_op() interface This switches all EEH on VFIO operations in spapr_pci_vfio.c from the broken vfio_container_ioctl() interface to the new vfio_as_eeh_op() interface. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr_pci_vfio.c | 50 +++++++++++++---------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 2f3752ea18..b1e8e8e0bc 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -73,15 +73,9 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) spapr_tce_get_iommu(tcet)); } -static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb) +static void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb) { - struct vfio_eeh_pe_op op = { - .argsz = sizeof(op), - .op = VFIO_EEH_PE_ENABLE - }; - - vfio_container_ioctl(&svphb->phb.iommu_as, - svphb->iommugroupid, VFIO_EEH_PE_OP, &op); + vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE); } static void spapr_phb_vfio_reset(DeviceState *qdev) @@ -92,19 +86,18 @@ static void spapr_phb_vfio_reset(DeviceState *qdev) * ensures that the contained PCI devices will work properly * after reboot. */ - spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev)); + spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); } static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, unsigned int addr, int option) { - sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); - struct vfio_eeh_pe_op op = { .argsz = sizeof(op) }; + uint32_t op; int ret; switch (option) { case RTAS_EEH_DISABLE: - op.op = VFIO_EEH_PE_DISABLE; + op = VFIO_EEH_PE_DISABLE; break; case RTAS_EEH_ENABLE: { PCIHostState *phb; @@ -122,21 +115,20 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, return RTAS_OUT_PARAM_ERROR; } - op.op = VFIO_EEH_PE_ENABLE; + op = VFIO_EEH_PE_ENABLE; break; } case RTAS_EEH_THAW_IO: - op.op = VFIO_EEH_PE_UNFREEZE_IO; + op = VFIO_EEH_PE_UNFREEZE_IO; break; case RTAS_EEH_THAW_DMA: - op.op = VFIO_EEH_PE_UNFREEZE_DMA; + op = VFIO_EEH_PE_UNFREEZE_DMA; break; default: return RTAS_OUT_PARAM_ERROR; } - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, - VFIO_EEH_PE_OP, &op); + ret = vfio_eeh_as_op(&sphb->iommu_as, op); if (ret < 0) { return RTAS_OUT_HW_ERROR; } @@ -146,13 +138,9 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state) { - sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); - struct vfio_eeh_pe_op op = { .argsz = sizeof(op) }; int ret; - op.op = VFIO_EEH_PE_GET_STATE; - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, - VFIO_EEH_PE_OP, &op); + ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE); if (ret < 0) { return RTAS_OUT_PARAM_ERROR; } @@ -206,28 +194,26 @@ static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState *sphb) static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) { - sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); - struct vfio_eeh_pe_op op = { .argsz = sizeof(op) }; + uint32_t op; int ret; switch (option) { case RTAS_SLOT_RESET_DEACTIVATE: - op.op = VFIO_EEH_PE_RESET_DEACTIVATE; + op = VFIO_EEH_PE_RESET_DEACTIVATE; break; case RTAS_SLOT_RESET_HOT: spapr_phb_vfio_eeh_pre_reset(sphb); - op.op = VFIO_EEH_PE_RESET_HOT; + op = VFIO_EEH_PE_RESET_HOT; break; case RTAS_SLOT_RESET_FUNDAMENTAL: spapr_phb_vfio_eeh_pre_reset(sphb); - op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL; + op = VFIO_EEH_PE_RESET_FUNDAMENTAL; break; default: return RTAS_OUT_PARAM_ERROR; } - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, - VFIO_EEH_PE_OP, &op); + ret = vfio_eeh_as_op(&sphb->iommu_as, op); if (ret < 0) { return RTAS_OUT_HW_ERROR; } @@ -237,13 +223,9 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) static int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb) { - sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); - struct vfio_eeh_pe_op op = { .argsz = sizeof(op) }; int ret; - op.op = VFIO_EEH_PE_CONFIGURE; - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, - VFIO_EEH_PE_OP, &op); + ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE); if (ret < 0) { return RTAS_OUT_PARAM_ERROR; } From fbb4e983415dc5a15e167dd00bc4564c57121915 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 29 Feb 2016 17:45:05 +1100 Subject: [PATCH 12/16] spapr_pci: Eliminate class callbacks The EEH operations in the spapr-vfio-pci-host-bridge no longer rely on the special groupid field in sPAPRPHBVFIOState. So we can simplify, removing the class specific callbacks with direct calls based on a simple spapr_phb_eeh_enabled() helper. For now we implement that in terms of a boolean in the class, but we'll continue to clean that up later. On its own this is a rather strange way of doing things, but it's a useful intermediate step to further cleanups. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr_pci.c | 44 ++++++++++++++++++------------------- hw/ppc/spapr_pci_vfio.c | 18 ++++++--------- include/hw/pci-host/spapr.h | 37 +++++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 37 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 9f40db2be8..c4c5e7e414 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -92,6 +92,13 @@ PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, return pci_find_device(phb->bus, bus_num, devfn); } +static bool spapr_phb_eeh_available(sPAPRPHBState *sphb) +{ + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); + + return spc->eeh_available; +} + static uint32_t rtas_pci_cfgaddr(uint32_t arg) { /* This handles the encoding of extended config space addresses */ @@ -440,7 +447,6 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, target_ulong rets) { sPAPRPHBState *sphb; - sPAPRPHBClass *spc; uint32_t addr, option; uint64_t buid; int ret; @@ -458,12 +464,11 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, goto param_error_exit; } - spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); - if (!spc->eeh_set_option) { + if (!spapr_phb_eeh_available(sphb)) { goto param_error_exit; } - ret = spc->eeh_set_option(sphb, addr, option); + ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option); rtas_st(rets, 0, ret); return; @@ -478,7 +483,6 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, target_ulong rets) { sPAPRPHBState *sphb; - sPAPRPHBClass *spc; PCIDevice *pdev; uint32_t addr, option; uint64_t buid; @@ -493,8 +497,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, goto param_error_exit; } - spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); - if (!spc->eeh_set_option) { + if (!spapr_phb_eeh_available(sphb)) { goto param_error_exit; } @@ -534,7 +537,6 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, target_ulong rets) { sPAPRPHBState *sphb; - sPAPRPHBClass *spc; uint64_t buid; int state, ret; @@ -548,12 +550,11 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, goto param_error_exit; } - spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); - if (!spc->eeh_get_state) { + if (!spapr_phb_eeh_available(sphb)) { goto param_error_exit; } - ret = spc->eeh_get_state(sphb, &state); + ret = spapr_phb_vfio_eeh_get_state(sphb, &state); rtas_st(rets, 0, ret); if (ret != RTAS_OUT_SUCCESS) { return; @@ -578,7 +579,6 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, target_ulong rets) { sPAPRPHBState *sphb; - sPAPRPHBClass *spc; uint32_t option; uint64_t buid; int ret; @@ -594,12 +594,11 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, goto param_error_exit; } - spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); - if (!spc->eeh_reset) { + if (!spapr_phb_eeh_available(sphb)) { goto param_error_exit; } - ret = spc->eeh_reset(sphb, option); + ret = spapr_phb_vfio_eeh_reset(sphb, option); rtas_st(rets, 0, ret); return; @@ -614,7 +613,6 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu, target_ulong rets) { sPAPRPHBState *sphb; - sPAPRPHBClass *spc; uint64_t buid; int ret; @@ -628,12 +626,11 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu, goto param_error_exit; } - spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); - if (!spc->eeh_configure) { + if (!spapr_phb_eeh_available(sphb)) { goto param_error_exit; } - ret = spc->eeh_configure(sphb); + ret = spapr_phb_vfio_eeh_configure(sphb); rtas_st(rets, 0, ret); return; @@ -649,7 +646,6 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, target_ulong rets) { sPAPRPHBState *sphb; - sPAPRPHBClass *spc; int option; uint64_t buid; @@ -663,8 +659,7 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, goto param_error_exit; } - spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); - if (!spc->eeh_set_option) { + if (!spapr_phb_eeh_available(sphb)) { goto param_error_exit; } @@ -1513,6 +1508,10 @@ static void spapr_phb_reset(DeviceState *qdev) { /* Reset the IOMMU state */ object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL); + + if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) { + spapr_phb_vfio_reset(qdev); + } } static Property spapr_phb_properties[] = { @@ -1643,6 +1642,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->cannot_instantiate_with_device_add_yet = false; spc->finish_realize = spapr_phb_finish_realize; + spc->eeh_available = false; hp->plug = spapr_phb_hot_plug_child; hp->unplug = spapr_phb_hot_unplug_child; } diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index b1e8e8e0bc..10fa88a919 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -78,7 +78,7 @@ static void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb) vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE); } -static void spapr_phb_vfio_reset(DeviceState *qdev) +void spapr_phb_vfio_reset(DeviceState *qdev) { /* * The PE might be in frozen state. To reenable the EEH @@ -89,8 +89,8 @@ static void spapr_phb_vfio_reset(DeviceState *qdev) spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); } -static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, - unsigned int addr, int option) +int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, + unsigned int addr, int option) { uint32_t op; int ret; @@ -136,7 +136,7 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, return RTAS_OUT_SUCCESS; } -static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state) +int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state) { int ret; @@ -192,7 +192,7 @@ static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState *sphb) pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL); } -static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) +int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) { uint32_t op; int ret; @@ -221,7 +221,7 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) return RTAS_OUT_SUCCESS; } -static int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb) +int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb) { int ret; @@ -239,12 +239,8 @@ static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data) sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); dc->props = spapr_phb_vfio_properties; - dc->reset = spapr_phb_vfio_reset; spc->finish_realize = spapr_phb_vfio_finish_realize; - spc->eeh_set_option = spapr_phb_vfio_eeh_set_option; - spc->eeh_get_state = spapr_phb_vfio_eeh_get_state; - spc->eeh_reset = spapr_phb_vfio_eeh_reset; - spc->eeh_configure = spapr_phb_vfio_eeh_configure; + spc->eeh_available = true; } static const TypeInfo spapr_phb_vfio_info = { diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 7de5e029b1..0b936c6973 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -49,10 +49,7 @@ struct sPAPRPHBClass { PCIHostBridgeClass parent_class; void (*finish_realize)(sPAPRPHBState *sphb, Error **errp); - int (*eeh_set_option)(sPAPRPHBState *sphb, unsigned int addr, int option); - int (*eeh_get_state)(sPAPRPHBState *sphb, int *state); - int (*eeh_reset)(sPAPRPHBState *sphb, int option); - int (*eeh_configure)(sPAPRPHBState *sphb); + bool eeh_available; }; typedef struct spapr_pci_msi { @@ -137,4 +134,36 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid); PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr); +/* VFIO EEH hooks */ +#ifdef CONFIG_LINUX +int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, + unsigned int addr, int option); +int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state); +int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option); +int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb); +void spapr_phb_vfio_reset(DeviceState *qdev); +#else +static inline int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, + unsigned int addr, int option) +{ + return RTAS_OUT_HW_ERROR; +} +static inline int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, + int *state) +{ + return RTAS_OUT_HW_ERROR; +} +static inline int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) +{ + return RTAS_OUT_HW_ERROR; +} +static inline int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb) +{ + return RTAS_OUT_HW_ERROR; +} +static inline void spapr_phb_vfio_reset(DeviceState *qdev) +{ +} +#endif + #endif /* __HW_SPAPR_PCI_H__ */ From c1fa017c7e9b017ec0a75f8added7f9c4864fe8a Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 29 Feb 2016 17:19:42 +1100 Subject: [PATCH 13/16] spapr_pci: Allow EEH on spapr-pci-host-bridge Now that the EEH code is independent of the special spapr-vfio-pci-host-bridge device, we can allow it on all spapr PCI host bridges instead. We do this by changing spapr_phb_eeh_available() to be based on the vfio_eeh_as_ok() call instead of the host bridge class. Because the value of vfio_eeh_as_ok() can change with devices being hotplugged or unplugged, this can potentially lead to some strange edge cases where the guest starts using EEH, then it starts failing because of a change in status. However, it's not really any worse than the current situation. Cases that would have worked previously will still work (i.e. VFIO devices from at most one VFIO IOMMU group per vPHB), it's just that it's no longer necessary to use spapr-vfio-pci-host-bridge with the groupid pre-specified. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr_pci.c | 10 ++-------- hw/ppc/spapr_pci_vfio.c | 6 +++++- include/hw/pci-host/spapr.h | 6 +++++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index c4c5e7e414..3ec1823ab8 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -42,6 +42,8 @@ #include "hw/ppc/spapr_drc.h" #include "sysemu/device_tree.h" +#include "hw/vfio/vfio.h" + /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ #define RTAS_QUERY_FN 0 #define RTAS_CHANGE_FN 1 @@ -92,13 +94,6 @@ PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, return pci_find_device(phb->bus, bus_num, devfn); } -static bool spapr_phb_eeh_available(sPAPRPHBState *sphb) -{ - sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); - - return spc->eeh_available; -} - static uint32_t rtas_pci_cfgaddr(uint32_t arg) { /* This handles the encoding of extended config space addresses */ @@ -1642,7 +1637,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->cannot_instantiate_with_device_add_yet = false; spc->finish_realize = spapr_phb_finish_realize; - spc->eeh_available = false; hp->plug = spapr_phb_hot_plug_child; hp->unplug = spapr_phb_hot_unplug_child; } diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 10fa88a919..16a4a8f5e0 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -73,6 +73,11 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) spapr_tce_get_iommu(tcet)); } +bool spapr_phb_eeh_available(sPAPRPHBState *sphb) +{ + return vfio_eeh_as_ok(&sphb->iommu_as); +} + static void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb) { vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE); @@ -240,7 +245,6 @@ static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data) dc->props = spapr_phb_vfio_properties; spc->finish_realize = spapr_phb_vfio_finish_realize; - spc->eeh_available = true; } static const TypeInfo spapr_phb_vfio_info = { diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 0b936c6973..19a95e0706 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -49,7 +49,6 @@ struct sPAPRPHBClass { PCIHostBridgeClass parent_class; void (*finish_realize)(sPAPRPHBState *sphb, Error **errp); - bool eeh_available; }; typedef struct spapr_pci_msi { @@ -136,6 +135,7 @@ PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, /* VFIO EEH hooks */ #ifdef CONFIG_LINUX +bool spapr_phb_eeh_available(sPAPRPHBState *sphb); int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, unsigned int addr, int option); int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state); @@ -143,6 +143,10 @@ int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option); int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb); void spapr_phb_vfio_reset(DeviceState *qdev); #else +static inline bool spapr_phb_eeh_available(sPAPRPHBState *sphb) +{ + return false; +} static inline int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, unsigned int addr, int option) { From 72700d7e733948fa7fbb735ccdf2209931c88476 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 29 Feb 2016 17:19:50 +1100 Subject: [PATCH 14/16] spapr_pci: (Mostly) remove spapr-pci-vfio-host-bridge Now that the regular spapr-pci-host-bridge can handle EEH, there are only two things that spapr-pci-vfio-host-bridge does differently: 1. automatically sizes its DMA window to match the host IOMMU 2. checks if the attached VFIO container is backed by the VFIO_SPAPR_TCE_IOMMU type on the host (1) is not particularly useful, since the default window used by the regular host bridge will work with the host IOMMU configuration on all current systems anyway. Plus, automatically changing guest visible configuration (such as the DMA window) based on host settings is generally a bad idea. It's not definitively broken, since spapr-pci-vfio-host-bridge is only supposed to support VFIO devices which can't be migrated anyway, but still. (2) is not really useful, because if a guest tries to configure EEH on a different host IOMMU, the first call will fail and that will be that. It's possible there are scripts or tools out there which expect spapr-pci-vfio-host-bridge, so we don't remove it entirely. This patch reduces it to just a stub for backwards compatibility. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr_pci_vfio.c | 61 +++++++++++-------------------------- include/hw/pci-host/spapr.h | 11 ------- 2 files changed, 17 insertions(+), 55 deletions(-) diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 16a4a8f5e0..9e15924f50 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -23,54 +23,29 @@ #include "hw/pci/msix.h" #include "linux/vfio.h" #include "hw/vfio/vfio.h" +#include "qemu/error-report.h" + +#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge" + +#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \ + OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE) + +typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState; + +struct sPAPRPHBVFIOState { + sPAPRPHBState phb; + + int32_t iommugroupid; +}; static Property spapr_phb_vfio_properties[] = { DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1), DEFINE_PROP_END_OF_LIST(), }; -static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) +static void spapr_phb_vfio_instance_init(Object *obj) { - sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); - struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) }; - int ret; - sPAPRTCETable *tcet; - uint32_t liobn = svphb->phb.dma_liobn; - - if (svphb->iommugroupid == -1) { - error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid); - return; - } - - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, - VFIO_CHECK_EXTENSION, - (void *) VFIO_SPAPR_TCE_IOMMU); - if (ret != 1) { - error_setg_errno(errp, -ret, - "spapr-vfio: SPAPR extension is not supported"); - return; - } - - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, - VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); - if (ret) { - error_setg_errno(errp, -ret, - "spapr-vfio: get info from container failed"); - return; - } - - tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start, - SPAPR_TCE_PAGE_SHIFT, - info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT, - true); - if (!tcet) { - error_setg(errp, "spapr-vfio: failed to create VFIO TCE table"); - return; - } - - /* Register default 32bit DMA window */ - memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset, - spapr_tce_get_iommu(tcet)); + error_report("spapr-pci-vfio-host-bridge is deprecated"); } bool spapr_phb_eeh_available(sPAPRPHBState *sphb) @@ -241,18 +216,16 @@ int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb) static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); dc->props = spapr_phb_vfio_properties; - spc->finish_realize = spapr_phb_vfio_finish_realize; } static const TypeInfo spapr_phb_vfio_info = { .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE, .parent = TYPE_SPAPR_PCI_HOST_BRIDGE, .instance_size = sizeof(sPAPRPHBVFIOState), + .instance_init = spapr_phb_vfio_instance_init, .class_init = spapr_phb_vfio_class_init, - .class_size = sizeof(sPAPRPHBClass), }; static void spapr_pci_vfio_register_types(void) diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 19a95e0706..a08235eecb 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -28,14 +28,10 @@ #include "hw/ppc/xics.h" #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge" -#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge" #define SPAPR_PCI_HOST_BRIDGE(obj) \ OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE) -#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \ - OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE) - #define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \ OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE) #define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \ @@ -43,7 +39,6 @@ typedef struct sPAPRPHBClass sPAPRPHBClass; typedef struct sPAPRPHBState sPAPRPHBState; -typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState; struct sPAPRPHBClass { PCIHostBridgeClass parent_class; @@ -90,12 +85,6 @@ struct sPAPRPHBState { QLIST_ENTRY(sPAPRPHBState) list; }; -struct sPAPRPHBVFIOState { - sPAPRPHBState phb; - - int32_t iommugroupid; -}; - #define SPAPR_PCI_MAX_INDEX 255 #define SPAPR_PCI_BASE_BUID 0x800000020000000ULL From a36304fdca8ae38f756f2c86158ce1f0831e7fbc Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 29 Feb 2016 17:20:00 +1100 Subject: [PATCH 15/16] spapr_pci: Remove finish_realize hook Now that spapr-pci-vfio-host-bridge is reduced to just a stub, there is only one implementation of the finish_realize hook in sPAPRPHBClass. So, we can fold that implementation into its (single) caller, and remove the hook. That's the last thing left in sPAPRPHBClass, so that can go away as well. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr_pci.c | 25 +++++-------------------- include/hw/pci-host/spapr.h | 12 ------------ 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 3ec1823ab8..79baa7b177 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1303,11 +1303,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) SysBusDevice *s = SYS_BUS_DEVICE(dev); sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); PCIHostState *phb = PCI_HOST_BRIDGE(s); - sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s); char *namebuf; int i; PCIBus *bus; uint64_t msi_window_size = 4096; + sPAPRTCETable *tcet; + uint32_t nb_table; if (sphb->index != (uint32_t)-1) { hwaddr windows_base; @@ -1459,33 +1460,20 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } } - if (!info->finish_realize) { - error_setg(errp, "finish_realize not defined"); - return; - } - - info->finish_realize(sphb, errp); - - sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); -} - -static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) -{ - sPAPRTCETable *tcet; - uint32_t nb_table; - nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT; tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, 0, SPAPR_TCE_PAGE_SHIFT, nb_table, false); if (!tcet) { error_setg(errp, "Unable to create TCE table for %s", sphb->dtbusname); - return ; + return; } /* Register default 32bit DMA window */ memory_region_add_subregion(&sphb->iommu_root, sphb->dma_win_addr, spapr_tce_get_iommu(tcet)); + + sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); } static int spapr_phb_children_reset(Object *child, void *opaque) @@ -1626,7 +1614,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) { PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass); hc->root_bus_path = spapr_phb_root_bus_path; @@ -1636,7 +1623,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_spapr_pci; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->cannot_instantiate_with_device_add_yet = false; - spc->finish_realize = spapr_phb_finish_realize; hp->plug = spapr_phb_hot_plug_child; hp->unplug = spapr_phb_hot_unplug_child; } @@ -1646,7 +1632,6 @@ static const TypeInfo spapr_phb_info = { .parent = TYPE_PCI_HOST_BRIDGE, .instance_size = sizeof(sPAPRPHBState), .class_init = spapr_phb_class_init, - .class_size = sizeof(sPAPRPHBClass), .interfaces = (InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { } diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index a08235eecb..03ee006406 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -32,20 +32,8 @@ #define SPAPR_PCI_HOST_BRIDGE(obj) \ OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE) -#define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \ - OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE) -#define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \ - OBJECT_GET_CLASS(sPAPRPHBClass, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE) - -typedef struct sPAPRPHBClass sPAPRPHBClass; typedef struct sPAPRPHBState sPAPRPHBState; -struct sPAPRPHBClass { - PCIHostBridgeClass parent_class; - - void (*finish_realize)(sPAPRPHBState *sphb, Error **errp); -}; - typedef struct spapr_pci_msi { uint32_t first_irq; uint32_t num; From 3356128cd13d7ec7689b7cddd3efbfbc5339a262 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 9 Mar 2016 11:57:20 +1100 Subject: [PATCH 16/16] vfio: Eliminate vfio_container_ioctl() vfio_container_ioctl() was a bad interface that bypassed abstraction boundaries, had semantics that sat uneasily with its name, and was unsafe in many realistic circumstances. Now that spapr-pci-vfio-host-bridge has been folded into spapr-pci-host-bridge, there are no more users, so remove it. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy Acked-by: Alex Williamson --- hw/vfio/common.c | 45 ------------------------------------------ include/hw/vfio/vfio.h | 2 -- 2 files changed, 47 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 0636bb176e..fb588d8d8e 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1093,51 +1093,6 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, return 0; } -static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, - int req, void *param) -{ - VFIOGroup *group; - VFIOContainer *container; - int ret = -1; - - group = vfio_get_group(groupid, as); - if (!group) { - error_report("vfio: group %d not registered", groupid); - return ret; - } - - container = group->container; - if (group->container) { - ret = ioctl(container->fd, req, param); - if (ret < 0) { - error_report("vfio: failed to ioctl %d to container: ret=%d, %s", - _IOC_NR(req) - VFIO_BASE, ret, strerror(errno)); - } - } - - vfio_put_group(group); - - return ret; -} - -int vfio_container_ioctl(AddressSpace *as, int32_t groupid, - int req, void *param) -{ - /* We allow only certain ioctls to the container */ - switch (req) { - case VFIO_CHECK_EXTENSION: - case VFIO_IOMMU_SPAPR_TCE_GET_INFO: - case VFIO_EEH_PE_OP: - break; - default: - /* Return an error on unknown requests */ - error_report("vfio: unsupported ioctl %X", req); - return -1; - } - - return vfio_container_do_ioctl(as, groupid, req, param); -} - /* * Interfaces for IBM EEH (Enhanced Error Handling) */ diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h index fd3933be62..7153604cd1 100644 --- a/include/hw/vfio/vfio.h +++ b/include/hw/vfio/vfio.h @@ -3,8 +3,6 @@ #include "qemu/typedefs.h" -extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, - int req, void *param); bool vfio_eeh_as_ok(AddressSpace *as); int vfio_eeh_as_op(AddressSpace *as, uint32_t op);