From c4a2e3a9709aa7c84def7bc4bfbdcaf37ccf7527 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 27 Oct 2017 18:58:14 +0200 Subject: [PATCH 01/24] target/ppc: Use tcg_gen_lookup_and_goto_ptr Signed-off-by: Richard Henderson Reviewed-by: Daniel Henrique Barboza Signed-off-by: David Gibson --- target/ppc/translate.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 998fbed848..4075fc8589 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3419,7 +3419,7 @@ static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) } /*** Branch ***/ -static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) { if (NARROW_MODE(ctx)) { dest = (uint32_t) dest; @@ -3441,7 +3441,7 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) gen_debug_exception(ctx); } } - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(); } } @@ -3479,7 +3479,7 @@ static void gen_b(DisasContext *ctx) #define BCOND_CTR 2 #define BCOND_TAR 3 -static inline void gen_bcond(DisasContext *ctx, int type) +static void gen_bcond(DisasContext *ctx, int type) { uint32_t bo = BO(ctx->opcode); TCGLabel *l1; @@ -3543,26 +3543,19 @@ static inline void gen_bcond(DisasContext *ctx, int type) } else { gen_goto_tb(ctx, 0, li); } - if ((bo & 0x14) != 0x14) { - gen_set_label(l1); - gen_goto_tb(ctx, 1, ctx->nip); - } } else { if (NARROW_MODE(ctx)) { tcg_gen_andi_tl(cpu_nip, target, (uint32_t)~3); } else { tcg_gen_andi_tl(cpu_nip, target, ~3); } - tcg_gen_exit_tb(0); - if ((bo & 0x14) != 0x14) { - gen_set_label(l1); - gen_update_nip(ctx, ctx->nip); - tcg_gen_exit_tb(0); - } - } - if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) { + tcg_gen_lookup_and_goto_ptr(); tcg_temp_free(target); } + if ((bo & 0x14) != 0x14) { + gen_set_label(l1); + gen_goto_tb(ctx, 1, ctx->nip); + } } 
static void gen_bc(DisasContext *ctx) From e0f7110acaaa222591e5f025953934c70c5ae15f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 10 Nov 2017 15:20:08 +0000 Subject: [PATCH 02/24] ppc/xics: remove useless if condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous code section uses a 'first < 0' test and returns. Therefore, there is no need to test the 'first' variable against '>= 0' afterwards. Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: David Gibson --- hw/intc/xics_spapr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index d98ea8b130..e8c0a1b3e9 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -329,10 +329,8 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, return -1; } - if (first >= 0) { - for (i = first; i < first + num; ++i) { - ics_set_irq_type(ics, i, lsi); - } + for (i = first; i < first + num; ++i) { + ics_set_irq_type(ics, i, lsi); } first += ics->offset; From 2b6154120cbd7f5514cefd3c6084d39922d26d88 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 13 Nov 2017 16:50:40 +1100 Subject: [PATCH 03/24] spapr: Add pseries-2.12 machine type While we're at it fix a couple of small errors in the 2.11 and 2.10 models (they didn't have any real effect, but don't quite match the template). 
Signed-off-by: David Gibson --- hw/ppc/spapr.c | 30 +++++++++++++++++++++++++----- include/hw/compat.h | 2 ++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1ac7eb0f8c..8881f2f1e8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3714,27 +3714,47 @@ static const TypeInfo spapr_machine_info = { type_init(spapr_machine_register_##suffix) /* - * pseries-2.11 + * pseries-2.12 */ -static void spapr_machine_2_11_instance_options(MachineState *machine) +static void spapr_machine_2_12_instance_options(MachineState *machine) { } -static void spapr_machine_2_11_class_options(MachineClass *mc) +static void spapr_machine_2_12_class_options(MachineClass *mc) { /* Defaults for the latest behaviour inherited from the base class */ } -DEFINE_SPAPR_MACHINE(2_11, "2.11", true); +DEFINE_SPAPR_MACHINE(2_12, "2.12", true); + +/* + * pseries-2.11 + */ +#define SPAPR_COMPAT_2_11 \ + HW_COMPAT_2_11 + +static void spapr_machine_2_11_instance_options(MachineState *machine) +{ + spapr_machine_2_12_instance_options(machine); +} + +static void spapr_machine_2_11_class_options(MachineClass *mc) +{ + spapr_machine_2_12_class_options(mc); + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_11); +} + +DEFINE_SPAPR_MACHINE(2_11, "2.11", false); /* * pseries-2.10 */ #define SPAPR_COMPAT_2_10 \ - HW_COMPAT_2_10 \ + HW_COMPAT_2_10 static void spapr_machine_2_10_instance_options(MachineState *machine) { + spapr_machine_2_11_instance_options(machine); } static void spapr_machine_2_10_class_options(MachineClass *mc) diff --git a/include/hw/compat.h b/include/hw/compat.h index cf389b4e85..0d2a6ac468 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -1,6 +1,8 @@ #ifndef HW_COMPAT_H #define HW_COMPAT_H +#define HW_COMPAT_2_11 + #define HW_COMPAT_2_10 \ {\ .driver = "virtio-mouse-device",\ From 94ad93bd976841c26af75322301f5aad925114d6 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 20 Nov 2017 10:19:54 +0100 Subject: [PATCH 04/24] spapr_cpu_core: 
instantiate CPUs separately The current code assumes that only the CPU core object holds a reference on each individual CPU object, and happily frees their allocated memory when the core is unrealized. This is dangerous as some other code can legitimately keep a pointer to a CPU if it calls object_ref(), but it would end up with a dangling pointer. Let's allocate all CPUs with object_new() and let QOM free them when their reference count reaches zero. This greatly simplifies the code as we don't have to fiddle with the instance size anymore. Signed-off-by: Greg Kurz Acked-by: Igor Mammedov Signed-off-by: David Gibson --- hw/ppc/spapr.c | 11 +++-------- hw/ppc/spapr_cpu_core.c | 19 +++++++------------ include/hw/ppc/spapr_cpu_core.h | 2 +- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8881f2f1e8..f1b96a4e92 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3180,12 +3180,10 @@ void spapr_core_release(DeviceState *dev) if (smc->pre_2_10_has_unused_icps) { sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); - sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(cc)); - size_t size = object_type_get_instance_size(scc->cpu_type); int i; for (i = 0; i < cc->nr_threads; i++) { - CPUState *cs = CPU(sc->threads + i * size); + CPUState *cs = CPU(sc->threads[i]); pre_2_10_vmstate_register_dummy_icp(cs->cpu_index); } @@ -3231,7 +3229,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(dev); - CPUState *cs = CPU(core->threads); + CPUState *cs = CPU(core->threads[0]); sPAPRDRConnector *drc; Error *local_err = NULL; int smt = kvmppc_smt_threads(); @@ -3276,15 +3274,12 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, core_slot->cpu = OBJECT(dev); if (smc->pre_2_10_has_unused_icps) { - sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(cc)); - 
size_t size = object_type_get_instance_size(scc->cpu_type); int i; for (i = 0; i < cc->nr_threads; i++) { sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev); - void *obj = sc->threads + i * size; - cs = CPU(obj); + cs = CPU(sc->threads[i]); pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); } } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 3a4c174012..588f9b4571 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -79,13 +79,11 @@ const char *spapr_get_cpu_core_type(const char *cpu_type) static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp) { sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); - sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(dev)); - size_t size = object_type_get_instance_size(scc->cpu_type); CPUCore *cc = CPU_CORE(dev); int i; for (i = 0; i < cc->nr_threads; i++) { - void *obj = sc->threads + i * size; + Object *obj = OBJECT(sc->threads[i]); DeviceState *dev = DEVICE(obj); CPUState *cs = CPU(dev); PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -146,9 +144,8 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(dev)); CPUCore *cc = CPU_CORE(OBJECT(dev)); - size_t size; Error *local_err = NULL; - void *obj; + Object *obj; int i, j; if (!spapr) { @@ -156,18 +153,16 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) return; } - size = object_type_get_instance_size(scc->cpu_type); - sc->threads = g_malloc0(size * cc->nr_threads); + sc->threads = g_new(PowerPCCPU *, cc->nr_threads); for (i = 0; i < cc->nr_threads; i++) { char id[32]; CPUState *cs; PowerPCCPU *cpu; - obj = sc->threads + i * size; + obj = object_new(scc->cpu_type); - object_initialize(obj, size, scc->cpu_type); cs = CPU(obj); - cpu = POWERPC_CPU(cs); + cpu = sc->threads[i] = POWERPC_CPU(obj); cs->cpu_index = cc->core_id + i; cpu->vcpu_id = (cc->core_id * spapr->vsmt / smp_threads) + i; if (kvm_enabled() && 
!kvm_vcpu_id_is_valid(cpu->vcpu_id)) { @@ -192,7 +187,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) } for (j = 0; j < cc->nr_threads; j++) { - obj = sc->threads + j * size; + obj = OBJECT(sc->threads[j]); spapr_cpu_core_realize_child(obj, spapr, &local_err); if (local_err) { @@ -203,7 +198,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) err: while (--i >= 0) { - obj = sc->threads + i * size; + obj = OBJECT(sc->threads[i]); object_unparent(obj); } g_free(sc->threads); diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h index f2d48d6a67..1129f344aa 100644 --- a/include/hw/ppc/spapr_cpu_core.h +++ b/include/hw/ppc/spapr_cpu_core.h @@ -28,7 +28,7 @@ typedef struct sPAPRCPUCore { CPUCore parent_obj; /*< public >*/ - void *threads; + PowerPCCPU **threads; int node_id; } sPAPRCPUCore; From e75ce32a75ba2fe78579882cfa06590edec2cd4a Mon Sep 17 00:00:00 2001 From: Michael Davidsaver Date: Sun, 19 Nov 2017 21:24:13 -0600 Subject: [PATCH 05/24] e500: name openpic and pci host bridge Signed-off-by: Michael Davidsaver Signed-off-by: David Gibson --- hw/ppc/e500.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 5cf0dabef3..c4fe06ea2a 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -685,6 +685,8 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500Params *params, int i, j, k; dev = qdev_create(NULL, TYPE_OPENPIC); + object_property_add_child(qdev_get_machine(), "pic", OBJECT(dev), + &error_fatal); qdev_prop_set_uint32(dev, "model", params->mpic_version); qdev_prop_set_uint32(dev, "nb_cpus", smp_cpus); @@ -884,6 +886,8 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) /* PCI */ dev = qdev_create(NULL, "e500-pcihost"); + object_property_add_child(qdev_get_machine(), "pci-host", OBJECT(dev), + &error_abort); qdev_prop_set_uint32(dev, "first_slot", params->pci_first_slot); qdev_prop_set_uint32(dev, "first_pin_irq", pci_irq_nrs[0]); 
qdev_init_nofail(dev); From 5d8424dbd3e8335ea3d57f64eaa603c8fc80706f Mon Sep 17 00:00:00 2001 From: Michael Davidsaver Date: Sun, 19 Nov 2017 21:24:17 -0600 Subject: [PATCH 06/24] nvram: add AT24Cx i2c eeprom Signed-off-by: Michael Davidsaver Signed-off-by: David Gibson --- hw/nvram/Makefile.objs | 1 + hw/nvram/eeprom_at24c.c | 205 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 hw/nvram/eeprom_at24c.c diff --git a/hw/nvram/Makefile.objs b/hw/nvram/Makefile.objs index c018f6b2ff..0f4ee71dcb 100644 --- a/hw/nvram/Makefile.objs +++ b/hw/nvram/Makefile.objs @@ -1,5 +1,6 @@ common-obj-$(CONFIG_DS1225Y) += ds1225y.o common-obj-y += eeprom93xx.o +common-obj-y += eeprom_at24c.o common-obj-y += fw_cfg.o common-obj-y += chrp_nvram.o common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o diff --git a/hw/nvram/eeprom_at24c.c b/hw/nvram/eeprom_at24c.c new file mode 100644 index 0000000000..efa3621ac6 --- /dev/null +++ b/hw/nvram/eeprom_at24c.c @@ -0,0 +1,205 @@ +/* + * *AT24C* series I2C EEPROM + * + * Copyright (c) 2015 Michael Davidsaver + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the LICENSE file in the top-level directory. + */ + +#include + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "hw/i2c/i2c.h" +#include "sysemu/block-backend.h" + +/* #define DEBUG_AT24C */ + +#ifdef DEBUG_AT24C +#define DPRINTK(FMT, ...) printf(TYPE_AT24C_EE " : " FMT, ## __VA_ARGS__) +#else +#define DPRINTK(FMT, ...) do {} while (0) +#endif + +#define ERR(FMT, ...) fprintf(stderr, TYPE_AT24C_EE " : " FMT, \ + ## __VA_ARGS__) + +#define TYPE_AT24C_EE "at24c-eeprom" +#define AT24C_EE(obj) OBJECT_CHECK(EEPROMState, (obj), TYPE_AT24C_EE) + +typedef struct EEPROMState { + I2CSlave parent_obj; + + /* address counter */ + uint16_t cur; + /* total size in bytes */ + uint32_t rsize; + bool writable; + /* cells changed since last START? 
*/ + bool changed; + /* during WRITE, # of address bytes transfered */ + uint8_t haveaddr; + + uint8_t *mem; + + BlockBackend *blk; +} EEPROMState; + +static +int at24c_eeprom_event(I2CSlave *s, enum i2c_event event) +{ + EEPROMState *ee = container_of(s, EEPROMState, parent_obj); + + switch (event) { + case I2C_START_SEND: + case I2C_START_RECV: + case I2C_FINISH: + ee->haveaddr = 0; + DPRINTK("clear\n"); + if (ee->blk && ee->changed) { + int len = blk_pwrite(ee->blk, 0, ee->mem, ee->rsize, 0); + if (len != ee->rsize) { + ERR(TYPE_AT24C_EE + " : failed to write backing file\n"); + } + DPRINTK("Wrote to backing file\n"); + } + ee->changed = false; + break; + case I2C_NACK: + break; + } + return 0; +} + +static +int at24c_eeprom_recv(I2CSlave *s) +{ + EEPROMState *ee = AT24C_EE(s); + int ret; + + ret = ee->mem[ee->cur]; + + ee->cur = (ee->cur + 1u) % ee->rsize; + DPRINTK("Recv %02x %c\n", ret, ret); + + return ret; +} + +static +int at24c_eeprom_send(I2CSlave *s, uint8_t data) +{ + EEPROMState *ee = AT24C_EE(s); + + if (ee->haveaddr < 2) { + ee->cur <<= 8; + ee->cur |= data; + ee->haveaddr++; + if (ee->haveaddr == 2) { + ee->cur %= ee->rsize; + DPRINTK("Set pointer %04x\n", ee->cur); + } + + } else { + if (ee->writable) { + DPRINTK("Send %02x\n", data); + ee->mem[ee->cur] = data; + ee->changed = true; + } else { + DPRINTK("Send error %02x read-only\n", data); + } + ee->cur = (ee->cur + 1u) % ee->rsize; + + } + + return 0; +} + +static +int at24c_eeprom_init(I2CSlave *i2c) +{ + EEPROMState *ee = AT24C_EE(i2c); + + ee->mem = g_malloc0(ee->rsize); + + if (ee->blk) { + int64_t len = blk_getlength(ee->blk); + + if (len != ee->rsize) { + ERR(TYPE_AT24C_EE " : Backing file size %lu != %u\n", + (unsigned long)len, (unsigned)ee->rsize); + exit(1); + } + + if (blk_set_perm(ee->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, &error_fatal) < 0) + { + ERR(TYPE_AT24C_EE + " : Backing file incorrect permission\n"); + exit(1); + } + } + return 0; +} + +static +void 
at24c_eeprom_reset(DeviceState *state) +{ + EEPROMState *ee = AT24C_EE(state); + + ee->changed = false; + ee->cur = 0; + ee->haveaddr = 0; + + memset(ee->mem, 0, ee->rsize); + + if (ee->blk) { + int len = blk_pread(ee->blk, 0, ee->mem, ee->rsize); + + if (len != ee->rsize) { + ERR(TYPE_AT24C_EE + " : Failed initial sync with backing file\n"); + } + DPRINTK("Reset read backing file\n"); + } +} + +static Property at24c_eeprom_props[] = { + DEFINE_PROP_UINT32("rom-size", EEPROMState, rsize, 0), + DEFINE_PROP_BOOL("writable", EEPROMState, writable, true), + DEFINE_PROP_DRIVE("drive", EEPROMState, blk), + DEFINE_PROP_END_OF_LIST() +}; + +static +void at24c_eeprom_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + I2CSlaveClass *k = I2C_SLAVE_CLASS(klass); + + k->init = &at24c_eeprom_init; + k->event = &at24c_eeprom_event; + k->recv = &at24c_eeprom_recv; + k->send = &at24c_eeprom_send; + + dc->props = at24c_eeprom_props; + dc->reset = at24c_eeprom_reset; +} + +static +const TypeInfo at24c_eeprom_type = { + .name = TYPE_AT24C_EE, + .parent = TYPE_I2C_SLAVE, + .instance_size = sizeof(EEPROMState), + .class_size = sizeof(I2CSlaveClass), + .class_init = at24c_eeprom_class_init, +}; + +static void at24c_eeprom_register(void) +{ + type_register_static(&at24c_eeprom_type); +} + +type_init(at24c_eeprom_register) From 403aacdb44219fcdb198e0293288a818b6cccc5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 23 Nov 2017 18:05:24 +0100 Subject: [PATCH 07/24] pcc: define the Power-saving mode Exit Cause Enable bits in PowerPCCPUClass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and use the value to define precisely the default value of the LPCR in the helper routine cpu_ppc_set_papr() Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- target/ppc/cpu-qom.h | 1 + target/ppc/translate_init.c | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 12 
deletions(-) diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index 429b47f959..deaa46a14b 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -191,6 +191,7 @@ typedef struct PowerPCCPUClass { uint64_t insns_flags; uint64_t insns_flags2; uint64_t msr_mask; + uint64_t lpcr_pm; /* Power-saving mode Exit Cause Enable bits */ powerpc_mmu_t mmu_model; powerpc_excp_t excp_model; powerpc_input_t bus_model; diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index 4e11e6f489..074c3a1d45 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -8535,6 +8535,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; + pcc->lpcr_pm = LPCR_P7_PECE0 | LPCR_P7_PECE1 | LPCR_P7_PECE2; } static void init_proc_POWER8(CPUPPCState *env) @@ -8704,6 +8705,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; + pcc->lpcr_pm = LPCR_P8_PECE0 | LPCR_P8_PECE1 | LPCR_P8_PECE2 | + LPCR_P8_PECE3 | LPCR_P8_PECE4; } #ifdef CONFIG_SOFTMMU @@ -8898,11 +8901,13 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; + pcc->lpcr_pm = LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | LPCR_OEE; } #if !defined(CONFIG_USER_ONLY) void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) { + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); CPUPPCState *env = &cpu->env; ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR]; ppc_spr_t *amor = &env->spr_cb[SPR_AMOR]; @@ -8932,8 +8937,7 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) lpcr->default_value &= ~LPCR_RMLS; lpcr->default_value |= 1ull << LPCR_RMLS_SHIFT; - switch (env->mmu_model) { - case POWERPC_MMU_3_00: 
+ if (env->mmu_model == POWERPC_MMU_3_00) { /* By default we choose legacy mode and switch to new hash or radix * when a register process table hcall is made. So disable process * tables and guest translation shootdown by default @@ -8947,18 +8951,13 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) } else { lpcr->default_value &= ~(LPCR_UPRT | LPCR_GTSE); } - lpcr->default_value |= LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | - LPCR_OEE; - break; - default: - /* P7 and P8 has slightly different PECE bits, mostly because P8 adds - * bit 47 and 48 which are reserved on P7. Here we set them all, which - * will work as expected for both implementations - */ - lpcr->default_value |= LPCR_P8_PECE0 | LPCR_P8_PECE1 | LPCR_P8_PECE2 | - LPCR_P8_PECE3 | LPCR_P8_PECE4; } + /* Also set the power-saving mode bits which depend on the CPU + * family + */ + lpcr->default_value |= pcc->lpcr_pm; + /* We should be followed by a CPU reset but update the active value * just in case... */ From df592270447317d70c7f6ab204bbab27db1dee21 Mon Sep 17 00:00:00 2001 From: Michael Davidsaver Date: Sun, 26 Nov 2017 15:58:59 -0600 Subject: [PATCH 08/24] openpic: debug w/ info_report() Replace *printf() with *_report(). Remove trailing new lines. Signed-off-by: Michael Davidsaver Signed-off-by: David Gibson --- hw/intc/openpic.c | 102 +++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index 10d6e871fb..9159a06f07 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -46,6 +46,7 @@ #include "qapi/qmp/qerror.h" #include "qemu/log.h" #include "qemu/timer.h" +#include "qemu/error-report.h" //#define DEBUG_OPENPIC @@ -58,8 +59,7 @@ static const int debug_openpic = 0; static int get_current_cpu(void); #define DPRINTF(fmt, ...) 
do { \ if (debug_openpic) { \ - printf("Core%d: ", get_current_cpu()); \ - printf(fmt , ## __VA_ARGS__); \ + info_report("Core%d: " fmt, get_current_cpu(), ## __VA_ARGS__); \ } \ } while (0) @@ -173,7 +173,7 @@ static int inttgt_to_output(int inttgt) } } - fprintf(stderr, "%s: unsupported inttgt %d\n", __func__, inttgt); + error_report("%s: unsupported inttgt %d", __func__, inttgt); return OPENPIC_OUTPUT_INT; } @@ -372,7 +372,7 @@ static void IRQ_check(OpenPICState *opp, IRQQueue *q) break; } - DPRINTF("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n", + DPRINTF("IRQ_check: irq %d set ivpr_pr=%d pr=%d", irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority); if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) { @@ -403,11 +403,11 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, dst = &opp->dst[n_CPU]; src = &opp->src[n_IRQ]; - DPRINTF("%s: IRQ %d active %d was %d\n", + DPRINTF("%s: IRQ %d active %d was %d", __func__, n_IRQ, active, was_active); if (src->output != OPENPIC_OUTPUT_INT) { - DPRINTF("%s: output %d irq %d active %d was %d count %d\n", + DPRINTF("%s: output %d irq %d active %d was %d count %d", __func__, src->output, n_IRQ, active, was_active, dst->outputs_active[src->output]); @@ -417,13 +417,13 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, */ if (active) { if (!was_active && dst->outputs_active[src->output]++ == 0) { - DPRINTF("%s: Raise OpenPIC output %d cpu %d irq %d\n", + DPRINTF("%s: Raise OpenPIC output %d cpu %d irq %d", __func__, src->output, n_CPU, n_IRQ); qemu_irq_raise(dst->irqs[src->output]); } } else { if (was_active && --dst->outputs_active[src->output] == 0) { - DPRINTF("%s: Lower OpenPIC output %d cpu %d irq %d\n", + DPRINTF("%s: Lower OpenPIC output %d cpu %d irq %d", __func__, src->output, n_CPU, n_IRQ); qemu_irq_lower(dst->irqs[src->output]); } @@ -446,7 +446,7 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, IRQ_check(opp, &dst->raised); if (active && priority <= dst->ctpr) { - 
DPRINTF("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n", + DPRINTF("%s: IRQ %d priority %d too low for ctpr %d on CPU %d", __func__, n_IRQ, priority, dst->ctpr, n_CPU); active = 0; } @@ -454,10 +454,10 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, if (active) { if (IRQ_get_next(opp, &dst->servicing) >= 0 && priority <= dst->servicing.priority) { - DPRINTF("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n", + DPRINTF("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d", __func__, n_IRQ, dst->servicing.next, n_CPU); } else { - DPRINTF("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n", + DPRINTF("%s: Raise OpenPIC INT output cpu %d irq %d/%d", __func__, n_CPU, n_IRQ, dst->raised.next); qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]); } @@ -465,12 +465,12 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, IRQ_get_next(opp, &dst->servicing); if (dst->raised.priority > dst->ctpr && dst->raised.priority > dst->servicing.priority) { - DPRINTF("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n", + DPRINTF("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d", __func__, n_IRQ, dst->raised.next, dst->raised.priority, dst->ctpr, dst->servicing.priority, n_CPU); /* IRQ line stays asserted */ } else { - DPRINTF("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n", + DPRINTF("%s: IRQ %d inactive, current prio %d/%d, CPU %d", __func__, n_IRQ, dst->ctpr, dst->servicing.priority, n_CPU); qemu_irq_lower(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]); } @@ -489,7 +489,7 @@ static void openpic_update_irq(OpenPICState *opp, int n_IRQ) if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) { /* Interrupt source is disabled */ - DPRINTF("%s: IRQ %d is disabled\n", __func__, n_IRQ); + DPRINTF("%s: IRQ %d is disabled", __func__, n_IRQ); active = false; } @@ -500,7 +500,7 @@ static void openpic_update_irq(OpenPICState *opp, int n_IRQ) * ctpr may have changed and we need to withdraw the interrupt. 
*/ if (!active && !was_active) { - DPRINTF("%s: IRQ %d is already inactive\n", __func__, n_IRQ); + DPRINTF("%s: IRQ %d is already inactive", __func__, n_IRQ); return; } @@ -512,7 +512,7 @@ static void openpic_update_irq(OpenPICState *opp, int n_IRQ) if (src->destmask == 0) { /* No target */ - DPRINTF("%s: IRQ %d has no target\n", __func__, n_IRQ); + DPRINTF("%s: IRQ %d has no target", __func__, n_IRQ); return; } @@ -547,12 +547,12 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) IRQSource *src; if (n_IRQ >= OPENPIC_MAX_IRQ) { - fprintf(stderr, "%s: IRQ %d out of range\n", __func__, n_IRQ); + error_report("%s: IRQ %d out of range", __func__, n_IRQ); abort(); } src = &opp->src[n_IRQ]; - DPRINTF("openpic: set irq %d = %d ivpr=0x%08x\n", + DPRINTF("openpic: set irq %d = %d ivpr=0x%08x", n_IRQ, level, src->ivpr); if (src->level) { /* level-sensitive irq */ @@ -612,13 +612,13 @@ static inline void write_IRQreg_idr(OpenPICState *opp, int n_IRQ, uint32_t val) } src->idr = val & mask; - DPRINTF("Set IDR %d to 0x%08x\n", n_IRQ, src->idr); + DPRINTF("Set IDR %d to 0x%08x", n_IRQ, src->idr); if (opp->flags & OPENPIC_FLAG_IDR_CRIT) { if (src->idr & crit_mask) { if (src->idr & normal_mask) { DPRINTF("%s: IRQ configured for multiple output types, using " - "critical\n", __func__); + "critical", __func__); } src->output = OPENPIC_OUTPUT_CINT; @@ -648,7 +648,7 @@ static inline void write_IRQreg_ilr(OpenPICState *opp, int n_IRQ, uint32_t val) IRQSource *src = &opp->src[n_IRQ]; src->output = inttgt_to_output(val & ILR_INTTGT_MASK); - DPRINTF("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr, + DPRINTF("Set ILR %d to 0x%08x, output %d", n_IRQ, src->idr, src->output); /* TODO: on MPIC v4.0 only, set nomask for non-INT */ @@ -688,7 +688,7 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val) } openpic_update_irq(opp, n_IRQ); - DPRINTF("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val, + DPRINTF("Set IVPR %d to 0x%08x -> 0x%08x", n_IRQ, 
val, opp->src[n_IRQ].ivpr); } @@ -719,7 +719,7 @@ static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val, IRQDest *dst; int idx; - DPRINTF("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", + DPRINTF("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64, __func__, addr, val); if (addr & 0xF) { return; @@ -747,11 +747,11 @@ static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val, case 0x1090: /* PIR */ for (idx = 0; idx < opp->nb_cpus; idx++) { if ((val & (1 << idx)) && !(opp->pir & (1 << idx))) { - DPRINTF("Raise OpenPIC RESET output for CPU %d\n", idx); + DPRINTF("Raise OpenPIC RESET output for CPU %d", idx); dst = &opp->dst[idx]; qemu_irq_raise(dst->irqs[OPENPIC_OUTPUT_RESET]); } else if (!(val & (1 << idx)) && (opp->pir & (1 << idx))) { - DPRINTF("Lower OpenPIC RESET output for CPU %d\n", idx); + DPRINTF("Lower OpenPIC RESET output for CPU %d", idx); dst = &opp->dst[idx]; qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_RESET]); } @@ -781,7 +781,7 @@ static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len) OpenPICState *opp = opaque; uint32_t retval; - DPRINTF("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + DPRINTF("%s: addr %#" HWADDR_PRIx, __func__, addr); retval = 0xFFFFFFFF; if (addr & 0xF) { return retval; @@ -828,7 +828,7 @@ static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len) default: break; } - DPRINTF("%s: => 0x%08x\n", __func__, retval); + DPRINTF("%s: => 0x%08x", __func__, retval); return retval; } @@ -843,7 +843,7 @@ static void qemu_timer_cb(void *opaque) uint32_t val = tmr->tbcr & ~TBCR_CI; uint32_t tog = ((tmr->tccr & TCCR_TOG) ^ TCCR_TOG); /* invert toggle. */ - DPRINTF("%s n_IRQ=%d\n", __func__, n_IRQ); + DPRINTF("%s n_IRQ=%d", __func__, n_IRQ); /* Reload current count from base count and setup timer. 
*/ tmr->tccr = val | tog; openpic_tmr_set_tmr(tmr, val, /*enabled=*/true); @@ -898,7 +898,7 @@ static void openpic_tmr_write(void *opaque, hwaddr addr, uint64_t val, OpenPICState *opp = opaque; int idx; - DPRINTF("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", + DPRINTF("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64, __func__, (addr + 0x10f0), val); if (addr & 0xF) { return; @@ -943,7 +943,7 @@ static uint64_t openpic_tmr_read(void *opaque, hwaddr addr, unsigned len) uint32_t retval = -1; int idx; - DPRINTF("%s: addr %#" HWADDR_PRIx "\n", __func__, addr + 0x10f0); + DPRINTF("%s: addr %#" HWADDR_PRIx, __func__, addr + 0x10f0); if (addr & 0xF) { goto out; } @@ -970,7 +970,7 @@ static uint64_t openpic_tmr_read(void *opaque, hwaddr addr, unsigned len) } out: - DPRINTF("%s: => 0x%08x\n", __func__, retval); + DPRINTF("%s: => 0x%08x", __func__, retval); return retval; } @@ -981,7 +981,7 @@ static void openpic_src_write(void *opaque, hwaddr addr, uint64_t val, OpenPICState *opp = opaque; int idx; - DPRINTF("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", + DPRINTF("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64, __func__, addr, val); addr = addr & 0xffff; @@ -1006,7 +1006,7 @@ static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len) uint32_t retval; int idx; - DPRINTF("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + DPRINTF("%s: addr %#" HWADDR_PRIx, __func__, addr); retval = 0xFFFFFFFF; addr = addr & 0xffff; @@ -1024,7 +1024,7 @@ static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len) break; } - DPRINTF("%s: => 0x%08x\n", __func__, retval); + DPRINTF("%s: => 0x%08x", __func__, retval); return retval; } @@ -1035,7 +1035,7 @@ static void openpic_msi_write(void *opaque, hwaddr addr, uint64_t val, int idx = opp->irq_msi; int srs, ibs; - DPRINTF("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64 "\n", + DPRINTF("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64, __func__, addr, val); if (addr & 0xF) { return; @@ -1061,7 +1061,7 @@ static uint64_t 
openpic_msi_read(void *opaque, hwaddr addr, unsigned size) uint64_t r = 0; int i, srs; - DPRINTF("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + DPRINTF("%s: addr %#" HWADDR_PRIx, __func__, addr); if (addr & 0xF) { return -1; } @@ -1096,7 +1096,7 @@ static uint64_t openpic_summary_read(void *opaque, hwaddr addr, unsigned size) { uint64_t r = 0; - DPRINTF("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + DPRINTF("%s: addr %#" HWADDR_PRIx, __func__, addr); /* TODO: EISR/EIMR */ @@ -1106,7 +1106,7 @@ static uint64_t openpic_summary_read(void *opaque, hwaddr addr, unsigned size) static void openpic_summary_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { - DPRINTF("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64 "\n", + DPRINTF("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64, __func__, addr, val); /* TODO: EISR/EIMR */ @@ -1120,7 +1120,7 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, IRQDest *dst; int s_IRQ, n_IRQ; - DPRINTF("%s: cpu %d addr %#" HWADDR_PRIx " <= 0x%08x\n", __func__, idx, + DPRINTF("%s: cpu %d addr %#" HWADDR_PRIx " <= 0x%08x", __func__, idx, addr, val); if (idx < 0 || idx >= opp->nb_cpus) { @@ -1146,16 +1146,16 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, case 0x80: /* CTPR */ dst->ctpr = val & 0x0000000F; - DPRINTF("%s: set CPU %d ctpr to %d, raised %d servicing %d\n", + DPRINTF("%s: set CPU %d ctpr to %d, raised %d servicing %d", __func__, idx, dst->ctpr, dst->raised.priority, dst->servicing.priority); if (dst->raised.priority <= dst->ctpr) { - DPRINTF("%s: Lower OpenPIC INT output cpu %d due to ctpr\n", + DPRINTF("%s: Lower OpenPIC INT output cpu %d due to ctpr", __func__, idx); qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_INT]); } else if (dst->raised.priority > dst->servicing.priority) { - DPRINTF("%s: Raise OpenPIC INT output cpu %d irq %d\n", + DPRINTF("%s: Raise OpenPIC INT output cpu %d irq %d", __func__, idx, dst->raised.next); qemu_irq_raise(dst->irqs[OPENPIC_OUTPUT_INT]); } @@ -1168,11 
+1168,11 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, /* Read-only register */ break; case 0xB0: /* EOI */ - DPRINTF("EOI\n"); + DPRINTF("EOI"); s_IRQ = IRQ_get_next(opp, &dst->servicing); if (s_IRQ < 0) { - DPRINTF("%s: EOI with no interrupt in service\n", __func__); + DPRINTF("%s: EOI with no interrupt in service", __func__); break; } @@ -1185,7 +1185,7 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, if (n_IRQ != -1 && (s_IRQ == -1 || IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) { - DPRINTF("Raise OpenPIC INT output cpu %d irq %d\n", + DPRINTF("Raise OpenPIC INT output cpu %d irq %d", idx, n_IRQ); qemu_irq_raise(opp->dst[idx].irqs[OPENPIC_OUTPUT_INT]); } @@ -1207,11 +1207,11 @@ static uint32_t openpic_iack(OpenPICState *opp, IRQDest *dst, int cpu) IRQSource *src; int retval, irq; - DPRINTF("Lower OpenPIC INT output\n"); + DPRINTF("Lower OpenPIC INT output"); qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_INT]); irq = IRQ_get_next(opp, &dst->raised); - DPRINTF("IACK: irq=%d\n", irq); + DPRINTF("IACK: irq=%d", irq); if (irq == -1) { /* No more interrupt pending */ @@ -1221,7 +1221,7 @@ static uint32_t openpic_iack(OpenPICState *opp, IRQDest *dst, int cpu) src = &opp->src[irq]; if (!(src->ivpr & IVPR_ACTIVITY_MASK) || !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) { - fprintf(stderr, "%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n", + error_report("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x", __func__, irq, dst->ctpr, src->ivpr); openpic_update_irq(opp, irq); retval = opp->spve; @@ -1241,7 +1241,7 @@ static uint32_t openpic_iack(OpenPICState *opp, IRQDest *dst, int cpu) /* Timers and IPIs support multicast. 
*/ if (((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + OPENPIC_MAX_IPI))) || ((irq >= opp->irq_tim0) && (irq < (opp->irq_tim0 + OPENPIC_MAX_TMR)))) { - DPRINTF("irq is IPI or TMR\n"); + DPRINTF("irq is IPI or TMR"); src->destmask &= ~(1 << cpu); if (src->destmask && !src->level) { /* trigger on CPUs that didn't know about it yet */ @@ -1262,7 +1262,7 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, IRQDest *dst; uint32_t retval; - DPRINTF("%s: cpu %d addr %#" HWADDR_PRIx "\n", __func__, idx, addr); + DPRINTF("%s: cpu %d addr %#" HWADDR_PRIx, __func__, idx, addr); retval = 0xFFFFFFFF; if (idx < 0 || idx >= opp->nb_cpus) { @@ -1290,7 +1290,7 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, default: break; } - DPRINTF("%s: => 0x%08x\n", __func__, retval); + DPRINTF("%s: => 0x%08x", __func__, retval); return retval; } From 67113c03423a23e60915574275aed7d60e9f85e1 Mon Sep 17 00:00:00 2001 From: Michael Davidsaver Date: Sun, 26 Nov 2017 15:59:05 -0600 Subject: [PATCH 09/24] e500: fix pci host bridge class/type Correct some confusion wrt. the PCI facing side of the PCI host bridge (not PCIe root complex). The ref. manual for the mpc8533 (as well as mpc8540 and mpc8540) give the class code as PCI_CLASS_PROCESSOR_POWERPC. While the PCI_HEADER_TYPE field is oddly omitted, the tables in the "PCI Configuration Header" section shows a type 0 layout using all 6 BAR registers (as 2x 32, and 2x 64 bit regions) So 997505065dc92e533debf5cb23012ba4e673d387 seems to be in error. Although there was perhaps some confusion as the mpc8533 has a separate PCIe root complex. With PCIe, a root complex has PCI_HEADER_TYPE=1. Neither the PCI host bridge, nor the PCIe root complex advertise class PCI_CLASS_BRIDGE_PCI. This was confusing Linux guests, which try to interpret the host bridge as a pci-pci bridge, but get confused and re-enumerate the bus when the primary/secondary/subordinate bus registers don't have valid values. 
Signed-off-by: Michael Davidsaver Signed-off-by: David Gibson --- hw/pci-host/ppce500.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index 39cd24464d..279badc894 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -423,11 +423,6 @@ static void e500_pcihost_bridge_realize(PCIDevice *d, Error **errp) PPCE500CCSRState *ccsr = CCSR(container_get(qdev_get_machine(), "/e500-ccsr")); - pci_config_set_class(d->config, PCI_CLASS_BRIDGE_PCI); - d->config[PCI_HEADER_TYPE] = - (d->config[PCI_HEADER_TYPE] & PCI_HEADER_TYPE_MULTI_FUNCTION) | - PCI_HEADER_TYPE_BRIDGE; - memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0", &ccsr->ccsr_space, 0, int128_get64(ccsr->ccsr_space.size)); pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0); From 9a94ee5bb15793ef69692998ef57794a33074134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 24 Nov 2017 08:05:48 +0100 Subject: [PATCH 10/24] spapr/rtas: disable the decrementer interrupt when a CPU is unplugged MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CPU is stopped with the 'stop-self' RTAS call, its state 'halted' is switched to 1 and, in this case, the MSR is not taken into account anymore in the cpu_has_work() routine. Only the pending hardware interrupts are checked with their LPCR:PECE* enablement bit. If the DECR timer fires after 'stop-self' is called and before the CPU 'stop' state is reached, the nearly-dead CPU will have some work to do and the guest will crash. This case happens very frequently with the not yet upstream P9 XIVE exploitation mode. In XICS mode, the DECR is occasionally fired but after 'stop' state, so no work is to be done and the guest survives. I suspect there is a race between the QEMU mainloop triggering the timers and the TCG CPU thread but I could not quite identify the root cause. 
To be safe, let's disable in the LPCR all the exceptions which can cause an exit while the CPU is in power-saving mode and reenable them when the CPU is started. Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/ppc/spapr_rtas.c | 11 +++++++++++ target/ppc/translate_init.c | 9 ++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index cdf0b607a0..858adb1bf3 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -162,6 +162,7 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr, if (cpu != NULL) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); if (!cs->halted) { rtas_st(rets, 0, RTAS_OUT_HW_ERROR); @@ -174,6 +175,10 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr, kvm_cpu_synchronize_state(cs); env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); + + /* Enable Power-saving mode Exit Cause exceptions for the new CPU */ + env->spr[SPR_LPCR] |= pcc->lpcr_pm; + env->nip = start; env->gpr[3] = r3; cs->halted = 0; @@ -197,6 +202,7 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPRMachineState *spapr, { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); cs->halted = 1; qemu_cpu_kick(cs); @@ -210,6 +216,11 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPRMachineState *spapr, * no need to bother with specific bits, we just clear it. */ env->msr = 0; + + /* Disable Power-saving mode Exit Cause exceptions for the CPU. 
+ * This could deliver an interrupt on a dying CPU and crash the + * guest */ + env->spr[SPR_LPCR] &= ~pcc->lpcr_pm; } static inline int sysparm_st(target_ulong addr, target_ulong len, diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index 074c3a1d45..70ff15a51a 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -8911,6 +8911,7 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) CPUPPCState *env = &cpu->env; ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR]; ppc_spr_t *amor = &env->spr_cb[SPR_AMOR]; + CPUState *cs = CPU(cpu); cpu->vhyp = vhyp; @@ -8953,10 +8954,12 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) } } - /* Also set the power-saving mode bits which depend on the CPU - * family + /* Only enable Power-saving mode Exit Cause exceptions on the boot + * CPU. The RTAS command start-cpu will enable them on secondaries. */ - lpcr->default_value |= pcc->lpcr_pm; + if (cs == first_cpu) { + lpcr->default_value |= pcc->lpcr_pm; + } /* We should be followed by a CPU reset but update the active value * just in case... From d6322252b3210b663e303746f151abbae7d0b6db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 24 Nov 2017 08:05:49 +0100 Subject: [PATCH 11/24] spapr/rtas: fix reboot of an SMP TCG guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like for hot unplug CPUs, when a guest is rebooted, the secondary CPUs can be awakened by the decrementer and start entering SLOF at the same time the boot CPU is. To be safe, let's disable on the secondaries all the exceptions which can cause an exit while the CPU is in power-saving mode.
Based on previous work from Nikunj A Dadhania Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_cpu_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 588f9b4571..1ea0e295dd 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -26,6 +26,7 @@ static void spapr_cpu_reset(void *opaque) PowerPCCPU *cpu = opaque; CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); cpu_reset(cs); @@ -35,6 +36,13 @@ static void spapr_cpu_reset(void *opaque) cs->halted = 1; env->spr[SPR_HIOR] = 0; + + /* Disable Power-saving mode Exit Cause exceptions for the CPU. + * This can cause issues when rebooting the guest if a secondary + * is awaken */ + if (cs != first_cpu) { + env->spr[SPR_LPCR] &= ~pcc->lpcr_pm; + } } static void spapr_cpu_destroy(PowerPCCPU *cpu) From 3fe4f0fc8530e9411819f02accf2d17c128061b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 24 Nov 2017 08:05:50 +0100 Subject: [PATCH 12/24] spapr/rtas: do not reset the MSR in stop-self command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CPU is stopped with the 'stop-self' RTAS call, its state 'halted' is switched to 1 and, in this case, the MSR is not taken into account anymore in the cpu_has_work() routine. Only the pending hardware interrupts are checked with their LPCR:PECE* enablement bit. The CPU is now also protected from the decrementer interrupt by the LPCR:PECE* bits which are disabled in the 'stop-self' RTAS call. Resetting the MSR is pointless.
Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_rtas.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 858adb1bf3..4bb939d3d1 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -206,16 +206,6 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPRMachineState *spapr, cs->halted = 1; qemu_cpu_kick(cs); - /* - * While stopping a CPU, the guest calls H_CPPR which - * effectively disables interrupts on XICS level. - * However decrementer interrupts in TCG can still - * wake the CPU up so here we disable interrupts in MSR - * as well. - * As rtas_start_cpu() resets the whole MSR anyway, there is - * no need to bother with specific bits, we just clear it. - */ - env->msr = 0; /* Disable Power-saving mode Exit Cause exceptions for the CPU. * This could deliver an interrupt on a dying CPU and crash the From 4f7a47beebd6d37861d08c81941be1b33a0ae627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 1 Dec 2017 17:06:00 +0100 Subject: [PATCH 13/24] ppc/xics: introduce an icp_create() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sPAPR and the PowerNV core objects create the interrupt presenter object of the CPUs in a very similar way. Let's provide a common routine in which we use the presenter 'type' as a child identifier. 
Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Reviewed-by: Greg Kurz Signed-off-by: David Gibson --- hw/intc/xics.c | 21 +++++++++++++++++++++ hw/ppc/pnv_core.c | 10 +--------- hw/ppc/spapr_cpu_core.c | 13 ++----------- include/hw/ppc/xics.h | 3 +++ 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index a1cc0e420c..bfc6b5bb23 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -384,6 +384,27 @@ static const TypeInfo icp_info = { .class_size = sizeof(ICPStateClass), }; +Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error **errp) +{ + Error *local_err = NULL; + Object *obj; + + obj = object_new(type); + object_property_add_child(cpu, type, obj, &error_abort); + object_unref(obj); + object_property_add_const_link(obj, ICP_PROP_XICS, OBJECT(xi), + &error_abort); + object_property_add_const_link(obj, ICP_PROP_CPU, cpu, &error_abort); + object_property_set_bool(obj, true, "realized", &local_err); + if (local_err) { + object_unparent(obj); + error_propagate(errp, local_err); + obj = NULL; + } + + return obj; +} + /* * ICS: Source layer */ diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 82ff440b33..8d966e0802 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -126,7 +126,6 @@ static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error **errp) Error *local_err = NULL; CPUState *cs = CPU(child); PowerPCCPU *cpu = POWERPC_CPU(cs); - Object *obj; object_property_set_bool(child, true, "realized", &local_err); if (local_err) { @@ -134,13 +133,7 @@ static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error **errp) return; } - obj = object_new(TYPE_PNV_ICP); - object_property_add_child(child, "icp", obj, NULL); - object_unref(obj); - object_property_add_const_link(obj, ICP_PROP_XICS, OBJECT(xi), - &error_abort); - object_property_add_const_link(obj, ICP_PROP_CPU, child, &error_abort); - object_property_set_bool(obj, true, "realized", &local_err); + 
icp_create(child, TYPE_PNV_ICP, xi, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -148,7 +141,6 @@ static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error **errp) powernv_cpu_init(cpu, &local_err); if (local_err) { - object_unparent(obj); error_propagate(errp, local_err); return; } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 1ea0e295dd..70e757f808 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -110,7 +110,6 @@ static void spapr_cpu_core_realize_child(Object *child, Error *local_err = NULL; CPUState *cs = CPU(child); PowerPCCPU *cpu = POWERPC_CPU(cs); - Object *obj; object_property_set_bool(child, true, "realized", &local_err); if (local_err) { @@ -122,21 +121,13 @@ static void spapr_cpu_core_realize_child(Object *child, goto error; } - obj = object_new(spapr->icp_type); - object_property_add_child(child, "icp", obj, &error_abort); - object_unref(obj); - object_property_add_const_link(obj, ICP_PROP_XICS, OBJECT(spapr), - &error_abort); - object_property_add_const_link(obj, ICP_PROP_CPU, child, &error_abort); - object_property_set_bool(obj, true, "realized", &local_err); + icp_create(child, spapr->icp_type, XICS_FABRIC(spapr), &local_err); if (local_err) { - goto free_icp; + goto error; } return; -free_icp: - object_unparent(obj); error: error_propagate(errp, local_err); } diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 2df99be111..2ba8b12208 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -212,4 +212,7 @@ typedef struct sPAPRMachineState sPAPRMachineState; int xics_kvm_init(sPAPRMachineState *spapr, Error **errp); void xics_spapr_init(sPAPRMachineState *spapr); +Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, + Error **errp); + #endif /* XICS_H */ From ed0c37eedfdb953d61d1e0a41439cd404e914d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 1 Dec 2017 17:06:01 +0100 Subject: [PATCH 14/24] 
ppc/xics: assign of the CPU 'intc' pointer under the core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'intc' pointer of the CPU references the interrupt presenter in the XICS interrupt mode. When the XIVE interrupt mode is available and activated, the machine will need to reassign this pointer to reflect the change. Moving this assignment under the realize routine of the CPU will ease the process when the interrupt mode is toggled. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Reviewed-by: Greg Kurz Signed-off-by: David Gibson --- hw/intc/xics.c | 1 - hw/ppc/pnv_core.c | 2 +- hw/ppc/spapr_cpu_core.c | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index bfc6b5bb23..700f6baa13 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -334,7 +334,6 @@ static void icp_realize(DeviceState *dev, Error **errp) } cpu = POWERPC_CPU(obj); - cpu->intc = OBJECT(icp); icp->cs = CPU(obj); env = &cpu->env; diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 8d966e0802..03317db853 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -133,7 +133,7 @@ static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error **errp) return; } - icp_create(child, TYPE_PNV_ICP, xi, &local_err); + cpu->intc = icp_create(child, TYPE_PNV_ICP, xi, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 70e757f808..032438b9ce 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -121,7 +121,8 @@ static void spapr_cpu_core_realize_child(Object *child, goto error; } - icp_create(child, spapr->icp_type, XICS_FABRIC(spapr), &local_err); + cpu->intc = icp_create(child, spapr->icp_type, XICS_FABRIC(spapr), + &local_err); if (local_err) { goto error; } From 60c6823b9bce6789f1ad95bca233fc490161b279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 1 Dec 
2017 17:06:02 +0100 Subject: [PATCH 15/24] spapr: move the IRQ allocation routines under the machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also change the prototype to use a sPAPRMachineState and prefix them with spapr_irq_. It will let us synchronise the IRQ allocation with the XIVE interrupt mode when available. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Reviewed-by: Greg Kurz Signed-off-by: David Gibson --- hw/intc/trace-events | 4 -- hw/intc/xics_spapr.c | 114 ----------------------------------------- hw/ppc/spapr.c | 114 +++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_events.c | 4 +- hw/ppc/spapr_pci.c | 8 +-- hw/ppc/spapr_vio.c | 2 +- hw/ppc/trace-events | 4 ++ include/hw/ppc/spapr.h | 6 +++ include/hw/ppc/xics.h | 4 -- 9 files changed, 131 insertions(+), 129 deletions(-) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index b298fac7c6..7077aaaee6 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -64,10 +64,6 @@ xics_ics_simple_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq 0x%x] xics_ics_simple_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq 0x%x [src %d] server 0x%x prio 0x%x" xics_ics_simple_reject(int nr, int srcno) "reject irq 0x%x [src %d]" xics_ics_simple_eoi(int nr) "ics_eoi: irq 0x%x" -xics_alloc(int irq) "irq %d" -xics_alloc_block(int first, int num, bool lsi, int align) "first irq %d, %d irqs, lsi=%d, alignnum %d" -xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs" -xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free" # hw/intc/s390_flic_kvm.c flic_create_device(int err) "flic: create device failed %d" diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index e8c0a1b3e9..5a0967caf4 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -245,120 +245,6 @@ void xics_spapr_init(sPAPRMachineState *spapr) spapr_register_hypercall(H_IPOLL, h_ipoll); } 
-#define ICS_IRQ_FREE(ics, srcno) \ - (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK))) - -static int ics_find_free_block(ICSState *ics, int num, int alignnum) -{ - int first, i; - - for (first = 0; first < ics->nr_irqs; first += alignnum) { - if (num > (ics->nr_irqs - first)) { - return -1; - } - for (i = first; i < first + num; ++i) { - if (!ICS_IRQ_FREE(ics, i)) { - break; - } - } - if (i == (first + num)) { - return first; - } - } - - return -1; -} - -int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp) -{ - int irq; - - if (!ics) { - return -1; - } - if (irq_hint) { - if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) { - error_setg(errp, "can't allocate IRQ %d: already in use", irq_hint); - return -1; - } - irq = irq_hint; - } else { - irq = ics_find_free_block(ics, 1, 1); - if (irq < 0) { - error_setg(errp, "can't allocate IRQ: no IRQ left"); - return -1; - } - irq += ics->offset; - } - - ics_set_irq_type(ics, irq - ics->offset, lsi); - trace_xics_alloc(irq); - - return irq; -} - -/* - * Allocate block of consecutive IRQs, and return the number of the first IRQ in - * the block. If align==true, aligns the first IRQ number to num. - */ -int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, - bool align, Error **errp) -{ - int i, first = -1; - - if (!ics) { - return -1; - } - - /* - * MSIMesage::data is used for storing VIRQ so - * it has to be aligned to num to support multiple - * MSI vectors. MSI-X is not affected by this. - * The hint is used for the first IRQ, the rest should - * be allocated continuously. 
- */ - if (align) { - assert((num == 1) || (num == 2) || (num == 4) || - (num == 8) || (num == 16) || (num == 32)); - first = ics_find_free_block(ics, num, num); - } else { - first = ics_find_free_block(ics, num, 1); - } - if (first < 0) { - error_setg(errp, "can't find a free %d-IRQ block", num); - return -1; - } - - for (i = first; i < first + num; ++i) { - ics_set_irq_type(ics, i, lsi); - } - first += ics->offset; - - trace_xics_alloc_block(first, num, lsi, align); - - return first; -} - -static void ics_free(ICSState *ics, int srcno, int num) -{ - int i; - - for (i = srcno; i < srcno + num; ++i) { - if (ICS_IRQ_FREE(ics, i)) { - trace_xics_ics_free_warn(0, i + ics->offset); - } - memset(&ics->irqs[i], 0, sizeof(ICSIRQState)); - } -} - -void spapr_ics_free(ICSState *ics, int irq, int num) -{ - if (ics_valid_irq(ics, irq)) { - trace_xics_ics_free(0, irq, num); - ics_free(ics, irq - ics->offset, num); - } -} - void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle) { uint32_t interrupt_server_ranges_prop[] = { diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f1b96a4e92..53c2c58a25 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3558,6 +3558,120 @@ static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id) return cpu ? 
ICP(cpu->intc) : NULL; } +#define ICS_IRQ_FREE(ics, srcno) \ + (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK))) + +static int ics_find_free_block(ICSState *ics, int num, int alignnum) +{ + int first, i; + + for (first = 0; first < ics->nr_irqs; first += alignnum) { + if (num > (ics->nr_irqs - first)) { + return -1; + } + for (i = first; i < first + num; ++i) { + if (!ICS_IRQ_FREE(ics, i)) { + break; + } + } + if (i == (first + num)) { + return first; + } + } + + return -1; +} + +int spapr_irq_alloc(sPAPRMachineState *spapr, int irq_hint, bool lsi, + Error **errp) +{ + ICSState *ics = spapr->ics; + int irq; + + if (!ics) { + return -1; + } + if (irq_hint) { + if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) { + error_setg(errp, "can't allocate IRQ %d: already in use", irq_hint); + return -1; + } + irq = irq_hint; + } else { + irq = ics_find_free_block(ics, 1, 1); + if (irq < 0) { + error_setg(errp, "can't allocate IRQ: no IRQ left"); + return -1; + } + irq += ics->offset; + } + + ics_set_irq_type(ics, irq - ics->offset, lsi); + trace_spapr_irq_alloc(irq); + + return irq; +} + +/* + * Allocate block of consecutive IRQs, and return the number of the first IRQ in + * the block. If align==true, aligns the first IRQ number to num. + */ +int spapr_irq_alloc_block(sPAPRMachineState *spapr, int num, bool lsi, + bool align, Error **errp) +{ + ICSState *ics = spapr->ics; + int i, first = -1; + + if (!ics) { + return -1; + } + + /* + * MSIMesage::data is used for storing VIRQ so + * it has to be aligned to num to support multiple + * MSI vectors. MSI-X is not affected by this. + * The hint is used for the first IRQ, the rest should + * be allocated continuously. 
+ */ + if (align) { + assert((num == 1) || (num == 2) || (num == 4) || + (num == 8) || (num == 16) || (num == 32)); + first = ics_find_free_block(ics, num, num); + } else { + first = ics_find_free_block(ics, num, 1); + } + if (first < 0) { + error_setg(errp, "can't find a free %d-IRQ block", num); + return -1; + } + + for (i = first; i < first + num; ++i) { + ics_set_irq_type(ics, i, lsi); + } + first += ics->offset; + + trace_spapr_irq_alloc_block(first, num, lsi, align); + + return first; +} + +void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num) +{ + ICSState *ics = spapr->ics; + int srcno = irq - ics->offset; + int i; + + if (ics_valid_irq(ics, irq)) { + trace_spapr_irq_free(0, irq, num); + for (i = srcno; i < srcno + num; ++i) { + if (ICS_IRQ_FREE(ics, i)) { + trace_spapr_irq_free_warn(0, i + ics->offset); + } + memset(&ics->irqs[i], 0, sizeof(ICSIRQState)); + } + } +} + static void spapr_pic_print_info(InterruptStatsProvider *obj, Monitor *mon) { diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index e377fc7dde..cead596f3e 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -718,7 +718,7 @@ void spapr_events_init(sPAPRMachineState *spapr) spapr->event_sources = spapr_event_sources_new(); spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW, - spapr_ics_alloc(spapr->ics, 0, false, + spapr_irq_alloc(spapr, 0, false, &error_fatal)); /* NOTE: if machine supports modern/dedicated hotplug event source, @@ -731,7 +731,7 @@ void spapr_events_init(sPAPRMachineState *spapr) */ if (spapr->use_hotplug_event_source) { spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG, - spapr_ics_alloc(spapr->ics, 0, false, + spapr_irq_alloc(spapr, 0, false, &error_fatal)); } diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 5a3122a9f9..e0ef77a480 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -314,7 +314,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - 
spapr_ics_free(spapr->ics, msi->first_irq, msi->num); + spapr_irq_free(spapr, msi->first_irq, msi->num); if (msi_present(pdev)) { spapr_msi_setmsg(pdev, 0, false, 0, 0); } @@ -352,7 +352,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, } /* Allocate MSIs */ - irq = spapr_ics_alloc_block(spapr->ics, req_num, false, + irq = spapr_irq_alloc_block(spapr, req_num, false, ret_intr_type == RTAS_TYPE_MSI, &err); if (err) { error_reportf_err(err, "Can't allocate MSIs for device %x: ", @@ -363,7 +363,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* Release previous MSIs */ if (msi) { - spapr_ics_free(spapr->ics, msi->first_irq, msi->num); + spapr_irq_free(spapr, msi->first_irq, msi->num); g_hash_table_remove(phb->msi, &config_addr); } @@ -1675,7 +1675,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) uint32_t irq; Error *local_err = NULL; - irq = spapr_ics_alloc_block(spapr->ics, 1, true, false, &local_err); + irq = spapr_irq_alloc_block(spapr, 1, true, false, &local_err); if (local_err) { error_propagate(errp, local_err); error_prepend(errp, "can't allocate LSIs: "); diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index ea3bc8bd9e..bb7ed2c537 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -454,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) dev->qdev.id = id; } - dev->irq = spapr_ics_alloc(spapr->ics, dev->irq, false, &local_err); + dev->irq = spapr_irq_alloc(spapr, dev->irq, false, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index 4a6a6490fa..b7c3e64b5e 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -12,6 +12,10 @@ spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) " # hw/ppc/spapr.c spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes" spapr_cas_continue(unsigned long n) "Copy changes to the 
guest: %ld bytes" +spapr_irq_alloc(int irq) "irq %d" +spapr_irq_alloc_block(int first, int num, bool lsi, int align) "first irq %d, %d irqs, lsi=%d, alignnum %d" +spapr_irq_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs" +spapr_irq_free_warn(int src, int irq) "Source#%d, irq %d is already free" # hw/ppc/spapr_hcall.c spapr_cas_pvr_try(uint32_t pvr) "0x%x" diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 9d21ca9bde..895f48471e 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -707,4 +707,10 @@ void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg); int spapr_vcpu_id(PowerPCCPU *cpu); PowerPCCPU *spapr_find_cpu(int vcpu_id); +int spapr_irq_alloc(sPAPRMachineState *spapr, int irq_hint, bool lsi, + Error **errp); +int spapr_irq_alloc_block(sPAPRMachineState *spapr, int num, bool lsi, + bool align, Error **errp); +void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num); + #endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 2ba8b12208..0b67abbbb9 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -181,10 +181,6 @@ typedef struct XICSFabricClass { #define XICS_IRQS_SPAPR 1024 -int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp); -int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, bool align, - Error **errp); -void spapr_ics_free(ICSState *ics, int irq, int num); void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle); qemu_irq xics_get_qirq(XICSFabric *xi, int irq); From 9e7dc5fc2e9d87a5492099de72800347e944e4ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 1 Dec 2017 17:06:03 +0100 Subject: [PATCH 16/24] spapr: introduce a spapr_irq_set_lsi() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will make synchronisation easier with the XIVE interrupt mode when available. The 'irq' parameter refers to the global IRQ number space. 
Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 53c2c58a25..02cc7ffd4d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3582,6 +3582,14 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum) return -1; } +/* + * Allocate the IRQ number and set the IRQ type, LSI or MSI + */ +static void spapr_irq_set_lsi(sPAPRMachineState *spapr, int irq, bool lsi) +{ + ics_set_irq_type(spapr->ics, irq - spapr->ics->offset, lsi); +} + int spapr_irq_alloc(sPAPRMachineState *spapr, int irq_hint, bool lsi, Error **errp) { @@ -3606,7 +3614,7 @@ int spapr_irq_alloc(sPAPRMachineState *spapr, int irq_hint, bool lsi, irq += ics->offset; } - ics_set_irq_type(ics, irq - ics->offset, lsi); + spapr_irq_set_lsi(spapr, irq, lsi); trace_spapr_irq_alloc(irq); return irq; @@ -3645,10 +3653,10 @@ int spapr_irq_alloc_block(sPAPRMachineState *spapr, int num, bool lsi, return -1; } - for (i = first; i < first + num; ++i) { - ics_set_irq_type(ics, i, lsi); - } first += ics->offset; + for (i = first; i < first + num; ++i) { + spapr_irq_set_lsi(spapr, i, lsi); + } trace_spapr_irq_alloc_block(first, num, lsi, align); From 7718375584a0214c951668a6e92896aaed88b289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Fri, 1 Dec 2017 17:06:04 +0100 Subject: [PATCH 17/24] spapr: introduce a spapr_qirq() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xics_get_qirq() is only used by the sPAPR machine. Let's move it there and change its name to reflect its scope. It will be useful for XIVE support which will use its own set of qirqs. 
Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/intc/xics.c | 12 ------------ hw/ppc/spapr.c | 11 +++++++++++ hw/ppc/spapr_events.c | 12 +++++------- hw/ppc/spapr_pci.c | 2 +- include/hw/pci-host/spapr.h | 2 +- include/hw/ppc/spapr.h | 1 + include/hw/ppc/spapr_vio.h | 2 +- include/hw/ppc/xics.h | 1 - 8 files changed, 20 insertions(+), 23 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 700f6baa13..e73e623e3b 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -713,18 +713,6 @@ static const TypeInfo xics_fabric_info = { /* * Exported functions */ -qemu_irq xics_get_qirq(XICSFabric *xi, int irq) -{ - XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); - ICSState *ics = xic->ics_get(xi, irq); - - if (ics) { - return ics->qirqs[irq - ics->offset]; - } - - return NULL; -} - ICPState *xics_icp_get(XICSFabric *xi, int server) { XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 02cc7ffd4d..8a6a59f098 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3680,6 +3680,17 @@ void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num) } } +qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq) +{ + ICSState *ics = spapr->ics; + + if (ics_valid_irq(ics, irq)) { + return ics->qirqs[irq - ics->offset]; + } + + return NULL; +} + static void spapr_pic_print_info(InterruptStatsProvider *obj, Monitor *mon) { diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index cead596f3e..7dc87fc7bd 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -472,9 +472,8 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) rtas_event_log_queue(spapr, entry); - qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), - rtas_event_log_to_irq(spapr, - RTAS_LOG_TYPE_EPOW))); + qemu_irq_pulse(spapr_qirq(spapr, + rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_EPOW))); } static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, @@ -556,9 +555,8 @@ static void 
spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, rtas_event_log_queue(spapr, entry); - qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), - rtas_event_log_to_irq(spapr, - RTAS_LOG_TYPE_HOTPLUG))); + qemu_irq_pulse(spapr_qirq(spapr, + rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_HOTPLUG))); } void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) @@ -678,7 +676,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, spapr_event_sources_get_source(spapr->event_sources, i); g_assert(source->enabled); - qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), source->irq)); + qemu_irq_pulse(spapr_qirq(spapr, source->irq)); } } diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index e0ef77a480..39134f0ef0 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -723,7 +723,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr, trace_spapr_pci_msi_write(addr, data, irq); - qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), irq)); + qemu_irq_pulse(spapr_qirq(spapr, irq)); } static const MemoryRegionOps spapr_msi_ops = { diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 38470b2f0e..0fae4fc6a4 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -108,7 +108,7 @@ static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - return xics_get_qirq(XICS_FABRIC(spapr), phb->lsi_table[pin].irq); + return spapr_qirq(spapr, phb->lsi_table[pin].irq); } PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 895f48471e..6b8e04c787 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -712,5 +712,6 @@ int spapr_irq_alloc(sPAPRMachineState *spapr, int irq_hint, bool lsi, int spapr_irq_alloc_block(sPAPRMachineState *spapr, int num, bool lsi, bool align, Error **errp); void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num); 
+qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq); #endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h index 2e9685a5d9..e8b006d18f 100644 --- a/include/hw/ppc/spapr_vio.h +++ b/include/hw/ppc/spapr_vio.h @@ -87,7 +87,7 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - return xics_get_qirq(XICS_FABRIC(spapr), dev->irq); + return spapr_qirq(spapr, dev->irq); } static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr, diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 0b67abbbb9..6cebff47a7 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -183,7 +183,6 @@ typedef struct XICSFabricClass { void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle); -qemu_irq xics_get_qirq(XICSFabric *xi, int irq); ICPState *xics_icp_get(XICSFabric *xi, int server); /* Internal XICS interfaces */ From f47bd1c839d57c12207a28421a9df718fbf476ba Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 5 Dec 2017 16:41:17 +0100 Subject: [PATCH 18/24] spapr: replace numa_get_node() with lookup in pc-dimm list SPAPR is the last user of numa_get_node() and a bunch of supporting code to maintain numa_info[x].addr list. Get LMB node id from pc-dimm list, which allows to remove ~80LOC maintaining dynamic address range lookup list. It also removes pc-dimm dependency on numa_[un]set_mem_node_id() and makes pc-dimms a sole source of information about which node it belongs to and removes duplicate data from global numa_info. 
Signed-off-by: Igor Mammedov Signed-off-by: David Gibson --- hw/mem/pc-dimm.c | 2 - hw/ppc/spapr.c | 29 ++++++++++++- include/sysemu/numa.h | 10 ----- numa.c | 94 ------------------------------------------- 4 files changed, 28 insertions(+), 107 deletions(-) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 66eace5a5c..6e74b61cb6 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -109,7 +109,6 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr); vmstate_register_ram(vmstate_mr, dev); - numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node); out: error_propagate(errp, local_err); @@ -122,7 +121,6 @@ void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm); - numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node); memory_region_del_subregion(&hpms->mr, mr); vmstate_unregister_ram(vmstate_mr, dev); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8a6a59f098..306875e123 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -641,6 +641,26 @@ static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) } +static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr) +{ + MemoryDeviceInfoList *info; + + for (info = list; info; info = info->next) { + MemoryDeviceInfo *value = info->value; + + if (value && value->type == MEMORY_DEVICE_INFO_KIND_DIMM) { + PCDIMMDeviceInfo *pcdimm_info = value->u.dimm.data; + + if (addr >= pcdimm_info->addr && + addr < (pcdimm_info->addr + pcdimm_info->size)) { + return pcdimm_info->node; + } + } + } + + return -1; +} + /* * Adds ibm,dynamic-reconfiguration-memory node.
* Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation @@ -658,6 +678,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) lmb_size; uint32_t *int_buf, *cur_index, buf_len; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; + MemoryDeviceInfoList *dimms = NULL; /* * Don't create the node if there is no hotpluggable memory @@ -692,6 +713,11 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) goto out; } + if (hotplug_lmb_start) { + MemoryDeviceInfoList **prev = &dimms; + qmp_pc_dimm_device_list(qdev_get_machine(), &prev); + } + /* ibm,dynamic-memory */ int_buf[0] = cpu_to_be32(nr_lmbs); cur_index++; @@ -709,7 +735,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc)); dynamic_memory[3] = cpu_to_be32(0); /* reserved */ - dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); + dynamic_memory[4] = cpu_to_be32(spapr_pc_dimm_node(dimms, addr)); if (memory_region_present(get_system_memory(), addr)) { dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); } else { @@ -732,6 +758,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; } + qapi_free_MemoryDeviceInfoList(dimms); ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); if (ret < 0) { goto out; diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 5c6df2820b..b3545215f6 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -10,17 +10,10 @@ extern int nb_numa_nodes; /* Number of NUMA nodes */ extern bool have_numa_distance; -struct numa_addr_range { - ram_addr_t mem_start; - ram_addr_t mem_end; - QLIST_ENTRY(numa_addr_range) entry; -}; - struct node_info { uint64_t node_mem; struct HostMemoryBackend *node_memdev; bool present; - QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ 
uint8_t distance[MAX_NODES]; }; @@ -33,9 +26,6 @@ extern NodeInfo numa_info[MAX_NODES]; void parse_numa_opts(MachineState *ms); void query_numa_node_mem(NumaNodeMem node_mem[]); extern QemuOptsList qemu_numa_opts; -void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); -void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); -uint32_t numa_get_node(ram_addr_t addr, Error **errp); void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, int nb_nodes, ram_addr_t size); void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, diff --git a/numa.c b/numa.c index 7151b24d1c..98fa9a4bcf 100644 --- a/numa.c +++ b/numa.c @@ -55,92 +55,6 @@ int nb_numa_nodes; bool have_numa_distance; NodeInfo numa_info[MAX_NODES]; -void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) -{ - struct numa_addr_range *range; - - /* - * Memory-less nodes can come here with 0 size in which case, - * there is nothing to do. - */ - if (!size) { - return; - } - - range = g_malloc0(sizeof(*range)); - range->mem_start = addr; - range->mem_end = addr + size - 1; - QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry); -} - -void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) -{ - struct numa_addr_range *range, *next; - - QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) { - if (addr == range->mem_start && (addr + size - 1) == range->mem_end) { - QLIST_REMOVE(range, entry); - g_free(range); - return; - } - } -} - -static void numa_set_mem_ranges(void) -{ - int i; - ram_addr_t mem_start = 0; - - /* - * Deduce start address of each node and use it to store - * the address range info in numa_info address range list - */ - for (i = 0; i < nb_numa_nodes; i++) { - numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i); - mem_start += numa_info[i].node_mem; - } -} - -/* - * Check if @addr falls under NUMA @node. 
- */ -static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node) -{ - struct numa_addr_range *range; - - QLIST_FOREACH(range, &numa_info[node].addr, entry) { - if (addr >= range->mem_start && addr <= range->mem_end) { - return true; - } - } - return false; -} - -/* - * Given an address, return the index of the NUMA node to which the - * address belongs to. - */ -uint32_t numa_get_node(ram_addr_t addr, Error **errp) -{ - uint32_t i; - - /* For non NUMA configurations, check if the addr falls under node 0 */ - if (!nb_numa_nodes) { - if (numa_addr_belongs_to_node(addr, 0)) { - return 0; - } - } - - for (i = 0; i < nb_numa_nodes; i++) { - if (numa_addr_belongs_to_node(addr, i)) { - return i; - } - } - - error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any " - "NUMA node", addr); - return -1; -} static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, Error **errp) @@ -497,12 +411,6 @@ void parse_numa_opts(MachineState *ms) exit(1); } - for (i = 0; i < nb_numa_nodes; i++) { - QLIST_INIT(&numa_info[i].addr); - } - - numa_set_mem_ranges(); - /* QEMU needs at least all unique node pair distances to build * the whole NUMA distance table. QEMU treats the distance table * as symmetric by default, i.e. distance A->B == distance B->A. @@ -522,8 +430,6 @@ void parse_numa_opts(MachineState *ms) /* Validation succeeded, now fill in any missing distances. 
*/ complete_init_numa_distance(); } - } else { - numa_set_mem_node_id(0, ram_size, 0); } } From bb2d8ab6369abc8e90a9f7e2e8f154fea752bdaf Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 6 Dec 2017 09:13:16 +0100 Subject: [PATCH 19/24] spapr: fix LSI interrupt specifiers in the device tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LoPAPR 1.1 B.6.9.1.2 describes the "#interrupt-cells" property of the PowerPC External Interrupt Source Controller node as follows: “#interrupt-cells” Standard property name to define the number of cells in an interrupt-specifier within an interrupt domain. prop-encoded-array: An integer, encoded as with encode-int, that denotes the number of cells required to represent an interrupt specifier in its child nodes. The value of this property for the PowerPC External Interrupt option shall be 2. Thus all interrupt specifiers (as used in the standard “interrupts” property) shall consist of two cells, each containing an integer encoded as with encode-int. The first integer represents the interrupt number the second integer is the trigger code: 0 for edge triggered, 1 for level triggered. This patch fixes the interrupt specifiers in the "interrupt-map" property of the PHB node, that were setting the second cell to 8 (confusion with IRQ_TYPE_LEVEL_LOW?) instead of 1. VIO devices and RTAS event sources use the same format for interrupt specifiers: while here, we introduce a common helper to handle the encoding details.
Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Tested-by: Cédric Le Goater -- v3: - reference public LoPAPR instead of internal PAPR+ in changelog - change helper name to spapr_dt_xics_irq() v2: - drop the erroneous changes to the "interrupts" prop in PCI device nodes - introduce a common helper to encode interrupt specifiers Signed-off-by: David Gibson --- hw/ppc/spapr_events.c | 3 +-- hw/ppc/spapr_pci.c | 3 +-- hw/ppc/spapr_vio.c | 3 ++- include/hw/ppc/spapr.h | 10 ++++++++++ 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 7dc87fc7bd..c7a64e6b8d 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -282,8 +282,7 @@ void spapr_dt_events(sPAPRMachineState *spapr, void *fdt) continue; } - interrupts[0] = cpu_to_be32(source->irq); - interrupts[1] = 0; + spapr_dt_xics_irq(interrupts, source->irq, false); _FDT(node_offset = fdt_add_subnode(fdt, event_sources, source_name)); _FDT(fdt_setprop(fdt, node_offset, "interrupts", interrupts, diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 39134f0ef0..88797b3d36 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -2121,8 +2121,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, irqmap[2] = 0; irqmap[3] = cpu_to_be32(j+1); irqmap[4] = cpu_to_be32(xics_phandle); - irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq); - irqmap[6] = cpu_to_be32(0x8); + spapr_dt_xics_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true); } } /* Write interrupt map */ diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index bb7ed2c537..472dd6f33a 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -126,8 +126,9 @@ static int vio_make_devnode(VIOsPAPRDevice *dev, } if (dev->irq) { - uint32_t ints_prop[] = {cpu_to_be32(dev->irq), 0}; + uint32_t ints_prop[2]; + spapr_dt_xics_irq(ints_prop, dev->irq, false); ret = fdt_setprop(fdt, node_off, "interrupts", ints_prop, sizeof(ints_prop)); if (ret < 0) { diff --git a/include/hw/ppc/spapr.h 
b/include/hw/ppc/spapr.h index 6b8e04c787..14757b805e 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -590,6 +590,16 @@ void spapr_load_rtas(sPAPRMachineState *spapr, void *fdt, hwaddr addr); #define RTAS_EVENT_SCAN_RATE 1 +/* This helper should be used to encode interrupt specifiers when the related + * "interrupt-controller" node has its "#interrupt-cells" property set to 2 (ie, + * VIO devices, RTAS event sources and PHBs). + */ +static inline void spapr_dt_xics_irq(uint32_t *intspec, int irq, bool is_lsi) +{ + intspec[0] = cpu_to_be32(irq); + intspec[1] = is_lsi ? cpu_to_be32(1) : 0; +} + typedef struct sPAPRTCETable sPAPRTCETable; #define TYPE_SPAPR_TCE_TABLE "spapr-tce-table" From 638f2caa01eb1ec2c4acf4f43798bea465a7eeb5 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 6 Dec 2017 09:16:52 +0100 Subject: [PATCH 20/24] spapr_events: drop bogus cell from "interrupt-ranges" property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to LoPAPR 1.1 B.6.12, the "/event-sources" node has an "interrupt-ranges" property, the format of which is described in B.6.9.1.2 as follows: “interrupt-ranges” Standard property name that defines the interrupt number(s) and range(s) handled by this unit. prop-encoded-array: List of (int-number, range) specifications. Int-number is encoded as with encode-int. Range is encoded as with encode-int. The first entry in this list shall contain the int-number associated with the first “reg” property entry. The int-number is the value representing the interrupt source as would appear in the PowerPC External Interrupt Architecture XISR. The range shall be the number of sequential interrupt numbers which this unit can generate. There's no such thing as a cell count at the end of the array, like the one introduced by commit ffbb1705a33d in QEMU 2.8. It doesn't seem to have had any impact on existing guests and I couldn't find any related workaround in Linux.
So, let's just drop the bogus lines. Signed-off-by: Greg Kurz Signed-off-by: David Gibson --- hw/ppc/spapr_events.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index c7a64e6b8d..86836f0626 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -292,9 +292,6 @@ void spapr_dt_events(sPAPRMachineState *spapr, void *fdt) irq_ranges[count++] = cpu_to_be32(1); } - irq_ranges[count] = cpu_to_be32(count); - count++; - _FDT((fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0))); _FDT((fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2))); _FDT((fdt_setprop(fdt, event_sources, "interrupt-ranges", From 2a83f9976efa9a85e8ceb9d1035a68f25c321334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 6 Dec 2017 10:41:50 +0100 Subject: [PATCH 21/24] target/ppc: introduce the PPC_BIT() macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and use them in a couple of obvious places. Other macros will be used in the model of the XIVE interrupt controller. 
Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- target/ppc/cpu.h | 105 +++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 989761b795..370b05e76e 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -87,6 +87,13 @@ #define PPC_ELF_MACHINE EM_PPC #endif +#define PPC_BIT(bit) (0x8000000000000000UL >> (bit)) +#define PPC_BIT32(bit) (0x80000000UL >> (bit)) +#define PPC_BIT8(bit) (0x80UL >> (bit)) +#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) +#define PPC_BITMASK32(bs, be) ((PPC_BIT32(bs) - PPC_BIT32(be)) | \ + PPC_BIT32(bs)) + /*****************************************************************************/ /* Exception vectors definitions */ enum { @@ -371,10 +378,10 @@ struct ppc_slb_t { #define MSR_LE 0 /* Little-endian mode 1 hflags */ /* LPCR bits */ -#define LPCR_VPM0 (1ull << (63 - 0)) -#define LPCR_VPM1 (1ull << (63 - 1)) -#define LPCR_ISL (1ull << (63 - 2)) -#define LPCR_KBV (1ull << (63 - 3)) +#define LPCR_VPM0 PPC_BIT(0) +#define LPCR_VPM1 PPC_BIT(1) +#define LPCR_ISL PPC_BIT(2) +#define LPCR_KBV PPC_BIT(3) #define LPCR_DPFD_SHIFT (63 - 11) #define LPCR_DPFD (0x7ull << LPCR_DPFD_SHIFT) #define LPCR_VRMASD_SHIFT (63 - 16) @@ -382,41 +389,41 @@ struct ppc_slb_t { /* P9: Power-saving mode Exit Cause Enable (Upper Section) Mask */ #define LPCR_PECE_U_SHIFT (63 - 19) #define LPCR_PECE_U_MASK (0x7ull << LPCR_PECE_U_SHIFT) -#define LPCR_HVEE (1ull << (63 - 17)) /* Hypervisor Virt Exit Enable */ +#define LPCR_HVEE PPC_BIT(17) /* Hypervisor Virt Exit Enable */ #define LPCR_RMLS_SHIFT (63 - 37) #define LPCR_RMLS (0xfull << LPCR_RMLS_SHIFT) -#define LPCR_ILE (1ull << (63 - 38)) +#define LPCR_ILE PPC_BIT(38) #define LPCR_AIL_SHIFT (63 - 40) /* Alternate interrupt location */ #define LPCR_AIL (3ull << LPCR_AIL_SHIFT) -#define LPCR_UPRT (1ull << (63 - 41)) /* Use Process Table */ -#define LPCR_EVIRT (1ull << (63 - 42)) /* 
Enhanced Virtualisation */ -#define LPCR_ONL (1ull << (63 - 45)) -#define LPCR_LD (1ull << (63 - 46)) /* Large Decrementer */ -#define LPCR_P7_PECE0 (1ull << (63 - 49)) -#define LPCR_P7_PECE1 (1ull << (63 - 50)) -#define LPCR_P7_PECE2 (1ull << (63 - 51)) -#define LPCR_P8_PECE0 (1ull << (63 - 47)) -#define LPCR_P8_PECE1 (1ull << (63 - 48)) -#define LPCR_P8_PECE2 (1ull << (63 - 49)) -#define LPCR_P8_PECE3 (1ull << (63 - 50)) -#define LPCR_P8_PECE4 (1ull << (63 - 51)) +#define LPCR_UPRT PPC_BIT(41) /* Use Process Table */ +#define LPCR_EVIRT PPC_BIT(42) /* Enhanced Virtualisation */ +#define LPCR_ONL PPC_BIT(45) +#define LPCR_LD PPC_BIT(46) /* Large Decrementer */ +#define LPCR_P7_PECE0 PPC_BIT(49) +#define LPCR_P7_PECE1 PPC_BIT(50) +#define LPCR_P7_PECE2 PPC_BIT(51) +#define LPCR_P8_PECE0 PPC_BIT(47) +#define LPCR_P8_PECE1 PPC_BIT(48) +#define LPCR_P8_PECE2 PPC_BIT(49) +#define LPCR_P8_PECE3 PPC_BIT(50) +#define LPCR_P8_PECE4 PPC_BIT(51) /* P9: Power-saving mode Exit Cause Enable (Lower Section) Mask */ #define LPCR_PECE_L_SHIFT (63 - 51) #define LPCR_PECE_L_MASK (0x1full << LPCR_PECE_L_SHIFT) -#define LPCR_PDEE (1ull << (63 - 47)) /* Privileged Doorbell Exit EN */ -#define LPCR_HDEE (1ull << (63 - 48)) /* Hyperv Doorbell Exit Enable */ -#define LPCR_EEE (1ull << (63 - 49)) /* External Exit Enable */ -#define LPCR_DEE (1ull << (63 - 50)) /* Decrementer Exit Enable */ -#define LPCR_OEE (1ull << (63 - 51)) /* Other Exit Enable */ -#define LPCR_MER (1ull << (63 - 52)) -#define LPCR_GTSE (1ull << (63 - 53)) /* Guest Translation Shootdown */ -#define LPCR_TC (1ull << (63 - 54)) -#define LPCR_HEIC (1ull << (63 - 59)) /* HV Extern Interrupt Control */ -#define LPCR_LPES0 (1ull << (63 - 60)) -#define LPCR_LPES1 (1ull << (63 - 61)) -#define LPCR_RMI (1ull << (63 - 62)) -#define LPCR_HVICE (1ull << (63 - 62)) /* HV Virtualisation Int Enable */ -#define LPCR_HDICE (1ull << (63 - 63)) +#define LPCR_PDEE PPC_BIT(47) /* Privileged Doorbell Exit EN */ +#define LPCR_HDEE PPC_BIT(48) 
/* Hyperv Doorbell Exit Enable */ +#define LPCR_EEE PPC_BIT(49) /* External Exit Enable */ +#define LPCR_DEE PPC_BIT(50) /* Decrementer Exit Enable */ +#define LPCR_OEE PPC_BIT(51) /* Other Exit Enable */ +#define LPCR_MER PPC_BIT(52) +#define LPCR_GTSE PPC_BIT(53) /* Guest Translation Shootdown */ +#define LPCR_TC PPC_BIT(54) +#define LPCR_HEIC PPC_BIT(59) /* HV Extern Interrupt Control */ +#define LPCR_LPES0 PPC_BIT(60) +#define LPCR_LPES1 PPC_BIT(61) +#define LPCR_RMI PPC_BIT(62) +#define LPCR_HVICE PPC_BIT(62) /* HV Virtualisation Int Enable */ +#define LPCR_HDICE PPC_BIT(63) #define msr_sf ((env->msr >> MSR_SF) & 1) #define msr_isf ((env->msr >> MSR_ISF) & 1) @@ -507,22 +514,22 @@ struct ppc_slb_t { #define FSCR_IC_TAR 8 /* Exception state register bits definition */ -#define ESR_PIL (1 << (63 - 36)) /* Illegal Instruction */ -#define ESR_PPR (1 << (63 - 37)) /* Privileged Instruction */ -#define ESR_PTR (1 << (63 - 38)) /* Trap */ -#define ESR_FP (1 << (63 - 39)) /* Floating-Point Operation */ -#define ESR_ST (1 << (63 - 40)) /* Store Operation */ -#define ESR_AP (1 << (63 - 44)) /* Auxiliary Processor Operation */ -#define ESR_PUO (1 << (63 - 45)) /* Unimplemented Operation */ -#define ESR_BO (1 << (63 - 46)) /* Byte Ordering */ -#define ESR_PIE (1 << (63 - 47)) /* Imprecise exception */ -#define ESR_DATA (1 << (63 - 53)) /* Data Access (Embedded page table) */ -#define ESR_TLBI (1 << (63 - 54)) /* TLB Ineligible (Embedded page table) */ -#define ESR_PT (1 << (63 - 55)) /* Page Table (Embedded page table) */ -#define ESR_SPV (1 << (63 - 56)) /* SPE/VMX operation */ -#define ESR_EPID (1 << (63 - 57)) /* External Process ID operation */ -#define ESR_VLEMI (1 << (63 - 58)) /* VLE operation */ -#define ESR_MIF (1 << (63 - 62)) /* Misaligned instruction (VLE) */ +#define ESR_PIL PPC_BIT(36) /* Illegal Instruction */ +#define ESR_PPR PPC_BIT(37) /* Privileged Instruction */ +#define ESR_PTR PPC_BIT(38) /* Trap */ +#define ESR_FP PPC_BIT(39) /* Floating-Point 
Operation */ +#define ESR_ST PPC_BIT(40) /* Store Operation */ +#define ESR_AP PPC_BIT(44) /* Auxiliary Processor Operation */ +#define ESR_PUO PPC_BIT(45) /* Unimplemented Operation */ +#define ESR_BO PPC_BIT(46) /* Byte Ordering */ +#define ESR_PIE PPC_BIT(47) /* Imprecise exception */ +#define ESR_DATA PPC_BIT(53) /* Data Access (Embedded page table) */ +#define ESR_TLBI PPC_BIT(54) /* TLB Ineligible (Embedded page table) */ +#define ESR_PT PPC_BIT(55) /* Page Table (Embedded page table) */ +#define ESR_SPV PPC_BIT(56) /* SPE/VMX operation */ +#define ESR_EPID PPC_BIT(57) /* External Process ID operation */ +#define ESR_VLEMI PPC_BIT(58) /* VLE operation */ +#define ESR_MIF PPC_BIT(62) /* Misaligned instruction (VLE) */ /* Transaction EXception And Summary Register bits */ #define TEXASR_FAILURE_PERSISTENT (63 - 7) @@ -1991,7 +1998,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define HID0_DEEPNAP (1 << 24) /* pre-2.06 */ #define HID0_DOZE (1 << 23) /* pre-2.06 */ #define HID0_NAP (1 << 22) /* pre-2.06 */ -#define HID0_HILE (1ull << (63 - 19)) /* POWER8 */ +#define HID0_HILE PPC_BIT(19) /* POWER8 */ /*****************************************************************************/ /* PowerPC Instructions types definitions */ From bcb5ce08cf3ddf69af87cc0fe750c3b564f1e6af Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 8 Dec 2017 12:47:34 +1100 Subject: [PATCH 22/24] spapr: Rename machine init functions for clarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Machine objects have two init functions - the generic QOM level instance_init which should only do static object initialization, and the Machine specific MachineClass::init which does the actual construction of the machine. In spapr the functions implementing these two have names - ppc_machine_initfn() and ppc_spapr_init() - which don't correspond closely to either of those. 
To prevent people (read, me) from confusing which is which, rename them spapr_instance_init() and spapr_machine_init() to make it clearer which is which. While we're there rename ppc_spapr_reset() to spapr_machine_reset() to match. Signed-off-by: David Gibson Reviewed-by: Cédric Le Goater Reviewed-by: Greg Kurz Reviewed-by: Suraj Jitindar Singh --- hw/ppc/spapr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 306875e123..42d6a2302a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1454,7 +1454,7 @@ static int spapr_reset_drcs(Object *child, void *opaque) return 0; } -static void ppc_spapr_reset(void) +static void spapr_machine_reset(void) { MachineState *machine = MACHINE(qdev_get_machine()); sPAPRMachineState *spapr = SPAPR_MACHINE(machine); @@ -2292,7 +2292,7 @@ out: } /* pSeries LPAR / sPAPR hardware init */ -static void ppc_spapr_init(MachineState *machine) +static void spapr_machine_init(MachineState *machine) { sPAPRMachineState *spapr = SPAPR_MACHINE(machine); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); @@ -2820,7 +2820,7 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name, visit_type_uint32(v, name, (uint32_t *)opaque, errp); } -static void spapr_machine_initfn(Object *obj) +static void spapr_instance_init(Object *obj) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); @@ -3777,8 +3777,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) * functions for the specific versioned machine types can override * these details for backwards compatibility */ - mc->init = ppc_spapr_init; - mc->reset = ppc_spapr_reset; + mc->init = spapr_machine_init; + mc->reset = spapr_machine_reset; mc->block_default_type = IF_SCSI; mc->max_cpus = 1024; mc->no_parallel = 1; @@ -3825,7 +3825,7 @@ static const TypeInfo spapr_machine_info = { .parent = TYPE_MACHINE, .abstract = true, .instance_size = sizeof(sPAPRMachineState), - .instance_init = 
spapr_machine_initfn, + .instance_init = spapr_instance_init, .instance_finalize = spapr_machine_finalizefn, .class_size = sizeof(sPAPRMachineClass), .class_init = spapr_machine_class_init, From 4f441474c61f317de7927edfdb1d042b0b6f3882 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 8 Dec 2017 14:11:49 +1100 Subject: [PATCH 23/24] spapr: Assume msi_nonbroken We conditionally adjust part of the guest device tree based on the global msi_nonbroken flag. However, the main machine type code initializes msi_nonbroken to true and there's nothing that would set it to false again. So replace the test with an assert(). Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 42d6a2302a..4f354a8760 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -943,9 +943,8 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate", RTAS_EVENT_SCAN_RATE)); - if (msi_nonbroken) { - _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0)); - } + g_assert(msi_nonbroken); + _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0)); /* * According to PAPR, rtas ibm,os-term does not guarantee a return From 1481fe5fcfeb7fcf3c1ebb9d8c0432e3e0188ccf Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 14 Dec 2017 19:09:48 +0100 Subject: [PATCH 24/24] spapr: don't initialize PATB entry if max-cpu-compat < power9 if KVM is enabled and KVM capabilities MMU radix is available, the partition table entry (patb_entry) for the radix mode is initialized by default in ppc_spapr_reset(). It's a problem if we want to migrate the guest to a POWER8 host while the kernel is not started to set the value to the one expected for a POWER8 CPU. 
The "-machine max-cpu-compat=power8" option should allow migrating a guest from a POWER9 KVM host to a POWER8 KVM host, but because patb_entry is set, the destination QEMU tries to enable radix mode on the POWER8 host. This fails and cancels the migration: Process table config unsupported by the host error while loading state for instance 0x0 of device 'spapr' load of migration failed: Invalid argument This patch doesn't set the PATB entry if the user provides a CPU compatibility mode that doesn't support radix mode. Signed-off-by: Laurent Vivier Signed-off-by: David Gibson --- hw/ppc/spapr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 4f354a8760..6785a90c60 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1466,7 +1466,10 @@ static void spapr_machine_reset(void) /* Check for unknown sysbus devices */ foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL); - if (kvm_enabled() && kvmppc_has_cap_mmu_radix()) { + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && + ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { /* If using KVM with radix mode available, VCPUs can be started * without a HPT because KVM will start them in radix mode. * Set the GR bit in PATB so that we know there is no HPT. */ @@ -1525,7 +1528,6 @@ static void spapr_machine_reset(void) g_free(fdt); /* Set up the entry state */ - first_ppc_cpu = POWERPC_CPU(first_cpu); first_ppc_cpu->env.gpr[3] = fdt_addr; first_ppc_cpu->env.gpr[5] = 0; first_cpu->halted = 0;