From 3d4f2534834cd9f9bbb3dd145fa61fd2ac0dd535 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 18 Jul 2016 15:19:04 +0200
Subject: [PATCH 1/5] ppc: Huge page detection mechanism fixes - Episode III

After already fixing two issues with the huge page detection mechanism
(see commit 159d2e39a860 and 86b50f2e1bef), Greg Kurz noticed another
case that caused the guest to crash where QEMU announces huge pages
though they should not be available for the guest:

qemu-system-ppc64 -enable-kvm ... -mem-path /dev/hugepages \
 -m 1G,slots=4,maxmem=32G
 -object memory-backend-ram,policy=default,size=1G,id=mem-mem1 \
 -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
 -numa node,nodeid=0 -numa node,nodeid=1

That means if there is a global mem-path option, we still have
to look at the memory-backend objects that have been specified
additionally and return their minimum page size if that value
is smaller than the page size of the main memory.

Reported-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target-ppc/kvm.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 91e6daf4fd..84764edeae 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -366,10 +366,13 @@ static int find_max_supported_pagesize(Object *obj, void *opaque)
 static long getrampagesize(void)
 {
     long hpsize = LONG_MAX;
+    long mainrampagesize;
     Object *memdev_root;
 
     if (mem_path) {
-        return gethugepagesize(mem_path);
+        mainrampagesize = gethugepagesize(mem_path);
+    } else {
+        mainrampagesize = getpagesize();
     }
 
     /* it's possible we have memory-backend objects with
@@ -383,28 +386,26 @@ static long getrampagesize(void)
      * backend isn't backed by hugepages.
      */
     memdev_root = object_resolve_path("/objects", NULL);
-    if (!memdev_root) {
-        return getpagesize();
+    if (memdev_root) {
+        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
     }
-
-    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
-
-    if (hpsize == LONG_MAX || hpsize == getpagesize()) {
-        return getpagesize();
+    if (hpsize == LONG_MAX) {
+        /* No additional memory regions found ==> Report main RAM page size */
+        return mainrampagesize;
     }
 
     /* If NUMA is disabled or the NUMA nodes are not backed with a
-     * memory-backend, then there is at least one node using "normal"
-     * RAM. And since normal RAM has not been configured with "-mem-path"
-     * (what we've checked earlier here already), we can not use huge pages!
+     * memory-backend, then there is at least one node using "normal" RAM,
+     * so if its page size is smaller we have got to report that size instead.
      */
-    if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) {
+    if (hpsize > mainrampagesize &&
+        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
         static bool warned;
         if (!warned) {
             error_report("Huge page support disabled (n/a for main memory).");
             warned = true;
         }
-        return getpagesize();
+        return mainrampagesize;
     }
 
     return hpsize;

From c573fc03da65e35bf44bce0448ea12801e3631ac Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Thu, 21 Jul 2016 11:21:34 +0200
Subject: [PATCH 2/5] hw/ppc/spapr: Make sure to close the htab_fd when
 migration is canceled

When canceling a migration process, we currently do not close the
HTAB migration file descriptor since htab_save_complete() is never
called in that case. So we leave the migration process with a
dangling htab_fd value around, and this causes any further migration
attempts to fail. To fix this issue, simply make sure that the
htab_fd is closed during the migration cleanup stage. And since the
cleanup() function is also called when migration succeeds, we can
also remove the call to close_htab_fd() from the htab_save_complete()
function.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1354341
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 7f33a1b2b5..9193ac2c12 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1512,7 +1512,6 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
         if (rc < 0) {
             return rc;
         }
-        close_htab_fd(spapr);
     } else {
         if (spapr->htab_first_pass) {
             htab_save_first_pass(f, spapr, -1);
@@ -1614,10 +1613,18 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
+static void htab_cleanup(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    close_htab_fd(spapr);
+}
+
 static SaveVMHandlers savevm_htab_handlers = {
     .save_live_setup = htab_save_setup,
     .save_live_iterate = htab_save_iterate,
     .save_live_complete_precopy = htab_save_complete,
+    .cleanup = htab_cleanup,
     .load_state = htab_load,
 };
 

From cf472f48d57a533cc46ee651159abcbe0b362df0 Mon Sep 17 00:00:00 2001
From: "lvivier@redhat.com" <lvivier@redhat.com>
Date: Thu, 21 Jul 2016 14:05:46 +0200
Subject: [PATCH 3/5] spapr: fix spapr-nvram migration

When spapr-nvram is backed by a file using pflash interface,
migration fails on the destination guest with assert:

    bdrv_co_pwritev: Assertion `!(bs->open_flags & 0x0800)' failed.

This avoids the problem by delaying the pflash update until after
the device loads complete.

This fix is similar to the one for the pflash_cfi01 migration:

    90c647d Fix pflash migration

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/nvram/spapr_nvram.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 019f25dc58..4de5f705d8 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -39,6 +39,7 @@ typedef struct sPAPRNVRAM {
     uint32_t size;
     uint8_t *buf;
     BlockBackend *blk;
+    VMChangeStateEntry *vmstate;
 } sPAPRNVRAM;
 
 #define TYPE_VIO_SPAPR_NVRAM "spapr-nvram"
@@ -185,19 +186,25 @@ static int spapr_nvram_pre_load(void *opaque)
     return 0;
 }
 
+static void postload_update_cb(void *opaque, int running, RunState state)
+{
+    sPAPRNVRAM *nvram = opaque;
+
+    /* This is called after bdrv_invalidate_cache_all.  */
+
+    qemu_del_vm_change_state_handler(nvram->vmstate);
+    nvram->vmstate = NULL;
+
+    blk_pwrite(nvram->blk, 0, nvram->buf, nvram->size, 0);
+}
+
 static int spapr_nvram_post_load(void *opaque, int version_id)
 {
     sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(opaque);
 
     if (nvram->blk) {
-        int alen = blk_pwrite(nvram->blk, 0, nvram->buf, nvram->size, 0);
-
-        if (alen < 0) {
-            return alen;
-        }
-        if (alen != nvram->size) {
-            return -1;
-        }
+        nvram->vmstate = qemu_add_vm_change_state_handler(postload_update_cb,
+                                                          nvram);
     }
 
     return 0;

From 2fff4bad40abe5781d21c5e134161a1bd13c43cd Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Fri, 22 Jul 2016 18:53:51 +0200
Subject: [PATCH 4/5] target-ppc: add PPC_MFTB flag to e500mc and e5500

According to the e500mc and e5500 core reference manual they have support
for the mftb instruction.

Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target-ppc/translate_init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 5ecafc7b8b..5f28a36998 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -5133,7 +5133,7 @@ POWERPC_FAMILY(e500mc)(ObjectClass *oc, void *data)
     dc->desc = "e500mc core";
     pcc->init_proc = init_proc_e500mc;
     pcc->check_pow = check_pow_none;
-    pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL |
+    pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_MFTB |
                        PPC_WRTEE | PPC_RFDI | PPC_RFMCI |
                        PPC_CACHE | PPC_CACHE_LOCK | PPC_CACHE_ICBI |
                        PPC_CACHE_DCBZ | PPC_CACHE_DCBA |
@@ -5179,7 +5179,7 @@ POWERPC_FAMILY(e5500)(ObjectClass *oc, void *data)
     dc->desc = "e5500 core";
     pcc->init_proc = init_proc_e5500;
     pcc->check_pow = check_pow_none;
-    pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL |
+    pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_MFTB |
                        PPC_WRTEE | PPC_RFDI | PPC_RFMCI |
                        PPC_CACHE | PPC_CACHE_LOCK | PPC_CACHE_ICBI |
                        PPC_CACHE_DCBZ | PPC_CACHE_DCBA |

From 12bf2d33fe520f9cfd09f7bf9d46ae3202c3cb49 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Fri, 22 Jul 2016 13:10:36 +0200
Subject: [PATCH 5/5] spapr: disintricate core-id from DT semantics

The goal of this patch is to have a stable core-id which does not depend
on any DT related semantics, which involve non-obvious computations on
modern PowerPC server cpus.

With this patch, the DT core id is computed on-demand as:

       (core-id / smp_threads) * smt

where smt is the number of threads per core in the host.

This formula should be consolidated in a helper since it is needed in
several places.

Other uses for core-id includes: compute a stable cpu_index (which
allows random order hotplug/unplug without breaking migration) and
NUMA.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c          | 10 +++++-----
 hw/ppc/spapr_cpu_core.c | 24 +++++++++++-------------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9193ac2c12..fbbd0518ed 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1815,10 +1815,11 @@ static void ppc_spapr_init(MachineState *machine)
 
         spapr->cores = g_new0(Object *, spapr_max_cores);
         for (i = 0; i < spapr_max_cores; i++) {
-            int core_dt_id = i * smt;
+            int core_id = i * smp_threads;
             sPAPRDRConnector *drc =
                 spapr_dr_connector_new(OBJECT(spapr),
-                                       SPAPR_DR_CONNECTOR_TYPE_CPU, core_dt_id);
+                                       SPAPR_DR_CONNECTOR_TYPE_CPU,
+                                       (core_id / smp_threads) * smt);
 
             qemu_register_reset(spapr_drc_reset, drc);
 
@@ -1834,7 +1835,7 @@ static void ppc_spapr_init(MachineState *machine)
                 core  = object_new(type);
                 object_property_set_int(core, smp_threads, "nr-threads",
                                         &error_fatal);
-                object_property_set_int(core, core_dt_id, CPU_CORE_PROP_CORE_ID,
+                object_property_set_int(core, core_id, CPU_CORE_PROP_CORE_ID,
                                         &error_fatal);
                 object_property_set_bool(core, true, "realized", &error_fatal);
             }
@@ -2376,7 +2377,6 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine)
     HotpluggableCPUList *head = NULL;
     sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
     int spapr_max_cores = max_cpus / smp_threads;
-    int smt = kvmppc_smt_threads();
 
     for (i = 0; i < spapr_max_cores; i++) {
         HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
@@ -2386,7 +2386,7 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine)
         cpu_item->type = spapr_get_cpu_core_type(machine->cpu_model);
         cpu_item->vcpus_count = smp_threads;
         cpu_props->has_core_id = true;
-        cpu_props->core_id = i * smt;
+        cpu_props->core_id = i * smp_threads;
         /* TODO: add 'has_node/node' here to describe
            to which node core belongs */
 
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 4bfc96bd5a..c04aaa47d7 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -103,7 +103,6 @@ static void spapr_core_release(DeviceState *dev, void *opaque)
     size_t size = object_type_get_instance_size(typename);
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     CPUCore *cc = CPU_CORE(dev);
-    int smt = kvmppc_smt_threads();
     int i;
 
     for (i = 0; i < cc->nr_threads; i++) {
@@ -117,7 +116,7 @@ static void spapr_core_release(DeviceState *dev, void *opaque)
         object_unparent(obj);
     }
 
-    spapr->cores[cc->core_id / smt] = NULL;
+    spapr->cores[cc->core_id / smp_threads] = NULL;
 
     g_free(sc->threads);
     object_unparent(OBJECT(dev));
@@ -128,18 +127,19 @@ void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
     CPUCore *cc = CPU_CORE(dev);
+    int smt = kvmppc_smt_threads();
+    int index = cc->core_id / smp_threads;
     sPAPRDRConnector *drc =
-        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cc->core_id);
+        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt);
     sPAPRDRConnectorClass *drck;
     Error *local_err = NULL;
-    int smt = kvmppc_smt_threads();
-    int index = cc->core_id / smt;
     int spapr_max_cores = max_cpus / smp_threads;
     int i;
 
     for (i = spapr_max_cores - 1; i > index; i--) {
         if (spapr->cores[i]) {
-            error_setg(errp, "core-id %d should be removed first", i * smt);
+            error_setg(errp, "core-id %d should be removed first",
+                       i * smp_threads);
             return;
         }
     }
@@ -168,11 +168,10 @@ void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     Error *local_err = NULL;
     void *fdt = NULL;
     int fdt_offset = 0;
-    int index;
+    int index = cc->core_id / smp_threads;
     int smt = kvmppc_smt_threads();
 
-    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cc->core_id);
-    index = cc->core_id / smt;
+    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt);
     spapr->cores[index] = OBJECT(dev);
 
     if (!smc->dr_cpu_enabled) {
@@ -226,7 +225,6 @@ void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
     int spapr_max_cores = max_cpus / smp_threads;
     int index, i;
-    int smt = kvmppc_smt_threads();
     Error *local_err = NULL;
     CPUCore *cc = CPU_CORE(dev);
     char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model);
@@ -247,12 +245,12 @@ void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
-    if (cc->core_id % smt) {
+    if (cc->core_id % smp_threads) {
         error_setg(&local_err, "invalid core id %d\n", cc->core_id);
         goto out;
     }
 
-    index = cc->core_id / smt;
+    index = cc->core_id / smp_threads;
     if (index < 0 || index >= spapr_max_cores) {
         error_setg(&local_err, "core id %d out of range", cc->core_id);
         goto out;
@@ -266,7 +264,7 @@ void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     for (i = 0; i < index; i++) {
         if (!spapr->cores[i]) {
             error_setg(&local_err, "core-id %d should be added first",
-                       i * smt);
+                       i * smp_threads);
             goto out;
         }
     }