From 600f5ce356b44d8fa5a611ff6b034eb95ecf04e7 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Wed, 16 Nov 2016 20:17:32 +0000
Subject: [PATCH 1/7] virtio-crypto: fix virtio_queue_set_notification() race

We must check for new virtqueue buffers after re-enabling notifications.
This prevents the race condition where the guest added buffers just
after we stopped popping the virtqueue but before we re-enabled
notifications.

I think the virtio-crypto code was based on virtio-net but this crucial
detail was missed.  virtio-net does not have the race condition because
it processes the virtqueue one more time after re-enabling
notifications.

Cc: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
---
 hw/virtio/virtio-crypto.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 32938433b7..847dc9dafd 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -692,8 +692,17 @@ static void virtio_crypto_dataq_bh(void *opaque)
         return;
     }
 
-    virtio_crypto_handle_dataq(vdev, q->dataq);
-    virtio_queue_set_notification(q->dataq, 1);
+    for (;;) {
+        virtio_crypto_handle_dataq(vdev, q->dataq);
+        virtio_queue_set_notification(q->dataq, 1);
+
+        /* Are we done or did the guest add more buffers? */
+        if (virtio_queue_empty(q->dataq)) {
+            break;
+        }
+
+        virtio_queue_set_notification(q->dataq, 0);
+    }
 }
 
 static void

From 310837de6c1e0badfd736b1b316b1698c53120a7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Nov 2016 16:07:00 +0100
Subject: [PATCH 2/7] virtio: introduce grab/release_ioeventfd to fix vhost

Following the recent refactoring of virtio notifiers [1], more specifically
the patch ed08a2a0b ("virtio: use virtio_bus_set_host_notifier to
start/stop ioeventfd") that uses virtio_bus_set_host_notifier [2]
by default, core virtio code requires 'ioeventfd_started' to be set
to true/false when the host notifiers are configured.

When vhost is stopped and started, however, there is a stop followed by
another start. Since ioeventfd_started was never set to true, the 'stop'
operation triggered by virtio_bus_set_host_notifier() will not result
in a call to virtio_pci_ioeventfd_assign(assign=false). This leaves
the memory regions with stale notifiers and results on the next start
triggering the following assertion:

  kvm_mem_ioeventfd_add: error adding ioeventfd: File exists
  Aborted

This patch reintroduces (hopefully in a cleaner way) the concept
that was present with ioeventfd_disabled before the refactoring.
When ioeventfd_grabbed>0, ioeventfd_started tracks whether ioeventfd
should be enabled or not, but ioeventfd is actually not started at
all until vhost releases the host notifiers.

[1] http://lists.nongnu.org/archive/html/qemu-devel/2016-10/msg07748.html
[2] http://lists.nongnu.org/archive/html/qemu-devel/2016-10/msg07760.html

Reported-by: Felipe Franciosi <felipe@nutanix.com>
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reported-by: Alex Williamson <alex.williamson@redhat.com>
Fixes: ed08a2a0b ("virtio: use virtio_bus_set_host_notifier to start/stop ioeventfd")
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/vhost.c              | 14 ++++----
 hw/virtio/virtio-bus.c         | 58 +++++++++++++++++++++++++++-------
 hw/virtio/virtio.c             | 16 ++++++++++
 include/hw/virtio/virtio-bus.h | 14 ++++++++
 include/hw/virtio/virtio.h     |  2 ++
 5 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 30aee88a3e..f7f70237db 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1214,17 +1214,17 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
-    VirtioBusState *vbus = VIRTIO_BUS(qbus);
-    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
     int i, r, e;
 
-    if (!k->ioeventfd_assign) {
+    /* We will pass the notifiers to the kernel, make sure that QEMU
+     * doesn't interfere.
+     */
+    r = virtio_device_grab_ioeventfd(vdev);
+    if (r < 0) {
         error_report("binding does not support host notifiers");
-        r = -ENOSYS;
         goto fail;
     }
 
-    virtio_device_stop_ioeventfd(vdev);
     for (i = 0; i < hdev->nvqs; ++i) {
         r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
                                          true);
@@ -1244,7 +1244,7 @@ fail_vq:
         }
         assert (e >= 0);
     }
-    virtio_device_start_ioeventfd(vdev);
+    virtio_device_release_ioeventfd(vdev);
 fail:
     return r;
 }
@@ -1267,7 +1267,7 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
         }
         assert (r >= 0);
     }
-    virtio_device_start_ioeventfd(vdev);
+    virtio_device_release_ioeventfd(vdev);
 }
 
 /* Test and clear event pending status.
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index bf61f66a04..d6c0c72bd2 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -147,6 +147,39 @@ void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config)
     }
 }
 
+/* On success, ioeventfd ownership belongs to the caller.  */
+int virtio_bus_grab_ioeventfd(VirtioBusState *bus)
+{
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
+
+    /* vhost can be used even if ioeventfd=off in the proxy device,
+     * so do not check k->ioeventfd_enabled.
+     */
+    if (!k->ioeventfd_assign) {
+        return -ENOSYS;
+    }
+
+    if (bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) {
+        virtio_bus_stop_ioeventfd(bus);
+        /* Remember that we need to restart ioeventfd
+         * when ioeventfd_grabbed becomes zero.
+         */
+        bus->ioeventfd_started = true;
+    }
+    bus->ioeventfd_grabbed++;
+    return 0;
+}
+
+void virtio_bus_release_ioeventfd(VirtioBusState *bus)
+{
+    assert(bus->ioeventfd_grabbed != 0);
+    if (--bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) {
+        /* Force virtio_bus_start_ioeventfd to act.  */
+        bus->ioeventfd_started = false;
+        virtio_bus_start_ioeventfd(bus);
+    }
+}
+
 int virtio_bus_start_ioeventfd(VirtioBusState *bus)
 {
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
@@ -161,10 +194,14 @@ int virtio_bus_start_ioeventfd(VirtioBusState *bus)
     if (bus->ioeventfd_started) {
         return 0;
     }
-    r = vdc->start_ioeventfd(vdev);
-    if (r < 0) {
-        error_report("%s: failed. Fallback to userspace (slower).", __func__);
-        return r;
+
+    /* Only set our notifier if we have ownership.  */
+    if (!bus->ioeventfd_grabbed) {
+        r = vdc->start_ioeventfd(vdev);
+        if (r < 0) {
+            error_report("%s: failed. Fallback to userspace (slower).", __func__);
+            return r;
+        }
     }
     bus->ioeventfd_started = true;
     return 0;
@@ -179,9 +216,12 @@ void virtio_bus_stop_ioeventfd(VirtioBusState *bus)
         return;
     }
 
-    vdev = virtio_bus_get_device(bus);
-    vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
-    vdc->stop_ioeventfd(vdev);
+    /* Only remove our notifier if we have ownership.  */
+    if (!bus->ioeventfd_grabbed) {
+        vdev = virtio_bus_get_device(bus);
+        vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+        vdc->stop_ioeventfd(vdev);
+    }
     bus->ioeventfd_started = false;
 }
 
@@ -211,7 +251,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign)
     }
 
     if (assign) {
-        assert(!bus->ioeventfd_started);
         r = event_notifier_init(notifier, 1);
         if (r < 0) {
             error_report("%s: unable to init event notifier: %s (%d)",
@@ -225,9 +264,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign)
         }
         return 0;
     } else {
-        if (!bus->ioeventfd_started) {
-            return 0;
-        }
         k->ioeventfd_assign(proxy, notifier, n, false);
     }
 
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 55a00cdf9e..b7d5828601 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2191,6 +2191,22 @@ void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
     virtio_bus_stop_ioeventfd(vbus);
 }
 
+int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
+{
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusState *vbus = VIRTIO_BUS(qbus);
+
+    return virtio_bus_grab_ioeventfd(vbus);
+}
+
+void virtio_device_release_ioeventfd(VirtIODevice *vdev)
+{
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusState *vbus = VIRTIO_BUS(qbus);
+
+    virtio_bus_release_ioeventfd(vbus);
+}
+
 static void virtio_device_class_init(ObjectClass *klass, void *data)
 {
     /* Set the default value here. */
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index fdf7fdab81..8a51e2c564 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -97,6 +97,16 @@ struct VirtioBusState {
      * Set if ioeventfd has been started.
      */
     bool ioeventfd_started;
+
+    /*
+     * Set if ioeventfd has been grabbed by vhost.  When ioeventfd
+     * is grabbed by vhost, we track its started/stopped state (which
+     * depends in turn on the virtio status register), but do not
+     * register a handler for the ioeventfd.  When ioeventfd is
+     * released, if ioeventfd_started is true we finally register
+     * the handler so that QEMU's device model can use ioeventfd.
+     */
+    int ioeventfd_grabbed;
 };
 
 void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp);
@@ -131,6 +141,10 @@ bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus);
 int virtio_bus_start_ioeventfd(VirtioBusState *bus);
 /* Stop the ioeventfd. */
 void virtio_bus_stop_ioeventfd(VirtioBusState *bus);
+/* Tell the bus that vhost is grabbing the ioeventfd. */
+int virtio_bus_grab_ioeventfd(VirtioBusState *bus);
+/* bus that vhost is not using the ioeventfd anymore. */
+void virtio_bus_release_ioeventfd(VirtioBusState *bus);
 /* Switch from/to the generic ioeventfd handler */
 int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign);
 
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 5951997f22..835b085d11 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -272,6 +272,8 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                 bool with_irqfd);
 int virtio_device_start_ioeventfd(VirtIODevice *vdev);
 void virtio_device_stop_ioeventfd(VirtIODevice *vdev);
+int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
+void virtio_device_release_ioeventfd(VirtIODevice *vdev);
 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
 void virtio_queue_host_notifier_read(EventNotifier *n);

From 0687c37c5eeef8580b31cc6e1202d874833ae38a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Nov 2016 16:07:01 +0100
Subject: [PATCH 3/7] virtio: access ISR atomically

This will be needed once dataplane will be able to set it outside
the big QEMU lock.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio-mmio.c |  6 +++---
 hw/virtio/virtio-pci.c  |  9 +++------
 hw/virtio/virtio.c      | 22 +++++++++++++++++-----
 3 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index a30270f902..17412cb7b5 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -191,7 +191,7 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
         return virtio_queue_get_addr(vdev, vdev->queue_sel)
             >> proxy->guest_page_shift;
     case VIRTIO_MMIO_INTERRUPTSTATUS:
-        return vdev->isr;
+        return atomic_read(&vdev->isr);
     case VIRTIO_MMIO_STATUS:
         return vdev->status;
     case VIRTIO_MMIO_HOSTFEATURESSEL:
@@ -299,7 +299,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
         }
         break;
     case VIRTIO_MMIO_INTERRUPTACK:
-        vdev->isr &= ~value;
+        atomic_and(&vdev->isr, ~value);
         virtio_update_irq(vdev);
         break;
     case VIRTIO_MMIO_STATUS:
@@ -347,7 +347,7 @@ static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector)
     if (!vdev) {
         return;
     }
-    level = (vdev->isr != 0);
+    level = (atomic_read(&vdev->isr) != 0);
     DPRINTF("virtio_mmio setting IRQ %d\n", level);
     qemu_set_irq(proxy->irq, level);
 }
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 97b32febaf..521ba0b415 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -73,7 +73,7 @@ static void virtio_pci_notify(DeviceState *d, uint16_t vector)
         msix_notify(&proxy->pci_dev, vector);
     else {
         VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
-        pci_set_irq(&proxy->pci_dev, vdev->isr & 1);
+        pci_set_irq(&proxy->pci_dev, atomic_read(&vdev->isr) & 1);
     }
 }
 
@@ -449,8 +449,7 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
         break;
     case VIRTIO_PCI_ISR:
         /* reading from the ISR also clears it. */
-        ret = vdev->isr;
-        vdev->isr = 0;
+        ret = atomic_xchg(&vdev->isr, 0);
         pci_irq_deassert(&proxy->pci_dev);
         break;
     case VIRTIO_MSI_CONFIG_VECTOR:
@@ -1379,9 +1378,7 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
 {
     VirtIOPCIProxy *proxy = opaque;
     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
-    uint64_t val = vdev->isr;
-
-    vdev->isr = 0;
+    uint64_t val = atomic_xchg(&vdev->isr, 0);
     pci_irq_deassert(&proxy->pci_dev);
 
     return val;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index b7d5828601..138a414cbe 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -945,7 +945,7 @@ void virtio_reset(void *opaque)
     vdev->guest_features = 0;
     vdev->queue_sel = 0;
     vdev->status = 0;
-    vdev->isr = 0;
+    atomic_set(&vdev->isr, 0);
     vdev->config_vector = VIRTIO_NO_VECTOR;
     virtio_notify_vector(vdev, vdev->config_vector);
 
@@ -1318,10 +1318,22 @@ void virtio_del_queue(VirtIODevice *vdev, int n)
     vdev->vq[n].vring.num_default = 0;
 }
 
+static void virtio_set_isr(VirtIODevice *vdev, int value)
+{
+    uint8_t old = atomic_read(&vdev->isr);
+
+    /* Do not write ISR if it does not change, so that its cacheline remains
+     * shared in the common case where the guest does not read it.
+     */
+    if ((old & value) != value) {
+        atomic_or(&vdev->isr, value);
+    }
+}
+
 void virtio_irq(VirtQueue *vq)
 {
     trace_virtio_irq(vq);
-    vq->vdev->isr |= 0x01;
+    virtio_set_isr(vq->vdev, 0x1);
     virtio_notify_vector(vq->vdev, vq->vector);
 }
 
@@ -1355,7 +1367,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
     }
 
     trace_virtio_notify(vdev, vq);
-    vdev->isr |= 0x01;
+    virtio_set_isr(vq->vdev, 0x1);
     virtio_notify_vector(vdev, vq->vector);
 }
 
@@ -1364,7 +1376,7 @@ void virtio_notify_config(VirtIODevice *vdev)
     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
         return;
 
-    vdev->isr |= 0x03;
+    virtio_set_isr(vdev, 0x3);
     vdev->generation++;
     virtio_notify_vector(vdev, vdev->config_vector);
 }
@@ -1895,7 +1907,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
 
     vdev->device_id = device_id;
     vdev->status = 0;
-    vdev->isr = 0;
+    atomic_set(&vdev->isr, 0);
     vdev->queue_sel = 0;
     vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);

From 83d768b5640946b7da55ce8335509df297e2c7cd Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Nov 2016 16:07:02 +0100
Subject: [PATCH 4/7] virtio: set ISR on dataplane notifications

Dataplane has been omitting forever the step of setting ISR when
an interrupt is raised.  This caused little breakage, because the
specification actually says that ISR may not be updated in MSI mode.

Some versions of the Windows drivers however didn't clear MSI mode
correctly, and proceeded using polling mode (using ISR, not the used
ring index!) for crashdump and hibernation.  If it were just crashdump
and hibernation it would not be a big deal, but recent releases of
Windows do not really shut down, but rather log out and hibernate to
make the next startup faster.  Hence, this manifested as a more serious
hang during shutdown with e.g. Windows 8.1 and virtio-win 1.8.0 RPMs.
Newer versions fixed this, while older versions do not use MSI at all.

The failure has always been there for virtio dataplane, but it became
visible after commits 9ffe337 ("virtio-blk: always use dataplane path
if ioeventfd is active", 2016-10-30) and ad07cd6 ("virtio-scsi: always
use dataplane path if ioeventfd is active", 2016-10-30) made virtio-blk
and virtio-scsi always use the dataplane code under KVM.  The good news
therefore is that it was not a bug in the patches---they were doing
exactly what they were meant for, i.e. shake out remaining dataplane bugs.

The fix is not hard, so it's worth arranging for the broken drivers.
The virtio_should_notify+event_notifier_set pair that is common to
virtio-blk and virtio-scsi dataplane is replaced with a new public
function virtio_notify_irqfd that also sets ISR.  The irqfd emulation
code now need not set ISR anymore, so virtio_irq is removed.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/block/dataplane/virtio-blk.c |  4 +---
 hw/scsi/virtio-scsi-dataplane.c |  7 -------
 hw/scsi/virtio-scsi.c           |  2 +-
 hw/virtio/trace-events          |  2 +-
 hw/virtio/virtio.c              | 36 +++++++++++++++++++++++++--------
 include/hw/virtio/virtio-scsi.h |  1 -
 include/hw/virtio/virtio.h      |  2 +-
 7 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 90ef557c8c..d1f9f63eaf 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -68,9 +68,7 @@ static void notify_guest_bh(void *opaque)
             unsigned i = j + ctzl(bits);
             VirtQueue *vq = virtio_get_queue(s->vdev, i);
 
-            if (virtio_should_notify(s->vdev, vq)) {
-                event_notifier_set(virtio_queue_get_guest_notifier(vq));
-            }
+            virtio_notify_irqfd(s->vdev, vq);
 
             bits &= bits - 1; /* clear right-most bit */
         }
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index f2ea29dbc3..6b8d0f0024 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -95,13 +95,6 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
     return 0;
 }
 
-void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req)
-{
-    if (virtio_should_notify(vdev, req->vq)) {
-        event_notifier_set(virtio_queue_get_guest_notifier(req->vq));
-    }
-}
-
 /* assumes s->ctx held */
 static void virtio_scsi_clear_aio(VirtIOSCSI *s)
 {
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 3e5ae6ac0f..10fd687193 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -69,7 +69,7 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
     qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size);
     virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size);
     if (s->dataplane_started && !s->dataplane_fenced) {
-        virtio_scsi_dataplane_notify(vdev, req);
+        virtio_notify_irqfd(vdev, vq);
     } else {
         virtio_notify(vdev, vq);
     }
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 8756cefa79..7b6f55e70e 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -5,7 +5,7 @@ virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "
 virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
 virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u"
 virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
-virtio_irq(void *vq) "vq %p"
+virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p"
 virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
 virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"
 
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 138a414cbe..1af2de2714 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1330,13 +1330,6 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
     }
 }
 
-void virtio_irq(VirtQueue *vq)
-{
-    trace_virtio_irq(vq);
-    virtio_set_isr(vq->vdev, 0x1);
-    virtio_notify_vector(vq->vdev, vq->vector);
-}
-
 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
     uint16_t old, new;
@@ -1360,6 +1353,33 @@ bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
     return !v || vring_need_event(vring_get_used_event(vq), new, old);
 }
 
+void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
+{
+    if (!virtio_should_notify(vdev, vq)) {
+        return;
+    }
+
+    trace_virtio_notify_irqfd(vdev, vq);
+
+    /*
+     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
+     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
+     * incorrectly polling this bit during crashdump and hibernation
+     * in MSI mode, causing a hang if this bit is never updated.
+     * Recent releases of Windows do not really shut down, but rather
+     * log out and hibernate to make the next startup faster.  Hence,
+     * this manifested as a more serious hang during shutdown with
+     *
+     * Next driver release from 2016 fixed this problem, so working around it
+     * is not a must, but it's easy to do so let's do it here.
+     *
+     * Note: it's safe to update ISR from any thread as it was switched
+     * to an atomic operation.
+     */
+    virtio_set_isr(vq->vdev, 0x1);
+    event_notifier_set(&vq->guest_notifier);
+}
+
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
     if (!virtio_should_notify(vdev, vq)) {
@@ -1994,7 +2014,7 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n)
 {
     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
     if (event_notifier_test_and_clear(n)) {
-        virtio_irq(vq);
+        virtio_notify_vector(vq->vdev, vq->vector);
     }
 }
 
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index 9fbc7d7475..73751969ba 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -137,6 +137,5 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
 void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp);
 int virtio_scsi_dataplane_start(VirtIODevice *s);
 void virtio_scsi_dataplane_stop(VirtIODevice *s);
-void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req);
 
 #endif /* QEMU_VIRTIO_SCSI_H */
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 835b085d11..ab0e030cc4 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -181,6 +181,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                                unsigned max_in_bytes, unsigned max_out_bytes);
 
 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq);
+void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq);
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f);
@@ -280,7 +281,6 @@ void virtio_queue_host_notifier_read(EventNotifier *n);
 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
                                                 void (*fn)(VirtIODevice *,
                                                            VirtQueue *));
-void virtio_irq(VirtQueue *vq);
 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
 VirtQueue *virtio_vector_next_queue(VirtQueue *vq);
 

From be4e0d737527d8670dc271712faae0de6a181b4e Mon Sep 17 00:00:00 2001
From: Zhuang Yanying <ann.zhuangyanying@huawei.com>
Date: Thu, 17 Nov 2016 20:31:03 +0800
Subject: [PATCH 5/7] ivshmem: Fix 64 bit memory bar configuration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Device ivshmem property use64=0 is designed to make the device
expose a 32 bit shared memory BAR instead of 64 bit one.  The
default is a 64 bit BAR, except pc-1.2 and older retain a 32 bit
BAR.  A 32 bit BAR can support only up to 1 GiB of shared memory.

This worked as designed until commit 5400c02 accidentally flipped
its sense: since then, we misinterpret use64=0 as use64=1 and vice
versa.  Worse, the default got flipped as well.  Devices
ivshmem-plain and ivshmem-doorbell are not affected.

Fix by restoring the test of IVShmemState member not_legacy_32bit
that got messed up in commit 5400c02.  Also update its
initialization for devices ivhsmem-plain and ivshmem-doorbell.
Without that, they'd regress to 32 bit BARs.

Cc: qemu-stable@nongnu.org
Signed-off-by: Zhuang Yanying <ann.zhuangyanying@huawei.com>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
---
 hw/misc/ivshmem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 230e51b6e0..abeaf3da08 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -858,7 +858,7 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
     pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                      &s->ivshmem_mmio);
 
-    if (!s->not_legacy_32bit) {
+    if (s->not_legacy_32bit) {
         attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
     }
 
@@ -1045,6 +1045,7 @@ static void ivshmem_plain_init(Object *obj)
                              ivshmem_check_memdev_is_busy,
                              OBJ_PROP_LINK_UNREF_ON_RELEASE,
                              &error_abort);
+    s->not_legacy_32bit = 1;
 }
 
 static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
@@ -1116,6 +1117,7 @@ static void ivshmem_doorbell_init(Object *obj)
 
     s->features |= (1 << IVSHMEM_MSI);
     s->legacy_size = SIZE_MAX;  /* whatever the server sends */
+    s->not_legacy_32bit = 1;
 }
 
 static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)

From d668fc4c7c69a3251be5965601015f3c17800818 Mon Sep 17 00:00:00 2001
From: ZhuangYanying <ann.zhuangyanying@huawei.com>
Date: Fri, 18 Nov 2016 16:22:48 +0800
Subject: [PATCH 6/7] ipmi: fix qemu crash while migrating with ipmi

Qemu crash in the source side while migrating, after starting ipmi service inside vm.

./x86_64-softmmu/qemu-system-x86_64 --enable-kvm -smp 4 -m 4096 \
-drive file=/work/suse/suse11_sp3_64_vt,format=raw,if=none,id=drive-virtio-disk0,cache=none \
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0 \
-vnc :99 -monitor vc -device ipmi-bmc-sim,id=bmc0 -device isa-ipmi-kcs,bmc=bmc0,ioport=0xca2

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7ffec4268700 (LWP 7657)]
__memcpy_ssse3_back () at ../sysdeps/x86_64/multiarch/memcpy-ssse3-back.S:2757
(gdb) bt
 #0  __memcpy_ssse3_back () at ../sysdeps/x86_64/multiarch/memcpy-ssse3-back.S:2757
 #1  0x00005555559ef775 in memcpy (__len=3, __src=0xc1421c, __dest=<optimized out>)
     at /usr/include/bits/string3.h:51
 #2  qemu_put_buffer (f=0x555557a97690, buf=0xc1421c <Address 0xc1421c out of bounds>, size=3)
     at migration/qemu-file.c:346
 #3  0x00005555559eef66 in vmstate_save_state (f=f@entry=0x555557a97690,
     vmsd=0x555555f8a5a0 <vmstate_ISAIPMIKCSDevice>, opaque=0x555557231160,
     vmdesc=vmdesc@entry=0x55555798cc40) at migration/vmstate.c:333
 #4  0x00005555557cfe45 in vmstate_save (f=f@entry=0x555557a97690, se=se@entry=0x555557231de0,
     vmdesc=vmdesc@entry=0x55555798cc40) at /mnt/sdb/zyy/qemu/migration/savevm.c:720
 #5  0x00005555557d2be7 in qemu_savevm_state_complete_precopy (f=0x555557a97690,
     iterable_only=iterable_only@entry=false) at /mnt/sdb/zyy/qemu/migration/savevm.c:1128
 #6  0x00005555559ea102 in migration_completion (start_time=<synthetic pointer>,
     old_vm_running=<synthetic pointer>, current_active_state=<optimized out>,
     s=0x5555560eaa80 <current_migration.44078>) at migration/migration.c:1707
 #7  migration_thread (opaque=0x5555560eaa80 <current_migration.44078>) at migration/migration.c:1855
 #8  0x00007ffff3900dc5 in start_thread (arg=0x7ffec4268700) at pthread_create.c:308
 #9  0x00007fffefc6c71d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113

Signed-off-by: Zhuang Yanying <ann.zhuangyanying@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/ipmi/isa_ipmi_kcs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/hw/ipmi/isa_ipmi_kcs.c b/hw/ipmi/isa_ipmi_kcs.c
index 9a38f8a28a..80444977a0 100644
--- a/hw/ipmi/isa_ipmi_kcs.c
+++ b/hw/ipmi/isa_ipmi_kcs.c
@@ -433,10 +433,8 @@ const VMStateDescription vmstate_ISAIPMIKCSDevice = {
         VMSTATE_BOOL(kcs.use_irq, ISAIPMIKCSDevice),
         VMSTATE_BOOL(kcs.irqs_enabled, ISAIPMIKCSDevice),
         VMSTATE_UINT32(kcs.outpos, ISAIPMIKCSDevice),
-        VMSTATE_VBUFFER_UINT32(kcs.outmsg, ISAIPMIKCSDevice, 1, NULL, 0,
-                               kcs.outlen),
-        VMSTATE_VBUFFER_UINT32(kcs.inmsg, ISAIPMIKCSDevice, 1, NULL, 0,
-                               kcs.inlen),
+        VMSTATE_UINT8_ARRAY(kcs.outmsg, ISAIPMIKCSDevice, MAX_IPMI_MSG_SIZE),
+        VMSTATE_UINT8_ARRAY(kcs.inmsg, ISAIPMIKCSDevice, MAX_IPMI_MSG_SIZE),
         VMSTATE_BOOL(kcs.write_end, ISAIPMIKCSDevice),
         VMSTATE_UINT8(kcs.status_reg, ISAIPMIKCSDevice),
         VMSTATE_UINT8(kcs.data_out_reg, ISAIPMIKCSDevice),

From 4b5b47abbf23246bd8dde4c6faaed8b7249d8654 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Fri, 11 Nov 2016 14:45:42 -0200
Subject: [PATCH 7/7] acpi: Use apic_id_limit when calculating legacy ACPI
 table size

The code that calculates the legacy ACPI table size for migration
compatibility uses max_cpus when calculating legacy_aml_len (the size of
the DSDT and SSDT tables). However, the SSDT grows according to APIC ID
limit, not max_cpus.

The bug is not triggered very often because of the 4k alignment on the
table size. But it can be triggered if you are unlucky enough to cross a
4k boundary.

Change the legacy_aml_len calculation to use apic_id_limit, to calculate
the right size.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/acpi-build.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index a15585717a..45a2ccfc4c 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2860,7 +2860,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
          */
         int legacy_aml_len =
             pcmc->legacy_acpi_table_size +
-            ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus;
+            ACPI_BUILD_LEGACY_CPU_AML_SIZE * pcms->apic_id_limit;
         int legacy_table_size =
             ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
                      ACPI_BUILD_ALIGN_SIZE);