diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 90ef557c8c..d1f9f63eaf 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -68,9 +68,7 @@ static void notify_guest_bh(void *opaque) unsigned i = j + ctzl(bits); VirtQueue *vq = virtio_get_queue(s->vdev, i); - if (virtio_should_notify(s->vdev, vq)) { - event_notifier_set(virtio_queue_get_guest_notifier(vq)); - } + virtio_notify_irqfd(s->vdev, vq); bits &= bits - 1; /* clear right-most bit */ } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a15585717a..45a2ccfc4c 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2860,7 +2860,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) */ int legacy_aml_len = pcmc->legacy_acpi_table_size + - ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus; + ACPI_BUILD_LEGACY_CPU_AML_SIZE * pcms->apic_id_limit; int legacy_table_size = ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, ACPI_BUILD_ALIGN_SIZE); diff --git a/hw/ipmi/isa_ipmi_kcs.c b/hw/ipmi/isa_ipmi_kcs.c index 9a38f8a28a..80444977a0 100644 --- a/hw/ipmi/isa_ipmi_kcs.c +++ b/hw/ipmi/isa_ipmi_kcs.c @@ -433,10 +433,8 @@ const VMStateDescription vmstate_ISAIPMIKCSDevice = { VMSTATE_BOOL(kcs.use_irq, ISAIPMIKCSDevice), VMSTATE_BOOL(kcs.irqs_enabled, ISAIPMIKCSDevice), VMSTATE_UINT32(kcs.outpos, ISAIPMIKCSDevice), - VMSTATE_VBUFFER_UINT32(kcs.outmsg, ISAIPMIKCSDevice, 1, NULL, 0, - kcs.outlen), - VMSTATE_VBUFFER_UINT32(kcs.inmsg, ISAIPMIKCSDevice, 1, NULL, 0, - kcs.inlen), + VMSTATE_UINT8_ARRAY(kcs.outmsg, ISAIPMIKCSDevice, MAX_IPMI_MSG_SIZE), + VMSTATE_UINT8_ARRAY(kcs.inmsg, ISAIPMIKCSDevice, MAX_IPMI_MSG_SIZE), VMSTATE_BOOL(kcs.write_end, ISAIPMIKCSDevice), VMSTATE_UINT8(kcs.status_reg, ISAIPMIKCSDevice), VMSTATE_UINT8(kcs.data_out_reg, ISAIPMIKCSDevice), diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index f2ea29dbc3..6b8d0f0024 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ 
b/hw/scsi/virtio-scsi-dataplane.c @@ -95,13 +95,6 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, return 0; } -void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req) -{ - if (virtio_should_notify(vdev, req->vq)) { - event_notifier_set(virtio_queue_get_guest_notifier(req->vq)); - } -} - /* assumes s->ctx held */ static void virtio_scsi_clear_aio(VirtIOSCSI *s) { diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 3e5ae6ac0f..10fd687193 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -69,7 +69,7 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size); virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size); if (s->dataplane_started && !s->dataplane_fenced) { - virtio_scsi_dataplane_notify(vdev, req); + virtio_notify_irqfd(vdev, vq); } else { virtio_notify(vdev, vq); } diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 8756cefa79..7b6f55e70e 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -5,7 +5,7 @@ virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) " virtqueue_flush(void *vq, unsigned int count) "vq %p count %u" virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u" virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p" -virtio_irq(void *vq) "vq %p" +virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p" virtio_notify(void *vdev, void *vq) "vdev %p vq %p" virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 30aee88a3e..f7f70237db 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1214,17 +1214,17 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusState 
*vbus = VIRTIO_BUS(qbus); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); int i, r, e; - if (!k->ioeventfd_assign) { + /* We will pass the notifiers to the kernel, make sure that QEMU + * doesn't interfere. + */ + r = virtio_device_grab_ioeventfd(vdev); + if (r < 0) { error_report("binding does not support host notifiers"); - r = -ENOSYS; goto fail; } - virtio_device_stop_ioeventfd(vdev); for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, true); @@ -1244,7 +1244,7 @@ fail_vq: } assert (e >= 0); } - virtio_device_start_ioeventfd(vdev); + virtio_device_release_ioeventfd(vdev); fail: return r; } @@ -1267,7 +1267,7 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) } assert (r >= 0); } - virtio_device_start_ioeventfd(vdev); + virtio_device_release_ioeventfd(vdev); } /* Test and clear event pending status. diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index bf61f66a04..d6c0c72bd2 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -147,6 +147,39 @@ void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config) } } +/* On success, ioeventfd ownership belongs to the caller. */ +int virtio_bus_grab_ioeventfd(VirtioBusState *bus) +{ + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); + + /* vhost can be used even if ioeventfd=off in the proxy device, + * so do not check k->ioeventfd_enabled. + */ + if (!k->ioeventfd_assign) { + return -ENOSYS; + } + + if (bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) { + virtio_bus_stop_ioeventfd(bus); + /* Remember that we need to restart ioeventfd + * when ioeventfd_grabbed becomes zero. + */ + bus->ioeventfd_started = true; + } + bus->ioeventfd_grabbed++; + return 0; +} + +void virtio_bus_release_ioeventfd(VirtioBusState *bus) +{ + assert(bus->ioeventfd_grabbed != 0); + if (--bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) { + /* Force virtio_bus_start_ioeventfd to act. 
*/ + bus->ioeventfd_started = false; + virtio_bus_start_ioeventfd(bus); + } +} + int virtio_bus_start_ioeventfd(VirtioBusState *bus) { VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); @@ -161,10 +194,14 @@ int virtio_bus_start_ioeventfd(VirtioBusState *bus) if (bus->ioeventfd_started) { return 0; } - r = vdc->start_ioeventfd(vdev); - if (r < 0) { - error_report("%s: failed. Fallback to userspace (slower).", __func__); - return r; + + /* Only set our notifier if we have ownership. */ + if (!bus->ioeventfd_grabbed) { + r = vdc->start_ioeventfd(vdev); + if (r < 0) { + error_report("%s: failed. Fallback to userspace (slower).", __func__); + return r; + } } bus->ioeventfd_started = true; return 0; @@ -179,9 +216,12 @@ void virtio_bus_stop_ioeventfd(VirtioBusState *bus) return; } - vdev = virtio_bus_get_device(bus); - vdc = VIRTIO_DEVICE_GET_CLASS(vdev); - vdc->stop_ioeventfd(vdev); + /* Only remove our notifier if we have ownership. */ + if (!bus->ioeventfd_grabbed) { + vdev = virtio_bus_get_device(bus); + vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + vdc->stop_ioeventfd(vdev); + } bus->ioeventfd_started = false; } @@ -211,7 +251,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) } if (assign) { - assert(!bus->ioeventfd_started); r = event_notifier_init(notifier, 1); if (r < 0) { error_report("%s: unable to init event notifier: %s (%d)", @@ -225,9 +264,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) } return 0; } else { - if (!bus->ioeventfd_started) { - return 0; - } k->ioeventfd_assign(proxy, notifier, n, false); } diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index 32938433b7..847dc9dafd 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -692,8 +692,17 @@ static void virtio_crypto_dataq_bh(void *opaque) return; } - virtio_crypto_handle_dataq(vdev, q->dataq); - virtio_queue_set_notification(q->dataq, 1); + for (;;) { + virtio_crypto_handle_dataq(vdev, q->dataq); + 
virtio_queue_set_notification(q->dataq, 1); + + /* Are we done or did the guest add more buffers? */ + if (virtio_queue_empty(q->dataq)) { + break; + } + + virtio_queue_set_notification(q->dataq, 0); + } } static void diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index a30270f902..17412cb7b5 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -191,7 +191,7 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) return virtio_queue_get_addr(vdev, vdev->queue_sel) >> proxy->guest_page_shift; case VIRTIO_MMIO_INTERRUPTSTATUS: - return vdev->isr; + return atomic_read(&vdev->isr); case VIRTIO_MMIO_STATUS: return vdev->status; case VIRTIO_MMIO_HOSTFEATURESSEL: @@ -299,7 +299,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, } break; case VIRTIO_MMIO_INTERRUPTACK: - vdev->isr &= ~value; + atomic_and(&vdev->isr, ~value); virtio_update_irq(vdev); break; case VIRTIO_MMIO_STATUS: @@ -347,7 +347,7 @@ static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector) if (!vdev) { return; } - level = (vdev->isr != 0); + level = (atomic_read(&vdev->isr) != 0); DPRINTF("virtio_mmio setting IRQ %d\n", level); qemu_set_irq(proxy->irq, level); } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 97b32febaf..521ba0b415 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -73,7 +73,7 @@ static void virtio_pci_notify(DeviceState *d, uint16_t vector) msix_notify(&proxy->pci_dev, vector); else { VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - pci_set_irq(&proxy->pci_dev, vdev->isr & 1); + pci_set_irq(&proxy->pci_dev, atomic_read(&vdev->isr) & 1); } } @@ -449,8 +449,7 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr) break; case VIRTIO_PCI_ISR: /* reading from the ISR also clears it. 
*/ - ret = vdev->isr; - vdev->isr = 0; + ret = atomic_xchg(&vdev->isr, 0); pci_irq_deassert(&proxy->pci_dev); break; case VIRTIO_MSI_CONFIG_VECTOR: @@ -1379,9 +1378,7 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr, { VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - uint64_t val = vdev->isr; - - vdev->isr = 0; + uint64_t val = atomic_xchg(&vdev->isr, 0); pci_irq_deassert(&proxy->pci_dev); return val; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 55a00cdf9e..1af2de2714 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -945,7 +945,7 @@ void virtio_reset(void *opaque) vdev->guest_features = 0; vdev->queue_sel = 0; vdev->status = 0; - vdev->isr = 0; + atomic_set(&vdev->isr, 0); vdev->config_vector = VIRTIO_NO_VECTOR; virtio_notify_vector(vdev, vdev->config_vector); @@ -1318,11 +1318,16 @@ void virtio_del_queue(VirtIODevice *vdev, int n) vdev->vq[n].vring.num_default = 0; } -void virtio_irq(VirtQueue *vq) +static void virtio_set_isr(VirtIODevice *vdev, int value) { - trace_virtio_irq(vq); - vq->vdev->isr |= 0x01; - virtio_notify_vector(vq->vdev, vq->vector); + uint8_t old = atomic_read(&vdev->isr); + + /* Do not write ISR if it does not change, so that its cacheline remains + * shared in the common case where the guest does not read it. 
+ */ + if ((old & value) != value) { + atomic_or(&vdev->isr, value); + } } bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) @@ -1348,6 +1353,33 @@ bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) return !v || vring_need_event(vring_get_used_event(vq), new, old); } +void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) +{ + if (!virtio_should_notify(vdev, vq)) { + return; + } + + trace_virtio_notify_irqfd(vdev, vq); + + /* + * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but + * windows drivers included in virtio-win 1.8.0 (circa 2015) are + * incorrectly polling this bit during crashdump and hibernation + * in MSI mode, causing a hang if this bit is never updated. + * Recent releases of Windows do not really shut down, but rather + * log out and hibernate to make the next startup faster. Hence, + * this manifested as a more serious hang during shutdown. + * + * Next driver release from 2016 fixed this problem, so working around it + * is not a must, but it's easy to do so let's do it here. + * + * Note: it's safe to update ISR from any thread as it was switched + * to an atomic operation. 
+ */ + virtio_set_isr(vq->vdev, 0x1); + event_notifier_set(&vq->guest_notifier); +} + void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) { if (!virtio_should_notify(vdev, vq)) { @@ -1355,7 +1387,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) } trace_virtio_notify(vdev, vq); - vdev->isr |= 0x01; + virtio_set_isr(vq->vdev, 0x1); virtio_notify_vector(vdev, vq->vector); } @@ -1364,7 +1396,7 @@ void virtio_notify_config(VirtIODevice *vdev) if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) return; - vdev->isr |= 0x03; + virtio_set_isr(vdev, 0x3); vdev->generation++; virtio_notify_vector(vdev, vdev->config_vector); } @@ -1895,7 +1927,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->device_id = device_id; vdev->status = 0; - vdev->isr = 0; + atomic_set(&vdev->isr, 0); vdev->queue_sel = 0; vdev->config_vector = VIRTIO_NO_VECTOR; vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX); @@ -1982,7 +2014,7 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n) { VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); if (event_notifier_test_and_clear(n)) { - virtio_irq(vq); + virtio_notify_vector(vq->vdev, vq->vector); } } @@ -2191,6 +2223,22 @@ void virtio_device_stop_ioeventfd(VirtIODevice *vdev) virtio_bus_stop_ioeventfd(vbus); } +int virtio_device_grab_ioeventfd(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + return virtio_bus_grab_ioeventfd(vbus); +} + +void virtio_device_release_ioeventfd(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + virtio_bus_release_ioeventfd(vbus); +} + static void virtio_device_class_init(ObjectClass *klass, void *data) { /* Set the default value here. 
*/ diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index fdf7fdab81..8a51e2c564 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -97,6 +97,16 @@ struct VirtioBusState { * Set if ioeventfd has been started. */ bool ioeventfd_started; + + /* + * Set if ioeventfd has been grabbed by vhost. When ioeventfd + * is grabbed by vhost, we track its started/stopped state (which + * depends in turn on the virtio status register), but do not + * register a handler for the ioeventfd. When ioeventfd is + * released, if ioeventfd_started is true we finally register + * the handler so that QEMU's device model can use ioeventfd. + */ + int ioeventfd_grabbed; }; void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp); @@ -131,6 +141,10 @@ bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus); int virtio_bus_start_ioeventfd(VirtioBusState *bus); /* Stop the ioeventfd. */ void virtio_bus_stop_ioeventfd(VirtioBusState *bus); +/* Tell the bus that vhost is grabbing the ioeventfd. */ +int virtio_bus_grab_ioeventfd(VirtioBusState *bus); +/* Tell the bus that vhost is not using the ioeventfd anymore. 
*/ +void virtio_bus_release_ioeventfd(VirtioBusState *bus); /* Switch from/to the generic ioeventfd handler */ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign); diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 9fbc7d7475..73751969ba 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -137,6 +137,5 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); int virtio_scsi_dataplane_start(VirtIODevice *s); void virtio_scsi_dataplane_stop(VirtIODevice *s); -void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req); #endif /* QEMU_VIRTIO_SCSI_H */ diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 5951997f22..ab0e030cc4 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -181,6 +181,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, unsigned max_in_bytes, unsigned max_out_bytes); bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq); +void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); void virtio_save(VirtIODevice *vdev, QEMUFile *f); @@ -272,13 +273,14 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, bool with_irqfd); int virtio_device_start_ioeventfd(VirtIODevice *vdev); void virtio_device_stop_ioeventfd(VirtIODevice *vdev); +int virtio_device_grab_ioeventfd(VirtIODevice *vdev); +void virtio_device_release_ioeventfd(VirtIODevice *vdev); bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); void virtio_queue_host_notifier_read(EventNotifier *n); void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, void (*fn)(VirtIODevice *, VirtQueue *)); -void virtio_irq(VirtQueue *vq); VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, 
uint16_t vector); VirtQueue *virtio_vector_next_queue(VirtQueue *vq);