From 57914ecb069e56ca53fc97a53437bcf684f5cc13 Mon Sep 17 00:00:00 2001 From: Jay Zhou Date: Thu, 4 Jan 2018 13:29:48 +0800 Subject: [PATCH 01/47] memory: update comments and fix some typos Signed-off-by: Jay Zhou Message-Id: <1515043788-38300-1-git-send-email-jianjay.zhou@huawei.com> Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 07c5d6d597..67e4ee349d 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -324,7 +324,7 @@ FlatView *address_space_to_flatview(AddressSpace *as); * MemoryRegionSection: describes a fragment of a #MemoryRegion * * @mr: the region, or %NULL if empty - * @address_space: the address space the region is mapped in + * @fv: the flat view of the address space the region is mapped in * @offset_within_region: the beginning of the section, relative to @mr's start * @size: the size of the section; will not exceed @mr's boundaries * @offset_within_address_space: the address of the first byte of the section @@ -610,6 +610,7 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr, * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count * @ops: callbacks for write access handling (must not be NULL). + * @opaque: passed to the read and write callbacks of the @ops structure. * @name: Region name, becomes part of RAMBlock name used in migration stream * must be unique within any device * @size: size of the region. @@ -653,11 +654,10 @@ static inline void memory_region_init_reservation(MemoryRegion *mr, * An IOMMU region translates addresses and forwards accesses to a target * memory region. * - * @typename: QOM class name * @_iommu_mr: the #IOMMUMemoryRegion to be initialized * @instance_size: the IOMMUMemoryRegion subclass instance size + * @mrtypename: the type name of the #IOMMUMemoryRegion * @owner: the object that tracks the region's reference count - * @ops: a function that translates addresses into the @target region * @name: used for debugging; not visible to the user or ABI * @size: size of the region. */ @@ -827,8 +827,8 @@ static inline IOMMUMemoryRegion *memory_region_get_iommu(MemoryRegion *mr) * memory_region_get_iommu_class_nocheck: returns iommu memory region class * if an iommu or NULL if not * - * Returns pointer to IOMMUMemoryRegioniClass if a memory region is an iommu, - * otherwise NULL. This is fast path avoinding QOM checking, use with caution. + * Returns pointer to IOMMUMemoryRegionClass if a memory region is an iommu, + * otherwise NULL. This is fast path avoiding QOM checking, use with caution. * * @mr: the memory region being queried */ @@ -993,7 +993,8 @@ int memory_region_get_fd(MemoryRegion *mr); * protecting the pointer, such as a reference to the region that includes * the incoming ram_addr_t. * - * @mr: the memory region being queried. + * @ptr: the host pointer to be converted + * @offset: the offset within memory region */ MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset); @@ -1270,7 +1271,7 @@ void memory_region_clear_global_locking(MemoryRegion *mr); * @size: the size of the access to trigger the eventfd * @match_data: whether to match against @data, instead of just @addr * @data: the data to match against the guest write - * @fd: the eventfd to be triggered when @addr, @size, and @data all match. + * @e: event notifier to be triggered when @addr, @size, and @data all match. **/ void memory_region_add_eventfd(MemoryRegion *mr, hwaddr addr, @@ -1290,7 +1291,7 @@ void memory_region_add_eventfd(MemoryRegion *mr, * @size: the size of the access to trigger the eventfd * @match_data: whether to match against @data, instead of just @addr * @data: the data to match against the guest write - * @fd: the eventfd to be triggered when @addr, @size, and @data all match. + * @e: event notifier to be triggered when @addr, @size, and @data all match. */ void memory_region_del_eventfd(MemoryRegion *mr, hwaddr addr, @@ -1526,7 +1527,7 @@ bool memory_region_request_mmio_ptr(MemoryRegion *mr, hwaddr addr); * will need to request the pointer again. * * @mr: #MemoryRegion associated to the pointer. - * @addr: address within that region + * @offset: offset within the memory region * @size: size of that area. */ void memory_region_invalidate_mmio_ptr(MemoryRegion *mr, hwaddr offset, @@ -1595,6 +1596,7 @@ void address_space_destroy(AddressSpace *as); * @addr: address within that address space * @attrs: memory transaction attributes * @buf: buffer with the data transferred + * @len: the number of bytes to read or write * @is_write: indicates the transfer direction */ MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, @@ -1612,6 +1614,7 @@ MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, * @addr: address within that address space * @attrs: memory transaction attributes * @buf: buffer with the data transferred + * @len: the number of bytes to write */ MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, @@ -1810,7 +1813,7 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr, * called from an RCU critical section, to avoid that the last reference * to the returned region disappears after address_space_translate returns. * - * @as: #AddressSpace to be accessed + * @fv: #FlatView to be accessed * @addr: address within that address space * @xlat: pointer to address within the returned memory region section's * #MemoryRegion. @@ -1868,7 +1871,7 @@ void *address_space_map(AddressSpace *as, hwaddr addr, * the amount of memory that was actually read or written by the caller. * * @as: #AddressSpace used - * @addr: address within that address space + * @buffer: host pointer as returned by address_space_map() * @len: buffer length as returned by address_space_map() * @access_len: amount of data actually transferred * @is_write: indicates the transfer direction @@ -1905,7 +1908,7 @@ static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write) * or failed (eg unassigned memory, device rejected the transaction, * IOMMU fault). * - * @as: #AddressSpace to be accessed + * @fv: #FlatView to be accessed * @addr: address within that address space * @attrs: memory transaction attributes * @buf: buffer with the data transferred From b850f664a1dbbc1ea27bef12cd251ee5da0bfe05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 13 Jan 2018 23:04:10 -0300 Subject: [PATCH 02/47] qdev: rename typedef qdev_resetfn() -> DeviceReset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit following the DeviceRealize and DeviceUnrealize typedefs, this unify a bit the new QOM API. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20180114020412.26160-2-f4bug@amsat.org> Reviewed-by: Laurent Vivier Signed-off-by: Paolo Bonzini --- include/hw/qdev-core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 51473eee7b..85fa26d8a1 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -32,9 +32,9 @@ typedef enum DeviceCategory { typedef int (*qdev_initfn)(DeviceState *dev); typedef int (*qdev_event)(DeviceState *dev); -typedef void (*qdev_resetfn)(DeviceState *dev); typedef void (*DeviceRealize)(DeviceState *dev, Error **errp); typedef void (*DeviceUnrealize)(DeviceState *dev, Error **errp); +typedef void (*DeviceReset)(DeviceState *dev); typedef void (*BusRealize)(BusState *bus, Error **errp); typedef void (*BusUnrealize)(BusState *bus, Error **errp); @@ -117,7 +117,7 @@ typedef struct DeviceClass { bool hotpluggable; /* callbacks */ - void (*reset)(DeviceState *dev); + DeviceReset reset; DeviceRealize realize; DeviceUnrealize unrealize; From 46795cf2e2f643ace9454822022ba8b1e9c0cf61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 13 Jan 2018 23:04:11 -0300 Subject: [PATCH 03/47] qdev: add helpers to be more explicit when using abstract QOM parent functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QOM API learning curve is quite hard, in particular when devices inherit from abstract parent. To be more explicit about when a device class change the parent hooks, add few helpers hoping a device class_init() will be easier to understand. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20180114020412.26160-3-f4bug@amsat.org> Reviewed-by: Laurent Vivier Signed-off-by: Paolo Bonzini --- hw/core/qdev.c | 24 ++++++++++++++++++++++++ include/hw/qdev-core.h | 10 ++++++++++ 2 files changed, 34 insertions(+) diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 2456035d1a..11f8a27a69 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -1075,6 +1075,30 @@ static void device_class_init(ObjectClass *class, void *data) dc->user_creatable = true; } +void device_class_set_parent_reset(DeviceClass *dc, + DeviceReset dev_reset, + DeviceReset *parent_reset) +{ + *parent_reset = dc->reset; + dc->reset = dev_reset; +} + +void device_class_set_parent_realize(DeviceClass *dc, + DeviceRealize dev_realize, + DeviceRealize *parent_realize) +{ + *parent_realize = dc->realize; + dc->realize = dev_realize; +} + +void device_class_set_parent_unrealize(DeviceClass *dc, + DeviceUnrealize dev_unrealize, + DeviceUnrealize *parent_unrealize) +{ + *parent_unrealize = dc->unrealize; + dc->unrealize = dev_unrealize; +} + void device_reset(DeviceState *dev) { DeviceClass *klass = DEVICE_GET_CLASS(dev); diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 85fa26d8a1..18c0251b40 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -382,6 +382,16 @@ void qdev_machine_init(void); */ void device_reset(DeviceState *dev); +void device_class_set_parent_reset(DeviceClass *dc, + DeviceReset dev_reset, + DeviceReset *parent_reset); +void device_class_set_parent_realize(DeviceClass *dc, + DeviceRealize dev_realize, + DeviceRealize *parent_realize); +void device_class_set_parent_unrealize(DeviceClass *dc, + DeviceUnrealize dev_unrealize, + DeviceUnrealize *parent_unrealize); + const struct VMStateDescription *qdev_get_vmsd(DeviceState *dev); const char *qdev_fw_name(DeviceState *dev); From bf853881690db8bbd1de39e4be580310a9cb0ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 13 Jan 2018 23:04:12 -0300 Subject: [PATCH 04/47] qdev: use device_class_set_parent_realize/unrealize/reset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit changes generated using the following Coccinelle patch: @@ type DeviceParentClass; DeviceParentClass *pc; DeviceClass *dc; identifier parent_fn; identifier child_fn; @@ ( +device_class_set_parent_realize(dc, child_fn, &pc->parent_fn); -pc->parent_fn = dc->realize; ... -dc->realize = child_fn; | +device_class_set_parent_unrealize(dc, child_fn, &pc->parent_fn); -pc->parent_fn = dc->unrealize; ... -dc->unrealize = child_fn; | +device_class_set_parent_reset(dc, child_fn, &pc->parent_fn); -pc->parent_fn = dc->reset; ... -dc->reset = child_fn; ) Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20180114020412.26160-4-f4bug@amsat.org> Reviewed-by: Marcel Apfelbaum Acked-by: David Gibson Acked-by: Cornelia Huck Reviewed-by: Laurent Vivier Signed-off-by: Paolo Bonzini --- hw/i386/kvm/i8254.c | 4 ++-- hw/i386/kvm/i8259.c | 3 +-- hw/input/adb-kbd.c | 4 ++-- hw/input/adb-mouse.c | 4 ++-- hw/intc/arm_gic.c | 3 +-- hw/intc/arm_gic_kvm.c | 7 +++---- hw/intc/arm_gicv3.c | 3 +-- hw/intc/arm_gicv3_its_kvm.c | 3 +-- hw/intc/arm_gicv3_kvm.c | 7 +++---- hw/intc/i8259.c | 3 +-- hw/net/vmxnet3.c | 4 ++-- hw/pci-bridge/gen_pcie_root_port.c | 3 +-- hw/scsi/vmw_pvscsi.c | 4 ++-- hw/timer/i8254.c | 3 +-- hw/vfio/amd-xgbe.c | 4 ++-- hw/vfio/calxeda-xgmac.c | 4 ++-- hw/virtio/virtio-pci.c | 4 ++-- target/alpha/cpu.c | 4 ++-- target/arm/cpu.c | 4 ++-- target/cris/cpu.c | 4 ++-- target/hppa/cpu.c | 4 ++-- target/i386/cpu.c | 8 ++++---- target/lm32/cpu.c | 5 ++--- target/m68k/cpu.c | 5 ++--- target/microblaze/cpu.c | 5 ++--- target/mips/cpu.c | 5 ++--- target/moxie/cpu.c | 5 ++--- target/nios2/cpu.c | 4 ++-- target/openrisc/cpu.c | 5 ++--- target/ppc/translate_init.c | 8 ++++---- target/s390x/cpu.c | 4 ++-- target/sh4/cpu.c | 4 ++-- target/sparc/cpu.c | 4 ++-- target/tilegx/cpu.c | 4 ++-- target/tricore/cpu.c | 4 ++-- target/unicore32/cpu.c | 4 ++-- target/xtensa/cpu.c | 4 ++-- 37 files changed, 73 insertions(+), 88 deletions(-) diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c index 521a58498a..13f20f47d9 100644 --- a/hw/i386/kvm/i8254.c +++ b/hw/i386/kvm/i8254.c @@ -315,8 +315,8 @@ static void kvm_pit_class_init(ObjectClass *klass, void *data) PITCommonClass *k = PIT_COMMON_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - kpc->parent_realize = dc->realize; - dc->realize = kvm_pit_realizefn; + device_class_set_parent_realize(dc, kvm_pit_realizefn, + &kpc->parent_realize); k->set_channel_gate = kvm_pit_set_gate; k->get_channel_info = kvm_pit_get_channel_info; dc->reset = kvm_pit_reset; diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c index b91e98074e..05394cdb7b 100644 --- a/hw/i386/kvm/i8259.c +++ b/hw/i386/kvm/i8259.c @@ -142,8 +142,7 @@ static void kvm_i8259_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); dc->reset = kvm_pic_reset; - kpc->parent_realize = dc->realize; - dc->realize = kvm_pic_realize; + device_class_set_parent_realize(dc, kvm_pic_realize, &kpc->parent_realize); k->pre_save = kvm_pic_get; k->post_load = kvm_pic_put; } diff --git a/hw/input/adb-kbd.c b/hw/input/adb-kbd.c index 354f56e41e..266aed1b7b 100644 --- a/hw/input/adb-kbd.c +++ b/hw/input/adb-kbd.c @@ -374,8 +374,8 @@ static void adb_kbd_class_init(ObjectClass *oc, void *data) ADBDeviceClass *adc = ADB_DEVICE_CLASS(oc); ADBKeyboardClass *akc = ADB_KEYBOARD_CLASS(oc); - akc->parent_realize = dc->realize; - dc->realize = adb_kbd_realizefn; + device_class_set_parent_realize(dc, adb_kbd_realizefn, + &akc->parent_realize); set_bit(DEVICE_CATEGORY_INPUT, dc->categories); adc->devreq = adb_kbd_request; diff --git a/hw/input/adb-mouse.c b/hw/input/adb-mouse.c index c9004233b8..47e88faf25 100644 --- a/hw/input/adb-mouse.c +++ b/hw/input/adb-mouse.c @@ -228,8 +228,8 @@ static void adb_mouse_class_init(ObjectClass *oc, void *data) ADBDeviceClass *adc = ADB_DEVICE_CLASS(oc); ADBMouseClass *amc = ADB_MOUSE_CLASS(oc); - amc->parent_realize = dc->realize; - dc->realize = adb_mouse_realizefn; + device_class_set_parent_realize(dc, adb_mouse_realizefn, + &amc->parent_realize); set_bit(DEVICE_CATEGORY_INPUT, dc->categories); adc->devreq = adb_mouse_request; diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index 724bc9fa61..ea0323f969 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -1461,8 +1461,7 @@ static void arm_gic_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); ARMGICClass *agc = ARM_GIC_CLASS(klass); - agc->parent_realize = dc->realize; - dc->realize = arm_gic_realize; + device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize); } static const TypeInfo arm_gic_info = { diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index ae095d08a3..6f467e68a8 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -591,10 +591,9 @@ static void kvm_arm_gic_class_init(ObjectClass *klass, void *data) agcc->pre_save = kvm_arm_gic_get; agcc->post_load = kvm_arm_gic_put; - kgc->parent_realize = dc->realize; - kgc->parent_reset = dc->reset; - dc->realize = kvm_arm_gic_realize; - dc->reset = kvm_arm_gic_reset; + device_class_set_parent_realize(dc, kvm_arm_gic_realize, + &kgc->parent_realize); + device_class_set_parent_reset(dc, kvm_arm_gic_reset, &kgc->parent_reset); } static const TypeInfo kvm_arm_gic_info = { diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c index f0c967b304..479c66733c 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c @@ -385,8 +385,7 @@ static void arm_gicv3_class_init(ObjectClass *klass, void *data) ARMGICv3Class *agc = ARM_GICV3_CLASS(klass); agcc->post_load = arm_gicv3_post_load; - agc->parent_realize = dc->realize; - dc->realize = arm_gic_realize; + device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize); } static const TypeInfo arm_gicv3_info = { diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c index bf290b8bff..eea6a73df2 100644 --- a/hw/intc/arm_gicv3_its_kvm.c +++ b/hw/intc/arm_gicv3_its_kvm.c @@ -245,11 +245,10 @@ static void kvm_arm_its_class_init(ObjectClass *klass, void *data) dc->realize = kvm_arm_its_realize; dc->props = kvm_arm_its_props; - ic->parent_reset = dc->reset; + device_class_set_parent_reset(dc, kvm_arm_its_reset, &ic->parent_reset); icc->send_msi = kvm_its_send_msi; icc->pre_save = kvm_arm_its_pre_save; icc->post_load = kvm_arm_its_post_load; - dc->reset = kvm_arm_its_reset; } static const TypeInfo kvm_arm_its_info = { diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 481fe5405a..ec371772b3 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -795,10 +795,9 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) agcc->pre_save = kvm_arm_gicv3_get; agcc->post_load = kvm_arm_gicv3_put; - kgc->parent_realize = dc->realize; - kgc->parent_reset = dc->reset; - dc->realize = kvm_arm_gicv3_realize; - dc->reset = kvm_arm_gicv3_reset; + device_class_set_parent_realize(dc, kvm_arm_gicv3_realize, + &kgc->parent_realize); + device_class_set_parent_reset(dc, kvm_arm_gicv3_reset, &kgc->parent_reset); } static const TypeInfo kvm_arm_gicv3_info = { diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c index 1602255a87..76f3d873b8 100644 --- a/hw/intc/i8259.c +++ b/hw/intc/i8259.c @@ -443,8 +443,7 @@ static void i8259_class_init(ObjectClass *klass, void *data) PICClass *k = PIC_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - k->parent_realize = dc->realize; - dc->realize = pic_realize; + device_class_set_parent_realize(dc, pic_realize, &k->parent_realize); dc->reset = pic_reset; } diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 0654d594c1..3648630386 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -2664,8 +2664,8 @@ static void vmxnet3_class_init(ObjectClass *class, void *data) c->class_id = PCI_CLASS_NETWORK_ETHERNET; c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE; c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3; - vc->parent_dc_realize = dc->realize; - dc->realize = vmxnet3_realize; + device_class_set_parent_realize(dc, vmxnet3_realize, + &vc->parent_dc_realize); dc->desc = "VMWare Paravirtualized Ethernet v3"; dc->reset = vmxnet3_qdev_reset; dc->vmsd = &vmstate_vmxnet3; diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index 0e2f2e8bf1..3dbacc6cea 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -137,8 +137,7 @@ static void gen_rp_dev_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_rp_dev; dc->props = gen_rp_props; - rpc->parent_realize = dc->realize; - dc->realize = gen_rp_realize; + device_class_set_parent_realize(dc, gen_rp_realize, &rpc->parent_realize); rpc->aer_vector = gen_rp_aer_vector; rpc->interrupts_init = gen_rp_interrupts_init; diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index 27749c0e42..a3a019e30a 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -1284,8 +1284,8 @@ static void pvscsi_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI; k->class_id = PCI_CLASS_STORAGE_SCSI; k->subsystem_id = 0x1000; - pvs_k->parent_dc_realize = dc->realize; - dc->realize = pvscsi_realize; + device_class_set_parent_realize(dc, pvscsi_realize, + &pvs_k->parent_dc_realize); dc->reset = pvscsi_reset; dc->vmsd = &vmstate_pvscsi; dc->props = pvscsi_properties; diff --git a/hw/timer/i8254.c b/hw/timer/i8254.c index dbc4a0baec..1057850808 100644 --- a/hw/timer/i8254.c +++ b/hw/timer/i8254.c @@ -358,8 +358,7 @@ static void pit_class_initfn(ObjectClass *klass, void *data) PITCommonClass *k = PIT_COMMON_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - pc->parent_realize = dc->realize; - dc->realize = pit_realizefn; + device_class_set_parent_realize(dc, pit_realizefn, &pc->parent_realize); k->set_channel_gate = pit_set_channel_gate; k->get_channel_info = pit_get_channel_info_common; k->post_load = pit_post_load; diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c index fab196cebf..0c4ec4ba25 100644 --- a/hw/vfio/amd-xgbe.c +++ b/hw/vfio/amd-xgbe.c @@ -34,8 +34,8 @@ static void vfio_amd_xgbe_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); VFIOAmdXgbeDeviceClass *vcxc = VFIO_AMD_XGBE_DEVICE_CLASS(klass); - vcxc->parent_realize = dc->realize; - dc->realize = amd_xgbe_realize; + device_class_set_parent_realize(dc, amd_xgbe_realize, + &vcxc->parent_realize); dc->desc = "VFIO AMD XGBE"; dc->vmsd = &vfio_platform_amd_xgbe_vmstate; /* Supported by TYPE_VIRT_MACHINE */ diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c index 7bb17af7ad..24cee6d065 100644 --- a/hw/vfio/calxeda-xgmac.c +++ b/hw/vfio/calxeda-xgmac.c @@ -34,8 +34,8 @@ static void vfio_calxeda_xgmac_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); VFIOCalxedaXgmacDeviceClass *vcxc = VFIO_CALXEDA_XGMAC_DEVICE_CLASS(klass); - vcxc->parent_realize = dc->realize; - dc->realize = calxeda_xgmac_realize; + device_class_set_parent_realize(dc, calxeda_xgmac_realize, + &vcxc->parent_realize); dc->desc = "VFIO Calxeda XGMAC"; dc->vmsd = &vfio_platform_calxeda_xgmac_vmstate; /* Supported by TYPE_VIRT_MACHINE */ diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 9ae10f0cdd..c20537f31d 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1907,8 +1907,8 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data) k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = PCI_CLASS_OTHERS; - vpciklass->parent_dc_realize = dc->realize; - dc->realize = virtio_pci_dc_realize; + device_class_set_parent_realize(dc, virtio_pci_dc_realize, + &vpciklass->parent_dc_realize); dc->reset = virtio_pci_reset; } diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index 7d6366bae9..55675ce419 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -233,8 +233,8 @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); AlphaCPUClass *acc = ALPHA_CPU_CLASS(oc); - acc->parent_realize = dc->realize; - dc->realize = alpha_cpu_realizefn; + device_class_set_parent_realize(dc, alpha_cpu_realizefn, + &acc->parent_realize); cc->class_by_name = alpha_cpu_class_by_name; cc->has_work = alpha_cpu_has_work; diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 9da6ea505c..89ccdeae12 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1722,8 +1722,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(acc); DeviceClass *dc = DEVICE_CLASS(oc); - acc->parent_realize = dc->realize; - dc->realize = arm_cpu_realizefn; + device_class_set_parent_realize(dc, arm_cpu_realizefn, + &acc->parent_realize); dc->props = arm_cpu_properties; acc->parent_reset = cc->reset; diff --git a/target/cris/cpu.c b/target/cris/cpu.c index 949c7a6e25..db8d0884a1 100644 --- a/target/cris/cpu.c +++ b/target/cris/cpu.c @@ -260,8 +260,8 @@ static void cris_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); CRISCPUClass *ccc = CRIS_CPU_CLASS(oc); - ccc->parent_realize = dc->realize; - dc->realize = cris_cpu_realizefn; + device_class_set_parent_realize(dc, cris_cpu_realizefn, + &ccc->parent_realize); ccc->parent_reset = cc->reset; cc->reset = cris_cpu_reset; diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c index 5213347720..7b635cc4ac 100644 --- a/target/hppa/cpu.c +++ b/target/hppa/cpu.c @@ -168,8 +168,8 @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); HPPACPUClass *acc = HPPA_CPU_CLASS(oc); - acc->parent_realize = dc->realize; - dc->realize = hppa_cpu_realizefn; + device_class_set_parent_realize(dc, hppa_cpu_realizefn, + &acc->parent_realize); cc->class_by_name = hppa_cpu_class_by_name; cc->has_work = hppa_cpu_has_work; diff --git a/target/i386/cpu.c b/target/i386/cpu.c index a49d2221ad..d70954b8b7 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -4705,10 +4705,10 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - xcc->parent_realize = dc->realize; - xcc->parent_unrealize = dc->unrealize; - dc->realize = x86_cpu_realizefn; - dc->unrealize = x86_cpu_unrealizefn; + device_class_set_parent_realize(dc, x86_cpu_realizefn, + &xcc->parent_realize); + device_class_set_parent_unrealize(dc, x86_cpu_unrealizefn, + &xcc->parent_unrealize); dc->props = x86_cpu_properties; xcc->parent_reset = cc->reset; diff --git a/target/lm32/cpu.c b/target/lm32/cpu.c index 6f5c14767b..96c2499d0b 100644 --- a/target/lm32/cpu.c +++ b/target/lm32/cpu.c @@ -236,9 +236,8 @@ static void lm32_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - lcc->parent_realize = dc->realize; - dc->realize = lm32_cpu_realizefn; - + device_class_set_parent_realize(dc, lm32_cpu_realizefn, + &lcc->parent_realize); lcc->parent_reset = cc->reset; cc->reset = lm32_cpu_reset; diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c index 98919b358b..6a80be009b 100644 --- a/target/m68k/cpu.c +++ b/target/m68k/cpu.c @@ -255,9 +255,8 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data) CPUClass *cc = CPU_CLASS(c); DeviceClass *dc = DEVICE_CLASS(c); - mcc->parent_realize = dc->realize; - dc->realize = m68k_cpu_realizefn; - + device_class_set_parent_realize(dc, m68k_cpu_realizefn, + &mcc->parent_realize); mcc->parent_reset = cc->reset; cc->reset = m68k_cpu_reset; diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 5700652e06..d8df2fb07e 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -258,9 +258,8 @@ static void mb_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); MicroBlazeCPUClass *mcc = MICROBLAZE_CPU_CLASS(oc); - mcc->parent_realize = dc->realize; - dc->realize = mb_cpu_realizefn; - + device_class_set_parent_realize(dc, mb_cpu_realizefn, + &mcc->parent_realize); mcc->parent_reset = cc->reset; cc->reset = mb_cpu_reset; diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 069f93560e..497706b669 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -174,9 +174,8 @@ static void mips_cpu_class_init(ObjectClass *c, void *data) CPUClass *cc = CPU_CLASS(c); DeviceClass *dc = DEVICE_CLASS(c); - mcc->parent_realize = dc->realize; - dc->realize = mips_cpu_realizefn; - + device_class_set_parent_realize(dc, mips_cpu_realizefn, + &mcc->parent_realize); mcc->parent_reset = cc->reset; cc->reset = mips_cpu_reset; diff --git a/target/moxie/cpu.c b/target/moxie/cpu.c index f1389e5097..4170284da6 100644 --- a/target/moxie/cpu.c +++ b/target/moxie/cpu.c @@ -102,9 +102,8 @@ static void moxie_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); MoxieCPUClass *mcc = MOXIE_CPU_CLASS(oc); - mcc->parent_realize = dc->realize; - dc->realize = moxie_cpu_realizefn; - + device_class_set_parent_realize(dc, moxie_cpu_realizefn, + &mcc->parent_realize); mcc->parent_reset = cc->reset; cc->reset = moxie_cpu_reset; diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c index 4742e52c78..fbfaa2ce26 100644 --- a/target/nios2/cpu.c +++ b/target/nios2/cpu.c @@ -187,8 +187,8 @@ static void nios2_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); Nios2CPUClass *ncc = NIOS2_CPU_CLASS(oc); - ncc->parent_realize = dc->realize; - dc->realize = nios2_cpu_realizefn; + device_class_set_parent_realize(dc, nios2_cpu_realizefn, + &ncc->parent_realize); dc->props = nios2_properties; ncc->parent_reset = cc->reset; cc->reset = nios2_cpu_reset; diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index e0394b8b06..20b115afae 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -132,9 +132,8 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(occ); DeviceClass *dc = DEVICE_CLASS(oc); - occ->parent_realize = dc->realize; - dc->realize = openrisc_cpu_realizefn; - + device_class_set_parent_realize(dc, openrisc_cpu_realizefn, + &occ->parent_realize); occ->parent_reset = cc->reset; cc->reset = openrisc_cpu_reset; diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index 55c99c97e3..e7b1044944 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -10556,12 +10556,12 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - pcc->parent_realize = dc->realize; - pcc->parent_unrealize = dc->unrealize; + device_class_set_parent_realize(dc, ppc_cpu_realizefn, + &pcc->parent_realize); + device_class_set_parent_unrealize(dc, ppc_cpu_unrealizefn, + &pcc->parent_unrealize); pcc->pvr_match = ppc_pvr_match_default; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_always; - dc->realize = ppc_cpu_realizefn; - dc->unrealize = ppc_cpu_unrealizefn; dc->props = ppc_cpu_properties; pcc->parent_reset = cc->reset; diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index d2e6b9f5c7..979469dc3c 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -464,8 +464,8 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(scc); DeviceClass *dc = DEVICE_CLASS(oc); - scc->parent_realize = dc->realize; - dc->realize = s390_cpu_realizefn; + device_class_set_parent_realize(dc, s390_cpu_realizefn, + &scc->parent_realize); dc->props = s390x_cpu_properties; dc->user_creatable = true; diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index e0b99fbc89..e37c187ca2 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -236,8 +236,8 @@ static void superh_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); SuperHCPUClass *scc = SUPERH_CPU_CLASS(oc); - scc->parent_realize = dc->realize; - dc->realize = superh_cpu_realizefn; + device_class_set_parent_realize(dc, superh_cpu_realizefn, + &scc->parent_realize); scc->parent_reset = cc->reset; cc->reset = superh_cpu_reset; diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index c7adc281de..ff6ed91f9a 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -858,8 +858,8 @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - scc->parent_realize = dc->realize; - dc->realize = sparc_cpu_realizefn; + device_class_set_parent_realize(dc, sparc_cpu_realizefn, + &scc->parent_realize); dc->props = sparc_cpu_properties; scc->parent_reset = cc->reset; diff --git a/target/tilegx/cpu.c b/target/tilegx/cpu.c index c140b461ac..b7451bdcf2 100644 --- a/target/tilegx/cpu.c +++ b/target/tilegx/cpu.c @@ -141,8 +141,8 @@ static void tilegx_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); TileGXCPUClass *tcc = TILEGX_CPU_CLASS(oc); - tcc->parent_realize = dc->realize; - dc->realize = tilegx_cpu_realizefn; + device_class_set_parent_realize(dc, tilegx_cpu_realizefn, + &tcc->parent_realize); tcc->parent_reset = cc->reset; cc->reset = tilegx_cpu_reset; diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c index 179c997aa4..2edaef1aef 100644 --- a/target/tricore/cpu.c +++ b/target/tricore/cpu.c @@ -153,8 +153,8 @@ static void tricore_cpu_class_init(ObjectClass *c, void *data) CPUClass *cc = CPU_CLASS(c); DeviceClass *dc = DEVICE_CLASS(c); - mcc->parent_realize = dc->realize; - dc->realize = tricore_cpu_realizefn; + device_class_set_parent_realize(dc, tricore_cpu_realizefn, + &mcc->parent_realize); mcc->parent_reset = cc->reset; cc->reset = tricore_cpu_reset; diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c index 17dc1504d7..fb837aab4c 100644 --- a/target/unicore32/cpu.c +++ b/target/unicore32/cpu.c @@ -132,8 +132,8 @@ static void uc32_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); UniCore32CPUClass *ucc = UNICORE32_CPU_CLASS(oc); - ucc->parent_realize = dc->realize; - dc->realize = uc32_cpu_realizefn; + device_class_set_parent_realize(dc, uc32_cpu_realizefn, + &ucc->parent_realize); cc->class_by_name = uc32_cpu_class_by_name; cc->has_work = uc32_cpu_has_work; diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c index 1c982a0b2e..4573388a45 100644 --- a/target/xtensa/cpu.c +++ b/target/xtensa/cpu.c @@ -151,8 +151,8 @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(oc); XtensaCPUClass *xcc = XTENSA_CPU_CLASS(cc); - xcc->parent_realize = dc->realize; - dc->realize = xtensa_cpu_realizefn; + device_class_set_parent_realize(dc, xtensa_cpu_realizefn, + &xcc->parent_realize); xcc->parent_reset = cc->reset; cc->reset = xtensa_cpu_reset; From fe29141bee3e8603a39838897698d8349c3daef0 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 15 Dec 2017 16:23:26 +1100 Subject: [PATCH 05/47] kvm: Add kvm_set_user_memory tracepoint This adds a tracepoint to trace the KVM_SET_USER_MEMORY_REGION ioctl parameters which is quite useful for debugging VFIO memory regions being actually registered with KVM. Signed-off-by: Alexey Kardashevskiy Message-Id: <20171215052326.21386-1-aik@ozlabs.ru> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 6 +++++- accel/kvm/trace-events | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f290f487a5..b91fcb7160 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -235,6 +235,7 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot) { KVMState *s = kvm_state; struct kvm_userspace_memory_region mem; + int ret; mem.slot = slot->slot | (kml->as_id << 16); mem.guest_phys_addr = slot->start_addr; @@ -248,7 +249,10 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot) kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); } mem.memory_size = slot->memory_size; - return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, + mem.memory_size, mem.userspace_addr, ret); + return ret; } int kvm_destroy_vcpu(CPUState *cpu) diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index f89ba5578d..58e98efe5d 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -12,4 +12,5 @@ kvm_irqchip_commit_routes(void) "" kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" +kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" From e85687ffe2233e35864cb1c307183df29c05f622 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 15 Jan 2018 18:17:01 -0200 Subject: [PATCH 06/47] qemu: improve hugepage allocation failure message Improve hugepage allocation failure message, indicating what is happening to the user. Signed-off-by: Marcelo Tosatti Message-Id: <20180115201700.GA4439@amt.cnet> Signed-off-by: Paolo Bonzini --- numa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/numa.c b/numa.c index 83675a03f3..a9528aaa7d 100644 --- a/numa.c +++ b/numa.c @@ -463,6 +463,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, if (mem_prealloc) { exit(1); } + error_report("falling back to regular RAM allocation."); /* Legacy behavior: if allocation failed, fall back to * regular RAM allocation. From 0b15209571e5eae706027f42da2ecd175eddc4e3 Mon Sep 17 00:00:00 2001 From: linzhecheng Date: Sun, 14 Jan 2018 20:55:19 +0800 Subject: [PATCH 07/47] memory: set ioeventfd_update_pending after address_space_update_ioeventfds We should set ioeventfd_update_pending same as memory_region_update_pending. Signed-off-by: linzhecheng Message-Id: <1515934519-16158-1-git-send-email-linzc@zju.edu.cn> Cc: qemu-stable@nongnu.org Fixes: ade9c1aac5292ff698fa550adebe794c37d86cc9 Signed-off-by: Paolo Bonzini --- memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/memory.c b/memory.c index 449a1429b9..9e8349668d 100644 --- a/memory.c +++ b/memory.c @@ -1091,6 +1091,7 @@ void memory_region_transaction_commit(void) address_space_update_ioeventfds(as); } memory_region_update_pending = false; + ioeventfd_update_pending = false; MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); } else if (ioeventfd_update_pending) { QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { From c6caae553c65475009dc18fdae0c89a1a1d7b427 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 18 Jan 2018 10:52:45 +0800 Subject: [PATCH 08/47] scsi-generic: Simplify error handling code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coverity doesn't like the ignored return value introduced in 9d3b155186c278 (hw/block: Fix the return type), and other callers are converted already in ceff3e1f01. This one was added lately in d9bcd6f7f23a and missed the train. Do it now. Signed-off-by: Fam Zheng Message-Id: <20180118025245.13042-1-famz@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-generic.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index ba70c0dc19..7414fe2d67 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -482,7 +482,6 @@ static void scsi_generic_realize(SCSIDevice *s, Error **errp) int rc; int sg_version; struct sg_scsi_id scsiid; - Error *local_err = NULL; if (!s->conf.blk) { error_setg(errp, "drive property not set"); @@ -516,11 +515,9 @@ static void scsi_generic_realize(SCSIDevice *s, Error **errp) error_setg(errp, "SG_GET_SCSI_ID ioctl failed"); return; } - blkconf_apply_backend_options(&s->conf, - blk_is_read_only(s->conf.blk), - true, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!blkconf_apply_backend_options(&s->conf, + blk_is_read_only(s->conf.blk), + true, errp)) { return; } From 4e4b57342e14f260ca6e2d4cfcbafb876d227909 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 18 Jan 2018 13:20:49 +0800 Subject: [PATCH 09/47] chardev: fix incorrect unref of source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit glib reported error when pty chardev used: $ ./qemu-system-x86_64 -chardev pty,id=foo -device isa-serial,chardev=foo qemu-system-x86_64: -chardev pty,id=foo: char device redirected to /dev/pts/2 (label foo) (qemu-system-x86_64:27885): GLib-CRITICAL **: g_source_unref: assertion 'source != NULL' failed (qemu-system-x86_64:27885): GLib-CRITICAL **: g_source_unref: assertion 'source != NULL' failed This patch fixes that. Fixes: 2c716ba150 ("chardev: introduce qemu_chr_timeout_add_ms()") CC: Paolo Bonzini Reported-by: Marc-André Lureau Signed-off-by: Peter Xu Message-Id: <20180118052049.31119-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- chardev/char-pty.c | 48 +++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/chardev/char-pty.c b/chardev/char-pty.c index 89315e6807..68fd4e20c3 100644 --- a/chardev/char-pty.c +++ b/chardev/char-pty.c @@ -51,23 +51,6 @@ typedef struct { static void pty_chr_update_read_handler_locked(Chardev *chr); static void pty_chr_state(Chardev *chr, int connected); -static gboolean pty_chr_timer(gpointer opaque) -{ - struct Chardev *chr = CHARDEV(opaque); - PtyChardev *s = PTY_CHARDEV(opaque); - - qemu_mutex_lock(&chr->chr_write_lock); - s->timer_src = NULL; - g_source_unref(s->open_source); - s->open_source = NULL; - if (!s->connected) { - /* Next poll ... */ - pty_chr_update_read_handler_locked(chr); - } - qemu_mutex_unlock(&chr->chr_write_lock); - return FALSE; -} - static void pty_chr_timer_cancel(PtyChardev *s) { if (s->timer_src) { @@ -77,6 +60,31 @@ static void pty_chr_timer_cancel(PtyChardev *s) } } +static void pty_chr_open_src_cancel(PtyChardev *s) +{ + if (s->open_source) { + g_source_destroy(s->open_source); + g_source_unref(s->open_source); + s->open_source = NULL; + } +} + +static gboolean pty_chr_timer(gpointer opaque) +{ + struct Chardev *chr = CHARDEV(opaque); + PtyChardev *s = PTY_CHARDEV(opaque); + + qemu_mutex_lock(&chr->chr_write_lock); + pty_chr_timer_cancel(s); + pty_chr_open_src_cancel(s); + if (!s->connected) { + /* Next poll ... */ + pty_chr_update_read_handler_locked(chr); + } + qemu_mutex_unlock(&chr->chr_write_lock); + return FALSE; +} + /* Called with chr_write_lock held. */ static void pty_chr_rearm_timer(Chardev *chr, int ms) { @@ -195,11 +203,7 @@ static void pty_chr_state(Chardev *chr, int connected) PtyChardev *s = PTY_CHARDEV(chr); if (!connected) { - if (s->open_source) { - g_source_destroy(s->open_source); - g_source_unref(s->open_source); - s->open_source = NULL; - } + pty_chr_open_src_cancel(s); remove_fd_in_watch(chr); s->connected = 0; /* (re-)connect poll interval for idle guests: once per second. From 4183e2ea6d092ea9d7f18af085cb1076fae08512 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 18 Jan 2018 11:41:03 +0100 Subject: [PATCH 10/47] readline: don't free completions in readline_free() Since commit e5dc1a6c6c43, QEMU aborts on exit if completion was used in the monitor: *** Error in `obj/ppc64-softmmu/qemu-system-ppc64': double free or corruption (fasttop): 0x00000100331069d0 *** /home/greg/Work/qemu/qemu-spapr/util/readline.c:514 /home/greg/Work/qemu/qemu-spapr/monitor.c:586 /home/greg/Work/qemu/qemu-spapr/monitor.c:4125 argv=, envp=) at /home/greg/Work/qemu/qemu-spapr/vl.c:4795 Completion strings are not persistent accross completions (why would they?). They are allocated under readline_completion(), which already takes care of freeing them before returning. Maybe all completion related bits should be moved out of ReadLineState to a dedicated structure ? In the meantime, let's drop the offending lines from readline_free() to fix the crash. Signed-off-by: Greg Kurz Message-Id: <151627206353.4505.4602428849861610759.stgit@bahia.lan> Fixes: e5dc1a6c6c43 Signed-off-by: Paolo Bonzini --- util/readline.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/util/readline.c b/util/readline.c index 24ec839854..ec91ee0fea 100644 --- a/util/readline.c +++ b/util/readline.c @@ -510,9 +510,6 @@ void readline_free(ReadLineState *rs) for (i = 0; i < READLINE_MAX_CMDS; i++) { g_free(rs->history[i]); } - for (i = 0; i < READLINE_MAX_COMPLETIONS; i++) { - g_free(rs->completions[i]); - } g_free(rs); } From b90d80a73e2f546117a776e899f826e2a9eb8301 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 18 Jan 2018 18:11:37 +0100 Subject: [PATCH 11/47] scripts/qemu-gdb/timers.py: define encoding in header comment This is required otherwise python complains because of the accentuated letter in Alex's last name: Traceback (most recent call last): File "scripts/qemu-gdb.py", line 29, in from qemugdb import aio, mtree, coroutine, tcg, timers File "scripts/qemugdb/timers.py", line 1 SyntaxError: Non-ASCII character '\xc3' in file scripts/qemugdb/timers.py on line 1, but no encoding declared; see http://www.python.org/peps/pep-0263.html for details Signed-off-by: Greg Kurz Message-Id: <151629549711.18276.15497684562308683805.stgit@bahia.lan> Signed-off-by: Paolo Bonzini --- scripts/qemugdb/timers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/qemugdb/timers.py b/scripts/qemugdb/timers.py index be71a001e3..51ea04b5e2 100644 --- a/scripts/qemugdb/timers.py +++ b/scripts/qemugdb/timers.py @@ -1,4 +1,5 @@ #!/usr/bin/python +# -*- coding: utf-8 -*- # GDB debugging support # # Copyright 2017 Linaro Ltd From 9d70618c6842570226866f9409d7b8c4d6489da6 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 21 Nov 2017 15:08:08 +0000 Subject: [PATCH 12/47] memory-internal.h: Remove obsolete claim that header is obsolete The memory-internal.h header claims that it is for "obsolete exec.c functions" which "will be removed soon". This statement was added in 2011, six years ago, but the header is still here. (Admittedly none of the prototypes added in commit 67d95c153bef55f6 are still in the header.) It's convenient to have a place to put prototypes for functions which are used internally to the various .c files of the memory system or by the accel/tcg code, which is inevitably fairly closely coupled. So keep the header but update the comments to reflect what we're actually using it for. Signed-off-by: Peter Maydell Message-Id: <1511276888-17834-1-git-send-email-peter.maydell@linaro.org> Signed-off-by: Paolo Bonzini --- include/exec/memory-internal.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h index 98d82964cc..4162474fd5 100644 --- a/include/exec/memory-internal.h +++ b/include/exec/memory-internal.h @@ -1,5 +1,5 @@ /* - * Declarations for obsolete exec.c functions + * Declarations for functions which are internal to the memory subsystem. * * Copyright 2011 Red Hat, Inc. and/or its affiliates * @@ -12,8 +12,9 @@ */ /* - * This header is for use by exec.c and memory.c ONLY. Do not include it. - * The functions declared here will be removed soon. + * This header is for use by exec.c, memory.c and accel/tcg/cputlb.c ONLY, + * for declarations which are shared between the memory subsystem's + * internals and the TCG TLB code. Do not include it from elsewhere. */ #ifndef MEMORY_INTERNAL_H From 50876ead08531686275e6fe1999a2d80c7450d67 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 22 Jan 2018 16:27:33 +0100 Subject: [PATCH 13/47] i2c: Add a CONFIG_I2C master switch to the configuration files The i2c core and the at24c EEPROM should only be compiled and linked on the machines that support i2c. Otherwise it's quite strange to see the at24c-eeprom to be "available" on qemu-system-s390x for example. Signed-off-by: Thomas Huth Message-Id: <1516634853-15883-1-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- default-configs/arm-softmmu.mak | 1 + default-configs/i386-softmmu.mak | 1 + default-configs/mips-softmmu-common.mak | 1 + default-configs/ppc-softmmu.mak | 1 + default-configs/ppcemb-softmmu.mak | 1 + default-configs/x86_64-softmmu.mak | 1 + hw/i2c/Makefile.objs | 2 +- hw/nvram/Makefile.objs | 2 +- 8 files changed, 8 insertions(+), 2 deletions(-) diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index b0d6e65038..ca34cf4462 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -67,6 +67,7 @@ CONFIG_CADENCE=y CONFIG_XGMAC=y CONFIG_EXYNOS4=y CONFIG_PXA2XX=y +CONFIG_I2C=y CONFIG_BITBANG_I2C=y CONFIG_FRAMEBUFFER=y CONFIG_XILINX_SPIPS=y diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index ac27700e79..3326e3e0bb 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -62,3 +62,4 @@ CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y CONFIG_ACPI_VMGENID=y CONFIG_FW_CFG_DMA=y +CONFIG_I2C=y diff --git a/default-configs/mips-softmmu-common.mak b/default-configs/mips-softmmu-common.mak index 7d8f5db983..e31f046b3b 100644 --- a/default-configs/mips-softmmu-common.mak +++ b/default-configs/mips-softmmu-common.mak @@ -34,3 +34,4 @@ CONFIG_ISA_TESTDEV=y CONFIG_EMPTY_SLOT=y CONFIG_MIPS_CPS=y CONFIG_MIPS_ITU=y +CONFIG_I2C=y diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak index 3baed6a8fd..65680d85bc 100644 --- a/default-configs/ppc-softmmu.mak +++ b/default-configs/ppc-softmmu.mak @@ -23,6 +23,7 @@ CONFIG_PLATFORM_BUS=y CONFIG_ETSEC=y CONFIG_SM501=y CONFIG_IDE_SII3112=y +CONFIG_I2C=y # For Macs CONFIG_MAC=y diff --git a/default-configs/ppcemb-softmmu.mak b/default-configs/ppcemb-softmmu.mak index 5db4618a5a..bc5e1b3ffe 100644 --- a/default-configs/ppcemb-softmmu.mak +++ b/default-configs/ppcemb-softmmu.mak @@ -17,3 +17,4 @@ CONFIG_XILINX=y CONFIG_XILINX_ETHLITE=y CONFIG_SM501=y CONFIG_IDE_SII3112=y +CONFIG_I2C=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index b2104ade19..1c6cda1d9a 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -62,3 +62,4 @@ CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y CONFIG_ACPI_VMGENID=y CONFIG_FW_CFG_DMA=y +CONFIG_I2C=y diff --git a/hw/i2c/Makefile.objs b/hw/i2c/Makefile.objs index 0594dea3ae..37cacde978 100644 --- a/hw/i2c/Makefile.objs +++ b/hw/i2c/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y += core.o smbus.o smbus_eeprom.o +common-obj-$(CONFIG_I2C) += core.o smbus.o smbus_eeprom.o common-obj-$(CONFIG_DDC) += i2c-ddc.o common-obj-$(CONFIG_VERSATILE_I2C) += versatile_i2c.o common-obj-$(CONFIG_ACPI_X86) += smbus_ich9.o diff --git a/hw/nvram/Makefile.objs b/hw/nvram/Makefile.objs index 0f4ee71dcb..a912d25391 100644 --- a/hw/nvram/Makefile.objs +++ b/hw/nvram/Makefile.objs @@ -1,6 +1,6 @@ common-obj-$(CONFIG_DS1225Y) += ds1225y.o common-obj-y += eeprom93xx.o -common-obj-y += eeprom_at24c.o +common-obj-$(CONFIG_I2C) += eeprom_at24c.o common-obj-y += fw_cfg.o common-obj-y += chrp_nvram.o common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o From d781e24d05dfc7737af577ad9f03e9064015c337 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Tue, 23 Jan 2018 14:36:38 +0200 Subject: [PATCH 14/47] Add missing hvdos public domain attribution: hvf.c and vmx.h contain code from hvdos.c that is released as public domain: from hvdos github: https://github.com/mist64/hvdos "License See LICENSE.txt (2-clause-BSD). In order to simplify use of this code as a template, you can consider any parts from "hvdos.c" and "interface.h" as being in the public domain." Signed-off-by: Izik Eidus Message-Id: <20180123123639.35255-2-izik@veertu.com> Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 3 +++ target/i386/hvf/vmx.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 010866ed22..ab4820c3f5 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -17,6 +17,9 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . + * + * This file contain code under public domain from the hvdos project: + * https://github.com/mist64/hvdos */ #include "qemu/osdep.h" #include "qemu-common.h" diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 9dfcd2f2eb..162a7d51ae 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -17,6 +17,9 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . + * + * This file contain code under public domain from the hvdos project: + * https://github.com/mist64/hvdos */ #ifndef VMX_H From 4d98a8e5ecfc6b26c8b5d79feb25a3d0330638c5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 26 Jan 2018 11:37:32 +0100 Subject: [PATCH 15/47] hvf: ept_emulation_fault() needs NetApp BSD attribution Add the BSD license there. Reported-by: Izik Eidus Message-Id: <20180123123639.35255-3-izik@veertu.com> Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index ab4820c3f5..85e5964365 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -20,6 +20,30 @@ * * This file contain code under public domain from the hvdos project: * https://github.com/mist64/hvdos + * + * Parts Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include "qemu/osdep.h" #include "qemu-common.h" From b1cef6d02f84bd842fb94a6109ad4e2ad873e8e5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 25 Jan 2018 16:19:49 +0000 Subject: [PATCH 16/47] Drop remaining bits of ia64 host support We dropped support for ia64 host CPUs in the 2.11 release (removing the TCG backend for it, and advertising the support as being completely removed in the changelog). However there are a few bits and pieces of code still floating about. Remove those, too. We can drop the check in configure for "ia64 or hppa host?" entirely, because we don't support hppa hosts either any more. Signed-off-by: Peter Maydell Message-Id: <1516897189-11035-1-git-send-email-peter.maydell@linaro.org> Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- accel/tcg/user-exec.c | 33 --------------------------------- configure | 5 ----- include/qemu/processor.h | 3 --- include/qemu/timer.h | 9 --------- linux-user/syscall.c | 7 +------ 5 files changed, 1 insertion(+), 56 deletions(-) diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index a0a4a1924e..77899584f2 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -503,39 +503,6 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } -#elif defined(__ia64) - -#ifndef __ISR_VALID - /* This ought to be in ... */ -# define __ISR_VALID 1 -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - unsigned long ip; - int is_write = 0; - - ip = uc->uc_mcontext.sc_ip; - switch (host_signum) { - case SIGILL: - case SIGFPE: - case SIGSEGV: - case SIGBUS: - case SIGTRAP: - if (info->si_code && (info->si_segvflags & __ISR_VALID)) { - /* ISR.W (write-access) is bit 33: */ - is_write = (info->si_isr >> 33) & 1; - } - break; - - default: - break; - } - return handle_cpu_signal(ip, info, is_write, (sigset_t *)&uc->uc_sigmask); -} - #elif defined(__s390__) int cpu_signal_handler(int host_signum, void *pinfo, diff --git a/configure b/configure index 302fdc92ff..bdbd097c84 100755 --- a/configure +++ b/configure @@ -636,8 +636,6 @@ elif check_define _ARCH_PPC ; then fi elif check_define __mips__ ; then cpu="mips" -elif check_define __ia64__ ; then - cpu="ia64" elif check_define __s390__ ; then if check_define __s390x__ ; then cpu="s390x" @@ -4744,9 +4742,6 @@ if test "$coroutine_pool" = ""; then fi if test "$debug_stack_usage" = "yes"; then - if test "$cpu" = "ia64" -o "$cpu" = "hppa"; then - error_exit "stack usage debugging is not supported for $cpu" - fi if test "$coroutine_pool" = "yes"; then echo "WARN: disabling coroutine pool for stack usage debugging" coroutine_pool=no diff --git a/include/qemu/processor.h b/include/qemu/processor.h index 8b2570283a..8e16c9277d 100644 --- a/include/qemu/processor.h +++ b/include/qemu/processor.h @@ -12,9 +12,6 @@ #if defined(__i386__) || defined(__x86_64__) # define cpu_relax() asm volatile("rep; nop" ::: "memory") -#elif defined(__ia64__) -# define cpu_relax() asm volatile("hint @pause" ::: "memory") - #elif defined(__aarch64__) # define cpu_relax() asm volatile("yield" ::: "memory") diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 1b518bca30..3b5a54b014 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -931,15 +931,6 @@ static inline int64_t cpu_get_host_ticks(void) return val; } -#elif defined(__ia64) - -static inline int64_t cpu_get_host_ticks(void) -{ - int64_t val; - asm volatile ("mov %0 = ar.itc" : "=r"(val) :: "memory"); - return val; -} - #elif defined(__s390__) static inline int64_t cpu_get_host_ticks(void) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 74378947f0..df1edf0cd3 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -36,10 +36,6 @@ #include #include #include -#ifdef __ia64__ -int __clone2(int (*fn)(void *), void *child_stack_base, - size_t stack_size, int flags, void *arg, ...); -#endif #include #include #include @@ -246,8 +242,7 @@ static type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5, \ #define __NR_sys_inotify_add_watch __NR_inotify_add_watch #define __NR_sys_inotify_rm_watch __NR_inotify_rm_watch -#if defined(__alpha__) || defined (__ia64__) || defined(__x86_64__) || \ - defined(__s390x__) +#if defined(__alpha__) || defined(__x86_64__) || defined(__s390x__) #define __NR__llseek __NR_lseek #endif From 3e32370a96d2ad82839d97e7e622bca793de8af5 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Thu, 25 Jan 2018 17:14:12 +0000 Subject: [PATCH 17/47] sockets: fix parsing of ipv4/ipv6 opts in parse_socket_addr The inet_parse() function looks for 'ipv4' and 'ipv6' flags, but only treats them as bare bool flags. The normal QemuOpts parsing would allow on/off values to be set too. This updates inet_parse() so that its handling of the 'ipv4' and 'ipv6' flags matches that done by QemuOpts. This impacts the NBD block driver parsing the legacy filename syntax and the migration code parsing the socket scheme. Signed-off-by: Daniel P. Berrange Message-Id: <20180125171412.21627-1-berrange@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini --- util/qemu-sockets.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index d6a1e1759e..fbbef69f62 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -554,6 +554,33 @@ err: } /* compatibility wrapper */ +static int inet_parse_flag(const char *flagname, const char *optstr, bool *val, + Error **errp) +{ + char *end; + size_t len; + + end = strstr(optstr, ","); + if (end) { + if (end[1] == ',') { /* Reject 'ipv6=on,,foo' */ + error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); + return -1; + } + len = end - optstr; + } else { + len = strlen(optstr); + } + if (len == 0 || (len == 3 && strncmp(optstr, "=on", len) == 0)) { + *val = true; + } else if (len == 4 && strncmp(optstr, "=off", len) == 0) { + *val = false; + } else { + error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); + return -1; + } + return 0; +} + int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) { const char *optstr, *h; @@ -561,6 +588,7 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) char port[33]; int to; int pos; + char *begin; memset(addr, 0, sizeof(*addr)); @@ -602,11 +630,19 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) addr->has_to = true; addr->to = to; } - if (strstr(optstr, ",ipv4")) { - addr->ipv4 = addr->has_ipv4 = true; + begin = strstr(optstr, ",ipv4"); + if (begin) { + if (inet_parse_flag("ipv4", begin + 5, &addr->ipv4, errp) < 0) { + return -1; + } + addr->has_ipv4 = true; } - if (strstr(optstr, ",ipv6")) { - addr->ipv6 = addr->has_ipv6 = true; + begin = strstr(optstr, ",ipv6"); + if (begin) { + if (inet_parse_flag("ipv6", begin + 5, &addr->ipv6, errp) < 0) { + return -1; + } + addr->has_ipv6 = true; } return 0; } From 6c549dc14113a8a389ef6cf8c9078df66e208ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 16 Jan 2018 16:11:50 +0100 Subject: [PATCH 18/47] exynos4210: workaround UBSAN compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc 5.4.0-6ubuntu1~16.04.5 build with UBSAN enabled error: CC hw/display/exynos4210_fimd.o /home/petmay01/linaro/qemu-for-merges/hw/display/exynos4210_fimd.c: In function ‘fimd_get_buffer_id’: /home/petmay01/linaro/qemu-for-merges/hw/display/exynos4210_fimd.c:1105:5: error: case label does not reduce to an integer constant case FIMD_WINCON_BUF2_STAT: Because FIMD_WINCON_BUF2_STAT case contains an integer overflow, use U suffix to get the unsigned type. Signed-off-by: Marc-André Lureau Message-Id: <20180116151152.4040-2-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- hw/display/exynos4210_fimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c index fd0b2bec65..86e37e93e9 100644 --- a/hw/display/exynos4210_fimd.c +++ b/hw/display/exynos4210_fimd.c @@ -98,7 +98,7 @@ #define FIMD_WINCON_BUFSTATUS ((1 << 21) | (1 << 31)) #define FIMD_WINCON_BUF0_STAT ((0 << 21) | (0 << 31)) #define FIMD_WINCON_BUF1_STAT ((1 << 21) | (0 << 31)) -#define FIMD_WINCON_BUF2_STAT ((0 << 21) | (1 << 31)) +#define FIMD_WINCON_BUF2_STAT ((0 << 21) | (1U << 31)) #define FIMD_WINCON_BUFSELECT ((1 << 20) | (1 << 30)) #define FIMD_WINCON_BUF0_SEL ((0 << 20) | (0 << 30)) #define FIMD_WINCON_BUF1_SEL ((1 << 20) | (0 << 30)) From 247724cb302af5d70c8853154b640dfabf2bbb56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 16 Jan 2018 16:11:51 +0100 Subject: [PATCH 19/47] build-sys: add --enable-sanitizers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typical slowdown introduced by AddressSanitizer is 2x. UBSan shouldn't have much impact on runtime cost. Enable it by default when --enable-debug, unless --disable-sanitizers. Signed-off-by: Marc-André Lureau Message-Id: <20180116151152.4040-3-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- configure | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/configure b/configure index bdbd097c84..35147adc8e 100755 --- a/configure +++ b/configure @@ -342,6 +342,7 @@ rdma="" gprof="no" debug_tcg="no" debug="no" +sanitizers="no" fortify_source="" strip_opt="yes" tcg_interpreter="no" @@ -993,6 +994,10 @@ for opt do strip_opt="no" fortify_source="no" ;; + --enable-sanitizers) sanitizers="yes" + ;; + --disable-sanitizers) sanitizers="no" + ;; --enable-sparse) sparse="yes" ;; --disable-sparse) sparse="no" @@ -1474,6 +1479,7 @@ Advanced options (experts only): --firmwarepath=PATH search PATH for firmware files --with-confsuffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir [$confsuffix] --enable-debug enable common debug build options + --enable-sanitizers enable default sanitizers --disable-strip disable stripping binaries --disable-werror disable compilation abort on warning --disable-stack-protector disable compiler-provided stack protection @@ -5200,6 +5206,23 @@ if compile_prog "" "" ; then have_utmpx=yes fi +########################################## +# checks for sanitizers + +write_c_skeleton + +have_asan=no +have_ubsan=no + +if test "$sanitizers" = "yes" ; then + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" ""; then + have_asan=yes + fi + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=undefined" ""; then + have_ubsan=yes + fi +fi + ########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -5224,6 +5247,13 @@ else CFLAGS="-O2 $CFLAGS" fi +if test "$have_asan" = "yes"; then + CFLAGS="-fsanitize=address $CFLAGS" +fi +if test "$have_ubsan" = "yes"; then + CFLAGS="-fsanitize=undefined $CFLAGS" +fi + ########################################## # Do we have libnfs if test "$libnfs" != "no" ; then From d83414e1fd1941ca8228b5cf6a06697bd1ff7f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 16 Jan 2018 16:11:52 +0100 Subject: [PATCH 20/47] ucontext: annotate coroutine stack for ASAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It helps ASAN to detect more leaks on coroutine stacks, and to get rid of some extra warnings. Before: tests/test-coroutine -p /basic/lifecycle /basic/lifecycle: ==20781==WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false positives in some cases! ==20781==WARNING: ASan is ignoring requested __asan_handle_no_return: stack top: 0x7ffcb184d000; bottom 0x7ff6c4cfd000; size: 0x0005ecb50000 (25446121472) False positive error reports may follow For details see https://github.com/google/sanitizers/issues/189 OK After: tests/test-coroutine -p /basic/lifecycle /basic/lifecycle: ==21110==WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false positives in some cases! OK A similar work would need to be done for sigaltstack & windows fibers to have similar coverage. Since ucontext is preferred, I didn't bother checking the other coroutine implementations for now. Update travis to fix the build with ASAN annotations. Signed-off-by: Marc-André Lureau Message-Id: <20180116151152.4040-4-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- .travis.yml | 3 ++- configure | 30 ++++++++++++++++++++++++ include/qemu/compiler.h | 4 ++++ util/coroutine-ucontext.c | 48 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f583839755..f2291e87a6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,12 +13,13 @@ addons: - libattr1-dev - libbrlapi-dev - libcap-ng-dev + - libgcc-6-dev - libgnutls-dev - libgtk-3-dev - libiscsi-dev - liblttng-ust-dev - - libnfs-dev - libncurses5-dev + - libnfs-dev - libnss3-dev - libpixman-1-dev - libpng12-dev diff --git a/configure b/configure index 35147adc8e..529aad5491 100755 --- a/configure +++ b/configure @@ -5213,6 +5213,8 @@ write_c_skeleton have_asan=no have_ubsan=no +have_asan_iface_h=no +have_asan_iface_fiber=no if test "$sanitizers" = "yes" ; then if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" ""; then @@ -5221,12 +5223,29 @@ if test "$sanitizers" = "yes" ; then if compile_prog "$CPU_CFLAGS -Werror -fsanitize=undefined" ""; then have_ubsan=yes fi + + if check_include "sanitizer/asan_interface.h" ; then + have_asan_iface_h=yes + fi + + cat > $TMPC << EOF +#include +int main(void) { + __sanitizer_start_switch_fiber(0, 0, 0); + return 0; +} +EOF + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" "" ; then + have_asan_iface_fiber=yes + fi fi ########################################## # End of CC checks # After here, no more $cc or $ld runs +write_c_skeleton + if test "$gcov" = "yes" ; then CFLAGS="-fprofile-arcs -ftest-coverage -g $CFLAGS" LDFLAGS="-fprofile-arcs -ftest-coverage $LDFLAGS" @@ -5249,6 +5268,13 @@ fi if test "$have_asan" = "yes"; then CFLAGS="-fsanitize=address $CFLAGS" + if test "$have_asan_iface_h" = "no" ; then + echo "ASAN build enabled, but ASAN header missing." \ + "Without code annotation, the report may be inferior." + elif test "$have_asan_iface_fiber" = "no" ; then + echo "ASAN build enabled, but ASAN header is too old." \ + "Without code annotation, the report may be inferior." + fi fi if test "$have_ubsan" = "yes"; then CFLAGS="-fsanitize=undefined $CFLAGS" @@ -6237,6 +6263,10 @@ if test "$valgrind_h" = "yes" ; then echo "CONFIG_VALGRIND_H=y" >> $config_host_mak fi +if test "$have_asan_iface_fiber" = "yes" ; then + echo "CONFIG_ASAN_IFACE_FIBER=y" >> $config_host_mak +fi + if test "$has_environ" = "yes" ; then echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak fi diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 340e5fdc09..5fcc4f7ec7 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -111,4 +111,8 @@ #define GCC_FMT_ATTR(n, m) #endif +#ifndef __has_feature +#define __has_feature(x) 0 /* compatibility with non-clang compilers */ +#endif + #endif /* COMPILER_H */ diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 6621f3f692..926d3402e3 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -31,6 +31,13 @@ #include #endif +#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer) +#ifdef CONFIG_ASAN_IFACE_FIBER +#define CONFIG_ASAN 1 +#include +#endif +#endif + typedef struct { Coroutine base; void *stack; @@ -59,11 +66,37 @@ union cc_arg { int i[2]; }; +static void finish_switch_fiber(void *fake_stack_save) +{ +#ifdef CONFIG_ASAN + const void *bottom_old; + size_t size_old; + + __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); + + if (!leader.stack) { + leader.stack = (void *)bottom_old; + leader.stack_size = size_old; + } +#endif +} + +static void start_switch_fiber(void **fake_stack_save, + const void *bottom, size_t size) +{ +#ifdef CONFIG_ASAN + __sanitizer_start_switch_fiber(fake_stack_save, bottom, size); +#endif +} + static void coroutine_trampoline(int i0, int i1) { union cc_arg arg; CoroutineUContext *self; Coroutine *co; + void *fake_stack_save = NULL; + + finish_switch_fiber(NULL); arg.i[0] = i0; arg.i[1] = i1; @@ -72,9 +105,13 @@ static void coroutine_trampoline(int i0, int i1) /* Initialize longjmp environment and switch back the caller */ if (!sigsetjmp(self->env, 0)) { + start_switch_fiber(&fake_stack_save, + leader.stack, leader.stack_size); siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); } + finish_switch_fiber(fake_stack_save); + while (true) { co->entry(co->entry_arg); qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); @@ -87,6 +124,7 @@ Coroutine *qemu_coroutine_new(void) ucontext_t old_uc, uc; sigjmp_buf old_env; union cc_arg arg = {0}; + void *fake_stack_save = NULL; /* The ucontext functions preserve signal masks which incurs a * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not @@ -122,8 +160,12 @@ Coroutine *qemu_coroutine_new(void) /* swapcontext() in, siglongjmp() back out */ if (!sigsetjmp(old_env, 0)) { + start_switch_fiber(&fake_stack_save, co->stack, co->stack_size); swapcontext(&old_uc, &uc); } + + finish_switch_fiber(fake_stack_save); + return &co->base; } @@ -169,13 +211,19 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); int ret; + void *fake_stack_save = NULL; current = to_; ret = sigsetjmp(from->env, 0); if (ret == 0) { + start_switch_fiber(action == COROUTINE_TERMINATE ? + NULL : &fake_stack_save, to->stack, to->stack_size); siglongjmp(to->env, action); } + + finish_switch_fiber(fake_stack_save); + return ret; } From 0750b060216de69ed1f14bc08181bf4ad27fc622 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 22 Jan 2018 14:02:41 +0800 Subject: [PATCH 21/47] vhost: add traces for memory listeners Trace these operations on two memory listeners. It helps to verify the new memory listener fix, and good to keep them there. Signed-off-by: Peter Xu Message-Id: <20180122060244.29368-2-peterx@redhat.com> Acked-by: Michael S. Tsirkin Reviewed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini --- hw/virtio/trace-events | 6 ++++++ hw/virtio/vhost.c | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 775461ae98..2b8f81eb25 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -25,3 +25,9 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d" virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d" virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: 0x%"PRIx64" num_pages: %d" + +# hw/virtio/vhost.c +vhost_region_add(void *p, const char *mr) "dev %p mr %s" +vhost_region_del(void *p, const char *mr) "dev %p mr %s" +vhost_iommu_region_add(void *p, const char *mr) "dev %p mr %s" +vhost_iommu_region_del(void *p, const char *mr) "dev %p mr %s" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 386aef85be..c4f654c704 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -27,6 +27,7 @@ #include "hw/virtio/virtio-access.h" #include "migration/blocker.h" #include "sysemu/dma.h" +#include "trace.h" /* enabled until disconnected backend stabilizes */ #define _VHOST_DEBUG 1 @@ -687,6 +688,7 @@ static void vhost_region_add(MemoryListener *listener, return; } + trace_vhost_region_add(dev, section->mr->name ?: NULL); ++dev->n_mem_sections; dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections, dev->n_mem_sections); @@ -706,6 +708,7 @@ static void vhost_region_del(MemoryListener *listener, return; } + trace_vhost_region_del(dev, section->mr->name ?: NULL); vhost_set_memory(listener, section, false); memory_region_unref(section->mr); for (i = 0; i < dev->n_mem_sections; ++i) { @@ -743,6 +746,8 @@ static void vhost_iommu_region_add(MemoryListener *listener, return; } + trace_vhost_iommu_region_add(dev, section->mr->name ?: NULL); + iommu = g_malloc0(sizeof(*iommu)); end = int128_add(int128_make64(section->offset_within_region), section->size); @@ -771,6 +776,8 @@ static void vhost_iommu_region_del(MemoryListener *listener, return; } + trace_vhost_iommu_region_del(dev, section->mr->name ?: NULL); + QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) { if (iommu->mr == section->mr && iommu->n.start == section->offset_within_region) { From 0bbe435410c37c230c6e7bd926931a13810816e2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 22 Jan 2018 14:02:42 +0800 Subject: [PATCH 22/47] arm: postpone device listener unregister It's a preparation for follow-up patch to call region_del() in memory_listener_unregister(), otherwise all device addr attached with kvm_devices_head will be reset before calling kvm_arm_set_device_addr. Signed-off-by: Peter Xu Message-Id: <20180122060244.29368-3-peterx@redhat.com> Reviewed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini --- target/arm/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 211a7bf7be..1219d0062b 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -266,7 +266,6 @@ static void kvm_arm_machine_init_done(Notifier *notifier, void *data) { KVMDevice *kd, *tkd; - memory_listener_unregister(&devlistener); QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) { if (kd->kda.addr != -1) { kvm_arm_set_device_addr(kd); @@ -274,6 +273,7 @@ static void kvm_arm_machine_init_done(Notifier *notifier, void *data) memory_region_unref(kd->mr); g_free(kd); } + memory_listener_unregister(&devlistener); } static Notifier notify = { From 369686267a4612d4000a1b67720c7f0aedd27539 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 22 Jan 2018 14:02:43 +0800 Subject: [PATCH 23/47] vfio: listener unregister before unset container After next patch, listener unregister will need the container to be alive. Let's move this unregister phase to be before unset container, since that operation will free the backend container in kernel, otherwise we'll get these after next patch: qemu-system-x86_64: VFIO_UNMAP_DMA: -22 qemu-system-x86_64: vfio_dma_unmap(0x559bf53a4590, 0x0, 0xa0000) = -22 (Invalid argument) Signed-off-by: Peter Xu Message-Id: <20180122060244.29368-4-peterx@redhat.com> Reviewed-by: Paolo Bonzini Acked-by: Alex Williamson Signed-off-by: Paolo Bonzini --- hw/vfio/common.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index b77be3a8b3..76cf28d462 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1161,19 +1161,27 @@ static void vfio_disconnect_container(VFIOGroup *group) { VFIOContainer *container = group->container; + QLIST_REMOVE(group, container_next); + group->container = NULL; + + /* + * Explicitly release the listener first before unset container, + * since unset may destroy the backend container if it's the last + * group. + */ + if (QLIST_EMPTY(&container->group_list)) { + vfio_listener_release(container); + } + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { error_report("vfio: error disconnecting group %d from container", group->groupid); } - QLIST_REMOVE(group, container_next); - group->container = NULL; - if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; VFIOGuestIOMMU *giommu, *tmp; - vfio_listener_release(container); QLIST_REMOVE(container, next); QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { From d25836cafd7508090d211e97acfc0abc5ae88daa Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 22 Jan 2018 14:02:44 +0800 Subject: [PATCH 24/47] memory: do explicit cleanup when remove listeners When unregister memory listeners, we should call, e.g., region_del() (and possibly other undo operations) on every existing memory region sections there, otherwise we may leak resources that are held during the region_add(). This patch undo the stuff for the listeners, which emulates the case when the address space is set from current to an empty state. I found this problem when debugging a refcount leak issue that leads to a device unplug event lost (please see the "Bug:" line below). In that case, the leakage of resource is the PCI BAR memory region refcount. And since memory regions are not keeping their own refcount but onto their owners, so the vfio-pci device's (who is the owner of the PCI BAR memory regions) refcount is leaked, and event missing. We had encountered similar issues before and fixed in other way (ee4c112846, "vhost: Release memory references on cleanup"). This patch can be seen as a more high-level fix of similar problems that are caused by the resource leaks from memory listeners. So now we can remove the explicit unref of memory regions since that'll be done altogether during unregistering of listeners now. Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1531393 Signed-off-by: Peter Xu Message-Id: <20180122060244.29368-5-peterx@redhat.com> Reviewed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini --- hw/virtio/vhost.c | 4 ---- memory.c | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index c4f654c704..d16c0c813d 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1368,10 +1368,6 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) if (hdev->mem) { /* those are only safe after successful init */ memory_listener_unregister(&hdev->memory_listener); - for (i = 0; i < hdev->n_mem_sections; ++i) { - MemoryRegionSection *section = &hdev->mem_sections[i]; - memory_region_unref(section->mr); - } QLIST_REMOVE(hdev, entry); } if (hdev->migration_blocker) { diff --git a/memory.c b/memory.c index 9e8349668d..5686698542 100644 --- a/memory.c +++ b/memory.c @@ -2612,6 +2612,32 @@ static void listener_add_address_space(MemoryListener *listener, flatview_unref(view); } +static void listener_del_address_space(MemoryListener *listener, + AddressSpace *as) +{ + FlatView *view; + FlatRange *fr; + + if (listener->begin) { + listener->begin(listener); + } + view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + MemoryRegionSection section = section_from_flat_range(fr, view); + + if (fr->dirty_log_mask && listener->log_stop) { + listener->log_stop(listener, §ion, fr->dirty_log_mask, 0); + } + if (listener->region_del) { + listener->region_del(listener, §ion); + } + } + if (listener->commit) { + listener->commit(listener); + } + flatview_unref(view); +} + void memory_listener_register(MemoryListener *listener, AddressSpace *as) { MemoryListener *other = NULL; @@ -2652,6 +2678,7 @@ void memory_listener_unregister(MemoryListener *listener) return; } + listener_del_address_space(listener, listener->address_space); QTAILQ_REMOVE(&memory_listeners, listener, link); QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as); listener->address_space = NULL; From a8aa6197a2aec4ca4ef5a4fdd8a39216e7f42da6 Mon Sep 17 00:00:00 2001 From: Klim Kireev Date: Thu, 25 Jan 2018 16:51:29 +0300 Subject: [PATCH 25/47] chardev/char-socket: add POLLHUP handler The following behavior was observed for QEMU configured by libvirt to use guest agent as usual for the guests without virtio-serial driver (Windows or the guest remaining in BIOS stage). In QEMU on first connect to listen character device socket the listen socket is removed from poll just after the accept(). virtio_serial_guest_ready() returns 0 and the descriptor of the connected Unix socket is removed from poll and it will not be present in poll() until the guest will initialize the driver and change the state of the serial to "guest connected". In libvirt connect() to guest agent is performed on restart and is run under VM state lock. Connect() is blocking and can wait forever. In this case libvirt can not perform ANY operation on that VM. The bug can be easily reproduced this way: Terminal 1: qemu-system-x86_64 -m 512 -device pci-serial,chardev=serial1 -chardev socket,id=serial1,path=/tmp/console.sock,server,nowait (virtio-serial and isa-serial also fit) Terminal 2: minicom -D unix\#/tmp/console.sock (type something and press enter) C-a x (to exit) Do 3 times: minicom -D unix\#/tmp/console.sock C-a x It needs 4 connections, because the first one is accepted by QEMU, then two are queued by the kernel, and the 4th blocks. The problem is that QEMU doesn't add a read watcher after succesful read until the guest device wants to acquire recieved data, so I propose to install a separate pullhup watcher regardless of whether the device waits for data or not. Signed-off-by: Klim Kireev Message-Id: <20180125135129.9305-1-klim.kireev@virtuozzo.com> Signed-off-by: Paolo Bonzini --- chardev/char-socket.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/chardev/char-socket.c b/chardev/char-socket.c index 77cdf487eb..a340af6cd3 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -42,6 +42,7 @@ typedef struct { QIOChannel *ioc; /* Client I/O channel */ QIOChannelSocket *sioc; /* Client master channel */ QIONetListener *listener; + GSource *hup_source; QCryptoTLSCreds *tls_creds; int connected; int max_size; @@ -352,6 +353,12 @@ static void tcp_chr_free_connection(Chardev *chr) s->read_msgfds_num = 0; } + if (s->hup_source != NULL) { + g_source_destroy(s->hup_source); + g_source_unref(s->hup_source); + s->hup_source = NULL; + } + tcp_set_msgfds(chr, NULL, 0); remove_fd_in_watch(chr); object_unref(OBJECT(s->sioc)); @@ -455,6 +462,15 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) return TRUE; } +static gboolean tcp_chr_hup(QIOChannel *channel, + GIOCondition cond, + void *opaque) +{ + Chardev *chr = CHARDEV(opaque); + tcp_chr_disconnect(chr); + return G_SOURCE_REMOVE; +} + static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len) { SocketChardev *s = SOCKET_CHARDEV(chr); @@ -528,6 +544,12 @@ static void tcp_chr_connect(void *opaque) tcp_chr_read, chr, chr->gcontext); } + + s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP); + g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup, + chr, NULL); + g_source_attach(s->hup_source, chr->gcontext); + qemu_chr_be_event(chr, CHR_EVENT_OPENED); } From e6a354be6ea0a52f5921f230a91518625247af82 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Mon, 11 Dec 2017 08:21:07 +0100 Subject: [PATCH 26/47] ivshmem: Don't update non-existent MSI routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of commit 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications"), QEMU crashes with: kvm_irqchip_commit_routes: Assertion `ret == 0' failed. if the ivshmem device is configured with more vectors than what the server supports. This is caused by the ivshmem_vector_unmask() being called on vectors that have not been initialized by ivshmem_add_kvm_msi_virq(). This commit fixes it by adding a simple check to the mask and unmask callbacks. Note that the opposite mismatch, if the server supplies more vectors than what the device is configured for, is already handled and leads to output like: Too many eventfd received, device has 1 vectors To reproduce the assert, run: ivshmem-server -n 0 and QEMU with: -device ivshmem-doorbell,chardev=iv -chardev socket,path=/tmp/ivshmem_socket,id=iv then load the Windows driver, at the time of writing available at: https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem The issue is believed to have been masked by other guest drivers, notably Linux ones, not enabling MSI-X on the device. Fixes: 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications") Signed-off-by: Ladi Prosek Reviewed-by: Marc-André Lureau Reviewed-by: Markus Armbruster Message-Id: <20171211072110.9058-2-lprosek@redhat.com> Signed-off-by: Paolo Bonzini --- hw/misc/ivshmem.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 4919011f38..0b471d9a89 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -316,6 +316,10 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, int ret; IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); + if (!v->pdev) { + error_report("ivshmem: vector %d route does not exist", vector); + return -EINVAL; + } ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); if (ret < 0) { @@ -330,12 +334,16 @@ static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) { IVShmemState *s = IVSHMEM_COMMON(dev); EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; + MSIVector *v = &s->msi_vectors[vector]; int ret; IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); + if (!v->pdev) { + error_report("ivshmem: vector %d route does not exist", vector); + return; + } - ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, - s->msi_vectors[vector].virq); + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); if (ret != 0) { error_report("remove_irqfd_notifier_gsi failed"); } From 089fd80376196adc0274a53eb9729c3ef7ee5ae7 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Mon, 11 Dec 2017 08:21:08 +0100 Subject: [PATCH 27/47] ivshmem: Always remove irqfd notifiers As of commit 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications"), QEMU crashes with: ivshmem: msix_set_vector_notifiers failed msix_unset_vector_notifiers: Assertion `dev->msix_vector_use_notifier && dev->msix_vector_release_notifier' failed. if MSI-X is repeatedly enabled and disabled on the ivshmem device, for example by loading and unloading the Windows ivshmem driver. This is because msix_unset_vector_notifiers() doesn't call any of the release notifier callbacks since MSI-X is already disabled at that point (msix_enabled() returning false is how this transition is detected in the first place). Thus ivshmem_vector_mask() doesn't run and when MSI-X is subsequently enabled again ivshmem_vector_unmask() fails. This is fixed by keeping track of unmasked vectors and making sure that ivshmem_vector_mask() always runs on MSI-X disable. Fixes: 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications") Signed-off-by: Ladi Prosek Reviewed-by: Markus Armbruster Message-Id: <20171211072110.9058-3-lprosek@redhat.com> Signed-off-by: Paolo Bonzini --- hw/misc/ivshmem.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 0b471d9a89..95e85e4c10 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -76,6 +76,7 @@ typedef struct Peer { typedef struct MSIVector { PCIDevice *pdev; int virq; + bool unmasked; } MSIVector; typedef struct IVShmemState { @@ -320,6 +321,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, error_report("ivshmem: vector %d route does not exist", vector); return -EINVAL; } + assert(!v->unmasked); ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); if (ret < 0) { @@ -327,7 +329,13 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, } kvm_irqchip_commit_routes(kvm_state); - return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); + ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); + if (ret < 0) { + return ret; + } + v->unmasked = true; + + return 0; } static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) @@ -342,11 +350,14 @@ static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) error_report("ivshmem: vector %d route does not exist", vector); return; } + assert(v->unmasked); ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); - if (ret != 0) { + if (ret < 0) { error_report("remove_irqfd_notifier_gsi failed"); + return; } + v->unmasked = false; } static void ivshmem_vector_poll(PCIDevice *dev, @@ -816,11 +827,20 @@ static void ivshmem_disable_irqfd(IVShmemState *s) PCIDevice *pdev = PCI_DEVICE(s); int i; + msix_unset_vector_notifiers(pdev); + for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { + /* + * MSI-X is already disabled here so msix_unset_vector_notifiers() + * didn't call our release notifier. Do it now to keep our masks and + * unmasks balanced. + */ + if (s->msi_vectors[i].unmasked) { + ivshmem_vector_mask(pdev, i); + } ivshmem_remove_kvm_msi_virq(s, i); } - msix_unset_vector_notifiers(pdev); } static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, From 0b88dd942073e7e65f095551d60be5dc0c8e1413 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Mon, 11 Dec 2017 08:21:09 +0100 Subject: [PATCH 28/47] ivshmem: Improve MSI irqfd error handling Adds a rollback path to ivshmem_enable_irqfd() and fixes ivshmem_disable_irqfd() to bail if irqfd has not been enabled. To reproduce, run: ivshmem-server -n 0 and QEMU with: -device ivshmem-doorbell,chardev=iv -chardev socket,path=/tmp/ivshmem_socket,id=iv then load, unload, and load again the Windows driver, at the time of writing available at: https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem The issue is believed to have been masked by other guest drivers, notably Linux ones, not enabling MSI-X on the device. Signed-off-by: Ladi Prosek Reviewed-by: Markus Armbruster Message-Id: <20171211072110.9058-4-lprosek@redhat.com> Signed-off-by: Paolo Bonzini --- hw/misc/ivshmem.c | 57 ++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 95e85e4c10..fe1d8d1669 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -785,29 +785,6 @@ static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp) return 0; } -static void ivshmem_enable_irqfd(IVShmemState *s) -{ - PCIDevice *pdev = PCI_DEVICE(s); - int i; - - for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { - Error *err = NULL; - - ivshmem_add_kvm_msi_virq(s, i, &err); - if (err) { - error_report_err(err); - /* TODO do we need to handle the error? */ - } - } - - if (msix_set_vector_notifiers(pdev, - ivshmem_vector_unmask, - ivshmem_vector_mask, - ivshmem_vector_poll)) { - error_report("ivshmem: msix_set_vector_notifiers failed"); - } -} - static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) { IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); @@ -822,11 +799,45 @@ static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) s->msi_vectors[vector].pdev = NULL; } +static void ivshmem_enable_irqfd(IVShmemState *s) +{ + PCIDevice *pdev = PCI_DEVICE(s); + int i; + + for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { + Error *err = NULL; + + ivshmem_add_kvm_msi_virq(s, i, &err); + if (err) { + error_report_err(err); + goto undo; + } + } + + if (msix_set_vector_notifiers(pdev, + ivshmem_vector_unmask, + ivshmem_vector_mask, + ivshmem_vector_poll)) { + error_report("ivshmem: msix_set_vector_notifiers failed"); + goto undo; + } + return; + +undo: + while (--i >= 0) { + ivshmem_remove_kvm_msi_virq(s, i); + } +} + static void ivshmem_disable_irqfd(IVShmemState *s) { PCIDevice *pdev = PCI_DEVICE(s); int i; + if (!pdev->msix_vector_use_notifier) { + return; + } + msix_unset_vector_notifiers(pdev); for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { From a40227911c4cac4ac2551c57058d220baae4e91f Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Mon, 11 Dec 2017 08:21:10 +0100 Subject: [PATCH 29/47] ivshmem: Disable irqfd on device reset The effects of ivshmem_enable_irqfd() was not undone on device reset. This manifested as: ivshmem_add_kvm_msi_virq: Assertion `!s->msi_vectors[vector].pdev' failed. when irqfd was enabled before reset and then enabled again after reset, making ivshmem_enable_irqfd() run for the second time. To reproduce, run: ivshmem-server and QEMU with: -device ivshmem-doorbell,chardev=iv -chardev socket,path=/tmp/ivshmem_socket,id=iv then install the Windows driver, at the time of writing available at: https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem and crash-reboot the guest by inducing a BSOD. Signed-off-by: Ladi Prosek Message-Id: <20171211072110.9058-5-lprosek@redhat.com> Signed-off-by: Paolo Bonzini --- hw/misc/ivshmem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index fe1d8d1669..16f03701b7 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -757,10 +757,14 @@ static void ivshmem_msix_vector_use(IVShmemState *s) } } +static void ivshmem_disable_irqfd(IVShmemState *s); + static void ivshmem_reset(DeviceState *d) { IVShmemState *s = IVSHMEM_COMMON(d); + ivshmem_disable_irqfd(s); + s->intrstatus = 0; s->intrmask = 0; if (ivshmem_has_feature(s, IVSHMEM_MSI)) { From 9857c2d2f7fb09fe37d7a72b004b1f7bcf70248c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Jan 2018 16:28:49 +0100 Subject: [PATCH 30/47] cpus: hax: register/unregister thread with RCU, exit loop on unplug Signed-off-by: Paolo Bonzini --- cpus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index 2cb0af9b22..08a84628c1 100644 --- a/cpus.c +++ b/cpus.c @@ -1473,6 +1473,7 @@ static void *qemu_hax_cpu_thread_fn(void *arg) CPUState *cpu = arg; int r; + rcu_register_thread(); qemu_mutex_lock_iothread(); qemu_thread_get_self(cpu->thread); @@ -1484,7 +1485,7 @@ static void *qemu_hax_cpu_thread_fn(void *arg) hax_init_vcpu(cpu); qemu_cond_signal(&qemu_cpu_cond); - while (1) { + do { if (cpu_can_run(cpu)) { r = hax_smp_cpu_exec(cpu); if (r == EXCP_DEBUG) { @@ -1493,7 +1494,8 @@ static void *qemu_hax_cpu_thread_fn(void *arg) } qemu_wait_io_event(cpu); - } + } while (!cpu->unplug || cpu_can_run(cpu)); + rcu_unregister_thread(); return NULL; } From 57615ed56c005ba0405420eb871f85fa1e5e563b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Jan 2018 11:04:36 -0500 Subject: [PATCH 31/47] cpus: kvm: unregister thread with RCU Signed-off-by: Paolo Bonzini --- cpus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cpus.c b/cpus.c index 08a84628c1..1063dace88 100644 --- a/cpus.c +++ b/cpus.c @@ -1205,6 +1205,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) cpu->created = false; qemu_cond_signal(&qemu_cpu_cond); qemu_mutex_unlock_iothread(); + rcu_unregister_thread(); return NULL; } From d2831ab065765c8af13a5bb42627150323a96662 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Jan 2018 11:04:53 -0500 Subject: [PATCH 32/47] cpus: dummy: unregister thread with RCU, exit loop on unplug Signed-off-by: Paolo Bonzini --- cpus.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index 1063dace88..190ad5aabb 100644 --- a/cpus.c +++ b/cpus.c @@ -1234,7 +1234,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) cpu->created = true; qemu_cond_signal(&qemu_cpu_cond); - while (1) { + do { qemu_mutex_unlock_iothread(); do { int sig; @@ -1246,8 +1246,9 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) } qemu_mutex_lock_iothread(); qemu_wait_io_event(cpu); - } + } while (!cpu->unplug); + rcu_unregister_thread(); return NULL; #endif } From 9b0605f9837b68fd56c7fc7c96a3a1a3b983687d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Jan 2018 11:05:06 -0500 Subject: [PATCH 33/47] cpus: tcg: unregister thread with RCU, fix exiting of loop on unplug Keep running until cpu_can_run(cpu) becomes false, for consistency with other acceslerators. Signed-off-by: Paolo Bonzini --- cpus.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpus.c b/cpus.c index 190ad5aabb..269d527938 100644 --- a/cpus.c +++ b/cpus.c @@ -1467,6 +1467,7 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) deal_with_unplugged_cpus(); } + rcu_unregister_thread(); return NULL; } @@ -1603,18 +1604,17 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* Ignore everything else? */ break; } - } else if (cpu->unplug) { - qemu_tcg_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - return NULL; } atomic_mb_set(&cpu->exit_request, 0); qemu_wait_io_event(cpu); - } + } while (!cpu->unplug || cpu_can_run(cpu)); + qemu_tcg_destroy_vcpu(cpu); + cpu->created = false; + qemu_cond_signal(&qemu_cpu_cond); + qemu_mutex_unlock_iothread(); + rcu_unregister_thread(); return NULL; } From 8178e6376f311a5691ab6122e15863faa9a1f008 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Jan 2018 11:05:21 -0500 Subject: [PATCH 34/47] cpus: hvf: unregister thread with RCU Signed-off-by: Paolo Bonzini --- cpus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cpus.c b/cpus.c index 269d527938..fe127ac9cf 100644 --- a/cpus.c +++ b/cpus.c @@ -1541,6 +1541,7 @@ static void *qemu_hvf_cpu_thread_fn(void *arg) cpu->created = false; qemu_cond_signal(&qemu_cpu_cond); qemu_mutex_unlock_iothread(); + rcu_unregister_thread(); return NULL; } From dbadee4ff4a02d4b4cc138dd63b769b1d9391896 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Jan 2018 16:40:12 +0100 Subject: [PATCH 35/47] cpus: join thread when removing a vCPU If no one joins the thread, its associated memory is leaked. Reported-by: CheneyLin Signed-off-by: Paolo Bonzini --- cpus.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/cpus.c b/cpus.c index fe127ac9cf..8d8bb7c6d3 100644 --- a/cpus.c +++ b/cpus.c @@ -1752,19 +1752,14 @@ void resume_all_vcpus(void) } } -void cpu_remove(CPUState *cpu) +void cpu_remove_sync(CPUState *cpu) { cpu->stop = true; cpu->unplug = true; qemu_cpu_kick(cpu); -} - -void cpu_remove_sync(CPUState *cpu) -{ - cpu_remove(cpu); - while (cpu->created) { - qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); - } + qemu_mutex_unlock_iothread(); + qemu_thread_join(cpu->thread); + qemu_mutex_lock_iothread(); } /* For temporary buffers for forming a name */ From 0f2956f9159e4aecc9f4de6b8412a1d1ac5a2da0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 1 Feb 2018 14:27:51 +0100 Subject: [PATCH 36/47] memfd: add error argument, instead of perror() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow callers to silence error report when the call is allowed to failed. Signed-off-by: Marc-André Lureau Message-Id: <20180201132757.23063-2-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- hw/virtio/vhost.c | 8 ++++++- include/qemu/memfd.h | 5 ++-- util/memfd.c | 57 +++++++++++++++++++++++--------------------- 3 files changed, 40 insertions(+), 30 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index d16c0c813d..338e4395b7 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -330,6 +330,7 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) { + Error *err = NULL; struct vhost_log *log; uint64_t logsize = size * sizeof(*(log->log)); int fd = -1; @@ -338,7 +339,12 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) if (share) { log->log = qemu_memfd_alloc("vhost-log", logsize, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL, - &fd); + &fd, &err); + if (err) { + error_report_err(err); + g_free(log); + return NULL; + } memset(log->log, 0, logsize); } else { log->log = g_malloc0(logsize); diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h index 41c24d807c..b9d09873b5 100644 --- a/include/qemu/memfd.h +++ b/include/qemu/memfd.h @@ -16,9 +16,10 @@ #define F_SEAL_WRITE 0x0008 /* prevent writes */ #endif -int qemu_memfd_create(const char *name, size_t size, unsigned int seals); +int qemu_memfd_create(const char *name, size_t size, unsigned int seals, + Error **errp); void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, - int *fd); + int *fd, Error **errp); void qemu_memfd_free(void *ptr, size_t size, int fd); bool qemu_memfd_check(void); diff --git a/util/memfd.c b/util/memfd.c index dce61f9d21..8d27307137 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -27,6 +27,7 @@ #include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu/memfd.h" #if defined CONFIG_LINUX && !defined CONFIG_MEMFD @@ -51,11 +52,11 @@ static int memfd_create(const char *name, unsigned int flags) #define MFD_ALLOW_SEALING 0x0002U #endif -int qemu_memfd_create(const char *name, size_t size, unsigned int seals) +int qemu_memfd_create(const char *name, size_t size, + unsigned int seals, Error **errp) { - int mfd = -1; - #ifdef CONFIG_LINUX + int mfd = -1; unsigned int flags = MFD_CLOEXEC; if (seals) { @@ -64,23 +65,26 @@ int qemu_memfd_create(const char *name, size_t size, unsigned int seals) mfd = memfd_create(name, flags); if (mfd < 0) { - return -1; + goto err; } if (ftruncate(mfd, size) == -1) { - perror("ftruncate"); - close(mfd); - return -1; + goto err; } if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) { - perror("fcntl"); - close(mfd); - return -1; + goto err; } -#endif return mfd; + +err: + if (mfd >= 0) { + close(mfd); + } +#endif + error_setg_errno(errp, errno, "failed to create memfd"); + return -1; } /* @@ -90,14 +94,14 @@ int qemu_memfd_create(const char *name, size_t size, unsigned int seals) * sealing. */ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, - int *fd) + int *fd, Error **errp) { void *ptr; - int mfd = qemu_memfd_create(name, size, seals); + int mfd = qemu_memfd_create(name, size, seals, NULL); /* some systems have memfd without sealing */ if (mfd == -1) { - mfd = qemu_memfd_create(name, size, 0); + mfd = qemu_memfd_create(name, size, 0, NULL); } if (mfd == -1) { @@ -109,27 +113,26 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, unlink(fname); g_free(fname); - if (mfd == -1) { - perror("mkstemp"); - return NULL; - } - - if (ftruncate(mfd, size) == -1) { - perror("ftruncate"); - close(mfd); - return NULL; + if (mfd == -1 || + ftruncate(mfd, size) == -1) { + goto err; } } ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); if (ptr == MAP_FAILED) { - perror("mmap"); - close(mfd); - return NULL; + goto err; } *fd = mfd; return ptr; + +err: + error_setg_errno(errp, errno, "failed to allocate shared memory"); + if (mfd >= 0) { + close(mfd); + } + return NULL; } void qemu_memfd_free(void *ptr, size_t size, int fd) @@ -157,7 +160,7 @@ bool qemu_memfd_check(void) int fd; void *ptr; - ptr = qemu_memfd_alloc("test", 4096, 0, &fd); + ptr = qemu_memfd_alloc("test", 4096, 0, &fd, NULL); memfd_check = ptr ? MEMFD_OK : MEMFD_KO; qemu_memfd_free(ptr, 4096, fd); } From c5b2a9e0782c54402b3e06afd14b4c1de9efba8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 1 Feb 2018 14:27:52 +0100 Subject: [PATCH 37/47] memfd: add hugetlb support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux commit 749df87bd7bee5a79cef073f5d032ddb2b211de8 (v4.14-rc1) added a new flag MFD_HUGETLB to memfd_create() that specify the file to be created resides in the hugetlbfs filesystem. This is the generic hugetlbfs filesystem not associated with any specific mount point. hugetlbfs does not support sealing operations in v4.14, therefore specifying MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL. However, I added sealing support in "[PATCH v3 0/9] memfd: add sealing to hugetlb-backed memory" series, queued in -mm tree for v4.16. Signed-off-by: Marc-André Lureau Message-Id: <20180201132757.23063-3-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- include/qemu/memfd.h | 4 ++-- util/memfd.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h index b9d09873b5..1d3ecc7458 100644 --- a/include/qemu/memfd.h +++ b/include/qemu/memfd.h @@ -16,8 +16,8 @@ #define F_SEAL_WRITE 0x0008 /* prevent writes */ #endif -int qemu_memfd_create(const char *name, size_t size, unsigned int seals, - Error **errp); +int qemu_memfd_create(const char *name, size_t size, bool hugetlb, + unsigned int seals, Error **errp); void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, int *fd, Error **errp); void qemu_memfd_free(void *ptr, size_t size, int fd); diff --git a/util/memfd.c b/util/memfd.c index 8d27307137..7594af7089 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -52,7 +52,11 @@ static int memfd_create(const char *name, unsigned int flags) #define MFD_ALLOW_SEALING 0x0002U #endif -int qemu_memfd_create(const char *name, size_t size, +#ifndef MFD_HUGETLB +#define MFD_HUGETLB 0x0004U +#endif + +int qemu_memfd_create(const char *name, size_t size, bool hugetlb, unsigned int seals, Error **errp) { #ifdef CONFIG_LINUX @@ -62,6 +66,9 @@ int qemu_memfd_create(const char *name, size_t size, if (seals) { flags |= MFD_ALLOW_SEALING; } + if (hugetlb) { + flags |= MFD_HUGETLB; + } mfd = memfd_create(name, flags); if (mfd < 0) { @@ -97,11 +104,11 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, int *fd, Error **errp) { void *ptr; - int mfd = qemu_memfd_create(name, size, seals, NULL); + int mfd = qemu_memfd_create(name, size, false, seals, NULL); /* some systems have memfd without sealing */ if (mfd == -1) { - mfd = qemu_memfd_create(name, size, 0, NULL); + mfd = qemu_memfd_create(name, size, false, 0, NULL); } if (mfd == -1) { From 2ef8c0c99be7ee5b9dbceaae41b8890e7c81240f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 1 Feb 2018 14:27:53 +0100 Subject: [PATCH 38/47] memfd: add hugetlbsize argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Learn to specificy hugetlb size as qemu_memfd_create() argument. Signed-off-by: Marc-André Lureau Message-Id: <20180201132757.23063-4-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- include/qemu/memfd.h | 2 +- util/memfd.c | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h index 1d3ecc7458..de10198ed6 100644 --- a/include/qemu/memfd.h +++ b/include/qemu/memfd.h @@ -17,7 +17,7 @@ #endif int qemu_memfd_create(const char *name, size_t size, bool hugetlb, - unsigned int seals, Error **errp); + uint64_t hugetlbsize, unsigned int seals, Error **errp); void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, int *fd, Error **errp); void qemu_memfd_free(void *ptr, size_t size, int fd); diff --git a/util/memfd.c b/util/memfd.c index 7594af7089..07d579ea7d 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -29,6 +29,7 @@ #include "qapi/error.h" #include "qemu/memfd.h" +#include "qemu/host-utils.h" #if defined CONFIG_LINUX && !defined CONFIG_MEMFD #include @@ -56,9 +57,22 @@ static int memfd_create(const char *name, unsigned int flags) #define MFD_HUGETLB 0x0004U #endif +#ifndef MFD_HUGE_SHIFT +#define MFD_HUGE_SHIFT 26 +#endif + int qemu_memfd_create(const char *name, size_t size, bool hugetlb, - unsigned int seals, Error **errp) + uint64_t hugetlbsize, unsigned int seals, Error **errp) { + int htsize = hugetlbsize ? ctz64(hugetlbsize) : 0; + + if (htsize && 1 << htsize != hugetlbsize) { + error_setg(errp, "Hugepage size must be a power of 2"); + return -1; + } + + htsize = htsize << MFD_HUGE_SHIFT; + #ifdef CONFIG_LINUX int mfd = -1; unsigned int flags = MFD_CLOEXEC; @@ -68,8 +82,8 @@ int qemu_memfd_create(const char *name, size_t size, bool hugetlb, } if (hugetlb) { flags |= MFD_HUGETLB; + flags |= htsize; } - mfd = memfd_create(name, flags); if (mfd < 0) { goto err; @@ -104,11 +118,11 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, int *fd, Error **errp) { void *ptr; - int mfd = qemu_memfd_create(name, size, false, seals, NULL); + int mfd = qemu_memfd_create(name, size, false, 0, seals, NULL); /* some systems have memfd without sealing */ if (mfd == -1) { - mfd = qemu_memfd_create(name, size, false, 0, NULL); + mfd = qemu_memfd_create(name, size, false, 0, 0, NULL); } if (mfd == -1) { From dbb9e0f40d7d561dcffcf7e41ac9f6a5ec90e5b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 1 Feb 2018 14:27:54 +0100 Subject: [PATCH 39/47] Add memfd based hostmem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new memory backend, similar to hostmem-file, except that it doesn't need to create files. It also enforces memory sealing. This backend is mainly useful for sharing the memory with other processes. Note that Linux supports transparent huge-pages of shmem/memfd memory since 4.8. It is relatively easier to set up THP than a dedicate hugepage mount point by using "madvise" in /sys/kernel/mm/transparent_hugepage/shmem_enabled. Since 4.14, memfd allows to set hugetlb requirement explicitly. Pending for merge in 4.16 is memfd sealing support for hugetlb backed memory. Usage: -object memory-backend-memfd,id=mem1,size=1G Signed-off-by: Marc-André Lureau Message-Id: <20180201132757.23063-5-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- backends/Makefile.objs | 2 + backends/hostmem-memfd.c | 170 +++++++++++++++++++++++++++++++++++++++ qemu-options.hx | 22 +++++ 3 files changed, 194 insertions(+) create mode 100644 backends/hostmem-memfd.c diff --git a/backends/Makefile.objs b/backends/Makefile.objs index 0400799efd..67eeeba5fc 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -8,3 +8,5 @@ common-obj-$(CONFIG_LINUX) += hostmem-file.o common-obj-y += cryptodev.o common-obj-y += cryptodev-builtin.o + +common-obj-$(CONFIG_LINUX) += hostmem-memfd.o diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c new file mode 100644 index 0000000000..1e20fe0ba8 --- /dev/null +++ b/backends/hostmem-memfd.c @@ -0,0 +1,170 @@ +/* + * QEMU host memfd memory backend + * + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Marc-André Lureau + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "sysemu/hostmem.h" +#include "sysemu/sysemu.h" +#include "qom/object_interfaces.h" +#include "qemu/memfd.h" +#include "qapi/error.h" + +#define TYPE_MEMORY_BACKEND_MEMFD "memory-backend-memfd" + +#define MEMORY_BACKEND_MEMFD(obj) \ + OBJECT_CHECK(HostMemoryBackendMemfd, (obj), TYPE_MEMORY_BACKEND_MEMFD) + +typedef struct HostMemoryBackendMemfd HostMemoryBackendMemfd; + +struct HostMemoryBackendMemfd { + HostMemoryBackend parent_obj; + + bool hugetlb; + uint64_t hugetlbsize; + bool seal; +}; + +static void +memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) +{ + HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend); + char *name; + int fd; + + if (!backend->size) { + error_setg(errp, "can't create backend with size 0"); + return; + } + + if (host_memory_backend_mr_inited(backend)) { + return; + } + + backend->force_prealloc = mem_prealloc; + fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size, + m->hugetlb, m->hugetlbsize, m->seal ? + F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0, + errp); + if (fd == -1) { + return; + } + + name = object_get_canonical_path(OBJECT(backend)); + memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), + name, backend->size, true, fd, errp); + g_free(name); +} + +static bool +memfd_backend_get_hugetlb(Object *o, Error **errp) +{ + return MEMORY_BACKEND_MEMFD(o)->hugetlb; +} + +static void +memfd_backend_set_hugetlb(Object *o, bool value, Error **errp) +{ + MEMORY_BACKEND_MEMFD(o)->hugetlb = value; +} + +static void +memfd_backend_set_hugetlbsize(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(obj); + Error *local_err = NULL; + uint64_t value; + + if (host_memory_backend_mr_inited(MEMORY_BACKEND(obj))) { + error_setg(&local_err, "cannot change property value"); + goto out; + } + + visit_type_size(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' doesn't take value '%" + PRIu64 "'", object_get_typename(obj), name, value); + goto out; + } + m->hugetlbsize = value; +out: + error_propagate(errp, local_err); +} + +static void +memfd_backend_get_hugetlbsize(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(obj); + uint64_t value = m->hugetlbsize; + + visit_type_size(v, name, &value, errp); +} + +static bool +memfd_backend_get_seal(Object *o, Error **errp) +{ + return MEMORY_BACKEND_MEMFD(o)->seal; +} + +static void +memfd_backend_set_seal(Object *o, bool value, Error **errp) +{ + MEMORY_BACKEND_MEMFD(o)->seal = value; +} + +static void +memfd_backend_instance_init(Object *obj) +{ + HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(obj); + + /* default to sealed file */ + m->seal = true; +} + +static void +memfd_backend_class_init(ObjectClass *oc, void *data) +{ + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = memfd_backend_memory_alloc; + + object_class_property_add_bool(oc, "hugetlb", + memfd_backend_get_hugetlb, + memfd_backend_set_hugetlb, + &error_abort); + object_class_property_add(oc, "hugetlbsize", "int", + memfd_backend_get_hugetlbsize, + memfd_backend_set_hugetlbsize, + NULL, NULL, &error_abort); + object_class_property_add_bool(oc, "seal", + memfd_backend_get_seal, + memfd_backend_set_seal, + &error_abort); +} + +static const TypeInfo memfd_backend_info = { + .name = TYPE_MEMORY_BACKEND_MEMFD, + .parent = TYPE_MEMORY_BACKEND, + .instance_init = memfd_backend_instance_init, + .class_init = memfd_backend_class_init, + .instance_size = sizeof(HostMemoryBackendMemfd), +}; + +static void register_types(void) +{ + type_register_static(&memfd_backend_info); +} + +type_init(register_types); diff --git a/qemu-options.hx b/qemu-options.hx index 8ce427da78..265cf9fa38 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4024,6 +4024,28 @@ Memory backend objects offer more control than the @option{-m} option that is traditionally used to define guest RAM. Please refer to @option{memory-backend-file} for a description of the options. +@item -object memory-backend-memfd,id=@var{id},merge=@var{on|off},dump=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave},seal=@var{on|off},hugetlb=@var{on|off},hugetlbsize=@var{size} + +Creates an anonymous memory file backend object, which allows QEMU to +share the memory with an external process (e.g. when using +vhost-user). The memory is allocated with memfd and optional +sealing. (Linux only) + +The @option{seal} option creates a sealed-file, that will block +further resizing the memory ('on' by default). + +The @option{hugetlb} option specify the file to be created resides in +the hugetlbfs filesystem (since Linux 4.14). Used in conjunction with +the @option{hugetlb} option, the @option{hugetlbsize} option specify +the hugetlb page size on systems that support multiple hugetlb page +sizes (it must be a power of 2 value supported by the system). + +In some versions of Linux, the @option{hugetlb} option is incompatible +with the @option{seal} option (requires at least Linux 4.16). + +Please refer to @option{memory-backend-file} for a description of the +other options. + @item -object rng-random,id=@var{id},filename=@var{/dev/random} Creates a random number generator backend which obtains entropy from From 7a9ec6541be5a3e70e9760811601454a92e0f4bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 1 Feb 2018 14:27:55 +0100 Subject: [PATCH 40/47] tests: keep compiling failing vhost-user tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's protect the failing tests under a QTEST_VHOST_USER_FIXME environment variable, so we keep compiling the tests and we can easily run them. Signed-off-by: Marc-André Lureau Message-Id: <20180201132757.23063-6-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- tests/vhost-user-test.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index ec6ac9dc9e..713ff5c5c6 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -30,8 +30,6 @@ #include #include -#define VHOST_USER_NET_TESTS_WORKING 0 /* broken as of 2.10.0 */ - /* GLIB version compatibility flags */ #if !GLIB_CHECK_VERSION(2, 26, 0) #define G_TIME_SPAN_SECOND (G_GINT64_CONSTANT(1000000)) @@ -765,7 +763,7 @@ static void wait_for_rings_started(TestServer *s, size_t count) g_mutex_unlock(&s->data_mutex); } -#if VHOST_USER_NET_TESTS_WORKING && defined(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) +#if defined(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) static inline void test_server_connect(TestServer *server) { test_server_create_chr(server, ",reconnect=1"); @@ -956,16 +954,19 @@ int main(int argc, char **argv) qtest_add_func("/vhost-user/migrate", test_migrate); qtest_add_func("/vhost-user/multiqueue", test_multiqueue); -#if VHOST_USER_NET_TESTS_WORKING && defined(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) - qtest_add_func("/vhost-user/reconnect/subprocess", - test_reconnect_subprocess); - qtest_add_func("/vhost-user/reconnect", test_reconnect); - qtest_add_func("/vhost-user/connect-fail/subprocess", - test_connect_fail_subprocess); - qtest_add_func("/vhost-user/connect-fail", test_connect_fail); - qtest_add_func("/vhost-user/flags-mismatch/subprocess", - test_flags_mismatch_subprocess); - qtest_add_func("/vhost-user/flags-mismatch", test_flags_mismatch); +#if defined(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) + /* keeps failing on build-system since Aug 15 2017 */ + if (getenv("QTEST_VHOST_USER_FIXME")) { + qtest_add_func("/vhost-user/reconnect/subprocess", + test_reconnect_subprocess); + qtest_add_func("/vhost-user/reconnect", test_reconnect); + qtest_add_func("/vhost-user/connect-fail/subprocess", + test_connect_fail_subprocess); + qtest_add_func("/vhost-user/connect-fail", test_connect_fail); + qtest_add_func("/vhost-user/flags-mismatch/subprocess", + test_flags_mismatch_subprocess); + qtest_add_func("/vhost-user/flags-mismatch", test_flags_mismatch); + } #endif ret = g_test_run(); From 83265145a4f42a38f10bad0051e3a8229f19c166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 1 Feb 2018 14:27:56 +0100 Subject: [PATCH 41/47] vhost-user-test: make read-guest-mem setup its own qemu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau Message-Id: <20180201132757.23063-7-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- tests/vhost-user-test.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 713ff5c5c6..a22075ff26 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -48,6 +48,14 @@ #define QEMU_CMD QEMU_CMD_MEM QEMU_CMD_CHR \ QEMU_CMD_NETDEV QEMU_CMD_NET +#define GET_QEMU_CMD(s) \ + g_strdup_printf(QEMU_CMD, 512, 512, (root), (s)->chr_name, \ + (s)->socket_path, "", (s)->chr_name) + +#define GET_QEMU_CMDE(s, mem, chr_opts, extra, ...) \ + g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name, \ + (s)->socket_path, (chr_opts), (s)->chr_name, ##__VA_ARGS__) + #define HUGETLBFS_MAGIC 0x958458f6 /*********** FROM hw/virtio/vhost-user.c *************************************/ @@ -159,6 +167,10 @@ typedef struct TestServer { QGuestAllocator *alloc; } TestServer; +static TestServer *test_server_new(const gchar *name); +static void test_server_free(TestServer *server); +static void test_server_listen(TestServer *server); + static const char *tmpfs; static const char *root; @@ -225,9 +237,8 @@ static void wait_for_fds(TestServer *s) g_mutex_unlock(&s->data_mutex); } -static void read_guest_mem(const void *data) +static void read_guest_mem_server(TestServer *s) { - TestServer *s = (void *)data; uint32_t *guest_mem; int i, j; size_t size; @@ -492,14 +503,6 @@ static void test_server_listen(TestServer *server) test_server_create_chr(server, ",server,nowait"); } -#define GET_QEMU_CMD(s) \ - g_strdup_printf(QEMU_CMD, 512, 512, (root), (s)->chr_name, \ - (s)->socket_path, "", (s)->chr_name) - -#define GET_QEMU_CMDE(s, mem, chr_opts, extra, ...) \ - g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name, \ - (s)->socket_path, (chr_opts), (s)->chr_name, ##__VA_ARGS__) - static gboolean _test_server_free(TestServer *server) { int i; @@ -652,7 +655,7 @@ static void test_read_guest_mem(void) init_virtio_dev(server, 1u << VIRTIO_NET_F_MAC); - read_guest_mem(server); + read_guest_mem_server(server); uninit_virtio_dev(server); @@ -730,7 +733,7 @@ static void test_migrate(void) global_qtest = to; qmp_eventwait("RESUME"); - read_guest_mem(dest); + read_guest_mem_server(dest); uninit_virtio_dev(s); From 7e49f5e8e508ed020c96798b3f7083e24e0e425b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 1 Feb 2018 14:27:57 +0100 Subject: [PATCH 42/47] tests: use memfd in vhost-user-test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will exercise the memfd memory backend and should generally be better for testing than memory-backend-file (thanks to anonymous files and sealing). If memfd is available, it is preferred. However, in order to check that file & memfd backends both work correctly, the read-guest-mem test is checked explicitly for each. Signed-off-by: Marc-André Lureau Message-Id: <20180201132757.23063-8-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- tests/vhost-user-test.c | 90 +++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 26 deletions(-) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index a22075ff26..95eb449cfc 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -17,6 +17,7 @@ #include "qemu/range.h" #include "qemu/sockets.h" #include "chardev/char-fe.h" +#include "qemu/memfd.h" #include "sysemu/sysemu.h" #include "libqos/libqos.h" #include "libqos/pci-pc.h" @@ -39,23 +40,14 @@ #define HAVE_MONOTONIC_TIME #endif -#define QEMU_CMD_MEM " -m %d -object memory-backend-file,id=mem,size=%dM,"\ +#define QEMU_CMD_MEM " -m %d -object memory-backend-file,id=mem,size=%dM," \ "mem-path=%s,share=on -numa node,memdev=mem" +#define QEMU_CMD_MEMFD " -m %d -object memory-backend-memfd,id=mem,size=%dM," \ + " -numa node,memdev=mem" #define QEMU_CMD_CHR " -chardev socket,id=%s,path=%s%s" #define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=%s,vhostforce" #define QEMU_CMD_NET " -device virtio-net-pci,netdev=net0" -#define QEMU_CMD QEMU_CMD_MEM QEMU_CMD_CHR \ - QEMU_CMD_NETDEV QEMU_CMD_NET - -#define GET_QEMU_CMD(s) \ - g_strdup_printf(QEMU_CMD, 512, 512, (root), (s)->chr_name, \ - (s)->socket_path, "", (s)->chr_name) - -#define GET_QEMU_CMDE(s, mem, chr_opts, extra, ...) \ - g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name, \ - (s)->socket_path, (chr_opts), (s)->chr_name, ##__VA_ARGS__) - #define HUGETLBFS_MAGIC 0x958458f6 /*********** FROM hw/virtio/vhost-user.c *************************************/ @@ -174,6 +166,33 @@ static void test_server_listen(TestServer *server); static const char *tmpfs; static const char *root; +enum test_memfd { + TEST_MEMFD_AUTO, + TEST_MEMFD_YES, + TEST_MEMFD_NO, +}; + +static char *get_qemu_cmd(TestServer *s, + int mem, enum test_memfd memfd, const char *mem_path, + const char *chr_opts, const char *extra) +{ + if (memfd == TEST_MEMFD_AUTO && qemu_memfd_check()) { + memfd = TEST_MEMFD_YES; + } + + if (memfd == TEST_MEMFD_YES) { + return g_strdup_printf(QEMU_CMD_MEMFD QEMU_CMD_CHR + QEMU_CMD_NETDEV QEMU_CMD_NET "%s", mem, mem, + s->chr_name, s->socket_path, + chr_opts, s->chr_name, extra); + } else { + return g_strdup_printf(QEMU_CMD_MEM QEMU_CMD_CHR + QEMU_CMD_NETDEV QEMU_CMD_NET "%s", mem, mem, + mem_path, s->chr_name, s->socket_path, + chr_opts, s->chr_name, extra); + } +} + static void init_virtio_dev(TestServer *s, uint32_t features_mask) { uint32_t features; @@ -639,8 +658,9 @@ GSourceFuncs test_migrate_source_funcs = { .check = test_migrate_source_check, }; -static void test_read_guest_mem(void) +static void test_read_guest_mem(const void *arg) { + enum test_memfd memfd = GPOINTER_TO_INT(arg); TestServer *server = NULL; char *qemu_cmd = NULL; QTestState *s = NULL; @@ -648,7 +668,7 @@ static void test_read_guest_mem(void) server = test_server_new("test"); test_server_listen(server); - qemu_cmd = GET_QEMU_CMD(server); + qemu_cmd = get_qemu_cmd(server, 512, memfd, root, "", ""); s = qtest_start(qemu_cmd); g_free(qemu_cmd); @@ -670,7 +690,7 @@ static void test_migrate(void) char *uri = g_strdup_printf("%s%s", "unix:", dest->mig_path); QTestState *global = global_qtest, *from, *to; GSource *source; - gchar *cmd; + gchar *cmd, *tmp; QDict *rsp; guint8 *log; guint64 size; @@ -678,7 +698,7 @@ static void test_migrate(void) test_server_listen(s); test_server_listen(dest); - cmd = GET_QEMU_CMDE(s, 2, "", ""); + cmd = get_qemu_cmd(s, 2, TEST_MEMFD_AUTO, root, "", ""); from = qtest_start(cmd); g_free(cmd); @@ -687,7 +707,9 @@ static void test_migrate(void) size = get_log_size(s); g_assert_cmpint(size, ==, (2 * 1024 * 1024) / (VHOST_LOG_PAGE * 8)); - cmd = GET_QEMU_CMDE(dest, 2, "", " -incoming %s", uri); + tmp = g_strdup_printf(" -incoming %s", uri); + cmd = get_qemu_cmd(dest, 2, TEST_MEMFD_AUTO, root, "", tmp); + g_free(tmp); to = qtest_init(cmd); g_free(cmd); @@ -800,7 +822,7 @@ static void test_reconnect_subprocess(void) char *cmd; g_thread_new("connect", connect_thread, s); - cmd = GET_QEMU_CMDE(s, 2, ",server", ""); + cmd = get_qemu_cmd(s, 2, TEST_MEMFD_AUTO, root, ",server", ""); qtest_start(cmd); g_free(cmd); @@ -838,7 +860,7 @@ static void test_connect_fail_subprocess(void) s->test_fail = true; g_thread_new("connect", connect_thread, s); - cmd = GET_QEMU_CMDE(s, 2, ",server", ""); + cmd = get_qemu_cmd(s, 2, TEST_MEMFD_AUTO, root, ",server", ""); qtest_start(cmd); g_free(cmd); @@ -868,7 +890,7 @@ static void test_flags_mismatch_subprocess(void) s->test_flags = TEST_FLAGS_DISCONNECT; g_thread_new("connect", connect_thread, s); - cmd = GET_QEMU_CMDE(s, 2, ",server", ""); + cmd = get_qemu_cmd(s, 2, TEST_MEMFD_AUTO, root, ",server", ""); qtest_start(cmd); g_free(cmd); @@ -903,11 +925,21 @@ static void test_multiqueue(void) s->queues = 2; test_server_listen(s); - cmd = g_strdup_printf(QEMU_CMD_MEM QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d " - "-device virtio-net-pci,netdev=net0,mq=on,vectors=%d", - 512, 512, root, s->chr_name, - s->socket_path, "", s->chr_name, - s->queues, s->queues * 2 + 2); + if (qemu_memfd_check()) { + cmd = g_strdup_printf( + QEMU_CMD_MEMFD QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d " + "-device virtio-net-pci,netdev=net0,mq=on,vectors=%d", + 512, 512, s->chr_name, + s->socket_path, "", s->chr_name, + s->queues, s->queues * 2 + 2); + } else { + cmd = g_strdup_printf( + QEMU_CMD_MEM QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d " + "-device virtio-net-pci,netdev=net0,mq=on,vectors=%d", + 512, 512, root, s->chr_name, + s->socket_path, "", s->chr_name, + s->queues, s->queues * 2 + 2); + } qtest_start(cmd); g_free(cmd); @@ -953,7 +985,13 @@ int main(int argc, char **argv) /* run the main loop thread so the chardev may operate */ thread = g_thread_new(NULL, thread_function, loop); - qtest_add_func("/vhost-user/read-guest-mem", test_read_guest_mem); + if (qemu_memfd_check()) { + qtest_add_data_func("/vhost-user/read-guest-mem/memfd", + GINT_TO_POINTER(TEST_MEMFD_YES), + test_read_guest_mem); + } + qtest_add_data_func("/vhost-user/read-guest-mem/memfile", + GINT_TO_POINTER(TEST_MEMFD_NO), test_read_guest_mem); qtest_add_func("/vhost-user/migrate", test_migrate); qtest_add_func("/vhost-user/multiqueue", test_multiqueue); From 8f6d701044bc87197aac8aa2a627d70ce8471726 Mon Sep 17 00:00:00 2001 From: Klim Kireev Date: Thu, 1 Feb 2018 16:48:31 +0300 Subject: [PATCH 43/47] tests/test-filter-redirector: move close() Since we have separate handler on POLLHUP, which drops data after closing the connection we need to fix this test, because it sends data and instantly close the socket creating race condition. In some cases on other end of socket client closes it faster than reads data. To prevent it I suggest to close socket after recieving. Signed-off-by: Klim Kireev Message-Id: <20180201134831.17709-1-klim.kireev@virtuozzo.com> Signed-off-by: Paolo Bonzini --- tests/test-filter-redirector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-filter-redirector.c b/tests/test-filter-redirector.c index f2566144cf..fbaf19bbd8 100644 --- a/tests/test-filter-redirector.c +++ b/tests/test-filter-redirector.c @@ -186,7 +186,6 @@ static void test_redirector_rx(void) ret = iov_send(send_sock, iov, 2, 0, sizeof(size) + sizeof(send_buf)); g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size)); - close(send_sock); ret = qemu_recv(backend_sock[0], &len, sizeof(len), 0); g_assert_cmpint(ret, ==, sizeof(len)); @@ -197,6 +196,7 @@ static void test_redirector_rx(void) ret = qemu_recv(backend_sock[0], recv_buf, len, 0); g_assert_cmpstr(recv_buf, ==, send_buf); + close(send_sock); g_free(recv_buf); unlink(sock_path0); unlink(sock_path1); From d661d9a42bdbe25013eac9bbf1341c159fade09a Mon Sep 17 00:00:00 2001 From: "Justin Terry (VM)" Date: Mon, 22 Jan 2018 13:07:46 -0800 Subject: [PATCH 44/47] Add the Windows Hypervisor Platform accelerator. Introduces the configure support for the new Windows Hypervisor Platform that allows for hypervisor acceleration from usermode components on the Windows platform. Signed-off-by: Justin Terry (VM) Message-Id: <1516655269-1785-2-git-send-email-juterry@microsoft.com> Signed-off-by: Paolo Bonzini --- configure | 48 +++++++++++++++++++++++++++++++++++++++++++++++- qemu-options.hx | 8 ++++---- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/configure b/configure index 529aad5491..ba60205904 100755 --- a/configure +++ b/configure @@ -222,6 +222,17 @@ supported_hvf_target() { return 1 } +supported_whpx_target() { + test "$whpx" = "yes" || return 1 + glob "$1" "*-softmmu" || return 1 + case "${1%-softmmu}" in + i386|x86_64) + return 0 + ;; + esac + return 1 +} + supported_target() { case "$1" in *-softmmu) @@ -248,6 +259,7 @@ supported_target() { supported_xen_target "$1" && return 0 supported_hax_target "$1" && return 0 supported_hvf_target "$1" && return 0 + supported_whpx_target "$1" && return 0 print_error "TCG disabled, but hardware accelerator not available for '$target'" return 1 } @@ -338,6 +350,7 @@ vhost_user="" kvm="no" hax="no" hvf="no" +whpx="no" rdma="" gprof="no" debug_tcg="no" @@ -1058,6 +1071,10 @@ for opt do ;; --enable-hvf) hvf="yes" ;; + --disable-whpx) whpx="no" + ;; + --enable-whpx) whpx="yes" + ;; --disable-tcg-interpreter) tcg_interpreter="no" ;; --enable-tcg-interpreter) tcg_interpreter="yes" @@ -1557,6 +1574,7 @@ disabled with --disable-FEATURE, default is enabled if available: kvm KVM acceleration support hax HAX acceleration support hvf Hypervisor.framework acceleration support + whpx Windows Hypervisor Platform acceleration support rdma RDMA-based migration support vde support for vde network netmap support for netmap network @@ -2455,6 +2473,30 @@ if test "$xen_pv_domain_build" = "yes" && "which requires Xen support." fi +########################################## +# Windows Hypervisor Platform accelerator (WHPX) check +if test "$whpx" != "no" ; then + cat > $TMPC << EOF +#include +#include +#include +int main(void) { + WHV_CAPABILITY whpx_cap; + WHvGetCapability(WHvCapabilityCodeFeatures, &whpx_cap, sizeof(whpx_cap)); + return 0; +} +EOF + if compile_prog "" "-lwinhvplatform -lwinhvemulation" ; then + libs_softmmu="$libs_softmmu -lwinhvplatform -lwinhvemulation" + whpx="yes" + else + if test "$whpx" = "yes"; then + feature_not_found "winhvplatform" "winhvemulation is not installed" + fi + whpx="no" + fi +fi + ########################################## # Sparse probe if test "$sparse" != "no" ; then @@ -5648,6 +5690,7 @@ echo "Install blobs $blobs" echo "KVM support $kvm" echo "HAX support $hax" echo "HVF support $hvf" +echo "WHPX support $whpx" echo "TCG support $tcg" if test "$tcg" = "yes" ; then echo "TCG debug enabled $debug_tcg" @@ -5815,7 +5858,7 @@ if test "$mingw32" = "yes" ; then echo "CONFIG_QGA_NTDDDISK=y" >> $config_host_mak fi if test "$guest_agent_msi" = "yes"; then - echo "QEMU_GA_MSI_ENABLED=yes" >> $config_host_mak + echo "QEMU_GA_MSI_ENABLED=yes" >> $config_host_mak echo "QEMU_GA_MSI_MINGW_DLL_PATH=${QEMU_GA_MSI_MINGW_DLL_PATH}" >> $config_host_mak echo "QEMU_GA_MSI_WITH_VSS=${QEMU_GA_MSI_WITH_VSS}" >> $config_host_mak echo "QEMU_GA_MSI_ARCH=${QEMU_GA_MSI_ARCH}" >> $config_host_mak @@ -6748,6 +6791,9 @@ fi if supported_hvf_target $target; then echo "CONFIG_HVF=y" >> $config_target_mak fi +if supported_whpx_target $target; then + echo "CONFIG_WHPX=y" >> $config_target_mak +fi if test "$target_bigendian" = "yes" ; then echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak fi diff --git a/qemu-options.hx b/qemu-options.hx index 265cf9fa38..d15c1713d1 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -31,7 +31,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ "-machine [type=]name[,prop[=value][,...]]\n" " selects emulated machine ('-machine help' for list)\n" " property accel=accel1[:accel2[:...]] selects accelerator\n" - " supported accelerators are kvm, xen, hax, hvf or tcg (default: tcg)\n" + " supported accelerators are kvm, xen, hax, hvf, whpx or tcg (default: tcg)\n" " kernel_irqchip=on|off|split controls accelerated irqchip support (default=off)\n" " vmport=on|off|auto controls emulation of vmport (default: auto)\n" " kvm_shadow_mem=size of KVM shadow MMU in bytes\n" @@ -66,7 +66,7 @@ Supported machine properties are: @table @option @item accel=@var{accels1}[:@var{accels2}[:...]] This is used to enable an accelerator. Depending on the target architecture, -kvm, xen, hax, hvf or tcg can be available. By default, tcg is used. If there is +kvm, xen, hax, hvf, whpx or tcg can be available. By default, tcg is used. If there is more than one accelerator specified, the next one is used if the previous one fails to initialize. @item kernel_irqchip=on|off @@ -126,13 +126,13 @@ ETEXI DEF("accel", HAS_ARG, QEMU_OPTION_accel, "-accel [accel=]accelerator[,thread=single|multi]\n" - " select accelerator (kvm, xen, hax, hvf or tcg; use 'help' for a list)\n" + " select accelerator (kvm, xen, hax, hvf, whpx or tcg; use 'help' for a list)\n" " thread=single|multi (enable multi-threaded TCG)", QEMU_ARCH_ALL) STEXI @item -accel @var{name}[,prop=@var{value}[,...]] @findex -accel This is used to enable an accelerator. Depending on the target architecture, -kvm, xen, hax, hvf or tcg can be available. By default, tcg is used. If there is +kvm, xen, hax, hvf, whpx or tcg can be available. By default, tcg is used. If there is more than one accelerator specified, the next one is used if the previous one fails to initialize. @table @option From 29b22c79bcacce7839209c7344948e8c305fa2d3 Mon Sep 17 00:00:00 2001 From: "Justin Terry (VM)" Date: Mon, 22 Jan 2018 13:07:47 -0800 Subject: [PATCH 45/47] Add the WHPX vcpu API Adds support for the Windows Hypervisor Platform accelerator (WHPX) stubs and introduces the whpx.h sysemu API for managing the vcpu scheduling and management. Signed-off-by: Justin Terry (VM) Message-Id: <1516655269-1785-3-git-send-email-juterry@microsoft.com> Signed-off-by: Paolo Bonzini --- accel/stubs/Makefile.objs | 9 ++++---- accel/stubs/whpx-stub.c | 48 +++++++++++++++++++++++++++++++++++++++ include/sysemu/whpx.h | 40 ++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 accel/stubs/whpx-stub.c create mode 100644 include/sysemu/whpx.h diff --git a/accel/stubs/Makefile.objs b/accel/stubs/Makefile.objs index 779343b0c0..3894caf95d 100644 --- a/accel/stubs/Makefile.objs +++ b/accel/stubs/Makefile.objs @@ -1,4 +1,5 @@ -obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o -obj-$(call lnot,$(CONFIG_HVF)) += hvf-stub.o -obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o -obj-$(call lnot,$(CONFIG_TCG)) += tcg-stub.o +obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o +obj-$(call lnot,$(CONFIG_HVF)) += hvf-stub.o +obj-$(call lnot,$(CONFIG_WHPX)) += whpx-stub.o +obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o +obj-$(call lnot,$(CONFIG_TCG)) += tcg-stub.o diff --git a/accel/stubs/whpx-stub.c b/accel/stubs/whpx-stub.c new file mode 100644 index 0000000000..5fb049c281 --- /dev/null +++ b/accel/stubs/whpx-stub.c @@ -0,0 +1,48 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) stub + * + * Copyright Microsoft Corp. 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "sysemu/whpx.h" + +int whpx_init_vcpu(CPUState *cpu) +{ + return -1; +} + +int whpx_vcpu_exec(CPUState *cpu) +{ + return -1; +} + +void whpx_destroy_vcpu(CPUState *cpu) +{ +} + +void whpx_vcpu_kick(CPUState *cpu) +{ +} + +void whpx_cpu_synchronize_state(CPUState *cpu) +{ +} + +void whpx_cpu_synchronize_post_reset(CPUState *cpu) +{ +} + +void whpx_cpu_synchronize_post_init(CPUState *cpu) +{ +} + +void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) +{ +} diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h new file mode 100644 index 0000000000..89592ae4fa --- /dev/null +++ b/include/sysemu/whpx.h @@ -0,0 +1,40 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) support + * + * Copyright Microsoft, Corp. 2017 + * + * Authors: + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_WHPX_H +#define QEMU_WHPX_H + +#include "config-host.h" +#include "qemu-common.h" + +int whpx_init_vcpu(CPUState *cpu); +int whpx_vcpu_exec(CPUState *cpu); +void whpx_destroy_vcpu(CPUState *cpu); +void whpx_vcpu_kick(CPUState *cpu); + + +void whpx_cpu_synchronize_state(CPUState *cpu); +void whpx_cpu_synchronize_post_reset(CPUState *cpu); +void whpx_cpu_synchronize_post_init(CPUState *cpu); +void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu); + +#ifdef CONFIG_WHPX + +int whpx_enabled(void); + +#else /* CONFIG_WHPX */ + +#define whpx_enabled() (0) + +#endif /* CONFIG_WHPX */ + +#endif /* QEMU_WHPX_H */ From 812d49f2a3eed1f53d5a922461e81ea7e3581dd5 Mon Sep 17 00:00:00 2001 From: "Justin Terry (VM)" Date: Mon, 22 Jan 2018 13:07:48 -0800 Subject: [PATCH 46/47] Introduce the WHPX impl Implements the Windows Hypervisor Platform accelerator (WHPX) target. Which acts as a hypervisor accelerator for QEMU on the Windows platform. This enables QEMU much greater speed over the emulated x86_64 path's that are taken on Windows today. 1. Adds support for vPartition management. 2. Adds support for vCPU management. 3. Adds support for MMIO/PortIO. 4. Registers the WHPX ACCEL_CLASS. Signed-off-by: Justin Terry (VM) Message-Id: <1516655269-1785-4-git-send-email-juterry@microsoft.com> Signed-off-by: Paolo Bonzini --- target/i386/Makefile.objs | 1 + target/i386/whpx-all.c | 1366 +++++++++++++++++++++++++++++++++++++ 2 files changed, 1367 insertions(+) create mode 100644 target/i386/whpx-all.c diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs index 44103a693b..f5c6ef20a7 100644 --- a/target/i386/Makefile.objs +++ b/target/i386/Makefile.objs @@ -14,3 +14,4 @@ ifdef CONFIG_DARWIN obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o obj-$(CONFIG_HVF) += hvf/ endif +obj-$(CONFIG_WHPX) += whpx-all.o diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c new file mode 100644 index 0000000000..0015b27509 --- /dev/null +++ b/target/i386/whpx-all.c @@ -0,0 +1,1366 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) + * + * Copyright Microsoft Corp. 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/address-spaces.h" +#include "exec/exec-all.h" +#include "exec/ioport.h" +#include "qemu-common.h" +#include "strings.h" +#include "sysemu/accel.h" +#include "sysemu/whpx.h" +#include "sysemu/sysemu.h" +#include "sysemu/cpus.h" +#include "qemu/main-loop.h" +#include "hw/boards.h" +#include "qemu/error-report.h" +#include "qemu/queue.h" +#include "qapi/error.h" +#include "migration/blocker.h" + +#include +#include + +struct whpx_state { + uint64_t mem_quota; + WHV_PARTITION_HANDLE partition; + uint32_t exit_ctx_size; +}; + +static const WHV_REGISTER_NAME whpx_register_names[] = { + + /* X64 General purpose registers */ + WHvX64RegisterRax, + WHvX64RegisterRcx, + WHvX64RegisterRdx, + WHvX64RegisterRbx, + WHvX64RegisterRsp, + WHvX64RegisterRbp, + WHvX64RegisterRsi, + WHvX64RegisterRdi, + WHvX64RegisterR8, + WHvX64RegisterR9, + WHvX64RegisterR10, + WHvX64RegisterR11, + WHvX64RegisterR12, + WHvX64RegisterR13, + WHvX64RegisterR14, + WHvX64RegisterR15, + WHvX64RegisterRip, + WHvX64RegisterRflags, + + /* X64 Segment registers */ + WHvX64RegisterEs, + WHvX64RegisterCs, + WHvX64RegisterSs, + WHvX64RegisterDs, + WHvX64RegisterFs, + WHvX64RegisterGs, + WHvX64RegisterLdtr, + WHvX64RegisterTr, + + /* X64 Table registers */ + WHvX64RegisterIdtr, + WHvX64RegisterGdtr, + + /* X64 Control Registers */ + WHvX64RegisterCr0, + WHvX64RegisterCr2, + WHvX64RegisterCr3, + WHvX64RegisterCr4, + WHvX64RegisterCr8, + + /* X64 Debug Registers */ + /* + * WHvX64RegisterDr0, + * WHvX64RegisterDr1, + * WHvX64RegisterDr2, + * WHvX64RegisterDr3, + * WHvX64RegisterDr6, + * WHvX64RegisterDr7, + */ + + /* X64 Floating Point and Vector Registers */ + WHvX64RegisterXmm0, + WHvX64RegisterXmm1, + WHvX64RegisterXmm2, + WHvX64RegisterXmm3, + WHvX64RegisterXmm4, + WHvX64RegisterXmm5, + WHvX64RegisterXmm6, + WHvX64RegisterXmm7, + WHvX64RegisterXmm8, + WHvX64RegisterXmm9, + WHvX64RegisterXmm10, + WHvX64RegisterXmm11, + WHvX64RegisterXmm12, + WHvX64RegisterXmm13, + WHvX64RegisterXmm14, + WHvX64RegisterXmm15, + WHvX64RegisterFpMmx0, + WHvX64RegisterFpMmx1, + WHvX64RegisterFpMmx2, + WHvX64RegisterFpMmx3, + WHvX64RegisterFpMmx4, + WHvX64RegisterFpMmx5, + WHvX64RegisterFpMmx6, + WHvX64RegisterFpMmx7, + WHvX64RegisterFpControlStatus, + WHvX64RegisterXmmControlStatus, + + /* X64 MSRs */ + WHvX64RegisterTsc, + WHvX64RegisterEfer, +#ifdef TARGET_X86_64 + WHvX64RegisterKernelGsBase, +#endif + WHvX64RegisterApicBase, + /* WHvX64RegisterPat, */ + WHvX64RegisterSysenterCs, + WHvX64RegisterSysenterEip, + WHvX64RegisterSysenterEsp, + WHvX64RegisterStar, +#ifdef TARGET_X86_64 + WHvX64RegisterLstar, + WHvX64RegisterCstar, + WHvX64RegisterSfmask, +#endif + + /* Interrupt / Event Registers */ + /* + * WHvRegisterPendingInterruption, + * WHvRegisterInterruptState, + * WHvRegisterPendingEvent0, + * WHvRegisterPendingEvent1 + * WHvX64RegisterDeliverabilityNotifications, + */ +}; + +struct whpx_register_set { + WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)]; +}; + +struct whpx_vcpu { + WHV_EMULATOR_HANDLE emulator; + bool window_registered; + bool interruptable; + uint64_t tpr; + uint64_t apic_base; + WHV_X64_PENDING_INTERRUPTION_REGISTER interrupt_in_flight; + + /* Must be the last field as it may have a tail */ + WHV_RUN_VP_EXIT_CONTEXT exit_ctx; +}; + +static bool whpx_allowed; + +struct whpx_state whpx_global; + + +/* + * VP support + */ + +static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu) +{ + return (struct whpx_vcpu *)cpu->hax_vcpu; +} + +static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86, + int r86) +{ + WHV_X64_SEGMENT_REGISTER hs; + unsigned flags = qs->flags; + + hs.Base = qs->base; + hs.Limit = qs->limit; + hs.Selector = qs->selector; + + if (v86) { + hs.Attributes = 0; + hs.SegmentType = 3; + hs.Present = 1; + hs.DescriptorPrivilegeLevel = 3; + hs.NonSystemSegment = 1; + + } else { + hs.Attributes = (flags >> DESC_TYPE_SHIFT); + + if (r86) { + /* hs.Base &= 0xfffff; */ + } + } + + return hs; +} + +static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs) +{ + SegmentCache qs; + + qs.base = hs->Base; + qs.limit = hs->Limit; + qs.selector = hs->Selector; + + qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT; + + return qs; +} + +static void whpx_set_registers(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + struct whpx_register_set vcxt = {0}; + HRESULT hr; + int idx = 0; + int i; + int v86, r86; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + v86 = (env->eflags & VM_MASK); + r86 = !(env->cr[0] & CR0_PE_MASK); + + vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state); + vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state); + + /* Indexes for first 16 registers match between HV and QEMU definitions */ + for (idx = 0; idx < CPU_NB_REGS64; idx += 1) { + vcxt.values[idx].Reg64 = env->regs[idx]; + } + + /* Same goes for RIP and RFLAGS */ + assert(whpx_register_names[idx] == WHvX64RegisterRip); + vcxt.values[idx++].Reg64 = env->eip; + + assert(whpx_register_names[idx] == WHvX64RegisterRflags); + vcxt.values[idx++].Reg64 = env->eflags; + + /* Translate 6+4 segment registers. HV and QEMU order matches */ + assert(idx == WHvX64RegisterEs); + for (i = 0; i < 6; i += 1, idx += 1) { + vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86); + } + + assert(idx == WHvX64RegisterLdtr); + vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0); + + assert(idx == WHvX64RegisterTr); + vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0); + + assert(idx == WHvX64RegisterIdtr); + vcxt.values[idx].Table.Base = env->idt.base; + vcxt.values[idx].Table.Limit = env->idt.limit; + idx += 1; + + assert(idx == WHvX64RegisterGdtr); + vcxt.values[idx].Table.Base = env->gdt.base; + vcxt.values[idx].Table.Limit = env->gdt.limit; + idx += 1; + + /* CR0, 2, 3, 4, 8 */ + assert(whpx_register_names[idx] == WHvX64RegisterCr0); + vcxt.values[idx++].Reg64 = env->cr[0]; + assert(whpx_register_names[idx] == WHvX64RegisterCr2); + vcxt.values[idx++].Reg64 = env->cr[2]; + assert(whpx_register_names[idx] == WHvX64RegisterCr3); + vcxt.values[idx++].Reg64 = env->cr[3]; + assert(whpx_register_names[idx] == WHvX64RegisterCr4); + vcxt.values[idx++].Reg64 = env->cr[4]; + assert(whpx_register_names[idx] == WHvX64RegisterCr8); + vcxt.values[idx++].Reg64 = vcpu->tpr; + + /* 8 Debug Registers - Skipped */ + + /* 16 XMM registers */ + assert(whpx_register_names[idx] == WHvX64RegisterXmm0); + for (i = 0; i < 16; i += 1, idx += 1) { + vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0); + vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1); + } + + /* 8 FP registers */ + assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); + for (i = 0; i < 8; i += 1, idx += 1) { + vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0); + /* vcxt.values[idx].Fp.AsUINT128.High64 = + env->fpregs[i].mmx.MMX_Q(1); + */ + } + + /* FP control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); + vcxt.values[idx].FpControlStatus.FpControl = env->fpuc; + vcxt.values[idx].FpControlStatus.FpStatus = + (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + vcxt.values[idx].FpControlStatus.FpTag = 0; + for (i = 0; i < 8; ++i) { + vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i; + } + vcxt.values[idx].FpControlStatus.Reserved = 0; + vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop; + vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip; + idx += 1; + + /* XMM control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); + vcxt.values[idx].XmmControlStatus.LastFpRdp = 0; + vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr; + vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff; + idx += 1; + + /* MSRs */ + assert(whpx_register_names[idx] == WHvX64RegisterTsc); + vcxt.values[idx++].Reg64 = env->tsc; + assert(whpx_register_names[idx] == WHvX64RegisterEfer); + vcxt.values[idx++].Reg64 = env->efer; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); + vcxt.values[idx++].Reg64 = env->kernelgsbase; +#endif + + assert(whpx_register_names[idx] == WHvX64RegisterApicBase); + vcxt.values[idx++].Reg64 = vcpu->apic_base; + + /* WHvX64RegisterPat - Skipped */ + + assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); + vcxt.values[idx++].Reg64 = env->sysenter_cs; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); + vcxt.values[idx++].Reg64 = env->sysenter_eip; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); + vcxt.values[idx++].Reg64 = env->sysenter_esp; + assert(whpx_register_names[idx] == WHvX64RegisterStar); + vcxt.values[idx++].Reg64 = env->star; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterLstar); + vcxt.values[idx++].Reg64 = env->lstar; + assert(whpx_register_names[idx] == WHvX64RegisterCstar); + vcxt.values[idx++].Reg64 = env->cstar; + assert(whpx_register_names[idx] == WHvX64RegisterSfmask); + vcxt.values[idx++].Reg64 = env->fmask; +#endif + + /* Interrupt / Event Registers - Skipped */ + + assert(idx == RTL_NUMBER_OF(whpx_register_names)); + + hr = WHvSetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index, + whpx_register_names, + RTL_NUMBER_OF(whpx_register_names), + &vcxt.values[0]); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set virtual processor context, hr=%08lx", + hr); + __debugbreak(); + } + + return; +} + +static void whpx_get_registers(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + struct whpx_register_set vcxt; + uint64_t tpr, apic_base; + HRESULT hr; + int idx = 0; + int i; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + hr = WHvGetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index, + whpx_register_names, + RTL_NUMBER_OF(whpx_register_names), + &vcxt.values[0]); + if (FAILED(hr)) { + error_report("WHPX: Failed to get virtual processor context, hr=%08lx", + hr); + __debugbreak(); + } + + /* Indexes for first 16 registers match between HV and QEMU definitions */ + for (idx = 0; idx < CPU_NB_REGS64; idx += 1) { + env->regs[idx] = vcxt.values[idx].Reg64; + } + + /* Same goes for RIP and RFLAGS */ + assert(whpx_register_names[idx] == WHvX64RegisterRip); + env->eip = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterRflags); + env->eflags = vcxt.values[idx++].Reg64; + + /* Translate 6+4 segment registers. HV and QEMU order matches */ + assert(idx == WHvX64RegisterEs); + for (i = 0; i < 6; i += 1, idx += 1) { + env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment); + } + + assert(idx == WHvX64RegisterLdtr); + env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment); + assert(idx == WHvX64RegisterTr); + env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment); + assert(idx == WHvX64RegisterIdtr); + env->idt.base = vcxt.values[idx].Table.Base; + env->idt.limit = vcxt.values[idx].Table.Limit; + idx += 1; + assert(idx == WHvX64RegisterGdtr); + env->gdt.base = vcxt.values[idx].Table.Base; + env->gdt.limit = vcxt.values[idx].Table.Limit; + idx += 1; + + /* CR0, 2, 3, 4, 8 */ + assert(whpx_register_names[idx] == WHvX64RegisterCr0); + env->cr[0] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr2); + env->cr[2] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr3); + env->cr[3] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr4); + env->cr[4] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr8); + tpr = vcxt.values[idx++].Reg64; + if (tpr != vcpu->tpr) { + vcpu->tpr = tpr; + cpu_set_apic_tpr(x86_cpu->apic_state, tpr); + } + + /* 8 Debug Registers - Skipped */ + + /* 16 XMM registers */ + assert(whpx_register_names[idx] == WHvX64RegisterXmm0); + for (i = 0; i < 16; i += 1, idx += 1) { + env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64; + env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64; + } + + /* 8 FP registers */ + assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); + for (i = 0; i < 8; i += 1, idx += 1) { + env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64; + /* env->fpregs[i].mmx.MMX_Q(1) = + vcxt.values[idx].Fp.AsUINT128.High64; + */ + } + + /* FP control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); + env->fpuc = vcxt.values[idx].FpControlStatus.FpControl; + env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7; + env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800; + for (i = 0; i < 8; ++i) { + env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1); + } + env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp; + env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip; + idx += 1; + + /* XMM control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); + env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl; + idx += 1; + + /* MSRs */ + assert(whpx_register_names[idx] == WHvX64RegisterTsc); + env->tsc = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterEfer); + env->efer = vcxt.values[idx++].Reg64; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); + env->kernelgsbase = vcxt.values[idx++].Reg64; +#endif + + assert(whpx_register_names[idx] == WHvX64RegisterApicBase); + apic_base = vcxt.values[idx++].Reg64; + if (apic_base != vcpu->apic_base) { + vcpu->apic_base = apic_base; + cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base); + } + + /* WHvX64RegisterPat - Skipped */ + + assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); + env->sysenter_cs = vcxt.values[idx++].Reg64;; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); + env->sysenter_eip = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); + env->sysenter_esp = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterStar); + env->star = vcxt.values[idx++].Reg64; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterLstar); + env->lstar = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCstar); + env->cstar = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterSfmask); + env->fmask = vcxt.values[idx++].Reg64; +#endif + + /* Interrupt / Event Registers - Skipped */ + + assert(idx == RTL_NUMBER_OF(whpx_register_names)); + + return; +} + +static HRESULT CALLBACK whpx_emu_ioport_callback( + void *ctx, + WHV_EMULATOR_IO_ACCESS_INFO *IoAccess) +{ + MemTxAttrs attrs = { 0 }; + address_space_rw(&address_space_io, IoAccess->Port, attrs, + (uint8_t *)&IoAccess->Data, IoAccess->AccessSize, + IoAccess->Direction); + return S_OK; +} + +static HRESULT CALLBACK whpx_emu_memio_callback( + void *ctx, + WHV_EMULATOR_MEMORY_ACCESS_INFO *ma) +{ + cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize, + ma->Direction); + return S_OK; +} + +static HRESULT CALLBACK whpx_emu_getreg_callback( + void *ctx, + const WHV_REGISTER_NAME *RegisterNames, + UINT32 RegisterCount, + WHV_REGISTER_VALUE *RegisterValues) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + + hr = WHvGetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index, + RegisterNames, RegisterCount, + RegisterValues); + if (FAILED(hr)) { + error_report("WHPX: Failed to get virtual processor registers," + " hr=%08lx", hr); + __debugbreak(); + } + + return hr; +} + +static HRESULT CALLBACK whpx_emu_setreg_callback( + void *ctx, + const WHV_REGISTER_NAME *RegisterNames, + UINT32 RegisterCount, + const WHV_REGISTER_VALUE *RegisterValues) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + + hr = WHvSetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index, + RegisterNames, RegisterCount, + RegisterValues); + if (FAILED(hr)) { + error_report("WHPX: Failed to set virtual processor registers," + " hr=%08lx", hr); + __debugbreak(); + } + + /* + * The emulator just successfully wrote the register state. We clear the + * dirty state so we avoid the double write on resume of the VP. + */ + cpu->vcpu_dirty = false; + + return hr; +} + +static HRESULT CALLBACK whpx_emu_translate_callback( + void *ctx, + WHV_GUEST_VIRTUAL_ADDRESS Gva, + WHV_TRANSLATE_GVA_FLAGS TranslateFlags, + WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult, + WHV_GUEST_PHYSICAL_ADDRESS *Gpa) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + WHV_TRANSLATE_GVA_RESULT res; + + hr = WHvTranslateGva(whpx->partition, cpu->cpu_index, + Gva, TranslateFlags, &res, Gpa); + if (FAILED(hr)) { + error_report("WHPX: Failed to translate GVA, hr=%08lx", hr); + __debugbreak(); + } else { + *TranslationResult = res.ResultCode; + } + + return hr; +} + +static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = { + .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback, + .WHvEmulatorMemoryCallback = whpx_emu_memio_callback, + .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback, + .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback, + .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback, +}; + +static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx) +{ + HRESULT hr; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + WHV_EMULATOR_STATUS emu_status; + + hr = WHvEmulatorTryMmioEmulation(vcpu->emulator, cpu, ctx, &emu_status); + if (FAILED(hr)) { + __debugbreak(); + error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr); + return -1; + } + + if (!emu_status.EmulationSuccessful) { + __debugbreak(); + error_report("WHPX: Failed to emulate MMIO access"); + return -1; + } + + return 0; +} + +static int whpx_handle_portio(CPUState *cpu, + WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx) +{ + HRESULT hr; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + WHV_EMULATOR_STATUS emu_status; + + hr = WHvEmulatorTryIoEmulation(vcpu->emulator, cpu, ctx, &emu_status); + if (FAILED(hr)) { + __debugbreak(); + error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr); + return -1; + } + + if (!emu_status.EmulationSuccessful) { + __debugbreak(); + error_report("WHPX: Failed to emulate PortMMIO access"); + return -1; + } + + return 0; +} + +static int whpx_handle_halt(CPUState *cpu) +{ + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + int ret = 0; + + qemu_mutex_lock_iothread(); + if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) && + !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu->exception_index = EXCP_HLT; + cpu->halted = true; + ret = 1; + } + qemu_mutex_unlock_iothread(); + + return ret; +} + +static void whpx_vcpu_pre_run(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + int irq; + WHV_X64_PENDING_INTERRUPTION_REGISTER new_int = {0}; + UINT32 reg_count = 0; + WHV_REGISTER_VALUE reg_values[3] = {0}; + WHV_REGISTER_NAME reg_names[3]; + + qemu_mutex_lock_iothread(); + + /* Inject NMI */ + if (!vcpu->interrupt_in_flight.InterruptionPending && + cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { + cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + vcpu->interruptable = false; + new_int.InterruptionType = WHvX64PendingNmi; + new_int.InterruptionPending = 1; + new_int.InterruptionVector = 2; + } + if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { + qemu_mutex_lock_iothread(); + cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + __debugbreak(); + qemu_mutex_unlock_iothread(); + } + } + + /* + * Force the VCPU out of its inner loop to process any INIT requests or + * commit pending TPR access. + */ + if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + cpu->exit_request = 1; + } + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->exit_request = 1; + } + } + + /* Get pending hard interruption or replay one that was overwritten */ + if (!vcpu->interrupt_in_flight.InterruptionPending && + vcpu->interruptable && (env->eflags & IF_MASK)) { + assert(!new_int.InterruptionPending); + if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + new_int.InterruptionType = WHvX64PendingInterrupt; + new_int.InterruptionPending = 1; + new_int.InterruptionVector = irq; + } + } + } + + /* Setup interrupt state if new one was prepared */ + if (new_int.InterruptionPending) { + reg_values[reg_count].PendingInterruption = new_int; + reg_names[reg_count] = WHvRegisterPendingInterruption; + reg_count += 1; + } + + /* Sync the TPR to the CR8 if was modified during the intercept */ + reg_values[reg_count].Reg64 = cpu_get_apic_tpr(x86_cpu->apic_state); + if (reg_values[reg_count].Reg64 != vcpu->tpr) { + vcpu->tpr = reg_values[reg_count].Reg64; + cpu->exit_request = 1; + reg_names[reg_count] = WHvX64RegisterCr8; + reg_count += 1; + } + + /* Update the state of the interrupt delivery notification */ + if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { + reg_values[reg_count].DeliverabilityNotifications.InterruptNotification + = 1; + if (vcpu->window_registered != 1) { + vcpu->window_registered = 1; + } + reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications; + reg_count += 1; + } + + qemu_mutex_unlock_iothread(); + + if (reg_count) { + hr = WHvSetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index, + reg_names, reg_count, reg_values); + if (FAILED(hr)) { + error_report("WHPX: Failed to set interrupt state registers," + " hr=%08lx", hr); + __debugbreak(); + } + } + + return; +} + +static void whpx_vcpu_post_run(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + WHV_REGISTER_VALUE reg_values[4]; + const WHV_REGISTER_NAME reg_names[4] = { + WHvX64RegisterRflags, + WHvX64RegisterCr8, + WHvRegisterPendingInterruption, + WHvRegisterInterruptState, + }; + + hr = WHvGetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index, + reg_names, 4, reg_values); + if (FAILED(hr)) { + error_report("WHPX: Failed to get interrupt state regusters," + " hr=%08lx", hr); + __debugbreak(); + vcpu->interruptable = false; + return; + } + + assert(reg_names[0] == WHvX64RegisterRflags); + env->eflags = reg_values[0].Reg64; + + assert(reg_names[1] == WHvX64RegisterCr8); + if (vcpu->tpr != reg_values[1].Reg64) { + vcpu->tpr = reg_values[1].Reg64; + qemu_mutex_lock_iothread(); + cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr); + qemu_mutex_unlock_iothread(); + } + + assert(reg_names[2] == WHvRegisterPendingInterruption); + vcpu->interrupt_in_flight = reg_values[2].PendingInterruption; + + assert(reg_names[3] == WHvRegisterInterruptState); + vcpu->interruptable = !reg_values[3].InterruptState.InterruptShadow; + + return; +} + +static void whpx_vcpu_process_async_events(CPUState *cpu) +{ + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + + if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + + do_cpu_init(x86_cpu); + cpu->vcpu_dirty = true; + vcpu->interruptable = true; + } + + if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(x86_cpu->apic_state); + } + + if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) || + (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu->halted = false; + } + + if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + if (!cpu->vcpu_dirty) { + whpx_get_registers(cpu); + } + do_cpu_sipi(x86_cpu); + } + + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (!cpu->vcpu_dirty) { + whpx_get_registers(cpu); + } + apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, + env->tpr_access_type); + } + + return; +} + +static int whpx_vcpu_run(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + int ret; + + whpx_vcpu_process_async_events(cpu); + if (cpu->halted) { + cpu->exception_index = EXCP_HLT; + atomic_set(&cpu->exit_request, false); + return 0; + } + + qemu_mutex_unlock_iothread(); + cpu_exec_start(cpu); + + do { + if (cpu->vcpu_dirty) { + whpx_set_registers(cpu); + cpu->vcpu_dirty = false; + } + + whpx_vcpu_pre_run(cpu); + + if (atomic_read(&cpu->exit_request)) { + whpx_vcpu_kick(cpu); + } + + for (;;) { + hr = WHvRunVirtualProcessor(whpx->partition, cpu->cpu_index, + &vcpu->exit_ctx, whpx->exit_ctx_size); + + if (SUCCEEDED(hr) && (vcpu->exit_ctx.ExitReason == + WHvRunVpExitReasonAlerted)) { + WHvCancelRunVirtualProcessor(whpx->partition, cpu->cpu_index, + 0); + } else { + break; + } + } + + if (FAILED(hr)) { + error_report("WHPX: Failed to exec a virtual processor," + " hr=%08lx", hr); + ret = -1; + break; + } + + whpx_vcpu_post_run(cpu); + + switch (vcpu->exit_ctx.ExitReason) { + case WHvRunVpExitReasonMemoryAccess: + ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess); + break; + + case WHvRunVpExitReasonX64IoPortAccess: + ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess); + break; + + case WHvRunVpExitReasonX64InterruptWindow: + vcpu->window_registered = 0; + break; + + case WHvRunVpExitReasonX64Halt: + ret = whpx_handle_halt(cpu); + break; + + case WHvRunVpExitReasonCanceled: + cpu->exception_index = EXCP_INTERRUPT; + ret = 1; + break; + + case WHvRunVpExitReasonNone: + case WHvRunVpExitReasonUnrecoverableException: + case WHvRunVpExitReasonInvalidVpRegisterValue: + case WHvRunVpExitReasonUnsupportedFeature: + case WHvRunVpExitReasonX64MsrAccess: + case WHvRunVpExitReasonX64Cpuid: + case WHvRunVpExitReasonException: + case WHvRunVpExitReasonAlerted: + default: + error_report("WHPX: Unexpected VP exit code %d", + vcpu->exit_ctx.ExitReason); + whpx_get_registers(cpu); + qemu_mutex_lock_iothread(); + qemu_system_guest_panicked(cpu_get_crash_info(cpu)); + qemu_mutex_unlock_iothread(); + break; + } + + } while (!ret); + + cpu_exec_end(cpu); + qemu_mutex_lock_iothread(); + current_cpu = cpu; + + atomic_set(&cpu->exit_request, false); + + return ret < 0; +} + +static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) +{ + whpx_get_registers(cpu); + cpu->vcpu_dirty = true; +} + +static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, + run_on_cpu_data arg) +{ + whpx_set_registers(cpu); + cpu->vcpu_dirty = false; +} + +static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, + run_on_cpu_data arg) +{ + whpx_set_registers(cpu); + cpu->vcpu_dirty = false; +} + +static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, + run_on_cpu_data arg) +{ + cpu->vcpu_dirty = true; +} + +/* + * CPU support. + */ + +void whpx_cpu_synchronize_state(CPUState *cpu) +{ + if (!cpu->vcpu_dirty) { + run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); + } +} + +void whpx_cpu_synchronize_post_reset(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); +} + +void whpx_cpu_synchronize_post_init(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL); +} + +void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); +} + +/* + * Vcpu support. + */ + +static Error *whpx_migration_blocker; + +int whpx_init_vcpu(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu; + Error *local_error = NULL; + + /* Add migration blockers for all unsupported features of the + * Windows Hypervisor Platform + */ + if (whpx_migration_blocker == NULL) { + error_setg(&whpx_migration_blocker, + "State blocked due to non-migratable CPUID feature support," + "dirty memory tracking support, and XSAVE/XRSTOR support"); + + (void)migrate_add_blocker(whpx_migration_blocker, &local_error); + if (local_error) { + error_report_err(local_error); + error_free(whpx_migration_blocker); + migrate_del_blocker(whpx_migration_blocker); + return -EINVAL; + } + } + + vcpu = g_malloc0(FIELD_OFFSET(struct whpx_vcpu, exit_ctx) + + whpx->exit_ctx_size); + + if (!vcpu) { + error_report("WHPX: Failed to allocte VCPU context."); + return -ENOMEM; + } + + hr = WHvEmulatorCreateEmulator(whpx_emu_callbacks, &vcpu->emulator); + if (FAILED(hr)) { + error_report("WHPX: Failed to setup instruction completion support," + " hr=%08lx", hr); + g_free(vcpu); + return -EINVAL; + } + + hr = WHvCreateVirtualProcessor(whpx->partition, cpu->cpu_index, 0); + if (FAILED(hr)) { + error_report("WHPX: Failed to create a virtual processor," + " hr=%08lx", hr); + WHvEmulatorDestroyEmulator(vcpu->emulator); + g_free(vcpu); + return -EINVAL; + } + + vcpu->interruptable = true; + + cpu->vcpu_dirty = true; + cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu; + + return 0; +} + +int whpx_vcpu_exec(CPUState *cpu) +{ + int ret; + int fatal; + + for (;;) { + if (cpu->exception_index >= EXCP_INTERRUPT) { + ret = cpu->exception_index; + cpu->exception_index = -1; + break; + } + + fatal = whpx_vcpu_run(cpu); + + if (fatal) { + error_report("WHPX: Failed to exec a virtual processor"); + abort(); + } + } + + return ret; +} + +void whpx_destroy_vcpu(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + + WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index); + WHvEmulatorDestroyEmulator(vcpu->emulator); + g_free(cpu->hax_vcpu); + return; +} + +void whpx_vcpu_kick(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + WHvCancelRunVirtualProcessor(whpx->partition, cpu->cpu_index, 0); +} + +/* + * Memory support. + */ + +static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size, + void *host_va, int add, int rom, + const char *name) +{ + struct whpx_state *whpx = &whpx_global; + HRESULT hr; + + /* + if (add) { + printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n", + (void*)start_pa, (void*)size, host_va, + (rom ? "ROM" : "RAM"), name); + } else { + printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n", + (void*)start_pa, (void*)size, host_va, name); + } + */ + + if (add) { + hr = WHvMapGpaRange(whpx->partition, + host_va, + start_pa, + size, + (WHvMapGpaRangeFlagRead | + WHvMapGpaRangeFlagExecute | + (rom ? 0 : WHvMapGpaRangeFlagWrite))); + } else { + hr = WHvUnmapGpaRange(whpx->partition, + start_pa, + size); + } + + if (FAILED(hr)) { + error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes," + " Host:%p, hr=%08lx", + (add ? "MAP" : "UNMAP"), name, + (void *)start_pa, (void *)size, host_va, hr); + } +} + +static void whpx_process_section(MemoryRegionSection *section, int add) +{ + MemoryRegion *mr = section->mr; + hwaddr start_pa = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + unsigned int delta; + uint64_t host_va; + + if (!memory_region_is_ram(mr)) { + return; + } + + delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask); + delta &= ~qemu_real_host_page_mask; + if (delta > size) { + return; + } + start_pa += delta; + size -= delta; + size &= qemu_real_host_page_mask; + if (!size || (start_pa & ~qemu_real_host_page_mask)) { + return; + } + + host_va = (uintptr_t)memory_region_get_ram_ptr(mr) + + section->offset_within_region + delta; + + whpx_update_mapping(start_pa, size, (void *)host_va, add, + memory_region_is_rom(mr), mr->name); +} + +static void whpx_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + memory_region_ref(section->mr); + whpx_process_section(section, 1); +} + +static void whpx_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + whpx_process_section(section, 0); + memory_region_unref(section->mr); +} + +static void whpx_transaction_begin(MemoryListener *listener) +{ +} + +static void whpx_transaction_commit(MemoryListener *listener) +{ +} + +static void whpx_log_sync(MemoryListener *listener, + MemoryRegionSection *section) +{ + MemoryRegion *mr = section->mr; + + if (!memory_region_is_ram(mr)) { + return; + } + + memory_region_set_dirty(mr, 0, int128_get64(section->size)); +} + +static MemoryListener whpx_memory_listener = { + .begin = whpx_transaction_begin, + .commit = whpx_transaction_commit, + .region_add = whpx_region_add, + .region_del = whpx_region_del, + .log_sync = whpx_log_sync, + .priority = 10, +}; + +static void whpx_memory_init(void) +{ + memory_listener_register(&whpx_memory_listener, &address_space_memory); +} + +static void whpx_handle_interrupt(CPUState *cpu, int mask) +{ + cpu->interrupt_request |= mask; + + if (!qemu_cpu_is_self(cpu)) { + qemu_cpu_kick(cpu); + } +} + +/* + * Partition support + */ + +static int whpx_accel_init(MachineState *ms) +{ + struct whpx_state *whpx; + int ret; + HRESULT hr; + WHV_CAPABILITY whpx_cap; + WHV_PARTITION_PROPERTY prop; + + whpx = &whpx_global; + + memset(whpx, 0, sizeof(struct whpx_state)); + whpx->mem_quota = ms->ram_size; + + hr = WHvGetCapability(WHvCapabilityCodeHypervisorPresent, &whpx_cap, + sizeof(whpx_cap)); + if (FAILED(hr) || !whpx_cap.HypervisorPresent) { + error_report("WHPX: No accelerator found, hr=%08lx", hr); + ret = -ENOSPC; + goto error; + } + + hr = WHvCreatePartition(&whpx->partition); + if (FAILED(hr)) { + error_report("WHPX: Failed to create partition, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); + prop.PropertyCode = WHvPartitionPropertyCodeProcessorCount; + prop.ProcessorCount = smp_cpus; + hr = WHvSetPartitionProperty(whpx->partition, + &prop, + sizeof(WHV_PARTITION_PROPERTY)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set partition core count to %d," + " hr=%08lx", smp_cores, hr); + ret = -EINVAL; + goto error; + } + + hr = WHvSetupPartition(whpx->partition); + if (FAILED(hr)) { + error_report("WHPX: Failed to setup partition, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + whpx->exit_ctx_size = WHvGetRunExitContextSize(); + assert(whpx->exit_ctx_size); + + whpx_memory_init(); + + cpu_interrupt_handler = whpx_handle_interrupt; + + printf("Windows Hypervisor Platform accelerator is operational\n"); + return 0; + + error: + + if (NULL != whpx->partition) { + WHvDeletePartition(whpx->partition); + whpx->partition = NULL; + } + + + return ret; +} + +int whpx_enabled(void) +{ + return whpx_allowed; +} + +static void whpx_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "WHPX"; + ac->init_machine = whpx_accel_init; + ac->allowed = &whpx_allowed; +} + +static const TypeInfo whpx_accel_type = { + .name = ACCEL_CLASS_NAME("whpx"), + .parent = TYPE_ACCEL, + .class_init = whpx_accel_class_init, +}; + +static void whpx_type_init(void) +{ + type_register_static(&whpx_accel_type); +} + +type_init(whpx_type_init); From 19306806ae30b7fb5fe61a9130c6995402acad00 Mon Sep 17 00:00:00 2001 From: "Justin Terry (VM)" Date: Mon, 22 Jan 2018 13:07:49 -0800 Subject: [PATCH 47/47] Add the WHPX acceleration enlightenments Implements the WHPX accelerator cpu enlightenments to actually use the whpx-all accelerator on Windows platforms. Signed-off-by: Justin Terry (VM) Message-Id: <1516655269-1785-5-git-send-email-juterry@microsoft.com> [Register/unregister VCPU thread with RCU. - Paolo] Signed-off-by: Paolo Bonzini --- cpus.c | 69 ++++++++++++++++++++++++++++++++++++++- include/sysemu/hw_accel.h | 13 ++++++++ target/i386/helper.c | 2 +- 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index 8d8bb7c6d3..6825fe10c6 100644 --- a/cpus.c +++ b/cpus.c @@ -38,6 +38,7 @@ #include "sysemu/kvm.h" #include "sysemu/hax.h" #include "sysemu/hvf.h" +#include "sysemu/whpx.h" #include "qmp-commands.h" #include "exec/exec-all.h" @@ -1545,6 +1546,49 @@ static void *qemu_hvf_cpu_thread_fn(void *arg) return NULL; } +static void *qemu_whpx_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + int r; + + rcu_register_thread(); + + qemu_mutex_lock_iothread(); + qemu_thread_get_self(cpu->thread); + cpu->thread_id = qemu_get_thread_id(); + current_cpu = cpu; + + r = whpx_init_vcpu(cpu); + if (r < 0) { + fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r)); + exit(1); + } + + /* signal CPU creation */ + cpu->created = true; + qemu_cond_signal(&qemu_cpu_cond); + + do { + if (cpu_can_run(cpu)) { + r = whpx_vcpu_exec(cpu); + if (r == EXCP_DEBUG) { + cpu_handle_guest_debug(cpu); + } + } + while (cpu_thread_is_idle(cpu)) { + qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); + } + qemu_wait_io_event_common(cpu); + } while (!cpu->unplug || cpu_can_run(cpu)); + + whpx_destroy_vcpu(cpu); + cpu->created = false; + qemu_cond_signal(&qemu_cpu_cond); + qemu_mutex_unlock_iothread(); + rcu_unregister_thread(); + return NULL; +} + #ifdef _WIN32 static void CALLBACK dummy_apc_func(ULONG_PTR unused) { @@ -1635,7 +1679,9 @@ static void qemu_cpu_kick_thread(CPUState *cpu) } #else /* _WIN32 */ if (!qemu_cpu_is_self(cpu)) { - if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) { + if (whpx_enabled()) { + whpx_vcpu_kick(cpu); + } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) { fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n", __func__, GetLastError()); exit(1); @@ -1877,6 +1923,25 @@ static void qemu_hvf_start_vcpu(CPUState *cpu) } } +static void qemu_whpx_start_vcpu(CPUState *cpu) +{ + char thread_name[VCPU_THREAD_NAME_SIZE]; + + cpu->thread = g_malloc0(sizeof(QemuThread)); + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); + qemu_cond_init(cpu->halt_cond); + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX", + cpu->cpu_index); + qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); +#ifdef _WIN32 + cpu->hThread = qemu_thread_get_handle(cpu->thread); +#endif + while (!cpu->created) { + qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); + } +} + static void qemu_dummy_start_vcpu(CPUState *cpu) { char thread_name[VCPU_THREAD_NAME_SIZE]; @@ -1915,6 +1980,8 @@ void qemu_init_vcpu(CPUState *cpu) qemu_hvf_start_vcpu(cpu); } else if (tcg_enabled()) { qemu_tcg_init_vcpu(cpu); + } else if (whpx_enabled()) { + qemu_whpx_start_vcpu(cpu); } else { qemu_dummy_start_vcpu(cpu); } diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h index 469ffda460..d2ddfb5ad0 100644 --- a/include/sysemu/hw_accel.h +++ b/include/sysemu/hw_accel.h @@ -14,6 +14,7 @@ #include "qom/cpu.h" #include "sysemu/hax.h" #include "sysemu/kvm.h" +#include "sysemu/whpx.h" static inline void cpu_synchronize_state(CPUState *cpu) { @@ -23,6 +24,9 @@ static inline void cpu_synchronize_state(CPUState *cpu) if (hax_enabled()) { hax_cpu_synchronize_state(cpu); } + if (whpx_enabled()) { + whpx_cpu_synchronize_state(cpu); + } } static inline void cpu_synchronize_post_reset(CPUState *cpu) @@ -33,6 +37,9 @@ static inline void cpu_synchronize_post_reset(CPUState *cpu) if (hax_enabled()) { hax_cpu_synchronize_post_reset(cpu); } + if (whpx_enabled()) { + whpx_cpu_synchronize_post_reset(cpu); + } } static inline void cpu_synchronize_post_init(CPUState *cpu) @@ -43,6 +50,9 @@ static inline void cpu_synchronize_post_init(CPUState *cpu) if (hax_enabled()) { hax_cpu_synchronize_post_init(cpu); } + if (whpx_enabled()) { + whpx_cpu_synchronize_post_init(cpu); + } } static inline void cpu_synchronize_pre_loadvm(CPUState *cpu) @@ -53,6 +63,9 @@ static inline void cpu_synchronize_pre_loadvm(CPUState *cpu) if (hax_enabled()) { hax_cpu_synchronize_pre_loadvm(cpu); } + if (whpx_enabled()) { + whpx_cpu_synchronize_pre_loadvm(cpu); + } } #endif /* QEMU_HW_ACCEL_H */ diff --git a/target/i386/helper.c b/target/i386/helper.c index f63eb3d3f4..9fba146b7f 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -986,7 +986,7 @@ void cpu_report_tpr_access(CPUX86State *env, TPRAccess access) X86CPU *cpu = x86_env_get_cpu(env); CPUState *cs = CPU(cpu); - if (kvm_enabled()) { + if (kvm_enabled() || whpx_enabled()) { env->tpr_access_type = access; cpu_interrupt(cs, CPU_INTERRUPT_TPR);