From 6b4495401bdf442457b713b7e3994b465c55af35 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 19 Jul 2016 23:16:19 +0300 Subject: [PATCH 01/41] pcie: fix link active status bit migration We changed link status register in pci express endpoint capability over time. Specifically, commit b2101eae63ea57b571cee4a9075a4287d24ba4a4 ("pcie: Set the "link active" in the link status register") set data link layer link active bit in this register without adding compatibility to old machine types. When migrating from qemu 2.3 and older this affects xhci devices which under machine type 2.0 and older have a pci express endpoint capability even if they are on a pci bus. Add compatibility flags to make this bit value match what it was under 2.3. Additionally, to avoid breaking migration from qemu 2.3 and up, suppress checking link status during migration: this seems sane since hardware can change link status at any time. https://bugzilla.redhat.com/show_bug.cgi?id=1352860 Reported-by: Gerd Hoffmann Fixes: b2101eae63ea57b571cee4a9075a4287d24ba4a4 ("pcie: Set the "link active" in the link status register") Cc: qemu-stable@nongnu.org Cc: Benjamin Herrenschmidt Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 2 ++ hw/pci/pcie.c | 24 ++++++++++++++++++------ include/hw/compat.h | 4 ++++ include/hw/pci/pci.h | 3 +++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 728c6d4b3b..24fae1689d 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -62,6 +62,8 @@ static Property pci_props[] = { QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false), DEFINE_PROP_BIT("command_serr_enable", PCIDevice, cap_present, QEMU_PCI_CAP_SERR_BITNR, true), + DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present, + QEMU_PCIE_LNKSTA_DLLLA_BITNR, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 9599fdef57..99cfb4561b 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -45,8 +45,11 @@ */ static void -pcie_cap_v1_fill(uint8_t *exp_cap, uint8_t port, uint8_t type, uint8_t version) +pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) { + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint8_t *cmask = dev->cmask + dev->exp.exp_cap; + /* capability register interrupt message number defaults to 0 */ pci_set_word(exp_cap + PCI_EXP_FLAGS, @@ -69,7 +72,18 @@ pcie_cap_v1_fill(uint8_t *exp_cap, uint8_t port, uint8_t type, uint8_t version) PCI_EXP_LNK_LS_25); pci_set_word(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25 |PCI_EXP_LNKSTA_DLLLA); + PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25); + + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } + + /* We changed link status bits over time, and changing them across + * migrations is generally fine as hardware changes them too. + * Let's not bother checking. + */ + pci_set_word(cmask + PCI_EXP_LNKSTA, 0); } int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) @@ -88,7 +102,7 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) exp_cap = dev->config + pos; /* Filling values common with v1 */ - pcie_cap_v1_fill(exp_cap, port, type, PCI_EXP_FLAGS_VER2); + pcie_cap_v1_fill(dev, port, type, PCI_EXP_FLAGS_VER2); /* Filling v2 specific values */ pci_set_long(exp_cap + PCI_EXP_DEVCAP2, @@ -103,7 +117,6 @@ int pcie_cap_v1_init(PCIDevice *dev, uint8_t offset, uint8_t type, { /* PCIe cap v1 init */ int pos; - uint8_t *exp_cap; assert(pci_is_express(dev)); @@ -112,9 +125,8 @@ int pcie_cap_v1_init(PCIDevice *dev, uint8_t offset, uint8_t type, return pos; } dev->exp.exp_cap = pos; - exp_cap = dev->config + pos; - pcie_cap_v1_fill(exp_cap, port, type, PCI_EXP_FLAGS_VER1); + pcie_cap_v1_fill(dev, port, type, PCI_EXP_FLAGS_VER1); return pos; } diff --git a/include/hw/compat.h b/include/hw/compat.h index 9914e7a59e..e5113dcabc 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -80,6 +80,10 @@ .driver = "virtio-rng-pci",\ .property = "any_layout",\ .value = "off",\ + },{\ + .driver = TYPE_PCI_DEVICE,\ + .property = "x-pcie-lnksta-dllla",\ + .value = "off",\ }, #define HW_COMPAT_2_2 \ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 74d797d1cf..929ec2fb07 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -174,6 +174,9 @@ enum { /* PCI Express capability - Power Controller Present */ #define QEMU_PCIE_SLTCAP_PCP_BITNR 7 QEMU_PCIE_SLTCAP_PCP = (1 << QEMU_PCIE_SLTCAP_PCP_BITNR), + /* Link active status in endpoint capability is always set */ +#define QEMU_PCIE_LNKSTA_DLLLA_BITNR 8 + QEMU_PCIE_LNKSTA_DLLLA = (1 << QEMU_PCIE_LNKSTA_DLLLA_BITNR), }; #define TYPE_PCI_DEVICE "pci-device" From 2c533c5479f0f712578422529b20651f01adb0f4 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Sun, 17 Jul 2016 19:53:08 +0300 Subject: [PATCH 02/41] hw/pcie-root-port: Fix PCIe root port initialization Specify the root port interrupt pin as part of the init process for cases when msi/msix are not enabled. Fixes "hw/pci/pci.c:196:23: runtime error: shift exponent -1 is negative" warning from clang's sanitizer. Reported-by: Peter Maydell Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/ioh3420.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c index 0444b596c6..c8b5ac4207 100644 --- a/hw/pci-bridge/ioh3420.c +++ b/hw/pci-bridge/ioh3420.c @@ -100,6 +100,7 @@ static int ioh3420_initfn(PCIDevice *d) int rc; Error *err = NULL; + pci_config_set_interrupt_pin(d->config, 1); pci_bridge_initfn(d, TYPE_PCIE_BUS); pcie_port_init_reg(d); From 7b346c742cd95816c1d5badcc88e18bb95ace08c Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Sun, 17 Jul 2016 19:53:10 +0300 Subject: [PATCH 03/41] hw/pxb: declare pxb devices as not hot-pluggable Prevent future issues when hotplug will work for devices attached to pxbs. Suggested-by: Igor Mammedov Signed-off-by: Marcel Apfelbaum Tested-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/pci_expander_bridge.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index ab8612158d..b4f8ca275f 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -310,6 +310,7 @@ static void pxb_dev_class_init(ObjectClass *klass, void *data) dc->desc = "PCI Expander Bridge"; dc->props = pxb_dev_properties; + dc->hotpluggable = false; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); } @@ -343,6 +344,7 @@ static void pxb_pcie_dev_class_init(ObjectClass *klass, void *data) dc->desc = "PCI Express Expander Bridge"; dc->props = pxb_dev_properties; + dc->hotpluggable = false; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); } From c99cb18eeb8bca145ee260a507ccce6775a6d260 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Sun, 17 Jul 2016 19:53:11 +0300 Subject: [PATCH 04/41] hw/acpi: fix a DSDT table issue when a pxb is present. PXBs do not support hotplug so they don't have a PCNT function. Since the PXB's PCI root-bus is a child bus of bus 0, the build_dsdt code will add a call to the corresponding PCNT function. Fix this by skipping the PCNT call for the above case. While at it skip also PCIe child buses. Reported-by: Igor Mammedov Signed-off-by: Marcel Apfelbaum Tested-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 77c40d92e2..5c0d643331 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -600,6 +600,10 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, QLIST_FOREACH(sec, &bus->child, sibling) { int32_t devfn = sec->parent_dev->devfn; + if (pci_bus_is_root(sec) || pci_bus_is_express(sec)) { + continue; + } + aml_append(method, aml_name("^S%.02X.PCNT", devfn)); } } From 2df5a7b52f2b1923f59f47da4ac2fc5501b4bd3c Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Sun, 17 Jul 2016 19:53:12 +0300 Subject: [PATCH 05/41] acpi: refactor pxb crs computation Instead of always passing both IO and MEM ranges when computing CRS ranges, define a new CrsRangeSet structure that include them both. This is done before introducing a third type of range, 64-bit MEM, so it will be easier to pass them all around. Reviewed-by: Igor Mammedov Signed-off-by: Marcel Apfelbaum Tested-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 81 +++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 5c0d643331..aa540abf4d 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -751,6 +751,23 @@ static void crs_range_free(gpointer data) g_free(entry); } +typedef struct CrsRangeSet { + GPtrArray *io_ranges; + GPtrArray *mem_ranges; + } CrsRangeSet; + +static void crs_range_set_init(CrsRangeSet *range_set) +{ + range_set->io_ranges = g_ptr_array_new_with_free_func(crs_range_free); + range_set->mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); +} + +static void crs_range_set_free(CrsRangeSet *range_set) +{ + g_ptr_array_free(range_set->io_ranges, true); + g_ptr_array_free(range_set->mem_ranges, true); +} + static gint crs_range_compare(gconstpointer a, gconstpointer b) { CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; @@ -835,18 +852,17 @@ static void crs_range_merge(GPtrArray *range) g_ptr_array_free(tmp, true); } -static Aml *build_crs(PCIHostState *host, - GPtrArray *io_ranges, GPtrArray *mem_ranges) +static Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set) { Aml *crs = aml_resource_template(); - GPtrArray *host_io_ranges = g_ptr_array_new_with_free_func(crs_range_free); - GPtrArray *host_mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); + CrsRangeSet temp_range_set; CrsRangeEntry *entry; uint8_t max_bus = pci_bus_num(host->bus); uint8_t type; int devfn; int i; + crs_range_set_init(&temp_range_set); for (devfn = 0; devfn < ARRAY_SIZE(host->bus->devices); devfn++) { uint64_t range_base, range_limit; PCIDevice *dev = host->bus->devices[devfn]; @@ -870,9 +886,11 @@ static Aml *build_crs(PCIHostState *host, } if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { - crs_range_insert(host_io_ranges, range_base, range_limit); + crs_range_insert(temp_range_set.io_ranges, + range_base, range_limit); } else { /* "memory" */ - crs_range_insert(host_mem_ranges, range_base, range_limit); + crs_range_insert(temp_range_set.mem_ranges, + range_base, range_limit); } } @@ -891,7 +909,8 @@ static Aml *build_crs(PCIHostState *host, * that do not support multiple root buses */ if (range_base && range_base <= range_limit) { - crs_range_insert(host_io_ranges, range_base, range_limit); + crs_range_insert(temp_range_set.io_ranges, + range_base, range_limit); } range_base = @@ -904,7 +923,8 @@ static Aml *build_crs(PCIHostState *host, * that do not support multiple root buses */ if (range_base && range_base <= range_limit) { - crs_range_insert(host_mem_ranges, range_base, range_limit); + crs_range_insert(temp_range_set.mem_ranges, + range_base, range_limit); } range_base = @@ -917,35 +937,36 @@ static Aml *build_crs(PCIHostState *host, * that do not support multiple root buses */ if (range_base && range_base <= range_limit) { - crs_range_insert(host_mem_ranges, range_base, range_limit); + crs_range_insert(temp_range_set.mem_ranges, + range_base, range_limit); } } } - crs_range_merge(host_io_ranges); - for (i = 0; i < host_io_ranges->len; i++) { - entry = g_ptr_array_index(host_io_ranges, i); + crs_range_merge(temp_range_set.io_ranges); + for (i = 0; i < temp_range_set.io_ranges->len; i++) { + entry = g_ptr_array_index(temp_range_set.io_ranges, i); aml_append(crs, aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, AML_ENTIRE_RANGE, 0, entry->base, entry->limit, 0, entry->limit - entry->base + 1)); - crs_range_insert(io_ranges, entry->base, entry->limit); + crs_range_insert(range_set->io_ranges, entry->base, entry->limit); } - g_ptr_array_free(host_io_ranges, true); - crs_range_merge(host_mem_ranges); - for (i = 0; i < host_mem_ranges->len; i++) { - entry = g_ptr_array_index(host_mem_ranges, i); + crs_range_merge(temp_range_set.mem_ranges); + for (i = 0; i < temp_range_set.mem_ranges->len; i++) { + entry = g_ptr_array_index(temp_range_set.mem_ranges, i); aml_append(crs, aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, AML_NON_CACHEABLE, AML_READ_WRITE, 0, entry->base, entry->limit, 0, entry->limit - entry->base + 1)); - crs_range_insert(mem_ranges, entry->base, entry->limit); + crs_range_insert(range_set->mem_ranges, entry->base, entry->limit); } - g_ptr_array_free(host_mem_ranges, true); + + crs_range_set_free(&temp_range_set); aml_append(crs, aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, @@ -1902,8 +1923,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, { CrsRangeEntry *entry; Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; - GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); - GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free); + CrsRangeSet crs_range_set; PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); uint32_t nr_mem = machine->ram_slots; @@ -1992,6 +2012,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } aml_append(dsdt, scope); + crs_range_set_init(&crs_range_set); bus = PC_MACHINE(machine)->bus; if (bus) { QLIST_FOREACH(bus, &bus->child, sibling) { @@ -2018,8 +2039,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } aml_append(dev, build_prt(false)); - crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), - io_ranges, mem_ranges); + crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), &crs_range_set); aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); aml_append(dsdt, scope); @@ -2040,9 +2060,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AML_POS_DECODE, AML_ENTIRE_RANGE, 0x0000, 0x0000, 0x0CF7, 0x0000, 0x0CF8)); - crs_replace_with_free_ranges(io_ranges, 0x0D00, 0xFFFF); - for (i = 0; i < io_ranges->len; i++) { - entry = g_ptr_array_index(io_ranges, i); + crs_replace_with_free_ranges(crs_range_set.io_ranges, 0x0D00, 0xFFFF); + for (i = 0; i < crs_range_set.io_ranges->len; i++) { + entry = g_ptr_array_index(crs_range_set.io_ranges, i); aml_append(crs, aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, AML_ENTIRE_RANGE, @@ -2055,11 +2075,11 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AML_CACHEABLE, AML_READ_WRITE, 0, 0x000A0000, 0x000BFFFF, 0, 0x00020000)); - crs_replace_with_free_ranges(mem_ranges, + crs_replace_with_free_ranges(crs_range_set.mem_ranges, range_lob(pci_hole), range_upb(pci_hole)); - for (i = 0; i < mem_ranges->len; i++) { - entry = g_ptr_array_index(mem_ranges, i); + for (i = 0; i < crs_range_set.mem_ranges->len; i++) { + entry = g_ptr_array_index(crs_range_set.mem_ranges, i); aml_append(crs, aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, AML_NON_CACHEABLE, AML_READ_WRITE, @@ -2094,8 +2114,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); - g_ptr_array_free(io_ranges, true); - g_ptr_array_free(mem_ranges, true); + crs_range_set_free(&crs_range_set); /* reserve PCIHP resources */ if (pm->pcihp_io_len) { From 16de88a4164f2e580b3b2ee67d1b9ac1dace1711 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Sun, 17 Jul 2016 19:53:13 +0300 Subject: [PATCH 06/41] hw/apci: handle 64-bit MMIO regions correctly In build_crs(), the calculation and merging of the ranges already happens in 64-bit, but the entry boundaries are silently truncated to 32-bit in the call to aml_dword_memory(). Fix it by handling the 64-bit MMIO ranges separately. This fixes 64-bit BARs behind PXBs. Reported-by: Laszlo Ersek Reviewed-by: Igor Mammedov Tested-by: Laszlo Ersek Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 54 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index aa540abf4d..a26a4bb03f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -754,18 +754,22 @@ static void crs_range_free(gpointer data) typedef struct CrsRangeSet { GPtrArray *io_ranges; GPtrArray *mem_ranges; + GPtrArray *mem_64bit_ranges; } CrsRangeSet; static void crs_range_set_init(CrsRangeSet *range_set) { range_set->io_ranges = g_ptr_array_new_with_free_func(crs_range_free); range_set->mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); + range_set->mem_64bit_ranges = + g_ptr_array_new_with_free_func(crs_range_free); } static void crs_range_set_free(CrsRangeSet *range_set) { g_ptr_array_free(range_set->io_ranges, true); g_ptr_array_free(range_set->mem_ranges, true); + g_ptr_array_free(range_set->mem_64bit_ranges, true); } static gint crs_range_compare(gconstpointer a, gconstpointer b) @@ -923,8 +927,14 @@ static Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set) * that do not support multiple root buses */ if (range_base && range_base <= range_limit) { - crs_range_insert(temp_range_set.mem_ranges, - range_base, range_limit); + uint64_t length = range_limit - range_base + 1; + if (range_limit <= UINT32_MAX && length <= UINT32_MAX) { + crs_range_insert(temp_range_set.mem_ranges, + range_base, range_limit); + } else { + crs_range_insert(temp_range_set.mem_64bit_ranges, + range_base, range_limit); + } } range_base = @@ -937,8 +947,14 @@ static Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set) * that do not support multiple root buses */ if (range_base && range_base <= range_limit) { - crs_range_insert(temp_range_set.mem_ranges, - range_base, range_limit); + uint64_t length = range_limit - range_base + 1; + if (range_limit <= UINT32_MAX && length <= UINT32_MAX) { + crs_range_insert(temp_range_set.mem_ranges, + range_base, range_limit); + } else { + crs_range_insert(temp_range_set.mem_64bit_ranges, + range_base, range_limit); + } } } } @@ -966,6 +982,19 @@ static Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set) crs_range_insert(range_set->mem_ranges, entry->base, entry->limit); } + crs_range_merge(temp_range_set.mem_64bit_ranges); + for (i = 0; i < temp_range_set.mem_64bit_ranges->len; i++) { + entry = g_ptr_array_index(temp_range_set.mem_64bit_ranges, i); + aml_append(crs, + aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, entry->base, entry->limit, 0, + entry->limit - entry->base + 1)); + crs_range_insert(range_set->mem_64bit_ranges, + entry->base, entry->limit); + } + crs_range_set_free(&temp_range_set); aml_append(crs, @@ -2088,11 +2117,18 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } if (!range_is_empty(pci_hole64)) { - aml_append(crs, - aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, - AML_CACHEABLE, AML_READ_WRITE, - 0, range_lob(pci_hole64), range_upb(pci_hole64), 0, - range_upb(pci_hole64) + 1 - range_lob(pci_hole64))); + crs_replace_with_free_ranges(crs_range_set.mem_64bit_ranges, + range_lob(pci_hole64), + range_upb(pci_hole64)); + for (i = 0; i < crs_range_set.mem_64bit_ranges->len; i++) { + entry = g_ptr_array_index(crs_range_set.mem_64bit_ranges, i); + aml_append(crs, + aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, + AML_CACHEABLE, AML_READ_WRITE, + 0, entry->base, entry->limit, + 0, entry->limit - entry->base + 1)); + } } if (misc->tpm_version != TPM_VERSION_UNSPEC) { From 86395eb31f5c7bf919ed456c3c410720243f733a Mon Sep 17 00:00:00 2001 From: Wei Jiangang Date: Tue, 17 May 2016 18:18:46 +0800 Subject: [PATCH 07/41] hw/pci-bridge: Convert pxb initialization functions to Error Firstly, convert pxb_dev_init_common() to Error and rename it to pxb_dev_realize_common(). Actually, pxb_register_bus() is converted as well. And then, convert pxb_dev_initfn() and pxb_pcie_dev_initfn() to Error, rename them to pxb_dev_realize() and pxb_pcie_dev_realize() respectively. Signed-off-by: Wei Jiangang Signed-off-by: Cao jin Reviewed-by: Marcel Apfelbaum Reviewed-by: Markus Armbruster Reviewed-by: Michael S. Tsirkin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/pci_expander_bridge.c | 52 ++++++++++++++--------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index b4f8ca275f..1cc598f7e9 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -11,6 +11,7 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pci_host.h" @@ -162,30 +163,25 @@ static const TypeInfo pxb_host_info = { }; /* - * Registers the PXB bus as a child of the i440fx root bus. - * - * Returns 0 on successs, -1 if i440fx host was not - * found or the bus number is already in use. + * Registers the PXB bus as a child of pci host root bus. */ -static int pxb_register_bus(PCIDevice *dev, PCIBus *pxb_bus) +static void pxb_register_bus(PCIDevice *dev, PCIBus *pxb_bus, Error **errp) { PCIBus *bus = dev->bus; int pxb_bus_num = pci_bus_num(pxb_bus); if (bus->parent_dev) { - error_report("PXB devices can be attached only to root bus."); - return -1; + error_setg(errp, "PXB devices can be attached only to root bus"); + return; } QLIST_FOREACH(bus, &bus->child, sibling) { if (pci_bus_num(bus) == pxb_bus_num) { - error_report("Bus %d is already in use.", pxb_bus_num); - return -1; + error_setg(errp, "Bus %d is already in use", pxb_bus_num); + return; } } QLIST_INSERT_HEAD(&dev->bus->child, pxb_bus, sibling); - - return 0; } static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin) @@ -215,17 +211,18 @@ static gint pxb_compare(gconstpointer a, gconstpointer b) 0; } -static int pxb_dev_init_common(PCIDevice *dev, bool pcie) +static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) { PXBDev *pxb = convert_to_pxb(dev); DeviceState *ds, *bds = NULL; PCIBus *bus; const char *dev_name = NULL; + Error *local_err = NULL; if (pxb->numa_node != NUMA_NODE_UNASSIGNED && pxb->numa_node >= nb_numa_nodes) { - error_report("Illegal numa node %d.", pxb->numa_node); - return -EINVAL; + error_setg(errp, "Illegal numa node %d", pxb->numa_node); + return; } if (dev->qdev.id && *dev->qdev.id) { @@ -250,7 +247,9 @@ static int pxb_dev_init_common(PCIDevice *dev, bool pcie) PCI_HOST_BRIDGE(ds)->bus = bus; - if (pxb_register_bus(dev, bus)) { + pxb_register_bus(dev, bus, &local_err); + if (local_err) { + error_propagate(errp, local_err); goto err_register_bus; } @@ -264,23 +263,22 @@ static int pxb_dev_init_common(PCIDevice *dev, bool pcie) pci_config_set_class(dev->config, PCI_CLASS_BRIDGE_HOST); pxb_dev_list = g_list_insert_sorted(pxb_dev_list, pxb, pxb_compare); - return 0; + return; err_register_bus: object_unref(OBJECT(bds)); object_unparent(OBJECT(bus)); object_unref(OBJECT(ds)); - return -EINVAL; } -static int pxb_dev_initfn(PCIDevice *dev) +static void pxb_dev_realize(PCIDevice *dev, Error **errp) { if (pci_bus_is_express(dev->bus)) { - error_report("pxb devices cannot reside on a PCIe bus!"); - return -EINVAL; + error_setg(errp, "pxb devices cannot reside on a PCIe bus"); + return; } - return pxb_dev_init_common(dev, false); + pxb_dev_realize_common(dev, false, errp); } static void pxb_dev_exitfn(PCIDevice *pci_dev) @@ -302,7 +300,7 @@ static void pxb_dev_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->init = pxb_dev_initfn; + k->realize = pxb_dev_realize; k->exit = pxb_dev_exitfn; k->vendor_id = PCI_VENDOR_ID_REDHAT; k->device_id = PCI_DEVICE_ID_REDHAT_PXB; @@ -321,14 +319,14 @@ static const TypeInfo pxb_dev_info = { .class_init = pxb_dev_class_init, }; -static int pxb_pcie_dev_initfn(PCIDevice *dev) +static void pxb_pcie_dev_realize(PCIDevice *dev, Error **errp) { if (!pci_bus_is_express(dev->bus)) { - error_report("pxb-pcie devices cannot reside on a PCI bus!"); - return -EINVAL; + error_setg(errp, "pxb-pcie devices cannot reside on a PCI bus"); + return; } - return pxb_dev_init_common(dev, true); + pxb_dev_realize_common(dev, true, errp); } static void pxb_pcie_dev_class_init(ObjectClass *klass, void *data) @@ -336,7 +334,7 @@ static void pxb_pcie_dev_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->init = pxb_pcie_dev_initfn; + k->realize = pxb_pcie_dev_realize; k->exit = pxb_dev_exitfn; k->vendor_id = PCI_VENDOR_ID_REDHAT; k->device_id = PCI_DEVICE_ID_REDHAT_PXB_PCIE; From be0d9760d7fa30e50ded15956f06506d993cbc88 Mon Sep 17 00:00:00 2001 From: Wei Jiangang Date: Tue, 17 May 2016 18:18:47 +0800 Subject: [PATCH 08/41] apb: convert init to realize Convert a device model where initialization obviously can't fail, make it implement realize() rather than init(). Signed-off-by: Wei Jiangang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Markus Armbruster --- hw/pci-host/apb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index 16587f8373..653e711121 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -634,7 +634,7 @@ static void pci_apb_set_irq(void *opaque, int irq_num, int level) } } -static int apb_pci_bridge_initfn(PCIDevice *dev) +static void apb_pci_bridge_realize(PCIDevice *dev, Error **errp) { pci_bridge_initfn(dev, TYPE_PCI_BUS); @@ -652,7 +652,6 @@ static int apb_pci_bridge_initfn(PCIDevice *dev) pci_set_word(dev->config + PCI_STATUS, PCI_STATUS_FAST_BACK | PCI_STATUS_66MHZ | PCI_STATUS_DEVSEL_MEDIUM); - return 0; } PCIBus *pci_apb_init(hwaddr special_base, @@ -843,7 +842,7 @@ static void pbm_pci_bridge_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->init = apb_pci_bridge_initfn; + k->realize = apb_pci_bridge_realize; k->exit = pci_bridge_exitfn; k->vendor_id = PCI_VENDOR_ID_SUN; k->device_id = PCI_DEVICE_ID_SUN_SIMBA; From 9a4c0e220d8a4f82b5665d0ee95ef94d8e1509d5 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Wed, 20 Jul 2016 18:28:21 +0300 Subject: [PATCH 09/41] hw/virtio-pci: fix virtio behaviour Enable transitional virtio devices by default. Enable virtio-1.0 for devices plugged into PCIe ports (Root ports or Downstream ports). Using the virtio-1 mode will remove the limitation of the number of devices that can be attached to a machine by removing the need for the IO BAR. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- hw/display/virtio-gpu-pci.c | 4 +--- hw/display/virtio-vga.c | 4 +--- hw/virtio/virtio-pci.c | 34 ++++++++++++++++++---------------- hw/virtio/virtio-pci.h | 21 +++++++++++++++++---- include/hw/compat.h | 8 ++++++++ 5 files changed, 45 insertions(+), 26 deletions(-) diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index a71b230d33..34a724c754 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -30,9 +30,7 @@ static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) int i; qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); - /* force virtio-1.0 */ - vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN; - vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY; + virtio_pci_force_virtio_1(vpci_dev); object_property_set_bool(OBJECT(vdev), true, "realized", errp); for (i = 0; i < g->conf.max_outputs; i++) { diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index 315b7fc58c..5b510a17fd 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -134,9 +134,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) /* init virtio bits */ qdev_set_parent_bus(DEVICE(g), BUS(&vpci_dev->bus)); - /* force virtio-1.0 */ - vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN; - vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY; + virtio_pci_force_virtio_1(vpci_dev); object_property_set_bool(OBJECT(g), true, "realized", &err); if (err) { error_propagate(errp, err); diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index f0677b73d8..755f9218b7 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -161,7 +161,7 @@ static bool virtio_pci_modern_state_needed(void *opaque) { VirtIOPCIProxy *proxy = opaque; - return !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); + return virtio_pci_modern(proxy); } static const VMStateDescription vmstate_virtio_pci_modern_state = { @@ -300,8 +300,8 @@ static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); VirtQueue *vq = virtio_get_queue(vdev, n); - bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY); - bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); + bool legacy = virtio_pci_legacy(proxy); + bool modern = virtio_pci_modern(proxy); bool fast_mmio = kvm_ioeventfd_any_length_enabled(); bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; MemoryRegion *modern_mr = &proxy->notify.mr; @@ -1574,8 +1574,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) { VirtIOPCIProxy *proxy = VIRTIO_PCI(d); VirtioBusState *bus = &proxy->bus; - bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY); - bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); + bool legacy = virtio_pci_legacy(proxy); + bool modern = virtio_pci_modern(proxy); bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; uint8_t *config; uint32_t size; @@ -1694,7 +1694,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) static void virtio_pci_device_unplugged(DeviceState *d) { VirtIOPCIProxy *proxy = VIRTIO_PCI(d); - bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); + bool modern = virtio_pci_modern(proxy); bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; virtio_pci_stop_ioeventfd(proxy); @@ -1714,6 +1714,8 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) { VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev); + bool pcie_port = pci_bus_is_express(pci_dev->bus) && + !pci_bus_is_root(pci_dev->bus); /* * virtio pci bar layout used by default. @@ -1764,8 +1766,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) address_space_init(&proxy->modern_as, &proxy->modern_cfg, "virtio-pci-cfg-as"); - if (pci_is_express(pci_dev) && pci_bus_is_express(pci_dev->bus) && - !pci_bus_is_root(pci_dev->bus)) { + if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) { + proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + } + + if (pcie_port && pci_is_express(pci_dev)) { int pos; pos = pcie_endpoint_cap_init(pci_dev, 0); @@ -1819,10 +1824,9 @@ static void virtio_pci_reset(DeviceState *qdev) static Property virtio_pci_properties[] = { DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), - DEFINE_PROP_BIT("disable-legacy", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT, false), - DEFINE_PROP_BIT("disable-modern", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT, true), + DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, @@ -1839,7 +1843,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) PCIDevice *pci_dev = &proxy->pci_dev; if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) && - !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN)) { + virtio_pci_modern(proxy)) { pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } @@ -2301,9 +2305,7 @@ static void virtio_input_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) DeviceState *vdev = DEVICE(&vinput->vdev); qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); - /* force virtio-1.0 */ - vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN; - vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY; + virtio_pci_force_virtio_1(vpci_dev); object_property_set_bool(OBJECT(vdev), true, "realized", errp); } diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index e4548c2f97..25fbf8a375 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -61,8 +61,6 @@ typedef struct VirtioBusClass VirtioPCIBusClass; enum { VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, - VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT, - VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT, VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, @@ -77,8 +75,6 @@ enum { #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) /* virtio version flags */ -#define VIRTIO_PCI_FLAG_DISABLE_LEGACY (1 << VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT) -#define VIRTIO_PCI_FLAG_DISABLE_MODERN (1 << VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT) #define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT) /* migrate extra state */ @@ -144,6 +140,8 @@ struct VirtIOPCIProxy { uint32_t modern_mem_bar; int config_cap; uint32_t flags; + bool disable_modern; + OnOffAuto disable_legacy; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -158,6 +156,21 @@ struct VirtIOPCIProxy { VirtioBusState bus; }; +static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy) +{ + return !proxy->disable_modern; +} + +static inline bool virtio_pci_legacy(VirtIOPCIProxy *proxy) +{ + return proxy->disable_legacy == ON_OFF_AUTO_OFF; +} + +static inline void virtio_pci_force_virtio_1(VirtIOPCIProxy *proxy) +{ + proxy->disable_modern = false; + proxy->disable_legacy = ON_OFF_AUTO_ON; +} /* * virtio-scsi-pci: This extends VirtioPCIProxy. diff --git a/include/hw/compat.h b/include/hw/compat.h index e5113dcabc..7ee7299c36 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -6,6 +6,14 @@ .driver = "virtio-mmio",\ .property = "format_transport_address",\ .value = "off",\ + },{\ + .driver = "virtio-pci",\ + .property = "disable-modern",\ + .value = "on",\ + },{\ + .driver = "virtio-pci",\ + .property = "disable-legacy",\ + .value = "off",\ }, #define HW_COMPAT_2_5 \ From 1e7aed70144b4673fc26e73062064b6724795e5f Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Wed, 27 Jul 2016 21:07:56 +0530 Subject: [PATCH 10/41] virtio: check vring descriptor buffer length virtio back end uses set of buffers to facilitate I/O operations. An infinite loop unfolds in virtqueue_pop() if a buffer was of zero size. Add check to avoid it. Reported-by: Li Qiang Signed-off-by: Prasad J Pandit Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/virtio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 752b2715d0..b4d05110d2 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -458,6 +458,11 @@ static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove unsigned num_sg = *p_num_sg; assert(num_sg <= max_num_sg); + if (!sz) { + error_report("virtio: zero sized buffers are not allowed"); + exit(1); + } + while (sz) { hwaddr len = sz; From 01edc230d9d3276bd77b79a66d770e3b66a8ca4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:14:55 +0400 Subject: [PATCH 11/41] misc: indentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index f92d3f829e..3677a8297a 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -313,7 +313,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, * properly. */ if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - dev->use_guest_notifier_mask = false; + dev->use_guest_notifier_mask = false; } } From d9d261142d554504a32d95b771e9d8191631323f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:14:56 +0400 Subject: [PATCH 12/41] vhost-user: minor simplification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shorten the code and make it more clear by using the specialized function g_str_has_prefix(). Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-user.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/vhost-user.c b/net/vhost-user.c index c4d63e05eb..2af212bddb 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -316,7 +316,6 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp) { const char *name = opaque; const char *driver, *netdev; - const char virtio_name[] = "virtio-net-"; driver = qemu_opt_get(opts, "driver"); netdev = qemu_opt_get(opts, "netdev"); @@ -326,7 +325,7 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp) } if (strcmp(netdev, name) == 0 && - strncmp(driver, virtio_name, strlen(virtio_name)) != 0) { + !g_str_has_prefix(driver, "virtio-net-")) { error_setg(errp, "vhost-user requires frontend driver virtio-net-*"); return -1; } From 9c7d18b3a59f6e4464543905dede90f1a65096cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:14:57 +0400 Subject: [PATCH 13/41] vhost-user: disconnect on HUP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases, qemu_chr_fe_read_all() on HUP event doesn't raise CHR_EVENT_CLOSED because the read/recv function returns -1 on disconnected peers (for example with tch_chr_recv, an ECONNRESET errno overwritten as EIO). It is simpler to explicitely disconnect on HUP, rising CHR_EVENT_CLOSED if it wasn't disconnected already. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-user.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/vhost-user.c b/net/vhost-user.c index 2af212bddb..2cac32e3b7 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -188,12 +188,8 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, void *opaque) { VhostUserState *s = opaque; - uint8_t buf[1]; - /* We don't actually want to read anything, but CHR_EVENT_CLOSED will be - * raised as a side-effect of the read. - */ - qemu_chr_fe_read_all(s->chr, buf, sizeof(buf)); + qemu_chr_disconnect(s->chr); return FALSE; } From 7cb8a9b9f235098096c493cc1a1487cd8dbd6820 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:14:58 +0400 Subject: [PATCH 14/41] vhost: don't assume opaque is a fd, use backend cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost-dev opaque isn't necessarily an fd, it can be a chardev when using vhost-user. Goto fail, so vhost_backend_cleanup() is called to handle backend cleanup appropriately. vhost_set_backend_type() should never fail, use an assert(). Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index ec3abda9d5..429499aefa 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1019,21 +1019,19 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, hdev->migration_blocker = NULL; - if (vhost_set_backend_type(hdev, backend_type) < 0) { - close((uintptr_t)opaque); - return -1; - } + r = vhost_set_backend_type(hdev, backend_type); + assert(r >= 0); - if (hdev->vhost_ops->vhost_backend_init(hdev, opaque) < 0) { - close((uintptr_t)opaque); - return -errno; + r = hdev->vhost_ops->vhost_backend_init(hdev, opaque); + if (r < 0) { + goto fail; } if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { fprintf(stderr, "vhost backend memory slots limit is less" " than current number of present memory slots\n"); - close((uintptr_t)opaque); - return -1; + r = -1; + goto fail; } QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); From 9e0bc24fa57b98561d75ab8f61a419f2d69ec6aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:14:59 +0400 Subject: [PATCH 15/41] vhost: make vhost_log_put() idempotent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although not strictly required, it is nice to have vhost_log_put() safely callable multiple times. Clear dev->log* when calling vhost_log_put() to make the function idempotent. This also simplifies a bit the caller work. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 429499aefa..9bac1636c5 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -362,6 +362,8 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) if (!log) { return; } + dev->log = NULL; + dev->log_size = 0; --log->refcnt; if (log->refcnt == 0) { @@ -710,8 +712,6 @@ static int vhost_migration_log(MemoryListener *listener, int enable) return r; } vhost_log_put(dev, false); - dev->log = NULL; - dev->log_size = 0; } else { vhost_dev_log_resize(dev, vhost_get_log_size(dev)); r = vhost_dev_set_log(dev, true); @@ -1328,7 +1328,4 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) vhost_log_put(hdev, true); hdev->started = false; - hdev->log = NULL; - hdev->log_size = 0; } - From 7b527247f09740920d8bc8aaa5ef4445256ca478 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:00 +0400 Subject: [PATCH 16/41] vhost: assert the log was cleaned up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the log was released on cleanup, or it will leak (the alternative is to call vhost_log_put() unconditionally, but it may hide some dev state issues). Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9bac1636c5..8a18f9b669 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1134,6 +1134,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) g_free(hdev->mem); g_free(hdev->mem_sections); hdev->vhost_ops->vhost_backend_cleanup(hdev); + assert(!hdev->log); QLIST_REMOVE(hdev, entry); } From 5be5f9be724d43fbec3d6f955a43bc64a62d55cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:01 +0400 Subject: [PATCH 17/41] vhost: fix cleanup on not fully initialized device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If vhost_dev_init() failed, caller may still call vhost_dev_cleanup() later. However, vhost_dev_cleanup() tries to remove the device from the list even if it wasn't yet added, which may lead to crashes. Similarly for the memory listener. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 8a18f9b669..6b988e1478 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1033,7 +1033,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, r = -1; goto fail; } - QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); r = hdev->vhost_ops->vhost_set_owner(hdev); if (r < 0) { @@ -1103,6 +1102,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, hdev->started = false; hdev->memory_changed = false; memory_listener_register(&hdev->memory_listener, &address_space_memory); + QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); return 0; fail_busyloop: while (--i >= 0) { @@ -1126,7 +1126,11 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) for (i = 0; i < hdev->nvqs; ++i) { vhost_virtqueue_cleanup(hdev->vqs + i); } - memory_listener_unregister(&hdev->memory_listener); + if (hdev->mem) { + /* those are only safe after successful init */ + memory_listener_unregister(&hdev->memory_listener); + QLIST_REMOVE(hdev, entry); + } if (hdev->migration_blocker) { migrate_del_blocker(hdev->migration_blocker); error_free(hdev->migration_blocker); @@ -1135,7 +1139,6 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) g_free(hdev->mem_sections); hdev->vhost_ops->vhost_backend_cleanup(hdev); assert(!hdev->log); - QLIST_REMOVE(hdev, entry); } /* Stop processing guest IO notifications in qemu. From e0547b59dc0ead4c605d3f02d1c8829630a1311b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:02 +0400 Subject: [PATCH 18/41] vhost: make vhost_dev_cleanup() idempotent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is called on multiple code path, so make it safe to call several times (note: I don't remember a reproducer here, but a function called 'cleanup' should probably be idempotent in my book) Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 6b988e1478..9400b47e10 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1123,6 +1123,7 @@ fail: void vhost_dev_cleanup(struct vhost_dev *hdev) { int i; + for (i = 0; i < hdev->nvqs; ++i) { vhost_virtqueue_cleanup(hdev->vqs + i); } @@ -1137,8 +1138,12 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) } g_free(hdev->mem); g_free(hdev->mem_sections); - hdev->vhost_ops->vhost_backend_cleanup(hdev); + if (hdev->vhost_ops) { + hdev->vhost_ops->vhost_backend_cleanup(hdev); + } assert(!hdev->log); + + memset(hdev, 0, sizeof(struct vhost_dev)); } /* Stop processing guest IO notifications in qemu. From f1a0365b686c2abdfde6303947f8893873b6b00b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:03 +0400 Subject: [PATCH 19/41] vhost-net: always call vhost_dev_cleanup() on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost_dev_init(), calling vhost backend initialization, should be cleaned up after failure too. Call vhost_dev_cleanup() in all failure cases. First, it needs to zero-alloc the struct to avoid the initial garbage. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 3677a8297a..c11f69cdc2 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -140,7 +140,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) { int r; bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; - struct vhost_net *net = g_malloc(sizeof *net); + struct vhost_net *net = g_new0(struct vhost_net, 1); uint64_t features = 0; if (!options->net_backend) { @@ -185,7 +185,6 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n", (uint64_t)(~net->dev.features & net->dev.backend_features)); - vhost_dev_cleanup(&net->dev); goto fail; } } @@ -197,7 +196,6 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n", (uint64_t)(~net->dev.features & features)); - vhost_dev_cleanup(&net->dev); goto fail; } } @@ -205,7 +203,9 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) vhost_net_ack_features(net, features); return net; + fail: + vhost_dev_cleanup(&net->dev); g_free(net); return NULL; } From a06db3ec720a570089b16125d3bca470e7bb34b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:04 +0400 Subject: [PATCH 20/41] vhost: fix calling vhost_dev_cleanup() after vhost_dev_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost_net_init() calls vhost_dev_init() and in case of failure, calls vhost_dev_cleanup() directly. However, the structure is already partially cleaned on error. Calling vhost_dev_cleanup() again will call vhost_virtqueue_cleanup() on already clean queues, and causing potential double-close. Instead, adjust dev->nvqs and simplify vhost_dev_init() code to not call vhost_virtqueue_cleanup() but vhost_dev_cleanup() instead. Signed-off-by: Marc-André Lureau Signed-off-by: Ilya Maximets Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9400b47e10..e3091d17be 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1015,7 +1015,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout) { uint64_t features; - int i, r; + int i, r, n_initialized_vqs = 0; hdev->migration_blocker = NULL; @@ -1044,10 +1044,10 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, goto fail; } - for (i = 0; i < hdev->nvqs; ++i) { + for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) { r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i); if (r < 0) { - goto fail_vq; + goto fail; } } @@ -1104,19 +1104,14 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, memory_listener_register(&hdev->memory_listener, &address_space_memory); QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); return 0; + fail_busyloop: while (--i >= 0) { vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0); } - i = hdev->nvqs; -fail_vq: - while (--i >= 0) { - vhost_virtqueue_cleanup(hdev->vqs + i); - } fail: - r = -errno; - hdev->vhost_ops->vhost_backend_cleanup(hdev); - QLIST_REMOVE(hdev, entry); + hdev->nvqs = n_initialized_vqs; + vhost_dev_cleanup(hdev); return r; } From 162bba7fa84182e5dc07f94db9744fc80109592a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:05 +0400 Subject: [PATCH 21/41] vhost: do not assert() on vhost_ops failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling a vhost operation may fail, for example with disconnected vhost-user backend, but qemu shouldn't abort in this case. Log an error instead, except on error and cleanup code paths where it can be mostly ignored. Let's use a VHOST_OPS_DEBUG macro to easily disable those messages once disconnected backend stabilizes. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 49 +++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index e3091d17be..1ece1ef8ca 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -27,6 +27,18 @@ #include "hw/virtio/virtio-access.h" #include "migration/migration.h" +/* enabled until disconnected backend stabilizes */ +#define _VHOST_DEBUG 1 + +#ifdef _VHOST_DEBUG +#define VHOST_OPS_DEBUG(fmt, ...) \ + do { error_report(fmt ": %s (%d)", ## __VA_ARGS__, \ + strerror(errno), errno); } while (0) +#else +#define VHOST_OPS_DEBUG(fmt, ...) \ + do { } while (0) +#endif + static struct vhost_log *vhost_log; static struct vhost_log *vhost_log_shm; @@ -400,7 +412,10 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) /* inform backend of log switching, this must be done before releasing the current log, to ensure no logging is lost */ r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log); - assert(r >= 0); + if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_log_base failed"); + } + vhost_log_put(dev, true); dev->log = log; dev->log_size = size; @@ -567,7 +582,9 @@ static void vhost_commit(MemoryListener *listener) if (!dev->log_enabled) { r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem); - assert(r >= 0); + if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_mem_table failed"); + } dev->memory_changed = false; return; } @@ -580,7 +597,9 @@ static void vhost_commit(MemoryListener *listener) vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER); } r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem); - assert(r >= 0); + if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_mem_table failed"); + } /* To log less, can only decrease log size after table update. */ if (dev->log_size > log_size + VHOST_LOG_BUFFER) { vhost_dev_log_resize(dev, log_size); @@ -667,7 +686,7 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) { - int r, t, i, idx; + int r, i, idx; r = vhost_dev_set_features(dev, enable_log); if (r < 0) { goto err_features; @@ -684,12 +703,10 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) err_vq: for (; i >= 0; --i) { idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i); - t = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx, - dev->log_enabled); - assert(t >= 0); + vhost_virtqueue_set_addr(dev, dev->vqs + i, idx, + dev->log_enabled); } - t = vhost_dev_set_features(dev, dev->log_enabled); - assert(t >= 0); + vhost_dev_set_features(dev, dev->log_enabled); err_features: return r; } @@ -929,15 +946,11 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, * native as legacy devices expect so by default. */ if (vhost_needs_vring_endian(vdev)) { - r = vhost_virtqueue_set_vring_endian_legacy(dev, - !virtio_is_big_endian(vdev), - vhost_vq_index); - if (r < 0) { - error_report("failed to reset vring endianness"); - } + vhost_virtqueue_set_vring_endian_legacy(dev, + !virtio_is_big_endian(vdev), + vhost_vq_index); } - assert (r >= 0); cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx), 0, virtio_queue_get_ring_size(vdev, idx)); cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), @@ -1228,7 +1241,9 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); - assert(r >= 0); + if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_vring_call failed"); + } } uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, From c640969216d6212ddcb8613330832b044d768996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:06 +0400 Subject: [PATCH 22/41] vhost: add missing VHOST_OPS_DEBUG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing VHOST_OPS_DEBUG() logs, for completeness. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 1ece1ef8ca..5a29eb38b9 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -668,6 +668,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, }; int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr); if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_vring_addr failed"); return -errno; } return 0; @@ -681,6 +682,9 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) features |= 0x1ULL << VHOST_F_LOG_ALL; } r = dev->vhost_ops->vhost_set_features(dev, features); + if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_features failed"); + } return r < 0 ? -errno : 0; } @@ -804,6 +808,7 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, return 0; } + VHOST_OPS_DEBUG("vhost_set_vring_endian failed"); if (errno == ENOTTY) { error_report("vhost does not support cross-endian"); return -ENOSYS; @@ -832,12 +837,14 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, vq->num = state.num = virtio_queue_get_num(vdev, idx); r = dev->vhost_ops->vhost_set_vring_num(dev, &state); if (r) { + VHOST_OPS_DEBUG("vhost_set_vring_num failed"); return -errno; } state.num = virtio_queue_get_last_avail_idx(vdev, idx); r = dev->vhost_ops->vhost_set_vring_base(dev, &state); if (r) { + VHOST_OPS_DEBUG("vhost_set_vring_base failed"); return -errno; } @@ -889,6 +896,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); r = dev->vhost_ops->vhost_set_vring_kick(dev, &file); if (r) { + VHOST_OPS_DEBUG("vhost_set_vring_kick failed"); r = -errno; goto fail_kick; } @@ -936,8 +944,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, r = dev->vhost_ops->vhost_get_vring_base(dev, &state); if (r < 0) { - fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r); - fflush(stderr); + VHOST_OPS_DEBUG("vhost VQ %d ring restore failed: %d", idx, r); } virtio_queue_set_last_avail_idx(vdev, idx, state.num); virtio_queue_invalidate_signalled_used(vdev, idx); @@ -989,6 +996,7 @@ static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, r = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &state); if (r) { + VHOST_OPS_DEBUG("vhost_set_vring_busyloop_timeout failed"); return r; } @@ -1010,6 +1018,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, file.fd = event_notifier_get_fd(&vq->masked_notifier); r = dev->vhost_ops->vhost_set_vring_call(dev, &file); if (r) { + VHOST_OPS_DEBUG("vhost_set_vring_call failed"); r = -errno; goto fail_call; } @@ -1049,11 +1058,13 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, r = hdev->vhost_ops->vhost_set_owner(hdev); if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_owner failed"); goto fail; } r = hdev->vhost_ops->vhost_get_features(hdev, &features); if (r < 0) { + VHOST_OPS_DEBUG("vhost_get_features failed"); goto fail; } @@ -1286,6 +1297,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) } r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_mem_table failed"); r = -errno; goto fail_mem; } @@ -1310,6 +1322,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->log_size ? log_base : 0, hdev->log); if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_log_base failed"); r = -errno; goto fail_log; } From 4afba631206d302303499edc22a8f3fc7a1816ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:07 +0400 Subject: [PATCH 23/41] vhost: use error_report() instead of fprintf(stderr,...) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's use qemu proper error reporting API, this ensures the error is reported at the right place (stderr or monitor), with a conventional format. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 5a29eb38b9..bb886f3e7e 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -439,11 +439,11 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, l = vq->ring_size; p = cpu_physical_memory_map(vq->ring_phys, &l, 1); if (!p || l != vq->ring_size) { - fprintf(stderr, "Unable to map ring buffer for ring %d\n", i); + error_report("Unable to map ring buffer for ring %d", i); r = -ENOMEM; } if (p != vq->ring) { - fprintf(stderr, "Ring buffer relocated for ring %d\n", i); + error_report("Ring buffer relocated for ring %d", i); r = -EBUSY; } cpu_physical_memory_unmap(p, l, 0, 0); @@ -1050,8 +1050,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, } if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { - fprintf(stderr, "vhost backend memory slots limit is less" - " than current number of present memory slots\n"); + error_report("vhost backend memory slots limit is less" + " than current number of present memory slots"); r = -1; goto fail; } @@ -1174,8 +1174,9 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) VirtioBusState *vbus = VIRTIO_BUS(qbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); int i, r, e; + if (!k->ioeventfd_started) { - fprintf(stderr, "binding does not support host notifiers\n"); + error_report("binding does not support host notifiers"); r = -ENOSYS; goto fail; } @@ -1184,7 +1185,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, true); if (r < 0) { - fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r); + error_report("vhost VQ %d notifier binding failed: %d", i, -r); goto fail_vq; } } @@ -1195,8 +1196,7 @@ fail_vq: e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, false); if (e < 0) { - fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r); - fflush(stderr); + error_report("vhost VQ %d notifier cleanup error: %d", i, -r); } assert (e >= 0); } @@ -1218,8 +1218,7 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, false); if (r < 0) { - fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r); - fflush(stderr); + error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); } assert (r >= 0); } From 5c7eaabf65ba936f718ef4dfcfc551ffc9d4f35c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:08 +0400 Subject: [PATCH 24/41] qemu-char: fix qemu_chr_fe_set_msgfds() crash when disconnected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling qemu_chr_fe_set_msgfds() on unconnected socket leads to crash since s->ioc is NULL in this case. Return an error earlier instead. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qemu-char.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index e4b8448422..1274f50e00 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2760,14 +2760,16 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num) { TCPCharDriver *s = chr->opaque; - if (!qio_channel_has_feature(s->ioc, - QIO_CHANNEL_FEATURE_FD_PASS)) { - return -1; - } /* clear old pending fd array */ g_free(s->write_msgfds); s->write_msgfds = NULL; + if (!s->connected || + !qio_channel_has_feature(s->ioc, + QIO_CHANNEL_FEATURE_FD_PASS)) { + return -1; + } + if (num) { s->write_msgfds = g_new(int, num); memcpy(s->write_msgfds, fds, num * sizeof(int)); From df3485a14821edf75b1d3ee6ca38cd80fa989dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:09 +0400 Subject: [PATCH 25/41] vhost-user: call set_msgfds unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is fine to call set_msgfds() with 0 fd, and ensures any previous fd array is cleared. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 495e09fd4e..f01b92f512 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -187,9 +187,7 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, return 0; } - if (fd_num) { - qemu_chr_fe_set_msgfds(chr, fds, fd_num); - } + qemu_chr_fe_set_msgfds(chr, fds, fd_num); return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 0 : -1; From 6fab2f3f6064ad0a712c6d9213b9151bcc87c3d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:10 +0400 Subject: [PATCH 26/41] vhost-user: check qemu_chr_fe_set_msgfds() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check qemu_chr_fe_set_msgfds() for errors, to make sure the message to be sent is correct. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index f01b92f512..5dae49615c 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -187,7 +187,9 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, return 0; } - qemu_chr_fe_set_msgfds(chr, fds, fd_num); + if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { + return -1; + } return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 0 : -1; From c4843a45e3d4f3698b214275ab5e78cdb6a3d212 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:11 +0400 Subject: [PATCH 27/41] vhost-user: check vhost_user_{read,write}() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vhost-user code is quite inconsistent with error handling. Instead of ignoring some return values of read/write and silently going on with invalid state (invalid read for example), break the code flow when the error happened. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 50 ++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 5dae49615c..819481dee2 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -214,12 +214,14 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, fds[fd_num++] = log->fd; } - vhost_user_write(dev, &msg, fds, fd_num); + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } if (shmfd) { msg.size = 0; if (vhost_user_read(dev, &msg) < 0) { - return 0; + return -1; } if (msg.request != VHOST_USER_SET_LOG_BASE) { @@ -275,7 +277,9 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, msg.size += sizeof(msg.payload.memory.padding); msg.size += fd_num * sizeof(VhostUserMemoryRegion); - vhost_user_write(dev, &msg, fds, fd_num); + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } return 0; } @@ -290,7 +294,9 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev, .size = sizeof(msg.payload.addr), }; - vhost_user_write(dev, &msg, NULL, 0); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } return 0; } @@ -313,7 +319,9 @@ static int vhost_set_vring(struct vhost_dev *dev, .size = sizeof(msg.payload.state), }; - vhost_user_write(dev, &msg, NULL, 0); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } return 0; } @@ -360,10 +368,12 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, .size = sizeof(msg.payload.state), }; - vhost_user_write(dev, &msg, NULL, 0); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } if (vhost_user_read(dev, &msg) < 0) { - return 0; + return -1; } if (msg.request != VHOST_USER_GET_VRING_BASE) { @@ -401,7 +411,9 @@ static int vhost_set_vring_file(struct vhost_dev *dev, msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; } - vhost_user_write(dev, &msg, fds, fd_num); + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } return 0; } @@ -427,7 +439,9 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) .size = sizeof(msg.payload.u64), }; - vhost_user_write(dev, &msg, NULL, 0); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } return 0; } @@ -455,10 +469,12 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) return 0; } - vhost_user_write(dev, &msg, NULL, 0); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } if (vhost_user_read(dev, &msg) < 0) { - return 0; + return -1; } if (msg.request != request) { @@ -489,7 +505,9 @@ static int vhost_user_set_owner(struct vhost_dev *dev) .flags = VHOST_USER_VERSION, }; - vhost_user_write(dev, &msg, NULL, 0); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } return 0; } @@ -501,7 +519,9 @@ static int vhost_user_reset_device(struct vhost_dev *dev) .flags = VHOST_USER_VERSION, }; - vhost_user_write(dev, &msg, NULL, 0); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } return 0; } @@ -588,7 +608,6 @@ static bool vhost_user_requires_shm_log(struct vhost_dev *dev) static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) { VhostUserMsg msg = { 0 }; - int err; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); @@ -605,8 +624,7 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) memcpy((char *)&msg.payload.u64, mac_addr, 6); msg.size = sizeof(msg.payload.u64); - err = vhost_user_write(dev, &msg, NULL, 0); - return err; + return vhost_user_write(dev, &msg, NULL, 0); } return -1; } From e6bcb1b61782cefacecc8b9ee7a5f782373dab2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:12 +0400 Subject: [PATCH 28/41] vhost-user: keep vhost_net after a disconnection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many code paths assume get_vhost_net() returns non-null. Keep VhostUserState.vhost_net after a successful vhost_net_init(), instead of freeing it in vhost_net_cleanup(). VhostUserState.vhost_net is thus freed before after being recreated or on final vhost_user_cleanup() and there is no need to save the acked features. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 1 - net/tap.c | 1 + net/vhost-user.c | 36 +++++++++++++++++++----------------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index c11f69cdc2..7b76591409 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -378,7 +378,6 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, void vhost_net_cleanup(struct vhost_net *net) { vhost_dev_cleanup(&net->dev); - g_free(net); } int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) diff --git a/net/tap.c b/net/tap.c index 40a8c741fc..6abb962efd 100644 --- a/net/tap.c +++ b/net/tap.c @@ -312,6 +312,7 @@ static void tap_cleanup(NetClientState *nc) if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); s->vhost_net = NULL; } diff --git a/net/vhost-user.c b/net/vhost-user.c index 2cac32e3b7..d2a984d767 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -45,11 +45,6 @@ uint64_t vhost_user_get_acked_features(NetClientState *nc) return s->acked_features; } -static int vhost_user_running(VhostUserState *s) -{ - return (s->vhost_net) ? 1 : 0; -} - static void vhost_user_stop(int queues, NetClientState *ncs[]) { VhostUserState *s; @@ -59,15 +54,14 @@ static void vhost_user_stop(int queues, NetClientState *ncs[]) assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER); s = DO_UPCAST(VhostUserState, nc, ncs[i]); - if (!vhost_user_running(s)) { - continue; - } if (s->vhost_net) { /* save acked features */ - s->acked_features = vhost_net_get_acked_features(s->vhost_net); + uint64_t features = vhost_net_get_acked_features(s->vhost_net); + if (features) { + s->acked_features = features; + } vhost_net_cleanup(s->vhost_net); - s->vhost_net = NULL; } } } @@ -75,6 +69,7 @@ static void vhost_user_stop(int queues, NetClientState *ncs[]) static int vhost_user_start(int queues, NetClientState *ncs[]) { VhostNetOptions options; + struct vhost_net *net = NULL; VhostUserState *s; int max_queues; int i; @@ -85,33 +80,39 @@ static int vhost_user_start(int queues, NetClientState *ncs[]) assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER); s = DO_UPCAST(VhostUserState, nc, ncs[i]); - if (vhost_user_running(s)) { - continue; - } options.net_backend = ncs[i]; options.opaque = s->chr; options.busyloop_timeout = 0; - s->vhost_net = vhost_net_init(&options); - if (!s->vhost_net) { + net = vhost_net_init(&options); + if (!net) { error_report("failed to init vhost_net for queue %d", i); goto err; } if (i == 0) { - max_queues = vhost_net_get_max_queues(s->vhost_net); + max_queues = vhost_net_get_max_queues(net); if (queues > max_queues) { error_report("you are asking more queues than supported: %d", max_queues); goto err; } } + + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); + } + s->vhost_net = net; } return 0; err: - vhost_user_stop(i + 1, ncs); + if (net) { + vhost_net_cleanup(net); + } + vhost_user_stop(i, ncs); return -1; } @@ -150,6 +151,7 @@ static void vhost_user_cleanup(NetClientState *nc) if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); s->vhost_net = NULL; } if (s->chr) { From 1a5b68cee8a2b165ffd61b2e0641a4da3990f242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:13 +0400 Subject: [PATCH 29/41] vhost-user: add get_vhost_net() assertions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a few assertions to be more explicit about the runtime behaviour after the previous patch: get_vhost_net() is non-null after net_vhost_user_init(). Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 1 + net/vhost-user.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 7b76591409..4e6495e069 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -417,6 +417,7 @@ VHostNetState *get_vhost_net(NetClientState *nc) break; case NET_CLIENT_DRIVER_VHOST_USER: vhost_net = vhost_user_get_vhost_net(nc); + assert(vhost_net); break; default: break; diff --git a/net/vhost-user.c b/net/vhost-user.c index d2a984d767..39987a3c8a 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -259,6 +259,8 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, nc[0].name); + assert(s->vhost_net); + return 0; } From bb12e761e8e7b5c3c3d77bd08de9f007727a941e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:14 +0400 Subject: [PATCH 30/41] Revert "vhost-net: do not crash if backend is not present" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that get_vhost_net() returns non-null after a successful vhost_net_init(), we no longer need to check this case. This reverts commit ecd34898596c60f79886061618dd7e01001113ad. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 4e6495e069..54cf015dee 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -429,15 +429,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) int vhost_set_vring_enable(NetClientState *nc, int enable) { VHostNetState *net = get_vhost_net(nc); - const VhostOps *vhost_ops; + const VhostOps *vhost_ops = net->dev.vhost_ops; nc->vring_enable = enable; - if (!net) { - return 0; - } - - vhost_ops = net->dev.vhost_ops; if (vhost_ops->vhost_set_vring_enable) { return vhost_ops->vhost_set_vring_enable(&net->dev, enable); } From 51f7aca973be7550227102055218dffb98fb6990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:15 +0400 Subject: [PATCH 31/41] vhost-net: vhost_migration_done is vhost-user specific MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Either the callback is mandatory to implement, in which case an assert() is more appropriate, or it's not and we can't tell much whether the function should fail or not (given it's name, I guess it should silently success by default). Instead, make the implementation mandatory and vhost-user specific to be more clear about its usage. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 54cf015dee..dd41a8eda8 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -383,13 +383,11 @@ void vhost_net_cleanup(struct vhost_net *net) int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) { const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = -1; - if (vhost_ops->vhost_migration_done) { - r = vhost_ops->vhost_migration_done(&net->dev, mac_addr); - } + assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + assert(vhost_ops->vhost_migration_done); - return r; + return vhost_ops->vhost_migration_done(&net->dev, mac_addr); } bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) From 8695de0fcf739b2071018d933b407ed8c16f40e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:16 +0400 Subject: [PATCH 32/41] vhost: add assert() to check runtime behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All these functions must be called only after the backend is connected. They are called from virtio-net.c, after either virtio or link status change. The check for nc->peer->link_down should ensure vhost_net_{start,stop}() are always called between vhost_user_{start,stop}(). Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index bb886f3e7e..2d0d1d1ba9 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1242,6 +1242,9 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, int r, index = n - hdev->vq_index; struct vhost_vring_file file; + /* should only be called after backend is connected */ + assert(hdev->vhost_ops); + if (mask) { assert(vdev->use_guest_notifier_mask); file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier); @@ -1288,6 +1291,9 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) { int i, r; + /* should only be called after backend is connected */ + assert(hdev->vhost_ops); + hdev->started = true; r = vhost_dev_set_features(hdev, hdev->log_enabled); @@ -1350,6 +1356,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) { int i; + /* should only be called after backend is connected */ + assert(hdev->vhost_ops); + for (i = 0; i < hdev->nvqs; ++i) { vhost_virtqueue_stop(hdev, vdev, From 6b6723c3b508403309aa21c4fcbe515bd1edfd74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:17 +0400 Subject: [PATCH 33/41] char: add chr_wait_connected callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A function to wait on the backend to be connected, to be used in the following patches. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/sysemu/char.h | 8 ++++++++ qemu-char.c | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/include/sysemu/char.h b/include/sysemu/char.h index 0ea9eacc40..ee7e55468f 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -70,6 +70,7 @@ struct CharDriverState { int (*get_msgfds)(struct CharDriverState *s, int* fds, int num); int (*set_msgfds)(struct CharDriverState *s, int *fds, int num); int (*chr_add_client)(struct CharDriverState *chr, int fd); + int (*chr_wait_connected)(struct CharDriverState *chr, Error **errp); IOEventHandler *chr_event; IOCanReadHandler *chr_can_read; IOReadHandler *chr_read; @@ -158,6 +159,13 @@ void qemu_chr_disconnect(CharDriverState *chr); */ void qemu_chr_cleanup(void); +/** + * @qemu_chr_wait_connected: + * + * Wait for characted backend to be connected. + */ +int qemu_chr_wait_connected(CharDriverState *chr, Error **errp); + /** * @qemu_chr_new_noreplay: * diff --git a/qemu-char.c b/qemu-char.c index 1274f50e00..6eba615818 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -3139,6 +3139,15 @@ static gboolean tcp_chr_accept(QIOChannel *channel, return TRUE; } +int qemu_chr_wait_connected(CharDriverState *chr, Error **errp) +{ + if (chr->chr_wait_connected) { + return chr->chr_wait_connected(chr, errp); + } + + return 0; +} + static void tcp_chr_close(CharDriverState *chr) { TCPCharDriver *s = chr->opaque; From d7a04fd7d500870ad70f15089c6590e4bea73e70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:18 +0400 Subject: [PATCH 34/41] char: add and use tcp_chr_wait_connected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a chr_wait_connected for the tcp backend, and use it in the open_socket() function. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qemu-char.c | 63 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index 6eba615818..a50b8fb3a3 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -3139,6 +3139,32 @@ static gboolean tcp_chr_accept(QIOChannel *channel, return TRUE; } +static int tcp_chr_wait_connected(CharDriverState *chr, Error **errp) +{ + TCPCharDriver *s = chr->opaque; + QIOChannelSocket *sioc; + + while (!s->connected) { + if (s->is_listen) { + fprintf(stderr, "QEMU waiting for connection on: %s\n", + chr->filename); + qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), true, NULL); + tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); + qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL); + } else { + sioc = qio_channel_socket_new(); + if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { + object_unref(OBJECT(sioc)); + return -1; + } + tcp_chr_new_client(chr, sioc); + object_unref(OBJECT(sioc)); + } + } + + return 0; +} + int qemu_chr_wait_connected(CharDriverState *chr, Error **errp) { if (chr->chr_wait_connected) { @@ -4402,6 +4428,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, s->addr = QAPI_CLONE(SocketAddress, sock->addr); chr->opaque = s; + chr->chr_wait_connected = tcp_chr_wait_connected; chr->chr_write = tcp_chr_write; chr->chr_sync_read = tcp_chr_sync_read; chr->chr_close = tcp_chr_close; @@ -4425,32 +4452,30 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, s->reconnect_time = reconnect; } - sioc = qio_channel_socket_new(); if (s->reconnect_time) { + sioc = qio_channel_socket_new(); qio_channel_socket_connect_async(sioc, s->addr, qemu_chr_socket_connected, chr, NULL); - } else if (s->is_listen) { - if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { - goto error; - } - s->listen_ioc = sioc; - if (is_waitconnect) { - fprintf(stderr, "QEMU waiting for connection on: %s\n", - chr->filename); - tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); - } - qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL); - if (!s->ioc) { - s->listen_tag = qio_channel_add_watch( - QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); - } } else { - if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { + if (s->is_listen) { + sioc = qio_channel_socket_new(); + if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { + goto error; + } + s->listen_ioc = sioc; + if (is_waitconnect && + qemu_chr_wait_connected(chr, errp) < 0) { + goto error; + } + if (!s->ioc) { + s->listen_tag = qio_channel_add_watch( + QIO_CHANNEL(s->listen_ioc), G_IO_IN, + tcp_chr_accept, chr, NULL); + } + } else if (qemu_chr_wait_connected(chr, errp) < 0) { goto error; } - tcp_chr_new_client(chr, sioc); - object_unref(OBJECT(sioc)); } return chr; From c89804d674e4e3804bd3ac1fe79650896044b4e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:19 +0400 Subject: [PATCH 35/41] vhost-user: wait until backend init is completed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chardev waits for an initial connection before starting qemu, and vhost-user should wait for the backend negotiation to be completed before starting qemu too. vhost-user is started in the net_vhost_user_event callback, which is synchronously called after the socket is connected. Use a VhostUserState.started flag to indicate vhost-user init completed successfully and qemu can be started. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-user.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/vhost-user.c b/net/vhost-user.c index 39987a3c8a..b0595f8781 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -24,6 +24,7 @@ typedef struct VhostUserState { VHostNetState *vhost_net; guint watch; uint64_t acked_features; + bool started; } VhostUserState; typedef struct VhostUserChardevProps { @@ -220,6 +221,7 @@ static void net_vhost_user_event(void *opaque, int event) return; } qmp_set_link(name, true, &err); + s->started = true; break; case CHR_EVENT_CLOSED: qmp_set_link(name, false, &err); @@ -238,7 +240,7 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, const char *name, CharDriverState *chr, int queues) { - NetClientState *nc; + NetClientState *nc, *nc0 = NULL; VhostUserState *s; int i; @@ -247,6 +249,9 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, for (i = 0; i < queues; i++) { nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); + if (!nc0) { + nc0 = nc; + } snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s", i, chr->label); @@ -257,7 +262,16 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, s->chr = chr; } - qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, nc[0].name); + s = DO_UPCAST(VhostUserState, nc, nc0); + do { + Error *err = NULL; + if (qemu_chr_wait_connected(chr, &err) < 0) { + error_report_err(err); + return -1; + } + qemu_chr_add_handlers(chr, NULL, NULL, + net_vhost_user_event, nc0->name); + } while (!s->started); assert(s->vhost_net); From 1ec3b71cde6997db866a76ff3311cb97c0b6741a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:20 +0400 Subject: [PATCH 36/41] tests: plug some leaks in virtio-net-test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found thanks to valgrind. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/virtio-net-test.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/virtio-net-test.c b/tests/virtio-net-test.c index a34a9392e3..361506faf5 100644 --- a/tests/virtio-net-test.c +++ b/tests/virtio-net-test.c @@ -149,6 +149,7 @@ static void rx_stop_cont_test(const QVirtioBus *bus, QVirtioDevice *dev, char test[] = "TEST"; char buffer[64]; int len = htonl(sizeof(test)); + QDict *rsp; struct iovec iov[] = { { .iov_base = &len, @@ -165,7 +166,8 @@ static void rx_stop_cont_test(const QVirtioBus *bus, QVirtioDevice *dev, free_head = qvirtqueue_add(vq, req_addr, 64, true, false); qvirtqueue_kick(bus, dev, vq, free_head); - qmp("{ 'execute' : 'stop'}"); + rsp = qmp("{ 'execute' : 'stop'}"); + QDECREF(rsp); ret = iov_send(socket, iov, 2, 0, sizeof(len) + sizeof(test)); g_assert_cmpint(ret, ==, sizeof(test) + sizeof(len)); @@ -173,8 +175,10 @@ static void rx_stop_cont_test(const QVirtioBus *bus, QVirtioDevice *dev, /* We could check the status, but this command is more importantly to * ensure the packet data gets queued in QEMU, before we do 'cont'. */ - qmp("{ 'execute' : 'query-status'}"); - qmp("{ 'execute' : 'cont'}"); + rsp = qmp("{ 'execute' : 'query-status'}"); + QDECREF(rsp); + rsp = qmp("{ 'execute' : 'cont'}"); + QDECREF(rsp); qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_NET_TIMEOUT_US); memread(req_addr + VNET_HDR_SIZE, buffer, sizeof(test)); @@ -230,8 +234,10 @@ static void pci_basic(gconstpointer data) /* End test */ close(sv[0]); qvirtqueue_cleanup(&qvirtio_pci, &tx->vq, alloc); + qvirtqueue_cleanup(&qvirtio_pci, &rx->vq, alloc); pc_alloc_uninit(alloc); qvirtio_pci_device_disable(dev); + g_free(dev->pdev); g_free(dev); qpci_free_pc(bus); test_end(); From 69179fe2fc0b91f68699012ba72d329e74ff629e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:21 +0400 Subject: [PATCH 37/41] tests: fix vhost-user-test leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spotted by valgrind. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/vhost-user-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 46d05881b6..27b10c19b8 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -683,6 +683,7 @@ static void test_reconnect(void) qtest_get_arch()); g_test_trap_subprocess(path, 0, 0); g_test_trap_assert_passed(); + g_free(path); } #endif From f6b8571041eea057b9e55fad51357e8d8ac9b1c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:24 +0400 Subject: [PATCH 38/41] vhost-user: add error report in vhost_user_write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to vhost_user_read() error report, it is useful to have early error report. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 819481dee2..1995fd20bd 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -176,7 +176,7 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, int *fds, int fd_num) { CharDriverState *chr = dev->opaque; - int size = VHOST_USER_HDR_SIZE + msg->size; + int ret, size = VHOST_USER_HDR_SIZE + msg->size; /* * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, @@ -188,11 +188,18 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, } if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { + error_report("Failed to set msg fds."); return -1; } - return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? - 0 : -1; + ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); + if (ret != size) { + error_report("Failed to write msg." + " Wrote %d instead of %d.", ret, size); + return -1; + } + + return 0; } static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, From 950d94ba0671e7f154a9e87a277f8efbddcee28f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:25 +0400 Subject: [PATCH 39/41] vhost: add vhost_net_set_backend() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all vhost-user backends support ops->vhost_net_set_backend(). It is a nicer to provide an assert/error than to crash trying to call. Furthermore, it improves a bit the code by hiding vhost_ops details. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 9 +++------ hw/virtio/vhost.c | 10 ++++++++++ include/hw/virtio/vhost.h | 4 ++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index dd41a8eda8..dc61dc1c5f 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -242,8 +242,7 @@ static int vhost_net_start_one(struct vhost_net *net, qemu_set_fd_handler(net->backend, NULL, NULL, NULL); file.fd = net->backend; for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - const VhostOps *vhost_ops = net->dev.vhost_ops; - r = vhost_ops->vhost_net_set_backend(&net->dev, &file); + r = vhost_net_set_backend(&net->dev, &file); if (r < 0) { r = -errno; goto fail; @@ -255,8 +254,7 @@ fail: file.fd = -1; if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { while (file.index-- > 0) { - const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); + int r = vhost_net_set_backend(&net->dev, &file); assert(r >= 0); } } @@ -277,8 +275,7 @@ static void vhost_net_stop_one(struct vhost_net *net, if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); + int r = vhost_net_set_backend(&net->dev, &file); assert(r >= 0); } } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 2d0d1d1ba9..b0e8ecce60 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1369,3 +1369,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) vhost_log_put(hdev, true); hdev->started = false; } + +int vhost_net_set_backend(struct vhost_dev *hdev, + struct vhost_vring_file *file) +{ + if (hdev->vhost_ops->vhost_net_set_backend) { + return hdev->vhost_ops->vhost_net_set_backend(hdev, file); + } + + return -1; +} diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 2106ed8c0d..e433089ea9 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -86,4 +86,8 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, uint64_t features); bool vhost_has_free_slot(void); + +int vhost_net_set_backend(struct vhost_dev *hdev, + struct vhost_vring_file *file); + #endif From 499c557975275dba23fbf8f23880815d019ce663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 27 Jul 2016 01:15:27 +0400 Subject: [PATCH 40/41] vhost: do not update last avail idx on get_vring_base() failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The state.num value will probably be 0 in this case, but that doesn't make sense to update. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index b0e8ecce60..3d0c807d0e 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -945,8 +945,9 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, r = dev->vhost_ops->vhost_get_vring_base(dev, &state); if (r < 0) { VHOST_OPS_DEBUG("vhost VQ %d ring restore failed: %d", idx, r); + } else { + virtio_queue_set_last_avail_idx(vdev, idx, state.num); } - virtio_queue_set_last_avail_idx(vdev, idx, state.num); virtio_queue_invalidate_signalled_used(vdev, idx); /* In the cross-endian case, we need to reset the vring endianness to From f077f889121c89612eaf0f21e94b97aa110a6910 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Tue, 26 Jul 2016 17:03:23 +0800 Subject: [PATCH 41/41] mptsas: Fix a migration compatible issue My previous commit 2e2aa316 removed internal flag msi_in_use, which exists in vmstate, use VMSTATE_UNUSED for migration compatibility. Reported-by: Amit Shah Suggested-by: Amit Shah Cc: Markus Armbruster Cc: Marcel Apfelbaum Cc: Paolo Bonzini Cc: Michael S. Tsirkin Cc: Amit Shah Signed-off-by: Cao jin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Amit Shah --- hw/scsi/mptsas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index 1ae32fb7d3..bebe5130fe 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -1370,7 +1370,7 @@ static const VMStateDescription vmstate_mptsas = { .post_load = mptsas_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, MPTSASState), - + VMSTATE_UNUSED(sizeof(bool)), /* Was msi_in_use */ VMSTATE_UINT32(state, MPTSASState), VMSTATE_UINT8(who_init, MPTSASState), VMSTATE_UINT8(doorbell_state, MPTSASState),