diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 9aecaa82bc..8db182fa3d 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -135,12 +135,18 @@ enum { struct VFIOGroup; +typedef struct VFIOType1 { + MemoryListener listener; + int error; + bool initialized; +} VFIOType1; + typedef struct VFIOContainer { int fd; /* /dev/vfio/vfio, empowered by the attached groups */ struct { /* enable abstraction to support various iommu backends */ union { - MemoryListener listener; /* Used by type1 iommu */ + VFIOType1 type1; }; void (*release)(struct VFIOContainer *); } iommu_data; @@ -191,6 +197,7 @@ typedef struct VFIODevice { bool has_flr; bool has_pm_reset; bool needs_reset; + bool rom_read_failed; } VFIODevice; typedef struct VFIOGroup { @@ -592,7 +599,7 @@ static void vfio_msi_interrupt(void *opaque) return; } -#ifdef VFIO_DEBUG +#ifdef DEBUG_VFIO MSIMessage msg; if (vdev->interrupt == VFIO_INT_MSIX) { @@ -1125,6 +1132,14 @@ static void vfio_pci_load_rom(VFIODevice *vdev) vdev->rom_offset = reg_info.offset; if (!vdev->rom_size) { + vdev->rom_read_failed = true; + error_report("vfio-pci: Cannot read device rom at " + "%04x:%02x:%02x.%x\n", + vdev->host.domain, vdev->host.bus, vdev->host.slot, + vdev->host.function); + error_printf("Device option ROM contents are probably invalid " + "(check dmesg).\nSkip option ROM probe with rombar=0, " + "or load from file with romfile=\n"); return; } @@ -1156,6 +1171,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) /* Load the ROM lazily when the guest tries to read it */ if (unlikely(!vdev->rom)) { vfio_pci_load_rom(vdev); + if (unlikely(!vdev->rom && !vdev->rom_read_failed)) { + vfio_pci_load_rom(vdev); + } } memcpy(&val, vdev->rom + addr, @@ -1223,6 +1241,7 @@ static void vfio_pci_size_rom(VFIODevice *vdev) PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); vdev->pdev.has_rom = true; + vdev->rom_read_failed = false; } static void vfio_vga_write(void *opaque, hwaddr addr, @@ -1968,6 +1987,7 @@ static void vfio_vga_quirk_teardown(VFIODevice *vdev) while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); + memory_region_destroy(&quirk->mem); QLIST_REMOVE(quirk, next); g_free(quirk); } @@ -1990,6 +2010,7 @@ static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr) while (!QLIST_EMPTY(&bar->quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); memory_region_del_subregion(&bar->mem, &quirk->mem); + memory_region_destroy(&quirk->mem); QLIST_REMOVE(quirk, next); g_free(quirk); } @@ -2141,14 +2162,21 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, static bool vfio_listener_skipped_section(MemoryRegionSection *section) { - return !memory_region_is_ram(section->mr); + return !memory_region_is_ram(section->mr) || + /* + * Sizing an enabled 64-bit BAR can cause spurious mappings to + * addresses in the upper part of the 64-bit address space. These + * are never accessed by the CPU and beyond the address width of + * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. + */ + section->offset_within_address_space & (1ULL << 63); } static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.listener); + iommu_data.type1.listener); hwaddr iova, end; void *vaddr; int ret; @@ -2190,6 +2218,19 @@ static void vfio_listener_region_add(MemoryListener *listener, error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%m)", container, iova, end - iova, vaddr, ret); + + /* + * On the initfn path, store the first error in the container so we + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. + */ + if (!container->iommu_data.type1.initialized) { + if (!container->iommu_data.type1.error) { + container->iommu_data.type1.error = ret; + } + } else { + hw_error("vfio: DMA mapping failed, unable to continue\n"); + } } } @@ -2197,7 +2238,7 @@ static void vfio_listener_region_del(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.listener); + iommu_data.type1.listener); hwaddr iova, end; int ret; @@ -2242,7 +2283,7 @@ static MemoryListener vfio_memory_listener = { static void vfio_listener_release(VFIOContainer *container) { - memory_listener_unregister(&container->iommu_data.listener); + memory_listener_unregister(&container->iommu_data.type1.listener); } /* @@ -2412,10 +2453,12 @@ static void vfio_unmap_bar(VFIODevice *vdev, int nr) memory_region_del_subregion(&bar->mem, &bar->mmap_mem); munmap(bar->mmap, memory_region_size(&bar->mmap_mem)); + memory_region_destroy(&bar->mmap_mem); if (vdev->msix && vdev->msix->table_bar == nr) { memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem); munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); + memory_region_destroy(&vdev->msix->mmap_mem); } memory_region_destroy(&bar->mem); @@ -2501,7 +2544,7 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) * potentially insert a direct-mapped subregion before and after it. */ if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & TARGET_PAGE_MASK; + size = vdev->msix->table_offset & qemu_host_page_mask; } strncat(name, " mmap", sizeof(name) - strlen(name) - 1); @@ -2513,8 +2556,8 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) if (vdev->msix && vdev->msix->table_bar == nr) { unsigned start; - start = TARGET_PAGE_ALIGN(vdev->msix->table_offset + - (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + start = HOST_PAGE_ALIGN(vdev->msix->table_offset + + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); size = start < bar->size ? bar->size - start : 0; strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); @@ -3212,10 +3255,23 @@ static int vfio_connect_container(VFIOGroup *group) return -errno; } - container->iommu_data.listener = vfio_memory_listener; + container->iommu_data.type1.listener = vfio_memory_listener; container->iommu_data.release = vfio_listener_release; - memory_listener_register(&container->iommu_data.listener, &address_space_memory); + memory_listener_register(&container->iommu_data.type1.listener, + &address_space_memory); + + if (container->iommu_data.type1.error) { + ret = container->iommu_data.type1.error; + vfio_listener_release(container); + g_free(container); + close(fd); + error_report("vfio: memory listener initialization failed for container\n"); + return ret; + } + + container->iommu_data.type1.initialized = true; + } else { error_report("vfio: No available IOMMU models"); g_free(container); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index ea90b649d4..3b03cbfcf8 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -81,6 +81,7 @@ void cpu_gen_init(void); int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb, int *gen_code_size_ptr); bool cpu_restore_state(CPUArchState *env, uintptr_t searched_pc); +void page_size_init(void); void QEMU_NORETURN cpu_resume_from_signal(CPUArchState *env1, void *puc); void QEMU_NORETURN cpu_io_recompile(CPUArchState *env, uintptr_t retaddr); diff --git a/kvm-all.c b/kvm-all.c index a3fb8de268..9588feab99 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1360,6 +1360,7 @@ int kvm_init(void) * page size for the system though. */ assert(TARGET_PAGE_SIZE <= getpagesize()); + page_size_init(); #ifdef KVM_CAP_SET_GUEST_DEBUG QTAILQ_INIT(&s->kvm_sw_breakpoints); diff --git a/translate-all.c b/translate-all.c index 105c25aff3..543e1ffe77 100644 --- a/translate-all.c +++ b/translate-all.c @@ -289,17 +289,15 @@ static inline void map_exec(void *addr, long size) } #endif -static void page_init(void) +void page_size_init(void) { /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */ #ifdef _WIN32 - { - SYSTEM_INFO system_info; + SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - qemu_real_host_page_size = system_info.dwPageSize; - } + GetSystemInfo(&system_info); + qemu_real_host_page_size = system_info.dwPageSize; #else qemu_real_host_page_size = getpagesize(); #endif @@ -310,7 +308,11 @@ static void page_init(void) qemu_host_page_size = TARGET_PAGE_SIZE; } qemu_host_page_mask = ~(qemu_host_page_size - 1); +} +static void page_init(void) +{ + page_size_init(); #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) { #ifdef HAVE_KINFO_GETVMMAP