vfio: vfio-pci device assignment driver

This adds the core of the QEMU VFIO-based PCI device assignment driver.
To make use of this driver, enable CONFIG_VFIO, CONFIG_VFIO_IOMMU_TYPE1,
and CONFIG_VFIO_PCI in your host Linux kernel config.  Load the vfio-pci
module.  To assign device 0000:05:00.0 to a guest, do the following:

for dev in $(ls /sys/bus/pci/devices/0000:05:00.0/iommu_group/devices); do
    vendor=$(cat /sys/bus/pci/devices/$dev/vendor)
    device=$(cat /sys/bus/pci/devices/$dev/device)
    if [ -e /sys/bus/pci/devices/$dev/driver ]; then
        echo $dev > /sys/bus/pci/devices/$dev/driver/unbind
    fi
    echo $vendor $device > /sys/bus/pci/drivers/vfio-pci/new_id
done

See Documentation/vfio.txt in the Linux kernel tree for further
description of IOMMU groups and VFIO.

Then launch qemu including the option:

-device vfio-pci,host=0000:05:00.0

Legacy PCI interrupts (INTx) currently makes use of a kludge where we
trap BAR accesses and assume the access is in response to an interrupt,
therefore de-asserting and unmasking the interrupt.  It's not quite as
targetted as using the EOI for this, but it's self contained and seems
to work across all architectures.  The side-effect is a significant
performance slow-down for device in INTx mode.  Some devices, like
graphics cards, don't really use their interrupt, so this can be turned
off with the x-intx=off option, which disables INTx alltogether.  This
should be considered an experimental option until we refine this code.
Both MSI and MSI-X are supported and avoid these issues.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
master
Alex Williamson 2012-09-26 11:19:32 -06:00 committed by Anthony Liguori
parent 883f0b85f0
commit 65501a745d
2 changed files with 1978 additions and 0 deletions

1864
hw/vfio_pci.c Normal file

File diff suppressed because it is too large Load Diff

114
hw/vfio_pci_int.h Normal file
View File

@ -0,0 +1,114 @@
/*
* vfio based device assignment support
*
* Copyright Red Hat, Inc. 2012
*
* Authors:
* Alex Williamson <alex.williamson@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#ifndef HW_VFIO_PCI_INT_H
#define HW_VFIO_PCI_INT_H
#include "qemu-common.h"
#include "qemu-queue.h"
#include "pci.h"
#include "event_notifier.h"
typedef struct VFIOBAR {
off_t fd_offset; /* offset of BAR within device fd */
int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
MemoryRegion mem; /* slow, read/write access */
MemoryRegion mmap_mem; /* direct mapped access */
void *mmap;
size_t size;
uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
uint8_t nr; /* cache the BAR number for debug */
} VFIOBAR;
typedef struct VFIOINTx {
bool pending; /* interrupt pending */
bool kvm_accel; /* set when QEMU bypass through KVM enabled */
uint8_t pin; /* which pin to pull for qemu_set_irq */
EventNotifier interrupt; /* eventfd triggered on interrupt */
EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
PCIINTxRoute route; /* routing info for QEMU bypass */
bool disabled;
char *intx;
} VFIOINTx;
struct VFIODevice;
typedef struct VFIOMSIVector {
EventNotifier interrupt; /* eventfd triggered on interrupt */
struct VFIODevice *vdev; /* back pointer to device */
int virq; /* KVM irqchip route for QEMU bypass */
bool use;
} VFIOMSIVector;
enum {
VFIO_INT_NONE = 0,
VFIO_INT_INTx = 1,
VFIO_INT_MSI = 2,
VFIO_INT_MSIX = 3,
};
struct VFIOGroup;
typedef struct VFIOContainer {
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
struct {
/* enable abstraction to support various iommu backends */
union {
MemoryListener listener; /* Used by type1 iommu */
};
void (*release)(struct VFIOContainer *);
} iommu_data;
QLIST_HEAD(, VFIOGroup) group_list;
QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;
/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
typedef struct VFIOMSIXInfo {
uint8_t table_bar;
uint8_t pba_bar;
uint16_t entries;
uint32_t table_offset;
uint32_t pba_offset;
MemoryRegion mmap_mem;
void *mmap;
} VFIOMSIXInfo;
typedef struct VFIODevice {
PCIDevice pdev;
int fd;
VFIOINTx intx;
unsigned int config_size;
off_t config_offset; /* Offset of config space region within device fd */
unsigned int rom_size;
off_t rom_offset; /* Offset of ROM region within device fd */
int msi_cap_size;
VFIOMSIVector *msi_vectors;
VFIOMSIXInfo *msix;
int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
int interrupt; /* Current interrupt type */
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
PCIHostDeviceAddress host;
QLIST_ENTRY(VFIODevice) next;
struct VFIOGroup *group;
bool reset_works;
} VFIODevice;
typedef struct VFIOGroup {
int fd;
int groupid;
VFIOContainer *container;
QLIST_HEAD(, VFIODevice) device_list;
QLIST_ENTRY(VFIOGroup) next;
QLIST_ENTRY(VFIOGroup) container_next;
} VFIOGroup;
#endif /* HW_VFIO_PCI_INT_H */