diff --git a/configure b/configure index d854936682..5c06663f3c 100755 --- a/configure +++ b/configure @@ -1881,6 +1881,34 @@ EOF #if !defined(HVM_MAX_VCPUS) # error HVM_MAX_VCPUS not defined #endif +int main(void) { + xc_interface *xc; + xs_daemon_open(); + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_gnttab_open(NULL, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL); + xc_reserved_device_memory_map(xc, 0, 0, 0, 0, NULL, 0); + return 0; +} +EOF + compile_prog "" "$xen_libs" + then + xen_ctrl_version=460 + xen=yes + + # Xen 4.5 + elif + cat > $TMPC < +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif int main(void) { xc_interface *xc; xs_daemon_open(); diff --git a/hw/core/machine.c b/hw/core/machine.c index ac4654e9dd..51ed6b2e05 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -226,6 +226,20 @@ static void machine_set_usb(Object *obj, bool value, Error **errp) ms->usb_disabled = !value; } +static bool machine_get_igd_gfx_passthru(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->igd_gfx_passthru; +} + +static void machine_set_igd_gfx_passthru(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->igd_gfx_passthru = value; +} + static char *machine_get_firmware(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -388,6 +402,12 @@ static void machine_initfn(Object *obj) object_property_set_description(obj, "usb", "Set on/off to enable/disable usb", NULL); + object_property_add_bool(obj, "igd-passthru", + machine_get_igd_gfx_passthru, + machine_set_igd_gfx_passthru, NULL); + object_property_set_description(obj, "igd-passthru", + "Set on/off to enable/disable igd passthrou", + NULL); object_property_add_str(obj, "firmware", machine_get_firmware, 
machine_set_firmware, NULL); diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index ebd1015a08..c250deb848 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -7,6 +7,7 @@ obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o obj-y += acpi-build.o +obj-y += pci-assign-load-rom.o gen-hex-y += hw/i386/acpi-dsdt.hex gen-hex-y += hw/i386/q35-acpi-dsdt.hex diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index 74d22f4fd2..b1beaa66b2 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -37,6 +37,7 @@ #include "hw/pci/pci.h" #include "hw/pci/msi.h" #include "kvm_i386.h" +#include "hw/pci/pci-assign.h" #define MSIX_PAGE_SIZE 0x1000 @@ -48,17 +49,6 @@ #define IORESOURCE_PREFETCH 0x00002000 /* No side effects */ #define IORESOURCE_MEM_64 0x00100000 -//#define DEVICE_ASSIGNMENT_DEBUG - -#ifdef DEVICE_ASSIGNMENT_DEBUG -#define DEBUG(fmt, ...) \ - do { \ - fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \ - } while (0) -#else -#define DEBUG(fmt, ...) -#endif - typedef struct PCIRegion { int type; /* Memory or port I/O */ int valid; @@ -1896,73 +1886,15 @@ static void assign_register_types(void) type_init(assign_register_types) -/* - * Scan the assigned devices for the devices that have an option ROM, and then - * load the corresponding ROM data to RAM. If an error occurs while loading an - * option ROM, we just ignore that option ROM and continue with the next one. 
- */ static void assigned_dev_load_option_rom(AssignedDevice *dev) { - char name[32], rom_file[64]; - FILE *fp; - uint8_t val; - struct stat st; - void *ptr; + int size = 0; - /* If loading ROM from file, pci handles it */ - if (dev->dev.romfile || !dev->dev.rom_bar) { - return; + pci_assign_dev_load_option_rom(&dev->dev, OBJECT(dev), &size, + dev->host.domain, dev->host.bus, + dev->host.slot, dev->host.function); + + if (!size) { + error_report("pci-assign: Invalid ROM."); } - - snprintf(rom_file, sizeof(rom_file), - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", - dev->host.domain, dev->host.bus, dev->host.slot, - dev->host.function); - - if (stat(rom_file, &st)) { - return; - } - - if (access(rom_file, F_OK)) { - error_report("pci-assign: Insufficient privileges for %s", rom_file); - return; - } - - /* Write "1" to the ROM file to enable it */ - fp = fopen(rom_file, "r+"); - if (fp == NULL) { - return; - } - val = 1; - if (fwrite(&val, 1, 1, fp) != 1) { - goto close_rom; - } - fseek(fp, 0, SEEK_SET); - - snprintf(name, sizeof(name), "%s.rom", - object_get_typename(OBJECT(dev))); - memory_region_init_ram(&dev->dev.rom, OBJECT(dev), name, st.st_size, - &error_abort); - vmstate_register_ram(&dev->dev.rom, &dev->dev.qdev); - ptr = memory_region_get_ram_ptr(&dev->dev.rom); - memset(ptr, 0xff, st.st_size); - - if (!fread(ptr, 1, st.st_size, fp)) { - error_report("pci-assign: Cannot read from host %s", rom_file); - error_printf("Device option ROM contents are probably invalid " - "(check dmesg).\nSkip option ROM probe with rombar=0, " - "or load from file with romfile=\n"); - goto close_rom; - } - - pci_register_bar(&dev->dev, PCI_ROM_SLOT, 0, &dev->dev.rom); - dev->dev.has_rom = true; -close_rom: - /* Write "0" to disable ROM */ - fseek(fp, 0, SEEK_SET); - val = 0; - if (!fwrite(&val, 1, 1, fp)) { - DEBUG("%s\n", "Failed to disable pci-sysfs rom file"); - } - fclose(fp); } diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index eab39fc098..3f925b26db 100644 --- 
a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -50,7 +50,8 @@ #include "cpu.h" #include "qemu/error-report.h" #ifdef CONFIG_XEN -# include +#include +#include "hw/xen/xen_pt.h" #endif #include "migration/migration.h" @@ -76,7 +77,8 @@ static bool has_reserved_memory = true; static bool kvmclock_enabled = true; /* PC hardware initialisation */ -static void pc_init1(MachineState *machine) +static void pc_init1(MachineState *machine, + const char *host_type, const char *pci_type) { PCMachineState *pcms = PC_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); @@ -194,7 +196,9 @@ static void pc_init1(MachineState *machine) } if (pci_enabled) { - pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi, + pci_bus = i440fx_init(host_type, + pci_type, + &i440fx_state, &piix3_devfn, &isa_bus, gsi, system_memory, system_io, machine->ram_size, pcms->below_4g_mem_size, pcms->above_4g_mem_size, @@ -412,15 +416,25 @@ static void pc_init_isa(MachineState *machine) } x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, 1 << KVM_FEATURE_PV_EOI); enable_compat_apic_id_mode(); - pc_init1(machine); + pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, TYPE_I440FX_PCI_DEVICE); } #ifdef CONFIG_XEN +static void pc_xen_hvm_init_pci(MachineState *machine) +{ + const char *pci_type = has_igd_gfx_passthru ? 
+ TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE : TYPE_I440FX_PCI_DEVICE; + + pc_init1(machine, + TYPE_I440FX_PCI_HOST_BRIDGE, + pci_type); +} + static void pc_xen_hvm_init(MachineState *machine) { PCIBus *bus; - pc_init1(machine); + pc_xen_hvm_init_pci(machine); bus = pci_find_primary_bus(); if (bus != NULL) { @@ -436,7 +450,8 @@ static void pc_xen_hvm_init(MachineState *machine) if (compat) { \ compat(machine); \ } \ - pc_init1(machine); \ + pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ + TYPE_I440FX_PCI_DEVICE); \ } \ DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -878,6 +893,118 @@ static void pc_i440fx_0_10_machine_options(MachineClass *m) DEFINE_I440FX_MACHINE(v0_10, "pc-0.10", pc_compat_0_13, pc_i440fx_0_10_machine_options); +typedef struct { + uint16_t gpu_device_id; + uint16_t pch_device_id; + uint8_t pch_revision_id; +} IGDDeviceIDInfo; + +/* In real world different GPU should have different PCH. But actually + * the different PCH DIDs likely map to different PCH SKUs. We do the + * same thing for the GPU. For PCH, the different SKUs are going to be + * all the same silicon design and implementation, just different + * features turn on and off with fuses. The SW interfaces should be + * consistent across all SKUs in a given family (eg LPT). But just same + * features may not be supported. + * + * Most of these different PCH features probably don't matter to the + * Gfx driver, but obviously any difference in display port connections + * will so it should be fine with any PCH in case of passthrough. + * + * So currently use one PCH version, 0x8c4e, to cover all HSW(Haswell) + * scenarios, 0x9cc3 for BDW(Broadwell). 
+ */ +static const IGDDeviceIDInfo igd_combo_id_infos[] = { + /* HSW Classic */ + {0x0402, 0x8c4e, 0x04}, /* HSWGT1D, HSWD_w7 */ + {0x0406, 0x8c4e, 0x04}, /* HSWGT1M, HSWM_w7 */ + {0x0412, 0x8c4e, 0x04}, /* HSWGT2D, HSWD_w7 */ + {0x0416, 0x8c4e, 0x04}, /* HSWGT2M, HSWM_w7 */ + {0x041E, 0x8c4e, 0x04}, /* HSWGT15D, HSWD_w7 */ + /* HSW ULT */ + {0x0A06, 0x8c4e, 0x04}, /* HSWGT1UT, HSWM_w7 */ + {0x0A16, 0x8c4e, 0x04}, /* HSWGT2UT, HSWM_w7 */ + {0x0A26, 0x8c4e, 0x06}, /* HSWGT3UT, HSWM_w7 */ + {0x0A2E, 0x8c4e, 0x04}, /* HSWGT3UT28W, HSWM_w7 */ + {0x0A1E, 0x8c4e, 0x04}, /* HSWGT2UX, HSWM_w7 */ + {0x0A0E, 0x8c4e, 0x04}, /* HSWGT1ULX, HSWM_w7 */ + /* HSW CRW */ + {0x0D26, 0x8c4e, 0x04}, /* HSWGT3CW, HSWM_w7 */ + {0x0D22, 0x8c4e, 0x04}, /* HSWGT3CWDT, HSWD_w7 */ + /* HSW Server */ + {0x041A, 0x8c4e, 0x04}, /* HSWSVGT2, HSWD_w7 */ + /* HSW SRVR */ + {0x040A, 0x8c4e, 0x04}, /* HSWSVGT1, HSWD_w7 */ + /* BSW */ + {0x1606, 0x9cc3, 0x03}, /* BDWULTGT1, BDWM_w7 */ + {0x1616, 0x9cc3, 0x03}, /* BDWULTGT2, BDWM_w7 */ + {0x1626, 0x9cc3, 0x03}, /* BDWULTGT3, BDWM_w7 */ + {0x160E, 0x9cc3, 0x03}, /* BDWULXGT1, BDWM_w7 */ + {0x161E, 0x9cc3, 0x03}, /* BDWULXGT2, BDWM_w7 */ + {0x1602, 0x9cc3, 0x03}, /* BDWHALOGT1, BDWM_w7 */ + {0x1612, 0x9cc3, 0x03}, /* BDWHALOGT2, BDWM_w7 */ + {0x1622, 0x9cc3, 0x03}, /* BDWHALOGT3, BDWM_w7 */ + {0x162B, 0x9cc3, 0x03}, /* BDWHALO28W, BDWM_w7 */ + {0x162A, 0x9cc3, 0x03}, /* BDWGT3WRKS, BDWM_w7 */ + {0x162D, 0x9cc3, 0x03}, /* BDWGT3SRVR, BDWM_w7 */ +}; + +static void isa_bridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + dc->desc = "ISA bridge faked to support IGD PT"; + k->vendor_id = PCI_VENDOR_ID_INTEL; + k->class_id = PCI_CLASS_BRIDGE_ISA; +}; + +static TypeInfo isa_bridge_info = { + .name = "igd-passthrough-isa-bridge", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIDevice), + .class_init = isa_bridge_class_init, +}; + +static void 
pt_graphics_register_types(void)
+{
+    type_register_static(&isa_bridge_info);
+}
+type_init(pt_graphics_register_types)
+
+/*
+ * Create the fake ISA bridge an IGD guest driver looks for at 00:1f.0.
+ *
+ * @bus:        PCI bus the bridge is created on.
+ * @gpu_dev_id: PCI device id of the passed-through GPU.
+ *
+ * The GPU id is looked up in igd_combo_id_infos to pick the PCH device id
+ * and revision written into the bridge's config space.  If the GPU id is
+ * not in the table, nothing is created.
+ */
+void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id)
+{
+    struct PCIDevice *bridge_dev;
+    int i, num;
+    uint16_t pch_dev_id = 0xffff;   /* 0xffff: no matching table entry */
+    uint8_t pch_rev_id = 0;         /* only used once pch_dev_id is set */
+
+    num = ARRAY_SIZE(igd_combo_id_infos);
+    for (i = 0; i < num; i++) {
+        if (gpu_dev_id == igd_combo_id_infos[i].gpu_device_id) {
+            pch_dev_id = igd_combo_id_infos[i].pch_device_id;
+            pch_rev_id = igd_combo_id_infos[i].pch_revision_id;
+            /* GPU ids are unique in the table; no need to scan further. */
+            break;
+        }
+    }
+
+    if (pch_dev_id == 0xffff) {
+        return;
+    }
+
+    /* Currently IGD drivers always need to access PCH by 1f.0. */
+    bridge_dev = pci_create_simple(bus, PCI_DEVFN(0x1f, 0),
+                                   "igd-passthrough-isa-bridge");
+    if (!bridge_dev) {
+        fprintf(stderr, "set igd-passthrough-isa-bridge failed!\n");
+        return;
+    }
+
+    /*
+     * Note that vendor id is always PCI_VENDOR_ID_INTEL (set by the class
+     * init), so only the device id and revision are filled in here.
+     */
+    pci_config_set_device_id(bridge_dev->config, pch_dev_id);
+    pci_config_set_revision(bridge_dev->config, pch_rev_id);
+}

 static void isapc_machine_options(MachineClass *m)
 {
diff --git a/hw/i386/pci-assign-load-rom.c b/hw/i386/pci-assign-load-rom.c
new file mode 100644
index 0000000000..34a3a7ed7f
--- /dev/null
+++ b/hw/i386/pci-assign-load-rom.c
@@ -0,0 +1,91 @@
+/*
+ * This is split from hw/i386/kvm/pci-assign.c
+ */
+#include
+#include
+#include
+#include
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "qemu/error-report.h"
+#include "ui/console.h"
+#include "hw/loader.h"
+#include "monitor/monitor.h"
+#include "qemu/range.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci-assign.h"
+
+/*
+ * Scan the assigned devices for the devices that have an option ROM, and then
+ * load the corresponding ROM data to RAM. If an error occurs while loading an
+ * option ROM, we just ignore that option ROM and continue with the next one.
+ *
+ * The ROM is read from the sysfs "rom" file of host device
+ * domain:bus:slot.function.  The file is enabled by writing a byte of
+ * value 1 to it and disabled again by writing 0 before returning.
+ *
+ * On success the data is placed in dev->rom, that region is registered as
+ * the PCI_ROM_SLOT BAR, dev->has_rom is set, *size receives the sysfs file
+ * size and the pointer to the ROM RAM is returned.
+ *
+ * NULL is returned, with *size untouched, when the ROM is supplied through
+ * romfile, rom_bar is 0, or the sysfs file cannot be stat'ed, accessed,
+ * opened or enabled.  If reading fails after the RAM was allocated, the
+ * (0xff-filled) pointer is still returned but *size is left untouched, so
+ * callers should initialise *size to 0 and check it.
+ */
+void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner,
+                                     int *size, unsigned int domain,
+                                     unsigned int bus, unsigned int slot,
+                                     unsigned int function)
+{
+    char name[32], rom_file[64];
+    FILE *fp;
+    uint8_t val;
+    struct stat st;
+    void *ptr = NULL;
+
+    /* If loading ROM from file, pci handles it */
+    if (dev->romfile || !dev->rom_bar) {
+        return NULL;
+    }
+
+    snprintf(rom_file, sizeof(rom_file),
+             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
+             domain, bus, slot, function);
+
+    /* No sysfs rom file means the host device exposes no option ROM. */
+    if (stat(rom_file, &st)) {
+        return NULL;
+    }
+
+    /*
+     * The file is opened "r+" below, so both read and write permission are
+     * required.  An existence-only check can never fail after the stat()
+     * above and would leave the privilege diagnostic unreachable.
+     */
+    if (access(rom_file, R_OK | W_OK)) {
+        error_report("pci-assign: Insufficient privileges for %s", rom_file);
+        return NULL;
+    }
+
+    /* Write "1" to the ROM file to enable it */
+    fp = fopen(rom_file, "r+");
+    if (fp == NULL) {
+        return NULL;
+    }
+    val = 1;
+    if (fwrite(&val, 1, 1, fp) != 1) {
+        goto close_rom;
+    }
+    fseek(fp, 0, SEEK_SET);
+
+    snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner));
+    memory_region_init_ram(&dev->rom, owner, name, st.st_size, &error_abort);
+    vmstate_register_ram(&dev->rom, &dev->qdev);
+    ptr = memory_region_get_ram_ptr(&dev->rom);
+    /* Pre-fill so bytes beyond what the host returns read as 0xff. */
+    memset(ptr, 0xff, st.st_size);
+
+    /* Only a read of zero bytes is treated as failure. */
+    if (!fread(ptr, 1, st.st_size, fp)) {
+        error_report("pci-assign: Cannot read from host %s", rom_file);
+        error_printf("Device option ROM contents are probably invalid "
+                     "(check dmesg).\nSkip option ROM probe with rombar=0, "
+                     "or load from file with romfile=\n");
+        goto close_rom;
+    }
+
+    pci_register_bar(dev, PCI_ROM_SLOT, 0, &dev->rom);
+    dev->has_rom = true;
+    *size = st.st_size;
+close_rom:
+    /* Write "0" to disable ROM */
+    fseek(fp, 0, SEEK_SET);
+    val = 0;
+    if (!fwrite(&val, 1, 1, fp)) {
+        DEBUG("%s\n", "Failed to disable pci-sysfs rom file");
+    }
+    fclose(fp);
+
+    return ptr;
+}
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 1cb25f3fa6..1fb71c8081 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -40,7 +40,6 @@
  * http://download.intel.com/design/chipsets/datashts/29054901.pdf
*/ -#define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost" #define I440FX_PCI_HOST_BRIDGE(obj) \ OBJECT_CHECK(I440FXState, (obj), TYPE_I440FX_PCI_HOST_BRIDGE) @@ -95,7 +94,6 @@ typedef struct PIIX3State { #define PIIX3_PCI_DEVICE(obj) \ OBJECT_CHECK(PIIX3State, (obj), TYPE_PIIX3_PCI_DEVICE) -#define TYPE_I440FX_PCI_DEVICE "i440FX" #define I440FX_PCI_DEVICE(obj) \ OBJECT_CHECK(PCII440FXState, (obj), TYPE_I440FX_PCI_DEVICE) @@ -305,7 +303,8 @@ static void i440fx_realize(PCIDevice *dev, Error **errp) dev->config[I440FX_SMRAM] = 0x02; } -PCIBus *i440fx_init(PCII440FXState **pi440fx_state, +PCIBus *i440fx_init(const char *host_type, const char *pci_type, + PCII440FXState **pi440fx_state, int *piix3_devfn, ISABus **isa_bus, qemu_irq *pic, MemoryRegion *address_space_mem, @@ -325,7 +324,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, unsigned i; I440FXState *i440fx; - dev = qdev_create(NULL, TYPE_I440FX_PCI_HOST_BRIDGE); + dev = qdev_create(NULL, host_type); s = PCI_HOST_BRIDGE(dev); b = pci_bus_new(dev, NULL, pci_address_space, address_space_io, 0, TYPE_PCI_BUS); @@ -333,7 +332,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev), NULL); qdev_init_nofail(dev); - d = pci_create_simple(b, 0, TYPE_I440FX_PCI_DEVICE); + d = pci_create_simple(b, 0, pci_type); *pi440fx_state = I440FX_PCI_DEVICE(d); f = *pi440fx_state; f->system_memory = address_space_mem; @@ -740,6 +739,90 @@ static const TypeInfo i440fx_info = { .class_init = i440fx_class_init, }; +/* IGD Passthrough Host Bridge. */ +typedef struct { + uint8_t offset; + uint8_t len; +} IGDHostInfo; + +/* Here we just expose minimal host bridge offset subset. 
+ */
+static const IGDHostInfo igd_host_bridge_infos[] = {
+    {0x08, 2},  /* revision id */
+    {0x2c, 2},  /* subsystem vendor id */
+    {0x2e, 2},  /* subsystem id */
+    {0x50, 2},  /* SNB: processor graphics control register */
+    {0x52, 2},  /* processor graphics control register */
+    {0xa4, 4},  /* SNB: graphics base of stolen memory */
+    {0xa8, 4},  /* SNB: base of GTT stolen memory */
+};
+
+/*
+ * Read @len bytes (at most sizeof(*val)) at config-space offset @pos of the
+ * real host bridge, sysfs device 0000:00:00.0, into the storage of *@val.
+ * The bytes are copied as-is; bytes of *@val beyond @len are not modified.
+ *
+ * Returns 0 on success or a negative errno value on failure.  The sysfs
+ * descriptor is closed on every path.
+ */
+static int host_pci_config_read(int pos, int len, uint32_t *val)
+{
+    char path[PATH_MAX];
+    int config_fd;
+    int ret = 0;
+    ssize_t size = sizeof(path);
+    /* Access real host bridge. */
+    int rc = snprintf(path, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
+                      0, 0, 0, 0, "config");
+
+    if (rc >= size || rc < 0) {
+        return -ENODEV;
+    }
+
+    /* Never read more than the caller's 32-bit buffer can hold. */
+    if (len < 0 || (size_t)len > sizeof(*val)) {
+        return -EINVAL;
+    }
+
+    /* The file is only read here, so do not ask for write access. */
+    config_fd = open(path, O_RDONLY);
+    if (config_fd < 0) {
+        return -ENODEV;
+    }
+
+    if (lseek(config_fd, pos, SEEK_SET) != pos) {
+        ret = -errno;
+        goto out;
+    }
+
+    do {
+        rc = read(config_fd, (uint8_t *)val, len);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+    if (rc != len) {
+        /* errno is only meaningful when read() itself failed. */
+        ret = rc < 0 ? -errno : -EIO;
+    }
+
+out:
+    close(config_fd);
+    return ret;
+}
+
+/*
+ * Init hook of the IGD passthrough host bridge: copy every register listed
+ * in igd_host_bridge_infos from the real host bridge into the emulated
+ * device via pci_default_write_config().
+ *
+ * Returns 0 on success, -ENODEV if any host register cannot be read.
+ */
+static int igd_pt_i440fx_initfn(struct PCIDevice *pci_dev)
+{
+    uint32_t val;
+    int rc, i, num;
+    int pos, len;
+
+    num = ARRAY_SIZE(igd_host_bridge_infos);
+    for (i = 0; i < num; i++) {
+        pos = igd_host_bridge_infos[i].offset;
+        len = igd_host_bridge_infos[i].len;
+        /* Short registers fill only part of val; clear stale bytes first. */
+        val = 0;
+        rc = host_pci_config_read(pos, len, &val);
+        if (rc) {
+            return -ENODEV;
+        }
+        pci_default_write_config(pci_dev, pos, val, len);
+    }
+
+    return 0;
+}
+
+/* Class init: install the init hook above and set the description. */
+static void igd_passthrough_i440fx_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->init = igd_pt_i440fx_initfn;
+    dc->desc = "IGD Passthrough Host bridge";
+}
+
+static const TypeInfo igd_passthrough_i440fx_info = {
+    .name          = TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE,
+    .parent        = TYPE_I440FX_PCI_DEVICE,
+    .instance_size = sizeof(PCII440FXState),
+    .class_init    = igd_passthrough_i440fx_class_init,
+};
+
 static const char
*i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) { @@ -781,6 +864,7 @@ static const TypeInfo i440fx_pcihost_info = { static void i440fx_register_types(void) { type_register_static(&i440fx_info); + type_register_static(&igd_passthrough_i440fx_info); type_register_static(&piix3_pci_type_info); type_register_static(&piix3_info); type_register_static(&piix3_xen_info); diff --git a/hw/xen/Makefile.objs b/hw/xen/Makefile.objs index a0ca0aa3df..a9ad7e70f7 100644 --- a/hw/xen/Makefile.objs +++ b/hw/xen/Makefile.objs @@ -3,3 +3,4 @@ common-obj-$(CONFIG_XEN_BACKEND) += xen_backend.o xen_devconfig.o obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_msi.o +obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_msi.o xen_pt_graphics.o diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c index 743b37b991..be28ca2ce7 100644 --- a/hw/xen/xen-host-pci-device.c +++ b/hw/xen/xen-host-pci-device.c @@ -376,6 +376,11 @@ int xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain, goto error; } d->irq = v; + rc = xen_host_pci_get_hex_value(d, "class", &v); + if (rc) { + goto error; + } + d->class_code = v; d->is_virtfn = xen_host_pci_dev_is_virtfn(d); return 0; @@ -387,6 +392,11 @@ error: return rc; } +bool xen_host_pci_device_closed(XenHostPCIDevice *d) +{ + return d->config_fd == -1; +} + void xen_host_pci_device_put(XenHostPCIDevice *d) { if (d->config_fd >= 0) { diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h index c2486f0c19..3d44e044ff 100644 --- a/hw/xen/xen-host-pci-device.h +++ b/hw/xen/xen-host-pci-device.h @@ -25,6 +25,7 @@ typedef struct XenHostPCIDevice { uint16_t vendor_id; uint16_t device_id; + uint32_t class_code; int irq; XenHostPCIIORegion io_regions[PCI_NUM_REGIONS - 1]; @@ -38,6 +39,7 @@ typedef struct XenHostPCIDevice { int xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain, uint8_t 
bus, uint8_t dev, uint8_t func); void xen_host_pci_device_put(XenHostPCIDevice *pci_dev); +bool xen_host_pci_device_closed(XenHostPCIDevice *d); int xen_host_pci_get_byte(XenHostPCIDevice *d, int pos, uint8_t *p); int xen_host_pci_get_word(XenHostPCIDevice *d, int pos, uint16_t *p); diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index ed5fcaec0d..2b54f52707 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -56,6 +56,7 @@ #include "hw/pci/pci.h" #include "hw/xen/xen.h" +#include "hw/i386/pc.h" #include "hw/xen/xen_backend.h" #include "xen_pt.h" #include "qemu/range.h" @@ -378,7 +379,7 @@ static void xen_pt_pci_write_config(PCIDevice *d, uint32_t addr, } } - /* need to shift back before passing them to xen_host_pci_device */ + /* need to shift back before passing them to xen_host_pci_set_block. */ val >>= (addr & 3) << 3; memory_region_transaction_commit(); @@ -406,7 +407,7 @@ out: (uint8_t *)&val + index, len); if (rc < 0) { - XEN_PT_ERR(d, "pci_write_block failed. return value: %d.\n", rc); + XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc); } } } @@ -502,6 +503,7 @@ static int xen_pt_register_regions(XenPCIPassthroughState *s, uint16_t *cmd) d->rom.size, d->rom.base_addr); } + xen_pt_register_vga_regions(d); return 0; } @@ -683,13 +685,86 @@ static const MemoryListener xen_pt_io_listener = { .priority = 10, }; +static void +xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, + XenHostPCIDevice *dev) +{ + uint16_t gpu_dev_id; + PCIDevice *d = &s->dev; + + gpu_dev_id = dev->device_id; + igd_passthrough_isa_bridge_create(d->bus, gpu_dev_id); +} + +/* destroy. 
*/ +static void xen_pt_destroy(PCIDevice *d) { + + XenPCIPassthroughState *s = XEN_PT_DEVICE(d); + XenHostPCIDevice *host_dev = &s->real_device; + uint8_t machine_irq = s->machine_irq; + uint8_t intx; + int rc; + + if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) { + intx = xen_pt_pci_intx(s); + rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq, + PT_IRQ_TYPE_PCI, + pci_bus_num(d->bus), + PCI_SLOT(s->dev.devfn), + intx, + 0 /* isa_irq */); + if (rc < 0) { + XEN_PT_ERR(d, "unbinding of interrupt INT%c failed." + " (machine irq: %i, err: %d)" + " But bravely continuing on..\n", + 'a' + intx, machine_irq, errno); + } + } + + /* N.B. xen_pt_config_delete takes care of freeing them. */ + if (s->msi) { + xen_pt_msi_disable(s); + } + if (s->msix) { + xen_pt_msix_disable(s); + } + + if (machine_irq) { + xen_pt_mapped_machine_irq[machine_irq]--; + + if (xen_pt_mapped_machine_irq[machine_irq] == 0) { + rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq); + + if (rc < 0) { + XEN_PT_ERR(d, "unmapping of interrupt %i failed. 
(err: %d)" + " But bravely continuing on..\n", + machine_irq, errno); + } + } + s->machine_irq = 0; + } + + /* delete all emulated config registers */ + xen_pt_config_delete(s); + + xen_pt_unregister_vga_regions(host_dev); + + if (s->listener_set) { + memory_listener_unregister(&s->memory_listener); + memory_listener_unregister(&s->io_listener); + s->listener_set = false; + } + if (!xen_host_pci_device_closed(&s->real_device)) { + xen_host_pci_device_put(&s->real_device); + } +} /* init */ static int xen_pt_initfn(PCIDevice *d) { XenPCIPassthroughState *s = XEN_PT_DEVICE(d); int rc = 0; - uint8_t machine_irq = 0; + uint8_t machine_irq = 0, scratch; uint16_t cmd = 0; int pirq = XEN_PT_UNASSIGNED_PIRQ; @@ -715,27 +790,48 @@ static int xen_pt_initfn(PCIDevice *d) } /* Initialize virtualized PCI configuration (Extended 256 Bytes) */ - if (xen_host_pci_get_block(&s->real_device, 0, d->config, - PCI_CONFIG_SPACE_SIZE) == -1) { - xen_host_pci_device_put(&s->real_device); - return -1; - } + memset(d->config, 0, PCI_CONFIG_SPACE_SIZE); s->memory_listener = xen_pt_memory_listener; s->io_listener = xen_pt_io_listener; + /* Setup VGA bios for passthrough GFX */ + if ((s->real_device.domain == 0) && (s->real_device.bus == 0) && + (s->real_device.dev == 2) && (s->real_device.func == 0)) { + if (!is_igd_vga_passthrough(&s->real_device)) { + XEN_PT_ERR(d, "Need to enable igd-passthru if you're trying" + " to passthrough IGD GFX.\n"); + xen_host_pci_device_put(&s->real_device); + return -1; + } + + if (xen_pt_setup_vga(s, &s->real_device) < 0) { + XEN_PT_ERR(d, "Setup VGA BIOS of passthrough GFX failed!\n"); + xen_host_pci_device_put(&s->real_device); + return -1; + } + + /* Register ISA bridge for passthrough GFX. 
*/ + xen_igd_passthrough_isa_bridge_create(s, &s->real_device); + } + /* Handle real device's MMIO/PIO BARs */ xen_pt_register_regions(s, &cmd); /* reinitialize each config register to be emulated */ - if (xen_pt_config_init(s)) { + rc = xen_pt_config_init(s); + if (rc) { XEN_PT_ERR(d, "PCI Config space initialisation failed.\n"); - xen_host_pci_device_put(&s->real_device); - return -1; + goto err_out; } /* Bind interrupt */ - if (!s->dev.config[PCI_INTERRUPT_PIN]) { + rc = xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &scratch); + if (rc) { + XEN_PT_ERR(d, "Failed to read PCI_INTERRUPT_PIN! (rc:%d)\n", rc); + goto err_out; + } + if (!scratch) { XEN_PT_LOG(d, "no pin interrupt\n"); goto out; } @@ -785,69 +881,41 @@ static int xen_pt_initfn(PCIDevice *d) out: if (cmd) { - xen_host_pci_set_word(&s->real_device, PCI_COMMAND, - pci_get_word(d->config + PCI_COMMAND) | cmd); + uint16_t val; + + rc = xen_host_pci_get_word(&s->real_device, PCI_COMMAND, &val); + if (rc) { + XEN_PT_ERR(d, "Failed to read PCI_COMMAND! 
(rc: %d)\n", rc); + goto err_out; + } else { + val |= cmd; + rc = xen_host_pci_set_word(&s->real_device, PCI_COMMAND, val); + if (rc) { + XEN_PT_ERR(d, "Failed to write PCI_COMMAND val=0x%x!(rc: %d)\n", + val, rc); + goto err_out; + } + } } memory_listener_register(&s->memory_listener, &s->dev.bus_master_as); memory_listener_register(&s->io_listener, &address_space_io); + s->listener_set = true; XEN_PT_LOG(d, "Real physical device %02x:%02x.%d registered successfully!\n", s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function); return 0; + +err_out: + xen_pt_destroy(d); + assert(rc); + return rc; } static void xen_pt_unregister_device(PCIDevice *d) { - XenPCIPassthroughState *s = XEN_PT_DEVICE(d); - uint8_t machine_irq = s->machine_irq; - uint8_t intx = xen_pt_pci_intx(s); - int rc; - - if (machine_irq) { - rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq, - PT_IRQ_TYPE_PCI, - pci_bus_num(d->bus), - PCI_SLOT(s->dev.devfn), - intx, - 0 /* isa_irq */); - if (rc < 0) { - XEN_PT_ERR(d, "unbinding of interrupt INT%c failed." - " (machine irq: %i, err: %d)" - " But bravely continuing on..\n", - 'a' + intx, machine_irq, errno); - } - } - - if (s->msi) { - xen_pt_msi_disable(s); - } - if (s->msix) { - xen_pt_msix_disable(s); - } - - if (machine_irq) { - xen_pt_mapped_machine_irq[machine_irq]--; - - if (xen_pt_mapped_machine_irq[machine_irq] == 0) { - rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq); - - if (rc < 0) { - XEN_PT_ERR(d, "unmapping of interrupt %i failed. 
(err: %d)" - " But bravely continuing on..\n", - machine_irq, errno); - } - } - } - - /* delete all emulated config registers */ - xen_pt_config_delete(s); - - memory_listener_unregister(&s->memory_listener); - memory_listener_unregister(&s->io_listener); - - xen_host_pci_device_put(&s->real_device); + xen_pt_destroy(d); } static Property xen_pci_passthrough_properties[] = { diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index 393f36ccbf..3bc22eb1d1 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -40,6 +40,9 @@ typedef struct XenPCIPassthroughState XenPCIPassthroughState; #define XEN_PT_DEVICE(obj) \ OBJECT_CHECK(XenPCIPassthroughState, (obj), TYPE_XEN_PT_DEVICE) +uint32_t igd_read_opregion(XenPCIPassthroughState *s); +void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val); + /* function type for config reg */ typedef int (*xen_pt_conf_reg_init) (XenPCIPassthroughState *, XenPTRegInfo *, uint32_t real_offset, @@ -66,8 +69,9 @@ typedef int (*xen_pt_conf_byte_read) #define XEN_PT_BAR_ALLF 0xFFFFFFFF #define XEN_PT_BAR_UNMAPPED (-1) -#define PCI_CAP_MAX 48 +#define XEN_PCI_CAP_MAX 48 +#define XEN_PCI_INTEL_OPREGION 0xfc typedef enum { XEN_PT_GRP_TYPE_HARDWIRED = 0, /* 0 Hardwired reg group */ @@ -134,7 +138,11 @@ struct XenPTRegInfo { struct XenPTReg { QLIST_ENTRY(XenPTReg) entries; XenPTRegInfo *reg; - uint32_t data; /* emulated value */ + union { + uint8_t *byte; + uint16_t *half_word; + uint32_t *word; + } ptr; /* pointer to dev.config. */ }; typedef const struct XenPTRegGroupInfo XenPTRegGroupInfo; @@ -217,6 +225,7 @@ struct XenPCIPassthroughState { MemoryListener memory_listener; MemoryListener io_listener; + bool listener_set; }; int xen_pt_config_init(XenPCIPassthroughState *s); @@ -282,6 +291,7 @@ static inline uint8_t xen_pt_pci_intx(XenPCIPassthroughState *s) " value=%i, acceptable range is 1 - 4\n", r_val); r_val = 0; } else { + /* Note that if s.real_device.config_fd is closed we make 0xff. 
*/ r_val -= 1; } @@ -289,7 +299,6 @@ static inline uint8_t xen_pt_pci_intx(XenPCIPassthroughState *s) } /* MSI/MSI-X */ -int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool en); int xen_pt_msi_setup(XenPCIPassthroughState *s); int xen_pt_msi_update(XenPCIPassthroughState *d); void xen_pt_msi_disable(XenPCIPassthroughState *s); @@ -305,5 +314,18 @@ static inline bool xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar) return s->msix && s->msix->bar_index == bar; } - +extern void *pci_assign_dev_load_option_rom(PCIDevice *dev, + struct Object *owner, int *size, + unsigned int domain, + unsigned int bus, unsigned int slot, + unsigned int function); +extern bool has_igd_gfx_passthru; +static inline bool is_igd_vga_passthrough(XenHostPCIDevice *dev) +{ + return (has_igd_gfx_passthru + && ((dev->class_code >> 0x8) == PCI_CLASS_DISPLAY_VGA)); +} +int xen_pt_register_vga_regions(XenHostPCIDevice *dev); +int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev); +int xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev); #endif /* !XEN_PT_H */ diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index dd37be38a4..aca3790681 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -128,10 +128,11 @@ static int xen_pt_byte_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, { XenPTRegInfo *reg = cfg_entry->reg; uint8_t valid_emu_mask = 0; + uint8_t *data = cfg_entry->ptr.byte; /* emulate byte register */ valid_emu_mask = reg->emu_mask & valid_mask; - *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); return 0; } @@ -140,10 +141,11 @@ static int xen_pt_word_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, { XenPTRegInfo *reg = cfg_entry->reg; uint16_t valid_emu_mask = 0; + uint16_t *data = cfg_entry->ptr.half_word; /* emulate word register */ valid_emu_mask = reg->emu_mask & valid_mask; - *value = 
XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); return 0; } @@ -152,10 +154,11 @@ static int xen_pt_long_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, { XenPTRegInfo *reg = cfg_entry->reg; uint32_t valid_emu_mask = 0; + uint32_t *data = cfg_entry->ptr.word; /* emulate long register */ valid_emu_mask = reg->emu_mask & valid_mask; - *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); return 0; } @@ -169,10 +172,11 @@ static int xen_pt_byte_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, XenPTRegInfo *reg = cfg_entry->reg; uint8_t writable_mask = 0; uint8_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint8_t *data = cfg_entry->ptr.byte; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -186,10 +190,11 @@ static int xen_pt_word_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, XenPTRegInfo *reg = cfg_entry->reg; uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -203,10 +208,11 @@ static int xen_pt_long_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, XenPTRegInfo *reg = cfg_entry->reg; uint32_t writable_mask = 0; uint32_t throughable_mask 
= get_throughable_mask(s, reg, valid_mask); + uint32_t *data = cfg_entry->ptr.word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -255,7 +261,7 @@ static int xen_pt_status_reg_init(XenPCIPassthroughState *s, reg_entry = xen_pt_find_reg(reg_grp_entry, PCI_CAPABILITY_LIST); if (reg_entry) { /* check Capabilities Pointer register */ - if (reg_entry->data) { + if (*reg_entry->ptr.half_word) { reg_field |= PCI_STATUS_CAP_LIST; } else { reg_field &= ~PCI_STATUS_CAP_LIST; @@ -301,10 +307,11 @@ static int xen_pt_cmd_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, XenPTRegInfo *reg = cfg_entry->reg; uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; /* modify emulate register */ writable_mask = ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ if (*val & PCI_COMMAND_INTX_DISABLE) { @@ -447,7 +454,7 @@ static int xen_pt_bar_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, /* emulate BAR */ valid_emu_mask = bar_emu_mask & valid_mask; - *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + *value = XEN_PT_MERGE_VALUE(*value, *cfg_entry->ptr.word, ~valid_emu_mask); return 0; } @@ -464,6 +471,7 @@ static int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, uint32_t bar_ro_mask = 0; uint32_t r_size = 0; int index = 0; + uint32_t *data = cfg_entry->ptr.word; index = xen_pt_bar_offset_to_index(reg->offset); if (index < 0 || index >= PCI_NUM_REGIONS) { @@ -500,7 +508,7 @@ static 
int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, /* modify emulate register */ writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* check whether we need to update the virtual region address or not */ switch (s->bases[index].bar_flag) { @@ -533,6 +541,7 @@ static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, uint32_t throughable_mask = get_throughable_mask(s, reg, valid_mask); pcibus_t r_size = 0; uint32_t bar_ro_mask = 0; + uint32_t *data = cfg_entry->ptr.word; r_size = d->io_regions[PCI_ROM_SLOT].size; base = &s->bases[PCI_ROM_SLOT]; @@ -544,7 +553,7 @@ static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, /* modify emulate register */ writable_mask = ~bar_ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -552,6 +561,22 @@ static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, return 0; } +static int xen_pt_intel_opregion_read(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + *value = igd_read_opregion(s); + return 0; +} + +static int xen_pt_intel_opregion_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint32_t *value, + uint32_t dev_value, uint32_t valid_mask) +{ + igd_write_opregion(s, *value); + return 0; +} + /* Header Type0 reg static information table */ static XenPTRegInfo xen_pt_emu_reg_header0[] = { /* Vendor ID reg */ @@ -800,15 +825,21 @@ static XenPTRegInfo xen_pt_emu_reg_vendor[] = { static inline uint8_t get_capability_version(XenPCIPassthroughState *s, uint32_t offset) { - uint8_t flags = pci_get_byte(s->dev.config + offset + PCI_EXP_FLAGS); - return flags & 
PCI_EXP_FLAGS_VERS; + uint8_t flag; + if (xen_host_pci_get_byte(&s->real_device, offset + PCI_EXP_FLAGS, &flag)) { + return 0; + } + return flag & PCI_EXP_FLAGS_VERS; } static inline uint8_t get_device_type(XenPCIPassthroughState *s, uint32_t offset) { - uint8_t flags = pci_get_byte(s->dev.config + offset + PCI_EXP_FLAGS); - return (flags & PCI_EXP_FLAGS_TYPE) >> 4; + uint8_t flag; + if (xen_host_pci_get_byte(&s->real_device, offset + PCI_EXP_FLAGS, &flag)) { + return 0; + } + return (flag & PCI_EXP_FLAGS_TYPE) >> 4; } /* initialize Link Control register */ @@ -857,8 +888,14 @@ static int xen_pt_linkctrl2_reg_init(XenPCIPassthroughState *s, reg_field = XEN_PT_INVALID_REG; } else { /* set Supported Link Speed */ - uint8_t lnkcap = pci_get_byte(s->dev.config + real_offset - reg->offset - + PCI_EXP_LNKCAP); + uint8_t lnkcap; + int rc; + rc = xen_host_pci_get_byte(&s->real_device, + real_offset - reg->offset + PCI_EXP_LNKCAP, + &lnkcap); + if (rc) { + return rc; + } reg_field |= PCI_EXP_LNKCAP_SLS & lnkcap; } @@ -971,10 +1008,11 @@ static int xen_pt_pmcsr_reg_write(XenPCIPassthroughState *s, XenPTRegInfo *reg = cfg_entry->reg; uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~PCI_PM_CTRL_PME_STATUS, @@ -1039,13 +1077,15 @@ static int xen_pt_msgctrl_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, uint32_t real_offset, uint32_t *data) { - PCIDevice *d = &s->dev; XenPTMSI *msi = s->msi; - uint16_t reg_field = 0; + uint16_t reg_field; + int rc; /* use I/O device register's value as initial value */ - reg_field = pci_get_word(d->config + real_offset); - + 
rc = xen_host_pci_get_word(&s->real_device, real_offset, &reg_field); + if (rc) { + return rc; + } if (reg_field & PCI_MSI_FLAGS_ENABLE) { XEN_PT_LOG(&s->dev, "MSI already enabled, disabling it first\n"); xen_host_pci_set_word(&s->real_device, real_offset, @@ -1067,6 +1107,7 @@ static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, XenPTMSI *msi = s->msi; uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; /* Currently no support for multi-vector */ if (*val & PCI_MSI_FLAGS_QSIZE) { @@ -1075,8 +1116,8 @@ static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); - msi->flags |= cfg_entry->data & ~PCI_MSI_FLAGS_ENABLE; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + msi->flags |= *data & ~PCI_MSI_FLAGS_ENABLE; /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -1086,7 +1127,7 @@ static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, /* setup MSI pirq for the first time */ if (!msi->initialized) { /* Init physical one */ - XEN_PT_LOG(&s->dev, "setup MSI\n"); + XEN_PT_LOG(&s->dev, "setup MSI (register: %x).\n", *val); if (xen_pt_msi_setup(s)) { /* We do not broadcast the error to the framework code, so * that MSI errors are contained in MSI emulation code and @@ -1094,12 +1135,12 @@ static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, * Guest MSI would be actually not working.
*/ *val &= ~PCI_MSI_FLAGS_ENABLE; - XEN_PT_WARN(&s->dev, "Can not map MSI.\n"); + XEN_PT_WARN(&s->dev, "Can not map MSI (register: %x)!\n", *val); return 0; } if (xen_pt_msi_update(s)) { *val &= ~PCI_MSI_FLAGS_ENABLE; - XEN_PT_WARN(&s->dev, "Can not bind MSI\n"); + XEN_PT_WARN(&s->dev, "Can not bind MSI (register: %x)!\n", *val); return 0; } msi->initialized = true; @@ -1190,18 +1231,19 @@ static int xen_pt_msgaddr32_reg_write(XenPCIPassthroughState *s, { XenPTRegInfo *reg = cfg_entry->reg; uint32_t writable_mask = 0; - uint32_t old_addr = cfg_entry->data; + uint32_t old_addr = *cfg_entry->ptr.word; + uint32_t *data = cfg_entry->ptr.word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); - s->msi->addr_lo = cfg_entry->data; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + s->msi->addr_lo = *data; /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); /* update MSI */ - if (cfg_entry->data != old_addr) { + if (*data != old_addr) { if (s->msi->mapped) { xen_pt_msi_update(s); } @@ -1216,7 +1258,8 @@ static int xen_pt_msgaddr64_reg_write(XenPCIPassthroughState *s, { XenPTRegInfo *reg = cfg_entry->reg; uint32_t writable_mask = 0; - uint32_t old_addr = cfg_entry->data; + uint32_t old_addr = *cfg_entry->ptr.word; + uint32_t *data = cfg_entry->ptr.word; /* check whether the type is 64 bit or not */ if (!(s->msi->flags & PCI_MSI_FLAGS_64BIT)) { @@ -1227,15 +1270,15 @@ static int xen_pt_msgaddr64_reg_write(XenPCIPassthroughState *s, /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* update the msi_info too */ - s->msi->addr_hi = cfg_entry->data; + s->msi->addr_hi = *data; /* create value for writing to I/O device register */ 
*val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); /* update MSI */ - if (cfg_entry->data != old_addr) { + if (*data != old_addr) { if (s->msi->mapped) { xen_pt_msi_update(s); } @@ -1254,8 +1297,9 @@ static int xen_pt_msgdata_reg_write(XenPCIPassthroughState *s, XenPTRegInfo *reg = cfg_entry->reg; XenPTMSI *msi = s->msi; uint16_t writable_mask = 0; - uint16_t old_data = cfg_entry->data; + uint16_t old_data = *cfg_entry->ptr.half_word; uint32_t offset = reg->offset; + uint16_t *data = cfg_entry->ptr.half_word; /* check the offset whether matches the type or not */ if (!xen_pt_msi_check_type(offset, msi->flags, DATA)) { @@ -1266,15 +1310,15 @@ static int xen_pt_msgdata_reg_write(XenPCIPassthroughState *s, /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* update the msi_info too */ - msi->data = cfg_entry->data; + msi->data = *data; /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); /* update MSI */ - if (cfg_entry->data != old_data) { + if (*data != old_data) { if (msi->mapped) { xen_pt_msi_update(s); } @@ -1411,14 +1455,16 @@ static int xen_pt_msixctrl_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, uint32_t real_offset, uint32_t *data) { - PCIDevice *d = &s->dev; - uint16_t reg_field = 0; + uint16_t reg_field; + int rc; /* use I/O device register's value as initial value */ - reg_field = pci_get_word(d->config + real_offset); - + rc = xen_host_pci_get_word(&s->real_device, real_offset, &reg_field); + if (rc) { + return rc; + } if (reg_field & PCI_MSIX_FLAGS_ENABLE) { - XEN_PT_LOG(d, "MSIX already enabled, disabling it first\n"); + XEN_PT_LOG(&s->dev, "MSIX already enabled, disabling it first\n"); xen_host_pci_set_word(&s->real_device, real_offset, reg_field & ~PCI_MSIX_FLAGS_ENABLE); } @@ -1436,10 +1482,11 @@ static int
xen_pt_msixctrl_reg_write(XenPCIPassthroughState *s, uint16_t writable_mask = 0; uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); int debug_msix_enabled_old; + uint16_t *data = cfg_entry->ptr.half_word; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); @@ -1492,6 +1539,19 @@ static XenPTRegInfo xen_pt_emu_reg_msix[] = { }, }; +static XenPTRegInfo xen_pt_emu_reg_igd_opregion[] = { + /* Intel IGFX OpRegion reg */ + { + .offset = 0x0, + .size = 4, + .init_val = 0, + .u.dw.read = xen_pt_intel_opregion_read, + .u.dw.write = xen_pt_intel_opregion_write, + }, + { + .size = 0, + }, +}; /**************************** * Capabilities @@ -1511,8 +1571,7 @@ static int xen_pt_vendor_size_init(XenPCIPassthroughState *s, const XenPTRegGroupInfo *grp_reg, uint32_t base_offset, uint8_t *size) { - *size = pci_get_byte(s->dev.config + base_offset + 0x02); - return 0; + return xen_host_pci_get_byte(&s->real_device, base_offset + 0x02, size); } /* get PCI Express Capability Structure register group size */ static int xen_pt_pcie_size_init(XenPCIPassthroughState *s, @@ -1591,12 +1650,15 @@ static int xen_pt_msi_size_init(XenPCIPassthroughState *s, const XenPTRegGroupInfo *grp_reg, uint32_t base_offset, uint8_t *size) { - PCIDevice *d = &s->dev; uint16_t msg_ctrl = 0; uint8_t msi_size = 0xa; + int rc; - msg_ctrl = pci_get_word(d->config + (base_offset + PCI_MSI_FLAGS)); - + rc = xen_host_pci_get_word(&s->real_device, base_offset + PCI_MSI_FLAGS, + &msg_ctrl); + if (rc) { + return rc; + } /* check if 64-bit address is capable of per-vector masking */ if (msg_ctrl & PCI_MSI_FLAGS_64BIT) { msi_size += 4; @@ -1729,6 +1791,14 @@ static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = { 
.size_init = xen_pt_msix_size_init, .emu_regs = xen_pt_emu_reg_msix, }, + /* Intel IGD Opregion group */ + { + .grp_id = XEN_PCI_INTEL_OPREGION, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0x4, + .size_init = xen_pt_reg_grp_size_init, + .emu_regs = xen_pt_emu_reg_igd_opregion, + }, { .grp_size = 0, }, @@ -1739,11 +1809,14 @@ static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, uint32_t real_offset, uint32_t *data) { - int i; - uint8_t *config = s->dev.config; - uint32_t reg_field = pci_get_byte(config + real_offset); + int i, rc; + uint8_t reg_field; uint8_t cap_id = 0; + rc = xen_host_pci_get_byte(&s->real_device, real_offset, &reg_field); + if (rc) { + return rc; + } /* find capability offset */ while (reg_field) { for (i = 0; xen_pt_emu_reg_grps[i].grp_size != 0; i++) { @@ -1752,7 +1825,13 @@ static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, continue; } - cap_id = pci_get_byte(config + reg_field + PCI_CAP_LIST_ID); + rc = xen_host_pci_get_byte(&s->real_device, + reg_field + PCI_CAP_LIST_ID, &cap_id); + if (rc) { + XEN_PT_ERR(&s->dev, "Failed to read capability @0x%x (rc:%d)\n", + reg_field + PCI_CAP_LIST_ID, rc); + return rc; + } if (xen_pt_emu_reg_grps[i].grp_id == cap_id) { if (xen_pt_emu_reg_grps[i].grp_type == XEN_PT_GRP_TYPE_EMU) { goto out; @@ -1763,7 +1842,11 @@ static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, } /* next capability */ - reg_field = pci_get_byte(config + reg_field + PCI_CAP_LIST_NEXT); + rc = xen_host_pci_get_byte(&s->real_device, + reg_field + PCI_CAP_LIST_NEXT, &reg_field); + if (rc) { + return rc; + } } out: @@ -1779,7 +1862,7 @@ out: static uint8_t find_cap_offset(XenPCIPassthroughState *s, uint8_t cap) { uint8_t id; - unsigned max_cap = PCI_CAP_MAX; + unsigned max_cap = XEN_PCI_CAP_MAX; uint8_t pos = PCI_CAPABILITY_LIST; uint8_t status = 0; @@ -1827,6 +1910,10 @@ static int xen_pt_config_reg_init(XenPCIPassthroughState *s, reg_entry->reg = reg; if (reg->init) { + uint32_t host_mask, size_mask; +
unsigned int offset; + uint32_t val; + /* initialize emulate register */ rc = reg->init(s, reg_entry->reg, reg_grp->base_offset + reg->offset, &data); @@ -1839,8 +1926,67 @@ static int xen_pt_config_reg_init(XenPCIPassthroughState *s, g_free(reg_entry); return 0; } - /* set register value */ - reg_entry->data = data; + /* Sync up the data to dev.config */ + offset = reg_grp->base_offset + reg->offset; + size_mask = 0xFFFFFFFF >> ((4 - reg->size) << 3); + + switch (reg->size) { + case 1: rc = xen_host_pci_get_byte(&s->real_device, offset, (uint8_t *)&val); + break; + case 2: rc = xen_host_pci_get_word(&s->real_device, offset, (uint16_t *)&val); + break; + case 4: rc = xen_host_pci_get_long(&s->real_device, offset, &val); + break; + default: assert(1); + } + if (rc) { + /* Serious issues when we cannot read the host values! */ + g_free(reg_entry); + return rc; + } + /* Set bits in emu_mask are the ones we emulate. The dev.config shall + * contain the emulated view of the guest - therefore we flip the mask + * to mask out the host values (which dev.config initially has) . */ + host_mask = size_mask & ~reg->emu_mask; + + if ((data & host_mask) != (val & host_mask)) { + uint32_t new_val; + + /* Mask out host (including past size). */ + new_val = val & host_mask; + /* Merge emulated ones (excluding the non-emulated ones). */ + new_val |= data & host_mask; + /* Leave intact host and emulated values past the size - even though + * we do not care as we write per reg->size granularity, but for the + * logging below lets have the proper value. */ + new_val |= ((val | data)) & ~size_mask; + XEN_PT_LOG(&s->dev,"Offset 0x%04x mismatch! 
Emulated=0x%04x, host=0x%04x, syncing to 0x%04x.\n", + offset, data, val, new_val); + val = new_val; + } else + val = data; + + if (val & ~size_mask) { + XEN_PT_ERR(&s->dev,"Offset 0x%04x:0x%04x expands past register size(%d)!\n", + offset, val, reg->size); + g_free(reg_entry); + return -ENXIO; + } + /* This could be just pci_set_long as we don't modify the bits + * past reg->size, but in case this routine is run in parallel or the + * init value is larger, we do not want to over-write registers. */ + switch (reg->size) { + case 1: pci_set_byte(s->dev.config + offset, (uint8_t)val); + break; + case 2: pci_set_word(s->dev.config + offset, (uint16_t)val); + break; + case 4: pci_set_long(s->dev.config + offset, val); + break; + default: assert(1); + } + /* set register value pointer to the data. */ + reg_entry->ptr.byte = s->dev.config + offset; + } /* list add register entry */ QLIST_INSERT_HEAD(&reg_grp->reg_tbl_list, reg_entry, entries); @@ -1858,7 +2004,8 @@ int xen_pt_config_init(XenPCIPassthroughState *s) uint32_t reg_grp_offset = 0; XenPTRegGroup *reg_grp_entry = NULL; - if (xen_pt_emu_reg_grps[i].grp_id != 0xFF) { + if (xen_pt_emu_reg_grps[i].grp_id != 0xFF + && xen_pt_emu_reg_grps[i].grp_id != XEN_PCI_INTEL_OPREGION) { if (xen_pt_hide_dev_cap(&s->real_device, xen_pt_emu_reg_grps[i].grp_id)) { continue; @@ -1871,6 +2018,15 @@ int xen_pt_config_init(XenPCIPassthroughState *s) } } + /* + * By default we will trap up to 0x40 in the cfg space. + * If an intel device is pass through we need to trap 0xfc, + * therefore the size should be 0xff.
+ */ + if (xen_pt_emu_reg_grps[i].grp_id == XEN_PCI_INTEL_OPREGION) { + reg_grp_offset = XEN_PCI_INTEL_OPREGION; + } + reg_grp_entry = g_new0(XenPTRegGroup, 1); QLIST_INIT(&reg_grp_entry->reg_tbl_list); QLIST_INSERT_HEAD(&s->reg_grps, reg_grp_entry, entries); @@ -1883,6 +2039,9 @@ int xen_pt_config_init(XenPCIPassthroughState *s) reg_grp_offset, &reg_grp_entry->size); if (rc < 0) { + XEN_PT_LOG(&s->dev, "Failed to initialize %d/%ld, type=0x%x, rc:%d\n", + i, ARRAY_SIZE(xen_pt_emu_reg_grps), + xen_pt_emu_reg_grps[i].grp_type, rc); xen_pt_config_delete(s); return rc; } @@ -1897,6 +2056,10 @@ int xen_pt_config_init(XenPCIPassthroughState *s) /* initialize capability register */ rc = xen_pt_config_reg_init(s, reg_grp_entry, regs); if (rc < 0) { + XEN_PT_LOG(&s->dev, "Failed to initialize %d/%ld reg 0x%x in grp_type=0x%x (%d/%ld), rc=%d\n", + j, ARRAY_SIZE(xen_pt_emu_reg_grps[i].emu_regs), + regs->offset, xen_pt_emu_reg_grps[i].grp_type, + i, ARRAY_SIZE(xen_pt_emu_reg_grps), rc); xen_pt_config_delete(s); return rc; } diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c new file mode 100644 index 0000000000..df6069bf63 --- /dev/null +++ b/hw/xen/xen_pt_graphics.c @@ -0,0 +1,272 @@ +/* + * graphics passthrough + */ +#include "xen_pt.h" +#include "xen-host-pci-device.h" +#include "hw/xen/xen_backend.h" + +static unsigned long igd_guest_opregion; +static unsigned long igd_host_opregion; + +#define XEN_PCI_INTEL_OPREGION_MASK 0xfff + +typedef struct VGARegion { + int type; /* Memory or port I/O */ + uint64_t guest_base_addr; + uint64_t machine_base_addr; + uint64_t size; /* size of the region */ + int rc; +} VGARegion; + +#define IORESOURCE_IO 0x00000100 +#define IORESOURCE_MEM 0x00000200 + +static struct VGARegion vga_args[] = { + { + .type = IORESOURCE_IO, + .guest_base_addr = 0x3B0, + .machine_base_addr = 0x3B0, + .size = 0xC, + .rc = -1, + }, + { + .type = IORESOURCE_IO, + .guest_base_addr = 0x3C0, + .machine_base_addr = 0x3C0, + .size = 0x20, + .rc = -1, + }, + {
+ .type = IORESOURCE_MEM, + .guest_base_addr = 0xa0000 >> XC_PAGE_SHIFT, + .machine_base_addr = 0xa0000 >> XC_PAGE_SHIFT, + .size = 0x20, + .rc = -1, + }, +}; + +/* + * register VGA resources for the domain with assigned gfx + */ +int xen_pt_register_vga_regions(XenHostPCIDevice *dev) +{ + int i = 0; + + if (!is_igd_vga_passthrough(dev)) { + return 0; + } + + for (i = 0 ; i < ARRAY_SIZE(vga_args); i++) { + if (vga_args[i].type == IORESOURCE_IO) { + vga_args[i].rc = xc_domain_ioport_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_ADD_MAPPING); + } else { + vga_args[i].rc = xc_domain_memory_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_ADD_MAPPING); + } + + if (vga_args[i].rc) { + XEN_PT_ERR(NULL, "VGA %s mapping failed! (rc: %i)\n", + vga_args[i].type == IORESOURCE_IO ? "ioport" : "memory", + vga_args[i].rc); + return vga_args[i].rc; + } + } + + return 0; +} + +/* + * unregister VGA resources for the domain with assigned gfx + */ +int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev) +{ + int i = 0; + int ret = 0; + + if (!is_igd_vga_passthrough(dev)) { + return 0; + } + + for (i = 0 ; i < ARRAY_SIZE(vga_args); i++) { + if (vga_args[i].type == IORESOURCE_IO) { + vga_args[i].rc = xc_domain_ioport_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_REMOVE_MAPPING); + } else { + vga_args[i].rc = xc_domain_memory_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_REMOVE_MAPPING); + } + + if (vga_args[i].rc) { + XEN_PT_ERR(NULL, "VGA %s unmapping failed! (rc: %i)\n", + vga_args[i].type == IORESOURCE_IO ? 
"ioport" : "memory", + vga_args[i].rc); + return vga_args[i].rc; + } + } + + if (igd_guest_opregion) { + ret = xc_domain_memory_mapping(xen_xc, xen_domid, + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + 3, + DPCI_REMOVE_MAPPING); + if (ret) { + return ret; + } + } + + return 0; +} + +static void *get_vgabios(XenPCIPassthroughState *s, int *size, + XenHostPCIDevice *dev) +{ + return pci_assign_dev_load_option_rom(&s->dev, OBJECT(&s->dev), size, + dev->domain, dev->bus, + dev->dev, dev->func); +} + +/* Refer to Seabios. */ +struct rom_header { + uint16_t signature; + uint8_t size; + uint8_t initVector[4]; + uint8_t reserved[17]; + uint16_t pcioffset; + uint16_t pnpoffset; +} __attribute__((packed)); + +struct pci_data { + uint32_t signature; + uint16_t vendor; + uint16_t device; + uint16_t vitaldata; + uint16_t dlen; + uint8_t drevision; + uint8_t class_lo; + uint16_t class_hi; + uint16_t ilen; + uint16_t irevision; + uint8_t type; + uint8_t indicator; + uint16_t reserved; +} __attribute__((packed)); + +int xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev) +{ + unsigned char *bios = NULL; + struct rom_header *rom; + int bios_size; + char *c = NULL; + char checksum = 0; + uint32_t len = 0; + struct pci_data *pd = NULL; + + if (!is_igd_vga_passthrough(dev)) { + return -1; + } + + bios = get_vgabios(s, &bios_size, dev); + if (!bios) { + XEN_PT_ERR(&s->dev, "VGA: Can't getting VBIOS!\n"); + return -1; + } + + /* Currently we fixed this address as a primary. */ + rom = (struct rom_header *)bios; + pd = (void *)(bios + (unsigned char)rom->pcioffset); + + /* We may need to fixup Device Identification. 
*/ + if (pd->device != s->real_device.device_id) { + pd->device = s->real_device.device_id; + + len = rom->size * 512; + /* Then adjust the bios checksum */ + for (c = (char *)bios; c < ((char *)bios + len); c++) { + checksum += *c; + } + if (checksum) { + bios[len - 1] -= checksum; + XEN_PT_LOG(&s->dev, "vga bios checksum is adjusted %x!\n", + checksum); + } + } + + /* Currently we fixed this address as a primary for legacy BIOS. */ + cpu_physical_memory_rw(0xc0000, bios, bios_size, 1); + return 0; +} + +uint32_t igd_read_opregion(XenPCIPassthroughState *s) +{ + uint32_t val = 0; + + if (!igd_guest_opregion) { + return val; + } + + val = igd_guest_opregion; + + XEN_PT_LOG(&s->dev, "Read opregion val=%x\n", val); + return val; +} + +#define XEN_PCI_INTEL_OPREGION_PAGES 0x3 +#define XEN_PCI_INTEL_OPREGION_ENABLE_ACCESSED 0x1 +void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val) +{ + int ret; + + if (igd_guest_opregion) { + XEN_PT_LOG(&s->dev, "opregion register already been set, ignoring %x\n", + val); + return; + } + + /* We just work with LE. 
*/ + xen_host_pci_get_block(&s->real_device, XEN_PCI_INTEL_OPREGION, + (uint8_t *)&igd_host_opregion, 4); + igd_guest_opregion = (unsigned long)(val & ~XEN_PCI_INTEL_OPREGION_MASK) + | (igd_host_opregion & XEN_PCI_INTEL_OPREGION_MASK); + + ret = xc_domain_iomem_permission(xen_xc, xen_domid, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + XEN_PCI_INTEL_OPREGION_PAGES, + XEN_PCI_INTEL_OPREGION_ENABLE_ACCESSED); + + if (ret) { + XEN_PT_ERR(&s->dev, "[%d]:Can't enable to access IGD host opregion:" + " 0x%lx.\n", ret, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT)), + igd_guest_opregion = 0; + return; + } + + ret = xc_domain_memory_mapping(xen_xc, xen_domid, + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + XEN_PCI_INTEL_OPREGION_PAGES, + DPCI_ADD_MAPPING); + + if (ret) { + XEN_PT_ERR(&s->dev, "[%d]:Can't map IGD host opregion:0x%lx to" + " guest opregion:0x%lx.\n", ret, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT)); + igd_guest_opregion = 0; + return; + } + + XEN_PT_LOG(&s->dev, "Map OpRegion: 0x%lx -> 0x%lx\n", + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT)); +} diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index 263e0514a2..e3d71945cd 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -75,19 +75,29 @@ static int msi_msix_enable(XenPCIPassthroughState *s, bool enable) { uint16_t val = 0; + int rc; if (!address) { return -1; } - xen_host_pci_get_word(&s->real_device, address, &val); + rc = xen_host_pci_get_word(&s->real_device, address, &val); + if (rc) { + XEN_PT_ERR(&s->dev, "Failed to read MSI/MSI-X register (0x%x), rc:%d\n", + address, rc); + return rc; + } if (enable) { val |= flag; } else { val &= ~flag; } - xen_host_pci_set_word(&s->real_device, address, val); - return 0; + rc = xen_host_pci_set_word(&s->real_device, address, val); + if 
(rc) { + XEN_PT_ERR(&s->dev, "Failed to write MSI/MSI-X register (0x%x), rc:%d\n", + address, rc); + } + return rc; } static int msi_msix_setup(XenPCIPassthroughState *s, @@ -220,7 +230,7 @@ static int msi_msix_disable(XenPCIPassthroughState *s, * MSI virtualization functions */ -int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool enable) +static int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool enable) { XEN_PT_LOG(&s->dev, "%s MSI.\n", enable ? "enabling" : "disabling"); @@ -276,7 +286,7 @@ void xen_pt_msi_disable(XenPCIPassthroughState *s) return; } - xen_pt_msi_set_enable(s, false); + (void)xen_pt_msi_set_enable(s, false); msi_msix_disable(s, msi_addr64(msi), msi->data, msi->pirq, false, msi->initialized); diff --git a/include/hw/boards.h b/include/hw/boards.h index 3f84afdf1c..566a5cad13 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -137,6 +137,7 @@ struct MachineState { bool mem_merge; bool usb; bool usb_disabled; + bool igd_gfx_passthru; char *firmware; bool iommu; bool suppress_vmdesc; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ff0b48bf58..3e002c9da6 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -220,7 +220,13 @@ extern int no_hpet; struct PCII440FXState; typedef struct PCII440FXState PCII440FXState; -PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, +#define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost" +#define TYPE_I440FX_PCI_DEVICE "i440FX" + +#define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX" + +PCIBus *i440fx_init(const char *host_type, const char *pci_type, + PCII440FXState **pi440fx_state, int *piix_devfn, ISABus **isa_bus, qemu_irq *pic, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, @@ -721,4 +727,5 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); (m)->compat_props = props; \ } while (0) +extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); #endif diff --git 
a/include/hw/pci/pci-assign.h b/include/hw/pci/pci-assign.h new file mode 100644 index 0000000000..55f42c56fa --- /dev/null +++ b/include/hw/pci/pci-assign.h @@ -0,0 +1,27 @@ +/* + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Just split from hw/i386/kvm/pci-assign.c. + */ +#ifndef PCI_ASSIGN_H +#define PCI_ASSIGN_H + +#include "hw/pci/pci.h" + +//#define DEVICE_ASSIGNMENT_DEBUG + +#ifdef DEVICE_ASSIGNMENT_DEBUG +#define DEBUG(fmt, ...) \ + do { \ + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \ + } while (0) +#else +#define DEBUG(fmt, ...) +#endif + +void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, + int *size, unsigned int domain, + unsigned int bus, unsigned int slot, + unsigned int function); +#endif /* PCI_ASSIGN_H */ diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index ed5fd3e1a2..d7fa6a4d01 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -186,6 +186,15 @@ static inline int xen_get_vmport_regs_pfn(XenXC xc, domid_t dom, } #endif +/* Xen before 4.6 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 460 + +#ifndef HVM_IOREQSRV_BUFIOREQ_ATOMIC +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2 +#endif + +#endif + /* Xen before 4.5 */ #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 450 @@ -370,7 +379,8 @@ static inline void xen_unmap_pcidev(XenXC xc, domid_t dom, static inline int xen_create_ioreq_server(XenXC xc, domid_t dom, ioservid_t *ioservid) { - int rc = xc_hvm_create_ioreq_server(xc, dom, 1, ioservid); + int rc = xc_hvm_create_ioreq_server(xc, dom, HVM_IOREQSRV_BUFIOREQ_ATOMIC, + ioservid); if (rc == 0) { trace_xen_ioreq_server_create(*ioservid); @@ -407,4 +417,26 @@ static inline int xen_set_ioreq_server_state(XenXC xc, domid_t dom, #endif +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 460 +static inline int xen_xc_domain_add_to_physmap(XenXC xch, uint32_t domid, + unsigned int space, + unsigned long idx, + xen_pfn_t 
gpfn) +{ + return xc_domain_add_to_physmap(xch, domid, space, idx, gpfn); +} +#else +static inline int xen_xc_domain_add_to_physmap(XenXC xch, uint32_t domid, + unsigned int space, + unsigned long idx, + xen_pfn_t gpfn) +{ + /* In Xen 4.6 rc is -1 and errno contains the error value. */ + int rc = xc_domain_add_to_physmap(xch, domid, space, idx, gpfn); + if (rc == -1) + return errno; + return rc; +} +#endif + #endif /* QEMU_HW_XEN_COMMON_H */ diff --git a/qemu-options.hx b/qemu-options.hx index efce775f66..b2f9dce0d5 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -38,6 +38,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ " dump-guest-core=on|off include guest memory in a core dump (default=on)\n" " mem-merge=on|off controls memory merge support (default: on)\n" " iommu=on|off controls emulated Intel IOMMU (VT-d) support (default=off)\n" + " igd-passthru=on|off controls IGD GFX passthrough support (default=off)\n" " aes-key-wrap=on|off controls support for AES key wrapping (default=on)\n" " dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n" " suppress-vmdesc=on|off disables self-describing migration (default=off)\n", @@ -55,6 +56,8 @@ than one accelerator specified, the next one is used if the previous one fails to initialize. @item kernel_irqchip=on|off Enables in-kernel irqchip support for the chosen accelerator when available. +@item igd-passthru=on|off +Enables IGD GFX passthrough support for the chosen machine when available. @item vmport=on|off|auto Enables emulation of VMWare IO port, for vmmouse etc. auto says to select the value based on accel.
For accel=xen the default is off otherwise the default diff --git a/trace-events b/trace-events index 1684a9530f..1927c764a9 100644 --- a/trace-events +++ b/trace-events @@ -936,6 +936,13 @@ xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: % xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" +handle_ioreq(void *req, uint32_t type, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p type=%d dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" +handle_ioreq_read(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p read type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" +handle_ioreq_write(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p write type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" +cpu_ioreq_pio(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p pio dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" +cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio read reg data=%#"PRIx64" port=%#"PRIx64" size=%d" +cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=%#"PRIx64" port=%#"PRIx64" size=%d" +cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d" # 
xen-mapcache.c xen_map_cache(uint64_t phys_addr) "want %#"PRIx64 diff --git a/vl.c b/vl.c index 584ca88dda..76e0bb2a89 100644 --- a/vl.c +++ b/vl.c @@ -1338,6 +1338,13 @@ static inline void semihosting_arg_fallback(const char *file, const char *cmd) } } +/* Now we still need this for compatibility with XEN. */ +bool has_igd_gfx_passthru; +static void igd_gfx_passthru(void) +{ + has_igd_gfx_passthru = current_machine->igd_gfx_passthru; +} + /***********************************************************/ /* USB devices */ @@ -4528,6 +4535,9 @@ int main(int argc, char **argv, char **envp) exit(1); } + /* Check if IGD GFX passthrough. */ + igd_gfx_passthru(); + /* init generic devices */ if (qemu_opts_foreach(qemu_find_opts("device"), device_init_func, NULL, NULL)) { diff --git a/xen-hvm.c b/xen-hvm.c index 55bce3ae12..5ea621e7b7 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -344,10 +344,10 @@ go_physmap: unsigned long idx = pfn + i; xen_pfn_t gpfn = start_gpfn + i; - rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); + rc = xen_xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); if (rc) { DPRINTF("add_to_physmap MFN %"PRI_xen_pfn" to PFN %" - PRI_xen_pfn" failed: %d\n", idx, gpfn, rc); + PRI_xen_pfn" failed: %d (errno: %d)\n", idx, gpfn, rc, errno); return -rc; } } @@ -421,10 +421,10 @@ static int xen_remove_from_physmap(XenIOState *state, xen_pfn_t idx = start_addr + i; xen_pfn_t gpfn = phys_offset + i; - rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); + rc = xen_xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); if (rc) { fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %" - PRI_xen_pfn" failed: %d\n", idx, gpfn, rc); + PRI_xen_pfn" failed: %d (errno: %d)\n", idx, gpfn, rc, errno); return -rc; } } @@ -813,9 +813,14 @@ static void cpu_ioreq_pio(ioreq_t *req) { uint32_t i; + trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr, + req->data, 
req->count, req->size); + if (req->dir == IOREQ_READ) { if (!req->data_is_ptr) { req->data = do_inp(req->addr, req->size); + trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr, + req->size); } else { uint32_t tmp; @@ -826,6 +831,8 @@ static void cpu_ioreq_pio(ioreq_t *req) } } else if (req->dir == IOREQ_WRITE) { if (!req->data_is_ptr) { + trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr, + req->size); do_outp(req->addr, req->size, req->data); } else { for (i = 0; i < req->count; i++) { @@ -842,6 +849,9 @@ static void cpu_ioreq_move(ioreq_t *req) { uint32_t i; + trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + if (!req->data_is_ptr) { if (req->dir == IOREQ_READ) { for (i = 0; i < req->count; i++) { @@ -914,11 +924,18 @@ static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req) static void handle_ioreq(XenIOState *state, ioreq_t *req) { + trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) && (req->size < sizeof (target_ulong))) { req->data &= ((target_ulong) 1 << (8 * req->size)) - 1; } + if (req->dir == IOREQ_WRITE) + trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + switch (req->type) { case IOREQ_TYPE_PIO: cpu_ioreq_pio(req); @@ -958,23 +975,38 @@ static void handle_ioreq(XenIOState *state, ioreq_t *req) default: hw_error("Invalid ioreq type 0x%x\n", req->type); } + if (req->dir == IOREQ_READ) { + trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + } } static int handle_buffered_iopage(XenIOState *state) { + buffered_iopage_t *buf_page = state->buffered_io_page; buf_ioreq_t *buf_req = NULL; ioreq_t req; int qw; - if (!state->buffered_io_page) { + if (!buf_page) { return 0; } memset(&req, 0x00, sizeof(req)); - while 
(state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) { - buf_req = &state->buffered_io_page->buf_ioreq[ - state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM]; + for (;;) { + uint32_t rdptr = buf_page->read_pointer, wrptr; + + xen_rmb(); + wrptr = buf_page->write_pointer; + xen_rmb(); + if (rdptr != buf_page->read_pointer) { + continue; + } + if (rdptr == wrptr) { + break; + } + buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM]; req.size = 1UL << buf_req->size; req.count = 1; req.addr = buf_req->addr; @@ -986,15 +1018,14 @@ static int handle_buffered_iopage(XenIOState *state) req.data_is_ptr = 0; qw = (req.size == 8); if (qw) { - buf_req = &state->buffered_io_page->buf_ioreq[ - (state->buffered_io_page->read_pointer + 1) % IOREQ_BUFFER_SLOT_NUM]; + buf_req = &buf_page->buf_ioreq[(rdptr + 1) % + IOREQ_BUFFER_SLOT_NUM]; req.data |= ((uint64_t)buf_req->data) << 32; } handle_ioreq(state, &req); - xen_mb(); - state->buffered_io_page->read_pointer += qw ? 2 : 1; + atomic_add(&buf_page->read_pointer, qw + 1); } return req.count;