s390x update:

- update Linux headers to 5.8-rc1 (for vfio-ccw path handling)
 - vfio-ccw: add support for path handling
 - documentation fix
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCAAwFiEEw9DWbcNiT/aowBjO3s9rk8bwL68FAl7rieQSHGNvaHVja0By
 ZWRoYXQuY29tAAoJEN7Pa5PG8C+vOn0P/iOCCMhOTPNEfDF7JandmBV2sp+ZKbVq
 zrMJzybWSwu1YD8tILUiB/G8K9iVK/dpnccbiH/OfHHnW6x88Q8Ggrs4yxBDggOg
 4v/CfoaPR9/0CxhE68OOVz+Wl+6nly1tJu7l8f/8zTkKZhb0WLrn2NypvTIH3n0Q
 cBMmNCoas15YYkKMCWb68McXWThB3BNAeo0gUZsNH+DayQbHna34zI274xQIXhhM
 pZynKyxOjYm1BTYqyIEGwXP+IGdJwC1SgknExE93NF/2QW/ZPkrruZuh7BKJQBm1
 v2Zix0uR7tuXzuf1DNNLIPm+/sXcVUOq+h/GOtT+HpdrpNixW8qDOuOl9UAAhTSU
 Gb0EOHbh2X9ypopYswi4nVSuMVQwqXXyWTn/i2XfCQhoIQL/BQ750uacQPaO2W7u
 zaqEqUdezG6AyYACW2juhqs2jGGOL4/4Vlu7drQFNTm5lAOzfqtE5B7AJ6t71P8k
 xcKcgEzWL5qTB4kFyFDahKCH2BLluSOa+mshHaZmYZUvSnpFBKWsdEkuPTwXhnl6
 FtHjFAfv2a6EsAKsa3rZBR43Kv3pHsSqhdyJczA7AlfL5abUxvU0H86JWVXQEl90
 zVbSOqwd3uu2zGUqfVdvCT5+FT3SujpmKujZHXkJuZRcm5AKOXz97aihbJzjoIDf
 xa3T2/8xWLvJ
 =qr0y
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200618' into staging

s390x update:
- update Linux headers to 5.8-rc1 (for vfio-ccw path handling)
- vfio-ccw: add support for path handling
- documentation fix

# gpg: Signature made Thu 18 Jun 2020 16:36:04 BST
# gpg:                using RSA key C3D0D66DC3624FF6A8C018CEDECF6B93C6F02FAF
# gpg:                issuer "cohuck@redhat.com"
# gpg: Good signature from "Cornelia Huck <conny@cornelia-huck.de>" [marginal]
# gpg:                 aka "Cornelia Huck <huckc@linux.vnet.ibm.com>" [full]
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>" [full]
# gpg:                 aka "Cornelia Huck <cohuck@kernel.org>" [marginal]
# gpg:                 aka "Cornelia Huck <cohuck@redhat.com>" [marginal]
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20200618:
  docs/s390x: fix vfio-ap device_del description
  vfio-ccw: Add support for the CRW region and IRQ
  s390x/css: Refactor the css_queue_crw() routine
  vfio-ccw: Refactor ccw irq handler
  vfio-ccw: Add support for the schib region
  vfio-ccw: Refactor cleanup of regions
  Linux headers: update

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 2020-06-19 11:44:03 +01:00
commit 4d285821c5
32 changed files with 1075 additions and 78 deletions


@@ -606,10 +606,11 @@ action.
To hot plug a vfio-ap device, use the QEMU ``device_add`` command::
-(qemu) device_add vfio-ap,sysfsdev="$path-to-mdev"
+(qemu) device_add vfio-ap,sysfsdev="$path-to-mdev",id="$id"
Where the ``$path-to-mdev`` value specifies the absolute path to a mediated
device to which AP resources to be used by the guest have been assigned.
+``$id`` is the name value for the optional id parameter.
Note that on Linux guests, the AP devices will be created in the
``/sys/bus/ap/devices`` directory when the AP bus subsequently performs its periodic
@@ -632,10 +633,9 @@ or a prior hot plug action.
To hot unplug a vfio-ap device, use the QEMU ``device_del`` command::
-(qemu) device_del vfio-ap,sysfsdev="$path-to-mdev"
+(qemu) device_del "$id"
-Where ``$path-to-mdev`` is the same as the path specified when the vfio-ap
-device was attached to the virtual machine's ap-bus.
+Where ``$id`` is the same id that was specified at device creation.
On a Linux guest, the AP devices will be removed from the ``/sys/bus/ap/devices``
directory on the guest when the AP bus subsequently performs its periodic scan,
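For illustration, the corrected hot plug/unplug flow reads end to end as follows; the mediated-device path and the id value ("apdev0") are hypothetical examples, not values taken from the patch:

    (qemu) device_add vfio-ap,sysfsdev="/sys/devices/vfio_ap/matrix/$uuid",id="apdev0"
    (qemu) device_del apdev0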


@@ -1335,11 +1335,20 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src)
}
}
-int css_do_stsch(SubchDev *sch, SCHIB *schib)
+IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib)
{
+int ret;
+/*
+ * For some subchannels, we may want to update parts of
+ * the schib (e.g., update path masks from the host device
+ * for passthrough subchannels).
+ */
+ret = s390_ccw_store(sch);
/* Use current status. */
copy_schib_to_guest(schib, &sch->curr_status);
-return 0;
+return ret;
}
static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src)
@@ -2161,30 +2170,23 @@ void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid,
}
}
-void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
-int chain, uint16_t rsid)
+void css_crw_add_to_queue(CRW crw)
{
CrwContainer *crw_cont;
-trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : "");
+trace_css_crw((crw.flags & CRW_FLAGS_MASK_RSC) >> 8,
+crw.flags & CRW_FLAGS_MASK_ERC,
+crw.rsid,
+(crw.flags & CRW_FLAGS_MASK_C) ? "(chained)" : "");
/* TODO: Maybe use a static crw pool? */
crw_cont = g_try_new0(CrwContainer, 1);
if (!crw_cont) {
channel_subsys.crws_lost = true;
return;
}
-crw_cont->crw.flags = (rsc << 8) | erc;
-if (solicited) {
-crw_cont->crw.flags |= CRW_FLAGS_MASK_S;
-}
-if (chain) {
-crw_cont->crw.flags |= CRW_FLAGS_MASK_C;
-}
-crw_cont->crw.rsid = rsid;
-if (channel_subsys.crws_lost) {
-crw_cont->crw.flags |= CRW_FLAGS_MASK_R;
-channel_subsys.crws_lost = false;
-}
+crw_cont->crw = crw;
QTAILQ_INSERT_TAIL(&channel_subsys.pending_crws, crw_cont, sibling);
@@ -2195,6 +2197,27 @@ void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
}
}
void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
int chain, uint16_t rsid)
{
CRW crw;
crw.flags = (rsc << 8) | erc;
if (solicited) {
crw.flags |= CRW_FLAGS_MASK_S;
}
if (chain) {
crw.flags |= CRW_FLAGS_MASK_C;
}
crw.rsid = rsid;
if (channel_subsys.crws_lost) {
crw.flags |= CRW_FLAGS_MASK_R;
channel_subsys.crws_lost = false;
}
css_crw_add_to_queue(crw);
}
void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
int hotplugged, int add)
{
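As a hedged illustration of the split introduced above: a caller that already holds a fully assembled CRW can hand it to css_crw_add_to_queue() directly, which is what the vfio-ccw CRW-region handler later in this series does, while css_queue_crw() remains the entry point for callers passing discrete rsc/erc/rsid values and, as the hunk shows, keeps the lost-CRW (R flag) marking. The chpid value below is hypothetical:

    /* Hypothetical call site inside css.c: queue an already-encoded
     * channel-path CRW without re-deriving its fields. */
    CRW crw = {
        .flags = (CRW_RSC_CHP << 8) | CRW_ERC_INIT,
        .rsid  = chpid,
    };
    css_crw_add_to_queue(crw);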


@@ -51,6 +51,27 @@ int s390_ccw_clear(SubchDev *sch)
return cdc->handle_clear(sch);
}
IOInstEnding s390_ccw_store(SubchDev *sch)
{
S390CCWDeviceClass *cdc = NULL;
int ret = IOINST_CC_EXPECTED;
/*
* This code is called for both virtual and passthrough devices,
* but only applies to to the latter. This ugly check makes that
* distinction for us.
*/
if (object_dynamic_cast(OBJECT(sch->driver_data), TYPE_S390_CCW)) {
cdc = S390_CCW_DEVICE_GET_CLASS(sch->driver_data);
}
if (cdc && cdc->handle_store) {
ret = cdc->handle_store(sch);
}
return ret;
}
static void s390_ccw_get_dev_info(S390CCWDevice *cdev,
char *sysfsdev,
Error **errp)


@@ -41,7 +41,14 @@ struct VFIOCCWDevice {
uint64_t async_cmd_region_size;
uint64_t async_cmd_region_offset;
struct ccw_cmd_region *async_cmd_region;
uint64_t schib_region_size;
uint64_t schib_region_offset;
struct ccw_schib_region *schib_region;
uint64_t crw_region_size;
uint64_t crw_region_offset;
struct ccw_crw_region *crw_region;
EventNotifier io_notifier;
EventNotifier crw_notifier;
bool force_orb_pfch;
bool warned_orb_pfch;
};
@@ -116,6 +123,51 @@ again:
}
}
static IOInstEnding vfio_ccw_handle_store(SubchDev *sch)
{
S390CCWDevice *cdev = sch->driver_data;
VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
SCHIB *schib = &sch->curr_status;
struct ccw_schib_region *region = vcdev->schib_region;
SCHIB *s;
int ret;
/* schib region not available so nothing else to do */
if (!region) {
return IOINST_CC_EXPECTED;
}
memset(region, 0, sizeof(*region));
ret = pread(vcdev->vdev.fd, region, vcdev->schib_region_size,
vcdev->schib_region_offset);
if (ret == -1) {
/*
* Device is probably damaged, but store subchannel does not
* have a nonzero cc defined for this scenario. Log an error,
* and presume things are otherwise fine.
*/
error_report("vfio-ccw: store region read failed with errno=%d", errno);
return IOINST_CC_EXPECTED;
}
/*
* Selectively copy path-related bits of the SCHIB,
* rather than copying the entire struct.
*/
s = (SCHIB *)region->schib_area;
schib->pmcw.pnom = s->pmcw.pnom;
schib->pmcw.lpum = s->pmcw.lpum;
schib->pmcw.pam = s->pmcw.pam;
schib->pmcw.pom = s->pmcw.pom;
if (s->scsw.flags & SCSW_FLAGS_MASK_PNO) {
schib->scsw.flags |= SCSW_FLAGS_MASK_PNO;
}
return IOINST_CC_EXPECTED;
}
static int vfio_ccw_handle_clear(SubchDev *sch)
{
S390CCWDevice *cdev = sch->driver_data;
@@ -206,6 +258,44 @@ static void vfio_ccw_reset(DeviceState *dev)
ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
}
static void vfio_ccw_crw_read(VFIOCCWDevice *vcdev)
{
struct ccw_crw_region *region = vcdev->crw_region;
CRW crw;
int size;
/* Keep reading CRWs as long as data is returned */
do {
memset(region, 0, sizeof(*region));
size = pread(vcdev->vdev.fd, region, vcdev->crw_region_size,
vcdev->crw_region_offset);
if (size == -1) {
error_report("vfio-ccw: Read crw region failed with errno=%d",
errno);
break;
}
if (region->crw == 0) {
/* No more CRWs to queue */
break;
}
memcpy(&crw, &region->crw, sizeof(CRW));
css_crw_add_to_queue(crw);
} while (1);
}
static void vfio_ccw_crw_notifier_handler(void *opaque)
{
VFIOCCWDevice *vcdev = opaque;
while (event_notifier_test_and_clear(&vcdev->crw_notifier)) {
vfio_ccw_crw_read(vcdev);
}
}
static void vfio_ccw_io_notifier_handler(void *opaque)
{
VFIOCCWDevice *vcdev = opaque;
@@ -276,22 +366,40 @@ read_err:
css_inject_io_interrupt(sch);
}
-static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
+static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
+unsigned int irq,
+Error **errp)
{
VFIODevice *vdev = &vcdev->vdev;
struct vfio_irq_info *irq_info;
size_t argsz;
int fd;
+EventNotifier *notifier;
+IOHandler *fd_read;
-if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
-error_setg(errp, "vfio: unexpected number of io irqs %u",
+switch (irq) {
+case VFIO_CCW_IO_IRQ_INDEX:
+notifier = &vcdev->io_notifier;
+fd_read = vfio_ccw_io_notifier_handler;
+break;
+case VFIO_CCW_CRW_IRQ_INDEX:
+notifier = &vcdev->crw_notifier;
+fd_read = vfio_ccw_crw_notifier_handler;
+break;
+default:
+error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
+return;
+}
+if (vdev->num_irqs < irq + 1) {
+error_setg(errp, "vfio: unexpected number of irqs %u",
vdev->num_irqs);
return;
}
argsz = sizeof(*irq_info);
irq_info = g_malloc0(argsz);
-irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
+irq_info->index = irq;
irq_info->argsz = argsz;
if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
irq_info) < 0 || irq_info->count < 1) {
@@ -299,37 +407,52 @@ static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
goto out_free_info;
}
-if (event_notifier_init(&vcdev->io_notifier, 0)) {
+if (event_notifier_init(notifier, 0)) {
error_setg_errno(errp, errno,
-"vfio: Unable to init event notifier for IO");
+"vfio: Unable to init event notifier for irq (%d)",
+irq);
goto out_free_info;
}
-fd = event_notifier_get_fd(&vcdev->io_notifier);
-qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev);
-if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
+fd = event_notifier_get_fd(notifier);
+qemu_set_fd_handler(fd, fd_read, NULL, vcdev);
+if (vfio_set_irq_signaling(vdev, irq, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
qemu_set_fd_handler(fd, NULL, NULL, vcdev);
-event_notifier_cleanup(&vcdev->io_notifier);
+event_notifier_cleanup(notifier);
}
out_free_info:
g_free(irq_info);
}
-static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
+static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev,
+unsigned int irq)
{
Error *err = NULL;
+EventNotifier *notifier;
-if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
+switch (irq) {
+case VFIO_CCW_IO_IRQ_INDEX:
+notifier = &vcdev->io_notifier;
+break;
+case VFIO_CCW_CRW_IRQ_INDEX:
+notifier = &vcdev->crw_notifier;
+break;
+default:
+error_report("vfio: Unsupported device irq(%d)", irq);
+return;
+}
+if (vfio_set_irq_signaling(&vcdev->vdev, irq, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name);
}
-qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier),
+qemu_set_fd_handler(event_notifier_get_fd(notifier),
NULL, NULL, vcdev);
-event_notifier_cleanup(&vcdev->io_notifier);
+event_notifier_cleanup(notifier);
}
static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
@@ -363,8 +486,7 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
vcdev->io_region_size = info->size;
if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
error_setg(errp, "vfio: Unexpected size of the I/O region");
-g_free(info);
-return;
+goto out_err;
}
vcdev->io_region_offset = info->offset;
@@ -377,19 +499,53 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
vcdev->async_cmd_region_size = info->size;
if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) {
error_setg(errp, "vfio: Unexpected size of the async cmd region");
-g_free(vcdev->io_region);
-g_free(info);
-return;
+goto out_err;
}
vcdev->async_cmd_region_offset = info->offset;
vcdev->async_cmd_region = g_malloc0(info->size);
}
ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW,
VFIO_REGION_SUBTYPE_CCW_SCHIB, &info);
if (!ret) {
vcdev->schib_region_size = info->size;
if (sizeof(*vcdev->schib_region) != vcdev->schib_region_size) {
error_setg(errp, "vfio: Unexpected size of the schib region");
goto out_err;
}
vcdev->schib_region_offset = info->offset;
vcdev->schib_region = g_malloc(info->size);
}
ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW,
VFIO_REGION_SUBTYPE_CCW_CRW, &info);
if (!ret) {
vcdev->crw_region_size = info->size;
if (sizeof(*vcdev->crw_region) != vcdev->crw_region_size) {
error_setg(errp, "vfio: Unexpected size of the CRW region");
goto out_err;
}
vcdev->crw_region_offset = info->offset;
vcdev->crw_region = g_malloc(info->size);
}
g_free(info);
return;
out_err:
g_free(vcdev->crw_region);
g_free(vcdev->schib_region);
g_free(vcdev->async_cmd_region);
g_free(vcdev->io_region);
g_free(info);
return;
}
static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
{
g_free(vcdev->crw_region);
g_free(vcdev->schib_region);
g_free(vcdev->async_cmd_region);
g_free(vcdev->io_region);
}
@@ -499,11 +655,19 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
goto out_region_err;
}
-vfio_ccw_register_io_notifier(vcdev, &err);
+vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err);
if (err) {
goto out_notifier_err;
}
if (vcdev->crw_region) {
vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err);
if (err) {
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
goto out_notifier_err;
}
}
return;
out_notifier_err:
@@ -528,7 +692,8 @@ static void vfio_ccw_unrealize(DeviceState *dev)
S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
VFIOGroup *group = vcdev->vdev.group;
-vfio_ccw_unregister_io_notifier(vcdev);
+vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX);
+vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
vfio_ccw_put_region(vcdev);
vfio_ccw_put_device(vcdev);
vfio_put_group(group);
@@ -565,6 +730,7 @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data)
cdc->handle_request = vfio_ccw_handle_request;
cdc->handle_halt = vfio_ccw_handle_halt;
cdc->handle_clear = vfio_ccw_handle_clear;
cdc->handle_store = vfio_ccw_handle_store;
}
static const TypeInfo vfio_ccw_info = {


@@ -205,6 +205,7 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src);
void css_inject_io_interrupt(SubchDev *sch);
void css_reset(void);
void css_reset_sch(SubchDev *sch);
+void css_crw_add_to_queue(CRW crw);
void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
int chain, uint16_t rsid);
void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
@@ -218,6 +219,7 @@ IOInstEnding do_subchannel_work_passthrough(SubchDev *sub);
int s390_ccw_halt(SubchDev *sch);
int s390_ccw_clear(SubchDev *sch);
+IOInstEnding s390_ccw_store(SubchDev *sch);
typedef enum {
CSS_IO_ADAPTER_VIRTIO = 0,
@@ -242,7 +244,7 @@ SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
uint16_t schid);
bool css_subch_visible(SubchDev *sch);
void css_conditional_io_interrupt(SubchDev *sch);
-int css_do_stsch(SubchDev *sch, SCHIB *schib);
+IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib);
bool css_schid_final(int m, uint8_t cssid, uint8_t ssid, uint16_t schid);
IOInstEnding css_do_msch(SubchDev *sch, const SCHIB *schib);
IOInstEnding css_do_xsch(SubchDev *sch);


@@ -37,6 +37,7 @@ typedef struct S390CCWDeviceClass {
IOInstEnding (*handle_request) (SubchDev *sch);
int (*handle_halt) (SubchDev *sch);
int (*handle_clear) (SubchDev *sch);
IOInstEnding (*handle_store) (SubchDev *sch);
} S390CCWDeviceClass;
#endif


@@ -31,6 +31,7 @@
#define KVM_FEATURE_PV_SEND_IPI 11
#define KVM_FEATURE_POLL_CONTROL 12
#define KVM_FEATURE_PV_SCHED_YIELD 13
+#define KVM_FEATURE_ASYNC_PF_INT 14
#define KVM_HINTS_REALTIME 0
@@ -50,6 +51,8 @@
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
#define MSR_KVM_POLL_CONTROL 0x4b564d05
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
struct kvm_steal_time {
uint64_t steal;
@@ -81,6 +84,11 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
@@ -112,8 +120,13 @@ struct kvm_mmu_op_release_pt {
#define KVM_PV_REASON_PAGE_READY 2
struct kvm_vcpu_pv_apf_data {
-uint32_t reason;
-uint8_t pad[60];
+/* Used for 'page not present' events delivered via #PF */
+uint32_t flags;
+/* Used for 'page ready' events delivered via interrupt notification */
+uint32_t token;
+uint8_t pad[56];
uint32_t enabled;
};


@@ -353,9 +353,12 @@ extern "C" {
* a platform-dependent stride. On top of that the memory can apply
* platform-depending swizzling of some higher address bits into bit6.
*
-* This format is highly platforms specific and not useful for cross-driver
-* sharing. It exists since on a given platform it does uniquely identify the
-* layout in a simple way for i915-specific userspace.
+* Note that this layout is only accurate on intel gen 8+ or valleyview chipsets.
+* On earlier platforms the is highly platforms specific and not useful for
+* cross-driver sharing. It exists since on a given platform it does uniquely
+* identify the layout in a simple way for i915-specific userspace, which
+* facilitated conversion of userspace to modifiers. Additionally the exact
+* format on some really old platforms is not known.
*/
#define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1)
@@ -368,9 +371,12 @@ extern "C" {
* memory can apply platform-depending swizzling of some higher address bits
* into bit6.
*
-* This format is highly platforms specific and not useful for cross-driver
-* sharing. It exists since on a given platform it does uniquely identify the
-* layout in a simple way for i915-specific userspace.
+* Note that this layout is only accurate on intel gen 8+ or valleyview chipsets.
+* On earlier platforms the is highly platforms specific and not useful for
+* cross-driver sharing. It exists since on a given platform it does uniquely
+* identify the layout in a simple way for i915-specific userspace, which
+* facilitated conversion of userspace to modifiers. Additionally the exact
+* format on some really old platforms is not known.
*/
#define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2)
@@ -520,7 +526,113 @@ extern "C" {
#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1)
/*
-* 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later
+* Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80,
+* and Tegra GPUs starting with Tegra K1.
*
* Pixels are arranged in Groups of Bytes (GOBs). GOB size and layout varies
* based on the architecture generation. GOBs themselves are then arranged in
* 3D blocks, with the block dimensions (in terms of GOBs) always being a power
* of two, and hence expressible as their log2 equivalent (E.g., "2" represents
* a block depth or height of "4").
*
* Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format
* in full detail.
*
* Macro
* Bits Param Description
* ---- ----- -----------------------------------------------------------------
*
* 3:0 h log2(height) of each block, in GOBs. Placed here for
* compatibility with the existing
* DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers.
*
* 4:4 - Must be 1, to indicate block-linear layout. Necessary for
* compatibility with the existing
* DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers.
*
* 8:5 - Reserved (To support 3D-surfaces with variable log2(depth) block
* size). Must be zero.
*
* Note there is no log2(width) parameter. Some portions of the
* hardware support a block width of two gobs, but it is impractical
* to use due to lack of support elsewhere, and has no known
* benefits.
*
* 11:9 - Reserved (To support 2D-array textures with variable array stride
* in blocks, specified via log2(tile width in blocks)). Must be
* zero.
*
* 19:12 k Page Kind. This value directly maps to a field in the page
* tables of all GPUs >= NV50. It affects the exact layout of bits
* in memory and can be derived from the tuple
*
* (format, GPU model, compression type, samples per pixel)
*
* Where compression type is defined below. If GPU model were
* implied by the format modifier, format, or memory buffer, page
* kind would not need to be included in the modifier itself, but
* since the modifier should define the layout of the associated
* memory buffer independent from any device or other context, it
* must be included here.
*
* 21:20 g GOB Height and Page Kind Generation. The height of a GOB changed
* starting with Fermi GPUs. Additionally, the mapping between page
* kind and bit layout has changed at various points.
*
* 0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping
* 1 = Gob Height 4, G80 - GT2XX Page Kind mapping
* 2 = Gob Height 8, Turing+ Page Kind mapping
* 3 = Reserved for future use.
*
* 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further
* bit remapping step that occurs at an even lower level than the
* page kind and block linear swizzles. This causes the layout of
* surfaces mapped in those SOC's GPUs to be incompatible with the
* equivalent mapping on other GPUs in the same system.
*
* 0 = Tegra K1 - Tegra Parker/TX2 Layout.
* 1 = Desktop GPU and Tegra Xavier+ Layout
*
* 25:23 c Lossless Framebuffer Compression type.
*
* 0 = none
* 1 = ROP/3D, layout 1, exact compression format implied by Page
* Kind field
* 2 = ROP/3D, layout 2, exact compression format implied by Page
* Kind field
* 3 = CDE horizontal
* 4 = CDE vertical
* 5 = Reserved for future use
* 6 = Reserved for future use
* 7 = Reserved for future use
*
* 55:25 - Reserved for future use. Must be zero.
*/
#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \
fourcc_mod_code(NVIDIA, (0x10 | \
((h) & 0xf) | \
(((k) & 0xff) << 12) | \
(((g) & 0x3) << 20) | \
(((s) & 0x1) << 22) | \
(((c) & 0x7) << 23)))
/* To grandfather in prior block linear format modifiers to the above layout,
* the page kind "0", which corresponds to "pitch/linear" and hence is unusable
* with block-linear layouts, is remapped within drivers to the value 0xfe,
* which corresponds to the "generic" kind used for simple single-sample
* uncompressed color formats on Fermi - Volta GPUs.
*/
static inline uint64_t
drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
{
if (!(modifier & 0x10) || (modifier & (0xff << 12)))
return modifier;
else
return modifier | (0xfe << 12);
}
/*
* 16Bx2 Block Linear layout, used by Tegra K1 and later
*
* Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked
* vertically by a power of 2 (1 to 32 GOBs) to form a block.
@@ -541,20 +653,20 @@ extern "C" {
* in full detail.
*/
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \
-fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf))
+DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v))
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \
-fourcc_mod_code(NVIDIA, 0x10)
+DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \
-fourcc_mod_code(NVIDIA, 0x11)
+DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \
-fourcc_mod_code(NVIDIA, 0x12)
+DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \
-fourcc_mod_code(NVIDIA, 0x13)
+DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \
-fourcc_mod_code(NVIDIA, 0x14)
+DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \
-fourcc_mod_code(NVIDIA, 0x15)
+DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5)
/*
* Some Broadcom modifiers take parameters, for example the number of


@@ -1666,6 +1666,18 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
return 0;
}
#define MASTER_SLAVE_CFG_UNSUPPORTED 0
#define MASTER_SLAVE_CFG_UNKNOWN 1
#define MASTER_SLAVE_CFG_MASTER_PREFERRED 2
#define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3
#define MASTER_SLAVE_CFG_MASTER_FORCE 4
#define MASTER_SLAVE_CFG_SLAVE_FORCE 5
#define MASTER_SLAVE_STATE_UNSUPPORTED 0
#define MASTER_SLAVE_STATE_UNKNOWN 1
#define MASTER_SLAVE_STATE_MASTER 2
#define MASTER_SLAVE_STATE_SLAVE 3
#define MASTER_SLAVE_STATE_ERR 4
/* Which connector port. */
#define PORT_TP 0x00
#define PORT_AUI 0x01
@@ -1904,7 +1916,9 @@ struct ethtool_link_settings {
uint8_t eth_tp_mdix_ctrl;
int8_t link_mode_masks_nwords;
uint8_t transceiver;
-uint8_t reserved1[3];
+uint8_t master_slave_cfg;
+uint8_t master_slave_state;
+uint8_t reserved1[1];
uint32_t reserved[7];
uint32_t link_mode_masks[0];
/* layout of link_mode_masks fields:


@@ -44,6 +44,7 @@
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#define VIRTIO_ID_MEM 24 /* virtio mem */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */


@@ -0,0 +1,211 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/*
* Virtio Mem Device
*
* Copyright Red Hat, Inc. 2020
*
* Authors:
* David Hildenbrand <david@redhat.com>
*
* This header is BSD licensed so anyone can use the definitions
* to implement compatible drivers/servers:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _LINUX_VIRTIO_MEM_H
#define _LINUX_VIRTIO_MEM_H
#include "standard-headers/linux/types.h"
#include "standard-headers/linux/virtio_types.h"
#include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/virtio_config.h"
/*
* Each virtio-mem device manages a dedicated region in physical address
* space. Each device can belong to a single NUMA node, multiple devices
* for a single NUMA node are possible. A virtio-mem device is like a
* "resizable DIMM" consisting of small memory blocks that can be plugged
* or unplugged. The device driver is responsible for (un)plugging memory
* blocks on demand.
*
* Virtio-mem devices can only operate on their assigned memory region in
* order to (un)plug memory. A device cannot (un)plug memory belonging to
* other devices.
*
* The "region_size" corresponds to the maximum amount of memory that can
* be provided by a device. The "size" corresponds to the amount of memory
* that is currently plugged. "requested_size" corresponds to a request
* from the device to the device driver to (un)plug blocks. The
* device driver should try to (un)plug blocks in order to reach the
* "requested_size". It is impossible to plug more memory than requested.
*
* The "usable_region_size" represents the memory region that can actually
* be used to (un)plug memory. It is always at least as big as the
* "requested_size" and will grow dynamically. It will only shrink when
* explicitly triggered (VIRTIO_MEM_REQ_UNPLUG).
*
* There are no guarantees what will happen if unplugged memory is
* read/written. Such memory should, in general, not be touched. E.g.,
* even writing might succeed, but the values will simply be discarded at
* random points in time.
*
* It can happen that the device cannot process a request, because it is
* busy. The device driver has to retry later.
*
* Usually, during system resets all memory will get unplugged, so the
* device driver can start with a clean state. However, in specific
* scenarios (if the device is busy) it can happen that the device still
* has memory plugged. The device driver can request to unplug all memory
* (VIRTIO_MEM_REQ_UNPLUG) - which might take a while to succeed if the
* device is busy.
*/
/* --- virtio-mem: feature bits --- */
/* node_id is an ACPI PXM and is valid */
#define VIRTIO_MEM_F_ACPI_PXM 0
/* --- virtio-mem: guest -> host requests --- */
/* request to plug memory blocks */
#define VIRTIO_MEM_REQ_PLUG 0
/* request to unplug memory blocks */
#define VIRTIO_MEM_REQ_UNPLUG 1
/* request to unplug all blocks and shrink the usable size */
#define VIRTIO_MEM_REQ_UNPLUG_ALL 2
/* request information about the plugged state of memory blocks */
#define VIRTIO_MEM_REQ_STATE 3
struct virtio_mem_req_plug {
__virtio64 addr;
__virtio16 nb_blocks;
__virtio16 padding[3];
};
struct virtio_mem_req_unplug {
__virtio64 addr;
__virtio16 nb_blocks;
__virtio16 padding[3];
};
struct virtio_mem_req_state {
__virtio64 addr;
__virtio16 nb_blocks;
__virtio16 padding[3];
};
struct virtio_mem_req {
__virtio16 type;
__virtio16 padding[3];
union {
struct virtio_mem_req_plug plug;
struct virtio_mem_req_unplug unplug;
struct virtio_mem_req_state state;
} u;
};
/* --- virtio-mem: host -> guest response --- */
/*
* Request processed successfully, applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_UNPLUG_ALL
* - VIRTIO_MEM_REQ_STATE
*/
#define VIRTIO_MEM_RESP_ACK 0
/*
* Request denied - e.g. trying to plug more than requested, applicable for
* - VIRTIO_MEM_REQ_PLUG
*/
#define VIRTIO_MEM_RESP_NACK 1
/*
* Request cannot be processed right now, try again later, applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_UNPLUG_ALL
*/
#define VIRTIO_MEM_RESP_BUSY 2
/*
* Error in request (e.g. addresses/alignment), applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_STATE
*/
#define VIRTIO_MEM_RESP_ERROR 3
/* State of memory blocks is "plugged" */
#define VIRTIO_MEM_STATE_PLUGGED 0
/* State of memory blocks is "unplugged" */
#define VIRTIO_MEM_STATE_UNPLUGGED 1
/* State of memory blocks is "mixed" */
#define VIRTIO_MEM_STATE_MIXED 2
struct virtio_mem_resp_state {
__virtio16 state;
};
struct virtio_mem_resp {
__virtio16 type;
__virtio16 padding[3];
union {
struct virtio_mem_resp_state state;
} u;
};
/* --- virtio-mem: configuration --- */
struct virtio_mem_config {
/* Block size and alignment. Cannot change. */
uint64_t block_size;
/* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */
uint16_t node_id;
uint8_t padding[6];
/* Start address of the memory region. Cannot change. */
uint64_t addr;
/* Region size (maximum). Cannot change. */
uint64_t region_size;
/*
* Currently usable region size. Can grow up to region_size. Can
* shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
* update will be sent).
*/
uint64_t usable_region_size;
/*
* Currently used size. Changes due to plug/unplug requests, but no
* config updates will be sent.
*/
uint64_t plugged_size;
/* Requested size. New plug requests cannot exceed it. Can change. */
uint64_t requested_size;
};
#endif /* _LINUX_VIRTIO_MEM_H */
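As a minimal sketch of how the request structures defined above might be filled by a driver; the block address and the virtqueue plumbing are assumptions, not part of this header:

    /* Hypothetical: request that the device plug one block at a
     * block-aligned guest address. A real driver would convert the
     * fields with the virtio endian helpers and submit the buffer on
     * the device's request virtqueue. */
    struct virtio_mem_req req = {
        .type = VIRTIO_MEM_REQ_PLUG,
        .u.plug = {
            .addr      = block_addr,
            .nb_blocks = 1,
        },
    };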


@@ -84,6 +84,13 @@
* at the end of the used ring. Guest should ignore the used->flags field. */
#define VIRTIO_RING_F_EVENT_IDX 29
/* Alignment requirements for vring elements.
* When using pre-virtio 1.0 layout, these fall out naturally.
*/
#define VRING_AVAIL_ALIGN_SIZE 2
#define VRING_USED_ALIGN_SIZE 4
#define VRING_DESC_ALIGN_SIZE 16
/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
struct vring_desc {
/* Address (guest-physical). */
@@ -110,28 +117,47 @@ struct vring_used_elem {
__virtio32 len;
};
+typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE)))
+vring_used_elem_t;
struct vring_used {
__virtio16 flags;
__virtio16 idx;
-struct vring_used_elem ring[];
+vring_used_elem_t ring[];
};
/*
* The ring element addresses are passed between components with different
* alignments assumptions. Thus, we might need to decrease the compiler-selected
* alignment, and so must use a typedef to make sure the aligned attribute
* actually takes hold:
*
* https://gcc.gnu.org/onlinedocs//gcc/Common-Type-Attributes.html#Common-Type-Attributes
*
* When used on a struct, or struct member, the aligned attribute can only
* increase the alignment; in order to decrease it, the packed attribute must
* be specified as well. When used as part of a typedef, the aligned attribute
* can both increase and decrease alignment, and specifying the packed
* attribute generates a warning.
*/
typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE)))
vring_desc_t;
typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE)))
vring_avail_t;
typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE)))
vring_used_t;
struct vring {
unsigned int num;
-struct vring_desc *desc;
-struct vring_avail *avail;
-struct vring_used *used;
+vring_desc_t *desc;
+vring_avail_t *avail;
+vring_used_t *used;
};
-/* Alignment requirements for vring elements.
-* When using pre-virtio 1.0 layout, these fall out naturally.
-*/
-#define VRING_AVAIL_ALIGN_SIZE 2
-#define VRING_USED_ALIGN_SIZE 4
-#define VRING_DESC_ALIGN_SIZE 16
+#ifndef VIRTIO_RING_NO_LEGACY
/* The standard layout for the ring is a continuous chunk of memory which looks
* like this. We assume num is a power of 2.
@@ -179,6 +205,8 @@ static inline unsigned vring_size(unsigned int num, unsigned long align)
+ sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num;
}
+#endif /* VIRTIO_RING_NO_LEGACY */
/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
/* Assuming a given event_idx value from the other side, if
* we have just incremented index from old to new_idx,


@ -1 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __ASM_MMAN_H
#define __ASM_MMAN_H
#include <asm-generic/mman.h>
#define PROT_BTI 0x10 /* BTI guarded page */
#endif /* ! _UAPI__ASM_MMAN_H */


@@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3)
__SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+#define __NR_faccessat2 439
+__SYSCALL(__NR_faccessat2, sys_faccessat2)
#undef __NR_syscalls
-#define __NR_syscalls 439
+#define __NR_syscalls 440
/*
* 32 bit systems traditionally used different


@@ -367,6 +367,7 @@
#define __NR_clone3 (__NR_Linux + 435)
#define __NR_openat2 (__NR_Linux + 437)
#define __NR_pidfd_getfd (__NR_Linux + 438)
#define __NR_faccessat2 (__NR_Linux + 439)
#endif /* _ASM_MIPS_UNISTD_N32_H */


@@ -343,6 +343,7 @@
#define __NR_clone3 (__NR_Linux + 435)
#define __NR_openat2 (__NR_Linux + 437)
#define __NR_pidfd_getfd (__NR_Linux + 438)
#define __NR_faccessat2 (__NR_Linux + 439)
#endif /* _ASM_MIPS_UNISTD_N64_H */


@@ -413,6 +413,7 @@
#define __NR_clone3 (__NR_Linux + 435)
#define __NR_openat2 (__NR_Linux + 437)
#define __NR_pidfd_getfd (__NR_Linux + 438)
#define __NR_faccessat2 (__NR_Linux + 439)
#endif /* _ASM_MIPS_UNISTD_O32_H */


@@ -420,6 +420,7 @@
#define __NR_clone3 435
#define __NR_openat2 437
#define __NR_pidfd_getfd 438
#define __NR_faccessat2 439
#endif /* _ASM_POWERPC_UNISTD_32_H */


@@ -392,6 +392,7 @@
#define __NR_clone3 435
#define __NR_openat2 437
#define __NR_pidfd_getfd 438
#define __NR_faccessat2 439
#endif /* _ASM_POWERPC_UNISTD_64_H */


@@ -410,5 +410,6 @@
#define __NR_clone3 435
#define __NR_openat2 437
#define __NR_pidfd_getfd 438
#define __NR_faccessat2 439
#endif /* _ASM_S390_UNISTD_32_H */


@@ -358,5 +358,6 @@
#define __NR_clone3 435
#define __NR_openat2 437
#define __NR_pidfd_getfd 438
#define __NR_faccessat2 439
#endif /* _ASM_S390_UNISTD_64_H */


@@ -385,32 +385,48 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_STATE_NESTED_FORMAT_VMX 0
-#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */
+#define KVM_STATE_NESTED_FORMAT_SVM 1
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
+#define KVM_STATE_NESTED_GIF_SET 0x00000100
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000
+#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
};
struct kvm_vmx_nested_state_hdr {
+__u32 flags;
__u64 vmxon_pa;
__u64 vmcs12_pa;
+__u64 preemption_timer_deadline;
struct {
__u16 flags;
} smm;
};
+struct kvm_svm_nested_state_data {
+/* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */
+__u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
+};
+struct kvm_svm_nested_state_hdr {
+__u64 vmcb_pa;
+};
/* for KVM_CAP_NESTED_STATE */
struct kvm_nested_state {
__u16 flags;
@@ -419,6 +435,7 @@ struct kvm_nested_state {
union {
struct kvm_vmx_nested_state_hdr vmx;
+struct kvm_svm_nested_state_hdr svm;
/* Pad the header to 128 bytes. */
__u8 pad[120];
@@ -431,6 +448,7 @@ struct kvm_nested_state {
*/
union {
struct kvm_vmx_nested_state_data vmx[0];
+struct kvm_svm_nested_state_data svm[0];
} data;
};


@@ -2,8 +2,15 @@
#ifndef _ASM_X86_UNISTD_H
#define _ASM_X86_UNISTD_H
-/* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT 0x40000000UL
+/*
+* x32 syscall flag bit. Some user programs expect syscall NR macros
+* and __X32_SYSCALL_BIT to have type int, even though syscall numbers
+* are, for practical purposes, unsigned long.
+*
+* Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right
+* thing regardless.
+*/
+#define __X32_SYSCALL_BIT 0x40000000
# ifdef __i386__
# include <asm/unistd_32.h>
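A brief, hedged illustration of the masking remark in the new comment; the number used is just the faccessat2 entry added elsewhere in this update:

    /* Clearing the flag bit recovers the base syscall number whether the
     * value is held in an int or an unsigned long. */
    unsigned long nr   = __X32_SYSCALL_BIT + 439;   /* __NR_faccessat2 on x32 */
    unsigned long base = nr & ~__X32_SYSCALL_BIT;   /* == 439 */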


@@ -428,6 +428,7 @@
#define __NR_clone3 435
#define __NR_openat2 437
#define __NR_pidfd_getfd 438
#define __NR_faccessat2 439
#endif /* _ASM_X86_UNISTD_32_H */


@@ -350,6 +350,7 @@
#define __NR_clone3 435
#define __NR_openat2 437
#define __NR_pidfd_getfd 438
#define __NR_faccessat2 439
#endif /* _ASM_X86_UNISTD_64_H */


@@ -303,6 +303,7 @@
#define __NR_clone3 (__X32_SYSCALL_BIT + 435)
#define __NR_openat2 (__X32_SYSCALL_BIT + 437)
#define __NR_pidfd_getfd (__X32_SYSCALL_BIT + 438)
#define __NR_faccessat2 (__X32_SYSCALL_BIT + 439)
#define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
#define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
#define __NR_ioctl (__X32_SYSCALL_BIT + 514)


@@ -116,7 +116,7 @@ struct kvm_irq_level {
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
-* For ARM: See Documentation/virt/kvm/api.txt
+* For ARM: See Documentation/virt/kvm/api.rst
*/
union {
__u32 irq;
@@ -188,10 +188,13 @@ struct kvm_s390_cmma_log {
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
+#define KVM_EXIT_HYPERV_SYNDBG 3
__u32 type;
+__u32 pad1;
union {
struct {
__u32 msr;
+__u32 pad2;
__u64 control;
__u64 evt_page;
__u64 msg_page;
@@ -201,6 +204,15 @@ struct kvm_hyperv_exit {
__u64 result;
__u64 params[2];
} hcall;
+struct {
+__u32 msr;
+__u32 pad2;
+__u64 control;
+__u64 status;
+__u64 send_page;
+__u64 recv_page;
+__u64 pending_page;
+} syndbg;
} u;
};
@@ -1017,6 +1029,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_VCPU_RESETS 179
#define KVM_CAP_S390_PROTECTED 180
#define KVM_CAP_PPC_SECURE_GUEST 181
+#define KVM_CAP_HALT_POLL 182
+#define KVM_CAP_ASYNC_PF_INT 183
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1107,7 +1121,7 @@ struct kvm_xen_hvm_config {
*
* KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
* the irqfd to operate in resampling mode for level triggered interrupt
-* emulation. See Documentation/virt/kvm/api.txt.
+* emulation. See Documentation/virt/kvm/api.rst.
*/
#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)


@@ -83,6 +83,8 @@ struct sev_user_data_status {
__u32 guest_count; /* Out */
} __attribute__((packed));
#define SEV_STATUS_FLAGS_CONFIG_ES 0x0100
/**
* struct sev_user_data_pek_csr - PEK_CSR command parameters
*


@@ -305,6 +305,7 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
#define VFIO_REGION_TYPE_GFX (1)
#define VFIO_REGION_TYPE_CCW (2)
#define VFIO_REGION_TYPE_MIGRATION (3)
/* sub-types for VFIO_REGION_TYPE_PCI_* */
@@ -378,6 +379,235 @@ struct vfio_region_gfx_edid {
/* sub-types for VFIO_REGION_TYPE_CCW */
#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1)
#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2)
#define VFIO_REGION_SUBTYPE_CCW_CRW (3)
/* sub-types for VFIO_REGION_TYPE_MIGRATION */
#define VFIO_REGION_SUBTYPE_MIGRATION (1)
/*
* The structure vfio_device_migration_info is placed at the 0th offset of
* the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related
* migration information. Field accesses from this structure are only supported
* at their native width and alignment. Otherwise, the result is undefined and
* vendor drivers should return an error.
*
* device_state: (read/write)
* - The user application writes to this field to inform the vendor driver
* about the device state to be transitioned to.
* - The vendor driver should take the necessary actions to change the
* device state. After successful transition to a given state, the
* vendor driver should return success on write(device_state, state)
* system call. If the device state transition fails, the vendor driver
* should return an appropriate -errno for the fault condition.
* - On the user application side, if the device state transition fails,
* that is, if write(device_state, state) returns an error, read
* device_state again to determine the current state of the device from
* the vendor driver.
* - The vendor driver should return previous state of the device unless
* the vendor driver has encountered an internal error, in which case
* the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR.
* - The user application must use the device reset ioctl to recover the
* device from VFIO_DEVICE_STATE_ERROR state. If the device is
* indicated to be in a valid device state by reading device_state, the
* user application may attempt to transition the device to any valid
* state reachable from the current state or terminate itself.
*
* device_state consists of 3 bits:
* - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear,
* it indicates the _STOP state. When the device state is changed to
* _STOP, driver should stop the device before write() returns.
* - If bit 1 is set, it indicates the _SAVING state, which means that the
* driver should start gathering device state information that will be
* provided to the VFIO user application to save the device's state.
* - If bit 2 is set, it indicates the _RESUMING state, which means that
* the driver should prepare to resume the device. Data provided through
* the migration region should be used to resume the device.
* Bits 3 - 31 are reserved for future use. To preserve them, the user
* application should perform a read-modify-write operation on this
* field when modifying the specified bits.
*
* +------- _RESUMING
* |+------ _SAVING
* ||+----- _RUNNING
* |||
* 000b => Device Stopped, not saving or resuming
* 001b => Device running, which is the default state
* 010b => Stop the device & save the device state, stop-and-copy state
* 011b => Device running and save the device state, pre-copy state
* 100b => Device stopped and the device state is resuming
* 101b => Invalid state
* 110b => Error state
* 111b => Invalid state
*
* State transitions:
*
* _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP
* (100b) (001b) (011b) (010b) (000b)
* 0. Running or default state
* |
*
* 1. Normal Shutdown (optional)
* |------------------------------------->|
*
* 2. Save the state or suspend
* |------------------------->|---------->|
*
* 3. Save the state during live migration
* |----------->|------------>|---------->|
*
* 4. Resuming
* |<---------|
*
* 5. Resumed
* |--------->|
*
* 0. Default state of VFIO device is _RUNNNG when the user application starts.
* 1. During normal shutdown of the user application, the user application may
* optionally change the VFIO device state from _RUNNING to _STOP. This
* transition is optional. The vendor driver must support this transition but
* must not require it.
* 2. When the user application saves state or suspends the application, the
* device state transitions from _RUNNING to stop-and-copy and then to _STOP.
* On the state transition from _RUNNING to stop-and-copy, the driver must stop
* the device, save the device state, and send it to the application through
* the migration region. The sequence to be followed for such a transition is
* given below.
* 3. During live migration of the user application, the state transitions from
* _RUNNING to pre-copy, to stop-and-copy, and finally to _STOP.
* On the state transition from _RUNNING to pre-copy, the driver should start
* gathering the device state while the application is still running and send
* the device state data to the application through the migration region.
* On state transition from pre-copy to stop-and-copy, the driver must stop
* the device, save the device state and send it to the user application
* through the migration region.
* Vendor drivers must support the pre-copy state even for implementations
* where no data is provided to the user before the stop-and-copy state. The
* user must not be required to consume all migration data before the device
* transitions to a new state, including the stop-and-copy state.
* The sequence to be followed for above two transitions is given below.
* 4. To start the resuming phase, the device state should be transitioned from
* the _RUNNING state to the _RESUMING state.
* In the _RESUMING state, the driver should use the device state data
* received through the migration region to resume the device.
* 5. After providing saved device data to the driver, the application should
* change the state from _RESUMING to _RUNNING.
*
* reserved:
* Reads on this field return zero and writes are ignored.
*
* pending_bytes: (read only)
* The number of pending bytes still to be migrated from the vendor driver.
*
* data_offset: (read only)
* The user application should read data_offset field from the migration
* region. The user application should read the device data from this
* offset within the migration region during the _SAVING state or write
* the device data during the _RESUMING state. See below for details of
* sequence to be followed.
*
* data_size: (read/write)
* The user application should read data_size to get the size in bytes of
* the data copied in the migration region during the _SAVING state and
* write the size in bytes of the data copied in the migration region
* during the _RESUMING state.
*
* The format of the migration region is as follows:
* ------------------------------------------------------------------
* |vfio_device_migration_info| data section |
* | | /////////////////////////////// |
* ------------------------------------------------------------------
* ^ ^
* offset 0-trapped part data_offset
*
* The structure vfio_device_migration_info is always followed by the data
* section in the region, so data_offset will always be nonzero. The offset
* from where the data is copied is decided by the kernel driver. The data
* section can be trapped, mmapped, or partitioned, depending on how the kernel
* driver defines the data section. The data section partition can be defined
* as mapped by the sparse mmap capability. If mmapped, data_offset must be
* page aligned, whereas the initial section containing the
* vfio_device_migration_info structure might not end at a page-aligned
* offset. The user is not required to access the data section through mmap
* regardless of the mmap capabilities of the region.
* The vendor driver should determine whether and how to partition the data
* section. The vendor driver should return data_offset accordingly.
*
* The sequence to be followed while in pre-copy state and stop-and-copy state
* is as follows:
* a. Read pending_bytes, indicating the start of a new iteration to get device
* data. Repeated read on pending_bytes at this stage should have no side
* effects.
* If pending_bytes == 0, the user application should not iterate to get data
* for that device.
* If pending_bytes > 0, perform the following steps.
* b. Read data_offset, indicating that the vendor driver should make data
* available through the data section. The vendor driver should not complete
* this read operation until data is available from (region + data_offset)
* to (region + data_offset + data_size).
* c. Read data_size, which is the amount of data in bytes available through
* the migration region.
* Read on data_offset and data_size should return the offset and size of
* the current buffer if the user application reads data_offset and
* data_size more than once here.
* d. Read data_size bytes of data from (region + data_offset) from the
* migration region.
* e. Process the data.
* f. Read pending_bytes again; this signals that the data from the previous
* iteration has been consumed. If pending_bytes > 0, go to step b.
*
* The user application can transition from the _SAVING|_RUNNING
* (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the
* number of pending bytes. The user application should iterate in _SAVING
* (stop-and-copy) until pending_bytes is 0.
*
* The sequence to be followed while _RESUMING device state is as follows:
* While data for this device is available, repeat the following steps:
* a. Read data_offset from where the user application should write data.
* b. Write migration data starting at the migration region + data_offset for
* the length determined by data_size from the migration source.
* c. Write data_size, which indicates to the vendor driver that data is
* written in the migration region. The vendor driver must return this write
* operation only after consuming the data. The vendor driver should apply the
* user-provided migration region data to the device's resume state.
*
* If an error occurs during the above sequences, the vendor driver can return
* an error code for next read() or write() operation, which will terminate the
* loop. The user application should then take the next necessary action, for
* example, failing migration or terminating the user application.
*
* For the user application, data is opaque. The user application should write
* data in the same order as it was received, and in the same transaction
* sizes as used at the source.
*/
struct vfio_device_migration_info {
__u32 device_state; /* VFIO device state */
#define VFIO_DEVICE_STATE_STOP (0)
#define VFIO_DEVICE_STATE_RUNNING (1 << 0)
#define VFIO_DEVICE_STATE_SAVING (1 << 1)
#define VFIO_DEVICE_STATE_RESUMING (1 << 2)
#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \
VFIO_DEVICE_STATE_SAVING | \
VFIO_DEVICE_STATE_RESUMING)
#define VFIO_DEVICE_STATE_VALID(state) \
(state & VFIO_DEVICE_STATE_RESUMING ? \
(state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1)
#define VFIO_DEVICE_STATE_IS_ERROR(state) \
((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \
VFIO_DEVICE_STATE_RESUMING))
#define VFIO_DEVICE_STATE_SET_ERROR(state) \
((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_SAVING | \
VFIO_DEVICE_STATE_RESUMING)
__u32 reserved;
__u64 pending_bytes;
__u64 data_offset;
__u64 data_size;
};
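The state machine and save sequence above are easier to follow with a rough user-space sketch. The following is illustrative only and is not how QEMU's vfio migration code is structured: it assumes `device` is an open VFIO device fd, `mig_off` is the file offset of the migration region located earlier via VFIO_DEVICE_GET_REGION_INFO, and the MIG_FIELD helper and function names are invented here; error handling is mostly elided. The _RESUMING direction mirrors save_pass(), with pwrite() of the data followed by a write of data_size.

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/vfio.h>

/* Offset of a vfio_device_migration_info field within the migration region
 * (the structure sits at offset 0 of the trapped part). */
#define MIG_FIELD(f) offsetof(struct vfio_device_migration_info, f)

/* Enter pre-copy: read-modify-write device_state to _SAVING | _RUNNING. */
static int enter_precopy(int device, off_t mig_off)
{
    uint32_t state;

    if (pread(device, &state, sizeof(state),
              mig_off + MIG_FIELD(device_state)) != sizeof(state)) {
        return -1;
    }
    state = (state & ~VFIO_DEVICE_STATE_MASK) |
            VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING;
    if (pwrite(device, &state, sizeof(state),
               mig_off + MIG_FIELD(device_state)) != sizeof(state)) {
        return -1;   /* on failure, re-read device_state to learn the real state */
    }
    return 0;
}

/* One pass of the save sequence, steps a-f above (error handling elided). */
static int save_pass(int device, off_t mig_off, void *buf, size_t buflen)
{
    uint64_t pending, data_offset, data_size;

    /* a. read pending_bytes; zero means there is nothing to do this pass */
    pread(device, &pending, sizeof(pending), mig_off + MIG_FIELD(pending_bytes));
    while (pending) {
        /* b./c. read data_offset and data_size describing the current buffer */
        pread(device, &data_offset, sizeof(data_offset),
              mig_off + MIG_FIELD(data_offset));
        pread(device, &data_size, sizeof(data_size),
              mig_off + MIG_FIELD(data_size));
        if (data_size > buflen) {
            return -1;
        }
        /* d. copy the device data out of the data section */
        pread(device, buf, data_size, mig_off + data_offset);
        /* e. forward buf to the destination ... */
        /* f. read pending_bytes again; loop while more data is pending */
        pread(device, &pending, sizeof(pending),
              mig_off + MIG_FIELD(pending_bytes));
    }
    return 0;
}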
/*
* The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
@@ -577,6 +807,7 @@ enum {
enum {
VFIO_CCW_IO_IRQ_INDEX,
VFIO_CCW_CRW_IRQ_INDEX,
VFIO_CCW_NUM_IRQS
};
@@ -785,6 +1016,29 @@ struct vfio_iommu_type1_info_cap_iova_range {
struct vfio_iova_range iova_ranges[];
};
/*
* The migration capability reports the features supported for migration.
*
* The structures below define version 1 of this capability.
*
* The existence of this capability indicates that the IOMMU kernel driver
* supports dirty page logging.
*
* pgsize_bitmap: the kernel driver returns a bitmap of the page sizes supported
* for dirty page logging.
* max_dirty_bitmap_size: the kernel driver returns the maximum supported dirty
* bitmap size in bytes that user applications can use when getting the dirty
* bitmap.
*/
#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1
struct vfio_iommu_type1_info_cap_migration {
struct vfio_info_cap_header header;
__u32 flags;
__u64 pgsize_bitmap;
__u64 max_dirty_bitmap_size; /* in bytes */
};
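As a rough illustration only (not taken from any particular implementation), a user application could discover this capability by walking the capability chain returned by VFIO_IOMMU_GET_INFO. The `container` fd and the helper name below are assumptions, and error handling is minimal:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Walk the VFIO_IOMMU_GET_INFO capability chain looking for the migration
 * capability. On success the caller owns the returned info buffer. */
static struct vfio_iommu_type1_info_cap_migration *
find_migration_cap(int container, struct vfio_iommu_type1_info **info_out)
{
    struct vfio_iommu_type1_info hdr = { .argsz = sizeof(hdr) };
    struct vfio_iommu_type1_info *info;
    struct vfio_info_cap_header *cap;
    __u32 off;

    /* The first call only learns the argsz needed for the capability chain. */
    if (ioctl(container, VFIO_IOMMU_GET_INFO, &hdr)) {
        return NULL;
    }
    info = calloc(1, hdr.argsz);
    if (!info) {
        return NULL;
    }
    info->argsz = hdr.argsz;
    if (ioctl(container, VFIO_IOMMU_GET_INFO, info) ||
        !(info->flags & VFIO_IOMMU_INFO_CAPS)) {
        free(info);
        return NULL;
    }
    for (off = info->cap_offset; off; off = cap->next) {
        cap = (struct vfio_info_cap_header *)((char *)info + off);
        if (cap->id == VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION) {
            *info_out = info;   /* caller frees */
            return (struct vfio_iommu_type1_info_cap_migration *)cap;
        }
    }
    free(info);
    return NULL;
}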
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
/**
@@ -805,6 +1059,12 @@ struct vfio_iommu_type1_dma_map {
#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
struct vfio_bitmap {
__u64 pgsize; /* page size for bitmap in bytes */
__u64 size; /* in bytes */
__u64 *data; /* one bit per page */
};
/**
* VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
*                              struct vfio_dma_unmap)
@@ -814,12 +1074,23 @@ struct vfio_iommu_type1_dma_map {
* field. No guarantee is made to the user that arbitrary unmaps of iova
* or size different from those used in the original mapping call will
* succeed.
* VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
* before unmapping IO virtual addresses. When this flag is set, the user must
* provide a struct vfio_bitmap in data[]. The user must provide zeroed memory
* via vfio_bitmap.data and its size in the vfio_bitmap.size field. Each bit in
* the bitmap represents one page of the user-provided page size in the
* vfio_bitmap.pgsize field, consecutively starting from the iova offset. A set
* bit indicates that the page at that offset from iova is dirty. A bitmap of
* the pages in the unmapped range is returned in the user-provided
* vfio_bitmap.data.
*/
struct vfio_iommu_type1_dma_unmap {
__u32 argsz;
__u32 flags;
#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
__u8 data[];
};
#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
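A hedged sketch of how the flag above might be used: the caller appends a struct vfio_bitmap to the unmap request and supplies zeroed bitmap memory sized for the range. The function name and parameters are invented for illustration and are not lifted from QEMU or the kernel:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Unmap an IOVA range and retrieve its dirty bitmap in the same call,
 * assuming dirty page logging was started earlier on this container. */
static int unmap_and_get_dirty(int container, __u64 iova, __u64 size,
                               __u64 pgsize, __u64 **bitmap_out)
{
    __u64 npages = size / pgsize;
    __u64 bitmap_bytes = ((npages + 63) / 64) * sizeof(__u64);
    size_t argsz = sizeof(struct vfio_iommu_type1_dma_unmap) +
                   sizeof(struct vfio_bitmap);
    struct vfio_iommu_type1_dma_unmap *unmap = calloc(1, argsz);
    struct vfio_bitmap *bitmap;
    int ret;

    *bitmap_out = calloc(1, bitmap_bytes);   /* bitmap memory must be zeroed */
    if (!unmap || !*bitmap_out) {
        free(unmap);
        free(*bitmap_out);
        return -1;
    }
    bitmap = (struct vfio_bitmap *)unmap->data;
    unmap->argsz = argsz;
    unmap->flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
    unmap->iova = iova;
    unmap->size = size;
    bitmap->pgsize = pgsize;        /* one bit per page of this size */
    bitmap->size = bitmap_bytes;
    bitmap->data = *bitmap_out;     /* filled in by the kernel on success */

    ret = ioctl(container, VFIO_IOMMU_UNMAP_DMA, unmap);
    free(unmap);
    return ret;
}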
@@ -831,6 +1102,57 @@ struct vfio_iommu_type1_dma_unmap {
#define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15)
#define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16)
/**
* VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
* struct vfio_iommu_type1_dirty_bitmap)
* This IOCTL is used for dirty page logging.
* The caller should set the flag selecting the operation to perform, as
* detailed below:
*
* Calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set instructs
* the IOMMU driver to log pages that are dirtied or potentially dirtied by
* the device; it is designed to be used while a migration is in progress. Dirty
* pages are logged until logging is disabled by the user application by calling
* the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
*
* Calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set instructs
* the IOMMU driver to stop logging dirtied pages.
*
* Calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set
* returns the dirty pages bitmap of the IOMMU container for a given IOVA range.
* The user must specify the IOVA range and the pgsize through the structure
* vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface only
* supports getting a bitmap of the smallest supported pgsize and may be
* extended in the future to get a bitmap of any specified supported pgsize.
* The user must provide a zeroed memory area for the bitmap and specify its
* size in bitmap.size. One bit represents one page, consecutively starting
* from the iova offset. The user should provide the page size in the
* bitmap.pgsize field. A bit set in the bitmap indicates that the page at that
* offset from iova is dirty. The caller must set argsz to a value including the
* size of the structure vfio_iommu_type1_dirty_bitmap_get, but excluding the
* size of the actual bitmap. If dirty page logging is not enabled, an error is
* returned.
*
* Only one of the flags _START, _STOP and _GET_BITMAP may be specified at a time.
*
*/
struct vfio_iommu_type1_dirty_bitmap {
__u32 argsz;
__u32 flags;
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0)
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1)
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2)
__u8 data[];
};
struct vfio_iommu_type1_dirty_bitmap_get {
__u64 iova; /* IO virtual address */
__u64 size; /* Size of iova range */
struct vfio_bitmap bitmap;
};
#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
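The three operations are typically used in a start / repeated get-bitmap / stop pattern during migration. The sketch below is illustrative only (names invented, error handling largely elided) and assumes dirty logging over a single mapped IOVA range:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static void dirty_logging_example(int container, __u64 iova, __u64 size,
                                  __u64 pgsize)
{
    struct vfio_iommu_type1_dirty_bitmap start = {
        .argsz = sizeof(start),
        .flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START,
    };
    struct vfio_iommu_type1_dirty_bitmap stop = {
        .argsz = sizeof(stop),
        .flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP,
    };
    size_t argsz = sizeof(struct vfio_iommu_type1_dirty_bitmap) +
                   sizeof(struct vfio_iommu_type1_dirty_bitmap_get);
    struct vfio_iommu_type1_dirty_bitmap *get = calloc(1, argsz);
    struct vfio_iommu_type1_dirty_bitmap_get *range;
    __u64 npages = size / pgsize;

    if (!get) {
        return;
    }
    range = (struct vfio_iommu_type1_dirty_bitmap_get *)get->data;

    /* Start logging; dirty pages accumulate until _STOP. */
    ioctl(container, VFIO_IOMMU_DIRTY_PAGES, &start);

    /* Fetch the bitmap for one IOVA range. argsz covers the _get structure
     * but not the bitmap memory, which the caller supplies zeroed. */
    get->argsz = argsz;
    get->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
    range->iova = iova;
    range->size = size;
    range->bitmap.pgsize = pgsize;
    range->bitmap.size = ((npages + 63) / 64) * sizeof(__u64);
    range->bitmap.data = calloc(1, range->bitmap.size);
    ioctl(container, VFIO_IOMMU_DIRTY_PAGES, get);
    /* ... walk range->bitmap.data: a set bit marks a dirty page ... */

    /* Stop logging once migration (or the final pass) is done. */
    ioctl(container, VFIO_IOMMU_DIRTY_PAGES, &stop);

    free(range->bitmap.data);
    free(get);
}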
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
/*

View File

@@ -34,4 +34,23 @@ struct ccw_cmd_region {
__u32 ret_code;
} __attribute__((packed));
/*
* Used for processing commands that read the subchannel-information block.
* Reading this region triggers an stsch() to the hardware.
* Note: this is controlled by a capability.
*/
struct ccw_schib_region {
#define SCHIB_AREA_SIZE 52
__u8 schib_area[SCHIB_AREA_SIZE];
} __attribute__((packed));
/*
* Used for returning a Channel Report Word to userspace.
* Note: this is controlled by a capability
*/
struct ccw_crw_region {
__u32 crw;
__u32 pad;
} __attribute__((packed));
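A CRW is announced through the new VFIO_CCW_CRW_IRQ_INDEX interrupt; the QEMU handler added in this series, for example, then drains the region until it reads a zero CRW. A minimal sketch, assuming `crw_off` is the file offset of the CRW region found earlier via VFIO_DEVICE_GET_REGION_INFO (the function name is hypothetical):

#include <sys/types.h>
#include <unistd.h>
#include <linux/vfio_ccw.h>

/* Drain Channel Report Words after the CRW IRQ fires; a CRW of zero means
 * nothing further is pending. */
static void drain_crws(int device, off_t crw_off)
{
    struct ccw_crw_region crw_region;

    do {
        if (pread(device, &crw_region, sizeof(crw_region), crw_off) !=
            sizeof(crw_region) || crw_region.crw == 0) {
            break;
        }
        /* ... hand crw_region.crw to the guest's CRW machinery ... */
    } while (1);
}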
#endif

View File

@@ -15,6 +15,8 @@
#include <linux/types.h>
#include <linux/ioctl.h>
#define VHOST_FILE_UNBIND -1
/* ioctls */
#define VHOST_VIRTIO 0xAF
@@ -140,4 +142,6 @@
/* Get the max ring size. */
#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
/* Set event fd for config interrupt */
#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
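For illustration, a caller would typically register an eventfd here and poll it for configuration-change interrupts; presumably the new VHOST_FILE_UNBIND value is what would be written to unregister it. A minimal sketch (the helper name is invented):

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vhost.h>

/* Register an eventfd to be signalled on config-space change interrupts. */
static int set_config_call(int vdpa_fd)
{
    int efd = eventfd(0, EFD_CLOEXEC);

    if (efd < 0) {
        return -1;
    }
    if (ioctl(vdpa_fd, VHOST_VDPA_SET_CONFIG_CALL, &efd) < 0) {
        close(efd);
        return -1;
    }
    return efd;   /* poll/read this fd to observe config interrupts */
}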
#endif

View File

@@ -292,8 +292,7 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb,
sch = css_find_subch(m, cssid, ssid, schid);
if (sch) {
if (css_subch_visible(sch)) {
- css_do_stsch(sch, &schib);
- cc = 0;
+ cc = css_do_stsch(sch, &schib);
} else {
/* Indicate no more subchannels in this css/ss */
cc = 3;