virtio/vhost: fixes

some bugfixes for virtio/vhost
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJXq0VGAAoJECgfDbjSjVRpC1sIAIJcgNiC4dj742doGGNKlIqF
 q81YTQSfdEgXIiONXAtVXaJmymsLTZqwOxb16Yxx/GwnH3butPZE85NGdAUR6WlU
 YJ5+PnTfI9Podxtulx/cy8etWoWgiD6e3AdZ7aNzB6PuTlWvECwQNZOzfwsz7fiP
 CZrpA1zGFL/CSLH64r4Uyjm+3LjP4LsQerd/oFOmr7ZlxrGIEE676D0ihyu0xcKO
 Feb8chXjA0Js/czyCXnW18pQG6E5nbJLuNTyCdc0HU22ybniI3B44jTovwqv2uGT
 tXZBIblcOf9DDNNToXGpMF1XKwJwUuIA3je1SgEm8Fnn0Wt02ZY9CULD1ZaQ/7w=
 =Qo6W
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio/vhost: fixes

some bugfixes for virtio/vhost

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 10 Aug 2016 16:16:22 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  vhost-user: Attempt to fix a race with set_mem_table.
  vhost-user: Introduce a new protocol feature REPLY_ACK.
  vhost: check for vhost_ops before using.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
master
Peter Maydell 2016-08-10 17:14:35 +01:00
commit d08306dc42
3 changed files with 115 additions and 50 deletions

View File

@ -37,6 +37,8 @@ consists of 3 header fields and a payload:
* Flags: 32-bit bit field:
- Lower 2 bits are the version (currently 0x01)
- Bit 2 is the reply flag - needs to be sent on each reply from the slave
- Bit 3 is the need_reply flag - see VHOST_USER_PROTOCOL_F_REPLY_ACK for
details.
* Size - 32-bit size of the payload
@ -126,6 +128,8 @@ the ones that do:
* VHOST_GET_VRING_BASE
* VHOST_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD)
[ Also see the section on REPLY_ACK protocol extension. ]
There are several messages that the master sends with file descriptors passed
in the ancillary data:
@ -254,6 +258,7 @@ Protocol features
#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_USER_PROTOCOL_F_RARP 2
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
Message types
-------------
@ -464,3 +469,24 @@ Message types
is present in VHOST_USER_GET_PROTOCOL_FEATURES.
The first 6 bytes of the payload contain the mac address of the guest to
allow the vhost user backend to construct and broadcast the fake RARP.
VHOST_USER_PROTOCOL_F_REPLY_ACK:
-------------------------------
The original vhost-user specification only demands replies for certain
commands. This differs from the vhost protocol implementation where commands
are sent over an ioctl() call and block until the client has completed.
With this protocol extension negotiated, the sender (QEMU) can set the
"need_reply" [Bit 3] flag to any command. This indicates that
the client MUST respond with a Payload VhostUserMsg indicating success or
failure. The payload should be set to zero on success or non-zero on failure,
unless the message already has an explicit reply body.
The response payload gives QEMU a deterministic indication of the result
of the command. Today, QEMU is expected to terminate the main vhost-user
loop upon receiving such errors. In future, qemu could be taught to be more
resilient for selective requests.
For the message types that already solicit a reply from the client, the
presence of VHOST_USER_PROTOCOL_F_REPLY_ACK or need_reply bit being set brings
no behavioural change. (See the 'Communication' section for details.)

View File

@ -428,7 +428,7 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
nc->vring_enable = enable;
if (vhost_ops->vhost_set_vring_enable) {
if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
}

View File

@ -31,6 +31,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
VHOST_USER_PROTOCOL_F_RARP = 2,
VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
VHOST_USER_PROTOCOL_F_MAX
};
@ -84,6 +85,7 @@ typedef struct VhostUserMsg {
#define VHOST_USER_VERSION_MASK (0x3)
#define VHOST_USER_REPLY_MASK (0x1<<2)
#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
uint32_t flags;
uint32_t size; /* the following payload size */
union {
@ -158,6 +160,25 @@ fail:
return -1;
}
static int process_message_reply(struct vhost_dev *dev,
VhostUserRequest request)
{
VhostUserMsg msg;
if (vhost_user_read(dev, &msg) < 0) {
return -1;
}
if (msg.request != request) {
error_report("Received unexpected msg type."
"Expected %d received %d",
request, msg.request);
return -1;
}
return msg.payload.u64 ? -1 : 0;
}
static bool vhost_user_one_time_request(VhostUserRequest request)
{
switch (request) {
@ -242,55 +263,6 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
return 0;
}
static int vhost_user_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
{
int fds[VHOST_MEMORY_MAX_NREGIONS];
int i, fd;
size_t fd_num = 0;
VhostUserMsg msg = {
.request = VHOST_USER_SET_MEM_TABLE,
.flags = VHOST_USER_VERSION,
};
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
ram_addr_t offset;
MemoryRegion *mr;
assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
&offset);
fd = memory_region_get_fd(mr);
if (fd > 0) {
msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = offset;
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
}
}
msg.payload.memory.nregions = fd_num;
if (!fd_num) {
error_report("Failed initializing vhost-user memory map, "
"consider using -object memory-backend-file share=on");
return -1;
}
msg.size = sizeof(msg.payload.memory.nregions);
msg.size += sizeof(msg.payload.memory.padding);
msg.size += fd_num * sizeof(VhostUserMemoryRegion);
if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
return -1;
}
return 0;
}
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
struct vhost_vring_addr *addr)
{
@ -505,6 +477,73 @@ static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
}
static int vhost_user_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
{
int fds[VHOST_MEMORY_MAX_NREGIONS];
int i, fd;
size_t fd_num = 0;
uint64_t features;
bool reply_supported = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_REPLY_ACK);
VhostUserMsg msg = {
.request = VHOST_USER_SET_MEM_TABLE,
.flags = VHOST_USER_VERSION,
};
if (reply_supported) {
msg.flags |= VHOST_USER_NEED_REPLY_MASK;
}
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
ram_addr_t offset;
MemoryRegion *mr;
assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
&offset);
fd = memory_region_get_fd(mr);
if (fd > 0) {
msg.payload.memory.regions[fd_num].userspace_addr
= reg->userspace_addr;
msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
msg.payload.memory.regions[fd_num].guest_phys_addr
= reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = offset;
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
}
}
msg.payload.memory.nregions = fd_num;
if (!fd_num) {
error_report("Failed initializing vhost-user memory map, "
"consider using -object memory-backend-file share=on");
return -1;
}
msg.size = sizeof(msg.payload.memory.nregions);
msg.size += sizeof(msg.payload.memory.padding);
msg.size += fd_num * sizeof(VhostUserMemoryRegion);
vhost_user_write(dev, &msg, fds, fd_num);
if (reply_supported) {
return process_message_reply(dev, msg.request);
} else {
/* Note: It is (yet) unknown when the client application has finished
* remapping the GPA.
* Attempt to prevent a race by sending a command that requires a reply.
*/
vhost_user_get_features(dev, &features);
}
return 0;
}
static int vhost_user_set_owner(struct vhost_dev *dev)
{
VhostUserMsg msg = {