Merge remote-tracking branch 'stefanha/block' into staging

# By MORITA Kazutaka (5) and others
# Via Stefan Hajnoczi
* stefanha/block:
  block: for HMP commit() operations on 'all', skip non-COW drives
  sheepdog: add support for connecting to unix domain socket
  sheepdog: use inet_connect to simplify connect code
  sheepdog: accept URIs
  move socket_set_nodelay to osdep.c
  slirp/tcp_subr.c: fix coding style in tcp_connect
  dataplane: remove EventPoll in favor of AioContext
  virtio-blk: fix unplug + virsh reboot
  ide/macio: Fix macio DMA initialisation.
master
Anthony Liguori 2013-03-04 08:22:48 -06:00
commit 71df81afc6
15 changed files with 330 additions and 390 deletions

View File

@ -1640,9 +1640,11 @@ int bdrv_commit_all(void)
BlockDriverState *bs; BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, list) { QTAILQ_FOREACH(bs, &bdrv_states, list) {
int ret = bdrv_commit(bs); if (bs->drv && bs->backing_hd) {
if (ret < 0) { int ret = bdrv_commit(bs);
return ret; if (ret < 0) {
return ret;
}
} }
} }
return 0; return 0;

View File

@ -13,6 +13,7 @@
*/ */
#include "qemu-common.h" #include "qemu-common.h"
#include "qemu/uri.h"
#include "qemu/error-report.h" #include "qemu/error-report.h"
#include "qemu/sockets.h" #include "qemu/sockets.h"
#include "block/block_int.h" #include "block/block_int.h"
@ -21,7 +22,7 @@
#define SD_PROTO_VER 0x01 #define SD_PROTO_VER 0x01
#define SD_DEFAULT_ADDR "localhost" #define SD_DEFAULT_ADDR "localhost"
#define SD_DEFAULT_PORT "7000" #define SD_DEFAULT_PORT 7000
#define SD_OP_CREATE_AND_WRITE_OBJ 0x01 #define SD_OP_CREATE_AND_WRITE_OBJ 0x01
#define SD_OP_READ_OBJ 0x02 #define SD_OP_READ_OBJ 0x02
@ -297,8 +298,8 @@ typedef struct BDRVSheepdogState {
bool is_snapshot; bool is_snapshot;
uint32_t cache_flags; uint32_t cache_flags;
char *addr; char *host_spec;
char *port; bool is_unix;
int fd; int fd;
CoMutex lock; CoMutex lock;
@ -446,56 +447,29 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
return acb; return acb;
} }
static int connect_to_sdog(const char *addr, const char *port) static int connect_to_sdog(BDRVSheepdogState *s)
{ {
char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV]; int fd;
int fd, ret; Error *err = NULL;
struct addrinfo hints, *res, *res0;
if (!addr) { if (s->is_unix) {
addr = SD_DEFAULT_ADDR; fd = unix_connect(s->host_spec, &err);
port = SD_DEFAULT_PORT; } else {
} fd = inet_connect(s->host_spec, &err);
memset(&hints, 0, sizeof(hints)); if (err == NULL) {
hints.ai_socktype = SOCK_STREAM; int ret = socket_set_nodelay(fd);
if (ret < 0) {
ret = getaddrinfo(addr, port, &hints, &res0); error_report("%s", strerror(errno));
if (ret) {
error_report("unable to get address info %s, %s",
addr, strerror(errno));
return -errno;
}
for (res = res0; res; res = res->ai_next) {
ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf),
sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
if (ret) {
continue;
}
fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if (fd < 0) {
continue;
}
reconnect:
ret = connect(fd, res->ai_addr, res->ai_addrlen);
if (ret < 0) {
if (errno == EINTR) {
goto reconnect;
} }
close(fd);
break;
} }
dprintf("connected to %s:%s\n", addr, port);
goto success;
} }
fd = -errno;
error_report("failed connect to %s:%s", addr, port); if (err != NULL) {
success: qerror_report_err(err);
freeaddrinfo(res0); error_free(err);
}
return fd; return fd;
} }
@ -787,15 +761,6 @@ static int aio_flush_request(void *opaque)
!QLIST_EMPTY(&s->pending_aio_head); !QLIST_EMPTY(&s->pending_aio_head);
} }
static int set_nodelay(int fd)
{
int ret, opt;
opt = 1;
ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt));
return ret;
}
/* /*
* Return a socket descriptor to read/write objects. * Return a socket descriptor to read/write objects.
* *
@ -804,29 +769,88 @@ static int set_nodelay(int fd)
*/ */
static int get_sheep_fd(BDRVSheepdogState *s) static int get_sheep_fd(BDRVSheepdogState *s)
{ {
int ret, fd; int fd;
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
error_report("%s", strerror(errno));
return fd; return fd;
} }
socket_set_nonblock(fd); socket_set_nonblock(fd);
ret = set_nodelay(fd);
if (ret) {
error_report("%s", strerror(errno));
closesocket(fd);
return -errno;
}
qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s); qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
return fd; return fd;
} }
/*
 * Parse a sheepdog URI and fill in the connection and VDI parameters.
 *
 * Accepted forms:
 *   sheepdog[+tcp]://[host:port]/vdiname[#snapid|#tag]
 *   sheepdog+unix:///vdiname?socket=path[#snapid|#tag]
 *
 * On success:
 *   - s->is_unix selects the transport and s->host_spec holds a newly
 *     allocated "host:port" string or unix socket path (owned by @s);
 *   - the VDI name is copied into @vdi (at most SD_MAX_VDI_LEN bytes);
 *   - if a fragment is present and strtoul() converts it to a non-zero
 *     value, that value is stored in *@snapid; otherwise *@snapid is 0 and
 *     the fragment is copied into @tag (at most SD_MAX_VDI_TAG_LEN bytes);
 *   - without a fragment, *@snapid is CURRENT_VDI_ID and @tag is untouched.
 *
 * Returns 0 on success and -EINVAL if @filename cannot be parsed or is not
 * a well-formed sheepdog URI.
 */
static int sd_parse_uri(BDRVSheepdogState *s, const char *filename,
                        char *vdi, uint32_t *snapid, char *tag)
{
    URI *uri;
    QueryParams *qp = NULL;
    int ret = 0;

    uri = uri_parse(filename);
    if (!uri) {
        return -EINVAL;
    }

    /*
     * transport
     *
     * g_strcmp0() is used instead of strcmp() so that a URI which was
     * parsed without a scheme is rejected rather than dereferencing NULL.
     */
    if (!g_strcmp0(uri->scheme, "sheepdog")) {
        s->is_unix = false;
    } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) {
        s->is_unix = false;
    } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) {
        s->is_unix = true;
    } else {
        ret = -EINVAL;
        goto out;
    }

    /*
     * The VDI name is the path without its leading '/'.  Reject a missing
     * path, a path that does not start with '/', and a bare "/" so that the
     * "+ 1" below never skips a real character or steps past the terminator.
     */
    if (uri->path == NULL || uri->path[0] != '/' || uri->path[1] == '\0') {
        ret = -EINVAL;
        goto out;
    }
    pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1);

    /* unix transport needs exactly one query parameter, tcp takes none */
    qp = query_params_parse(uri->query);
    if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
        ret = -EINVAL;
        goto out;
    }

    if (s->is_unix) {
        /* sheepdog+unix:///vdiname?socket=path */
        if (uri->server || uri->port || strcmp(qp->p[0].name, "socket") ||
            !qp->p[0].value) {
            /* a valueless "socket" would leave host_spec NULL */
            ret = -EINVAL;
            goto out;
        }
        s->host_spec = g_strdup(qp->p[0].value);
    } else {
        /* sheepdog[+tcp]://[host:port]/vdiname */
        s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR,
                                       uri->port ?: SD_DEFAULT_PORT);
    }

    /* snapshot tag */
    if (uri->fragment) {
        *snapid = strtoul(uri->fragment, NULL, 10);
        if (*snapid == 0) {
            /* not a (non-zero) number: treat the fragment as a tag name */
            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment);
        }
    } else {
        *snapid = CURRENT_VDI_ID; /* search current vdi */
    }

out:
    if (qp) {
        query_params_free(qp);
    }
    uri_free(uri);
    return ret;
}
/* /*
* Parse a filename * Parse a filename (old syntax)
* *
* filename must be one of the following formats: * filename must be one of the following formats:
* 1. [vdiname] * 1. [vdiname]
@ -845,9 +869,11 @@ static int get_sheep_fd(BDRVSheepdogState *s)
static int parse_vdiname(BDRVSheepdogState *s, const char *filename, static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
char *vdi, uint32_t *snapid, char *tag) char *vdi, uint32_t *snapid, char *tag)
{ {
char *p, *q; char *p, *q, *uri;
int nr_sep; const char *host_spec, *vdi_spec;
int nr_sep, ret;
strstart(filename, "sheepdog:", (const char **)&filename);
p = q = g_strdup(filename); p = q = g_strdup(filename);
/* count the number of separators */ /* count the number of separators */
@ -860,38 +886,32 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
} }
p = q; p = q;
/* use the first two tokens as hostname and port number. */ /* use the first two tokens as host_spec. */
if (nr_sep >= 2) { if (nr_sep >= 2) {
s->addr = p; host_spec = p;
p = strchr(p, ':'); p = strchr(p, ':');
*p++ = '\0'; p++;
s->port = p;
p = strchr(p, ':'); p = strchr(p, ':');
*p++ = '\0'; *p++ = '\0';
} else { } else {
s->addr = NULL; host_spec = "";
s->port = 0;
} }
pstrcpy(vdi, SD_MAX_VDI_LEN, p); vdi_spec = p;
p = strchr(vdi, ':'); p = strchr(vdi_spec, ':');
if (p) { if (p) {
*p++ = '\0'; *p++ = '#';
*snapid = strtoul(p, NULL, 10);
if (*snapid == 0) {
pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p);
}
} else {
*snapid = CURRENT_VDI_ID; /* search current vdi */
} }
if (s->addr == NULL) { uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);
g_free(q);
}
return 0; ret = sd_parse_uri(s, uri, vdi, snapid, tag);
g_free(q);
g_free(uri);
return ret;
} }
static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
@ -903,7 +923,7 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
unsigned int wlen, rlen = 0; unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
return fd; return fd;
} }
@ -1106,16 +1126,19 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
uint32_t snapid; uint32_t snapid;
char *buf = NULL; char *buf = NULL;
strstart(filename, "sheepdog:", (const char **)&filename);
QLIST_INIT(&s->inflight_aio_head); QLIST_INIT(&s->inflight_aio_head);
QLIST_INIT(&s->pending_aio_head); QLIST_INIT(&s->pending_aio_head);
s->fd = -1; s->fd = -1;
memset(vdi, 0, sizeof(vdi)); memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag)); memset(tag, 0, sizeof(tag));
if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) {
ret = -EINVAL; if (strstr(filename, "://")) {
ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
} else {
ret = parse_vdiname(s, filename, vdi, &snapid, tag);
}
if (ret < 0) {
goto out; goto out;
} }
s->fd = get_sheep_fd(s); s->fd = get_sheep_fd(s);
@ -1143,9 +1166,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
s->is_snapshot = true; s->is_snapshot = true;
} }
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
error_report("failed to connect");
ret = fd; ret = fd;
goto out; goto out;
} }
@ -1178,9 +1200,8 @@ out:
return ret; return ret;
} }
static int do_sd_create(char *filename, int64_t vdi_size, static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
uint32_t base_vid, uint32_t *vdi_id, int snapshot, uint32_t base_vid, uint32_t *vdi_id, int snapshot)
const char *addr, const char *port)
{ {
SheepdogVdiReq hdr; SheepdogVdiReq hdr;
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@ -1188,7 +1209,7 @@ static int do_sd_create(char *filename, int64_t vdi_size,
unsigned int wlen, rlen = 0; unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN]; char buf[SD_MAX_VDI_LEN];
fd = connect_to_sdog(addr, port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
return fd; return fd;
} }
@ -1284,17 +1305,17 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid; uint32_t snapid;
bool prealloc = false; bool prealloc = false;
const char *vdiname;
s = g_malloc0(sizeof(BDRVSheepdogState)); s = g_malloc0(sizeof(BDRVSheepdogState));
strstart(filename, "sheepdog:", &vdiname);
memset(vdi, 0, sizeof(vdi)); memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag)); memset(tag, 0, sizeof(tag));
if (parse_vdiname(s, vdiname, vdi, &snapid, tag) < 0) { if (strstr(filename, "://")) {
error_report("invalid filename"); ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
ret = -EINVAL; } else {
ret = parse_vdiname(s, filename, vdi, &snapid, tag);
}
if (ret < 0) {
goto out; goto out;
} }
@ -1355,7 +1376,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
bdrv_delete(bs); bdrv_delete(bs);
} }
ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->addr, s->port); ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0);
if (!prealloc || ret) { if (!prealloc || ret) {
goto out; goto out;
} }
@ -1376,7 +1397,7 @@ static void sd_close(BlockDriverState *bs)
dprintf("%s\n", s->name); dprintf("%s\n", s->name);
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
return; return;
} }
@ -1400,7 +1421,7 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
closesocket(s->fd); closesocket(s->fd);
g_free(s->addr); g_free(s->host_spec);
} }
static int64_t sd_getlength(BlockDriverState *bs) static int64_t sd_getlength(BlockDriverState *bs)
@ -1424,7 +1445,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
return -EINVAL; return -EINVAL;
} }
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
return fd; return fd;
} }
@ -1500,17 +1521,15 @@ static int sd_create_branch(BDRVSheepdogState *s)
buf = g_malloc(SD_INODE_SIZE); buf = g_malloc(SD_INODE_SIZE);
ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1, ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1);
s->addr, s->port);
if (ret) { if (ret) {
goto out; goto out;
} }
dprintf("%" PRIx32 " is created.\n", vid); dprintf("%" PRIx32 " is created.\n", vid);
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
error_report("failed to connect");
ret = fd; ret = fd;
goto out; goto out;
} }
@ -1769,7 +1788,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
/* refresh inode. */ /* refresh inode. */
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
ret = fd; ret = fd;
goto cleanup; goto cleanup;
@ -1782,8 +1801,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto cleanup; goto cleanup;
} }
ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1, ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid,
s->addr, s->port); 1);
if (ret < 0) { if (ret < 0) {
error_report("failed to create inode for snapshot. %s", error_report("failed to create inode for snapshot. %s",
strerror(errno)); strerror(errno));
@ -1838,9 +1857,8 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto out; goto out;
} }
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
error_report("failed to connect");
ret = fd; ret = fd;
goto out; goto out;
} }
@ -1902,7 +1920,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
vdi_inuse = g_malloc(max); vdi_inuse = g_malloc(max);
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
ret = fd; ret = fd;
goto out; goto out;
@ -1929,9 +1947,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT); hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
start_nr = hval & (SD_NR_VDIS - 1); start_nr = hval & (SD_NR_VDIS - 1);
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
error_report("failed to connect");
ret = fd; ret = fd;
goto out; goto out;
} }
@ -1988,7 +2005,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
uint32_t vdi_index; uint32_t vdi_index;
uint64_t offset; uint64_t offset;
fd = connect_to_sdog(s->addr, s->port); fd = connect_to_sdog(s);
if (fd < 0) { if (fd < 0) {
return fd; return fd;
} }
@ -2063,7 +2080,7 @@ static QEMUOptionParameter sd_create_options[] = {
{ NULL } { NULL }
}; };
BlockDriver bdrv_sheepdog = { static BlockDriver bdrv_sheepdog = {
.format_name = "sheepdog", .format_name = "sheepdog",
.protocol_name = "sheepdog", .protocol_name = "sheepdog",
.instance_size = sizeof(BDRVSheepdogState), .instance_size = sizeof(BDRVSheepdogState),
@ -2088,8 +2105,60 @@ BlockDriver bdrv_sheepdog = {
.create_options = sd_create_options, .create_options = sd_create_options,
}; };
static BlockDriver bdrv_sheepdog_tcp = {
.format_name = "sheepdog",
.protocol_name = "sheepdog+tcp",
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
.bdrv_snapshot_delete = sd_snapshot_delete,
.bdrv_snapshot_list = sd_snapshot_list,
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
.create_options = sd_create_options,
};
static BlockDriver bdrv_sheepdog_unix = {
.format_name = "sheepdog",
.protocol_name = "sheepdog+unix",
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
.bdrv_snapshot_delete = sd_snapshot_delete,
.bdrv_snapshot_list = sd_snapshot_list,
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
.create_options = sd_create_options,
};
static void bdrv_sheepdog_init(void) static void bdrv_sheepdog_init(void)
{ {
bdrv_register(&bdrv_sheepdog); bdrv_register(&bdrv_sheepdog);
bdrv_register(&bdrv_sheepdog_tcp);
bdrv_register(&bdrv_sheepdog_unix);
} }
block_init(bdrv_sheepdog_init); block_init(bdrv_sheepdog_init);

View File

@ -2841,7 +2841,7 @@ static void gdb_accept(void)
GDBState *s; GDBState *s;
struct sockaddr_in sockaddr; struct sockaddr_in sockaddr;
socklen_t len; socklen_t len;
int val, fd; int fd;
for(;;) { for(;;) {
len = sizeof(sockaddr); len = sizeof(sockaddr);
@ -2858,8 +2858,7 @@ static void gdb_accept(void)
} }
/* set short latency */ /* set short latency */
val = 1; socket_set_nodelay(fd);
setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
s = g_malloc0(sizeof(GDBState)); s = g_malloc0(sizeof(GDBState));
s->c_cpu = first_cpu; s->c_cpu = first_cpu;

View File

@ -1 +1 @@
obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o virtio-blk.o obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o ioq.o virtio-blk.o

View File

@ -1,100 +0,0 @@
/*
* Event loop with file descriptor polling
*
* Copyright 2012 IBM, Corp.
* Copyright 2012 Red Hat, Inc. and/or its affiliates
*
* Authors:
* Stefan Hajnoczi <stefanha@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include <sys/epoll.h>
#include "hw/dataplane/event-poll.h"
/*
 * Register an event notifier with the poller.
 *
 * Records @notifier and @callback in @handler, then adds the notifier's file
 * descriptor to the epoll set for EPOLLIN with @handler as the event's user
 * data.  Terminates the process if epoll_ctl() fails.
 */
void event_poll_add(EventPoll *poll, EventHandler *handler,
                    EventNotifier *notifier, EventCallback *callback)
{
    struct epoll_event ev = { 0 };
    int rc;

    ev.events = EPOLLIN;
    ev.data.ptr = handler;

    handler->notifier = notifier;
    handler->callback = callback;

    rc = epoll_ctl(poll->epoll_fd, EPOLL_CTL_ADD,
                   event_notifier_get_fd(notifier), &ev);
    if (rc != 0) {
        fprintf(stderr, "failed to add event handler to epoll: %m\n");
        exit(1);
    }
}
/*
 * Callback attached to the stop notifier.  It intentionally has no body:
 * the notifier firing is what makes event_poll() return.
 */
static void handle_stop(EventHandler *handler)
{
    (void)handler; /* nothing to do */
}
/*
 * Initialize @poll: create the epoll instance and register the internal
 * stop notifier.  Any failure terminates the process.
 */
void event_poll_init(EventPoll *poll)
{
    int epfd;
    int rc;

    /* epoll instance, closed automatically across exec */
    epfd = epoll_create1(EPOLL_CLOEXEC);
    poll->epoll_fd = epfd;
    if (epfd < 0) {
        fprintf(stderr, "epoll_create1 failed: %m\n");
        exit(1);
    }

    /* stop notifier, used by event_poll_notify() */
    rc = event_notifier_init(&poll->stop_notifier, 0);
    if (rc < 0) {
        fprintf(stderr, "failed to init stop notifier\n");
        exit(1);
    }

    event_poll_add(poll, &poll->stop_handler, &poll->stop_notifier,
                   handle_stop);
}
/*
 * Release what event_poll_init() set up: clean up the stop notifier, close
 * the epoll file descriptor and mark it invalid (-1).
 */
void event_poll_cleanup(EventPoll *poll)
{
    EventNotifier *stop = &poll->stop_notifier;

    event_notifier_cleanup(stop);

    close(poll->epoll_fd);
    poll->epoll_fd = -1;
}
/*
 * Wait for one registered notifier to fire and dispatch it.
 *
 * Blocks in epoll_wait() with no timeout, retrying when interrupted by a
 * signal (EINTR).  Exactly one event is handled per call; any other result
 * from epoll_wait() terminates the process.
 */
void event_poll(EventPoll *poll)
{
    struct epoll_event ev;
    EventHandler *active;
    int n;

    /* One event per call keeps this simple; this could be changed later. */
    for (;;) {
        n = epoll_wait(poll->epoll_fd, &ev, 1, -1);
        if (n >= 0 || errno != EINTR) {
            break;
        }
    }

    if (unlikely(n != 1)) {
        fprintf(stderr, "epoll_wait failed: %m\n");
        exit(1); /* should never happen */
    }

    /* The handler was stored as the event's user data on registration */
    active = ev.data.ptr;

    /* Clear the eventfd before running the handler's callback */
    event_notifier_test_and_clear(active->notifier);
    active->callback(active);
}
/*
 * Wake up event_poll() by setting the stop notifier.
 *
 * This function can be used from another thread.
 */
void event_poll_notify(EventPoll *poll)
{
    EventNotifier *stop = &poll->stop_notifier;

    event_notifier_set(stop);
}

View File

@ -1,40 +0,0 @@
/*
* Event loop with file descriptor polling
*
* Copyright 2012 IBM, Corp.
* Copyright 2012 Red Hat, Inc. and/or its affiliates
*
* Authors:
* Stefan Hajnoczi <stefanha@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#ifndef EVENT_POLL_H
#define EVENT_POLL_H
#include "qemu/event_notifier.h"
/* Forward declaration so EventCallback can take an EventHandler pointer */
typedef struct EventHandler EventHandler;
/* Signature of the function event_poll() invokes for a ready handler */
typedef void EventCallback(EventHandler *handler);
/* Pairs a notifier with the callback to run when that notifier fires */
struct EventHandler {
EventNotifier *notifier; /* eventfd */
EventCallback *callback; /* callback function */
};
/* State of one poller; set up by event_poll_init() */
typedef struct {
int epoll_fd; /* epoll(2) file descriptor */
EventNotifier stop_notifier; /* stop poll notifier */
EventHandler stop_handler; /* stop poll handler */
} EventPoll;
/* Register @notifier with @poll; @handler records it together with @callback */
void event_poll_add(EventPoll *poll, EventHandler *handler,
EventNotifier *notifier, EventCallback *callback);
/* Create the epoll instance and the internal stop notifier */
void event_poll_init(EventPoll *poll);
/* Clean up the stop notifier and close the epoll file descriptor */
void event_poll_cleanup(EventPoll *poll);
/* Block until the next event and invoke its callback */
void event_poll(EventPoll *poll);
/* Set the stop notifier to wake event_poll(); usable from another thread */
void event_poll_notify(EventPoll *poll);
#endif /* EVENT_POLL_H */

View File

@ -14,13 +14,13 @@
#include "trace.h" #include "trace.h"
#include "qemu/iov.h" #include "qemu/iov.h"
#include "event-poll.h"
#include "qemu/thread.h" #include "qemu/thread.h"
#include "vring.h" #include "vring.h"
#include "ioq.h" #include "ioq.h"
#include "migration/migration.h" #include "migration/migration.h"
#include "hw/virtio-blk.h" #include "hw/virtio-blk.h"
#include "hw/dataplane/virtio-blk.h" #include "hw/dataplane/virtio-blk.h"
#include "block/aio.h"
enum { enum {
SEG_MAX = 126, /* maximum number of I/O segments */ SEG_MAX = 126, /* maximum number of I/O segments */
@ -51,9 +51,14 @@ struct VirtIOBlockDataPlane {
Vring vring; /* virtqueue vring */ Vring vring; /* virtqueue vring */
EventNotifier *guest_notifier; /* irq */ EventNotifier *guest_notifier; /* irq */
EventPoll event_poll; /* event poller */ /* Note that these EventNotifiers are assigned by value. This is
EventHandler io_handler; /* Linux AIO completion handler */ * fine as long as you do not call event_notifier_cleanup on them
EventHandler notify_handler; /* virtqueue notify handler */ * (because you don't own the file descriptor or handle; you just
* use it).
*/
AioContext *ctx;
EventNotifier io_notifier; /* Linux AIO completion */
EventNotifier host_notifier; /* doorbell */
IOQueue ioqueue; /* Linux AIO queue (should really be per IOQueue ioqueue; /* Linux AIO queue (should really be per
dataplane thread) */ dataplane thread) */
@ -256,10 +261,10 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
} }
} }
static void handle_notify(EventHandler *handler) static void handle_notify(EventNotifier *e)
{ {
VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane, VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
notify_handler); host_notifier);
/* There is one array of iovecs into which all new requests are extracted /* There is one array of iovecs into which all new requests are extracted
* from the vring. Requests are read from the vring and the translated * from the vring. Requests are read from the vring and the translated
@ -286,6 +291,7 @@ static void handle_notify(EventHandler *handler)
unsigned int out_num = 0, in_num = 0; unsigned int out_num = 0, in_num = 0;
unsigned int num_queued; unsigned int num_queued;
event_notifier_test_and_clear(&s->host_notifier);
for (;;) { for (;;) {
/* Disable guest->host notifies to avoid unnecessary vmexits */ /* Disable guest->host notifies to avoid unnecessary vmexits */
vring_disable_notification(s->vdev, &s->vring); vring_disable_notification(s->vdev, &s->vring);
@ -334,11 +340,12 @@ static void handle_notify(EventHandler *handler)
} }
} }
static void handle_io(EventHandler *handler) static void handle_io(EventNotifier *e)
{ {
VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane, VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
io_handler); io_notifier);
event_notifier_test_and_clear(&s->io_notifier);
if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) { if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
notify_guest(s); notify_guest(s);
} }
@ -348,7 +355,7 @@ static void handle_io(EventHandler *handler)
* requests. * requests.
*/ */
if (unlikely(vring_more_avail(&s->vring))) { if (unlikely(vring_more_avail(&s->vring))) {
handle_notify(&s->notify_handler); handle_notify(&s->host_notifier);
} }
} }
@ -357,7 +364,7 @@ static void *data_plane_thread(void *opaque)
VirtIOBlockDataPlane *s = opaque; VirtIOBlockDataPlane *s = opaque;
do { do {
event_poll(&s->event_poll); aio_poll(s->ctx, true);
} while (!s->stopping || s->num_reqs > 0); } while (!s->stopping || s->num_reqs > 0);
return NULL; return NULL;
} }
@ -445,7 +452,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
return; return;
} }
event_poll_init(&s->event_poll); s->ctx = aio_context_new();
/* Set up guest notifier (irq) */ /* Set up guest notifier (irq) */
if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1,
@ -462,17 +469,16 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
fprintf(stderr, "virtio-blk failed to set host notifier\n"); fprintf(stderr, "virtio-blk failed to set host notifier\n");
exit(1); exit(1);
} }
event_poll_add(&s->event_poll, &s->notify_handler, s->host_notifier = *virtio_queue_get_host_notifier(vq);
virtio_queue_get_host_notifier(vq), aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, NULL);
handle_notify);
/* Set up ioqueue */ /* Set up ioqueue */
ioq_init(&s->ioqueue, s->fd, REQ_MAX); ioq_init(&s->ioqueue, s->fd, REQ_MAX);
for (i = 0; i < ARRAY_SIZE(s->requests); i++) { for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb); ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
} }
event_poll_add(&s->event_poll, &s->io_handler, s->io_notifier = *ioq_get_notifier(&s->ioqueue);
ioq_get_notifier(&s->ioqueue), handle_io); aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, NULL);
s->started = true; s->started = true;
trace_virtio_blk_data_plane_start(s); trace_virtio_blk_data_plane_start(s);
@ -498,15 +504,17 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
qemu_bh_delete(s->start_bh); qemu_bh_delete(s->start_bh);
s->start_bh = NULL; s->start_bh = NULL;
} else { } else {
event_poll_notify(&s->event_poll); aio_notify(s->ctx);
qemu_thread_join(&s->thread); qemu_thread_join(&s->thread);
} }
aio_set_event_notifier(s->ctx, &s->io_notifier, NULL, NULL);
ioq_cleanup(&s->ioqueue); ioq_cleanup(&s->ioqueue);
aio_set_event_notifier(s->ctx, &s->host_notifier, NULL, NULL);
s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false); s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false);
event_poll_cleanup(&s->event_poll); aio_context_unref(s->ctx);
/* Clean up guest notifier (irq) */ /* Clean up guest notifier (irq) */
s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, false); s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, false);

View File

@ -188,7 +188,7 @@ static int macio_newworld_initfn(PCIDevice *d)
sysbus_dev = SYS_BUS_DEVICE(&ns->ide[1]); sysbus_dev = SYS_BUS_DEVICE(&ns->ide[1]);
sysbus_connect_irq(sysbus_dev, 0, ns->irqs[3]); sysbus_connect_irq(sysbus_dev, 0, ns->irqs[3]);
sysbus_connect_irq(sysbus_dev, 1, ns->irqs[4]); sysbus_connect_irq(sysbus_dev, 1, ns->irqs[4]);
macio_ide_register_dma(&ns->ide[0], s->dbdma, 0x1a); macio_ide_register_dma(&ns->ide[1], s->dbdma, 0x1a);
ret = qdev_init(DEVICE(&ns->ide[1])); ret = qdev_init(DEVICE(&ns->ide[1]));
if (ret < 0) { if (ret < 0) {
return ret; return ret;

View File

@ -36,6 +36,7 @@ typedef struct VirtIOBlock
VirtIOBlkConf *blk; VirtIOBlkConf *blk;
unsigned short sector_mask; unsigned short sector_mask;
DeviceState *qdev; DeviceState *qdev;
VMChangeStateEntry *change;
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
VirtIOBlockDataPlane *dataplane; VirtIOBlockDataPlane *dataplane;
#endif #endif
@ -681,7 +682,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
} }
#endif #endif
qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
s->qdev = dev; s->qdev = dev;
register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
virtio_blk_save, virtio_blk_load, s); virtio_blk_save, virtio_blk_load, s);
@ -702,6 +703,7 @@ void virtio_blk_exit(VirtIODevice *vdev)
virtio_blk_data_plane_destroy(s->dataplane); virtio_blk_data_plane_destroy(s->dataplane);
s->dataplane = NULL; s->dataplane = NULL;
#endif #endif
qemu_del_vm_change_state_handler(s->change);
unregister_savevm(s->qdev, "virtio-blk", s); unregister_savevm(s->qdev, "virtio-blk", s);
blockdev_mark_auto_del(s->bs); blockdev_mark_auto_del(s->bs);
virtio_cleanup(vdev); virtio_cleanup(vdev);

View File

@ -34,6 +34,7 @@ int inet_aton(const char *cp, struct in_addr *ia);
int qemu_socket(int domain, int type, int protocol); int qemu_socket(int domain, int type, int protocol);
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen); int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
int socket_set_cork(int fd, int v); int socket_set_cork(int fd, int v);
int socket_set_nodelay(int fd);
void socket_set_block(int fd); void socket_set_block(int fd);
void socket_set_nonblock(int fd); void socket_set_nonblock(int fd);
int send_all(int fd, const void *buf, int len1); int send_all(int fd, const void *buf, int len1);

View File

@ -2365,12 +2365,6 @@ static void tcp_chr_telnet_init(int fd)
send(fd, (char *)buf, 3, 0); send(fd, (char *)buf, 3, 0);
} }
static void socket_set_nodelay(int fd)
{
int val = 1;
setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
}
static int tcp_chr_add_client(CharDriverState *chr, int fd) static int tcp_chr_add_client(CharDriverState *chr, int fd)
{ {
TCPCharDriver *s = chr->opaque; TCPCharDriver *s = chr->opaque;

View File

@ -830,7 +830,7 @@ QEMU-based virtual machines.
You can create a Sheepdog disk image with the command: You can create a Sheepdog disk image with the command:
@example @example
qemu-img create sheepdog:@var{image} @var{size} qemu-img create sheepdog:///@var{image} @var{size}
@end example @end example
where @var{image} is the Sheepdog image name and @var{size} is its where @var{image} is the Sheepdog image name and @var{size} is its
size. size.
@ -838,38 +838,44 @@ size.
To import the existing @var{filename} to Sheepdog, you can use a To import the existing @var{filename} to Sheepdog, you can use a
convert command. convert command.
@example @example
qemu-img convert @var{filename} sheepdog:@var{image} qemu-img convert @var{filename} sheepdog:///@var{image}
@end example @end example
You can boot from the Sheepdog disk image with the command: You can boot from the Sheepdog disk image with the command:
@example @example
qemu-system-i386 sheepdog:@var{image} qemu-system-i386 sheepdog:///@var{image}
@end example @end example
You can also create a snapshot of the Sheepdog image like qcow2. You can also create a snapshot of the Sheepdog image like qcow2.
@example @example
qemu-img snapshot -c @var{tag} sheepdog:@var{image} qemu-img snapshot -c @var{tag} sheepdog:///@var{image}
@end example @end example
where @var{tag} is a tag name of the newly created snapshot. where @var{tag} is a tag name of the newly created snapshot.
To boot from the Sheepdog snapshot, specify the tag name of the To boot from the Sheepdog snapshot, specify the tag name of the
snapshot. snapshot.
@example @example
qemu-system-i386 sheepdog:@var{image}:@var{tag} qemu-system-i386 sheepdog:///@var{image}#@var{tag}
@end example @end example
You can create a cloned image from the existing snapshot. You can create a cloned image from the existing snapshot.
@example @example
qemu-img create -b sheepdog:@var{base}:@var{tag} sheepdog:@var{image} qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image}
@end example @end example
where @var{base} is an image name of the source snapshot and @var{tag} where @var{base} is an image name of the source snapshot and @var{tag}
is its tag name. is its tag name.
You can use a Unix domain socket instead of an inet socket:
@example
qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path}
@end example
If the Sheepdog daemon doesn't run on the local host, you need to If the Sheepdog daemon doesn't run on the local host, you need to
specify one of the Sheepdog servers to connect to. specify one of the Sheepdog servers to connect to.
@example @example
qemu-img create sheepdog:@var{hostname}:@var{port}:@var{image} @var{size} qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size}
qemu-system-i386 sheepdog:@var{hostname}:@var{port}:@var{image} qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image}
@end example @end example
@node disk_images_iscsi @node disk_images_iscsi

View File

@ -2108,23 +2108,13 @@ QEMU supports using either local sheepdog devices or remote networked
devices. devices.
Syntax for specifying a sheepdog device Syntax for specifying a sheepdog device
@table @list @example
``sheepdog:<vdiname>'' sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag]
@end example
``sheepdog:<vdiname>:<snapid>''
``sheepdog:<vdiname>:<tag>''
``sheepdog:<host>:<port>:<vdiname>''
``sheepdog:<host>:<port>:<vdiname>:<snapid>''
``sheepdog:<host>:<port>:<vdiname>:<tag>''
@end table
Example Example
@example @example
qemu-system-i386 --drive file=sheepdog:192.0.2.1:30000:MyVirtualMachine qemu-system-i386 --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine
@end example @end example
See also @url{http://www.osrg.net/sheepdog/}. See also @url{http://www.osrg.net/sheepdog/}.

View File

@ -384,83 +384,86 @@ int tcp_fconnect(struct socket *so)
* the time it gets to accept(), so... We simply accept * the time it gets to accept(), so... We simply accept
* here and SYN the local-host. * here and SYN the local-host.
*/ */
void void tcp_connect(struct socket *inso)
tcp_connect(struct socket *inso)
{ {
Slirp *slirp = inso->slirp; Slirp *slirp = inso->slirp;
struct socket *so; struct socket *so;
struct sockaddr_in addr; struct sockaddr_in addr;
socklen_t addrlen = sizeof(struct sockaddr_in); socklen_t addrlen = sizeof(struct sockaddr_in);
struct tcpcb *tp; struct tcpcb *tp;
int s, opt; int s, opt;
DEBUG_CALL("tcp_connect"); DEBUG_CALL("tcp_connect");
DEBUG_ARG("inso = %lx", (long)inso); DEBUG_ARG("inso = %lx", (long)inso);
/* /*
* If it's an SS_ACCEPTONCE socket, no need to socreate() * If it's an SS_ACCEPTONCE socket, no need to socreate()
* another socket, just use the accept() socket. * another socket, just use the accept() socket.
*/ */
if (inso->so_state & SS_FACCEPTONCE) { if (inso->so_state & SS_FACCEPTONCE) {
/* FACCEPTONCE already have a tcpcb */ /* FACCEPTONCE already have a tcpcb */
so = inso; so = inso;
} else { } else {
if ((so = socreate(slirp)) == NULL) { so = socreate(slirp);
/* If it failed, get rid of the pending connection */ if (so == NULL) {
closesocket(accept(inso->s,(struct sockaddr *)&addr,&addrlen)); /* If it failed, get rid of the pending connection */
return; closesocket(accept(inso->s, (struct sockaddr *)&addr, &addrlen));
} return;
if (tcp_attach(so) < 0) {
free(so); /* NOT sofree */
return;
}
so->so_laddr = inso->so_laddr;
so->so_lport = inso->so_lport;
}
(void) tcp_mss(sototcpcb(so), 0);
if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0) {
tcp_close(sototcpcb(so)); /* This will sofree() as well */
return;
}
socket_set_nonblock(s);
opt = 1;
setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int));
opt = 1;
setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int));
opt = 1;
setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&opt,sizeof(int));
so->so_fport = addr.sin_port;
so->so_faddr = addr.sin_addr;
/* Translate connections from localhost to the real hostname */
if (so->so_faddr.s_addr == 0 ||
(so->so_faddr.s_addr & loopback_mask) ==
(loopback_addr.s_addr & loopback_mask)) {
so->so_faddr = slirp->vhost_addr;
} }
if (tcp_attach(so) < 0) {
free(so); /* NOT sofree */
return;
}
so->so_laddr = inso->so_laddr;
so->so_lport = inso->so_lport;
}
/* Close the accept() socket, set right state */ tcp_mss(sototcpcb(so), 0);
if (inso->so_state & SS_FACCEPTONCE) {
closesocket(so->s); /* If we only accept once, close the accept() socket */
so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */
/* if it's not FACCEPTONCE, it's already NOFDREF */
}
so->s = s;
so->so_state |= SS_INCOMING;
so->so_iptos = tcp_tos(so); s = accept(inso->s, (struct sockaddr *)&addr, &addrlen);
tp = sototcpcb(so); if (s < 0) {
tcp_close(sototcpcb(so)); /* This will sofree() as well */
return;
}
socket_set_nonblock(s);
opt = 1;
setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(int));
opt = 1;
setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(int));
socket_set_nodelay(s);
tcp_template(tp); so->so_fport = addr.sin_port;
so->so_faddr = addr.sin_addr;
/* Translate connections from localhost to the real hostname */
if (so->so_faddr.s_addr == 0 ||
(so->so_faddr.s_addr & loopback_mask) ==
(loopback_addr.s_addr & loopback_mask)) {
so->so_faddr = slirp->vhost_addr;
}
tp->t_state = TCPS_SYN_SENT; /* Close the accept() socket, set right state */
tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; if (inso->so_state & SS_FACCEPTONCE) {
tp->iss = slirp->tcp_iss; /* If we only accept once, close the accept() socket */
slirp->tcp_iss += TCP_ISSINCR/2; closesocket(so->s);
tcp_sendseqinit(tp);
tcp_output(tp); /* Don't select it yet, even though we have an FD */
/* if it's not FACCEPTONCE, it's already NOFDREF */
so->so_state = SS_NOFDREF;
}
so->s = s;
so->so_state |= SS_INCOMING;
so->so_iptos = tcp_tos(so);
tp = sototcpcb(so);
tcp_template(tp);
tp->t_state = TCPS_SYN_SENT;
tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
tp->iss = slirp->tcp_iss;
slirp->tcp_iss += TCP_ISSINCR/2;
tcp_sendseqinit(tp);
tcp_output(tp);
} }
/* /*

View File

@ -63,6 +63,12 @@ int socket_set_cork(int fd, int v)
#endif #endif
} }
/*
 * Turn on the TCP_NODELAY option for the socket @fd.
 *
 * Returns whatever setsockopt() returns for the request.
 */
int socket_set_nodelay(int fd)
{
    int enable = 1;

    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable));
}
int qemu_madvise(void *addr, size_t len, int advice) int qemu_madvise(void *addr, size_t len, int advice)
{ {
if (advice == QEMU_MADV_INVALID) { if (advice == QEMU_MADV_INVALID) {