raw-posix: Switch to bdrv_co_* interfaces

In order to use the modern byte-based .bdrv_co_preadv/pwritev()
interface, this patch switches raw-posix to coroutine-based interfaces
as a first step. In terms of semantics and performance, it makes no
difference compared to the existing code whether we go from a coroutine
to a callback-based interface already in block/io.c or only in
linux-aio.c.

As there have been concerns in the past that this change may be a step
in the wrong direction with respect to a possible AIO fast path, the
old callback-based interface for linux-aio is left around and can be
reactivated when a fast path (e.g. directly from virtio-blk dataplane,
bypassing the whole block layer) is implemented.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
master
Kevin Wolf 2014-08-06 17:18:07 +02:00
parent 9896c8765f
commit 2174f12bde
3 changed files with 96 additions and 54 deletions

View File

@ -11,8 +11,10 @@
#include "qemu-common.h" #include "qemu-common.h"
#include "block/aio.h" #include "block/aio.h"
#include "qemu/queue.h" #include "qemu/queue.h"
#include "block/block.h"
#include "block/raw-aio.h" #include "block/raw-aio.h"
#include "qemu/event_notifier.h" #include "qemu/event_notifier.h"
#include "qemu/coroutine.h"
#include <libaio.h> #include <libaio.h>
@ -30,6 +32,7 @@
struct qemu_laiocb { struct qemu_laiocb {
BlockAIOCB common; BlockAIOCB common;
Coroutine *co;
LinuxAioState *ctx; LinuxAioState *ctx;
struct iocb iocb; struct iocb iocb;
ssize_t ret; ssize_t ret;
@ -88,9 +91,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
} }
} }
} }
laiocb->common.cb(laiocb->common.opaque, ret);
qemu_aio_unref(laiocb); laiocb->ret = ret;
if (laiocb->co) {
qemu_coroutine_enter(laiocb->co, NULL);
} else {
laiocb->common.cb(laiocb->common.opaque, ret);
qemu_aio_unref(laiocb);
}
} }
/* The completion BH fetches completed I/O requests and invokes their /* The completion BH fetches completed I/O requests and invokes their
@ -230,22 +238,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
} }
} }
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type)
BlockCompletionFunc *cb, void *opaque, int type)
{ {
struct qemu_laiocb *laiocb; LinuxAioState *s = laiocb->ctx;
struct iocb *iocbs; struct iocb *iocbs = &laiocb->iocb;
off_t offset = sector_num * 512; QEMUIOVector *qiov = laiocb->qiov;
laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
laiocb->nbytes = nb_sectors * 512;
laiocb->ctx = s;
laiocb->ret = -EINPROGRESS;
laiocb->is_read = (type == QEMU_AIO_READ);
laiocb->qiov = qiov;
iocbs = &laiocb->iocb;
switch (type) { switch (type) {
case QEMU_AIO_WRITE: case QEMU_AIO_WRITE:
@ -258,7 +256,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
default: default:
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
__func__, type); __func__, type);
goto out_free_aiocb; return -EIO;
} }
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
@ -268,11 +266,56 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
(!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) { (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
ioq_submit(s); ioq_submit(s);
} }
return &laiocb->common;
out_free_aiocb: return 0;
qemu_aio_unref(laiocb); }
return NULL;
/*
 * Coroutine-based Linux AIO submission.
 *
 * Builds the request descriptor on the calling coroutine's stack, passes it
 * to laio_do_submit() and, when submission succeeds, yields.  The completion
 * path (qemu_laio_process_completion) stores the final result in laiocb.ret
 * and re-enters the coroutine recorded in laiocb.co.
 *
 * @bs:         not used by this function
 * @s:          Linux AIO state the request is queued on
 * @fd:         file descriptor forwarded to laio_do_submit()
 * @sector_num: start of the request, in units of BDRV_SECTOR_SIZE
 * @qiov:       I/O vector for the request
 * @nb_sectors: length of the request, in units of BDRV_SECTOR_SIZE
 * @type:       request type; QEMU_AIO_READ marks the request as a read
 *
 * Returns the negative value from laio_do_submit() if submission fails,
 * otherwise the value found in laiocb.ret after the coroutine is resumed.
 */
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
                                int64_t sector_num, QEMUIOVector *qiov,
                                int nb_sectors, int type)
{
    off_t offset = sector_num * BDRV_SECTOR_SIZE;
    int ret;

    struct qemu_laiocb laiocb = {
        .co         = qemu_coroutine_self(),
        .nbytes     = nb_sectors * BDRV_SECTOR_SIZE,
        .ctx        = s,
        /*
         * Mark the request as pending, exactly as laio_submit() does, so
         * that an unfinished request is never mistaken for one that has
         * completed with a result of 0.
         */
        .ret        = -EINPROGRESS,
        .is_read    = (type == QEMU_AIO_READ),
        .qiov       = qiov,
    };

    ret = laio_do_submit(fd, &laiocb, offset, type);
    if (ret < 0) {
        return ret;
    }

    /* laiocb lives on this stack frame; it stays valid while we are parked. */
    qemu_coroutine_yield();
    return laiocb.ret;
}
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type)
{
struct qemu_laiocb *laiocb;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
int ret;
laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
laiocb->ctx = s;
laiocb->ret = -EINPROGRESS;
laiocb->is_read = (type == QEMU_AIO_READ);
laiocb->qiov = qiov;
ret = laio_do_submit(fd, laiocb, offset, type);
if (ret < 0) {
qemu_aio_unref(laiocb);
return NULL;
}
return &laiocb->common;
} }
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)

View File

@ -15,6 +15,7 @@
#ifndef QEMU_RAW_AIO_H #ifndef QEMU_RAW_AIO_H
#define QEMU_RAW_AIO_H #define QEMU_RAW_AIO_H
#include "qemu/coroutine.h"
#include "qemu/iov.h" #include "qemu/iov.h"
/* AIO request types */ /* AIO request types */
@ -38,6 +39,9 @@
typedef struct LinuxAioState LinuxAioState; typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(void); LinuxAioState *laio_init(void);
void laio_cleanup(LinuxAioState *s); void laio_cleanup(LinuxAioState *s);
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors, int type);
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type); BlockCompletionFunc *cb, void *opaque, int type);

View File

@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
} }
static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int nb_sectors, QEMUIOVector *qiov, int type)
BlockCompletionFunc *cb, void *opaque, int type)
{ {
BDRVRawState *s = bs->opaque; BDRVRawState *s = bs->opaque;
if (fd_open(bs) < 0) if (fd_open(bs) < 0)
return NULL; return -EIO;
/* /*
* Check if the underlying device requires requests to be aligned, * Check if the underlying device requires requests to be aligned,
@ -1345,14 +1344,26 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
type |= QEMU_AIO_MISALIGNED; type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO #ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) { } else if (s->use_aio) {
return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, return laio_co_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
nb_sectors, cb, opaque, type); nb_sectors, type);
#endif #endif
} }
} }
return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors, return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
cb, opaque, type); nb_sectors * BDRV_SECTOR_SIZE, type);
}
/*
 * Sector-based vectored read entry point (installed as .bdrv_co_readv in the
 * BlockDriver tables of this file).  Forwards the request to raw_co_rw()
 * tagged as QEMU_AIO_READ and returns its result unchanged.
 */
static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                     int nb_sectors, QEMUIOVector *qiov)
{
    const int aio_type = QEMU_AIO_READ;

    return raw_co_rw(bs, sector_num, nb_sectors, qiov, aio_type);
}
static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
} }
static void raw_aio_plug(BlockDriverState *bs) static void raw_aio_plug(BlockDriverState *bs)
@ -1375,22 +1386,6 @@ static void raw_aio_unplug(BlockDriverState *bs)
#endif #endif
} }
static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
{
return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
cb, opaque, QEMU_AIO_READ);
}
static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
{
return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
cb, opaque, QEMU_AIO_WRITE);
}
static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque) BlockCompletionFunc *cb, void *opaque)
{ {
@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = {
.bdrv_co_get_block_status = raw_co_get_block_status, .bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
.bdrv_aio_readv = raw_aio_readv, .bdrv_co_readv = raw_co_readv,
.bdrv_aio_writev = raw_aio_writev, .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush, .bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard, .bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits, .bdrv_refresh_limits = raw_refresh_limits,
@ -2405,8 +2400,8 @@ static BlockDriver bdrv_host_device = {
.create_opts = &raw_create_opts, .create_opts = &raw_create_opts,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
.bdrv_aio_readv = raw_aio_readv, .bdrv_co_readv = raw_co_readv,
.bdrv_aio_writev = raw_aio_writev, .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush, .bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard, .bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits, .bdrv_refresh_limits = raw_refresh_limits,
@ -2535,8 +2530,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create, .bdrv_create = hdev_create,
.create_opts = &raw_create_opts, .create_opts = &raw_create_opts,
.bdrv_aio_readv = raw_aio_readv, .bdrv_co_readv = raw_co_readv,
.bdrv_aio_writev = raw_aio_writev, .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush, .bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits, .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug, .bdrv_io_plug = raw_aio_plug,
@ -2670,8 +2665,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create, .bdrv_create = hdev_create,
.create_opts = &raw_create_opts, .create_opts = &raw_create_opts,
.bdrv_aio_readv = raw_aio_readv, .bdrv_co_readv = raw_co_readv,
.bdrv_aio_writev = raw_aio_writev, .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush, .bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits, .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug, .bdrv_io_plug = raw_aio_plug,