diff --git a/src/qemu_driver.c b/src/qemu_driver.c index f73ab38f..5b844b93 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -58,6 +58,8 @@ typedef struct VitastorFdData VitastorFdData; typedef struct VitastorClient { void *proxy; + int uring_eventfd; + void *watch; char *config_path; char *etcd_host; @@ -223,6 +225,40 @@ out: return; } +static void vitastor_unlocked_uring_handler(VitastorClient *client) +{ + do + { + vitastor_c_uring_handle_events(client->proxy); + } while (vitastor_c_uring_has_work(client->proxy)); +} + +static void vitastor_uring_handler(void *opaque) +{ + VitastorClient *client = (VitastorClient*)opaque; + qemu_mutex_lock(&client->mutex); + do + { + vitastor_c_uring_handle_events(client->proxy); + } while (vitastor_c_uring_has_work(client->proxy)); + qemu_mutex_unlock(&client->mutex); +} + +static void vitastor_schedule_uring_handler(VitastorClient *client) +{ + void *opaque = client; +#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 + replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque); +#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8 + aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque); +#elif QEMU_VERSION_MAJOR >= 2 +// task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque); +// qemu_bh_schedule(task->bh); +#else +// vitastor_co_generic_bh_cb(opaque); +#endif +} + static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task) { BlockDriverState *bs = task->bs; @@ -231,6 +267,7 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task) qemu_mutex_lock(&client->mutex); vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task); + vitastor_schedule_uring_handler(client); qemu_mutex_unlock(&client->mutex); while (!task->complete) @@ -255,6 +292,26 @@ static void vitastor_aio_fd_write(void *fddv) qemu_mutex_unlock(&fdd->cli->mutex); } +static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque) +{ + aio_set_fd_handler(ctx, fd, +#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 + 0 /*is_external*/, +#endif + fd_read, + fd_write, +#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1 + NULL /*io_flush*/, +#endif +#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3 + NULL /*io_poll*/, +#endif +#if QEMU_VERSION_MAJOR >= 7 + NULL /*io_poll_ready*/, +#endif + opaque); +} + static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque) { VitastorClient *client = (VitastorClient*)vcli; @@ -297,22 +354,9 @@ static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandl } client->fds[client->fd_count++] = fdd; } - aio_set_fd_handler(client->ctx, fd, -#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 - 0 /*is_external*/, -#endif - fd_read ? vitastor_aio_fd_read : NULL, - fd_write ? vitastor_aio_fd_write : NULL, -#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1 - NULL /*io_flush*/, -#endif -#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3 - NULL /*io_poll*/, -#endif -#if QEMU_VERSION_MAJOR >= 7 - NULL /*io_poll_ready*/, -#endif - fdd); + universal_aio_set_fd_handler( + client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd + ); } static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) @@ -333,10 +377,19 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0); client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0); client->ctx = bdrv_get_aio_context(bs); - client->proxy = vitastor_c_create_qemu( - vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix, + client->proxy = vitastor_c_create_uring( + client->config_path, client->etcd_host, client->etcd_prefix, client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0 ); + client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy); + if (client->uring_eventfd < 0) + { + fprintf(stderr, "failed to create eventfd: %s\n", strerror(errno)); + error_setg(errp, "failed to create eventfd"); + vitastor_close(bs); + return -1; + } + universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client); image = client->image = g_strdup(qdict_get_try_str(options, "image")); client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0; // Get image metadata (size and readonly flag) or just wait until the client is ready @@ -593,6 +646,7 @@ static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs, uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode; qemu_mutex_lock(&client->mutex); vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task); + vitastor_schedule_uring_handler(client); qemu_mutex_unlock(&client->mutex); while (!task.complete) @@ -626,6 +680,7 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs, uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode; qemu_mutex_lock(&client->mutex); vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task); + vitastor_schedule_uring_handler(client); qemu_mutex_unlock(&client->mutex); while (!task.complete) @@ -704,6 +759,7 @@ static int coroutine_fn vitastor_co_block_status( task.bitmap = client->last_bitmap = NULL; qemu_mutex_lock(&client->mutex); vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task); + vitastor_schedule_uring_handler(client); qemu_mutex_unlock(&client->mutex); while (!task.complete) { @@ -790,6 +846,7 @@ static int coroutine_fn vitastor_co_flush(BlockDriverState *bs) qemu_mutex_lock(&client->mutex); vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task); + vitastor_schedule_uring_handler(client); qemu_mutex_unlock(&client->mutex); while (!task.complete) diff --git a/src/ringloop.cpp b/src/ringloop.cpp index 28da77c2..92cfd901 100644 --- a/src/ringloop.cpp +++ b/src/ringloop.cpp @@ -5,6 +5,8 @@ #include +#include + #include "ringloop.h" ring_loop_t::ring_loop_t(int qd) @@ -32,6 +34,10 @@ ring_loop_t::~ring_loop_t() free(free_ring_data); free(ring_datas); io_uring_queue_exit(&ring); + if (ring_eventfd) + { + close(ring_eventfd); + } } void ring_loop_t::register_consumer(ring_consumer_t *consumer) @@ -127,3 +133,24 @@ int ring_loop_t::sqes_left() } return left; } + +int ring_loop_t::register_eventfd() +{ + if (ring_eventfd >= 0) + { + return ring_eventfd; + } + ring_eventfd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK); + if (ring_eventfd < 0) + { + return -errno; + } + int r = io_uring_register_eventfd(&ring, ring_eventfd); + if (r < 0) + { + close(ring_eventfd); + ring_eventfd = -1; + return r; + } + return ring_eventfd; +} diff --git a/src/ringloop.h b/src/ringloop.h index 8d9c15c8..bf9d8372 100644 --- a/src/ringloop.h +++ b/src/ringloop.h @@ -126,11 +126,13 @@ class ring_loop_t unsigned free_ring_data_ptr; bool loop_again; struct io_uring ring; + int ring_eventfd = -1; public: ring_loop_t(int qd); ~ring_loop_t(); void register_consumer(ring_consumer_t *consumer); void unregister_consumer(ring_consumer_t *consumer); + int register_eventfd(); inline struct io_uring_sqe* get_sqe() { diff --git a/src/vitastor_c.cpp b/src/vitastor_c.cpp index abc19b78..b1648a12 100644 --- a/src/vitastor_c.cpp +++ b/src/vitastor_c.cpp @@ -5,6 +5,7 @@ // Also acts as a C-C++ proxy for the QEMU driver (QEMU headers don't compile with g++) #include +#include #include "ringloop.h" #include "epoll_manager.h" @@ -25,6 +26,7 @@ struct vitastor_c epoll_manager_t *epmgr = NULL; timerfd_manager_t *tfd = NULL; cluster_client_t *cli = NULL; + int uring_eventfd = -1; QEMUSetFDHandler *aio_set_fd_handler = NULL; void *aio_ctx = NULL; @@ -113,6 +115,15 @@ vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_ho return self; } +int vitastor_c_uring_register_eventfd(vitastor_c *client) +{ + if (!client->ringloop) + { + return -EINVAL; + } + return client->ringloop->register_eventfd(); +} + vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len) { json11::Json::object cfg; @@ -166,6 +177,11 @@ void vitastor_c_uring_wait_events(vitastor_c *client) client->ringloop->wait(); } +bool vitastor_c_uring_has_work(vitastor_c *client) +{ + return client->ringloop->has_work(); +} + void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque) { diff --git a/src/vitastor_c.h b/src/vitastor_c.h index f8cd5be0..57f7f28d 100644 --- a/src/vitastor_c.h +++ b/src/vitastor_c.h @@ -39,9 +39,11 @@ vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_ho vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len); void vitastor_c_destroy(vitastor_c *client); int vitastor_c_is_ready(vitastor_c *client); +int vitastor_c_uring_register_eventfd(vitastor_c *client); void vitastor_c_uring_wait_ready(vitastor_c *client); void vitastor_c_uring_handle_events(vitastor_c *client); void vitastor_c_uring_wait_events(vitastor_c *client); +bool vitastor_c_uring_has_work(vitastor_c *client); void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque); void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,