Improve performance by adding io_uring support, fix qemu-img deadlocks
parent
40f9a6f1c2
commit
a417b83c07
|
@ -1,3 +1,10 @@
|
|||
pve-qemu-kvm (8.0.2-3+vitastor2) bookworm; urgency=medium
|
||||
|
||||
* Improve performance by adding io_uring support
|
||||
* Fix qemu-img deadlocks after iothread fixes
|
||||
|
||||
-- Vitaliy Filippov <vitalif@yourcmc.ru> Wed, 19 Jul 2023 02:07:02 +0300
|
||||
|
||||
pve-qemu-kvm (8.0.2-3+vitastor1) bookworm; urgency=medium
|
||||
|
||||
* Add Vitastor support
|
||||
|
|
|
@ -58,6 +58,7 @@ Depends: ceph-common (>= 0.48),
|
|||
libspice-server1 (>= 0.14.0~),
|
||||
libusb-1.0-0 (>= 1.0.17-1),
|
||||
libusbredirparser1 (>= 0.6-2),
|
||||
vitastor-client (>= 0.9.4),
|
||||
libuuid1,
|
||||
${misc:Depends},
|
||||
${shlibs:Depends},
|
||||
|
|
|
@ -192,7 +192,7 @@ Index: a/block/vitastor.c
|
|||
===================================================================
|
||||
--- /dev/null
|
||||
+++ a/block/vitastor.c
|
||||
@@ -0,0 +1,914 @@
|
||||
@@ -0,0 +1,1017 @@
|
||||
+// Copyright (c) Vitaliy Filippov, 2019+
|
||||
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
||||
+
|
||||
|
@ -253,6 +253,8 @@ Index: a/block/vitastor.c
|
|||
+typedef struct VitastorClient
|
||||
+{
|
||||
+ void *proxy;
|
||||
+ int uring_eventfd;
|
||||
+
|
||||
+ void *watch;
|
||||
+ char *config_path;
|
||||
+ char *etcd_host;
|
||||
|
@ -272,6 +274,7 @@ Index: a/block/vitastor.c
|
|||
+ AioContext *ctx;
|
||||
+ VitastorFdData **fds;
|
||||
+ int fd_count, fd_alloc;
|
||||
+ int bh_uring_scheduled;
|
||||
+
|
||||
+ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
|
||||
+ uint32_t last_bitmap_granularity;
|
||||
|
@ -301,6 +304,14 @@ Index: a/block/vitastor.c
|
|||
+#endif
|
||||
+} VitastorRPC;
|
||||
+
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
||||
+typedef struct VitastorBH // context for the bottom-half wrapper; only compiled for QEMU 2.x older than 2.8
|
||||
+{
|
||||
+ VitastorClient *cli; // client passed on to the handler when the bottom half runs
|
||||
+ QEMUBH *bh; // the bottom half itself, kept so the wrapper can delete it after it has run
|
||||
+} VitastorBH;
|
||||
+#endif
|
||||
+
|
||||
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
||||
+static void vitastor_co_generic_cb(void *opaque, long retval);
|
||||
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
|
||||
|
@ -418,6 +429,57 @@ Index: a/block/vitastor.c
|
|||
+ return;
|
||||
+}
|
||||
+
|
||||
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
+static void vitastor_uring_handler(void *opaque) // opaque is the VitastorClient; used both as a scheduled bottom half and as the uring_eventfd read handler
|
||||
+{
|
||||
+ VitastorClient *client = (VitastorClient*)opaque;
|
||||
+ qemu_mutex_lock(&client->mutex); // library calls below are made with the client mutex held
|
||||
+ client->bh_uring_scheduled = 0; // clear the flag so vitastor_schedule_uring_handler() may queue another run
|
||||
+ do // handle events at least once, even if no work is reported yet
|
||||
+ {
|
||||
+ vitastor_c_uring_handle_events(client->proxy);
|
||||
+ } while (vitastor_c_uring_has_work(client->proxy)); // repeat for as long as the library reports more work
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+}
|
||||
+
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
||||
+static void vitastor_bh_uring_handler(void *opaque) // bottom-half wrapper for QEMU 2.x < 2.8; opaque is a malloc()ed VitastorBH
|
||||
+{
|
||||
+ VitastorBH *vbh = opaque;
|
||||
+ vitastor_uring_handler(vbh->cli); // run the same handler that newer QEMU versions schedule directly
|
||||
+ qemu_bh_delete(vbh->bh); // a new bottom half is created per schedule, so delete this one after it has run
|
||||
+ free(vbh); // release the wrapper allocated in vitastor_schedule_uring_handler()
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static void vitastor_schedule_uring_handler(VitastorClient *client) // queues one run of vitastor_uring_handler(); visible call sites hold client->mutex
|
||||
+{
|
||||
+ void *opaque = client;
|
||||
+ if (client->uring_eventfd >= 0 && !client->bh_uring_scheduled) // skip when no io_uring eventfd is registered or a run is already pending
|
||||
+ {
|
||||
+ client->bh_uring_scheduled = 1; // reset to 0 by vitastor_uring_handler() when it runs
|
||||
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
|
||||
+ replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque);
|
||||
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
|
||||
+ aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque);
|
||||
+#else
|
||||
+ VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH)); // freed by vitastor_bh_uring_handler() after the bottom half runs
|
||||
+ vbh->cli = client;
|
||||
+#if QEMU_VERSION_MAJOR >= 2
|
||||
+ vbh->bh = aio_bh_new(client->ctx, vitastor_bh_uring_handler, vbh); // use the client's AioContext; there is no task in this function
|
||||
+#else
|
||||
+ vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh);
|
||||
+#endif
|
||||
+ qemu_bh_schedule(vbh->bh);
|
||||
+#endif
|
||||
+ }
|
||||
+}
|
||||
+#else
|
||||
+static void vitastor_schedule_uring_handler(VitastorClient *client) // no-op stand-in when VITASTOR_C_API_VERSION is undefined or < 2, so call sites need no #if
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
||||
+{
|
||||
+ BlockDriverState *bs = task->bs;
|
||||
|
@ -426,6 +488,7 @@ Index: a/block/vitastor.c
|
|||
+
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+
|
||||
+ while (!task->complete)
|
||||
|
@ -439,6 +502,7 @@ Index: a/block/vitastor.c
|
|||
+ VitastorFdData *fdd = (VitastorFdData*)fddv;
|
||||
+ qemu_mutex_lock(&fdd->cli->mutex);
|
||||
+ fdd->fd_read(fdd->opaque);
|
||||
+ vitastor_schedule_uring_handler(fdd->cli);
|
||||
+ qemu_mutex_unlock(&fdd->cli->mutex);
|
||||
+}
|
||||
+
|
||||
|
@ -447,9 +511,30 @@ Index: a/block/vitastor.c
|
|||
+ VitastorFdData *fdd = (VitastorFdData*)fddv;
|
||||
+ qemu_mutex_lock(&fdd->cli->mutex);
|
||||
+ fdd->fd_write(fdd->opaque);
|
||||
+ vitastor_schedule_uring_handler(fdd->cli);
|
||||
+ qemu_mutex_unlock(&fdd->cli->mutex);
|
||||
+}
|
||||
+
|
||||
+static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque) // wraps aio_set_fd_handler(), supplying the extra arguments selected by the QEMU version checks below
|
||||
+{
|
||||
+ aio_set_fd_handler(ctx, fd,
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
|
||||
+ 0 /*is_external*/, // only passed on QEMU 2.5 and newer
|
||||
+#endif
|
||||
+ fd_read,
|
||||
+ fd_write,
|
||||
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
|
||||
+ NULL /*io_flush*/, // only passed on QEMU 1.6 and older
|
||||
+#endif
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
|
||||
+ NULL /*io_poll*/, // only passed on QEMU 2.9 and newer; no poll callback is installed
|
||||
+#endif
|
||||
+#if QEMU_VERSION_MAJOR >= 7
|
||||
+ NULL /*io_poll_ready*/, // only passed on QEMU 7 and newer
|
||||
+#endif
|
||||
+ opaque); // handed back to fd_read/fd_write when they are invoked
|
||||
+}
|
||||
+
|
||||
+static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
|
||||
+{
|
||||
+ VitastorClient *client = (VitastorClient*)vcli;
|
||||
|
@ -492,22 +577,9 @@ Index: a/block/vitastor.c
|
|||
+ }
|
||||
+ client->fds[client->fd_count++] = fdd;
|
||||
+ }
|
||||
+ aio_set_fd_handler(client->ctx, fd,
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
|
||||
+ 0 /*is_external*/,
|
||||
+#endif
|
||||
+ fd_read ? vitastor_aio_fd_read : NULL,
|
||||
+ fd_write ? vitastor_aio_fd_write : NULL,
|
||||
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
|
||||
+ NULL /*io_flush*/,
|
||||
+#endif
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
|
||||
+ NULL /*io_poll*/,
|
||||
+#endif
|
||||
+#if QEMU_VERSION_MAJOR >= 7
|
||||
+ NULL /*io_poll_ready*/,
|
||||
+#endif
|
||||
+ fdd);
|
||||
+ universal_aio_set_fd_handler(
|
||||
+ client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd
|
||||
+ );
|
||||
+}
|
||||
+
|
||||
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
||||
|
@ -528,10 +600,35 @@ Index: a/block/vitastor.c
|
|||
+ client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
|
||||
+ client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
|
||||
+ client->ctx = bdrv_get_aio_context(bs);
|
||||
+ client->proxy = vitastor_c_create_qemu(
|
||||
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
+ client->proxy = vitastor_c_create_qemu_uring(
|
||||
+ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||
+ );
|
||||
+ if (!client->proxy)
|
||||
+ {
|
||||
+ fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower\n", strerror(errno));
|
||||
+ client->uring_eventfd = -1;
|
||||
+#endif
|
||||
+ client->proxy = vitastor_c_create_qemu(
|
||||
+ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||
+ );
|
||||
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy);
|
||||
+ if (client->uring_eventfd < 0)
|
||||
+ {
|
||||
+ fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno));
|
||||
+ error_setg(errp, "failed to create io_uring eventfd");
|
||||
+ vitastor_close(bs);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
|
||||
+ }
|
||||
+#endif
|
||||
+ image = client->image = g_strdup(qdict_get_try_str(options, "image"));
|
||||
+ client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
||||
+ // Get image metadata (size and readonly flag) or just wait until the client is ready
|
||||
|
@ -763,7 +860,8 @@ Index: a/block/vitastor.c
|
|||
+ task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
+ qemu_bh_schedule(task->bh);
|
||||
+#else
|
||||
+ vitastor_co_generic_bh_cb(opaque);
|
||||
+ task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
|
||||
+ qemu_bh_schedule(task->bh);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
|
@ -788,6 +886,7 @@ Index: a/block/vitastor.c
|
|||
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+
|
||||
+ while (!task.complete)
|
||||
|
@ -821,6 +920,7 @@ Index: a/block/vitastor.c
|
|||
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+
|
||||
+ while (!task.complete)
|
||||
|
@ -838,7 +938,6 @@ Index: a/block/vitastor.c
|
|||
+ VitastorRPC *task = opaque;
|
||||
+ VitastorClient *client = task->bs->opaque;
|
||||
+ task->ret = retval;
|
||||
+ task->complete = 1;
|
||||
+ if (retval >= 0)
|
||||
+ {
|
||||
+ task->bitmap = bitmap;
|
||||
|
@ -850,15 +949,17 @@ Index: a/block/vitastor.c
|
|||
+ client->last_bitmap = bitmap;
|
||||
+ }
|
||||
+ }
|
||||
+ if (qemu_coroutine_self() != task->co)
|
||||
+ {
|
||||
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||
+ aio_co_wake(task->co);
|
||||
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
|
||||
+ replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
|
||||
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
+#elif QEMU_VERSION_MAJOR >= 2
|
||||
+ task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
+ qemu_bh_schedule(task->bh);
|
||||
+#else
|
||||
+ qemu_coroutine_enter(task->co, NULL);
|
||||
+ qemu_aio_release(task);
|
||||
+ task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
|
||||
+ qemu_bh_schedule(task->bh);
|
||||
+#endif
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int coroutine_fn vitastor_co_block_status(
|
||||
|
@ -899,6 +1000,7 @@ Index: a/block/vitastor.c
|
|||
+ task.bitmap = client->last_bitmap = NULL;
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+ while (!task.complete)
|
||||
+ {
|
||||
|
@ -985,6 +1087,7 @@ Index: a/block/vitastor.c
|
|||
+
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+
|
||||
+ while (!task.complete)
|
||||
|
|
Loading…
Reference in New Issue