Improve performance by adding io_uring support, fix iothread compat
parent
4657dc72ae
commit
016b2de920
|
@ -1,3 +1,10 @@
|
|||
pve-qemu-kvm (7.2.0-8+vitastor2) bookworm; urgency=medium
|
||||
|
||||
* Improve performance by adding io_uring support
|
||||
* Fix compatibility with iothread
|
||||
|
||||
-- Vitaliy Filippov <vitalif@yourcmc.ru> Tue, 18 Jul 2023 02:17:06 +0300
|
||||
|
||||
pve-qemu-kvm (7.2.0-8+vitastor1) bullseye; urgency=medium
|
||||
|
||||
* Add Vitastor support
|
||||
|
|
|
@ -61,6 +61,7 @@ Depends: ceph-common (>= 0.48),
|
|||
libspice-server1 (>= 0.14.0~),
|
||||
libusb-1.0-0 (>= 1.0.17-1),
|
||||
libusbredirparser1 (>= 0.6-2),
|
||||
vitastor-client (>= 0.9.4),
|
||||
libuuid1,
|
||||
${misc:Depends},
|
||||
${shlibs:Depends},
|
||||
|
|
|
@ -171,7 +171,7 @@ Index: a/block/vitastor.c
|
|||
===================================================================
|
||||
--- /dev/null
|
||||
+++ a/block/vitastor.c
|
||||
@@ -0,0 +1,811 @@
|
||||
@@ -0,0 +1,1017 @@
|
||||
+// Copyright (c) Vitaliy Filippov, 2019+
|
||||
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
||||
+
|
||||
|
@ -209,6 +209,11 @@ Index: a/block/vitastor.c
|
|||
+#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
|
||||
+#define qobject_unref QDECREF
|
||||
+#endif
|
||||
+#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
|
||||
+#include "sysemu/replay.h"
|
||||
+#else
|
||||
+#include "sysemu/sysemu.h"
|
||||
+#endif
|
||||
+
|
||||
+#include "vitastor_c.h"
|
||||
+
|
||||
|
@ -222,9 +227,13 @@ Index: a/block/vitastor.c
|
|||
+}
|
||||
+#endif
|
||||
+
|
||||
+typedef struct VitastorFdData VitastorFdData;
|
||||
+
|
||||
+typedef struct VitastorClient
|
||||
+{
|
||||
+ void *proxy;
|
||||
+ int uring_eventfd;
|
||||
+
|
||||
+ void *watch;
|
||||
+ char *config_path;
|
||||
+ char *etcd_host;
|
||||
|
@ -241,12 +250,24 @@ Index: a/block/vitastor.c
|
|||
+ int rdma_gid_index;
|
||||
+ int rdma_mtu;
|
||||
+ QemuMutex mutex;
|
||||
+ AioContext *ctx;
|
||||
+ VitastorFdData **fds;
|
||||
+ int fd_count, fd_alloc;
|
||||
+ int bh_uring_scheduled;
|
||||
+
|
||||
+ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
|
||||
+ uint32_t last_bitmap_granularity;
|
||||
+ uint8_t *last_bitmap;
|
||||
+} VitastorClient;
|
||||
+
|
||||
+// Registration record for one file descriptor watched on behalf of the
+// Vitastor client library. vitastor_aio_set_fd_handler() allocates it and
+// passes it to the AioContext as the argument of the
+// vitastor_aio_fd_read()/vitastor_aio_fd_write() wrappers.
+typedef struct VitastorFdData
+{
+    // Client that owns this descriptor; its mutex guards the callbacks below
+    VitastorClient *cli;
+    // Watched descriptor, used as the lookup key in VitastorClient.fds
+    int fd;
+    // Library callback for read readiness (may be NULL)
+    IOHandler *fd_read;
+    // Library callback for write readiness (may be NULL)
+    IOHandler *fd_write;
+    // Argument handed back to fd_read/fd_write
+    void *opaque;
+} VitastorFdData;
|
||||
+
|
||||
+typedef struct VitastorRPC
|
||||
+{
|
||||
+ BlockDriverState *bs;
|
||||
|
@ -257,10 +278,21 @@ Index: a/block/vitastor.c
|
|||
+ uint64_t inode, offset, len;
|
||||
+ uint32_t bitmap_granularity;
|
||||
+ uint8_t *bitmap;
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
||||
+ QEMUBH *bh;
|
||||
+#endif
|
||||
+} VitastorRPC;
|
||||
+
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
||||
+// Context of the uring bottom half on QEMU 2.x older than 2.8.
+// vitastor_schedule_uring_handler() allocates it; vitastor_bh_uring_handler()
+// consumes it, deletes the bottom half and frees the structure.
+typedef struct VitastorBH
+{
+    // Client whose io_uring has to be serviced
+    VitastorClient *cli;
+    // The bottom half itself, kept so the handler can delete it
+    QEMUBH *bh;
+} VitastorBH;
|
||||
+#endif
|
||||
+
|
||||
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
||||
+static void vitastor_co_generic_bh_cb(void *opaque, long retval);
|
||||
+static void vitastor_co_generic_cb(void *opaque, long retval);
|
||||
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
|
||||
+static void vitastor_close(BlockDriverState *bs);
|
||||
+
|
||||
|
@ -376,6 +408,57 @@ Index: a/block/vitastor.c
|
|||
+ return;
|
||||
+}
|
||||
+
|
||||
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
+// Process pending io_uring events of a client.
+// opaque is the VitastorClient. The whole run happens under client->mutex.
+static void vitastor_uring_handler(void *opaque)
+{
+    VitastorClient *cli = (VitastorClient*)opaque;
+    qemu_mutex_lock(&cli->mutex);
+    // Reset the flag before draining so that a new run can be requested
+    cli->bh_uring_scheduled = 0;
+    for (;;)
+    {
+        vitastor_c_uring_handle_events(cli->proxy);
+        // Handle at least once, then repeat for as long as the library reports more work
+        if (!vitastor_c_uring_has_work(cli->proxy))
+            break;
+    }
+    qemu_mutex_unlock(&cli->mutex);
+}
|
||||
+
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
||||
+// Bottom-half entry point for QEMU 2.x older than 2.8, where the uring handler
+// cannot be scheduled as a one-shot bottom half.
+// opaque is the VitastorBH allocated by vitastor_schedule_uring_handler():
+// the handler services the client's io_uring, then deletes the bottom half
+// and releases the VitastorBH.
+static void vitastor_bh_uring_handler(void *opaque)
+{
+    VitastorBH *vbh = opaque;
+    // Same handler that newer QEMU versions schedule directly
+    vitastor_uring_handler(vbh->cli);
+    qemu_bh_delete(vbh->bh);
+    free(vbh);
+}
|
||||
+#endif
|
||||
+
|
||||
+// Request a deferred run of vitastor_uring_handler() in the client's AioContext.
+// Does nothing when io_uring is not in use (uring_eventfd < 0) or when a run has
+// already been requested and has not started yet (bh_uring_scheduled).
+// The callers visible in this file invoke it with client->mutex held.
+static void vitastor_schedule_uring_handler(VitastorClient *client)
+{
+    if (client->uring_eventfd < 0 || client->bh_uring_scheduled)
+    {
+        return;
+    }
+    client->bh_uring_scheduled = 1;
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+    replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, client);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+    aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, client);
+#else
+    // No one-shot API here: create a bottom half that vitastor_bh_uring_handler()
+    // deletes after it has run
+    VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH));
+    if (!vbh)
+    {
+        // Nothing was scheduled, so let a later call try again
+        client->bh_uring_scheduled = 0;
+        return;
+    }
+    vbh->cli = client;
+#if QEMU_VERSION_MAJOR >= 2
+    // There is no task in this function; the client's own AioContext is the target
+    vbh->bh = aio_bh_new(client->ctx, vitastor_bh_uring_handler, vbh);
+#else
+    vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh);
+#endif
+    qemu_bh_schedule(vbh->bh);
+#endif
+}
|
||||
+#else
|
||||
+// Variant compiled when VITASTOR_C_API_VERSION is undefined or below 2:
+// the io_uring code above is not built, so there is nothing to schedule.
+static void vitastor_schedule_uring_handler(VitastorClient *client)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
||||
+{
|
||||
+ BlockDriverState *bs = task->bs;
|
||||
|
@ -383,7 +466,8 @@ Index: a/block/vitastor.c
|
|||
+ task->co = qemu_coroutine_self();
|
||||
+
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_bh_cb, task);
|
||||
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+
|
||||
+ while (!task->complete)
|
||||
|
@ -392,13 +476,32 @@ Index: a/block/vitastor.c
|
|||
+ }
|
||||
+}
|
||||
+
|
||||
+static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
|
||||
+// Read-readiness wrapper registered with the AioContext.
+// fddv is the VitastorFdData of the descriptor. The library's fd_read callback
+// is invoked under the client mutex, after which a uring handler run is requested.
+static void vitastor_aio_fd_read(void *fddv)
+{
+    VitastorFdData *entry = fddv;
+    VitastorClient *owner = entry->cli;
+    qemu_mutex_lock(&owner->mutex);
+    entry->fd_read(entry->opaque);
+    vitastor_schedule_uring_handler(owner);
+    qemu_mutex_unlock(&owner->mutex);
+}
|
||||
+
|
||||
+// Write-readiness wrapper registered with the AioContext.
+// fddv is the VitastorFdData of the descriptor. The library's fd_write callback
+// is invoked under the client mutex, after which a uring handler run is requested.
+static void vitastor_aio_fd_write(void *fddv)
+{
+    VitastorFdData *entry = fddv;
+    VitastorClient *owner = entry->cli;
+    qemu_mutex_lock(&owner->mutex);
+    entry->fd_write(entry->opaque);
+    vitastor_schedule_uring_handler(owner);
+    qemu_mutex_unlock(&owner->mutex);
+}
|
||||
+
|
||||
+static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
|
||||
+{
|
||||
+ aio_set_fd_handler(ctx, fd,
|
||||
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
|
||||
+ 0 /*is_external*/,
|
||||
+#endif
|
||||
+ fd_read, fd_write,
|
||||
+ fd_read,
|
||||
+ fd_write,
|
||||
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
|
||||
+ NULL /*io_flush*/,
|
||||
+#endif
|
||||
|
@ -411,6 +514,53 @@ Index: a/block/vitastor.c
|
|||
+ opaque);
|
||||
+}
|
||||
+
|
||||
+// fd-handler registration callback handed to vitastor_c_create_qemu() /
+// vitastor_c_create_qemu_uring().
+//
+// vcli      - the VitastorClient
+// fd        - descriptor to watch
+// fd_read,
+// fd_write  - library callbacks; if at least one is non-NULL the descriptor is
+//             registered or updated, if both are NULL it is unregistered
+// opaque    - argument for fd_read/fd_write
+// unused1, unused2 - ignored
+//
+// QEMU never receives the library callbacks directly: it gets the
+// vitastor_aio_fd_read()/vitastor_aio_fd_write() wrappers with the descriptor's
+// VitastorFdData as their argument.
+static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+    VitastorClient *client = (VitastorClient*)vcli;
+    VitastorFdData *fdd = NULL;
+    int i;
+    for (i = 0; i < client->fd_count; i++)
+    {
+        if (client->fds[i]->fd == fd)
+        {
+            if (fd_read || fd_write)
+            {
+                // Known descriptor: update the existing entry in place
+                fdd = client->fds[i];
+                fdd->opaque = opaque;
+                fdd->fd_read = fd_read;
+                fdd->fd_write = fd_write;
+            }
+            else
+            {
+                // Unregistering: close the gap in the array.
+                // NOTE(review): the entry itself is not freed here - confirm that no
+                // running handler can still be using it before adding a free().
+                for (int j = i+1; j < client->fd_count; j++)
+                    client->fds[j-1] = client->fds[j];
+                client->fd_count--;
+            }
+            break;
+        }
+    }
+    if ((fd_read || fd_write) && !fdd)
+    {
+        // First registration of this descriptor
+        fdd = (VitastorFdData*)malloc(sizeof(VitastorFdData));
+        if (!fdd)
+        {
+            // The callback returns void, so the failure cannot be reported to the
+            // library; stop with a message instead of dereferencing NULL below
+            fprintf(stderr, "vitastor: failed to allocate fd handler data\n");
+            abort();
+        }
+        fdd->cli = client;
+        fdd->fd = fd;
+        fdd->fd_read = fd_read;
+        fdd->fd_write = fd_write;
+        fdd->opaque = opaque;
+        if (client->fd_count >= client->fd_alloc)
+        {
+            // Grow by doubling, starting from 16 slots
+            int new_alloc = client->fd_alloc*2;
+            if (new_alloc < 16)
+                new_alloc = 16;
+            // Keep the old pointer until realloc() is known to have succeeded
+            VitastorFdData **new_fds = (VitastorFdData**)realloc(client->fds, sizeof(VitastorFdData*) * new_alloc);
+            if (!new_fds)
+            {
+                fprintf(stderr, "vitastor: failed to grow fd handler table\n");
+                abort();
+            }
+            client->fds = new_fds;
+            client->fd_alloc = new_alloc;
+        }
+        client->fds[client->fd_count++] = fdd;
+    }
+    // fdd is NULL here when the descriptor is being unregistered
+    universal_aio_set_fd_handler(
+        client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd
+    );
+}
|
||||
+
|
||||
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
||||
+{
|
||||
+ VitastorRPC task;
|
||||
|
@ -428,10 +578,36 @@ Index: a/block/vitastor.c
|
|||
+ client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
||||
+ client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
|
||||
+ client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
|
||||
+ client->proxy = vitastor_c_create_qemu(
|
||||
+ vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
+ client->ctx = bdrv_get_aio_context(bs);
|
||||
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
+ client->proxy = vitastor_c_create_qemu_uring(
|
||||
+ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||
+ );
|
||||
+ if (!client->proxy)
|
||||
+ {
|
||||
+ fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower\n", strerror(errno));
|
||||
+ client->uring_eventfd = -1;
|
||||
+#endif
|
||||
+ client->proxy = vitastor_c_create_qemu(
|
||||
+ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||
+ );
|
||||
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy);
|
||||
+ if (client->uring_eventfd < 0)
|
||||
+ {
|
||||
+ fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno));
|
||||
+ error_setg(errp, "failed to create io_uring eventfd");
|
||||
+ vitastor_close(bs);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
|
||||
+ }
|
||||
+#endif
|
||||
+ image = client->image = g_strdup(qdict_get_try_str(options, "image"));
|
||||
+ client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
||||
+ // Get image metadata (size and readonly flag) or just wait until the client is ready
|
||||
|
@ -511,6 +687,12 @@ Index: a/block/vitastor.c
|
|||
+{
|
||||
+ VitastorClient *client = bs->opaque;
|
||||
+ vitastor_c_destroy(client->proxy);
|
||||
+ if (client->fds)
|
||||
+ {
|
||||
+ free(client->fds);
|
||||
+ client->fds = NULL;
|
||||
+ client->fd_alloc = client->fd_count = 0;
|
||||
+ }
|
||||
+ qemu_mutex_destroy(&client->mutex);
|
||||
+ if (client->config_path)
|
||||
+ g_free(client->config_path);
|
||||
|
@ -627,25 +809,44 @@ Index: a/block/vitastor.c
|
|||
+ };
|
||||
+}
|
||||
+
|
||||
+static void vitastor_co_generic_bh_cb(void *opaque, long retval)
|
||||
+static void vitastor_co_generic_bh_cb(void *opaque)
|
||||
+{
|
||||
+ VitastorRPC *task = opaque;
|
||||
+ task->ret = retval;
|
||||
+ task->complete = 1;
|
||||
+ if (qemu_coroutine_self() != task->co)
|
||||
+ {
|
||||
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||
+ aio_co_wake(task->co);
|
||||
+#else
|
||||
+#if QEMU_VERSION_MAJOR == 2
|
||||
+ qemu_bh_delete(task->bh);
|
||||
+#endif
|
||||
+ qemu_coroutine_enter(task->co, NULL);
|
||||
+ qemu_aio_release(task);
|
||||
+#endif
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+// Completion callback given to the Vitastor library.
+// opaque is the VitastorRPC of the request, retval its result. The result is
+// stored in the task and the rest of the completion is deferred to
+// vitastor_co_generic_bh_cb(), scheduled as a bottom half with whichever API
+// the QEMU version being built against provides.
+static void vitastor_co_generic_cb(void *opaque, long retval)
+{
+    VitastorRPC *rpc = (VitastorRPC*)opaque;
+    rpc->ret = retval;
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, rpc);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, rpc);
+#else
+    // Older QEMU: the bottom half is kept in the task so the handler can delete it
+#if QEMU_VERSION_MAJOR >= 2
+    rpc->bh = aio_bh_new(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, rpc);
+#else
+    rpc->bh = qemu_bh_new(vitastor_co_generic_bh_cb, rpc);
+#endif
+    qemu_bh_schedule(rpc->bh);
+#endif
+}
|
||||
+
|
||||
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
|
||||
+{
|
||||
+ vitastor_co_generic_bh_cb(opaque, retval);
|
||||
+ vitastor_co_generic_cb(opaque, retval);
|
||||
+}
|
||||
+
|
||||
+static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
|
||||
|
@ -664,6 +865,7 @@ Index: a/block/vitastor.c
|
|||
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+
|
||||
+ while (!task.complete)
|
||||
|
@ -696,7 +898,8 @@ Index: a/block/vitastor.c
|
|||
+
|
||||
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
|
||||
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+
|
||||
+ while (!task.complete)
|
||||
|
@ -714,7 +917,6 @@ Index: a/block/vitastor.c
|
|||
+ VitastorRPC *task = opaque;
|
||||
+ VitastorClient *client = task->bs->opaque;
|
||||
+ task->ret = retval;
|
||||
+ task->complete = 1;
|
||||
+ if (retval >= 0)
|
||||
+ {
|
||||
+ task->bitmap = bitmap;
|
||||
|
@ -726,15 +928,17 @@ Index: a/block/vitastor.c
|
|||
+ client->last_bitmap = bitmap;
|
||||
+ }
|
||||
+ }
|
||||
+ if (qemu_coroutine_self() != task->co)
|
||||
+ {
|
||||
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||
+ aio_co_wake(task->co);
|
||||
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
|
||||
+ replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
|
||||
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
+#elif QEMU_VERSION_MAJOR >= 2
|
||||
+ task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
+ qemu_bh_schedule(task->bh);
|
||||
+#else
|
||||
+ qemu_coroutine_enter(task->co, NULL);
|
||||
+ qemu_aio_release(task);
|
||||
+ task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
|
||||
+ qemu_bh_schedule(task->bh);
|
||||
+#endif
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int coroutine_fn vitastor_co_block_status(
|
||||
|
@ -775,6 +979,7 @@ Index: a/block/vitastor.c
|
|||
+ task.bitmap = client->last_bitmap = NULL;
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+ while (!task.complete)
|
||||
+ {
|
||||
|
@ -860,7 +1065,8 @@ Index: a/block/vitastor.c
|
|||
+ vitastor_co_init_task(bs, &task);
|
||||
+
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ vitastor_c_sync(client->proxy, vitastor_co_generic_bh_cb, &task);
|
||||
+ vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
|
||||
+ vitastor_schedule_uring_handler(client);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
+
|
||||
+ while (!task.complete)
|
||||
|
|
Loading…
Reference in New Issue