From bf8b97b3d8c1101f7d60509397a4f56bfb2a04ae Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Fri, 27 Oct 2023 21:04:53 +0300 Subject: [PATCH] Fix truncation, add write-back cache support --- debian/changelog | 7 ++ debian/patches/pve-qemu-7.1-vitastor.patch | 104 ++++++++++++++++----- 2 files changed, 88 insertions(+), 23 deletions(-) diff --git a/debian/changelog b/debian/changelog index e38a77b..d75054c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +pve-qemu-kvm (7.1.0-4+vitastor5) bullseye; urgency=medium + + * Fix truncation + * Add write-back cache support + + -- Vitaliy Filippov Fri, 27 Oct 2023 21:04:05 +0300 + pve-qemu-kvm (7.1.0-4+vitastor4) bullseye; urgency=medium * Improve performance by adding io_uring support diff --git a/debian/patches/pve-qemu-7.1-vitastor.patch b/debian/patches/pve-qemu-7.1-vitastor.patch index 21eaa92..e72fcec 100644 --- a/debian/patches/pve-qemu-7.1-vitastor.patch +++ b/debian/patches/pve-qemu-7.1-vitastor.patch @@ -171,7 +171,7 @@ Index: a/block/vitastor.c =================================================================== --- /dev/null +++ a/block/vitastor.c -@@ -0,0 +1,1017 @@ +@@ -0,0 +1,1075 @@ +// Copyright (c) Vitaliy Filippov, 2019+ +// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details) + @@ -371,7 +371,11 @@ Index: a/block/vitastor.c + !strcmp(name, "rdma-mtu")) + { + unsigned long long num_val; ++#if QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1 + if (parse_uint_full(value, &num_val, 0)) ++#else ++ if (parse_uint_full(value, 0, &num_val)) ++#endif + { + error_setg(errp, "Illegal %s: %s", name, value); + goto out; @@ -414,10 +418,7 @@ Index: a/block/vitastor.c + VitastorClient *client = (VitastorClient*)opaque; + qemu_mutex_lock(&client->mutex); + client->bh_uring_scheduled = 0; -+ do -+ { -+ vitastor_c_uring_handle_events(client->proxy); -+ } while (vitastor_c_uring_has_work(client->proxy)); ++ vitastor_c_uring_handle_events(client->proxy); + qemu_mutex_unlock(&client->mutex); +} + @@ -497,7 +498,7 @@ Index: a/block/vitastor.c +static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque) +{ + aio_set_fd_handler(ctx, fd, -+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 ++#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 && (QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1) + 0 /*is_external*/, +#endif + fd_read, @@ -561,6 +562,45 @@ Index: a/block/vitastor.c + ); +} + ++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 ++typedef struct str_array ++{ ++ const char **items; ++ int len, alloc; ++} str_array; ++ ++static void strarray_push(str_array *a, const char *str) ++{ ++ if (a->len >= a->alloc) ++ { ++ a->alloc = !a->alloc ? 4 : 2*a->alloc; ++ a->items = (const char**)realloc(a->items, a->alloc*sizeof(char*)); ++ if (!a->items) ++ { ++ fprintf(stderr, "bad alloc\n"); ++ abort(); ++ } ++ } ++ a->items[a->len++] = str; ++} ++ ++static void strarray_push_kv(str_array *a, const char *key, const char *value) ++{ ++ if (key && value) ++ { ++ strarray_push(a, key); ++ strarray_push(a, value); ++ } ++} ++ ++static void strarray_free(str_array *a) ++{ ++ free(a->items); ++ a->items = NULL; ++ a->len = a->alloc = 0; ++} ++#endif ++ +static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) +{ + VitastorRPC task; @@ -580,22 +620,19 @@ Index: a/block/vitastor.c + client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0); + client->ctx = bdrv_get_aio_context(bs); +#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 -+ client->proxy = vitastor_c_create_qemu_uring( -+ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix, -+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0 -+ ); -+ if (!client->proxy) -+ { -+ fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower\n", strerror(errno)); -+ client->uring_eventfd = -1; -+#endif -+ client->proxy = vitastor_c_create_qemu( -+ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix, -+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0 -+ ); -+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 -+ } -+ else ++ str_array opt = {}; ++ strarray_push_kv(&opt, "config_path", qdict_get_try_str(options, "config-path")); ++ strarray_push_kv(&opt, "etcd_address", qdict_get_try_str(options, "etcd-host")); ++ strarray_push_kv(&opt, "etcd_prefix", qdict_get_try_str(options, "etcd-prefix")); ++ strarray_push_kv(&opt, "use_rdma", qdict_get_try_str(options, "use-rdma")); ++ strarray_push_kv(&opt, "rdma_device", qdict_get_try_str(options, "rdma-device")); ++ strarray_push_kv(&opt, "rdma_port_num", qdict_get_try_str(options, "rdma-port-num")); ++ strarray_push_kv(&opt, "rdma_gid_index", qdict_get_try_str(options, "rdma-gid-index")); ++ strarray_push_kv(&opt, "rdma_mtu", qdict_get_try_str(options, "rdma-mtu")); ++ strarray_push_kv(&opt, "client_writeback_allowed", (flags & BDRV_O_NOCACHE) ? "0" : "1"); ++ client->proxy = vitastor_c_create_uring_json(opt.items, opt.len); ++ strarray_free(&opt); ++ if (client->proxy) + { + client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy); + if (client->uring_eventfd < 0) @@ -607,6 +644,19 @@ Index: a/block/vitastor.c + } + universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client); + } ++ else ++ { ++ // Writeback cache is unusable without io_uring because the client can't correctly flush on exit ++ fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower%s\n", ++ strerror(errno), (flags & BDRV_O_NOCACHE ? "" : " and writeback cache will be disabled")); ++#endif ++ client->uring_eventfd = -1; ++ client->proxy = vitastor_c_create_qemu( ++ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix, ++ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0 ++ ); ++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 ++ } +#endif + image = client->image = g_strdup(qdict_get_try_str(options, "image")); + client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0; @@ -666,6 +716,10 @@ Index: a/block/vitastor.c + return -1; + } + bs->total_sectors = client->size / BDRV_SECTOR_SIZE; ++#if QEMU_VERSION_MAJOR > 5 || QEMU_VERSION_MAJOR == 5 && QEMU_VERSION_MINOR >= 1 ++ /* When extending regular files, we get zeros from the OS */ ++ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; ++#endif + //client->aio_context = bdrv_get_aio_context(bs); + qdict_del(options, "use-rdma"); + qdict_del(options, "rdma-mtu"); @@ -762,7 +816,11 @@ Index: a/block/vitastor.c + } + + // TODO: Resize inode to bytes -+ client->size = offset / BDRV_SECTOR_SIZE; ++#if QEMU_VERSION_MAJOR >= 4 ++ client->size = exact || client->size < offset ? offset : client->size; ++#else ++ client->size = offset; ++#endif + + return 0; +}