From a6885c7d11a362a21080a776e8319024a5d4d83e Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Fri, 27 Oct 2023 20:54:25 +0300 Subject: [PATCH] Add Vitastor support --- Makefile | 2 +- debian/changelog | 6 + debian/control | 1 + debian/patches/pve-qemu-8.1-vitastor.patch | 1271 ++++++++++++++++++++ debian/patches/series | 1 + 5 files changed, 1280 insertions(+), 1 deletion(-) create mode 100644 debian/patches/pve-qemu-8.1-vitastor.patch diff --git a/Makefile b/Makefile index cad130e..3052349 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ $(BUILDDIR): submodule deb kvm: $(DEBS) $(DEB_DBG): $(DEB) $(DEB): $(BUILDDIR) - cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j + cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j32 lintian $(DEBS) sbuild: $(DSC) diff --git a/debian/changelog b/debian/changelog index 970fbac..9aa2046 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +pve-qemu-kvm (8.1.5-2+vitastor1) bookworm; urgency=medium + + * Add Vitastor support + + -- Vitaliy Filippov Wed, 07 Feb 2024 01:11:00 +0300 + pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium * work around for a situation where guest IO might get stuck, if the VM is diff --git a/debian/control b/debian/control index b228c00..e6fc777 100644 --- a/debian/control +++ b/debian/control @@ -59,6 +59,7 @@ Depends: ceph-common (>= 0.48), libspice-server1 (>= 0.14.0~), libusb-1.0-0 (>= 1.0.17-1), libusbredirparser1 (>= 0.6-2), + vitastor-client (>= 0.9.4), libuuid1, ${misc:Depends}, ${shlibs:Depends}, diff --git a/debian/patches/pve-qemu-8.1-vitastor.patch b/debian/patches/pve-qemu-8.1-vitastor.patch new file mode 100644 index 0000000..eeee730 --- /dev/null +++ b/debian/patches/pve-qemu-8.1-vitastor.patch @@ -0,0 +1,1271 @@ +Index: pve-qemu-kvm-8.1.2/block/meson.build +=================================================================== +--- pve-qemu-kvm-8.1.2.orig/block/meson.build ++++ pve-qemu-kvm-8.1.2/block/meson.build +@@ -123,6 +123,7 @@ foreach m : [ + [libnfs, 'nfs', 
files('nfs.c')], + [libssh, 'ssh', files('ssh.c')], + [rbd, 'rbd', files('rbd.c')], ++ [vitastor, 'vitastor', files('vitastor.c')], + ] + if m[0].found() + module_ss = ss.source_set() +Index: pve-qemu-kvm-8.1.2/meson.build +=================================================================== +--- pve-qemu-kvm-8.1.2.orig/meson.build ++++ pve-qemu-kvm-8.1.2/meson.build +@@ -1303,6 +1303,26 @@ if not get_option('rbd').auto() or have_ + endif + endif + ++vitastor = not_found ++if not get_option('vitastor').auto() or have_block ++ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'], ++ required: get_option('vitastor')) ++ if libvitastor_client.found() ++ if cc.links(''' ++ #include ++ int main(void) { ++ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++ return 0; ++ }''', dependencies: libvitastor_client) ++ vitastor = declare_dependency(dependencies: libvitastor_client) ++ elif get_option('vitastor').enabled() ++ error('could not link libvitastor_client') ++ else ++ warning('could not link libvitastor_client, disabling') ++ endif ++ endif ++endif ++ + glusterfs = not_found + glusterfs_ftruncate_has_stat = false + glusterfs_iocb_has_stat = false +@@ -2123,6 +2143,7 @@ if numa.found() + endif + config_host_data.set('CONFIG_OPENGL', opengl.found()) + config_host_data.set('CONFIG_RBD', rbd.found()) ++config_host_data.set('CONFIG_VITASTOR', vitastor.found()) + config_host_data.set('CONFIG_RDMA', rdma.found()) + config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack')) + config_host_data.set('CONFIG_SDL', sdl.found()) +@@ -4298,6 +4319,7 @@ summary_info += {'fdt support': fd + summary_info += {'libcap-ng support': libcap_ng} + summary_info += {'bpf support': libbpf} + summary_info += {'rbd support': rbd} ++summary_info += {'vitastor support': vitastor} + summary_info += {'smartcard support': cacard} + summary_info += {'U2F support': u2f} + summary_info += {'libusb': libusb} +Index: pve-qemu-kvm-8.1.2/meson_options.txt 
+=================================================================== +--- pve-qemu-kvm-8.1.2.orig/meson_options.txt ++++ pve-qemu-kvm-8.1.2/meson_options.txt +@@ -186,6 +186,8 @@ option('lzo', type : 'feature', value : + description: 'lzo compression support') + option('rbd', type : 'feature', value : 'auto', + description: 'Ceph block device driver') ++option('vitastor', type : 'feature', value : 'auto', ++ description: 'Vitastor block device driver') + option('opengl', type : 'feature', value : 'auto', + description: 'OpenGL support') + option('rdma', type : 'feature', value : 'auto', +Index: pve-qemu-kvm-8.1.2/qapi/block-core.json +=================================================================== +--- pve-qemu-kvm-8.1.2.orig/qapi/block-core.json ++++ pve-qemu-kvm-8.1.2/qapi/block-core.json +@@ -3403,7 +3403,7 @@ + 'raw', 'rbd', + { 'name': 'replication', 'if': 'CONFIG_REPLICATION' }, + 'pbs', +- 'ssh', 'throttle', 'vdi', 'vhdx', ++ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', + { 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' }, + { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' }, + { 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' }, +@@ -4465,6 +4465,28 @@ + '*server': ['InetSocketAddressBase'] } } + + ## ++# @BlockdevOptionsVitastor: ++# ++# Driver specific block device options for vitastor ++# ++# @image: Image name ++# @inode: Inode number ++# @pool: Pool ID ++# @size: Desired image size in bytes ++# @config-path: Path to Vitastor configuration ++# @etcd-host: etcd connection address(es) ++# @etcd-prefix: etcd key/value prefix ++## ++{ 'struct': 'BlockdevOptionsVitastor', ++ 'data': { '*inode': 'uint64', ++ '*pool': 'uint64', ++ '*size': 'uint64', ++ '*image': 'str', ++ '*config-path': 'str', ++ '*etcd-host': 'str', ++ '*etcd-prefix': 'str' } } ++ ++## + # @ReplicationMode: + # + # An enumeration of replication modes. 
+@@ -4923,6 +4945,7 @@ + 'throttle': 'BlockdevOptionsThrottle', + 'vdi': 'BlockdevOptionsGenericFormat', + 'vhdx': 'BlockdevOptionsGenericFormat', ++ 'vitastor': 'BlockdevOptionsVitastor', + 'virtio-blk-vfio-pci': + { 'type': 'BlockdevOptionsVirtioBlkVfioPci', + 'if': 'CONFIG_BLKIO' }, +@@ -5360,6 +5383,17 @@ + '*encrypt' : 'RbdEncryptionCreateOptions' } } + + ## ++# @BlockdevCreateOptionsVitastor: ++# ++# Driver specific image creation options for Vitastor. ++# ++# @size: Size of the virtual disk in bytes ++## ++{ 'struct': 'BlockdevCreateOptionsVitastor', ++ 'data': { 'location': 'BlockdevOptionsVitastor', ++ 'size': 'size' } } ++ ++## + # @BlockdevVmdkSubformat: + # + # Subformat options for VMDK images +@@ -5581,6 +5615,7 @@ + 'ssh': 'BlockdevCreateOptionsSsh', + 'vdi': 'BlockdevCreateOptionsVdi', + 'vhdx': 'BlockdevCreateOptionsVhdx', ++ 'vitastor': 'BlockdevCreateOptionsVitastor', + 'vmdk': 'BlockdevCreateOptionsVmdk', + 'vpc': 'BlockdevCreateOptionsVpc' + } } +Index: pve-qemu-kvm-8.1.2/scripts/ci/org.centos/stream/8/x86_64/configure +=================================================================== +--- pve-qemu-kvm-8.1.2.orig/scripts/ci/org.centos/stream/8/x86_64/configure ++++ pve-qemu-kvm-8.1.2/scripts/ci/org.centos/stream/8/x86_64/configure +@@ -30,7 +30,7 @@ + --with-suffix="qemu-kvm" \ + --firmwarepath=/usr/share/qemu-firmware \ + --target-list="x86_64-softmmu" \ +---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \ ++--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \ + --audio-drv-list="" \ + --block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \ + --with-coroutine=ucontext \ +@@ -176,6 +176,7 @@ + --enable-opengl \ + --enable-pie \ + --enable-rbd \ ++--enable-vitastor \ + --enable-rdma \ + --enable-seccomp \ + --enable-snappy \ +Index: pve-qemu-kvm-8.1.2/scripts/meson-buildoptions.sh 
+=================================================================== +--- pve-qemu-kvm-8.1.2.orig/scripts/meson-buildoptions.sh ++++ pve-qemu-kvm-8.1.2/scripts/meson-buildoptions.sh +@@ -153,6 +153,7 @@ meson_options_help() { + printf "%s\n" ' qed qed image format support' + printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)' + printf "%s\n" ' rbd Ceph block device driver' ++ printf "%s\n" ' vitastor Vitastor block device driver' + printf "%s\n" ' rdma Enable RDMA-based migration' + printf "%s\n" ' replication replication support' + printf "%s\n" ' sdl SDL user interface' +@@ -416,6 +417,8 @@ _meson_option_parse() { + --disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;; + --enable-rbd) printf "%s" -Drbd=enabled ;; + --disable-rbd) printf "%s" -Drbd=disabled ;; ++ --enable-vitastor) printf "%s" -Dvitastor=enabled ;; ++ --disable-vitastor) printf "%s" -Dvitastor=disabled ;; + --enable-rdma) printf "%s" -Drdma=enabled ;; + --disable-rdma) printf "%s" -Drdma=disabled ;; + --enable-replication) printf "%s" -Dreplication=enabled ;; +Index: a/block/vitastor.c +=================================================================== +--- /dev/null ++++ a/block/vitastor.c +@@ -0,0 +1,1076 @@ ++// Copyright (c) Vitaliy Filippov, 2019+ ++// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details) ++ ++// QEMU block driver ++ ++#ifdef VITASTOR_SOURCE_TREE ++#define BUILD_DSO ++#define _GNU_SOURCE ++#endif ++#include "qemu/osdep.h" ++#include "qemu/main-loop.h" ++#if QEMU_VERSION_MAJOR >= 8 ++#include "block/block-io.h" ++#endif ++#include "block/block_int.h" ++#include "qapi/error.h" ++#include "qapi/qmp/qdict.h" ++#include "qapi/qmp/qerror.h" ++#include "qemu/uri.h" ++#include "qemu/error-report.h" ++#include "qemu/module.h" ++#include "qemu/option.h" ++ ++#if QEMU_VERSION_MAJOR >= 3 ++#include "qemu/units.h" ++#include "block/qdict.h" ++#include "qemu/cutils.h" ++#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 10 ++#include "qemu/cutils.h" 
++#include "qapi/qmp/qstring.h" ++#include "qapi/qmp/qjson.h" ++#else ++#include "qapi/qmp/qint.h" ++#define qdict_put_int(options, name, num_val) qdict_put_obj(options, name, QOBJECT(qint_from_int(num_val))) ++#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value))) ++#define qobject_unref QDECREF ++#endif ++#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4 ++#include "sysemu/replay.h" ++#else ++#include "sysemu/sysemu.h" ++#endif ++ ++#include "vitastor_c.h" ++ ++#ifdef VITASTOR_SOURCE_TREE ++void qemu_module_dummy(void) ++{ ++} ++ ++void DSO_STAMP_FUN(void) ++{ ++} ++#endif ++ ++typedef struct VitastorFdData VitastorFdData; ++ ++typedef struct VitastorClient ++{ ++ void *proxy; ++ int uring_eventfd; ++ ++ void *watch; ++ char *config_path; ++ char *etcd_host; ++ char *etcd_prefix; ++ char *image; ++ int skip_parents; ++ uint64_t inode; ++ uint64_t pool; ++ uint64_t size; ++ long readonly; ++ int use_rdma; ++ char *rdma_device; ++ int rdma_port_num; ++ int rdma_gid_index; ++ int rdma_mtu; ++ QemuMutex mutex; ++ AioContext *ctx; ++ VitastorFdData **fds; ++ int fd_count, fd_alloc; ++ int bh_uring_scheduled; ++ ++ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len; ++ uint32_t last_bitmap_granularity; ++ uint8_t *last_bitmap; ++} VitastorClient; ++ ++typedef struct VitastorFdData ++{ ++ VitastorClient *cli; ++ int fd; ++ IOHandler *fd_read, *fd_write; ++ void *opaque; ++} VitastorFdData; ++ ++typedef struct VitastorRPC ++{ ++ BlockDriverState *bs; ++ Coroutine *co; ++ QEMUIOVector *iov; ++ long ret; ++ int complete; ++ uint64_t inode, offset, len; ++ uint32_t bitmap_granularity; ++ uint8_t *bitmap; ++#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8 ++ QEMUBH *bh; ++#endif ++} VitastorRPC; ++ ++#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8 ++typedef struct VitastorBH ++{ ++ VitastorClient *cli; ++ QEMUBH *bh; ++} VitastorBH; ++#endif ++ ++static void 
vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task); ++static void vitastor_co_generic_cb(void *opaque, long retval); ++static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version); ++static void vitastor_close(BlockDriverState *bs); ++ ++static char *qemu_vitastor_next_tok(char *src, char delim, char **p) ++{ ++ char *end; ++ *p = NULL; ++ for (end = src; *end; ++end) ++ { ++ if (*end == delim) ++ break; ++ if (*end == '\\' && end[1] != '\0') ++ end++; ++ } ++ if (*end == delim) ++ { ++ *p = end + 1; ++ *end = '\0'; ++ } ++ return src; ++} ++ ++static void qemu_vitastor_unescape(char *src) ++{ ++ char *p; ++ for (p = src; *src; ++src, ++p) ++ { ++ if (*src == '\\' && src[1] != '\0') ++ src++; ++ *p = *src; ++ } ++ *p = '\0'; ++} ++ ++// vitastor[:key=value]* ++// vitastor[:etcd_host=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3] ++// vitastor:config_path=/etc/vitastor/vitastor.conf:image=testimg ++static void vitastor_parse_filename(const char *filename, QDict *options, Error **errp) ++{ ++ const char *start; ++ char *p, *buf; ++ ++ if (!strstart(filename, "vitastor:", &start)) ++ { ++ error_setg(errp, "File name must start with 'vitastor:'"); ++ return; ++ } ++ ++ buf = g_strdup(start); ++ p = buf; ++ ++ // The following are all key/value pairs ++ while (p) ++ { ++ int i; ++ char *name, *value; ++ name = qemu_vitastor_next_tok(p, '=', &p); ++ if (!p) ++ { ++ error_setg(errp, "conf option %s has no value", name); ++ goto out; /* errp is set: skip the image/inode checks below, they would call error_setg() a second time */ ++ } ++ for (i = 0; i < strlen(name); i++) ++ if (name[i] == '_') ++ name[i] = '-'; ++ qemu_vitastor_unescape(name); ++ value = qemu_vitastor_next_tok(p, ':', &p); ++ qemu_vitastor_unescape(value); ++ if (!strcmp(name, "inode") || ++ !strcmp(name, "pool") || ++ !strcmp(name, "size") || ++ !strcmp(name, "skip-parents") || ++ !strcmp(name, "use-rdma") || ++ !strcmp(name, "rdma-port_num") || ++ !strcmp(name, "rdma-gid-index") || ++ !strcmp(name, "rdma-mtu")) ++ { ++#if QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && 
QEMU_VERSION_MINOR < 1 ++ unsigned long long num_val; ++ if (parse_uint_full(value, &num_val, 0)) ++#else ++ uint64_t num_val; ++ if (parse_uint_full(value, 0, &num_val)) ++#endif ++ { ++ error_setg(errp, "Illegal %s: %s", name, value); ++ goto out; ++ } ++ qdict_put_int(options, name, num_val); ++ } ++ else ++ { ++ qdict_put_str(options, name, value); ++ } ++ } ++ if (!qdict_get_try_str(options, "image")) ++ { ++ if (!qdict_get_try_int(options, "inode", 0)) ++ { ++ error_setg(errp, "one of image (name) and inode (number) must be specified"); ++ goto out; ++ } ++ if (!(qdict_get_try_int(options, "inode", 0) >> (64-POOL_ID_BITS)) && ++ !qdict_get_try_int(options, "pool", 0)) ++ { ++ error_setg(errp, "pool number must be specified or included in the inode number"); ++ goto out; ++ } ++ if (!qdict_get_try_int(options, "size", 0)) ++ { ++ error_setg(errp, "size must be specified when inode number is used instead of image name"); ++ goto out; ++ } ++ } ++ ++out: ++ g_free(buf); ++ return; ++} ++ ++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 ++static void vitastor_uring_handler(void *opaque) ++{ ++ VitastorClient *client = (VitastorClient*)opaque; ++ qemu_mutex_lock(&client->mutex); ++ client->bh_uring_scheduled = 0; ++ vitastor_c_uring_handle_events(client->proxy); ++ qemu_mutex_unlock(&client->mutex); ++} ++ ++#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8 ++static void vitastor_bh_uring_handler(void *opaque) ++{ ++ VitastorBH *vbh = opaque; ++ vitastor_uring_handler(vbh->cli); /* run the io_uring handler defined above for the owning client */ ++ qemu_bh_delete(vbh->bh); ++ free(vbh); ++} ++#endif ++ ++static void vitastor_schedule_uring_handler(VitastorClient *client) ++{ ++ void *opaque = client; ++ if (client->uring_eventfd >= 0 && !client->bh_uring_scheduled) ++ { ++ client->bh_uring_scheduled = 1; ++#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 ++ replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque); ++#elif QEMU_VERSION_MAJOR >= 3 || 
QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8 ++ aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque); ++#else ++ VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH)); ++ vbh->cli = client; ++#if QEMU_VERSION_MAJOR >= 2 ++ vbh->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_bh_uring_handler, vbh); ++#else ++ vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh); ++#endif ++ qemu_bh_schedule(vbh->bh); ++#endif ++ } ++} ++#else ++static void vitastor_schedule_uring_handler(VitastorClient *client) ++{ ++} ++#endif ++ ++static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task) ++{ ++ BlockDriverState *bs = task->bs; ++ VitastorClient *client = bs->opaque; ++ task->co = qemu_coroutine_self(); ++ ++ qemu_mutex_lock(&client->mutex); ++ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task); ++ vitastor_schedule_uring_handler(client); ++ qemu_mutex_unlock(&client->mutex); ++ ++ while (!task->complete) ++ { ++ qemu_coroutine_yield(); ++ } ++} ++ ++static void vitastor_aio_fd_read(void *fddv) ++{ ++ VitastorFdData *fdd = (VitastorFdData*)fddv; ++ qemu_mutex_lock(&fdd->cli->mutex); ++ fdd->fd_read(fdd->opaque); ++ vitastor_schedule_uring_handler(fdd->cli); ++ qemu_mutex_unlock(&fdd->cli->mutex); ++} ++ ++static void vitastor_aio_fd_write(void *fddv) ++{ ++ VitastorFdData *fdd = (VitastorFdData*)fddv; ++ qemu_mutex_lock(&fdd->cli->mutex); ++ fdd->fd_write(fdd->opaque); ++ vitastor_schedule_uring_handler(fdd->cli); ++ qemu_mutex_unlock(&fdd->cli->mutex); ++} ++ ++static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque) ++{ ++ aio_set_fd_handler(ctx, fd, ++#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 && (QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1) ++ 0 /*is_external*/, ++#endif ++ fd_read, ++ fd_write, ++#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || 
QEMU_VERSION_MAJOR < 1 ++ NULL /*io_flush*/, ++#endif ++#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3 ++ NULL /*io_poll*/, ++#endif ++#if QEMU_VERSION_MAJOR >= 7 ++ NULL /*io_poll_ready*/, ++#endif ++ opaque); ++} ++ ++static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque) ++{ ++ VitastorClient *client = (VitastorClient*)vcli; ++ VitastorFdData *fdd = NULL; ++ int i; ++ for (i = 0; i < client->fd_count; i++) ++ { ++ if (client->fds[i]->fd == fd) ++ { ++ if (fd_read || fd_write) ++ { ++ fdd = client->fds[i]; ++ fdd->opaque = opaque; ++ fdd->fd_read = fd_read; ++ fdd->fd_write = fd_write; ++ } ++ else ++ { ++ for (int j = i+1; j < client->fd_count; j++) ++ client->fds[j-1] = client->fds[j]; ++ client->fd_count--; ++ } ++ break; ++ } ++ } ++ if ((fd_read || fd_write) && !fdd) ++ { ++ fdd = (VitastorFdData*)malloc(sizeof(VitastorFdData)); ++ fdd->cli = client; ++ fdd->fd = fd; ++ fdd->fd_read = fd_read; ++ fdd->fd_write = fd_write; ++ fdd->opaque = opaque; ++ if (client->fd_count >= client->fd_alloc) ++ { ++ client->fd_alloc = client->fd_alloc*2; ++ if (client->fd_alloc < 16) ++ client->fd_alloc = 16; ++ client->fds = (VitastorFdData**)realloc(client->fds, sizeof(VitastorFdData*) * client->fd_alloc); ++ } ++ client->fds[client->fd_count++] = fdd; ++ } ++ universal_aio_set_fd_handler( ++ client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd ++ ); ++} ++ ++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 ++typedef struct str_array ++{ ++ const char **items; ++ int len, alloc; ++} str_array; ++ ++static void strarray_push(str_array *a, const char *str) ++{ ++ if (a->len >= a->alloc) ++ { ++ a->alloc = !a->alloc ? 
4 : 2*a->alloc; ++ a->items = (const char**)realloc(a->items, a->alloc*sizeof(char*)); ++ if (!a->items) ++ { ++ fprintf(stderr, "bad alloc\n"); ++ abort(); ++ } ++ } ++ a->items[a->len++] = str; ++} ++ ++static void strarray_push_kv(str_array *a, const char *key, const char *value) ++{ ++ if (key && value) ++ { ++ strarray_push(a, key); ++ strarray_push(a, value); ++ } ++} ++ ++static void strarray_free(str_array *a) ++{ ++ free(a->items); ++ a->items = NULL; ++ a->len = a->alloc = 0; ++} ++#endif ++ ++static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) ++{ ++ VitastorRPC task; ++ VitastorClient *client = bs->opaque; ++ void *image = NULL; ++ int64_t ret = 0; ++ qemu_mutex_init(&client->mutex); ++ client->config_path = g_strdup(qdict_get_try_str(options, "config-path")); ++ // FIXME: Rename to etcd_address ++ client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host")); ++ client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix")); ++ client->skip_parents = qdict_get_try_int(options, "skip-parents", 0); ++ client->use_rdma = qdict_get_try_int(options, "use-rdma", -1); ++ client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device")); ++ client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0); ++ client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0); ++ client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0); ++ client->ctx = bdrv_get_aio_context(bs); ++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 ++ str_array opt = {}; ++ strarray_push_kv(&opt, "config_path", qdict_get_try_str(options, "config-path")); ++ strarray_push_kv(&opt, "etcd_address", qdict_get_try_str(options, "etcd-host")); ++ strarray_push_kv(&opt, "etcd_prefix", qdict_get_try_str(options, "etcd-prefix")); ++ strarray_push_kv(&opt, "use_rdma", qdict_get_try_str(options, "use-rdma")); ++ strarray_push_kv(&opt, "rdma_device", qdict_get_try_str(options, "rdma-device")); 
++ strarray_push_kv(&opt, "rdma_port_num", qdict_get_try_str(options, "rdma-port-num")); ++ strarray_push_kv(&opt, "rdma_gid_index", qdict_get_try_str(options, "rdma-gid-index")); ++ strarray_push_kv(&opt, "rdma_mtu", qdict_get_try_str(options, "rdma-mtu")); ++ strarray_push_kv(&opt, "client_writeback_allowed", (flags & BDRV_O_NOCACHE) ? "0" : "1"); ++ client->proxy = vitastor_c_create_uring_json(opt.items, opt.len); ++ strarray_free(&opt); ++ if (client->proxy) ++ { ++ client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy); ++ if (client->uring_eventfd < 0) ++ { ++ fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno)); ++ error_setg(errp, "failed to create io_uring eventfd"); ++ vitastor_close(bs); ++ return -1; ++ } ++ universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client); ++ } ++ else ++ { ++ // Writeback cache is unusable without io_uring because the client can't correctly flush on exit ++ fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower%s\n", ++ strerror(errno), (flags & BDRV_O_NOCACHE ? "" : " and writeback cache will be disabled")); ++#endif ++ client->uring_eventfd = -1; ++ client->proxy = vitastor_c_create_qemu( ++ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix, ++ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0 ++ ); ++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 ++ } ++#endif ++ image = client->image = g_strdup(qdict_get_try_str(options, "image")); ++ client->readonly = (flags & BDRV_O_RDWR) ? 
1 : 0; ++ // Get image metadata (size and readonly flag) or just wait until the client is ready ++ if (!image) ++ client->image = (char*)"x"; ++ task.complete = 0; ++ task.bs = bs; ++ if (qemu_in_coroutine()) ++ { ++ vitastor_co_get_metadata(&task); ++ } ++ else ++ { ++#if QEMU_VERSION_MAJOR >= 8 ++ aio_co_enter(bdrv_get_aio_context(bs), qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task)); ++#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3 ++ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task)); ++#else ++ qemu_coroutine_enter(qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task)); ++#endif ++ BDRV_POLL_WHILE(bs, !task.complete); ++ } ++ client->image = image; ++ if (client->image) ++ { ++ client->watch = (void*)task.ret; ++ client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch); ++ client->size = vitastor_c_inode_get_size(client->watch); ++ if (!vitastor_c_inode_get_num(client->watch)) ++ { ++ error_setg(errp, "image does not exist"); ++ vitastor_close(bs); ++ return -1; ++ } ++ if (!client->size) ++ { ++ client->size = qdict_get_try_int(options, "size", 0); ++ } ++ } ++ else ++ { ++ client->watch = NULL; ++ client->inode = qdict_get_try_int(options, "inode", 0); ++ client->pool = qdict_get_try_int(options, "pool", 0); ++ if (client->pool) ++ { ++ client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS)); ++ } ++ client->size = qdict_get_try_int(options, "size", 0); ++ vitastor_c_close_watch(client->proxy, (void*)task.ret); ++ } ++ if (!client->size) ++ { ++ error_setg(errp, "image size not specified"); ++ vitastor_close(bs); ++ return -1; ++ } ++ bs->total_sectors = client->size / BDRV_SECTOR_SIZE; ++#if QEMU_VERSION_MAJOR > 5 || QEMU_VERSION_MAJOR == 5 && QEMU_VERSION_MINOR >= 1 ++ /* When extending regular files, we get zeros from the OS */ ++ 
bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; ++#endif ++ //client->aio_context = bdrv_get_aio_context(bs); ++ qdict_del(options, "use-rdma"); ++ qdict_del(options, "rdma-mtu"); ++ qdict_del(options, "rdma-gid-index"); ++ qdict_del(options, "rdma-port-num"); ++ qdict_del(options, "rdma-device"); ++ qdict_del(options, "config-path"); ++ qdict_del(options, "etcd-host"); ++ qdict_del(options, "etcd-prefix"); ++ qdict_del(options, "image"); ++ qdict_del(options, "inode"); ++ qdict_del(options, "pool"); ++ qdict_del(options, "size"); ++ qdict_del(options, "skip-parents"); ++ return ret; ++} ++ ++static void vitastor_close(BlockDriverState *bs) ++{ ++ VitastorClient *client = bs->opaque; ++ vitastor_c_destroy(client->proxy); ++ if (client->fds) ++ { ++ free(client->fds); ++ client->fds = NULL; ++ client->fd_alloc = client->fd_count = 0; ++ } ++ qemu_mutex_destroy(&client->mutex); ++ g_free(client->config_path); /* g_free() accepts NULL, so no guard is needed */ ++ g_free(client->rdma_device); /* g_strdup()ed in vitastor_file_open; was never released before */ ++ if (client->etcd_host) ++ g_free(client->etcd_host); ++ if (client->etcd_prefix) ++ g_free(client->etcd_prefix); ++ if (client->image) ++ g_free(client->image); ++ free(client->last_bitmap); ++ client->last_bitmap = NULL; ++} ++ ++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2 ++static int vitastor_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) ++{ ++ bsz->phys = 4096; ++ bsz->log = 512; ++ return 0; ++} ++#endif ++ ++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12 ++static int coroutine_fn vitastor_co_create_opts( ++#if QEMU_VERSION_MAJOR >= 4 ++ BlockDriver *drv, ++#endif ++ const char *url, QemuOpts *opts, Error **errp) ++{ ++ QDict *options; ++ int ret; ++ ++ options = qdict_new(); ++ vitastor_parse_filename(url, options, errp); ++ if (*errp) ++ { ++ ret = -1; ++ goto out; ++ } ++ ++ // inodes don't require creation in Vitastor. 
FIXME: They will when there will be some metadata ++ ++ ret = 0; ++out: ++ qobject_unref(options); ++ return ret; ++} ++#endif ++ ++#if QEMU_VERSION_MAJOR >= 3 ++static int coroutine_fn vitastor_co_truncate(BlockDriverState *bs, int64_t offset, ++#if QEMU_VERSION_MAJOR >= 4 ++ bool exact, ++#endif ++ PreallocMode prealloc, ++#if QEMU_VERSION_MAJOR >= 5 && QEMU_VERSION_MINOR >= 1 || QEMU_VERSION_MAJOR > 5 || defined RHEL_BDRV_CO_TRUNCATE_FLAGS ++ BdrvRequestFlags flags, ++#endif ++ Error **errp) ++{ ++ VitastorClient *client = bs->opaque; ++ ++ if (prealloc != PREALLOC_MODE_OFF) ++ { ++ error_setg(errp, "Unsupported preallocation mode '%s'", PreallocMode_str(prealloc)); ++ return -ENOTSUP; ++ } ++ ++ // TODO: Resize inode to bytes ++#if QEMU_VERSION_MAJOR >= 4 ++ client->size = exact || client->size < offset ? offset : client->size; ++#else ++ client->size = offset; ++#endif ++ ++ return 0; ++} ++#endif ++ ++static int vitastor_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) ++{ ++ bdi->cluster_size = 4096; ++ return 0; ++} ++ ++static int64_t vitastor_getlength(BlockDriverState *bs) ++{ ++ VitastorClient *client = bs->opaque; ++ return client->size; ++} ++ ++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0 ++static void vitastor_refresh_limits(BlockDriverState *bs, Error **errp) ++#else ++static int vitastor_refresh_limits(BlockDriverState *bs) ++#endif ++{ ++ bs->bl.request_alignment = 4096; ++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 3 ++ bs->bl.min_mem_alignment = 4096; ++#endif ++ bs->bl.opt_mem_alignment = 4096; ++#if QEMU_VERSION_MAJOR < 2 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR == 0 ++ return 0; ++#endif ++} ++ ++//static int64_t vitastor_get_allocated_file_size(BlockDriverState *bs) ++//{ ++// return 0; ++//} ++ ++static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task) ++{ ++ *task = (VitastorRPC) { ++ .co = qemu_coroutine_self(), ++ .bs = bs, ++ }; ++} ++ 
++static void vitastor_co_generic_bh_cb(void *opaque) // Bottom half: marks the VitastorRPC complete and resumes its coroutine
++{
++    VitastorRPC *task = opaque;
++    task->complete = 1; // the issuing coroutine loops on this flag around qemu_coroutine_yield()
++    if (qemu_coroutine_self() != task->co) // no wake-up needed when already running in the waiting coroutine
++    {
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
++        aio_co_wake(task->co);
++#else
++#if QEMU_VERSION_MAJOR == 2
++        qemu_bh_delete(task->bh);
++#endif
++        qemu_coroutine_enter(task->co, NULL);
++        qemu_aio_release(task);
++#endif
++    }
++}
++
++static void vitastor_co_generic_cb(void *opaque, long retval) // Completion callback: stores retval and schedules vitastor_co_generic_bh_cb
++{
++    VitastorRPC *task = opaque;
++    task->ret = retval; // becomes the return value of the waiting coroutine function
++#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
++    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
++#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
++    aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
++#elif QEMU_VERSION_MAJOR >= 2
++    task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
++    qemu_bh_schedule(task->bh);
++#else
++    task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
++    qemu_bh_schedule(task->bh);
++#endif
++}
++
++static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version) // Read completion adapter: `version` is unused
++{
++    vitastor_co_generic_cb(opaque, retval);
++}
++
++static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs, // Coroutine read: submits vitastor_c_read and yields until completion; returns task.ret
++#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
++    int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
++#else
++    uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
++#endif
++)
++{
++    VitastorClient *client = bs->opaque;
++    VitastorRPC task;
++    vitastor_co_init_task(bs, &task);
++    task.iov = iov;
++
++    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode; // prefer the watched inode's number when a watch exists
++    qemu_mutex_lock(&client->mutex); // submission and uring scheduling happen under the client mutex
++    vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
++    vitastor_schedule_uring_handler(client);
++    qemu_mutex_unlock(&client->mutex);
++
++    while (!task.complete) // re-check after every resume; the flag is set by vitastor_co_generic_bh_cb
++    {
++        qemu_coroutine_yield();
++    }
++
++    return task.ret;
++}
++
++static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs, // Coroutine write: drops the cached bitmap, submits vitastor_c_write, yields until completion; returns task.ret
++#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
++    int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
++#else
++    uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
++#endif
++)
++{
++    VitastorClient *client = bs->opaque;
++    VitastorRPC task;
++    vitastor_co_init_task(bs, &task);
++    task.iov = iov;
++
++    if (client->last_bitmap)
++    {
++        // Invalidate last bitmap on write
++        free(client->last_bitmap);
++        client->last_bitmap = NULL;
++    }
++
++    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode; // prefer the watched inode's number when a watch exists
++    qemu_mutex_lock(&client->mutex);
++    vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
++    vitastor_schedule_uring_handler(client);
++    qemu_mutex_unlock(&client->mutex);
++
++    while (!task.complete) // re-check after every resume; the flag is set by vitastor_co_generic_bh_cb
++    {
++        qemu_coroutine_yield();
++    }
++
++    return task.ret;
++}
++
++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
++#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
++static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap) // Bitmap read completion: records the result, caches the bitmap, schedules the generic BH
++{
++    VitastorRPC *task = opaque;
++    VitastorClient *client = task->bs->opaque;
++    task->ret = retval;
++    if (retval >= 0)
++    {
++        task->bitmap = bitmap;
++        if (client->last_bitmap_inode == task->inode && // cache only if the client's last_bitmap_* still describe this request
++            client->last_bitmap_offset == task->offset &&
++            client->last_bitmap_len == task->len)
++        {
++            free(client->last_bitmap);
++            client->last_bitmap = bitmap;
++        }
++    }
++#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
++    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
++#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
++    aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
++#elif QEMU_VERSION_MAJOR >= 2
++    task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
++    qemu_bh_schedule(task->bh);
++#else
++    task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
++    qemu_bh_schedule(task->bh);
++#endif
++}
++
++static int coroutine_fn vitastor_co_block_status( // Reports whether the extent starting at `offset` is allocated, using the inode's allocation bitmap
++    BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
++    int64_t *pnum, int64_t *map, BlockDriverState **file)
++{
++    // Allocated => return BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID
++    // Not allocated => return 0
++    // Error => return -errno
++    // Set pnum to length of the extent, `*map` = `offset`, `*file` = `bs`
++    VitastorRPC task;
++    VitastorClient *client = bs->opaque;
++    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
++    uint8_t bit = 0;
++    if (client->last_bitmap && client->last_bitmap_inode == inode &&
++        client->last_bitmap_offset <= offset &&
++        client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes)) // want_zero only needs the first byte covered; otherwise the whole range
++    {
++        // Use the previously read bitmap
++        task.bitmap_granularity = client->last_bitmap_granularity;
++        task.offset = client->last_bitmap_offset;
++        task.len = client->last_bitmap_len;
++        task.bitmap = client->last_bitmap;
++    }
++    else
++    {
++        // Read bitmap from this position, rounding to full inode PG blocks
++        uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
++        if (!block_size)
++            return -EAGAIN;
++        // Init coroutine
++        vitastor_co_init_task(bs, &task);
++        free(client->last_bitmap);
++        task.inode = client->last_bitmap_inode = inode;
++        task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
++        task.offset = client->last_bitmap_offset = offset / block_size * block_size;
++        task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
++        task.bitmap = client->last_bitmap = NULL; // filled in by vitastor_co_read_bitmap_cb on success
++        qemu_mutex_lock(&client->mutex);
++        vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
++        vitastor_schedule_uring_handler(client);
++        qemu_mutex_unlock(&client->mutex);
++        while (!task.complete)
++        {
++            qemu_coroutine_yield();
++        }
++        if (task.ret < 0)
++        {
++            // Error
++            return task.ret;
++        }
++    }
++    if (want_zero)
++    {
++        // Get precise mapping with all holes
++        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
++        uint64_t bmp_len = task.len / task.bitmap_granularity;
++        uint64_t bmp_end = bmp_pos+1;
++        bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1; // state of the first granule decides allocated vs hole
++        while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit) // extend while following granules share that state
++        {
++            bmp_end++;
++        }
++        *pnum = (bmp_end-bmp_pos) * task.bitmap_granularity;
++    }
++    else
++    {
++        // Get larger allocated extents, possibly with false positives
++        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
++        uint64_t bmp_end = (offset+bytes-task.offset+task.bitmap_granularity-1) / task.bitmap_granularity; // exclusive END index, rounded up (was a length, which skipped bits when bmp_pos > 0); assumes task.len is a multiple of the granularity - TODO confirm
++        while (bmp_pos < bmp_end)
++        {
++            if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8) // byte-aligned with 8+ bits left: test a whole bitmap byte at once
++            {
++                bit = bit || task.bitmap[bmp_pos >> 3];
++                bmp_pos += 8;
++            }
++            else
++            {
++                bit = bit || ((task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1);
++                bmp_pos++;
++            }
++        }
++        *pnum = bytes;
++    }
++    if (bit)
++    {
++        *map = offset;
++        *file = bs;
++    }
++    return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
++}
++#endif
++#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
++// QEMU 1.7-2.11
++static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs, // Sector-based wrapper around vitastor_co_block_status
++    int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
++{
++    int64_t map = 0;
++    int64_t pnumbytes = 0;
++    int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, &pnumbytes, &map, file); // `file` is already BlockDriverState **; pass it through so the caller's out-parameter is written
++    *pnum = pnumbytes/BDRV_SECTOR_SIZE; // convert the byte count back to sectors
++    return r;
++}
++#endif
++#endif
++
++#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
++static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov) // Sector-based wrapper around vitastor_co_preadv
++{
++    return vitastor_co_preadv(bs, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, iov, 0);
++}
++
++static int coroutine_fn vitastor_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov) // Sector-based wrapper around vitastor_co_pwritev
++{
++    return vitastor_co_pwritev(bs, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, iov, 0);
++}
++#endif
++
++static int coroutine_fn vitastor_co_flush(BlockDriverState *bs) // Coroutine flush: submits vitastor_c_sync and yields until completion; returns task.ret
++{
++    VitastorClient *client = bs->opaque;
++    VitastorRPC task;
++    vitastor_co_init_task(bs, &task);
++
++    qemu_mutex_lock(&client->mutex);
++    vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
++    vitastor_schedule_uring_handler(client);
++    qemu_mutex_unlock(&client->mutex);
++
++    while (!task.complete) // re-check after every resume; the flag is set by vitastor_co_generic_bh_cb
++    {
++        qemu_coroutine_yield();
++    }
++
++    return task.ret;
++}
++
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
++static QemuOptsList vitastor_create_opts = { // Image creation options (QemuOptsList form): only the virtual disk size
++    .name = "vitastor-create-opts",
++    .head = QTAILQ_HEAD_INITIALIZER(vitastor_create_opts.head),
++    .desc = {
++        {
++            .name = BLOCK_OPT_SIZE,
++            .type = QEMU_OPT_SIZE,
++            .help = "Virtual disk size"
++        },
++        { /* end of list */ }
++    }
++};
++#else
++static QEMUOptionParameter vitastor_create_opts[] = { // Image creation options (QEMUOptionParameter form): only the virtual disk size
++    {
++        .name = BLOCK_OPT_SIZE,
++        .type = OPT_SIZE,
++        .help = "Virtual disk size"
++    },
++    { NULL }
++};
++#endif
++
++#if QEMU_VERSION_MAJOR >= 4
++static const char *vitastor_strong_runtime_opts[] = { // NULL-terminated option names, referenced by bdrv_vitastor.strong_runtime_opts
++    "inode",
++    "pool",
++    "config-path",
++    "etcd-host",
++    "etcd-prefix",
++
++    NULL
++};
++#endif
++
++static BlockDriver bdrv_vitastor = { // Driver table registered by vitastor_block_init(); members are selected per QEMU version
++    .format_name                    = "vitastor",
++    .protocol_name                  = "vitastor",
++
++    .instance_size                  = sizeof(VitastorClient),
++    .bdrv_parse_filename            = vitastor_parse_filename,
++
++    .bdrv_has_zero_init             = bdrv_has_zero_init_1,
++#if QEMU_VERSION_MAJOR >= 8
++    .bdrv_co_get_info               = vitastor_get_info,
++    .bdrv_co_getlength              = vitastor_getlength,
++#else
++    .bdrv_get_info                  = vitastor_get_info,
++    .bdrv_getlength                 = vitastor_getlength,
++#endif
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
++    .bdrv_probe_blocksizes          = vitastor_probe_blocksizes,
++#endif
++    .bdrv_refresh_limits            = vitastor_refresh_limits,
++
++    // FIXME: Implement it along with per-inode statistics
++    //.bdrv_get_allocated_file_size = vitastor_get_allocated_file_size,
++
++    .bdrv_file_open                 = vitastor_file_open,
++    .bdrv_close                     = vitastor_close,
++
++    // Option list for the create operation
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
++    .create_opts                    = &vitastor_create_opts,
++#else
++    .create_options                 = vitastor_create_opts,
++#endif
++
++    // For qmp_blockdev_create(), used by the qemu monitor / QAPI
++    // Requires patching QAPI IDL, thus unimplemented
++    //.bdrv_co_create               = vitastor_co_create,
++
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
++    // For bdrv_create(), used by qemu-img
++    .bdrv_co_create_opts            = vitastor_co_create_opts,
++#endif
++
++#if QEMU_VERSION_MAJOR >= 3
++    .bdrv_co_truncate               = vitastor_co_truncate,
++#endif
++
++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
++    // For snapshot export
++    .bdrv_co_block_status           = vitastor_co_block_status,
++#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
++    .bdrv_co_get_block_status       = vitastor_co_get_block_status,
++#endif
++#endif
++
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
++    .bdrv_co_preadv                 = vitastor_co_preadv,
++    .bdrv_co_pwritev                = vitastor_co_pwritev,
++#else
++    .bdrv_co_readv                  = vitastor_co_readv,
++    .bdrv_co_writev                 = vitastor_co_writev,
++#endif
++
++    .bdrv_co_flush_to_disk          = vitastor_co_flush,
++
++#if QEMU_VERSION_MAJOR >= 4
++    .strong_runtime_opts            = vitastor_strong_runtime_opts,
++#endif
++};
++
++static void vitastor_block_init(void) // Registers bdrv_vitastor with the block layer
++{
++    bdrv_register(&bdrv_vitastor);
++}
++
++block_init(vitastor_block_init);
diff --git a/debian/patches/series b/debian/patches/series
index ee0028d..0a85ee3 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -60,3 +60,4 @@ pve/0042-Revert-block-rbd-implement-bdrv_co_block_status.patch
 pve/0043-alloc-track-fix-deadlock-during-drop.patch
 pve/0044-migration-for-snapshots-hold-the-BQL-during-setup-ca.patch
 pve/0045-savevm-async-don-t-hold-BQL-during-setup.patch
+pve-qemu-8.1-vitastor.patch