1250 lines
41 KiB
Diff
1250 lines
41 KiB
Diff
Index: pve-qemu-kvm-7.2.0/block/meson.build
|
|
===================================================================
|
|
--- pve-qemu-kvm-7.2.0.orig/block/meson.build
|
|
+++ pve-qemu-kvm-7.2.0/block/meson.build
|
|
@@ -113,6 +113,7 @@ foreach m : [
|
|
[libnfs, 'nfs', files('nfs.c')],
|
|
[libssh, 'ssh', files('ssh.c')],
|
|
[rbd, 'rbd', files('rbd.c')],
|
|
+ [vitastor, 'vitastor', files('vitastor.c')],
|
|
]
|
|
if m[0].found()
|
|
module_ss = ss.source_set()
|
|
Index: pve-qemu-kvm-7.2.0/meson.build
|
|
===================================================================
|
|
--- pve-qemu-kvm-7.2.0.orig/meson.build
|
|
+++ pve-qemu-kvm-7.2.0/meson.build
|
|
@@ -1026,6 +1026,26 @@ if not get_option('rbd').auto() or have_
|
|
endif
|
|
endif
|
|
|
|
+vitastor = not_found
|
|
+if not get_option('vitastor').auto() or have_block
|
|
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
|
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
|
+ if libvitastor_client.found()
|
|
+ if cc.links('''
|
|
+ #include <vitastor_c.h>
|
|
+ int main(void) {
|
|
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
|
+ return 0;
|
|
+ }''', dependencies: libvitastor_client)
|
|
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
|
+ elif get_option('vitastor').enabled()
|
|
+ error('could not link libvitastor_client')
|
|
+ else
|
|
+ warning('could not link libvitastor_client, disabling')
|
|
+ endif
|
|
+ endif
|
|
+endif
|
|
+
|
|
glusterfs = not_found
|
|
glusterfs_ftruncate_has_stat = false
|
|
glusterfs_iocb_has_stat = false
|
|
@@ -1865,6 +1885,7 @@ config_host_data.set('CONFIG_NUMA', numa
|
|
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
|
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
|
config_host_data.set('CONFIG_RBD', rbd.found())
|
|
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
|
config_host_data.set('CONFIG_RDMA', rdma.found())
|
|
config_host_data.set('CONFIG_SDL', sdl.found())
|
|
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
|
@@ -3957,6 +3978,7 @@ if spice_protocol.found()
|
|
summary_info += {' spice server support': spice}
|
|
endif
|
|
summary_info += {'rbd support': rbd}
|
|
+summary_info += {'vitastor support': vitastor}
|
|
summary_info += {'smartcard support': cacard}
|
|
summary_info += {'U2F support': u2f}
|
|
summary_info += {'libusb': libusb}
|
|
Index: pve-qemu-kvm-7.2.0/meson_options.txt
|
|
===================================================================
|
|
--- pve-qemu-kvm-7.2.0.orig/meson_options.txt
|
|
+++ pve-qemu-kvm-7.2.0/meson_options.txt
|
|
@@ -169,6 +169,8 @@ option('lzo', type : 'feature', value :
|
|
description: 'lzo compression support')
|
|
option('rbd', type : 'feature', value : 'auto',
|
|
description: 'Ceph block device driver')
|
|
+option('vitastor', type : 'feature', value : 'auto',
|
|
+ description: 'Vitastor block device driver')
|
|
option('opengl', type : 'feature', value : 'auto',
|
|
description: 'OpenGL support')
|
|
option('rdma', type : 'feature', value : 'auto',
|
|
Index: pve-qemu-kvm-7.2.0/qapi/block-core.json
|
|
===================================================================
|
|
--- pve-qemu-kvm-7.2.0.orig/qapi/block-core.json
|
|
+++ pve-qemu-kvm-7.2.0/qapi/block-core.json
|
|
@@ -3213,7 +3213,7 @@
|
|
'raw', 'rbd',
|
|
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
|
'pbs',
|
|
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
|
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
|
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
|
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
|
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
|
@@ -4223,6 +4223,28 @@
|
|
'*server': ['InetSocketAddressBase'] } }
|
|
|
|
##
|
|
+# @BlockdevOptionsVitastor:
|
|
+#
|
|
+# Driver specific block device options for vitastor
|
|
+#
|
|
+# @image: Image name
|
|
+# @inode: Inode number
|
|
+# @pool: Pool ID
|
|
+# @size: Desired image size in bytes
|
|
+# @config-path: Path to Vitastor configuration
|
|
+# @etcd-host: etcd connection address(es)
|
|
+# @etcd-prefix: etcd key/value prefix
|
|
+##
|
|
+{ 'struct': 'BlockdevOptionsVitastor',
|
|
+ 'data': { '*inode': 'uint64',
|
|
+ '*pool': 'uint64',
|
|
+ '*size': 'uint64',
|
|
+ '*image': 'str',
|
|
+ '*config-path': 'str',
|
|
+ '*etcd-host': 'str',
|
|
+ '*etcd-prefix': 'str' } }
|
|
+
|
|
+##
|
|
# @ReplicationMode:
|
|
#
|
|
# An enumeration of replication modes.
|
|
@@ -4671,6 +4693,7 @@
|
|
'throttle': 'BlockdevOptionsThrottle',
|
|
'vdi': 'BlockdevOptionsGenericFormat',
|
|
'vhdx': 'BlockdevOptionsGenericFormat',
|
|
+ 'vitastor': 'BlockdevOptionsVitastor',
|
|
'virtio-blk-vfio-pci':
|
|
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
|
'if': 'CONFIG_BLKIO' },
|
|
@@ -5072,6 +5095,17 @@
|
|
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
|
|
|
##
|
|
+# @BlockdevCreateOptionsVitastor:
|
|
+#
|
|
+# Driver specific image creation options for Vitastor.
|
|
+#
|
|
+# @size: Size of the virtual disk in bytes
|
|
+##
|
|
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
|
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
|
+ 'size': 'size' } }
|
|
+
|
|
+##
|
|
# @BlockdevVmdkSubformat:
|
|
#
|
|
# Subformat options for VMDK images
|
|
@@ -5269,6 +5303,7 @@
|
|
'ssh': 'BlockdevCreateOptionsSsh',
|
|
'vdi': 'BlockdevCreateOptionsVdi',
|
|
'vhdx': 'BlockdevCreateOptionsVhdx',
|
|
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
|
'vmdk': 'BlockdevCreateOptionsVmdk',
|
|
'vpc': 'BlockdevCreateOptionsVpc'
|
|
} }
|
|
Index: pve-qemu-kvm-7.2.0/scripts/ci/org.centos/stream/8/x86_64/configure
|
|
===================================================================
|
|
--- pve-qemu-kvm-7.2.0.orig/scripts/ci/org.centos/stream/8/x86_64/configure
|
|
+++ pve-qemu-kvm-7.2.0/scripts/ci/org.centos/stream/8/x86_64/configure
|
|
@@ -31,7 +31,7 @@
|
|
--with-git=meson \
|
|
--with-git-submodules=update \
|
|
--target-list="x86_64-softmmu" \
|
|
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
|
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
|
--audio-drv-list="" \
|
|
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
|
--with-coroutine=ucontext \
|
|
@@ -179,6 +179,7 @@
|
|
--enable-opengl \
|
|
--enable-pie \
|
|
--enable-rbd \
|
|
+--enable-vitastor \
|
|
--enable-rdma \
|
|
--enable-seccomp \
|
|
--enable-snappy \
|
|
Index: a/block/vitastor.c
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ a/block/vitastor.c
|
|
@@ -0,0 +1,1075 @@
|
|
+// Copyright (c) Vitaliy Filippov, 2019+
|
|
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
|
+
|
|
+// QEMU block driver
|
|
+
|
|
+#ifdef VITASTOR_SOURCE_TREE
|
|
+#define BUILD_DSO
|
|
+#define _GNU_SOURCE
|
|
+#endif
|
|
+#include "qemu/osdep.h"
|
|
+#include "qemu/main-loop.h"
|
|
+#if QEMU_VERSION_MAJOR >= 8
|
|
+#include "block/block-io.h"
|
|
+#endif
|
|
+#include "block/block_int.h"
|
|
+#include "qapi/error.h"
|
|
+#include "qapi/qmp/qdict.h"
|
|
+#include "qapi/qmp/qerror.h"
|
|
+#include "qemu/uri.h"
|
|
+#include "qemu/error-report.h"
|
|
+#include "qemu/module.h"
|
|
+#include "qemu/option.h"
|
|
+
|
|
+#if QEMU_VERSION_MAJOR >= 3
|
|
+#include "qemu/units.h"
|
|
+#include "block/qdict.h"
|
|
+#include "qemu/cutils.h"
|
|
+#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 10
|
|
+#include "qemu/cutils.h"
|
|
+#include "qapi/qmp/qstring.h"
|
|
+#include "qapi/qmp/qjson.h"
|
|
+#else
|
|
+#include "qapi/qmp/qint.h"
|
|
+#define qdict_put_int(options, name, num_val) qdict_put_obj(options, name, QOBJECT(qint_from_int(num_val)))
|
|
+#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
|
|
+#define qobject_unref QDECREF
|
|
+#endif
|
|
+#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
|
|
+#include "sysemu/replay.h"
|
|
+#else
|
|
+#include "sysemu/sysemu.h"
|
|
+#endif
|
|
+
|
|
+#include "vitastor_c.h"
|
|
+
|
|
+#ifdef VITASTOR_SOURCE_TREE
+// Intentionally empty. Only compiled when VITASTOR_SOURCE_TREE is defined.
+// NOTE(review): presumably a symbol the QEMU module machinery expects from a
+// module built outside the QEMU tree - confirm.
+void qemu_module_dummy(void) {}
+
+// Intentionally empty. The function name comes from the DSO_STAMP_FUN macro,
+// which is defined outside this file.
+void DSO_STAMP_FUN(void) {}
+#endif
|
|
+
|
|
+typedef struct VitastorFdData VitastorFdData;
|
|
+
|
|
+// Per-BlockDriverState driver state (stored in bs->opaque)
+typedef struct VitastorClient
+{
+    // Client handle from vitastor_c_create_uring_json() or vitastor_c_create_qemu()
+    void *proxy;
+    // fd from vitastor_c_uring_register_eventfd(), -1 when io_uring is not used
+    int uring_eventfd;
+
+    // Result of vitastor_c_watch_inode() when opened by image name, NULL otherwise
+    void *watch;
+    // g_strdup()'ed copies of the corresponding open options
+    char *config_path;
+    char *etcd_host;
+    char *etcd_prefix;
+    char *image;
+    int skip_parents;
+    uint64_t inode;
+    uint64_t pool;
+    uint64_t size;
+    long readonly;
+    // RDMA options; use_rdma defaults to -1 when the option is absent
+    int use_rdma;
+    char *rdma_device;
+    int rdma_port_num;
+    int rdma_gid_index;
+    int rdma_mtu;
+    // Taken around every call into the client library
+    QemuMutex mutex;
+    // AioContext of the BlockDriverState, captured at open time
+    AioContext *ctx;
+    // fd handlers registered through vitastor_aio_set_fd_handler()
+    VitastorFdData **fds;
+    // Used and allocated entries of fds
+    int fd_count;
+    int fd_alloc;
+    // Non-zero while a vitastor_uring_handler() bottom half is pending
+    int bh_uring_scheduled;
+
+    // Most recently cached bitmap and the request it belongs to (dropped on write)
+    uint64_t last_bitmap_inode;
+    uint64_t last_bitmap_offset;
+    uint64_t last_bitmap_len;
+    uint32_t last_bitmap_granularity;
+    uint8_t *last_bitmap;
+} VitastorClient;
|
|
+
|
|
+// One file descriptor the client library asked QEMU to poll
+typedef struct VitastorFdData
+{
+    // Owning client; its mutex is taken around the callbacks below
+    VitastorClient *cli;
+    int fd;
+    // Library callbacks for readable / writable events (either may be NULL)
+    IOHandler *fd_read;
+    IOHandler *fd_write;
+    // Argument passed to fd_read / fd_write
+    void *opaque;
+} VitastorFdData;
|
|
+
|
|
+// State of one asynchronous request a coroutine is waiting for
+typedef struct VitastorRPC
+{
+    BlockDriverState *bs;
+    // Coroutine to wake when the request completes
+    Coroutine *co;
+    QEMUIOVector *iov;
+    // Value reported by the completion callback
+    long ret;
+    // Set to 1 by vitastor_co_generic_bh_cb()
+    int complete;
+    uint64_t inode;
+    uint64_t offset;
+    uint64_t len;
+    uint32_t bitmap_granularity;
+    uint8_t *bitmap;
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+    // Bottom half used to deliver the completion on old QEMU versions
+    QEMUBH *bh;
+#endif
+} VitastorRPC;
|
|
+
|
|
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+// Pairs a bottom half with its client so vitastor_bh_uring_handler() can
+// delete the bottom half after it has run
+typedef struct VitastorBH
+{
+    VitastorClient *cli;
+    QEMUBH *bh;
+} VitastorBH;
+#endif
|
|
+
|
|
+// Forward declarations of functions that are used before their definition
+static void vitastor_close(BlockDriverState *bs);
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
+static void vitastor_co_generic_cb(void *opaque, long retval);
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
|
|
+
|
|
+// Cut the first token off <src> in place.
+// Scans for the first <delim> that is not preceded by a backslash, replaces it
+// with '\0' and stores the address of the following character in <*p>.
+// When no delimiter is found <*p> is set to NULL and <src> is left unchanged.
+// Backslashes are not removed here (see qemu_vitastor_unescape()).
+// Always returns <src>.
+static char *qemu_vitastor_next_tok(char *src, char delim, char **p)
+{
+    char *cur = src;
+
+    *p = NULL;
+    while (*cur != '\0' && *cur != delim)
+    {
+        // A backslash protects the next character, so step over both
+        if (*cur == '\\' && cur[1] != '\0')
+            cur++;
+        cur++;
+    }
+    if (*cur == delim)
+    {
+        *p = cur + 1;
+        *cur = '\0';
+    }
+    return src;
+}
|
|
+
|
|
+// Remove backslash escapes from <src> in place: every backslash that is
+// followed by another character is dropped and that character is kept as is.
+// A single trailing backslash is kept.
+static void qemu_vitastor_unescape(char *src)
+{
+    char *dst = src;
+
+    while (*src != '\0')
+    {
+        if (*src == '\\' && src[1] != '\0')
+            src++;
+        *dst++ = *src++;
+    }
+    *dst = '\0';
+}
|
|
+
|
|
+// vitastor[:key=value]*
+// vitastor[:etcd_host=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3]
+// vitastor:config_path=/etc/vitastor/vitastor.conf:image=testimg
+//
+// Parse a "vitastor:" pseudo-filename into <options>.
+// Underscores in option names are converted to dashes, backslash escapes are
+// removed from names and values, the numeric options listed below are stored
+// as integers and everything else is stored as a string.
+// On failure <errp> is set; <options> may already contain the pairs parsed
+// before the error.
+static void vitastor_parse_filename(const char *filename, QDict *options, Error **errp)
+{
+    const char *start;
+    char *p, *buf;
+
+    if (!strstart(filename, "vitastor:", &start))
+    {
+        error_setg(errp, "File name must start with 'vitastor:'");
+        return;
+    }
+
+    buf = g_strdup(start);
+    p = buf;
+
+    // The following are all key/value pairs
+    while (p)
+    {
+        char *name, *value, *c;
+        name = qemu_vitastor_next_tok(p, '=', &p);
+        if (!p)
+        {
+            error_setg(errp, "conf option %s has no value", name);
+            // Leave immediately: the checks after the loop may report another
+            // error, and error_setg() must not be called on an already set error
+            goto out;
+        }
+        for (c = name; *c; c++)
+            if (*c == '_')
+                *c = '-';
+        qemu_vitastor_unescape(name);
+        value = qemu_vitastor_next_tok(p, ':', &p);
+        qemu_vitastor_unescape(value);
+        // Names are compared after the '_' -> '-' conversion above, so they
+        // must be spelled with dashes only
+        if (!strcmp(name, "inode") ||
+            !strcmp(name, "pool") ||
+            !strcmp(name, "size") ||
+            !strcmp(name, "skip-parents") ||
+            !strcmp(name, "use-rdma") ||
+            !strcmp(name, "rdma-port-num") ||
+            !strcmp(name, "rdma-gid-index") ||
+            !strcmp(name, "rdma-mtu"))
+        {
+            unsigned long long num_val;
+#if QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1
+            if (parse_uint_full(value, &num_val, 0))
+#else
+            if (parse_uint_full(value, 0, &num_val))
+#endif
+            {
+                error_setg(errp, "Illegal %s: %s", name, value);
+                goto out;
+            }
+            qdict_put_int(options, name, num_val);
+        }
+        else
+        {
+            qdict_put_str(options, name, value);
+        }
+    }
+    // Without an image name the inode, its pool and the size must all be given
+    if (!qdict_get_try_str(options, "image"))
+    {
+        if (!qdict_get_try_int(options, "inode", 0))
+        {
+            error_setg(errp, "one of image (name) and inode (number) must be specified");
+            goto out;
+        }
+        if (!(qdict_get_try_int(options, "inode", 0) >> (64-POOL_ID_BITS)) &&
+            !qdict_get_try_int(options, "pool", 0))
+        {
+            error_setg(errp, "pool number must be specified or included in the inode number");
+            goto out;
+        }
+        if (!qdict_get_try_int(options, "size", 0))
+        {
+            error_setg(errp, "size must be specified when inode number is used instead of image name");
+            goto out;
+        }
+    }
+
+out:
+    g_free(buf);
+    return;
+}
|
|
+
|
|
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
|
+// Process pending io_uring events of the client library.
+// Used both as the read handler of the io_uring eventfd and as a bottom half
+// scheduled by vitastor_schedule_uring_handler(); <opaque> is the VitastorClient.
+static void vitastor_uring_handler(void *opaque)
+{
+    VitastorClient *cli = opaque;
+
+    qemu_mutex_lock(&cli->mutex);
+    // Allow the next bottom half to be scheduled before events are handled
+    cli->bh_uring_scheduled = 0;
+    vitastor_c_uring_handle_events(cli->proxy);
+    qemu_mutex_unlock(&cli->mutex);
+}
|
|
+
|
|
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+// Bottom half wrapper for QEMU versions without one-shot bottom halves:
+// runs vitastor_uring_handler() for the client, then deletes the bottom half
+// and frees the VitastorBH allocated by vitastor_schedule_uring_handler().
+static void vitastor_bh_uring_handler(void *opaque)
+{
+    VitastorBH *vbh = opaque;
+    // Was vitastor_bh_handler(), which is not defined anywhere in this file
+    vitastor_uring_handler(vbh->cli);
+    qemu_bh_delete(vbh->bh);
+    free(vbh);
+}
+#endif
|
|
+
|
|
+// Schedule vitastor_uring_handler() as a bottom half in the client's
+// AioContext, unless io_uring is not in use (uring_eventfd < 0) or a bottom
+// half is already pending. Every caller in this file holds client->mutex.
+static void vitastor_schedule_uring_handler(VitastorClient *client)
+{
+    void *opaque = client;
+    if (client->uring_eventfd >= 0 && !client->bh_uring_scheduled)
+    {
+        client->bh_uring_scheduled = 1;
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+        replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+        aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque);
+#else
+        // No one-shot bottom halves here: allocate one that deletes itself
+        // in vitastor_bh_uring_handler()
+        VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH));
+        if (!vbh)
+        {
+            fprintf(stderr, "bad alloc\n");
+            abort();
+        }
+        vbh->cli = client;
+#if QEMU_VERSION_MAJOR >= 2
+        // Was bdrv_get_aio_context(task->bs), but no "task" exists in this
+        // function; client->ctx is the context used by the branches above
+        vbh->bh = aio_bh_new(client->ctx, vitastor_bh_uring_handler, vbh);
+#else
+        vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh);
+#endif
+        qemu_bh_schedule(vbh->bh);
+#endif
+    }
+}
|
|
+#else
|
|
+// No-op variant used when the client library has no io_uring support
+// (VITASTOR_C_API_VERSION undefined or lower than 2)
+static void vitastor_schedule_uring_handler(VitastorClient *client)
+{
+    // Nothing to schedule
+}
|
|
+#endif
|
|
+
|
|
+// Coroutine body: start watching the image named client->image and wait until
+// vitastor_co_generic_cb() has stored the result in task->ret and the
+// completion bottom half has set task->complete.
+static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
+{
+    VitastorClient *client = task->bs->opaque;
+
+    // The completion bottom half compares against this coroutine
+    task->co = qemu_coroutine_self();
+
+    qemu_mutex_lock(&client->mutex);
+    vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
+    vitastor_schedule_uring_handler(client);
+    qemu_mutex_unlock(&client->mutex);
+
+    for (; !task->complete; )
+        qemu_coroutine_yield();
+}
|
|
+
|
|
+// QEMU read handler for a library-owned fd: runs the library's fd_read
+// callback under the client mutex, then schedules io_uring event processing.
+// <fddv> is the VitastorFdData registered for the fd.
+static void vitastor_aio_fd_read(void *fddv)
+{
+    VitastorFdData *entry = fddv;
+    VitastorClient *owner = entry->cli;
+
+    qemu_mutex_lock(&owner->mutex);
+    entry->fd_read(entry->opaque);
+    vitastor_schedule_uring_handler(owner);
+    qemu_mutex_unlock(&owner->mutex);
+}
|
|
+
|
|
+// QEMU write handler for a library-owned fd: runs the library's fd_write
+// callback under the client mutex, then schedules io_uring event processing.
+// <fddv> is the VitastorFdData registered for the fd.
+static void vitastor_aio_fd_write(void *fddv)
+{
+    VitastorFdData *entry = fddv;
+    VitastorClient *owner = entry->cli;
+
+    qemu_mutex_lock(&owner->mutex);
+    entry->fd_write(entry->opaque);
+    vitastor_schedule_uring_handler(owner);
+    qemu_mutex_unlock(&owner->mutex);
+}
|
|
+
|
|
+// Version-independent wrapper around aio_set_fd_handler().
+// The argument list of aio_set_fd_handler() differs between QEMU versions;
+// the optional arguments (is_external, io_flush, io_poll, io_poll_ready) are
+// filled with 0/NULL where the compiled QEMU version has them.
+static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
+{
+    aio_set_fd_handler(
+        ctx,
+        fd,
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 && (QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1)
+        0 /*is_external*/,
+#endif
+        fd_read,
+        fd_write,
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
+        NULL /*io_flush*/,
+#endif
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
+        NULL /*io_poll*/,
+#endif
+#if QEMU_VERSION_MAJOR >= 7
+        NULL /*io_poll_ready*/,
+#endif
+        opaque
+    );
+}
|
|
+
|
|
+// fd handler registration callback passed to vitastor_c_create_qemu().
+// <vcli> is the VitastorClient. With at least one of <fd_read>/<fd_write> set
+// the fd is added to (or updated in) client->fds and polled in client->ctx;
+// with both NULL the fd is removed from client->fds and no longer polled.
+// <unused1> and <unused2> are ignored.
+// Aborts on allocation failure, like strarray_push() does.
+static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+    VitastorClient *client = (VitastorClient*)vcli;
+    VitastorFdData *fdd = NULL;
+    int i;
+    for (i = 0; i < client->fd_count; i++)
+    {
+        if (client->fds[i]->fd == fd)
+        {
+            if (fd_read || fd_write)
+            {
+                // Known fd: only replace its callbacks
+                fdd = client->fds[i];
+                fdd->opaque = opaque;
+                fdd->fd_read = fd_read;
+                fdd->fd_write = fd_write;
+            }
+            else
+            {
+                // NOTE(review): the removed VitastorFdData is not freed here.
+                // vitastor_aio_fd_read()/vitastor_aio_fd_write() still read
+                // fdd->cli after invoking the library callback, so freeing it
+                // at this point would need those functions changed as well.
+                for (int j = i+1; j < client->fd_count; j++)
+                    client->fds[j-1] = client->fds[j];
+                client->fd_count--;
+            }
+            break;
+        }
+    }
+    if ((fd_read || fd_write) && !fdd)
+    {
+        fdd = malloc(sizeof(*fdd));
+        if (!fdd)
+        {
+            fprintf(stderr, "bad alloc\n");
+            abort();
+        }
+        fdd->cli = client;
+        fdd->fd = fd;
+        fdd->fd_read = fd_read;
+        fdd->fd_write = fd_write;
+        fdd->opaque = opaque;
+        if (client->fd_count >= client->fd_alloc)
+        {
+            // Double the capacity, starting from at least 16 entries
+            int new_alloc = client->fd_alloc*2;
+            VitastorFdData **new_fds;
+            if (new_alloc < 16)
+                new_alloc = 16;
+            new_fds = realloc(client->fds, sizeof(*new_fds) * new_alloc);
+            if (!new_fds)
+            {
+                fprintf(stderr, "bad alloc\n");
+                abort();
+            }
+            client->fds = new_fds;
+            client->fd_alloc = new_alloc;
+        }
+        client->fds[client->fd_count++] = fdd;
+    }
+    // fdd is NULL here when the fd is being removed
+    universal_aio_set_fd_handler(
+        client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd
+    );
+}
|
|
+
|
|
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
|
+// Growable array of borrowed C strings; the strings themselves are not copied
+typedef struct str_array
+{
+    const char **items;
+    // Used and allocated number of entries
+    int len, alloc;
+} str_array;
+
+// Append <str> to <a>, doubling the capacity (starting at 4) when it is full.
+// Aborts the process when memory cannot be allocated.
+static void strarray_push(str_array *a, const char *str)
+{
+    if (a->len >= a->alloc)
+    {
+        if (a->alloc)
+            a->alloc = 2*a->alloc;
+        else
+            a->alloc = 4;
+        a->items = realloc(a->items, a->alloc*sizeof(char*));
+        if (!a->items)
+        {
+            fprintf(stderr, "bad alloc\n");
+            abort();
+        }
+    }
+    a->items[a->len] = str;
+    a->len++;
+}
+
+// Append <key> followed by <value>; does nothing when either of them is NULL
+static void strarray_push_kv(str_array *a, const char *key, const char *value)
+{
+    if (!key || !value)
+        return;
+    strarray_push(a, key);
+    strarray_push(a, value);
+}
+
+// Release the item array (not the strings) and reset <a> to the empty state
+static void strarray_free(str_array *a)
+{
+    free(a->items);
+    a->items = NULL;
+    a->len = 0;
+    a->alloc = 0;
+}
|
|
+#endif
|
|
+
|
|
+// Open a Vitastor image (by name) or a raw inode (by number) for <bs>.
+// Connection settings are read from <options>. With VITASTOR_C_API_VERSION >= 2
+// an io_uring based client is tried first; otherwise, or when that fails, the
+// client is driven by QEMU fd handlers. The function then waits for
+// vitastor_c_watch_inode() to complete and takes the size either from the
+// image metadata or from the "size" option.
+// The keys consumed here are removed from <options> on success.
+// Returns 0 on success, -1 with <errp> set on failure.
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+    // Zero everything so no field is read uninitialized before the callback runs
+    VitastorRPC task = { 0 };
+    VitastorClient *client = bs->opaque;
+    void *image = NULL;
+    int64_t ret = 0;
+    qemu_mutex_init(&client->mutex);
+    client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
+    // FIXME: Rename to etcd_address
+    client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
+    client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
+    client->skip_parents = qdict_get_try_int(options, "skip-parents", 0);
+    client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
+    client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
+    client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
+    client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
+    client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
+    client->ctx = bdrv_get_aio_context(bs);
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+    str_array opt = {};
+    strarray_push_kv(&opt, "config_path", qdict_get_try_str(options, "config-path"));
+    strarray_push_kv(&opt, "etcd_address", qdict_get_try_str(options, "etcd-host"));
+    strarray_push_kv(&opt, "etcd_prefix", qdict_get_try_str(options, "etcd-prefix"));
+    // NOTE(review): vitastor_parse_filename() stores use-rdma, rdma-gid-index and
+    // rdma-mtu as integers, while the lookups below only read string values -
+    // confirm that these options reach the io_uring client when they come from
+    // a "vitastor:" filename.
+    strarray_push_kv(&opt, "use_rdma", qdict_get_try_str(options, "use-rdma"));
+    strarray_push_kv(&opt, "rdma_device", qdict_get_try_str(options, "rdma-device"));
+    strarray_push_kv(&opt, "rdma_port_num", qdict_get_try_str(options, "rdma-port-num"));
+    strarray_push_kv(&opt, "rdma_gid_index", qdict_get_try_str(options, "rdma-gid-index"));
+    strarray_push_kv(&opt, "rdma_mtu", qdict_get_try_str(options, "rdma-mtu"));
+    strarray_push_kv(&opt, "client_writeback_allowed", (flags & BDRV_O_NOCACHE) ? "0" : "1");
+    client->proxy = vitastor_c_create_uring_json(opt.items, opt.len);
+    strarray_free(&opt);
+    if (client->proxy)
+    {
+        client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy);
+        if (client->uring_eventfd < 0)
+        {
+            fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno));
+            error_setg(errp, "failed to create io_uring eventfd");
+            vitastor_close(bs);
+            return -1;
+        }
+        universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
+    }
+    else
+    {
+        // Writeback cache is unusable without io_uring because the client can't correctly flush on exit
+        fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower%s\n",
+            strerror(errno), (flags & BDRV_O_NOCACHE ? "" : " and writeback cache will be disabled"));
+#endif
+        client->uring_eventfd = -1;
+        client->proxy = vitastor_c_create_qemu(
+            vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
+            client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
+        );
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+    }
+#endif
+    image = client->image = g_strdup(qdict_get_try_str(options, "image"));
+    // Read-only unless the device is opened for writing (the test was inverted
+    // before: it marked read-write opens as read-only)
+    client->readonly = (flags & BDRV_O_RDWR) ? 0 : 1;
+    // Get image metadata (size and readonly flag) or just wait until the client is ready
+    if (!image)
+        client->image = (char*)"x";
+    task.complete = 0;
+    task.bs = bs;
+    if (qemu_in_coroutine())
+    {
+        vitastor_co_get_metadata(&task);
+    }
+    else
+    {
+#if QEMU_VERSION_MAJOR >= 8
+        aio_co_enter(bdrv_get_aio_context(bs), qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
+        bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#else
+        qemu_coroutine_enter(qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#endif
+        BDRV_POLL_WHILE(bs, !task.complete);
+    }
+    // Drop the "x" placeholder again so it is never passed to g_free()
+    client->image = image;
+    if (client->image)
+    {
+        client->watch = (void*)task.ret;
+        client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
+        client->size = vitastor_c_inode_get_size(client->watch);
+        if (!vitastor_c_inode_get_num(client->watch))
+        {
+            error_setg(errp, "image does not exist");
+            vitastor_close(bs);
+            return -1;
+        }
+        if (!client->size)
+        {
+            client->size = qdict_get_try_int(options, "size", 0);
+        }
+    }
+    else
+    {
+        client->watch = NULL;
+        client->inode = qdict_get_try_int(options, "inode", 0);
+        client->pool = qdict_get_try_int(options, "pool", 0);
+        if (client->pool)
+        {
+            // Put the pool number into the top POOL_ID_BITS bits of the inode number
+            client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
+        }
+        client->size = qdict_get_try_int(options, "size", 0);
+        // The watch was only needed to wait for the client, close it right away
+        vitastor_c_close_watch(client->proxy, (void*)task.ret);
+    }
+    if (!client->size)
+    {
+        error_setg(errp, "image size not specified");
+        vitastor_close(bs);
+        return -1;
+    }
+    bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
+#if QEMU_VERSION_MAJOR > 5 || QEMU_VERSION_MAJOR == 5 && QEMU_VERSION_MINOR >= 1
+    /* When extending regular files, we get zeros from the OS */
+    bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+#endif
+    //client->aio_context = bdrv_get_aio_context(bs);
+    // Remove every option handled by this driver from the dictionary
+    qdict_del(options, "use-rdma");
+    qdict_del(options, "rdma-mtu");
+    qdict_del(options, "rdma-gid-index");
+    qdict_del(options, "rdma-port-num");
+    qdict_del(options, "rdma-device");
+    qdict_del(options, "config-path");
+    qdict_del(options, "etcd-host");
+    qdict_del(options, "etcd-prefix");
+    qdict_del(options, "image");
+    qdict_del(options, "inode");
+    qdict_del(options, "pool");
+    qdict_del(options, "size");
+    qdict_del(options, "skip-parents");
+    return ret;
+}
|
|
+
|
|
+// Destroy the client and release everything owned by the VitastorClient
+// stored in bs->opaque. Also used on the error paths of vitastor_file_open().
+static void vitastor_close(BlockDriverState *bs)
+{
+    VitastorClient *cli = bs->opaque;
+
+    vitastor_c_destroy(cli->proxy);
+    if (cli->fds != NULL)
+    {
+        // Only the pointer array is released here, not the entries it points to
+        free(cli->fds);
+        cli->fds = NULL;
+        cli->fd_count = 0;
+        cli->fd_alloc = 0;
+    }
+    qemu_mutex_destroy(&cli->mutex);
+    // g_free() and free() accept NULL, so no checks are needed
+    g_free(cli->config_path);
+    g_free(cli->etcd_host);
+    g_free(cli->etcd_prefix);
+    g_free(cli->image);
+    free(cli->last_bitmap);
+    cli->last_bitmap = NULL;
+}
|
|
+
|
|
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
|
|
+// Report fixed block sizes: 512 byte logical and 4096 byte physical blocks.
+// Always returns 0.
+static int vitastor_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+    bsz->log = 512;
+    bsz->phys = 4096;
+    return 0;
+}
|
|
+#endif
|
|
+
|
|
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
|
|
+// Image creation entry point. Only validates <url> with
+// vitastor_parse_filename(); nothing is created on the cluster.
+// Returns 0 on success, -1 with <errp> set when <url> cannot be parsed.
+static int coroutine_fn vitastor_co_create_opts(
+#if QEMU_VERSION_MAJOR >= 4
+    BlockDriver *drv,
+#endif
+    const char *url, QemuOpts *opts, Error **errp)
+{
+    // Collect the error locally: <errp> may be NULL and must not be dereferenced
+    Error *local_err = NULL;
+    QDict *options = qdict_new();
+    int ret = 0;
+
+    vitastor_parse_filename(url, options, &local_err);
+    if (local_err)
+    {
+        error_propagate(errp, local_err);
+        ret = -1;
+    }
+
+    // inodes don't require creation in Vitastor. FIXME: They will when there will be some metadata
+
+    qobject_unref(options);
+    return ret;
+}
|
|
+#endif
|
|
+
|
|
+#if QEMU_VERSION_MAJOR >= 3
|
|
+// Change the size recorded in the driver state; the inode itself is not
+// resized (see the TODO below).
+// With an "exact" argument (QEMU >= 4) the size is set to <offset> when exact
+// is requested and otherwise only ever grown; older versions always set it.
+// Returns 0, or -ENOTSUP with <errp> set for any preallocation mode but "off".
+static int coroutine_fn vitastor_co_truncate(BlockDriverState *bs, int64_t offset,
+#if QEMU_VERSION_MAJOR >= 4
+    bool exact,
+#endif
+    PreallocMode prealloc,
+#if QEMU_VERSION_MAJOR >= 5 && QEMU_VERSION_MINOR >= 1 || QEMU_VERSION_MAJOR > 5 || defined RHEL_BDRV_CO_TRUNCATE_FLAGS
+    BdrvRequestFlags flags,
+#endif
+    Error **errp)
+{
+    VitastorClient *cli = bs->opaque;
+
+    if (prealloc != PREALLOC_MODE_OFF)
+    {
+        error_setg(errp, "Unsupported preallocation mode '%s'", PreallocMode_str(prealloc));
+        return -ENOTSUP;
+    }
+
+    // TODO: Resize inode to <offset> bytes
+#if QEMU_VERSION_MAJOR >= 4
+    if (exact || cli->size < offset)
+    {
+        cli->size = offset;
+    }
+#else
+    cli->size = offset;
+#endif
+
+    return 0;
+}
|
|
+#endif
|
|
+
|
|
+// Fill <bdi> with driver information: a fixed cluster size of 4096 bytes.
+// Always returns 0.
+static int vitastor_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    enum { VITASTOR_INFO_CLUSTER_SIZE = 4096 };
+
+    bdi->cluster_size = VITASTOR_INFO_CLUSTER_SIZE;
+    return 0;
+}
|
|
+
|
|
+// Return the size recorded in the driver state at open or truncate time
+static int64_t vitastor_getlength(BlockDriverState *bs)
+{
+    const VitastorClient *cli = bs->opaque;
+
+    return cli->size;
+}
|
|
+
|
|
+// Set the request and memory alignment limits of <bs> to 4096 bytes.
+// The signature and the return value depend on the QEMU version.
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+static void vitastor_refresh_limits(BlockDriverState *bs, Error **errp)
+#else
+static int vitastor_refresh_limits(BlockDriverState *bs)
+#endif
+{
+    bs->bl.opt_mem_alignment = 4096;
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 3
+    bs->bl.min_mem_alignment = 4096;
+#endif
+    bs->bl.request_alignment = 4096;
+#if QEMU_VERSION_MAJOR < 2 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR == 0
+    return 0;
+#endif
+}
|
|
+
|
|
+//static int64_t vitastor_get_allocated_file_size(BlockDriverState *bs)
|
|
+//{
|
|
+// return 0;
|
|
+//}
|
|
+
|
|
+// Reset <task> for a new request issued by the current coroutine on <bs>;
+// every other field is zeroed.
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task)
+{
+    VitastorRPC fresh = {
+        .bs = bs,
+        .co = qemu_coroutine_self(),
+    };
+
+    *task = fresh;
+}
|
|
+
|
|
+// Bottom half that finishes a request: marks the VitastorRPC in <opaque> as
+// complete and resumes the coroutine waiting for it, unless this code is
+// already running inside that coroutine.
+static void vitastor_co_generic_bh_cb(void *opaque)
+{
+    VitastorRPC *rpc = opaque;
+
+    rpc->complete = 1;
+    // Nothing to resume when we are already inside the waiting coroutine
+    if (qemu_coroutine_self() == rpc->co)
+        return;
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+    aio_co_wake(rpc->co);
+#else
+#if QEMU_VERSION_MAJOR == 2
+    qemu_bh_delete(rpc->bh);
+#endif
+    qemu_coroutine_enter(rpc->co, NULL);
+    qemu_aio_release(rpc);
+#endif
+}
|
|
+
|
|
+// Completion callback given to the client library: stores <retval> in the
+// VitastorRPC passed as <opaque> and schedules vitastor_co_generic_bh_cb()
+// in the AioContext of the request's BlockDriverState (or as a global
+// bottom half on QEMU versions before 2.0).
+static void vitastor_co_generic_cb(void *opaque, long retval)
+{
+    VitastorRPC *rpc = opaque;
+
+    rpc->ret = retval;
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, rpc);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, rpc);
+#elif QEMU_VERSION_MAJOR >= 2
+    rpc->bh = aio_bh_new(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, rpc);
+    qemu_bh_schedule(rpc->bh);
+#else
+    rpc->bh = qemu_bh_new(vitastor_co_generic_bh_cb, rpc);
+    qemu_bh_schedule(rpc->bh);
+#endif
+}
|
|
+
|
|
+// Read completion callback: identical to vitastor_co_generic_cb(),
+// the reported <version> is ignored.
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
+{
+    (void)version;
+    vitastor_co_generic_cb(opaque, retval);
+}
|
|
+
|
|
+// Read <bytes> bytes at <offset> into <iov>.
+// Submits the read to the client library under the client mutex and yields
+// until the completion bottom half has run. Returns the value reported by the
+// completion callback; <flags> is not used.
+static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
+#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
+    int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
+#else
+    uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
+#endif
+)
+{
+    VitastorClient *client = bs->opaque;
+    VitastorRPC task;
+    uint64_t inode_num;
+
+    vitastor_co_init_task(bs, &task);
+    task.iov = iov;
+
+    // Images opened by name take the inode number from the watch
+    if (client->watch)
+        inode_num = vitastor_c_inode_get_num(client->watch);
+    else
+        inode_num = client->inode;
+
+    qemu_mutex_lock(&client->mutex);
+    vitastor_c_read(client->proxy, inode_num, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
+    vitastor_schedule_uring_handler(client);
+    qemu_mutex_unlock(&client->mutex);
+
+    for (; !task.complete; )
+        qemu_coroutine_yield();
+
+    return task.ret;
+}
|
|
+
|
|
+// Write <bytes> bytes from <iov> at <offset>.
+// Drops the cached bitmap, submits the write to the client library under the
+// client mutex and yields until the completion bottom half has run. Returns
+// the value reported by the completion callback; <flags> is not used.
+static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
+#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
+    int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
+#else
+    uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
+#endif
+)
+{
+    VitastorClient *client = bs->opaque;
+    VitastorRPC task;
+    uint64_t inode_num;
+
+    vitastor_co_init_task(bs, &task);
+    task.iov = iov;
+
+    // Invalidate last bitmap on write (free(NULL) is a no-op)
+    free(client->last_bitmap);
+    client->last_bitmap = NULL;
+
+    // Images opened by name take the inode number from the watch
+    if (client->watch)
+        inode_num = vitastor_c_inode_get_num(client->watch);
+    else
+        inode_num = client->inode;
+
+    qemu_mutex_lock(&client->mutex);
+    vitastor_c_write(client->proxy, inode_num, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
+    vitastor_schedule_uring_handler(client);
+    qemu_mutex_unlock(&client->mutex);
+
+    for (; !task.complete; )
+        qemu_coroutine_yield();
+
+    return task.ret;
+}
|
|
+
|
|
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
|
|
+#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
|
|
+// Completion callback passed to vitastor_c_read_bitmap().
+// Records <retval> in the task. On success the bitmap is attached to the task
+// and, if the client's cache slot still describes the same inode/offset/length,
+// it also replaces the cached bitmap. Finally vitastor_co_generic_bh_cb is
+// scheduled as a bottom half using the API available in the QEMU version.
+static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap)
+{
+    VitastorRPC *rpc = opaque;
+    VitastorClient *vc = rpc->bs->opaque;
+
+    rpc->ret = retval;
+    if (retval >= 0)
+    {
+        bool cache_slot_matches =
+            vc->last_bitmap_inode == rpc->inode &&
+            vc->last_bitmap_offset == rpc->offset &&
+            vc->last_bitmap_len == rpc->len;
+        rpc->bitmap = bitmap;
+        if (cache_slot_matches)
+        {
+            free(vc->last_bitmap);
+            vc->last_bitmap = bitmap;
+        }
+    }
+
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 2
+    rpc->bh = aio_bh_new(bdrv_get_aio_context(rpc->bs), vitastor_co_generic_bh_cb, opaque);
+    qemu_bh_schedule(rpc->bh);
+#else
+    rpc->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
+    qemu_bh_schedule(rpc->bh);
+#endif
+}
|
|
+
|
|
+// Report the allocation status of the byte range [offset, offset+bytes).
+//
+// Return value:
+//   BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID - the reported extent is allocated
+//   0                                       - the reported extent is not allocated
+//   -errno                                  - error (-EAGAIN if the inode block size is unknown)
+// Outputs: *pnum = length of the reported extent; for allocated extents
+// `*map` = `offset` and `*file` = `bs`.
+//
+// want_zero != 0: report the precise run of equal bitmap bits starting at <offset>.
+// want_zero == 0: report the whole request as one extent which counts as allocated
+// if any bitmap bit covering it is set (false positives are possible).
+//
+// The last bitmap read from the cluster is kept in the client and reused while
+// it covers the requested range.
+static int coroutine_fn vitastor_co_block_status(
+    BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+    int64_t *pnum, int64_t *map, BlockDriverState **file)
+{
+    VitastorRPC task;
+    VitastorClient *client = bs->opaque;
+    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+    uint8_t bit = 0;
+    if (client->last_bitmap && client->last_bitmap_inode == inode &&
+        client->last_bitmap_offset <= offset &&
+        client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes))
+    {
+        // Use the previously read bitmap
+        task.bitmap_granularity = client->last_bitmap_granularity;
+        task.offset = client->last_bitmap_offset;
+        task.len = client->last_bitmap_len;
+        task.bitmap = client->last_bitmap;
+    }
+    else
+    {
+        // Read bitmap from this position, rounding to full inode PG blocks
+        uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
+        if (!block_size)
+            return -EAGAIN;
+        // Init coroutine
+        vitastor_co_init_task(bs, &task);
+        free(client->last_bitmap);
+        task.inode = client->last_bitmap_inode = inode;
+        task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
+        task.offset = client->last_bitmap_offset = offset / block_size * block_size;
+        task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
+        task.bitmap = client->last_bitmap = NULL;
+        qemu_mutex_lock(&client->mutex);
+        vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
+        vitastor_schedule_uring_handler(client);
+        qemu_mutex_unlock(&client->mutex);
+        while (!task.complete)
+        {
+            qemu_coroutine_yield();
+        }
+        if (task.ret < 0)
+        {
+            // Error
+            return task.ret;
+        }
+    }
+    if (want_zero)
+    {
+        // Get precise mapping with all holes
+        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
+        uint64_t bmp_len = task.len / task.bitmap_granularity;
+        uint64_t bmp_end = bmp_pos+1;
+        bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
+        while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit)
+        {
+            bmp_end++;
+        }
+        *pnum = (bmp_end-bmp_pos) * task.bitmap_granularity;
+    }
+    else
+    {
+        // Get larger allocated extents, possibly with false positives.
+        // bmp_pos and bmp_end are both absolute bit indexes into task.bitmap.
+        // The end is exclusive and rounded up so that a partially covered last
+        // granule is inspected too (a false positive is acceptable here, a missed
+        // allocated granule is not), then clamped to the bitmap length.
+        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
+        uint64_t bmp_len = task.len / task.bitmap_granularity;
+        uint64_t bmp_end = (offset+bytes-task.offset+task.bitmap_granularity-1) / task.bitmap_granularity;
+        if (bmp_end > bmp_len)
+        {
+            bmp_end = bmp_len;
+        }
+        // Stop at the first set bit - the answer can't change after that
+        while (bmp_pos < bmp_end && !bit)
+        {
+            if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8)
+            {
+                // Byte-aligned position with at least 8 bits left: test a whole byte
+                bit = task.bitmap[bmp_pos >> 3] != 0;
+                bmp_pos += 8;
+            }
+            else
+            {
+                bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
+                bmp_pos++;
+            }
+        }
+        *pnum = bytes;
+    }
+    if (bit)
+    {
+        *map = offset;
+        *file = bs;
+    }
+    return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
+}
|
|
+#endif
|
|
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
|
|
+// QEMU 1.7-2.11
+// Sector-based block status callback: converts the sector range to bytes,
+// queries vitastor_co_block_status() for a precise mapping and converts the
+// resulting extent length back to sectors in *pnum.
+// Returns the status flags or a negative errno from vitastor_co_block_status().
+// NOTE(review): the byte offset returned in <map> is not merged into the return
+// value - confirm whether these QEMU versions expect it there.
+static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+{
+    int64_t map = 0;
+    int64_t pnumbytes = 0;
+    // <file> already is the BlockDriverState** output slot of the caller,
+    // so it is forwarded as is (passing its address would be a type mismatch
+    // and the caller's *file would never be filled in)
+    int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, &pnumbytes, &map, file);
+    *pnum = pnumbytes/BDRV_SECTOR_SIZE;
+    return r;
+}
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
|
|
+// Sector-based read callback for QEMU versions without byte-based preadv:
+// translates the sector range into bytes and delegates to vitastor_co_preadv()
+static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
+{
+    uint64_t byte_offset = sector_num*BDRV_SECTOR_SIZE;
+    uint64_t byte_count = nb_sectors*BDRV_SECTOR_SIZE;
+    return vitastor_co_preadv(bs, byte_offset, byte_count, iov, 0);
+}
|
|
+
|
|
+// Sector-based write callback for QEMU versions without byte-based pwritev:
+// translates the sector range into bytes and delegates to vitastor_co_pwritev()
+static int coroutine_fn vitastor_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
+{
+    uint64_t byte_offset = sector_num*BDRV_SECTOR_SIZE;
+    uint64_t byte_count = nb_sectors*BDRV_SECTOR_SIZE;
+    return vitastor_co_pwritev(bs, byte_offset, byte_count, iov, 0);
+}
|
|
+#endif
|
|
+
|
|
+// Coroutine flush entry point.
+// Submits a sync request to the Vitastor client and yields until the request
+// is marked complete. Returns the task's result code.
+static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
+{
+    VitastorRPC rpc;
+    VitastorClient *vc = bs->opaque;
+
+    vitastor_co_init_task(bs, &rpc);
+
+    // Submit the request and kick the io_uring handler with the client mutex held
+    qemu_mutex_lock(&vc->mutex);
+    vitastor_c_sync(vc->proxy, vitastor_co_generic_cb, &rpc);
+    vitastor_schedule_uring_handler(vc);
+    qemu_mutex_unlock(&vc->mutex);
+
+    // Sleep until the completion path flags the task as done
+    for (;;)
+    {
+        if (rpc.complete)
+        {
+            break;
+        }
+        qemu_coroutine_yield();
+    }
+
+    return rpc.ret;
+}
|
|
+
|
|
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+// Image creation options in QemuOptsList form: the virtual disk size is the only option
+static QemuOptsList vitastor_create_opts = {
+    .name = "vitastor-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(vitastor_create_opts.head),
+    .desc = {
+        { .name = BLOCK_OPT_SIZE, .type = QEMU_OPT_SIZE, .help = "Virtual disk size" },
+        { /* end of list */ },
+    },
+};
+#else
+// The same single option in the older QEMUOptionParameter form
+static QEMUOptionParameter vitastor_create_opts[] = {
+    { .name = BLOCK_OPT_SIZE, .type = OPT_SIZE, .help = "Virtual disk size" },
+    { NULL },
+};
+#endif
|
|
+
|
|
+#if QEMU_VERSION_MAJOR >= 4
|
|
+// NULL-terminated list of option names assigned to .strong_runtime_opts
+// of the block driver
+static const char *vitastor_strong_runtime_opts[] = {
+    "inode", "pool",
+    "config-path",
+    "etcd-host", "etcd-prefix",
+    NULL,
+};
|
|
+#endif
|
|
+
|
|
+// Block driver descriptor: plugs the Vitastor callbacks into the QEMU block layer.
+// Members whose name or availability depends on the QEMU version are selected
+// with QEMU_VERSION_* checks.
+static BlockDriver bdrv_vitastor = {
+    // Identification
+    .format_name            = "vitastor",
+    .protocol_name          = "vitastor",
+    .instance_size          = sizeof(VitastorClient),
+
+    // Opening and closing
+    .bdrv_parse_filename    = vitastor_parse_filename,
+    .bdrv_file_open         = vitastor_file_open,
+    .bdrv_close             = vitastor_close,
+
+    // Image properties and limits
+    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+#if QEMU_VERSION_MAJOR >= 8
+    .bdrv_co_get_info       = vitastor_get_info,
+    .bdrv_co_getlength      = vitastor_getlength,
+#else
+    .bdrv_get_info          = vitastor_get_info,
+    .bdrv_getlength         = vitastor_getlength,
+#endif
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
+    .bdrv_probe_blocksizes  = vitastor_probe_blocksizes,
+#endif
+    .bdrv_refresh_limits    = vitastor_refresh_limits,
+
+    // FIXME: Implement it along with per-inode statistics
+    //.bdrv_get_allocated_file_size = vitastor_get_allocated_file_size,
+
+    // Image creation: option list for the create operation
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+    .create_opts            = &vitastor_create_opts,
+#else
+    .create_options         = vitastor_create_opts,
+#endif
+
+    // For qmp_blockdev_create(), used by the qemu monitor / QAPI
+    // Requires patching QAPI IDL, thus unimplemented
+    //.bdrv_co_create = vitastor_co_create,
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+    // For bdrv_create(), used by qemu-img
+    .bdrv_co_create_opts    = vitastor_co_create_opts,
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3
+    .bdrv_co_truncate       = vitastor_co_truncate,
+#endif
+
+    // Allocation status
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+    // For snapshot export
+    .bdrv_co_block_status   = vitastor_co_block_status,
+#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
+    .bdrv_co_get_block_status = vitastor_co_get_block_status,
+#endif
+#endif
+
+    // Data path: byte-based callbacks where available, sector-based otherwise
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
+    .bdrv_co_preadv         = vitastor_co_preadv,
+    .bdrv_co_pwritev        = vitastor_co_pwritev,
+#else
+    .bdrv_co_readv          = vitastor_co_readv,
+    .bdrv_co_writev         = vitastor_co_writev,
+#endif
+    .bdrv_co_flush_to_disk  = vitastor_co_flush,
+
+#if QEMU_VERSION_MAJOR >= 4
+    .strong_runtime_opts    = vitastor_strong_runtime_opts,
+#endif
+};
|
|
+
|
|
+// Registers the Vitastor block driver; hooked up through block_init() below
+static void vitastor_block_init(void)
+{
+    BlockDriver *driver = &bdrv_vitastor;
+    bdrv_register(driver);
+}
+
+block_init(vitastor_block_init);
|