Add bdrv_co_block_status
parent
78a75914b3
commit
8826fdad7b
|
@ -1,3 +1,9 @@
|
||||||
|
pve-qemu-kvm (7.1.0-4+vitastor3) bullseye; urgency=medium
|
||||||
|
|
||||||
|
* Add bdrv_co_block_status implementation for QCOW2 export support
|
||||||
|
|
||||||
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Thu, 12 Jan 2023 02:31:18 +0300
|
||||||
|
|
||||||
pve-qemu-kvm (7.1.0-4+vitastor2) bullseye; urgency=medium
|
pve-qemu-kvm (7.1.0-4+vitastor2) bullseye; urgency=medium
|
||||||
|
|
||||||
* Add Vitastor support
|
* Add Vitastor support
|
||||||
|
|
|
@ -171,7 +171,7 @@ Index: a/block/vitastor.c
|
||||||
===================================================================
|
===================================================================
|
||||||
--- /dev/null
|
--- /dev/null
|
||||||
+++ a/block/vitastor.c
|
+++ a/block/vitastor.c
|
||||||
@@ -0,0 +1,629 @@
|
@@ -0,0 +1,797 @@
|
||||||
+// Copyright (c) Vitaliy Filippov, 2019+
|
+// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
||||||
+
|
+
|
||||||
|
@ -227,6 +227,7 @@ Index: a/block/vitastor.c
|
||||||
+ char *etcd_host;
|
+ char *etcd_host;
|
||||||
+ char *etcd_prefix;
|
+ char *etcd_prefix;
|
||||||
+ char *image;
|
+ char *image;
|
||||||
|
+ int skip_parents;
|
||||||
+ uint64_t inode;
|
+ uint64_t inode;
|
||||||
+ uint64_t pool;
|
+ uint64_t pool;
|
||||||
+ uint64_t size;
|
+ uint64_t size;
|
||||||
|
@ -237,6 +238,10 @@ Index: a/block/vitastor.c
|
||||||
+ int rdma_gid_index;
|
+ int rdma_gid_index;
|
||||||
+ int rdma_mtu;
|
+ int rdma_mtu;
|
||||||
+ QemuMutex mutex;
|
+ QemuMutex mutex;
|
||||||
|
+
|
||||||
|
+ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
|
||||||
|
+ uint32_t last_bitmap_granularity;
|
||||||
|
+ uint8_t *last_bitmap;
|
||||||
+} VitastorClient;
|
+} VitastorClient;
|
||||||
+
|
+
|
||||||
+typedef struct VitastorRPC
|
+typedef struct VitastorRPC
|
||||||
|
@ -246,6 +251,9 @@ Index: a/block/vitastor.c
|
||||||
+ QEMUIOVector *iov;
|
+ QEMUIOVector *iov;
|
||||||
+ long ret;
|
+ long ret;
|
||||||
+ int complete;
|
+ int complete;
|
||||||
|
+ uint64_t inode, offset, len;
|
||||||
|
+ uint32_t bitmap_granularity;
|
||||||
|
+ uint8_t *bitmap;
|
||||||
+} VitastorRPC;
|
+} VitastorRPC;
|
||||||
+
|
+
|
||||||
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
||||||
|
@ -321,6 +329,7 @@ Index: a/block/vitastor.c
|
||||||
+ if (!strcmp(name, "inode") ||
|
+ if (!strcmp(name, "inode") ||
|
||||||
+ !strcmp(name, "pool") ||
|
+ !strcmp(name, "pool") ||
|
||||||
+ !strcmp(name, "size") ||
|
+ !strcmp(name, "size") ||
|
||||||
|
+ !strcmp(name, "skip-parents") ||
|
||||||
+ !strcmp(name, "use-rdma") ||
|
+ !strcmp(name, "use-rdma") ||
|
||||||
+ !strcmp(name, "rdma-port_num") ||
|
+ !strcmp(name, "rdma-port_num") ||
|
||||||
+ !strcmp(name, "rdma-gid-index") ||
|
+ !strcmp(name, "rdma-gid-index") ||
|
||||||
|
@ -401,13 +410,16 @@ Index: a/block/vitastor.c
|
||||||
+
|
+
|
||||||
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
||||||
+{
|
+{
|
||||||
|
+ VitastorRPC task;
|
||||||
+ VitastorClient *client = bs->opaque;
|
+ VitastorClient *client = bs->opaque;
|
||||||
|
+ void *image = NULL;
|
||||||
+ int64_t ret = 0;
|
+ int64_t ret = 0;
|
||||||
+ qemu_mutex_init(&client->mutex);
|
+ qemu_mutex_init(&client->mutex);
|
||||||
+ client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
|
+ client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
|
||||||
+ // FIXME: Rename to etcd_address
|
+ // FIXME: Rename to etcd_address
|
||||||
+ client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
|
+ client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
|
||||||
+ client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
|
+ client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
|
||||||
|
+ client->skip_parents = qdict_get_try_int(options, "skip-parents", 0);
|
||||||
+ client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
|
+ client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
|
||||||
+ client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
|
+ client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
|
||||||
+ client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
+ client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
||||||
|
@ -417,12 +429,11 @@ Index: a/block/vitastor.c
|
||||||
+ vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
+ vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
||||||
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||||
+ );
|
+ );
|
||||||
+ client->image = g_strdup(qdict_get_try_str(options, "image"));
|
+ image = client->image = g_strdup(qdict_get_try_str(options, "image"));
|
||||||
+ client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
+ client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
||||||
+ if (client->image)
|
+ // Get image metadata (size and readonly flag) or just wait until the client is ready
|
||||||
+ {
|
+ if (!image)
|
||||||
+ // Get image metadata (size and readonly flag)
|
+ client->image = (char*)"x";
|
||||||
+ VitastorRPC task;
|
|
||||||
+ task.complete = 0;
|
+ task.complete = 0;
|
||||||
+ task.bs = bs;
|
+ task.bs = bs;
|
||||||
+ if (qemu_in_coroutine())
|
+ if (qemu_in_coroutine())
|
||||||
|
@ -434,6 +445,9 @@ Index: a/block/vitastor.c
|
||||||
+ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
|
+ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
|
||||||
+ BDRV_POLL_WHILE(bs, !task.complete);
|
+ BDRV_POLL_WHILE(bs, !task.complete);
|
||||||
+ }
|
+ }
|
||||||
|
+ client->image = image;
|
||||||
|
+ if (client->image)
|
||||||
|
+ {
|
||||||
+ client->watch = (void*)task.ret;
|
+ client->watch = (void*)task.ret;
|
||||||
+ client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
|
+ client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
|
||||||
+ client->size = vitastor_c_inode_get_size(client->watch);
|
+ client->size = vitastor_c_inode_get_size(client->watch);
|
||||||
|
@ -458,6 +472,7 @@ Index: a/block/vitastor.c
|
||||||
+ client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
|
+ client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
|
||||||
+ }
|
+ }
|
||||||
+ client->size = qdict_get_try_int(options, "size", 0);
|
+ client->size = qdict_get_try_int(options, "size", 0);
|
||||||
|
+ vitastor_c_close_watch(client->proxy, (void*)task.ret);
|
||||||
+ }
|
+ }
|
||||||
+ if (!client->size)
|
+ if (!client->size)
|
||||||
+ {
|
+ {
|
||||||
|
@ -479,6 +494,7 @@ Index: a/block/vitastor.c
|
||||||
+ qdict_del(options, "inode");
|
+ qdict_del(options, "inode");
|
||||||
+ qdict_del(options, "pool");
|
+ qdict_del(options, "pool");
|
||||||
+ qdict_del(options, "size");
|
+ qdict_del(options, "size");
|
||||||
|
+ qdict_del(options, "skip-parents");
|
||||||
+ return ret;
|
+ return ret;
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
|
@ -495,6 +511,8 @@ Index: a/block/vitastor.c
|
||||||
+ g_free(client->etcd_prefix);
|
+ g_free(client->etcd_prefix);
|
||||||
+ if (client->image)
|
+ if (client->image)
|
||||||
+ g_free(client->image);
|
+ g_free(client->image);
|
||||||
|
+ free(client->last_bitmap);
|
||||||
|
+ client->last_bitmap = NULL;
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
|
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
|
||||||
|
@ -660,6 +678,13 @@ Index: a/block/vitastor.c
|
||||||
+ vitastor_co_init_task(bs, &task);
|
+ vitastor_co_init_task(bs, &task);
|
||||||
+ task.iov = iov;
|
+ task.iov = iov;
|
||||||
+
|
+
|
||||||
|
+ if (client->last_bitmap)
|
||||||
|
+ {
|
||||||
|
+ // Invalidate last bitmap on write
|
||||||
|
+ free(client->last_bitmap);
|
||||||
|
+ client->last_bitmap = NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||||
+ qemu_mutex_lock(&client->mutex);
|
+ qemu_mutex_lock(&client->mutex);
|
||||||
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
|
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
|
||||||
|
@ -673,6 +698,140 @@ Index: a/block/vitastor.c
|
||||||
+ return task.ret;
|
+ return task.ret;
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
|
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
|
||||||
|
+#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
|
||||||
|
+static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap)
|
||||||
|
+{
|
||||||
|
+ VitastorRPC *task = opaque;
|
||||||
|
+ VitastorClient *client = task->bs->opaque;
|
||||||
|
+ task->ret = retval;
|
||||||
|
+ task->complete = 1;
|
||||||
|
+ if (retval >= 0)
|
||||||
|
+ {
|
||||||
|
+ task->bitmap = bitmap;
|
||||||
|
+ if (client->last_bitmap_inode == task->inode &&
|
||||||
|
+ client->last_bitmap_offset == task->offset &&
|
||||||
|
+ client->last_bitmap_len == task->len)
|
||||||
|
+ {
|
||||||
|
+ free(client->last_bitmap);
|
||||||
|
+ client->last_bitmap = bitmap;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (qemu_coroutine_self() != task->co)
|
||||||
|
+ {
|
||||||
|
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||||
|
+ aio_co_wake(task->co);
|
||||||
|
+#else
|
||||||
|
+ qemu_coroutine_enter(task->co, NULL);
|
||||||
|
+ qemu_aio_release(task);
|
||||||
|
+#endif
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int coroutine_fn vitastor_co_block_status(
|
||||||
|
+ BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
|
||||||
|
+ int64_t *pnum, int64_t *map, BlockDriverState **file)
|
||||||
|
+{
|
||||||
|
+ // Allocated => return BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID
|
||||||
|
+ // Not allocated => return 0
|
||||||
|
+ // Error => return -errno
|
||||||
|
+ // Set pnum to length of the extent, `*map` = `offset`, `*file` = `bs`
|
||||||
|
+ VitastorRPC task;
|
||||||
|
+ VitastorClient *client = bs->opaque;
|
||||||
|
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||||
|
+ uint8_t bit = 0;
|
||||||
|
+ if (client->last_bitmap && client->last_bitmap_inode == inode &&
|
||||||
|
+ client->last_bitmap_offset <= offset &&
|
||||||
|
+ client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes))
|
||||||
|
+ {
|
||||||
|
+ // Use the previously read bitmap
|
||||||
|
+ task.bitmap_granularity = client->last_bitmap_granularity;
|
||||||
|
+ task.offset = client->last_bitmap_offset;
|
||||||
|
+ task.len = client->last_bitmap_len;
|
||||||
|
+ task.bitmap = client->last_bitmap;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ // Read bitmap from this position, rounding to full inode PG blocks
|
||||||
|
+ uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
|
||||||
|
+ if (!block_size)
|
||||||
|
+ return -EAGAIN;
|
||||||
|
+ // Init coroutine
|
||||||
|
+ vitastor_co_init_task(bs, &task);
|
||||||
|
+ free(client->last_bitmap);
|
||||||
|
+ task.inode = client->last_bitmap_inode = inode;
|
||||||
|
+ task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
|
||||||
|
+ task.offset = client->last_bitmap_offset = offset / block_size * block_size;
|
||||||
|
+ task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
|
||||||
|
+ task.bitmap = client->last_bitmap = NULL;
|
||||||
|
+ qemu_mutex_lock(&client->mutex);
|
||||||
|
+ vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
|
||||||
|
+ qemu_mutex_unlock(&client->mutex);
|
||||||
|
+ while (!task.complete)
|
||||||
|
+ {
|
||||||
|
+ qemu_coroutine_yield();
|
||||||
|
+ }
|
||||||
|
+ if (task.ret < 0)
|
||||||
|
+ {
|
||||||
|
+ // Error
|
||||||
|
+ return task.ret;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (want_zero)
|
||||||
|
+ {
|
||||||
|
+ // Get precise mapping with all holes
|
||||||
|
+ uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
|
||||||
|
+ uint64_t bmp_len = task.len / task.bitmap_granularity;
|
||||||
|
+ uint64_t bmp_end = bmp_pos+1;
|
||||||
|
+ bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
|
||||||
|
+ while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit)
|
||||||
|
+ {
|
||||||
|
+ bmp_end++;
|
||||||
|
+ }
|
||||||
|
+ *pnum = (bmp_end-bmp_pos) * task.bitmap_granularity;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ // Get larger allocated extents, possibly with false positives
|
||||||
|
+ uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
|
||||||
|
+ uint64_t bmp_end = (offset+bytes-task.offset) / task.bitmap_granularity - bmp_pos;
|
||||||
|
+ while (bmp_pos < bmp_end)
|
||||||
|
+ {
|
||||||
|
+ if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8)
|
||||||
|
+ {
|
||||||
|
+ bit = bit || task.bitmap[bmp_pos >> 3];
|
||||||
|
+ bmp_pos += 8;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ bit = bit || ((task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1);
|
||||||
|
+ bmp_pos++;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ *pnum = bytes;
|
||||||
|
+ }
|
||||||
|
+ if (bit)
|
||||||
|
+ {
|
||||||
|
+ *map = offset;
|
||||||
|
+ *file = bs;
|
||||||
|
+ }
|
||||||
|
+ return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
|
||||||
|
+// QEMU 1.7-2.11
|
||||||
|
+static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs,
|
||||||
|
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
|
||||||
|
+{
|
||||||
|
+ int64_t map = 0;
|
||||||
|
+ int64_t pnumbytes = 0;
|
||||||
|
+ int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, &pnumbytes, &map, &file);
|
||||||
|
+ *pnum = pnumbytes/BDRV_SECTOR_SIZE;
|
||||||
|
+ return r;
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
+#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
|
+#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
|
||||||
+static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
|
+static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
|
||||||
+{
|
+{
|
||||||
|
@ -780,6 +939,15 @@ Index: a/block/vitastor.c
|
||||||
+ .bdrv_co_truncate = vitastor_co_truncate,
|
+ .bdrv_co_truncate = vitastor_co_truncate,
|
||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
|
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
|
||||||
|
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
|
||||||
|
+ // For snapshot export
|
||||||
|
+ .bdrv_co_block_status = vitastor_co_block_status,
|
||||||
|
+#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
|
||||||
|
+ .bdrv_co_get_block_status = vitastor_co_get_block_status,
|
||||||
|
+#endif
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
|
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
|
||||||
+ .bdrv_co_preadv = vitastor_co_preadv,
|
+ .bdrv_co_preadv = vitastor_co_preadv,
|
||||||
+ .bdrv_co_pwritev = vitastor_co_pwritev,
|
+ .bdrv_co_pwritev = vitastor_co_pwritev,
|
||||||
|
|
2
qemu
2
qemu
|
@ -1 +1 @@
|
||||||
Subproject commit 621da7789083b80d6f1ff1c0fb499334007b4f51
|
Subproject commit 54e1f5be86dd11744e45da8be6afad01d01d59e7
|
Loading…
Reference in New Issue