diff --git a/debian/changelog b/debian/changelog index 7189413..c8b8e5e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +pve-qemu-kvm (7.1.0-4+vitastor3) bullseye; urgency=medium + + * Add bdrv_co_block_status implementation for QCOW2 export support + + -- Vitaliy Filippov Thu, 12 Jan 2023 02:31:18 +0300 + pve-qemu-kvm (7.1.0-4+vitastor2) bullseye; urgency=medium * Add Vitastor support diff --git a/debian/patches/pve-qemu-7.1-vitastor.patch b/debian/patches/pve-qemu-7.1-vitastor.patch index 6de3acd..661eb5e 100644 --- a/debian/patches/pve-qemu-7.1-vitastor.patch +++ b/debian/patches/pve-qemu-7.1-vitastor.patch @@ -171,7 +171,7 @@ Index: a/block/vitastor.c =================================================================== --- /dev/null +++ a/block/vitastor.c -@@ -0,0 +1,629 @@ +@@ -0,0 +1,797 @@ +// Copyright (c) Vitaliy Filippov, 2019+ +// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details) + @@ -227,6 +227,7 @@ Index: a/block/vitastor.c + char *etcd_host; + char *etcd_prefix; + char *image; ++ int skip_parents; + uint64_t inode; + uint64_t pool; + uint64_t size; @@ -237,6 +238,10 @@ Index: a/block/vitastor.c + int rdma_gid_index; + int rdma_mtu; + QemuMutex mutex; ++ ++ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len; ++ uint32_t last_bitmap_granularity; ++ uint8_t *last_bitmap; +} VitastorClient; + +typedef struct VitastorRPC @@ -246,6 +251,9 @@ Index: a/block/vitastor.c + QEMUIOVector *iov; + long ret; + int complete; ++ uint64_t inode, offset, len; ++ uint32_t bitmap_granularity; ++ uint8_t *bitmap; +} VitastorRPC; + +static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task); @@ -321,6 +329,7 @@ Index: a/block/vitastor.c + if (!strcmp(name, "inode") || + !strcmp(name, "pool") || + !strcmp(name, "size") || ++ !strcmp(name, "skip-parents") || + !strcmp(name, "use-rdma") || + !strcmp(name, "rdma-port_num") || + !strcmp(name, "rdma-gid-index") || @@ -401,13 +410,16 @@ Index: a/block/vitastor.c + 
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) +{ ++ VitastorRPC task; + VitastorClient *client = bs->opaque; ++ void *image = NULL; + int64_t ret = 0; + qemu_mutex_init(&client->mutex); + client->config_path = g_strdup(qdict_get_try_str(options, "config-path")); + // FIXME: Rename to etcd_address + client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host")); + client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix")); ++ client->skip_parents = qdict_get_try_int(options, "skip-parents", 0); + client->use_rdma = qdict_get_try_int(options, "use-rdma", -1); + client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device")); + client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0); @@ -417,23 +429,25 @@ Index: a/block/vitastor.c + vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix, + client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0 + ); -+ client->image = g_strdup(qdict_get_try_str(options, "image")); ++ image = client->image = g_strdup(qdict_get_try_str(options, "image")); + client->readonly = (flags & BDRV_O_RDWR) ? 
1 : 0; ++ // Get image metadata (size and readonly flag) or just wait until the client is ready ++ if (!image) ++ client->image = (char*)"x"; ++ task.complete = 0; ++ task.bs = bs; ++ if (qemu_in_coroutine()) ++ { ++ vitastor_co_get_metadata(&task); ++ } ++ else ++ { ++ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task)); ++ BDRV_POLL_WHILE(bs, !task.complete); ++ } ++ client->image = image; + if (client->image) + { -+ // Get image metadata (size and readonly flag) -+ VitastorRPC task; -+ task.complete = 0; -+ task.bs = bs; -+ if (qemu_in_coroutine()) -+ { -+ vitastor_co_get_metadata(&task); -+ } -+ else -+ { -+ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task)); -+ BDRV_POLL_WHILE(bs, !task.complete); -+ } + client->watch = (void*)task.ret; + client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch); + client->size = vitastor_c_inode_get_size(client->watch); @@ -458,6 +472,7 @@ Index: a/block/vitastor.c + client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS)); + } + client->size = qdict_get_try_int(options, "size", 0); ++ vitastor_c_close_watch(client->proxy, (void*)task.ret); + } + if (!client->size) + { @@ -479,6 +494,7 @@ Index: a/block/vitastor.c + qdict_del(options, "inode"); + qdict_del(options, "pool"); + qdict_del(options, "size"); ++ qdict_del(options, "skip-parents"); + return ret; +} + @@ -495,6 +511,8 @@ Index: a/block/vitastor.c + g_free(client->etcd_prefix); + if (client->image) + g_free(client->image); ++ free(client->last_bitmap); ++ client->last_bitmap = NULL; +} + +#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2 @@ -660,6 +678,13 @@ Index: a/block/vitastor.c + vitastor_co_init_task(bs, &task); + task.iov = iov; + ++ if (client->last_bitmap) ++ { ++ // Invalidate last bitmap on write ++ free(client->last_bitmap); ++ client->last_bitmap = NULL; 
++    }
++
 +    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
 +    qemu_mutex_lock(&client->mutex);
 +    vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
@@ -673,6 +698,140 @@ Index: a/block/vitastor.c
 +    return task.ret;
 +}
 +
++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
++#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
++static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap) // completion callback given to vitastor_c_read_bitmap()
++{
++    VitastorRPC *task = opaque;
++    VitastorClient *client = task->bs->opaque;
++    task->ret = retval;
++    task->complete = 1; // ends the requester's `while (!task.complete)` wait loop
++    if (retval >= 0)
++    {
++        task->bitmap = bitmap; // NOTE(review): nothing in view frees this buffer if the cache check below fails - confirm ownership
++        if (client->last_bitmap_inode == task->inode && // adopt the result as the cached bitmap only if the
++            client->last_bitmap_offset == task->offset && // client still expects exactly this inode/offset/len window
++            client->last_bitmap_len == task->len)
++        {
++            free(client->last_bitmap);
++            client->last_bitmap = bitmap;
++        }
++    }
++    if (qemu_coroutine_self() != task->co) // resume the requester unless we are already running inside it
++    {
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
++        aio_co_wake(task->co);
++#else
++        qemu_coroutine_enter(task->co, NULL);
++        qemu_aio_release(task);
++#endif
++    }
++}
++
++static int coroutine_fn vitastor_co_block_status(
++    BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
++    int64_t *pnum, int64_t *map, BlockDriverState **file)
++{
++    // Report the allocation status of [offset, offset+bytes) from the inode's allocation bitmap.
++    // Returns BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID if allocated, 0 if not allocated, a negative value on error
++    // (-EAGAIN when the inode block size is reported as 0, otherwise the code returned by the bitmap read).
++    // *pnum receives the extent length; for allocated extents `*map` = `offset` and `*file` = `bs`.
++    VitastorRPC task;
++    VitastorClient *client = bs->opaque;
++    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
++    uint8_t bit = 0;
++    if (client->last_bitmap && client->last_bitmap_inode == inode &&
++        client->last_bitmap_offset <= offset &&
++        client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes))
++    {
++        // Reuse the cached bitmap: precise mode only needs the first byte covered, coarse mode needs the whole range
++        task.bitmap_granularity = client->last_bitmap_granularity;
++        task.offset = client->last_bitmap_offset;
++        task.len = client->last_bitmap_len;
++        task.bitmap = client->last_bitmap;
++    }
++    else
++    {
++        // Read a fresh bitmap for the request, widened to whole blocks of block_size, and record it as the cached window
++        uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
++        if (!block_size)
++            return -EAGAIN;
++        // Init the task (vitastor_co_init_task) and drop the stale cached bitmap
++        vitastor_co_init_task(bs, &task);
++        free(client->last_bitmap);
++        task.inode = client->last_bitmap_inode = inode;
++        task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
++        task.offset = client->last_bitmap_offset = offset / block_size * block_size;
++        task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
++        task.bitmap = client->last_bitmap = NULL;
++        qemu_mutex_lock(&client->mutex);
++        vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
++        qemu_mutex_unlock(&client->mutex);
++        while (!task.complete)
++        {
++            qemu_coroutine_yield();
++        }
++        if (task.ret < 0)
++        {
++            // Bitmap read failed: propagate its error code
++            return task.ret;
++        }
++    }
++    if (want_zero)
++    {
++        // Precise mode: extend the extent while the following granules carry the same bit as the first one
++        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
++        uint64_t bmp_len = task.len / task.bitmap_granularity;
++        uint64_t bmp_end = bmp_pos+1;
++        bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1; // granule N is bit (N & 7) of byte (N >> 3)
++        while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit)
++        {
++            bmp_end++;
++        }
++        *pnum = (bmp_end-bmp_pos) * task.bitmap_granularity;
++    }
++    else
++    {
++        // Coarse mode: the whole range is data if any granule in [bmp_pos, bmp_end) is set (false positives are acceptable)
++        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
++        uint64_t bmp_end = (offset+bytes-task.offset+task.bitmap_granularity-1) / task.bitmap_granularity; // exclusive END index, rounded up to include a partial last granule
++        while (bmp_pos < bmp_end)
++        {
++            if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8)
++            {
++                bit = bit || task.bitmap[bmp_pos >> 3]; // byte-aligned with 8+ granules left: test a whole byte at once
++                bmp_pos += 8;
++            }
++            else
++            {
++                bit = bit || ((task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1);
++                bmp_pos++;
++            }
++        }
++        *pnum = bytes;
++    }
++    if (bit)
++    {
++        *map = offset;
++        *file = bs;
++    }
++    return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
++}
++#endif
++#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
++// QEMU 1.7-2.11: sector-based wrapper around vitastor_co_block_status(); forwards the caller's `file` out-pointer as is
++static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs,
++    int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
++{
++    int64_t map = 0;
++    int64_t pnumbytes = 0;
++    int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, &pnumbytes, &map, file);
++    *pnum = pnumbytes/BDRV_SECTOR_SIZE;
++    return r; // NOTE(review): `map` is computed but never handed back - confirm whether this legacy API expects the offset in the return value
++}
++#endif
++#endif
++
 +#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
 +static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
 +{
@@ -780,6 +939,15 @@ Index: a/block/vitastor.c
 +    .bdrv_co_truncate = vitastor_co_truncate,
 +#endif
 +
++#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
++#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
++    // For snapshot export
++    .bdrv_co_block_status = vitastor_co_block_status,
++#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
++    .bdrv_co_get_block_status = vitastor_co_get_block_status,
++#endif
++#endif
++
 +#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
 +    .bdrv_co_preadv = vitastor_co_preadv,
 +    .bdrv_co_pwritev = vitastor_co_pwritev,
diff --git a/qemu b/qemu
index 621da77..54e1f5b 160000
--- a/qemu
+++ b/qemu
@@ -1 +1 @@
-Subproject commit 621da7789083b80d6f1ff1c0fb499334007b4f51
+Subproject commit 54e1f5be86dd11744e45da8be6afad01d01d59e7