From 6545916d528de7a6b784f4d10e7b236b30bfaced Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 14 Sep 2018 19:51:16 +0300 Subject: [PATCH 1/3] nbd/server: fix bitmap export bitmap_to_extents function is broken: it switches dirty variable after every iteration, however it can process only part of dirty (or zero) area during one iteration in case when this area is too large for one extent. Fortunately, the bug doesn't produce wrong extent flags: it just inserts a zero-length extent between sequential extents representing large dirty (or zero) area. However, zero-length extents are forbidden by the NBD protocol. So, a careful client should consider such a reply as a server fault, while a less-careful will likely ignore zero-length extents. The bug can only be triggered by a client that requests block status for nearly 4G at once (a request of 4G and larger is impossible per the protocol, and requests smaller than 4G less the bitmap granularity cause the loop to quit iterating rather than revisit the tail of the large area); it also cannot trigger if the client used the NBD_CMD_FLAG_REQ_ONE flag. Since qemu 3.0 as client (using the x-dirty-bitmap extension) always passes the flag, it is immune; and we are not aware of other open-source clients that know how to request qemu:dirty-bitmap:FOO contexts. Clients that want to avoid the bug could cap block status requests to a smaller length, such as 2G or 3G. Fix this by more careful handling of dirty variable. Bug was introduced in 3d068aff16 "nbd/server: implement dirty bitmap export", with the whole function. and is present in v3.0.0 release. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180914165116.23182-1-vsementsov@virtuozzo.com> CC: qemu-stable@nongnu.org Reviewed-by: Eric Blake [eblake: improved commit message] Signed-off-by: Eric Blake --- nbd/server.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nbd/server.c b/nbd/server.c index ea5fe0eb33..12f721482d 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1951,6 +1951,8 @@ static unsigned int bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset, assert(begin < overall_end && nb_extents); while (begin < overall_end && i < nb_extents) { + bool next_dirty = !dirty; + if (dirty) { end = bdrv_dirty_bitmap_next_zero(bitmap, begin); } else { @@ -1962,6 +1964,7 @@ static unsigned int bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset, end = MIN(bdrv_dirty_bitmap_size(bitmap), begin + UINT32_MAX + 1 - bdrv_dirty_bitmap_granularity(bitmap)); + next_dirty = dirty; } if (dont_fragment && end > overall_end) { end = overall_end; @@ -1971,7 +1974,7 @@ static unsigned int bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset, extents[i].flags = cpu_to_be32(dirty ? NBD_STATE_DIRTY : 0); i++; begin = end; - dirty = !dirty; + dirty = next_dirty; } bdrv_dirty_iter_free(it); From cb9f871e80fe630422e0962751a6f9fb1795fe02 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 19 Sep 2018 15:09:34 -0400 Subject: [PATCH 2/3] qapi: bitmap-merge: document name change We named these using underscores instead of the preferred dash, document this nearby so we cannot possibly forget to rectify this when we remove the 'x-' prefixes when the feature becomes stable. We do not implement the change ahead of time to avoid more work for libvirt to do in order to figure out how to use the beta version of the API needlessly. Reported-by: Eric Blake Signed-off-by: John Snow Message-Id: <20180919190934.16284-1-jsnow@redhat.com> Reviewed-by: Eric Blake [eblake: typo fix] Signed-off-by: Eric Blake --- qapi/block-core.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qapi/block-core.json b/qapi/block-core.json index ac3b48ee54..58ec9931c7 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1935,6 +1935,8 @@ ## # @x-block-dirty-bitmap-merge: # +# FIXME: Rename @src_name and @dst_name to src-name and dst-name. +# # Merge @src_name dirty bitmap to @dst_name dirty bitmap. @src_name dirty # bitmap is unchanged. On error, @dst_name is unchanged. # From fb7afc797e071f2616e1ccc849b39fe43e7033bf Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 4 Jul 2018 14:23:02 +0300 Subject: [PATCH 3/3] nbd/server: send more than one extent of base:allocation context This is necessary for efficient block-status export, for clients which support it. (qemu is not yet such a client, but could become one.) Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180704112302.471456-3-vsementsov@virtuozzo.com> [eblake: grammar tweaks] Signed-off-by: Eric Blake --- nbd/server.c | 87 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 23 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 12f721482d..c3dd402b45 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1844,37 +1844,68 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, return ret; } -static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, NBDExtent *extent) +/* + * Populate @extents from block status. Update @bytes to be the actual + * length encoded (which may be smaller than the original), and update + * @nb_extents to the number of extents used. + * + * Returns zero on success and -errno on bdrv_block_status_above failure. + */ +static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset, + uint64_t *bytes, NBDExtent *extents, + unsigned int *nb_extents) { - uint64_t remaining_bytes = bytes; + uint64_t remaining_bytes = *bytes; + NBDExtent *extent = extents, *extents_end = extents + *nb_extents; + bool first_extent = true; + assert(*nb_extents); while (remaining_bytes) { uint32_t flags; int64_t num; int ret = bdrv_block_status_above(bs, NULL, offset, remaining_bytes, &num, NULL, NULL); + if (ret < 0) { return ret; } flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) | (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0); - - if (remaining_bytes == bytes) { - extent->flags = flags; - } - - if (flags != extent->flags) { - break; - } - offset += num; remaining_bytes -= num; + + if (first_extent) { + extent->flags = flags; + extent->length = num; + first_extent = false; + continue; + } + + if (flags == extent->flags) { + /* extend current extent */ + extent->length += num; + } else { + if (extent + 1 == extents_end) { + break; + } + + /* start new extent */ + extent++; + extent->flags = flags; + extent->length = num; + } } - cpu_to_be32s(&extent->flags); - extent->length = cpu_to_be32(bytes - remaining_bytes); + extents_end = extent + 1; + + for (extent = extents; extent < extents_end; extent++) { + cpu_to_be32s(&extent->flags); + cpu_to_be32s(&extent->length); + } + + *bytes -= remaining_bytes; + *nb_extents = extents_end - extents; return 0; } @@ -1910,21 +1941,29 @@ static int nbd_co_send_extents(NBDClient *client, uint64_t handle, /* Get block status from the exported device and send it to the client */ static int nbd_co_send_block_status(NBDClient *client, uint64_t handle, BlockDriverState *bs, uint64_t offset, - uint32_t length, bool last, - uint32_t context_id, Error **errp) + uint32_t length, bool dont_fragment, + bool last, uint32_t context_id, + Error **errp) { int ret; - NBDExtent extent; + unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS; + NBDExtent *extents = g_new(NBDExtent, nb_extents); + uint64_t final_length = length; - ret = blockstatus_to_extent_be(bs, offset, length, &extent); + ret = blockstatus_to_extents(bs, offset, &final_length, extents, + &nb_extents); if (ret < 0) { + g_free(extents); return nbd_co_send_structured_error( client, handle, -ret, "can't get block status", errp); } - return nbd_co_send_extents(client, handle, &extent, 1, - be32_to_cpu(extent.length), last, - context_id, errp); + ret = nbd_co_send_extents(client, handle, extents, nb_extents, + final_length, last, context_id, errp); + + g_free(extents); + + return ret; } /* @@ -2231,10 +2270,12 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, (client->export_meta.base_allocation || client->export_meta.bitmap)) { + bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE; + if (client->export_meta.base_allocation) { ret = nbd_co_send_block_status(client, request->handle, blk_bs(exp->blk), request->from, - request->len, + request->len, dont_fragment, !client->export_meta.bitmap, NBD_META_ID_BASE_ALLOCATION, errp); @@ -2247,7 +2288,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, ret = nbd_co_send_bitmap(client, request->handle, client->exp->export_bitmap, request->from, request->len, - request->flags & NBD_CMD_FLAG_REQ_ONE, + dont_fragment, true, NBD_META_ID_DIRTY_BITMAP, errp); if (ret < 0) { return ret;