From 0c304110bd7092cbabff55b45193e13549b53e40 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Sat, 14 Mar 2015 10:23:14 +0100 Subject: [PATCH 01/17] iotests: Update 051's reference output Commit c4bacaf improved error reporting, but neglected to update 051.out. Commit 2726958 tried to redress, but didn't get it quite right (punctuation difference), and shortly after commit ae071cc..master improved error reporting some more, neglecting 051.out some more. Sorry! Signed-off-by: Markus Armbruster Signed-off-by: Kevin Wolf --- tests/qemu-iotests/051.out | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 09895e6136..2890eac084 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -128,13 +128,11 @@ QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive if=virtio QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty -QEMU_PROG: -drive if=virtio: Device initialization failed. QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized Testing: -drive if=scsi QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -drive if=scsi: Device needs media, but drive is empty -QEMU_PROG: Initialization of device lsi53c895a failed: Device initialization failed. +(qemu) QEMU_PROG: Initialization of device lsi53c895a failed: Device needs media, but drive is empty Testing: -drive if=none,id=disk -device ide-cd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information From 97a2ca7ae6bccb78b2acf10d6d3eebd1e9f3bbdc Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Sat, 14 Mar 2015 10:23:15 +0100 Subject: [PATCH 02/17] qemu-img: Fix convert, amend error messages for unknown options Message quality regressed in commit dc523cd. Signed-off-by: Markus Armbruster Signed-off-by: Kevin Wolf --- qemu-img.c | 6 ++---- tests/qemu-iotests/061.out | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 5af6f455df..82d907801e 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1554,8 +1554,7 @@ static int img_convert(int argc, char **argv) if (options) { qemu_opts_do_parse(opts, options, NULL, &local_err); if (local_err) { - error_report("Invalid options for file format '%s'", out_fmt); - error_free(local_err); + error_report_err(local_err); ret = -1; goto out; } @@ -3001,8 +3000,7 @@ static int img_amend(int argc, char **argv) if (options) { qemu_opts_do_parse(opts, options, NULL, &err); if (err) { - error_report("Invalid options for file format '%s'", fmt); - error_free(err); + error_report_err(err); ret = -1; goto out; } diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index e70f9834d8..5ec248f79b 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -288,7 +288,6 @@ qemu-img: Error while amending options: Invalid argument Unknown compatibility level 0.42. qemu-img: Error while amending options: Invalid argument qemu-img: Invalid parameter 'foo' -qemu-img: Invalid options for file format 'qcow2' Changing the cluster size is not supported. qemu-img: Error while amending options: Operation not supported Changing the encryption flag is not supported. From a1f688f4152e65260b94f37543521ceff8bfebe4 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Fri, 13 Mar 2015 21:09:40 +0100 Subject: [PATCH 03/17] block: Deprecate QCOW/QCOW2 encryption We've steered users away from QCOW/QCOW2 encryption for a while, because it's a flawed design (commit 136cd19 Describe flaws in qcow/qcow2 encryption in the docs). In addition to flawed crypto, we have comically bad usability, and plain old bugs. Let me show you. = Example images = I'm going to use a raw image as backing file, and two QCOW2 images, one encrypted, and one not: $ qemu-img create -f raw backing.img 4m Formatting 'backing.img', fmt=raw size=4194304 $ qemu-img create -f qcow2 -o encryption,backing_file=backing.img,backing_fmt=raw geheim.qcow2 4m Formatting 'geheim.qcow2', fmt=qcow2 size=4194304 backing_file='backing.img' backing_fmt='raw' encryption=on cluster_size=65536 lazy_refcounts=off $ qemu-img create -f qcow2 -o backing_file=backing.img,backing_fmt=raw normal.qcow2 4m Formatting 'normal.qcow2', fmt=qcow2 size=4194304 backing_file='backing.img' backing_fmt='raw' encryption=off cluster_size=65536 lazy_refcounts=off = Usability issues = == Confusing startup == When no image is encrypted, and you don't give -S, QEMU starts the guest immediately: $ qemu-system-x86_64 -nodefaults -display none -monitor stdio normal.qcow2 QEMU 2.2.50 monitor - type 'help' for more information (qemu) info status VM status: running But as soon as there's an encrypted image in play, the guest is *not* started, with no notification whatsoever: $ qemu-system-x86_64 -nodefaults -display none -monitor stdio geheim.qcow2 QEMU 2.2.50 monitor - type 'help' for more information (qemu) info status VM status: paused (prelaunch) If the user figured out that he needs to type "cont" to enter his keys, the confusion enters the next level: "cont" asks for at most *one* key. If more are needed, it then silently does nothing. The user has to type "cont" once per encrypted image: $ qemu-system-x86_64 -nodefaults -display none -monitor stdio -drive if=none,file=geheim.qcow2 -drive if=none,file=geheim.qcow2 QEMU 2.2.50 monitor - type 'help' for more information (qemu) info status VM status: paused (prelaunch) (qemu) c none0 (geheim.qcow2) is encrypted. Password: ****** (qemu) info status VM status: paused (prelaunch) (qemu) c none1 (geheim.qcow2) is encrypted. Password: ****** (qemu) info status VM status: running == Incorrect passwords not caught == All existing encryption schemes give you the GIGO treatment: garbage password in, garbage data out. Guests usually refuse to mount garbage, but other usage is prone to data loss. == Need to stop the guest to add an encrypted image == $ qemu-system-x86_64 -nodefaults -display none -monitor stdio QEMU 2.2.50 monitor - type 'help' for more information (qemu) info status VM status: running (qemu) drive_add "" if=none,file=geheim.qcow2 Guest must be stopped for opening of encrypted image (qemu) stop (qemu) drive_add "" if=none,file=geheim.qcow2 OK Commit c3adb58 added this restriction. Before, we could expose images lacking an encryption key to guests, with potentially catastrophic results. See also "Use without key is not always caught". = Bugs = == Use without key is not always caught == Encrypted images can be in an intermediate state "opened, but no key". The weird startup behavior and the need to stop the guest are there to ensure the guest isn't exposed to that state. But other things still are! * drive_backup $ qemu-system-x86_64 -nodefaults -display none -monitor stdio geheim.qcow2 QEMU 2.2.50 monitor - type 'help' for more information (qemu) drive_backup -f ide0-hd0 out.img raw Formatting 'out.img', fmt=raw size=4194304 I guess this writes encrypted data to raw image out.img. Good luck with figuring out how to decrypt that again. * commit $ qemu-system-x86_64 -nodefaults -display none -monitor stdio geheim.qcow2 QEMU 2.2.50 monitor - type 'help' for more information (qemu) commit ide0-hd0 I guess this writes encrypted data into the unencrypted raw backing image, effectively destroying it. == QMP device_add of usb-storage fails when it shouldn't == When the image is encrypted, device_add creates the device, defers actually attaching it to when the key becomes available, then fails. This is wrong. device_add must either create the device and succeed, or do nothing and fail. $ qemu-system-x86_64 -nodefaults -display none -usb -qmp stdio -drive if=none,id=foo,file=geheim.qcow2 {"QMP": {"version": {"qemu": {"micro": 50, "minor": 2, "major": 2}, "package": ""}, "capabilities": []}} { "execute": "qmp_capabilities" } {"return": {}} { "execute": "device_add", "arguments": { "driver": "usb-storage", "id": "bar", "drive": "foo" } } {"error": {"class": "DeviceEncrypted", "desc": "'foo' (geheim.qcow2) is encrypted"}} {"execute":"device_del","arguments": { "id": "bar" } } {"timestamp": {"seconds": 1426003440, "microseconds": 237181}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/bar/bar.0/legacy[0]"}} {"timestamp": {"seconds": 1426003440, "microseconds": 238231}, "event": "DEVICE_DELETED", "data": {"device": "bar", "path": "/machine/peripheral/bar"}} {"return": {}} This stuff is worse than useless, it's a trap for users. If people become sufficiently interested in encrypted images to contribute a cryptographically sane implementation for QCOW2 (or whatever other format), then rewriting the necessary support around it from scratch will likely be easier and yield better results than fixing up the existing mess. Let's deprecate the mess now, drop it after a grace period, and move on. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 7 +++++++ qemu-doc.texi | 11 ++++++----- tests/qemu-iotests/049.out | 6 ++++++ tests/qemu-iotests/087.out | 18 ++++++++++++++++++ 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index 191a847940..10c32eb327 100644 --- a/block.c +++ b/block.c @@ -1065,6 +1065,13 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, goto free_and_fail; } + if (bs->encrypted) { + error_report("Encrypted images are deprecated"); + error_printf("Support for them will be removed in a future release.\n" + "You can use 'qemu-img convert' to convert your image" + " to an unencrypted one.\n"); + } + ret = refresh_total_sectors(bs, bs->total_sectors); if (ret < 0) { error_setg_errno(errp, -ret, "Could not refresh total sector count"); diff --git a/qemu-doc.texi b/qemu-doc.texi index f5b0dc4588..8aa6dbf5d7 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -539,8 +539,8 @@ storage. @item qcow2 QEMU image format, the most versatile format. Use it to have smaller images (useful if your filesystem does not supports holes, for example -on Windows), optional AES encryption, zlib based compression and -support of multiple VM snapshots. +on Windows), zlib based compression and support of multiple VM +snapshots. Supported options: @table @code @@ -574,9 +574,10 @@ original file must then be securely erased using a program like shred, though even this is ineffective with many modern storage technologies. @end itemize -Use of qcow / qcow2 encryption is thus strongly discouraged. Users are -recommended to use an alternative encryption technology such as the -Linux dm-crypt / LUKS system. +Use of qcow / qcow2 encryption with QEMU is deprecated, and support for +it will go away in a future release. Users are recommended to use an +alternative encryption technology such as the Linux dm-crypt / LUKS +system. @item cluster_size Changes the qcow2 cluster size (must be between 512 and 2M). Smaller cluster diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 765afdd2a6..9f93666c5b 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -188,6 +188,12 @@ qemu-img create -f qcow2 -o encryption=off TEST_DIR/t.qcow2 64M Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o encryption=on TEST_DIR/t.qcow2 64M +qemu-img: Encrypted images are deprecated +Support for them will be removed in a future release. +You can use 'qemu-img convert' to convert your image to an unencrypted one. +qemu-img: Encrypted images are deprecated +Support for them will be removed in a future release. +You can use 'qemu-img convert' to convert your image to an unencrypted one. Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=on cluster_size=65536 lazy_refcounts=off refcount_bits=16 == Check lazy_refcounts option (only with v3) == diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index 0ba2e43b40..c71bb3aa48 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -44,10 +44,19 @@ QMP_VERSION === Encrypted image === +qemu-img: Encrypted images are deprecated +Support for them will be removed in a future release. +You can use 'qemu-img convert' to convert your image to an unencrypted one. +qemu-img: Encrypted images are deprecated +Support for them will be removed in a future release. +You can use 'qemu-img convert' to convert your image to an unencrypted one. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on Testing: -S QMP_VERSION {"return": {}} +Encrypted images are deprecated +Support for them will be removed in a future release. +You can use 'qemu-img convert' to convert your image to an unencrypted one. {"error": {"class": "GenericError", "desc": "blockdev-add doesn't support encrypted devices"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} @@ -57,6 +66,9 @@ QMP_VERSION Testing: QMP_VERSION {"return": {}} +Encrypted images are deprecated +Support for them will be removed in a future release. +You can use 'qemu-img convert' to convert your image to an unencrypted one. {"error": {"class": "GenericError", "desc": "Guest must be stopped for opening of encrypted image"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} @@ -66,6 +78,12 @@ QMP_VERSION === Missing driver === +qemu-img: Encrypted images are deprecated +Support for them will be removed in a future release. +You can use 'qemu-img convert' to convert your image to an unencrypted one. +qemu-img: Encrypted images are deprecated +Support for them will be removed in a future release. +You can use 'qemu-img convert' to convert your image to an unencrypted one. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on Testing: -S QMP_VERSION From 6ec46ad541b20a1530cedc741d19eea9ffc39ac3 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Fri, 13 Mar 2015 18:51:38 +0100 Subject: [PATCH 04/17] block: Fix block-set-write-threshold not to use funky error class Error classes are a leftover from the days of "rich" error objects. New code should always use ERROR_CLASS_GENERIC_ERROR. Commit e246211 added a use of ERROR_CLASS_DEVICE_NOT_FOUND. Replace it. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/write-threshold.c | 2 +- qapi/block-core.json | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/block/write-threshold.c b/block/write-threshold.c index c2cd517716..a53c1f5e65 100644 --- a/block/write-threshold.c +++ b/block/write-threshold.c @@ -112,7 +112,7 @@ void qmp_block_set_write_threshold(const char *node_name, bs = bdrv_find_node(node_name); if (!bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, node_name); + error_setg(errp, "Device '%s' not found", node_name); return; } diff --git a/qapi/block-core.json b/qapi/block-core.json index 90586a5949..42c885047f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1961,10 +1961,6 @@ # @write-threshold: configured threshold for the block device, bytes. # Use 0 to disable the threshold. # -# Returns: Nothing on success -# If @node name is not found on the block device graph, -# DeviceNotFound -# # Since: 2.3 ## { 'command': 'block-set-write-threshold', From 2867ce4ab86c77579e94c6bb2b6e44ddfcf67f5d Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 12 Mar 2015 16:08:02 +0100 Subject: [PATCH 05/17] qemu-img: Avoid qerror_report_err() outside QMP handlers, again qerror_report_err() is a transitional interface to help with converting existing monitor commands to QMP. It should not be used elsewhere. Replace by error_report_err(). Commit 6936f29 cleaned that up in qemu-img.c, but two calls have crept in since. Take care of them the same way. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-img.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 82d907801e..9dddfbefce 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -274,8 +274,7 @@ static int print_block_option_help(const char *filename, const char *fmt) if (filename) { proto_drv = bdrv_find_protocol(filename, true, &local_err); if (!proto_drv) { - qerror_report_err(local_err); - error_free(local_err); + error_report_err(local_err); qemu_opts_free(create_opts); return 1; } @@ -1526,8 +1525,7 @@ static int img_convert(int argc, char **argv) proto_drv = bdrv_find_protocol(out_filename, true, &local_err); if (!proto_drv) { - qerror_report_err(local_err); - error_free(local_err); + error_report_err(local_err); ret = -1; goto out; } From 14a58a4e0c2e98a7d9232e1c229a531ca231133b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:02:31 -0500 Subject: [PATCH 06/17] qcow2: Respect new_block in alloc_refcount_block() When choosing a new place for the refcount table, alloc_refcount_block() tries to infer the number of clusters used so far from its argument cluster_index (which comes from the idea that if any cluster with an index greater than cluster_index was in use, the refcount table would have to be big enough already to describe cluster_index). However, there is a cluster that may be at or after cluster_index, and which is not covered by the refcount structures, and that is the new refcount block new_block. Therefore, it should be taken into account for the blocks_used calculation. Also, because new_block already describes (or is intended to describe) cluster_index, we may not put the new refcount structures there. Signed-off-by: Max Reitz Message-id: 1423598552-24301-2-git-send-email-mreitz@redhat.com Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Max Reitz --- block/qcow2-refcount.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index dc8d186a82..6cbae1d205 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -466,8 +466,20 @@ static int alloc_refcount_block(BlockDriverState *bs, */ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW); - /* Calculate the number of refcount blocks needed so far */ - uint64_t blocks_used = DIV_ROUND_UP(cluster_index, s->refcount_block_size); + /* Calculate the number of refcount blocks needed so far; this will be the + * basis for calculating the index of the first cluster used for the + * self-describing refcount structures which we are about to create. + * + * Because we reached this point, there cannot be any refcount entries for + * cluster_index or higher indices yet. However, because new_block has been + * allocated to describe that cluster (and it will assume this role later + * on), we cannot use that index; also, new_block may actually have a higher + * cluster index than cluster_index, so it needs to be taken into account + * here (and 1 needs to be added to its value because that cluster is used). + */ + uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1, + (new_block >> s->cluster_bits) + 1), + s->refcount_block_size); if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) { return -EFBIG; From 0e8a371468ce24513b15a9ae362f12822e1973a3 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:02:32 -0500 Subject: [PATCH 07/17] iotests: Add tests for refcount table growth Signed-off-by: Max Reitz Message-id: 1423598552-24301-3-git-send-email-mreitz@redhat.com Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Max Reitz --- tests/qemu-iotests/121 | 102 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/121.out | 23 +++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 126 insertions(+) create mode 100755 tests/qemu-iotests/121 create mode 100644 tests/qemu-iotests/121.out diff --git a/tests/qemu-iotests/121 b/tests/qemu-iotests/121 new file mode 100755 index 0000000000..0912c3f0cb --- /dev/null +++ b/tests/qemu-iotests/121 @@ -0,0 +1,102 @@ +#!/bin/bash +# +# Test cases for qcow2 refcount table growth +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +echo +echo '=== New refcount structures may not conflict with existing structures ===' + +echo +echo '--- Test 1 ---' +echo + +# Preallocation speeds up the write operation, but preallocating everything will +# destroy the purpose of the write; so preallocate one KB less than what would +# cause a reftable growth... +IMGOPTS='preallocation=metadata,cluster_size=1k' _make_test_img 64512K +# ...and make the image the desired size afterwards. +$QEMU_IMG resize "$TEST_IMG" 65M + +# The first write results in a growth of the refcount table during an allocation +# which has precisely the required size so that the new refcount block allocated +# in alloc_refcount_block() is right after cluster_index; this did lead to a +# different refcount block being written to disk (a zeroed cluster) than what is +# cached (a refblock with one entry having a refcount of 1), and the second +# write would then result in that cached cluster being marked dirty and then +# in it being written to disk. +# This should not happen, the new refcount structures may not conflict with +# new_block. +# (Note that for some reason, 'write 63M 1K' does not trigger the problem) +$QEMU_IO -c 'write 62M 1025K' -c 'write 64M 1M' "$TEST_IMG" | _filter_qemu_io + +_check_test_img + + +echo +echo '--- Test 2 ---' +echo + +IMGOPTS='preallocation=metadata,cluster_size=1k' _make_test_img 64513K +# This results in an L1 table growth which in turn results in some clusters at +# the start of the image becoming free +$QEMU_IMG resize "$TEST_IMG" 65M + +# This write results in a refcount table growth; but the refblock allocated +# immediately before that (new_block) takes cluster index 4 (which is now free) +# and is thus not self-describing (in contrast to test 1, where new_block was +# self-describing). The refcount table growth algorithm then used to place the +# new refcount structures at cluster index 65536 (which is the same as the +# cluster_index parameter in this case), allocating a new refcount block for +# that cluster while new_block already existed, leaking new_block. +# Therefore, the new refcount structures may not be put at cluster_index +# (because new_block already describes that cluster, and the new structures try +# to be self-describing). +$QEMU_IO -c 'write 63M 130K' "$TEST_IMG" | _filter_qemu_io + +_check_test_img + + +# success, all done +echo +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/121.out b/tests/qemu-iotests/121.out new file mode 100644 index 0000000000..ff18e2c618 --- /dev/null +++ b/tests/qemu-iotests/121.out @@ -0,0 +1,23 @@ +QA output created by 121 + +=== New refcount structures may not conflict with existing structures === + +--- Test 1 --- + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=66060288 preallocation='metadata' +Image resized. +wrote 1049600/1049600 bytes at offset 65011712 +1.001 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1048576/1048576 bytes at offset 67108864 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. + +--- Test 2 --- + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=66061312 preallocation='metadata' +Image resized. +wrote 133120/133120 bytes at offset 66060288 +130 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. + +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 71f19d4ece..961c7cda33 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -120,5 +120,6 @@ 113 rw auto quick 114 rw auto quick 116 rw auto quick +121 rw auto 123 rw auto quick 128 rw auto quick From 4b4d7b072f0faf9008ca835af101338857b28394 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 5 Dec 2014 17:53:32 +0100 Subject: [PATCH 08/17] iotests: Test non-self-referential qcow2 refblocks It is easy to create only self-referential refblocks, but there are cases where that is impossible. This adds a test for two of those cases (combined in a single test case). Suggested-by: Eric Blake Signed-off-by: Max Reitz Message-id: 1417798412-15330-1-git-send-email-mreitz@redhat.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- tests/qemu-iotests/115 | 95 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/115.out | 8 ++++ tests/qemu-iotests/group | 1 + 3 files changed, 104 insertions(+) create mode 100755 tests/qemu-iotests/115 create mode 100644 tests/qemu-iotests/115.out diff --git a/tests/qemu-iotests/115 b/tests/qemu-iotests/115 new file mode 100755 index 0000000000..a6be1876aa --- /dev/null +++ b/tests/qemu-iotests/115 @@ -0,0 +1,95 @@ +#!/bin/bash +# +# Test case for non-self-referential qcow2 refcount blocks +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux +# This test relies on refcounts being 64 bits wide (which does not work with +# compat=0.10) +_unsupported_imgopts 'refcount_bits=\([^6]\|.\([^4]\|$\)\)' 'compat=0.10' + +echo +echo '=== Testing large refcount and L1 table ===' +echo + +# Create an image with an L1 table and a refcount table that each span twice the +# number of clusters which can be described by a single refblock; therefore, at +# least two refblocks cannot count their own refcounts because all the clusters +# they describe are part of the L1 table or refcount table. + +# One refblock can describe (with cluster_size=512 and refcount_bits=64) +# 512/8 = 64 clusters, therefore the L1 table should cover 128 clusters, which +# equals 128 * (512/8) = 8192 entries (actually, 8192 - 512/8 = 8129 would +# suffice, but it does not really matter). 8192 L2 tables can in turn describe +# 8192 * 512/8 = 524,288 clusters which cover a space of 256 MB. + +# Since with refcount_bits=64 every refcount block entry is 64 bits wide (just +# like the L2 table entries), the same calculation applies to the refcount table +# as well; the difference is that while for the L1 table the guest disk size is +# concerned, for the refcount table it is the image length that has to be at +# least 256 MB. We can achieve that by using preallocation=metadata for an image +# which has a guest disk size of 256 MB. + +IMGOPTS="$IMGOPTS,refcount_bits=64,cluster_size=512,preallocation=metadata" \ + _make_test_img 256M + +# We know for sure that the L1 and refcount tables do not overlap with any other +# structure because the metadata overlap checks would have caught that case. + +# Because qemu refuses to open qcow2 files whose L1 table does not cover the +# whole guest disk size, it is definitely large enough. On the other hand, to +# test whether the refcount table is large enough, we simply have to verify that +# indeed all the clusters are allocated, which is done by qemu-img check. + +# The final thing we need to test is whether the tables are actually covered by +# refcount blocks; since all clusters of the tables are referenced, we can use +# qemu-img check for that purpose, too. + +$QEMU_IMG check "$TEST_IMG" | \ + sed -e 's/^.* = \([0-9]\+\.[0-9]\+% allocated\).*\(clusters\)$/\1 \2/' \ + -e '/^Image end offset/d' + +# (Note that we cannot use _check_test_img because that function filters out the +# allocation status) + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/115.out b/tests/qemu-iotests/115.out new file mode 100644 index 0000000000..7b2c5e02f5 --- /dev/null +++ b/tests/qemu-iotests/115.out @@ -0,0 +1,8 @@ +QA output created by 115 + +=== Testing large refcount and L1 table === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=268435456 preallocation='metadata' +No errors were found on the image. +100.00% allocated clusters +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 961c7cda33..62621278e2 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -119,6 +119,7 @@ 112 rw auto 113 rw auto quick 114 rw auto quick +115 rw auto 116 rw auto quick 121 rw auto 123 rw auto quick From 5560625badb9e710577986de65ff4e4b413729a0 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 2 Mar 2015 19:36:46 +0800 Subject: [PATCH 09/17] monitor: Convert bdrv_find to blk_by_name Signed-off-by: Fam Zheng Message-id: 1425296209-1476-2-git-send-email-famz@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- monitor.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/monitor.c b/monitor.c index c86a89e9b0..8b703f97be 100644 --- a/monitor.c +++ b/monitor.c @@ -73,6 +73,7 @@ #include "block/qapi.h" #include "qapi/qmp-event.h" #include "qapi-event.h" +#include "sysemu/block-backend.h" /* for hmp_info_irq/pic */ #if defined(TARGET_SPARC) @@ -5414,15 +5415,15 @@ int monitor_read_block_device_key(Monitor *mon, const char *device, BlockCompletionFunc *completion_cb, void *opaque) { - BlockDriverState *bs; + BlockBackend *blk; - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { monitor_printf(mon, "Device not found %s\n", device); return -1; } - return monitor_read_bdrv_key_start(mon, bs, completion_cb, opaque); + return monitor_read_bdrv_key_start(mon, blk_bs(blk), completion_cb, opaque); } QemuOptsList qemu_mon_opts = { From c9ebaf744ea7785776c358400eb13c256a3c9db6 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 2 Mar 2015 19:36:47 +0800 Subject: [PATCH 10/17] migration: Convert bdrv_find to blk_by_name Signed-off-by: Fam Zheng Message-id: 1425296209-1476-3-git-send-email-famz@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- migration/block.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/migration/block.c b/migration/block.c index 0c7610600b..085c0fae05 100644 --- a/migration/block.c +++ b/migration/block.c @@ -23,6 +23,7 @@ #include "migration/block.h" #include "migration/migration.h" #include "sysemu/blockdev.h" +#include "sysemu/block-backend.h" #include #define BLOCK_SIZE (1 << 20) @@ -783,6 +784,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) char device_name[256]; int64_t addr; BlockDriverState *bs, *bs_prev = NULL; + BlockBackend *blk; uint8_t *buf; int64_t total_sectors = 0; int nr_sectors; @@ -800,12 +802,13 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) qemu_get_buffer(f, (uint8_t *)device_name, len); device_name[len] = '\0'; - bs = bdrv_find(device_name); - if (!bs) { + blk = blk_by_name(device_name); + if (!blk) { fprintf(stderr, "Error unknown block device %s\n", device_name); return -EINVAL; } + bs = blk_bs(blk); if (bs != bs_prev) { bs_prev = bs; From a0e8544cf84cf193e455a9a3b7d9997dbc268b4b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 2 Mar 2015 19:36:48 +0800 Subject: [PATCH 11/17] blockdev: Convert bdrv_find to blk_by_name Signed-off-by: Fam Zheng Message-id: 1425296209-1476-4-git-send-email-famz@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- blockdev.c | 92 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/blockdev.c b/blockdev.c index b9c1c0cc1a..0b509854b0 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1016,18 +1016,18 @@ fail: void hmp_commit(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_str(qdict, "device"); - BlockDriverState *bs; + BlockBackend *blk; int ret; if (!strcmp(device, "all")) { ret = bdrv_commit_all(); } else { - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { monitor_printf(mon, "Device '%s' not found\n", device); return; } - ret = bdrv_commit(bs); + ret = bdrv_commit(blk_bs(blk)); } if (ret < 0) { monitor_printf(mon, "'commit' error for '%s': %s\n", device, @@ -1092,17 +1092,20 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, const char *name, Error **errp) { - BlockDriverState *bs = bdrv_find(device); + BlockDriverState *bs; + BlockBackend *blk; AioContext *aio_context; QEMUSnapshotInfo sn; Error *local_err = NULL; SnapshotInfo *info = NULL; int ret; - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return NULL; } + bs = blk_bs(blk); if (!has_id) { id = NULL; @@ -1205,6 +1208,7 @@ static void internal_snapshot_prepare(BlkTransactionState *common, Error *local_err = NULL; const char *device; const char *name; + BlockBackend *blk; BlockDriverState *bs; QEMUSnapshotInfo old_sn, *sn; bool ret; @@ -1223,11 +1227,12 @@ static void internal_snapshot_prepare(BlkTransactionState *common, name = internal->name; /* 2. check for validation */ - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + bs = blk_bs(blk); /* AioContext is released in .clean() */ state->aio_context = bdrv_get_aio_context(bs); @@ -1494,17 +1499,19 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); BlockDriverState *bs; + BlockBackend *blk; DriveBackup *backup; Error *local_err = NULL; assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); backup = common->action->drive_backup; - bs = bdrv_find(backup->device); - if (!bs) { + blk = blk_by_name(backup->device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, backup->device); return; } + bs = blk_bs(blk); /* AioContext is released in .clean() */ state->aio_context = bdrv_get_aio_context(bs); @@ -1559,22 +1566,25 @@ static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp) BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); BlockdevBackup *backup; BlockDriverState *bs, *target; + BlockBackend *blk; Error *local_err = NULL; assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); backup = common->action->blockdev_backup; - bs = bdrv_find(backup->device); - if (!bs) { + blk = blk_by_name(backup->device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, backup->device); return; } + bs = blk_bs(blk); - target = bdrv_find(backup->target); - if (!target) { + blk = blk_by_name(backup->target); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, backup->target); return; } + target = blk_bs(blk); /* AioContext is released in .clean() */ state->aio_context = bdrv_get_aio_context(bs); @@ -1881,13 +1891,15 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, { ThrottleConfig cfg; BlockDriverState *bs; + BlockBackend *blk; AioContext *aio_context; - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + bs = blk_bs(blk); memset(&cfg, 0, sizeof(cfg)); cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps; @@ -2091,6 +2103,7 @@ void qmp_block_stream(const char *device, bool has_on_error, BlockdevOnError on_error, Error **errp) { + BlockBackend *blk; BlockDriverState *bs; BlockDriverState *base_bs = NULL; AioContext *aio_context; @@ -2101,11 +2114,12 @@ void qmp_block_stream(const char *device, on_error = BLOCKDEV_ON_ERROR_REPORT; } - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + bs = blk_bs(blk); aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); @@ -2155,6 +2169,7 @@ void qmp_block_commit(const char *device, bool has_speed, int64_t speed, Error **errp) { + BlockBackend *blk; BlockDriverState *bs; BlockDriverState *base_bs, *top_bs; AioContext *aio_context; @@ -2173,11 +2188,12 @@ void qmp_block_commit(const char *device, * live commit feature versions; for this to work, we must make sure to * perform the device lookup before any generic errors that may occur in a * scenario in which all optional arguments are omitted. */ - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + bs = blk_bs(blk); aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); @@ -2258,6 +2274,7 @@ void qmp_drive_backup(const char *device, const char *target, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp) { + BlockBackend *blk; BlockDriverState *bs; BlockDriverState *target_bs; BlockDriverState *source = NULL; @@ -2281,11 +2298,12 @@ void qmp_drive_backup(const char *device, const char *target, mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + bs = blk_bs(blk); aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); @@ -2385,6 +2403,7 @@ void qmp_blockdev_backup(const char *device, const char *target, BlockdevOnError on_target_error, Error **errp) { + BlockBackend *blk; BlockDriverState *bs; BlockDriverState *target_bs; Error *local_err = NULL; @@ -2400,20 +2419,22 @@ void qmp_blockdev_backup(const char *device, const char *target, on_target_error = BLOCKDEV_ON_ERROR_REPORT; } - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + bs = blk_bs(blk); aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - target_bs = bdrv_find(target); - if (!target_bs) { + blk = blk_by_name(target); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, target); goto out; } + target_bs = blk_bs(blk); bdrv_ref(target_bs); bdrv_set_aio_context(target_bs, aio_context); @@ -2442,6 +2463,7 @@ void qmp_drive_mirror(const char *device, const char *target, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp) { + BlockBackend *blk; BlockDriverState *bs; BlockDriverState *source, *target_bs; AioContext *aio_context; @@ -2481,11 +2503,12 @@ void qmp_drive_mirror(const char *device, const char *target, return; } - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + bs = blk_bs(blk); aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); @@ -2623,12 +2646,14 @@ out: static BlockJob *find_block_job(const char *device, AioContext **aio_context, Error **errp) { + BlockBackend *blk; BlockDriverState *bs; - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { goto notfound; } + bs = blk_bs(blk); *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(*aio_context); @@ -2733,6 +2758,7 @@ void qmp_change_backing_file(const char *device, const char *backing_file, Error **errp) { + BlockBackend *blk; BlockDriverState *bs = NULL; AioContext *aio_context; BlockDriverState *image_bs = NULL; @@ -2741,12 +2767,12 @@ void qmp_change_backing_file(const char *device, int open_flags; int ret; - /* find the top layer BDS of the chain */ - bs = bdrv_find(device); - if (!bs) { + blk = blk_by_name(device); + if (!blk) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + bs = blk_bs(blk); aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); From d51a2427f68a312b676edd0e9efce881649d1767 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 2 Mar 2015 19:36:49 +0800 Subject: [PATCH 12/17] block: Drop bdrv_find All callers are converted, so drop it. Signed-off-by: Fam Zheng Message-id: 1425296209-1476-5-git-send-email-famz@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block.c | 9 --------- include/block/block.h | 1 - 2 files changed, 10 deletions(-) diff --git a/block.c b/block.c index 10c32eb327..0fe97de568 100644 --- a/block.c +++ b/block.c @@ -3821,15 +3821,6 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), g_free(formats); } -/* This function is to find block backend bs */ -/* TODO convert callers to blk_by_name(), then remove */ -BlockDriverState *bdrv_find(const char *name) -{ - BlockBackend *blk = blk_by_name(name); - - return blk ? blk_bs(blk) : NULL; -} - /* This function is to find a node in the bs graph */ BlockDriverState *bdrv_find_node(const char *node_name) { diff --git a/include/block/block.h b/include/block/block.h index 473c746342..4c57d63fe2 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -381,7 +381,6 @@ int bdrv_media_changed(BlockDriverState *bs); void bdrv_lock_medium(BlockDriverState *bs, bool locked); void bdrv_eject(BlockDriverState *bs, bool eject_flag); const char *bdrv_get_format_name(BlockDriverState *bs); -BlockDriverState *bdrv_find(const char *name); BlockDriverState *bdrv_find_node(const char *node_name); BlockDeviceInfoList *bdrv_named_nodes_list(void); BlockDriverState *bdrv_lookup_bs(const char *device, From 2ec711dcd45effc8d583dee6ff92d94573aad75b Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 3 Mar 2015 11:41:52 +0100 Subject: [PATCH 13/17] block/vpc: optimize vpc_co_get_block_status *pnum can't be greater than s->block_size / BDRV_SECTOR_SIZE for allocated sectors since there is always a bitmap in between. Signed-off-by: Peter Lieven Reviewed-by: Max Reitz Message-id: 1425379316-19639-2-git-send-email-pl@kamp.de Signed-off-by: Max Reitz --- block/vpc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index 1533b6a64d..c8e17cb058 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -602,7 +602,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, { BDRVVPCState *s = bs->opaque; VHDFooter *footer = (VHDFooter*) s->footer_buf; - int64_t start, offset, next; + int64_t start, offset; bool allocated; int n; @@ -626,20 +626,18 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, *pnum += n; sector_num += n; nb_sectors -= n; - next = start + (*pnum * BDRV_SECTOR_SIZE); - + /* *pnum can't be greater than one block for allocated + * sectors since there is always a bitmap in between. */ + if (allocated) { + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + } if (nb_sectors == 0) { break; } - offset = get_sector_offset(bs, sector_num, 0); - } while ((allocated && offset == next) || (!allocated && offset == -1)); + } while (offset == -1); - if (allocated) { - return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; - } else { - return 0; - } + return 0; } /* From 0444dceee48fed54e8334428fa57f9ff997736e8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 3 Mar 2015 11:41:53 +0100 Subject: [PATCH 14/17] vpc: Ignore geometry for large images The CHS calculation as done per the VHD spec imposes a maximum image size of ~127 GB. Real VHD images exist that are larger than that. Apparently there are two separate non-standard ways to achieve this: You could use more heads than the spec does - this is the option that qemu-img create chooses. However, other images exist where the geometry is set to the maximum (65535/16/255), but the actual image size is larger. Until now, such images are truncated at 127 GB when opening them with qemu. This patch changes the vpc driver to ignore geometry in this case and only trust the size field in the header. Signed-off-by: Kevin Wolf [PL: Fixed maximum geometry in the commit msg] Signed-off-by: Peter Lieven Message-id: 1425379316-19639-3-git-send-email-pl@kamp.de Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/vpc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index c8e17cb058..1c9592ccf4 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -215,12 +215,10 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - /* images created with disk2vhd report a far higher virtual size - * than expected with the cyls * heads * sectors_per_cyl formula. - * use the footer->size instead if the image was created with - * disk2vhd. - */ - if (!strncmp(footer->creator_app, "d2v", 4)) { + /* Images that have exactly the maximum geometry are probably bigger and + * would be truncated if we adhered to the geometry for them. Rely on + * footer->size for them. */ + if (bs->total_sectors == 65535ULL * 16 * 255) { bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE; } From 690cbb095a17c429513890d991bc57c98dd83912 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 3 Mar 2015 11:41:54 +0100 Subject: [PATCH 15/17] block/vpc: make calculate_geometry spec conform The VHD spec [1] allows for total_sectors of 65535 x 16 x 255 (~127GB) represented by a CHS geometry. If total_sectors is greater than 65535 x 16 x 255 this geometry is set as a maximum. Qemu, Hyper-V and disk2vhd use this special geometry as an indicator to use the image current size from the footer as disk size. This patch changes vpc_create to effectively calculate a CxHxS geometry for the given image size if possible while rounding up if necessary. If the image size is too big to be represented in CHS we set the maximum and write the exact requested image size into the footer. This partly reverts commit 258d2edb, but leaves support for >127G disks intact. [1] http://download.microsoft.com/download/f/f/e/ffef50a5-07dd-4cf8-aaa3-442c0673a029/Virtual%20Hard%20Disk%20Format%20Spec_10_18_06.doc Signed-off-by: Peter Lieven Message-id: 1425379316-19639-4-git-send-email-pl@kamp.de Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/vpc.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index 1c9592ccf4..7e99a69137 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -46,6 +46,7 @@ enum vhd_type { #define VHD_TIMESTAMP_BASE 946684800 #define VHD_MAX_SECTORS (65535LL * 255 * 255) +#define VHD_MAX_GEOMETRY (65535LL * 16 * 255) // always big-endian typedef struct vhd_footer { @@ -218,7 +219,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, /* Images that have exactly the maximum geometry are probably bigger and * would be truncated if we adhered to the geometry for them. Rely on * footer->size for them. */ - if (bs->total_sectors == 65535ULL * 16 * 255) { + if (bs->total_sectors == VHD_MAX_GEOMETRY) { bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE; } @@ -655,26 +656,20 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, { uint32_t cyls_times_heads; - /* Allow a maximum disk size of approximately 2 TB */ - if (total_sectors > 65535LL * 255 * 255) { - return -EFBIG; - } + total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY); - if (total_sectors > 65535 * 16 * 63) { + if (total_sectors >= 65535LL * 16 * 63) { *secs_per_cyl = 255; - if (total_sectors > 65535 * 16 * 255) { - *heads = 255; - } else { - *heads = 16; - } + *heads = 16; cyls_times_heads = total_sectors / *secs_per_cyl; } else { *secs_per_cyl = 17; cyls_times_heads = total_sectors / *secs_per_cyl; *heads = (cyls_times_heads + 1023) / 1024; - if (*heads < 4) + if (*heads < 4) { *heads = 4; + } if (cyls_times_heads >= (*heads * 1024) || *heads > 16) { *secs_per_cyl = 31; @@ -830,20 +825,28 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) * Calculate matching total_size and geometry. Increase the number of * sectors requested until we get enough (or fail). This ensures that * qemu-img convert doesn't truncate images, but rather rounds up. + * + * If the image size can't be represented by a spec conform CHS geometry, + * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use + * the image size from the VHD footer to calculate total_sectors. */ - total_sectors = total_size / BDRV_SECTOR_SIZE; + total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { - if (calculate_geometry(total_sectors + i, &cyls, &heads, - &secs_per_cyl)) - { + calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); + } + + if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) { + total_sectors = total_size / BDRV_SECTOR_SIZE; + /* Allow a maximum disk size of approximately 2 TB */ + if (total_sectors > VHD_MAX_SECTORS) { ret = -EFBIG; goto out; } + } else { + total_sectors = (int64_t)cyls * heads * secs_per_cyl; + total_size = total_sectors * BDRV_SECTOR_SIZE; } - total_sectors = (int64_t) cyls * heads * secs_per_cyl; - total_size = total_sectors * BDRV_SECTOR_SIZE; - /* Prepare the Hard Disk Footer */ memset(buf, 0, 1024); From 03671ded3078ebad1f0a701042622fd5e8918bc9 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 3 Mar 2015 11:41:55 +0100 Subject: [PATCH 16/17] block/vpc: rename footer->size -> footer->current_size the field is named current size in the spec. Name it accordingly. Signed-off-by: Peter Lieven Reviewed-by: Max Reitz Message-id: 1425379316-19639-5-git-send-email-pl@kamp.de Signed-off-by: Max Reitz --- block/vpc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index 7e99a69137..226be02e26 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -66,7 +66,7 @@ typedef struct vhd_footer { char creator_os[4]; // "Wi2k" uint64_t orig_size; - uint64_t size; + uint64_t current_size; uint16_t cyls; uint8_t heads; @@ -218,9 +218,10 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, /* Images that have exactly the maximum geometry are probably bigger and * would be truncated if we adhered to the geometry for them. Rely on - * footer->size for them. */ + * footer->current_size for them. */ if (bs->total_sectors == VHD_MAX_GEOMETRY) { - bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE; + bs->total_sectors = be64_to_cpu(footer->current_size) / + BDRV_SECTOR_SIZE; } /* Allow a maximum disk size of approximately 2 TB */ @@ -868,7 +869,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) footer->major = cpu_to_be16(0x0005); footer->minor = cpu_to_be16(0x0003); footer->orig_size = cpu_to_be64(total_size); - footer->size = cpu_to_be64(total_size); + footer->current_size = cpu_to_be64(total_size); footer->cyls = cpu_to_be16(cyls); footer->heads = heads; footer->secs_per_cyl = secs_per_cyl; From 304ee9174f4761d3f4da611352a815ab27baba06 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 3 Mar 2015 11:41:56 +0100 Subject: [PATCH 17/17] block/vpc: remove disabled code from get_sector_offset The code to check the bitmap for the allocation status of each sector has been "disabled by reason" ever since the vpc driver existed. The reason might be that we might end up reading sector by sector in vpc_read if we really used it. This would be a performance desaster. The current code would furthermore not work if the disabled parts get reactivated since vpc_read and vpc_write only use get_sector_offset to check the allocation status of the first sector of a read/write operation. This might lead to sectors incorrectly treated as zero in vpc_read and to sectors getting allocated twice in vpc_write. Signed-off-by: Peter Lieven Message-id: 1425379316-19639-6-git-send-email-pl@kamp.de Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/vpc.c | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index 226be02e26..43e768ee76 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -376,38 +376,6 @@ static inline int64_t get_sector_offset(BlockDriverState *bs, bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size); } -// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n", -// sector_num, pagetable_index, pageentry_index, -// bitmap_offset, block_offset); - -// disabled by reason -#if 0 -#ifdef CACHE - if (bitmap_offset != s->last_bitmap) - { - lseek(s->fd, bitmap_offset, SEEK_SET); - - s->last_bitmap = bitmap_offset; - - // Scary! Bitmap is stored as big endian 32bit entries, - // while we used to look it up byte by byte - read(s->fd, s->pageentry_u8, 512); - for (i = 0; i < 128; i++) - be32_to_cpus(&s->pageentry_u32[i]); - } - - if ((s->pageentry_u8[pageentry_index / 8] >> (pageentry_index % 8)) & 1) - return -1; -#else - lseek(s->fd, bitmap_offset + (pageentry_index / 8), SEEK_SET); - - read(s->fd, &bitmap_entry, 1); - - if ((bitmap_entry >> (pageentry_index % 8)) & 1) - return -1; // not allocated -#endif -#endif - return block_offset; }