Block patches:

- qcow2: Use threads for encrypted I/O
 - qemu-img rebase: Optimizations
 - backup job: Allow any source node, and some refactoring
 - Some general simplifications in the block layer
 -----BEGIN PGP SIGNATURE-----
 
 iQFGBAABCAAwFiEEkb62CjDbPohX0Rgp9AfbAGHVz0AFAlzti4ASHG1yZWl0ekBy
 ZWRoYXQuY29tAAoJEPQH2wBh1c9Av6YH/1mCu9KsLTGaFruJgnx4MXNUII0tsmwG
 S+ZPiGTVLpJf1Na5wuFhOvGS1aVp1YKCzwXOP3TA2h/IJ/RByx+4fSc7GjJYuSJ4
 YPoB9tTCFk/V7LehtWxmm5SlKkPEcTNPP2odnmjICL5PBBQr6w+3vthYH3BHLQtV
 rLkNjswB9YBCciEESF6YZJyXKArIpLr7l95olatpDhGYagRA4/wcUdr2VUWboMRQ
 9+8yY9CPElZcTmeQjqXqOszYSxKAWYSLfBnkdLMuE1XJRwhL91+JCezuw5WY/o9A
 QgLl9p854/sEkN0hURfLSBzLljkGESdVw6WYa1DBuqxsUPEArzd0FRk=
 =wut7
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-05-28' into staging

Block patches:
- qcow2: Use threads for encrypted I/O
- qemu-img rebase: Optimizations
- backup job: Allow any source node, and some refactoring
- Some general simplifications in the block layer

# gpg: Signature made Tue 28 May 2019 20:26:56 BST
# gpg:                using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg:                issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2019-05-28: (21 commits)
  blockdev: loosen restrictions on drive-backup source node
  qcow2-bitmap: initialize bitmap directory alignment
  qcow2: skip writing zero buffers to empty COW areas
  qemu-img: rebase: Reuse in-chain BlockDriverState
  qemu-img: rebase: Reduce reads on in-chain rebase
  qemu-img: rebase: Reuse parent BlockDriverState
  block: Make bdrv_root_attach_child() unref child_bs on failure
  block: Use bdrv_unref_child() for all children in bdrv_close()
  block/backup: refactor: split out backup_calculate_cluster_size
  block/backup: unify different modes code path
  block/backup: refactor and tolerate unallocated cluster skipping
  block/backup: move to copy_bitmap with granularity
  block/backup: simplify backup_incremental_init_copy_bitmap
  qcow2: do encryption in threads
  qcow2: bdrv_co_pwritev: move encryption code out of the lock
  qcow2: qcow2_co_preadv: improve locking
  qcow2-threads: split out generic path
  qcow2-threads: qcow2_co_do_compress: protect queuing by mutex
  qcow2-threads: use thread_pool_submit_co
  qcow2: add separate file for threaded data processing functions
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
master
Peter Maydell 2019-05-30 11:17:56 +01:00
commit e5714b5be3
23 changed files with 615 additions and 430 deletions

46
block.c
View File

@ -2243,6 +2243,13 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
}
}
/*
* This function steals the reference to child_bs from the caller.
* That reference is later dropped by bdrv_root_unref_child().
*
* On failure NULL is returned, errp is set and the reference to
* child_bs is also dropped.
*/
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role,
@ -2255,6 +2262,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
if (ret < 0) {
bdrv_abort_perm_update(child_bs);
bdrv_unref(child_bs);
return NULL;
}
@ -2274,6 +2282,14 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
return child;
}
/*
* This function transfers the reference to child_bs from the caller
* to parent_bs. That reference is later dropped by parent_bs on
* bdrv_close() or if someone calls bdrv_unref_child().
*
* On failure NULL is returned, errp is set and the reference to
* child_bs is also dropped.
*/
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
@ -2401,12 +2417,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
/* If backing_hd was already part of bs's backing chain, and
* inherits_from pointed recursively to bs then let's update it to
* point directly to bs (else it will become NULL). */
if (update_inherits_from) {
if (bs->backing && update_inherits_from) {
backing_hd->inherits_from = bs;
}
if (!bs->backing) {
bdrv_unref(backing_hd);
}
out:
bdrv_refresh_limits(bs, NULL);
@ -2594,7 +2607,6 @@ BdrvChild *bdrv_open_child(const char *filename,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
BdrvChild *c;
BlockDriverState *bs;
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@ -2603,13 +2615,7 @@ BdrvChild *bdrv_open_child(const char *filename,
return NULL;
}
c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
if (!c) {
bdrv_unref(bs);
return NULL;
}
return c;
return bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
}
/* TODO Future callers may need to specify parent/child_role in order for
@ -3877,22 +3883,12 @@ static void bdrv_close(BlockDriverState *bs)
bs->drv = NULL;
}
bdrv_set_backing_hd(bs, NULL, &error_abort);
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
bs->file = NULL;
}
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
/* TODO Remove bdrv_unref() from drivers' close function and use
* bdrv_unref_child() here */
if (child->bs->inherits_from == bs) {
child->bs->inherits_from = NULL;
}
bdrv_detach_child(child);
bdrv_unref_child(bs, child);
}
bs->backing = NULL;
bs->file = NULL;
g_free(bs->opaque);
bs->opaque = NULL;
atomic_set(&bs->copy_on_read, 0);

View File

@ -6,7 +6,7 @@ block-obj-$(CONFIG_BOCHS) += bochs.o
block-obj-$(CONFIG_VVFAT) += vvfat.o
block-obj-$(CONFIG_DMG) += dmg.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o qcow2-threads.o
block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-$(CONFIG_QED) += qed-check.o
block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o

View File

@ -112,7 +112,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
assert(QEMU_IS_ALIGNED(start, job->cluster_size));
hbitmap_reset(job->copy_bitmap, start, job->cluster_size);
nbytes = MIN(job->cluster_size, job->len - start);
if (!*bounce_buffer) {
*bounce_buffer = blk_blockalign(blk, job->cluster_size);
@ -145,7 +146,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
return nbytes;
fail:
hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
hbitmap_set(job->copy_bitmap, start, job->cluster_size);
return ret;
}
@ -165,16 +166,15 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
assert(QEMU_IS_ALIGNED(start, job->cluster_size));
nbytes = MIN(job->copy_range_size, end - start);
nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
nr_clusters);
hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters);
ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
read_flags, write_flags);
if (ret < 0) {
trace_backup_do_cow_copy_range_fail(job, start, ret);
hbitmap_set(job->copy_bitmap, start / job->cluster_size,
nr_clusters);
hbitmap_set(job->copy_bitmap, start, job->cluster_size * nr_clusters);
return ret;
}
@ -202,7 +202,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
cow_request_begin(&cow_request, job, start, end);
while (start < end) {
if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
if (!hbitmap_get(job->copy_bitmap, start)) {
trace_backup_do_cow_skip(job, start);
start += job->cluster_size;
continue; /* already copied */
@ -298,12 +298,16 @@ static void backup_clean(Job *job)
assert(s->target);
blk_unref(s->target);
s->target = NULL;
if (s->copy_bitmap) {
hbitmap_free(s->copy_bitmap);
s->copy_bitmap = NULL;
}
}
void backup_do_checkpoint(BlockJob *job, Error **errp)
{
BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
int64_t len;
assert(block_job_driver(job) == &backup_job_driver);
@ -313,8 +317,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
return;
}
len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size);
hbitmap_set(backup_job->copy_bitmap, 0, len);
hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len);
}
static void backup_drain(BlockJob *job)
@ -365,20 +368,44 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
return false;
}
static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
static bool bdrv_is_unallocated_range(BlockDriverState *bs,
int64_t offset, int64_t bytes)
{
int64_t end = offset + bytes;
while (offset < end && !bdrv_is_allocated(bs, offset, bytes, &bytes)) {
if (bytes == 0) {
return true;
}
offset += bytes;
bytes = end - offset;
}
return offset >= end;
}
static int coroutine_fn backup_loop(BackupBlockJob *job)
{
int ret;
bool error_is_read;
int64_t cluster;
int64_t offset;
HBitmapIter hbi;
BlockDriverState *bs = blk_bs(job->common.blk);
hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
while ((offset = hbitmap_iter_next(&hbi)) != -1) {
if (job->sync_mode == MIRROR_SYNC_MODE_TOP &&
bdrv_is_unallocated_range(bs, offset, job->cluster_size))
{
hbitmap_reset(job->copy_bitmap, offset, job->cluster_size);
continue;
}
do {
if (yield_and_check(job)) {
return 0;
}
ret = backup_do_cow(job, cluster * job->cluster_size,
ret = backup_do_cow(job, offset,
job->cluster_size, &error_is_read, false);
if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
BLOCK_ERROR_ACTION_REPORT)
@ -394,66 +421,43 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* init copy_bitmap from sync_bitmap */
static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
{
BdrvDirtyBitmapIter *dbi;
int64_t offset;
int64_t end = DIV_ROUND_UP(bdrv_dirty_bitmap_size(job->sync_bitmap),
job->cluster_size);
uint64_t offset = 0;
uint64_t bytes = job->len;
dbi = bdrv_dirty_iter_new(job->sync_bitmap);
while ((offset = bdrv_dirty_iter_next(dbi)) != -1) {
int64_t cluster = offset / job->cluster_size;
int64_t next_cluster;
while (bdrv_dirty_bitmap_next_dirty_area(job->sync_bitmap,
&offset, &bytes))
{
hbitmap_set(job->copy_bitmap, offset, bytes);
offset += bdrv_dirty_bitmap_granularity(job->sync_bitmap);
if (offset >= bdrv_dirty_bitmap_size(job->sync_bitmap)) {
hbitmap_set(job->copy_bitmap, cluster, end - cluster);
offset += bytes;
if (offset >= job->len) {
break;
}
offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset,
UINT64_MAX);
if (offset == -1) {
hbitmap_set(job->copy_bitmap, cluster, end - cluster);
break;
}
next_cluster = DIV_ROUND_UP(offset, job->cluster_size);
hbitmap_set(job->copy_bitmap, cluster, next_cluster - cluster);
if (next_cluster >= end) {
break;
}
bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
bytes = job->len - offset;
}
/* TODO job_progress_set_remaining() would make more sense */
job_progress_update(&job->common.job,
job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size);
bdrv_dirty_iter_free(dbi);
job->len - hbitmap_count(job->copy_bitmap));
}
static int coroutine_fn backup_run(Job *job, Error **errp)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
BlockDriverState *bs = blk_bs(s->common.blk);
int64_t offset, nb_clusters;
int ret = 0;
QLIST_INIT(&s->inflight_reqs);
qemu_co_rwlock_init(&s->flush_rwlock);
nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size);
job_progress_set_remaining(job, s->len);
s->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
backup_incremental_init_copy_bitmap(s);
} else {
hbitmap_set(s->copy_bitmap, 0, nb_clusters);
hbitmap_set(s->copy_bitmap, 0, s->len);
}
s->before_write.notify = backup_before_write_notify;
bdrv_add_before_write_notifier(bs, &s->before_write);
@ -465,68 +469,8 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
* notify callback service CoW requests. */
job_yield(job);
}
} else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
ret = backup_run_incremental(s);
} else {
/* Both FULL and TOP SYNC_MODE's require copying.. */
for (offset = 0; offset < s->len;
offset += s->cluster_size) {
bool error_is_read;
int alloced = 0;
if (yield_and_check(s)) {
break;
}
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int i;
int64_t n;
/* Check to see if these blocks are already in the
* backing file. */
for (i = 0; i < s->cluster_size;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are are all in the same state.
* For that reason we must verify each sector in the
* backup cluster length. We end up copying more than
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs, offset + i,
s->cluster_size - i, &n);
i += n;
if (alloced || n == 0) {
break;
}
}
/* If the above loop never found any sectors that are in
* the topmost image, skip this backup. */
if (alloced == 0) {
continue;
}
}
/* FULL sync mode we copy the whole drive. */
if (alloced < 0) {
ret = alloced;
} else {
ret = backup_do_cow(s, offset, s->cluster_size,
&error_is_read, false);
}
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
backup_error_action(s, error_is_read, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT) {
break;
} else {
offset -= s->cluster_size;
continue;
}
}
}
ret = backup_loop(s);
}
notifier_with_return_remove(&s->before_write);
@ -534,7 +478,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&s->flush_rwlock);
qemu_co_rwlock_unlock(&s->flush_rwlock);
hbitmap_free(s->copy_bitmap);
return ret;
}
@ -554,6 +497,42 @@ static const BlockJobDriver backup_job_driver = {
.drain = backup_drain,
};
static int64_t backup_calculate_cluster_size(BlockDriverState *target,
Error **errp)
{
int ret;
BlockDriverInfo bdi;
/*
* If there is no backing file on the target, we cannot rely on COW if our
* backup cluster size is smaller than the target cluster size. Even for
* targets with a backing file, try to avoid COW if possible.
*/
ret = bdrv_get_info(target, &bdi);
if (ret == -ENOTSUP && !target->backing) {
/* Cluster size is not defined */
warn_report("The target block device doesn't provide "
"information about the block size and it doesn't have a "
"backing file. The default block size of %u bytes is "
"used. If the actual block size of the target exceeds "
"this default, the backup may be unusable",
BACKUP_CLUSTER_SIZE_DEFAULT);
return BACKUP_CLUSTER_SIZE_DEFAULT;
} else if (ret < 0 && !target->backing) {
error_setg_errno(errp, -ret,
"Couldn't determine the cluster size of the target image, "
"which has no backing file");
error_append_hint(errp,
"Aborting, since this may create an unusable destination image\n");
return ret;
} else if (ret < 0 && target->backing) {
/* Not fatal; just trudge on ahead. */
return BACKUP_CLUSTER_SIZE_DEFAULT;
}
return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, int64_t speed,
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
@ -565,9 +544,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
JobTxn *txn, Error **errp)
{
int64_t len;
BlockDriverInfo bdi;
BackupBlockJob *job = NULL;
int ret;
int64_t cluster_size;
HBitmap *copy_bitmap = NULL;
assert(bs);
assert(target);
@ -629,6 +609,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
cluster_size = backup_calculate_cluster_size(target, errp);
if (cluster_size < 0) {
goto error;
}
copy_bitmap = hbitmap_alloc(len, ctz32(cluster_size));
/* job->len is fixed, so we can't allow resize */
job = block_job_create(job_id, &backup_job_driver, txn, bs,
BLK_PERM_CONSISTENT_READ,
@ -657,33 +644,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
/* Detect image-fleecing (and similar) schemes */
job->serialize_target_writes = bdrv_chain_contains(target, bs);
/* If there is no backing file on the target, we cannot rely on COW if our
* backup cluster size is smaller than the target cluster size. Even for
* targets with a backing file, try to avoid COW if possible. */
ret = bdrv_get_info(target, &bdi);
if (ret == -ENOTSUP && !target->backing) {
/* Cluster size is not defined */
warn_report("The target block device doesn't provide "
"information about the block size and it doesn't have a "
"backing file. The default block size of %u bytes is "
"used. If the actual block size of the target exceeds "
"this default, the backup may be unusable",
BACKUP_CLUSTER_SIZE_DEFAULT);
job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
} else if (ret < 0 && !target->backing) {
error_setg_errno(errp, -ret,
"Couldn't determine the cluster size of the target image, "
"which has no backing file");
error_append_hint(errp,
"Aborting, since this may create an unusable destination image\n");
goto error;
} else if (ret < 0 && target->backing) {
/* Not fatal; just trudge on ahead. */
job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
} else {
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
job->cluster_size = cluster_size;
job->copy_bitmap = copy_bitmap;
copy_bitmap = NULL;
job->use_copy_range = true;
job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
blk_get_max_transfer(job->target));
@ -699,6 +662,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
return &job->common;
error:
if (copy_bitmap) {
assert(!job || !job->copy_bitmap);
hbitmap_free(copy_bitmap);
}
if (sync_bitmap) {
bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
}

View File

@ -392,7 +392,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
perm, BLK_PERM_ALL, blk, errp);
if (!blk->root) {
bdrv_unref(bs);
blk_unref(blk);
return NULL;
}
@ -800,12 +799,12 @@ void blk_remove_bs(BlockBackend *blk)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
blk->perm, blk->shared_perm, blk, errp);
if (blk->root == NULL) {
return -EPERM;
}
bdrv_ref(bs);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (tgm->throttle_state) {

View File

@ -29,7 +29,6 @@
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "block/block_int.h"
#include "qcow2.h"
/* NOTICE: BME here means Bitmaps Extension and used as a namespace for
@ -754,7 +753,7 @@ static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
dir_offset = *offset;
}
dir = g_try_malloc(dir_size);
dir = g_try_malloc0(dir_size);
if (dir == NULL) {
return -ENOMEM;
}

View File

@ -23,7 +23,6 @@
*/
#include "qemu/osdep.h"
#include "block/block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
#include "trace.h"

View File

@ -27,7 +27,6 @@
#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qcow2.h"
#include "qemu/bswap.h"
#include "trace.h"
@ -472,13 +471,12 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
{
if (bytes && bs->encrypted) {
BDRVQcow2State *s = bs->opaque;
int64_t offset = (s->crypt_physical_offset ?
(cluster_offset + offset_in_cluster) :
(src_cluster_offset + offset_in_cluster));
assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
assert((bytes & ~BDRV_SECTOR_MASK) == 0);
assert(s->crypto);
if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) {
if (qcow2_co_encrypt(bs, cluster_offset,
src_cluster_offset + offset_in_cluster,
buffer, bytes) < 0) {
return false;
}
}
@ -833,7 +831,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
assert(start->offset + start->nb_bytes <= end->offset);
assert(!m->data_qiov || m->data_qiov->size == data_bytes);
if (start->nb_bytes == 0 && end->nb_bytes == 0) {
if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
return 0;
}

View File

@ -25,7 +25,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qcow2.h"
#include "qemu/range.h"
#include "qemu/bswap.h"

View File

@ -24,7 +24,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "block/block_int.h"
#include "qcow2.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"

268
block/qcow2-threads.c Normal file
View File

@ -0,0 +1,268 @@
/*
* Threaded data processing for Qcow2: compression, encryption
*
* Copyright (c) 2004-2006 Fabrice Bellard
* Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#define ZLIB_CONST
#include <zlib.h>
#include "qcow2.h"
#include "block/thread-pool.h"
#include "crypto.h"
static int coroutine_fn
qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
{
int ret;
BDRVQcow2State *s = bs->opaque;
ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
qemu_co_mutex_lock(&s->lock);
while (s->nb_threads >= QCOW2_MAX_THREADS) {
qemu_co_queue_wait(&s->thread_task_queue, &s->lock);
}
s->nb_threads++;
qemu_co_mutex_unlock(&s->lock);
ret = thread_pool_submit_co(pool, func, arg);
qemu_co_mutex_lock(&s->lock);
s->nb_threads--;
qemu_co_queue_next(&s->thread_task_queue);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
/*
* Compression
*/
typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
const void *src, size_t src_size);
typedef struct Qcow2CompressData {
void *dest;
size_t dest_size;
const void *src;
size_t src_size;
ssize_t ret;
Qcow2CompressFunc func;
} Qcow2CompressData;
/*
* qcow2_compress()
*
* @dest - destination buffer, @dest_size bytes
* @src - source buffer, @src_size bytes
*
* Returns: compressed size on success
* -ENOMEM destination buffer is not enough to store compressed data
* -EIO on any other error
*/
static ssize_t qcow2_compress(void *dest, size_t dest_size,
const void *src, size_t src_size)
{
ssize_t ret;
z_stream strm;
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-12, 9, Z_DEFAULT_STRATEGY);
if (ret != Z_OK) {
return -EIO;
}
/*
* strm.next_in is not const in old zlib versions, such as those used on
* OpenBSD/NetBSD, so cast the const away
*/
strm.avail_in = src_size;
strm.next_in = (void *) src;
strm.avail_out = dest_size;
strm.next_out = dest;
ret = deflate(&strm, Z_FINISH);
if (ret == Z_STREAM_END) {
ret = dest_size - strm.avail_out;
} else {
ret = (ret == Z_OK ? -ENOMEM : -EIO);
}
deflateEnd(&strm);
return ret;
}
/*
* qcow2_decompress()
*
* Decompress some data (not more than @src_size bytes) to produce exactly
* @dest_size bytes.
*
* @dest - destination buffer, @dest_size bytes
* @src - source buffer, @src_size bytes
*
* Returns: 0 on success
* -1 on fail
*/
static ssize_t qcow2_decompress(void *dest, size_t dest_size,
const void *src, size_t src_size)
{
int ret = 0;
z_stream strm;
memset(&strm, 0, sizeof(strm));
strm.avail_in = src_size;
strm.next_in = (void *) src;
strm.avail_out = dest_size;
strm.next_out = dest;
ret = inflateInit2(&strm, -12);
if (ret != Z_OK) {
return -1;
}
ret = inflate(&strm, Z_FINISH);
if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
/*
* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
* @src buffer may be processed partly (because in qcow2 we know size of
* compressed data with precision of one sector)
*/
ret = -1;
}
inflateEnd(&strm);
return ret;
}
static int qcow2_compress_pool_func(void *opaque)
{
Qcow2CompressData *data = opaque;
data->ret = data->func(data->dest, data->dest_size,
data->src, data->src_size);
return 0;
}
static ssize_t coroutine_fn
qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
const void *src, size_t src_size, Qcow2CompressFunc func)
{
Qcow2CompressData arg = {
.dest = dest,
.dest_size = dest_size,
.src = src,
.src_size = src_size,
.func = func,
};
qcow2_co_process(bs, qcow2_compress_pool_func, &arg);
return arg.ret;
}
ssize_t coroutine_fn
qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
const void *src, size_t src_size)
{
return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
qcow2_compress);
}
ssize_t coroutine_fn
qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
const void *src, size_t src_size)
{
return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
qcow2_decompress);
}
/*
* Cryptography
*/
/*
* Qcow2EncDecFunc: common prototype of qcrypto_block_encrypt() and
* qcrypto_block_decrypt() functions.
*/
typedef int (*Qcow2EncDecFunc)(QCryptoBlock *block, uint64_t offset,
uint8_t *buf, size_t len, Error **errp);
typedef struct Qcow2EncDecData {
QCryptoBlock *block;
uint64_t offset;
uint8_t *buf;
size_t len;
Qcow2EncDecFunc func;
} Qcow2EncDecData;
static int qcow2_encdec_pool_func(void *opaque)
{
Qcow2EncDecData *data = opaque;
return data->func(data->block, data->offset, data->buf, data->len, NULL);
}
static int coroutine_fn
qcow2_co_encdec(BlockDriverState *bs, uint64_t file_cluster_offset,
uint64_t offset, void *buf, size_t len, Qcow2EncDecFunc func)
{
BDRVQcow2State *s = bs->opaque;
Qcow2EncDecData arg = {
.block = s->crypto,
.offset = s->crypt_physical_offset ?
file_cluster_offset + offset_into_cluster(s, offset) :
offset,
.buf = buf,
.len = len,
.func = func,
};
return qcow2_co_process(bs, qcow2_encdec_pool_func, &arg);
}
int coroutine_fn
qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
uint64_t offset, void *buf, size_t len)
{
return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len,
qcrypto_block_encrypt);
}
int coroutine_fn
qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
uint64_t offset, void *buf, size_t len)
{
return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len,
qcrypto_block_decrypt);
}

View File

@ -24,10 +24,6 @@
#include "qemu/osdep.h"
#define ZLIB_CONST
#include <zlib.h>
#include "block/block_int.h"
#include "block/qdict.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
@ -44,7 +40,6 @@
#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
#include "block/thread-pool.h"
/*
Differences with QCOW:
@ -302,7 +297,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
}
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
qcow2_crypto_hdr_read_func,
bs, cflags, 1, errp);
bs, cflags, QCOW2_MAX_THREADS, errp);
if (!s->crypto) {
return -EINVAL;
}
@ -1543,7 +1538,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
}
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
NULL, NULL, cflags, 1, errp);
NULL, NULL, cflags,
QCOW2_MAX_THREADS, errp);
if (!s->crypto) {
ret = -EINVAL;
goto fail;
@ -1698,7 +1694,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
}
#endif
qemu_co_queue_init(&s->compress_wait_queue);
qemu_co_queue_init(&s->thread_task_queue);
return ret;
@ -1974,8 +1970,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
qemu_iovec_init(&hd_qiov, qiov->niov);
qemu_co_mutex_lock(&s->lock);
while (bytes != 0) {
/* prepare next request */
@ -1985,7 +1979,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
}
qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
goto fail;
}
@ -2000,10 +1996,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
if (bs->backing) {
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
&hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
@ -2019,11 +2013,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
break;
case QCOW2_CLUSTER_COMPRESSED:
qemu_co_mutex_unlock(&s->lock);
ret = qcow2_co_preadv_compressed(bs, cluster_offset,
offset, cur_bytes,
&hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
@ -2060,11 +2052,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_preadv(s->data_file,
cluster_offset + offset_in_cluster,
cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
@ -2072,13 +2062,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
assert(s->crypto);
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
if (qcrypto_block_decrypt(s->crypto,
(s->crypt_physical_offset ?
cluster_offset + offset_in_cluster :
offset),
cluster_data,
cur_bytes,
NULL) < 0) {
if (qcow2_co_decrypt(bs, cluster_offset, offset,
cluster_data, cur_bytes) < 0) {
ret = -EIO;
goto fail;
}
@ -2099,8 +2084,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
ret = 0;
fail:
qemu_co_mutex_unlock(&s->lock);
qemu_iovec_destroy(&hd_qiov);
qemu_vfree(cluster_data);
@ -2120,6 +2103,11 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
continue;
}
/* If COW regions are handled already, skip this too */
if (m->skip_cow) {
continue;
}
/* The data (middle) region must be immediately after the
* start region */
if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
@ -2145,6 +2133,80 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
return false;
}
static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
int64_t nr;
return !bytes ||
(!bdrv_is_allocated_above(bs, NULL, offset, bytes, &nr) && nr == bytes);
}
static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
{
/*
* This check is designed for optimization shortcut so it must be
* efficient.
* Instead of is_zero(), use is_unallocated() as it is faster (but not
* as accurate and can result in false negatives).
*/
return is_unallocated(bs, m->offset + m->cow_start.offset,
m->cow_start.nb_bytes) &&
is_unallocated(bs, m->offset + m->cow_end.offset,
m->cow_end.nb_bytes);
}
static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
{
BDRVQcow2State *s = bs->opaque;
QCowL2Meta *m;
if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
return 0;
}
if (bs->encrypted) {
return 0;
}
for (m = l2meta; m != NULL; m = m->next) {
int ret;
if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
continue;
}
if (!is_zero_cow(bs, m)) {
continue;
}
/*
* instead of writing zero COW buffers,
* efficiently zero out the whole clusters
*/
ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
m->nb_clusters * s->cluster_size,
true);
if (ret < 0) {
return ret;
}
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
m->nb_clusters * s->cluster_size,
BDRV_REQ_NO_FALLBACK);
if (ret < 0) {
if (ret != -ENOTSUP && ret != -EAGAIN) {
return ret;
}
continue;
}
trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
m->skip_cow = true;
}
return 0;
}
static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
@ -2181,11 +2243,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
&cluster_offset, &l2meta);
if (ret < 0) {
goto fail;
goto out_locked;
}
assert((cluster_offset & 511) == 0);
ret = qcow2_pre_write_overlap_check(bs, 0,
cluster_offset + offset_in_cluster,
cur_bytes, true);
if (ret < 0) {
goto out_locked;
}
qemu_co_mutex_unlock(&s->lock);
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
@ -2197,7 +2268,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
* s->cluster_size);
if (cluster_data == NULL) {
ret = -ENOMEM;
goto fail;
goto out_unlocked;
}
}
@ -2205,24 +2276,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
if (qcrypto_block_encrypt(s->crypto,
(s->crypt_physical_offset ?
cluster_offset + offset_in_cluster :
offset),
cluster_data,
cur_bytes, NULL) < 0) {
if (qcow2_co_encrypt(bs, cluster_offset, offset,
cluster_data, cur_bytes) < 0) {
ret = -EIO;
goto fail;
goto out_unlocked;
}
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
ret = qcow2_pre_write_overlap_check(bs, 0,
cluster_offset + offset_in_cluster, cur_bytes, true);
/* Try to efficiently initialize the physical space with zeroes */
ret = handle_alloc_space(bs, l2meta);
if (ret < 0) {
goto fail;
goto out_unlocked;
}
/* If we need to do COW, check if it's possible to merge the
@ -2230,22 +2297,22 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
* If it's not possible (or not necessary) then write the
* guest data now. */
if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
cluster_offset + offset_in_cluster);
ret = bdrv_co_pwritev(s->data_file,
cluster_offset + offset_in_cluster,
cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
goto out_unlocked;
}
}
qemu_co_mutex_lock(&s->lock);
ret = qcow2_handle_l2meta(bs, &l2meta, true);
if (ret) {
goto fail;
goto out_locked;
}
bytes -= cur_bytes;
@ -2254,8 +2321,12 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
}
ret = 0;
goto out_locked;
fail:
out_unlocked:
qemu_co_mutex_lock(&s->lock);
out_locked:
qcow2_handle_l2meta(bs, &l2meta, false);
qemu_co_mutex_unlock(&s->lock);
@ -3921,171 +3992,6 @@ fail:
return ret;
}
/*
* qcow2_compress()
*
* @dest - destination buffer, @dest_size bytes
* @src - source buffer, @src_size bytes
*
* Returns: compressed size on success
* -ENOMEM destination buffer is not enough to store compressed data
* -EIO on any other error
*/
static ssize_t qcow2_compress(void *dest, size_t dest_size,
const void *src, size_t src_size)
{
ssize_t ret;
z_stream strm;
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-12, 9, Z_DEFAULT_STRATEGY);
if (ret != Z_OK) {
return -EIO;
}
/* strm.next_in is not const in old zlib versions, such as those used on
* OpenBSD/NetBSD, so cast the const away */
strm.avail_in = src_size;
strm.next_in = (void *) src;
strm.avail_out = dest_size;
strm.next_out = dest;
ret = deflate(&strm, Z_FINISH);
if (ret == Z_STREAM_END) {
ret = dest_size - strm.avail_out;
} else {
ret = (ret == Z_OK ? -ENOMEM : -EIO);
}
deflateEnd(&strm);
return ret;
}
/*
* qcow2_decompress()
*
* Decompress some data (not more than @src_size bytes) to produce exactly
* @dest_size bytes.
*
* @dest - destination buffer, @dest_size bytes
* @src - source buffer, @src_size bytes
*
* Returns: 0 on success
* -1 on fail
*/
static ssize_t qcow2_decompress(void *dest, size_t dest_size,
const void *src, size_t src_size)
{
int ret = 0;
z_stream strm;
memset(&strm, 0, sizeof(strm));
strm.avail_in = src_size;
strm.next_in = (void *) src;
strm.avail_out = dest_size;
strm.next_out = dest;
ret = inflateInit2(&strm, -12);
if (ret != Z_OK) {
return -1;
}
ret = inflate(&strm, Z_FINISH);
if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
/* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
* @src buffer may be processed partly (because in qcow2 we know size of
* compressed data with precision of one sector) */
ret = -1;
}
inflateEnd(&strm);
return ret;
}
#define MAX_COMPRESS_THREADS 4
typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
const void *src, size_t src_size);
typedef struct Qcow2CompressData {
void *dest;
size_t dest_size;
const void *src;
size_t src_size;
ssize_t ret;
Qcow2CompressFunc func;
} Qcow2CompressData;
static int qcow2_compress_pool_func(void *opaque)
{
Qcow2CompressData *data = opaque;
data->ret = data->func(data->dest, data->dest_size,
data->src, data->src_size);
return 0;
}
static void qcow2_compress_complete(void *opaque, int ret)
{
qemu_coroutine_enter(opaque);
}
static ssize_t coroutine_fn
qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
const void *src, size_t src_size, Qcow2CompressFunc func)
{
BDRVQcow2State *s = bs->opaque;
BlockAIOCB *acb;
ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
Qcow2CompressData arg = {
.dest = dest,
.dest_size = dest_size,
.src = src,
.src_size = src_size,
.func = func,
};
while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
qemu_co_queue_wait(&s->compress_wait_queue, NULL);
}
s->nb_compress_threads++;
acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
qcow2_compress_complete,
qemu_coroutine_self());
if (!acb) {
s->nb_compress_threads--;
return -EINVAL;
}
qemu_coroutine_yield();
s->nb_compress_threads--;
qemu_co_queue_next(&s->compress_wait_queue);
return arg.ret;
}
static ssize_t coroutine_fn
qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
const void *src, size_t src_size)
{
return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
qcow2_compress);
}
static ssize_t coroutine_fn
qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
const void *src, size_t src_size)
{
return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
qcow2_decompress);
}
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static coroutine_fn int

View File

@ -28,6 +28,7 @@
#include "crypto/block.h"
#include "qemu/coroutine.h"
#include "qemu/units.h"
#include "block/block_int.h"
//#define DEBUG_ALLOC
//#define DEBUG_ALLOC2
@ -267,6 +268,8 @@ typedef struct Qcow2BitmapHeaderExt {
uint64_t bitmap_directory_offset;
} QEMU_PACKED Qcow2BitmapHeaderExt;
#define QCOW2_MAX_THREADS 4
typedef struct BDRVQcow2State {
int cluster_bits;
int cluster_size;
@ -349,8 +352,8 @@ typedef struct BDRVQcow2State {
char *image_backing_format;
char *image_data_file;
CoQueue compress_wait_queue;
int nb_compress_threads;
CoQueue thread_task_queue;
int nb_threads;
BdrvChild *data_file;
} BDRVQcow2State;
@ -402,6 +405,12 @@ typedef struct QCowL2Meta
*/
Qcow2COWRegion cow_end;
/*
* Indicates that COW regions are already handled and do not require
* any more processing.
*/
bool skip_cow;
/**
* The I/O vector with the data from the actual guest write request.
* If non-NULL, this is meant to be merged together with the data
@ -737,4 +746,17 @@ void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
const char *name,
Error **errp);
ssize_t coroutine_fn
qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
const void *src, size_t src_size);
ssize_t coroutine_fn
qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
const void *src, size_t src_size);
int coroutine_fn
qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
uint64_t offset, void *buf, size_t len);
int coroutine_fn
qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
uint64_t offset, void *buf, size_t len);
#endif

View File

@ -1019,7 +1019,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
if (child == NULL) {
s->next_child_index--;
bdrv_unref(child_bs);
goto out;
}
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);

View File

@ -68,6 +68,7 @@ qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64
qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d"
# qcow2-cluster.c
qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"

View File

@ -3450,11 +3450,16 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
backup->compress = false;
}
bs = qmp_get_root_bs(backup->device, errp);
bs = bdrv_lookup_bs(backup->device, backup->device, errp);
if (!bs) {
return NULL;
}
if (!bs->drv) {
error_setg(errp, "Device has no medium");
return NULL;
}
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);

View File

@ -204,6 +204,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
{
BdrvChild *c;
bdrv_ref(bs);
c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
job, errp);
if (c == NULL) {
@ -211,7 +212,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
}
job->nodes = g_slist_prepend(job->nodes, c);
bdrv_ref(bs);
bdrv_op_block_all(bs, job->blocker);
return 0;

View File

@ -3215,6 +3215,8 @@
#
# @cor_write: a write due to copy-on-read (since 2.11)
#
# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1)
#
# Since: 2.9
##
{ 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
@ -3233,7 +3235,7 @@
'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
'l1_shrink_write_table', 'l1_shrink_free_l2_clusters',
'cor_write'] }
'cor_write', 'cluster_alloc_space'] }
##
# @BlkdebugInjectErrorOptions:

View File

@ -3164,7 +3164,7 @@ static int img_rebase(int argc, char **argv)
BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
uint8_t *buf_old = NULL;
uint8_t *buf_new = NULL;
BlockDriverState *bs = NULL;
BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
char *filename;
const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
int c, flags, src_flags, ret;
@ -3309,29 +3309,18 @@ static int img_rebase(int argc, char **argv)
/* For safe rebasing we need to compare old and new backing file */
if (!unsafe) {
char backing_name[PATH_MAX];
QDict *options = NULL;
BlockDriverState *base_bs = backing_bs(bs);
if (bs->backing) {
if (bs->backing_format[0] != '\0') {
options = qdict_new();
qdict_put_str(options, "driver", bs->backing_format);
}
if (force_share) {
if (!options) {
options = qdict_new();
}
qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
}
bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
blk_old_backing = blk_new_open(backing_name, NULL,
options, src_flags, &local_err);
if (!blk_old_backing) {
if (base_bs) {
blk_old_backing = blk_new(BLK_PERM_CONSISTENT_READ,
BLK_PERM_ALL);
ret = blk_insert_bs(blk_old_backing, base_bs,
&local_err);
if (ret < 0) {
error_reportf_err(local_err,
"Could not open old backing file '%s': ",
backing_name);
ret = -1;
"Could not reuse old backing file '%s': ",
base_bs->filename);
goto out;
}
} else {
@ -3364,15 +3353,34 @@ static int img_rebase(int argc, char **argv)
goto out;
}
blk_new_backing = blk_new_open(out_real_path, NULL,
options, src_flags, &local_err);
g_free(out_real_path);
if (!blk_new_backing) {
error_reportf_err(local_err,
"Could not open new backing file '%s': ",
out_baseimg);
ret = -1;
goto out;
/*
* Find out whether we rebase an image on top of a previous image
* in its chain.
*/
prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
if (prefix_chain_bs) {
g_free(out_real_path);
blk_new_backing = blk_new(BLK_PERM_CONSISTENT_READ,
BLK_PERM_ALL);
ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
&local_err);
if (ret < 0) {
error_reportf_err(local_err,
"Could not reuse backing file '%s': ",
out_baseimg);
goto out;
}
} else {
blk_new_backing = blk_new_open(out_real_path, NULL,
options, src_flags, &local_err);
g_free(out_real_path);
if (!blk_new_backing) {
error_reportf_err(local_err,
"Could not open new backing file '%s': ",
out_baseimg);
ret = -1;
goto out;
}
}
}
}
@ -3448,6 +3456,23 @@ static int img_rebase(int argc, char **argv)
continue;
}
if (prefix_chain_bs) {
/*
* If cluster wasn't changed since prefix_chain, we don't need
* to take action
*/
ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
offset, n, &n);
if (ret < 0) {
error_report("error while reading image metadata: %s",
strerror(-ret));
goto out;
}
if (!ret) {
continue;
}
}
/*
* Read old and new backing file and take into consideration that
* backing files may be smaller than the COW image.

View File

@ -214,7 +214,7 @@ class BackupTest(iotests.QMPTestCase):
res = self.vm.qmp('query-block-jobs')
self.assert_qmp(res, 'return[0]/status', 'concluded')
# Leave zombie job un-dismissed, observe a failure:
res = self.qmp_backup_and_wait(serror='Need a root block node',
res = self.qmp_backup_and_wait(serror="Node 'drive0' is busy: block device is in use by block job: backup",
device='drive0', format=iotests.imgfmt,
sync='full', target=self.dest_img,
auto_dismiss=False)

View File

@ -150,10 +150,15 @@ $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
echo
echo "=== Testing overlap while COW is in flight ==="
echo
BACKING_IMG=$TEST_IMG.base
TEST_IMG=$BACKING_IMG _make_test_img 1G
$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW, after
# all.
IMGOPTS='compat=0.10' _make_test_img 1G
IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io

View File

@ -97,7 +97,10 @@ read 512/512 bytes at offset 0
=== Testing overlap while COW is in flight ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1073741824
wrote 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 backing_file=TEST_DIR/t.IMGFMT.base
wrote 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 65536/65536 bytes at offset 536870912

View File

@ -1436,12 +1436,6 @@ static void test_detach_indirect(bool by_parent_cb)
bdrv_unref(parent_b);
blk_unref(blk);
/* XXX Once bdrv_close() unref's children instead of just detaching them,
* this won't be necessary any more. */
bdrv_unref(a);
bdrv_unref(a);
bdrv_unref(c);
g_assert_cmpint(a->refcnt, ==, 1);
g_assert_cmpint(b->refcnt, ==, 1);
g_assert_cmpint(c->refcnt, ==, 1);

View File

@ -116,7 +116,6 @@ static void test_update_perm_tree(void)
g_assert_nonnull(local_err);
error_free(local_err);
bdrv_unref(bs);
blk_unref(root);
}