Merge remote-tracking branch 'kwolf/for-anthony' into staging

# By Max Reitz (11) and others
# Via Kevin Wolf
* kwolf/for-anthony: (26 commits)
  qemu-iotests: Overlapping cluster allocations
  qcow2_check: Mark image consistent
  qcow2-refcount: Repair shared refcount blocks
  qcow2-refcount: Repair OFLAG_COPIED errors
  qcow2-refcount: Move OFLAG_COPIED checks
  qcow2: Employ metadata overlap checks
  qcow2: Metadata overlap checks
  qcow2: Add corrupt bit
  qemu-iotests: Snapshotting zero clusters
  qcow2-refcount: Snapshot update for zero clusters
  option: Add assigned flag to QEMUOptionParameter
  gluster: Abort on AIO completion failure
  block: Remove old raw driver
  switch raw block driver from "raw.o" to "raw_bsd.o"
  raw_bsd: register bdrv_raw
  raw_bsd: add raw_create_options
  raw_bsd: introduce "special members"
  raw_bsd: add raw_create()
  raw_bsd: emit debug events in bdrv_co_readv() and bdrv_co_writev()
  add skeleton for BSD licensed "raw" BlockDriver
  ...

Message-id: 1378111792-20436-1-git-send-email-kwolf@redhat.com
Signed-off-by: Anthony Liguori <anthony@codemonkey.ws>
master
Anthony Liguori 2013-09-03 12:32:45 -05:00
commit bb7d4d82b6
27 changed files with 1032 additions and 325 deletions

View File

@ -743,7 +743,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
ret = -EINVAL;
goto free_and_fail;
}
assert(file != NULL);
bs->file = file;
ret = drv->bdrv_open(bs, options, open_flags);
}

View File

@ -1,4 +1,4 @@
block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o

View File

@ -168,6 +168,7 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
[BLKDBG_REFTABLE_LOAD] = "reftable_load",
[BLKDBG_REFTABLE_GROW] = "reftable_grow",
[BLKDBG_REFTABLE_UPDATE] = "reftable_update",
[BLKDBG_REFBLOCK_LOAD] = "refblock_load",
[BLKDBG_REFBLOCK_UPDATE] = "refblock_update",

View File

@ -427,20 +427,9 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
/*
* Gluster AIO callback thread failed to notify the waiting
* QEMU thread about IO completion.
*
* Complete this IO request and make the disk inaccessible for
* subsequent reads and writes.
*/
error_report("Gluster failed to notify QEMU about IO completion");
qemu_mutex_lock_iothread(); /* We are in gluster thread context */
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_release(acb);
close(s->fds[GLUSTER_FD_READ]);
close(s->fds[GLUSTER_FD_WRITE]);
qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
bs->drv = NULL; /* Make the disk inaccessible */
qemu_mutex_unlock_iothread();
error_report("Gluster AIO completion failed: %s", strerror(errno));
abort();
}
}

View File

@ -114,6 +114,23 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
return ret;
}
if (c == s->refcount_block_cache) {
ret = qcow2_pre_write_overlap_check(bs,
QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_BLOCK,
c->entries[i].offset, s->cluster_size);
} else if (c == s->l2_table_cache) {
ret = qcow2_pre_write_overlap_check(bs,
QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2,
c->entries[i].offset, s->cluster_size);
} else {
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
c->entries[i].offset, s->cluster_size);
}
if (ret < 0) {
return ret;
}
if (c == s->refcount_block_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
} else if (c == s->l2_table_cache) {

View File

@ -80,6 +80,14 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
goto fail;
}
/* the L1 position has not yet been updated, so these clusters must
* indeed be completely free */
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
new_l1_table_offset, new_l1_size2);
if (ret < 0) {
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
@ -137,7 +145,7 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
* and we really don't want bdrv_pread to perform a read-modify-write)
*/
#define L1_ENTRIES_PER_SECTOR (512 / 8)
static int write_l1_entry(BlockDriverState *bs, int l1_index)
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
BDRVQcowState *s = bs->opaque;
uint64_t buf[L1_ENTRIES_PER_SECTOR];
@ -149,6 +157,13 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index)
buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
}
ret = qcow2_pre_write_overlap_check(bs,
QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
if (ret < 0) {
return ret;
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
buf, sizeof(buf));
@ -239,7 +254,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
/* update the L1 entry */
trace_qcow2_l2_allocate_write_l1(bs, l1_index);
s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
ret = write_l1_entry(bs, l1_index);
ret = qcow2_write_l1_entry(bs, l1_index);
if (ret < 0) {
goto fail;
}
@ -368,6 +383,12 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
&s->aes_encrypt_key);
}
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
if (ret < 0) {
goto out;
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
if (ret < 0) {

View File

@ -25,6 +25,8 @@
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
#include "qemu/range.h"
#include "qapi/qmp/types.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
@ -861,11 +863,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
}
for(j = 0; j < s->l2_size; j++) {
uint64_t cluster_index;
offset = be64_to_cpu(l2_table[j]);
if (offset != 0) {
old_offset = offset;
offset &= ~QCOW_OFLAG_COPIED;
if (offset & QCOW_OFLAG_COMPRESSED) {
old_offset = offset;
offset &= ~QCOW_OFLAG_COPIED;
switch (qcow2_get_cluster_type(offset)) {
case QCOW2_CLUSTER_COMPRESSED:
nb_csectors = ((offset >> s->csize_shift) &
s->csize_mask) + 1;
if (addend != 0) {
@ -880,8 +885,16 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
}
/* compressed clusters are never modified */
refcount = 2;
} else {
uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
break;
case QCOW2_CLUSTER_NORMAL:
case QCOW2_CLUSTER_ZERO:
cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
if (!cluster_index) {
/* unallocated */
refcount = 0;
break;
}
if (addend != 0) {
refcount = update_cluster_refcount(bs, cluster_index, addend,
QCOW2_DISCARD_SNAPSHOT);
@ -893,19 +906,26 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
ret = refcount;
goto fail;
}
}
break;
if (refcount == 1) {
offset |= QCOW_OFLAG_COPIED;
}
if (offset != old_offset) {
if (addend > 0) {
qcow2_cache_set_dependency(bs, s->l2_table_cache,
s->refcount_block_cache);
}
l2_table[j] = cpu_to_be64(offset);
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
case QCOW2_CLUSTER_UNALLOCATED:
refcount = 0;
break;
default:
abort();
}
if (refcount == 1) {
offset |= QCOW_OFLAG_COPIED;
}
if (offset != old_offset) {
if (addend > 0) {
qcow2_cache_set_dependency(bs, s->l2_table_cache,
s->refcount_block_cache);
}
l2_table[j] = cpu_to_be64(offset);
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
}
}
@ -1033,7 +1053,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
BDRVQcowState *s = bs->opaque;
uint64_t *l2_table, l2_entry;
uint64_t next_contiguous_offset = 0;
int i, l2_size, nb_csectors, refcount;
int i, l2_size, nb_csectors;
/* Read L2 table from disk */
l2_size = s->l2_size * sizeof(uint64_t);
@ -1085,23 +1105,8 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
case QCOW2_CLUSTER_NORMAL:
{
/* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
uint64_t offset = l2_entry & L2E_OFFSET_MASK;
if (flags & CHECK_OFLAG_COPIED) {
refcount = get_refcount(bs, offset >> s->cluster_bits);
if (refcount < 0) {
fprintf(stderr, "Can't get refcount for offset %"
PRIx64 ": %s\n", l2_entry, strerror(-refcount));
goto fail;
}
if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
PRIx64 " refcount=%d\n", l2_entry, refcount);
res->corruptions++;
}
}
if (flags & CHECK_FRAG_INFO) {
res->bfi.allocated_clusters++;
if (next_contiguous_offset &&
@ -1158,7 +1163,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
uint64_t *l1_table, l2_offset, l1_size2;
int i, refcount, ret;
int i, ret;
l1_size2 = l1_size * sizeof(uint64_t);
@ -1182,22 +1187,6 @@ static int check_refcounts_l1(BlockDriverState *bs,
for(i = 0; i < l1_size; i++) {
l2_offset = l1_table[i];
if (l2_offset) {
/* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
if (flags & CHECK_OFLAG_COPIED) {
refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
>> s->cluster_bits);
if (refcount < 0) {
fprintf(stderr, "Can't get refcount for l2_offset %"
PRIx64 ": %s\n", l2_offset, strerror(-refcount));
goto fail;
}
if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
" refcount=%d\n", l2_offset, refcount);
res->corruptions++;
}
}
/* Mark L2 table as used */
l2_offset &= L1E_OFFSET_MASK;
inc_refcounts(bs, res, refcount_table, refcount_table_size,
@ -1228,6 +1217,241 @@ fail:
return -EIO;
}
/*
* Checks the OFLAG_COPIED flag for all L1 and L2 entries.
*
* This function does not print an error message nor does it increment
* check_errors if get_refcount fails (this is because such an error will have
* been already detected and sufficiently signaled by the calling function
* (qcow2_check_refcounts) by the time this function is called).
*/
static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
int ret;
int refcount;
int i, j;
for (i = 0; i < s->l1_size; i++) {
uint64_t l1_entry = s->l1_table[i];
uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
bool l2_dirty = false;
if (!l2_offset) {
continue;
}
refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
if (refcount < 0) {
/* don't print message nor increment check_errors */
continue;
}
if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
"l1_entry=%" PRIx64 " refcount=%d\n",
fix & BDRV_FIX_ERRORS ? "Repairing" :
"ERROR",
i, l1_entry, refcount);
if (fix & BDRV_FIX_ERRORS) {
s->l1_table[i] = refcount == 1
? l1_entry | QCOW_OFLAG_COPIED
: l1_entry & ~QCOW_OFLAG_COPIED;
ret = qcow2_write_l1_entry(bs, i);
if (ret < 0) {
res->check_errors++;
goto fail;
}
res->corruptions_fixed++;
} else {
res->corruptions++;
}
}
ret = bdrv_pread(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
strerror(-ret));
res->check_errors++;
goto fail;
}
for (j = 0; j < s->l2_size; j++) {
uint64_t l2_entry = be64_to_cpu(l2_table[j]);
uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
int cluster_type = qcow2_get_cluster_type(l2_entry);
if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
refcount = get_refcount(bs, data_offset >> s->cluster_bits);
if (refcount < 0) {
/* don't print message nor increment check_errors */
continue;
}
if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
fprintf(stderr, "%s OFLAG_COPIED data cluster: "
"l2_entry=%" PRIx64 " refcount=%d\n",
fix & BDRV_FIX_ERRORS ? "Repairing" :
"ERROR",
l2_entry, refcount);
if (fix & BDRV_FIX_ERRORS) {
l2_table[j] = cpu_to_be64(refcount == 1
? l2_entry | QCOW_OFLAG_COPIED
: l2_entry & ~QCOW_OFLAG_COPIED);
l2_dirty = true;
res->corruptions_fixed++;
} else {
res->corruptions++;
}
}
}
}
if (l2_dirty) {
ret = qcow2_pre_write_overlap_check(bs,
QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2, l2_offset,
s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table; metadata "
"overlap check failed: %s\n", strerror(-ret));
res->check_errors++;
goto fail;
}
ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
strerror(-ret));
res->check_errors++;
goto fail;
}
}
}
ret = 0;
fail:
qemu_vfree(l2_table);
return ret;
}
/*
* Writes one sector of the refcount table to the disk
*/
#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
static int write_reftable_entry(BlockDriverState *bs, int rt_index)
{
BDRVQcowState *s = bs->opaque;
uint64_t buf[RT_ENTRIES_PER_SECTOR];
int rt_start_index;
int i, ret;
rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
}
ret = qcow2_pre_write_overlap_check(bs,
QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_TABLE,
s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
sizeof(buf));
if (ret < 0) {
return ret;
}
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
if (ret < 0) {
return ret;
}
return 0;
}
/*
* Allocates a new cluster for the given refcount block (represented by its
* offset in the image file) and copies the current content there. This function
* does _not_ decrement the reference count for the currently occupied cluster.
*
* This function prints an informative message to stderr on error (and returns
* -errno); on success, 0 is returned.
*/
static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
uint64_t offset)
{
BDRVQcowState *s = bs->opaque;
int64_t new_offset = 0;
void *refcount_block = NULL;
int ret;
/* allocate new refcount block */
new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
if (new_offset < 0) {
fprintf(stderr, "Could not allocate new cluster: %s\n",
strerror(-new_offset));
ret = new_offset;
goto fail;
}
/* fetch current refcount block content */
ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
if (ret < 0) {
fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
goto fail;
}
/* new block has not yet been entered into refcount table, therefore it is
* no refcount block yet (regarding this check) */
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, new_offset,
s->cluster_size);
if (ret < 0) {
fprintf(stderr, "Could not write refcount block; metadata overlap "
"check failed: %s\n", strerror(-ret));
/* the image will be marked corrupt, so don't even attempt on freeing
* the cluster */
new_offset = 0;
goto fail;
}
/* write to new block */
ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block,
s->cluster_sectors);
if (ret < 0) {
fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
goto fail;
}
/* update refcount table */
assert(!(new_offset & (s->cluster_size - 1)));
s->refcount_table[reftable_index] = new_offset;
ret = write_reftable_entry(bs, reftable_index);
if (ret < 0) {
fprintf(stderr, "Could not update refcount table: %s\n",
strerror(-ret));
goto fail;
}
fail:
if (new_offset && (ret < 0)) {
qcow2_free_clusters(bs, new_offset, s->cluster_size,
QCOW2_DISCARD_ALWAYS);
}
if (refcount_block) {
if (ret < 0) {
qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
} else {
ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
}
}
if (ret < 0) {
return ret;
}
return new_offset;
}
/*
* Checks an image for refcount consistency.
*
@ -1304,10 +1528,39 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
inc_refcounts(bs, res, refcount_table, nb_clusters,
offset, s->cluster_size);
if (refcount_table[cluster] != 1) {
fprintf(stderr, "ERROR refcount block %" PRId64
fprintf(stderr, "%s refcount block %" PRId64
" refcount=%d\n",
fix & BDRV_FIX_ERRORS ? "Repairing" :
"ERROR",
i, refcount_table[cluster]);
res->corruptions++;
if (fix & BDRV_FIX_ERRORS) {
int64_t new_offset;
new_offset = realloc_refcount_block(bs, i, offset);
if (new_offset < 0) {
res->corruptions++;
continue;
}
/* update refcounts */
if ((new_offset >> s->cluster_bits) >= nb_clusters) {
/* increase refcount_table size if necessary */
int old_nb_clusters = nb_clusters;
nb_clusters = (new_offset >> s->cluster_bits) + 1;
refcount_table = g_realloc(refcount_table,
nb_clusters * sizeof(uint16_t));
memset(&refcount_table[old_nb_clusters], 0, (nb_clusters
- old_nb_clusters) * sizeof(uint16_t));
}
refcount_table[cluster]--;
inc_refcounts(bs, res, refcount_table, nb_clusters,
new_offset, s->cluster_size);
res->corruptions_fixed++;
} else {
res->corruptions++;
}
}
}
}
@ -1363,6 +1616,12 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
}
}
/* check OFLAG_COPIED */
ret = check_oflag_copied(bs, res, fix);
if (ret < 0) {
goto fail;
}
res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
ret = 0;
@ -1372,3 +1631,173 @@ fail:
return ret;
}
#define overlaps_with(ofs, sz) \
ranges_overlap(offset, size, ofs, sz)
/*
* Checks if the given offset into the image file is actually free to use by
* looking for overlaps with important metadata sections (L1/L2 tables etc.),
* i.e. a sanity check without relying on the refcount tables.
*
* The chk parameter specifies exactly what checks to perform (being a bitmask
* of QCow2MetadataOverlap values).
*
* Returns:
* - 0 if writing to this offset will not affect the mentioned metadata
* - a positive QCow2MetadataOverlap value indicating one overlapping section
* - a negative value (-errno) indicating an error while performing a check,
* e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
*/
int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset,
int64_t size)
{
BDRVQcowState *s = bs->opaque;
int i, j;
if (!size) {
return 0;
}
if (chk & QCOW2_OL_MAIN_HEADER) {
if (offset < s->cluster_size) {
return QCOW2_OL_MAIN_HEADER;
}
}
/* align range to test to cluster boundaries */
size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size);
offset = start_of_cluster(s, offset);
if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
if (overlaps_with(s->l1_table_offset, s->l1_size * sizeof(uint64_t))) {
return QCOW2_OL_ACTIVE_L1;
}
}
if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
if (overlaps_with(s->refcount_table_offset,
s->refcount_table_size * sizeof(uint64_t))) {
return QCOW2_OL_REFCOUNT_TABLE;
}
}
if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
return QCOW2_OL_SNAPSHOT_TABLE;
}
}
if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
for (i = 0; i < s->nb_snapshots; i++) {
if (s->snapshots[i].l1_size &&
overlaps_with(s->snapshots[i].l1_table_offset,
s->snapshots[i].l1_size * sizeof(uint64_t))) {
return QCOW2_OL_INACTIVE_L1;
}
}
}
if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
for (i = 0; i < s->l1_size; i++) {
if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
s->cluster_size)) {
return QCOW2_OL_ACTIVE_L2;
}
}
}
if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
for (i = 0; i < s->refcount_table_size; i++) {
if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
s->cluster_size)) {
return QCOW2_OL_REFCOUNT_BLOCK;
}
}
}
if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
for (i = 0; i < s->nb_snapshots; i++) {
uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
uint32_t l1_sz = s->snapshots[i].l1_size;
uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t));
int ret;
ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1,
l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
if (ret < 0) {
g_free(l1);
return ret;
}
for (j = 0; j < l1_sz; j++) {
if ((l1[j] & L1E_OFFSET_MASK) &&
overlaps_with(l1[j] & L1E_OFFSET_MASK, s->cluster_size)) {
g_free(l1);
return QCOW2_OL_INACTIVE_L2;
}
}
g_free(l1);
}
}
return 0;
}
static const char *metadata_ol_names[] = {
[QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header",
[QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table",
[QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table",
[QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
[QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
[QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
[QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table",
[QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table",
};
/*
* First performs a check for metadata overlaps (through
* qcow2_check_metadata_overlap); if that fails with a negative value (error
* while performing a check), that value is returned. If an impending overlap
* is detected, the BDS will be made unusable, the qcow2 file marked corrupt
* and -EIO returned.
*
* Returns 0 if there were neither overlaps nor errors while checking for
* overlaps; or a negative value (-errno) on error.
*/
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset,
int64_t size)
{
int ret = qcow2_check_metadata_overlap(bs, chk, offset, size);
if (ret < 0) {
return ret;
} else if (ret > 0) {
int metadata_ol_bitnr = ffs(ret) - 1;
char *message;
QObject *data;
assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps "
"with %s); image marked as corrupt.\n",
metadata_ol_names[metadata_ol_bitnr]);
message = g_strdup_printf("Prevented %s overwrite",
metadata_ol_names[metadata_ol_bitnr]);
data = qobject_from_jsonf("{ 'device': %s, 'msg': %s, 'offset': %"
PRId64 ", 'size': %" PRId64 " }", bs->device_name, message,
offset, size);
monitor_protocol_event(QEVENT_BLOCK_IMAGE_CORRUPTED, data);
g_free(message);
qobject_decref(data);
qcow2_mark_corrupt(bs);
bs->drv = NULL; /* make BDS unusable */
return -EIO;
}
return 0;
}

View File

@ -189,6 +189,15 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
return ret;
}
/* The snapshot list position has not yet been updated, so these clusters
* must indeed be completely free */
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset,
s->snapshots_size);
if (ret < 0) {
return ret;
}
/* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
@ -363,6 +372,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
sn->l1_table_offset, s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
@ -475,6 +490,13 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
ret = qcow2_pre_write_overlap_check(bs,
QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
s->l1_table_offset, cur_l1_bytes);
if (ret < 0) {
goto fail;
}
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {

View File

@ -272,6 +272,37 @@ static int qcow2_mark_clean(BlockDriverState *bs)
return 0;
}
/*
* Marks the image as corrupt.
*/
int qcow2_mark_corrupt(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
return qcow2_update_header(bs);
}
/*
* Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
* before if necessary.
*/
int qcow2_mark_consistent(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
int ret = bdrv_flush(bs);
if (ret < 0) {
return ret;
}
s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
return qcow2_update_header(bs);
}
return 0;
}
static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
BdrvCheckMode fix)
{
@ -281,7 +312,11 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
}
if (fix && result->check_errors == 0 && result->corruptions == 0) {
return qcow2_mark_clean(bs);
ret = qcow2_mark_clean(bs);
if (ret < 0) {
return ret;
}
return qcow2_mark_consistent(bs);
}
return ret;
}
@ -402,6 +437,17 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
goto fail;
}
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
/* Corrupt images may not be written to unless they are being repaired
*/
if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
error_report("qcow2: Image is corrupt; cannot be opened "
"read/write.");
ret = -EACCES;
goto fail;
}
}
/* Check support for various header values */
if (header.refcount_order != 4) {
report_unsupported(bs, "%d bit reference counts",
@ -582,6 +628,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
qcow2_free_snapshots(bs);
qcow2_refcount_close(bs);
g_free(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
if (s->l2_table_cache) {
qcow2_cache_destroy(bs, s->l2_table_cache);
}
@ -881,6 +929,13 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
cur_nr_sectors * 512);
}
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE,
cur_nr_sectors * BDRV_SECTOR_SIZE);
if (ret < 0) {
goto fail;
}
qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
@ -947,6 +1002,8 @@ static void qcow2_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
g_free(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
qcow2_cache_flush(bs, s->l2_table_cache);
qcow2_cache_flush(bs, s->refcount_block_cache);
@ -1129,6 +1186,11 @@ int qcow2_update_header(BlockDriverState *bs)
.bit = QCOW2_INCOMPAT_DIRTY_BITNR,
.name = "dirty bit",
},
{
.type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
.bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
.name = "corrupt bit",
},
{
.type = QCOW2_FEAT_TYPE_COMPATIBLE,
.bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
@ -1429,7 +1491,9 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
return -EINVAL;
}
} else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
if (!options->value.s || !strcmp(options->value.s, "0.10")) {
if (!options->value.s) {
/* keep the default */
} else if (!strcmp(options->value.s, "0.10")) {
version = 2;
} else if (!strcmp(options->value.s, "1.1")) {
version = 3;
@ -1619,6 +1683,14 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
sector_num * BDRV_SECTOR_SIZE,
s->cluster_sectors * BDRV_SECTOR_SIZE);
if (ret < 0) {
goto fail;
}
ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
@ -1631,6 +1703,13 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
cluster_offset, out_len);
if (ret < 0) {
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
if (ret < 0) {

View File

@ -40,11 +40,11 @@
#define QCOW_MAX_CRYPT_CLUSTERS 32
/* indicate that the refcount of the referenced cluster is exactly one. */
#define QCOW_OFLAG_COPIED (1LL << 63)
#define QCOW_OFLAG_COPIED (1ULL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
#define QCOW_OFLAG_COMPRESSED (1LL << 62)
#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
/* The cluster reads as all zeros */
#define QCOW_OFLAG_ZERO (1LL << 0)
#define QCOW_OFLAG_ZERO (1ULL << 0)
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
@ -119,9 +119,12 @@ enum {
/* Incompatible feature bits */
enum {
QCOW2_INCOMPAT_DIRTY_BITNR = 0,
QCOW2_INCOMPAT_CORRUPT_BITNR = 1,
QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY
| QCOW2_INCOMPAT_CORRUPT,
};
/* Compatible feature bits */
@ -286,6 +289,40 @@ enum {
QCOW2_CLUSTER_ZERO
};
typedef enum QCow2MetadataOverlap {
QCOW2_OL_MAIN_HEADER_BITNR = 0,
QCOW2_OL_ACTIVE_L1_BITNR = 1,
QCOW2_OL_ACTIVE_L2_BITNR = 2,
QCOW2_OL_REFCOUNT_TABLE_BITNR = 3,
QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4,
QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5,
QCOW2_OL_INACTIVE_L1_BITNR = 6,
QCOW2_OL_INACTIVE_L2_BITNR = 7,
QCOW2_OL_MAX_BITNR = 8,
QCOW2_OL_NONE = 0,
QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR),
QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR),
QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR),
QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR),
QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR),
QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR),
QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR),
/* NOTE: Checking overlaps with inactive L2 tables will result in bdrv
* reads. */
QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR),
} QCow2MetadataOverlap;
/* Perform all overlap checks which don't require disk access */
#define QCOW2_OL_CACHED \
(QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_ACTIVE_L2 | \
QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_REFCOUNT_BLOCK | \
QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_INACTIVE_L1)
/* The default checks to perform */
#define QCOW2_OL_DEFAULT QCOW2_OL_CACHED
#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
@ -361,6 +398,8 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
int qcow2_mark_dirty(BlockDriverState *bs);
int qcow2_mark_corrupt(BlockDriverState *bs);
int qcow2_mark_consistent(BlockDriverState *bs);
int qcow2_update_header(BlockDriverState *bs);
/* qcow2-refcount.c functions */
@ -385,9 +424,15 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
void qcow2_process_discards(BlockDriverState *bs, int ret);
int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset,
int64_t size);
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset,
int64_t size);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
void qcow2_l2_cache_reset(BlockDriverState *bs);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,

View File

@ -1,13 +1,17 @@
/*
* Block driver for RAW format
/* BlockDriver implementation for "raw"
*
* Copyright (c) 2006 Fabrice Bellard
* Copyright (C) 2010, 2013, Red Hat, Inc.
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
* Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
*
* Author:
* Laszlo Ersek <lersek@redhat.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
@ -15,27 +19,27 @@
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "qemu/option.h"
static int raw_open(BlockDriverState *bs, QDict *options, int flags)
{
bs->sg = bs->file->sg;
return 0;
}
static QEMUOptionParameter raw_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{ 0 }
};
/* We have nothing to do for raw reopen, stubs just return
* success */
static int raw_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
static int raw_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
{
return 0;
}
@ -54,45 +58,40 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
}
static void raw_close(BlockDriverState *bs)
{
}
static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
int64_t sector_num, int nb_sectors,
int *pnum)
{
return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
}
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors)
int64_t sector_num, int nb_sectors)
{
return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
return bdrv_co_discard(bs->file, sector_num, nb_sectors);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
return bdrv_get_info(bs->file, bdi);
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
return bdrv_truncate(bs->file, offset);
}
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
return 1; /* everything can be opened as raw image */
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
return bdrv_co_discard(bs->file, sector_num, nb_sectors);
}
static int raw_is_inserted(BlockDriverState *bs)
{
return bdrv_is_inserted(bs->file);
@ -115,14 +114,20 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked)
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
return bdrv_ioctl(bs->file, req, buf);
return bdrv_ioctl(bs->file, req, buf);
}
static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb, void *opaque)
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb,
void *opaque)
{
return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
}
static int raw_has_zero_init(BlockDriverState *bs)
{
return bdrv_has_zero_init(bs->file);
}
static int raw_create(const char *filename, QEMUOptionParameter *options)
@ -130,58 +135,47 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
return bdrv_create_file(filename, options);
}
static QEMUOptionParameter raw_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{ NULL }
};
static int raw_has_zero_init(BlockDriverState *bs)
static int raw_open(BlockDriverState *bs, QDict *options, int flags)
{
return bdrv_has_zero_init(bs->file);
bs->sg = bs->file->sg;
return 0;
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
static void raw_close(BlockDriverState *bs)
{
return bdrv_get_info(bs->file, bdi);
}
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
/* smallest possible positive score so that raw is used if and only if no
* other block driver works
*/
return 1;
}
static BlockDriver bdrv_raw = {
.format_name = "raw",
/* It's really 0, but we need to make g_malloc() happy */
.instance_size = 1,
.bdrv_open = raw_open,
.bdrv_close = raw_close,
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_truncate = raw_truncate,
.bdrv_is_inserted = raw_is_inserted,
.bdrv_media_changed = raw_media_changed,
.bdrv_eject = raw_eject,
.bdrv_lock_medium = raw_lock_medium,
.bdrv_ioctl = raw_ioctl,
.bdrv_aio_ioctl = raw_aio_ioctl,
.bdrv_create = raw_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = raw_has_zero_init,
.format_name = "raw",
.bdrv_probe = &raw_probe,
.bdrv_reopen_prepare = &raw_reopen_prepare,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
.bdrv_co_writev = &raw_co_writev,
.bdrv_co_write_zeroes = &raw_co_write_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_is_allocated = &raw_co_is_allocated,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.bdrv_get_info = &raw_get_info,
.bdrv_is_inserted = &raw_is_inserted,
.bdrv_media_changed = &raw_media_changed,
.bdrv_eject = &raw_eject,
.bdrv_lock_medium = &raw_lock_medium,
.bdrv_ioctl = &raw_ioctl,
.bdrv_aio_ioctl = &raw_aio_ioctl,
.create_options = &raw_create_options[0],
.bdrv_has_zero_init = &raw_has_zero_init
};
static void bdrv_raw_init(void)

View File

@ -46,7 +46,6 @@
static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
extern QemuOptsList qemu_common_drive_opts;
extern QemuOptsList qemu_old_drive_opts;
static const char *const if_name[IF_COUNT] = {
[IF_NONE] = "none",
@ -755,26 +754,6 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
{
const char *value;
/*
* Check that only old options are used by copying into a QemuOpts with
* stricter checks. Going through a QDict seems to be the easiest way to
* achieve this...
*/
QemuOpts* check_opts;
QDict *qdict;
Error *local_err = NULL;
qdict = qemu_opts_to_qdict(all_opts, NULL);
check_opts = qemu_opts_from_qdict(&qemu_old_drive_opts, qdict, &local_err);
QDECREF(qdict);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
return NULL;
}
qemu_opts_del(check_opts);
/* Change legacy command line options into QMP ones */
qemu_opt_rename(all_opts, "iops", "throttling.iops-total");
qemu_opt_rename(all_opts, "iops_rd", "throttling.iops-read");
@ -2001,128 +1980,6 @@ QemuOptsList qemu_common_drive_opts = {
},
};
QemuOptsList qemu_old_drive_opts = {
.name = "drive",
.head = QTAILQ_HEAD_INITIALIZER(qemu_old_drive_opts.head),
.desc = {
{
.name = "bus",
.type = QEMU_OPT_NUMBER,
.help = "bus number",
},{
.name = "unit",
.type = QEMU_OPT_NUMBER,
.help = "unit number (i.e. lun for scsi)",
},{
.name = "if",
.type = QEMU_OPT_STRING,
.help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)",
},{
.name = "index",
.type = QEMU_OPT_NUMBER,
.help = "index number",
},{
.name = "cyls",
.type = QEMU_OPT_NUMBER,
.help = "number of cylinders (ide disk geometry)",
},{
.name = "heads",
.type = QEMU_OPT_NUMBER,
.help = "number of heads (ide disk geometry)",
},{
.name = "secs",
.type = QEMU_OPT_NUMBER,
.help = "number of sectors (ide disk geometry)",
},{
.name = "trans",
.type = QEMU_OPT_STRING,
.help = "chs translation (auto, lba. none)",
},{
.name = "media",
.type = QEMU_OPT_STRING,
.help = "media type (disk, cdrom)",
},{
.name = "snapshot",
.type = QEMU_OPT_BOOL,
.help = "enable/disable snapshot mode",
},{
.name = "file",
.type = QEMU_OPT_STRING,
.help = "disk image",
},{
.name = "discard",
.type = QEMU_OPT_STRING,
.help = "discard operation (ignore/off, unmap/on)",
},{
.name = "cache",
.type = QEMU_OPT_STRING,
.help = "host cache usage (none, writeback, writethrough, "
"directsync, unsafe)",
},{
.name = "aio",
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)",
},{
.name = "format",
.type = QEMU_OPT_STRING,
.help = "disk format (raw, qcow2, ...)",
},{
.name = "serial",
.type = QEMU_OPT_STRING,
.help = "disk serial number",
},{
.name = "rerror",
.type = QEMU_OPT_STRING,
.help = "read error action",
},{
.name = "werror",
.type = QEMU_OPT_STRING,
.help = "write error action",
},{
.name = "addr",
.type = QEMU_OPT_STRING,
.help = "pci address (virtio only)",
},{
.name = "readonly",
.type = QEMU_OPT_BOOL,
.help = "open drive file as read-only",
},{
.name = "iops",
.type = QEMU_OPT_NUMBER,
.help = "limit total I/O operations per second",
},{
.name = "iops_rd",
.type = QEMU_OPT_NUMBER,
.help = "limit read operations per second",
},{
.name = "iops_wr",
.type = QEMU_OPT_NUMBER,
.help = "limit write operations per second",
},{
.name = "bps",
.type = QEMU_OPT_NUMBER,
.help = "limit total bytes per second",
},{
.name = "bps_rd",
.type = QEMU_OPT_NUMBER,
.help = "limit read bytes per second",
},{
.name = "bps_wr",
.type = QEMU_OPT_NUMBER,
.help = "limit write bytes per second",
},{
.name = "copy-on-read",
.type = QEMU_OPT_BOOL,
.help = "copy read data from backing file into image file",
},{
.name = "boot",
.type = QEMU_OPT_BOOL,
.help = "(deprecated, ignored)",
},
{ /* end of list */ }
},
};
QemuOptsList qemu_drive_opts = {
.name = "drive",
.head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),

View File

@ -80,7 +80,12 @@ in the description of a field.
tables to repair refcounts before accessing the
image.
Bits 1-63: Reserved (set to 0)
Bit 1: Corrupt bit. If this bit is set then any data
structure may be corrupt and the image must not
be written to (unless for regaining
consistency).
Bits 2-63: Reserved (set to 0)
80 - 87: compatible_features
Bitmask of compatible features. An implementation can

View File

@ -413,6 +413,7 @@ typedef enum {
BLKDBG_REFTABLE_LOAD,
BLKDBG_REFTABLE_GROW,
BLKDBG_REFTABLE_UPDATE,
BLKDBG_REFBLOCK_LOAD,
BLKDBG_REFBLOCK_UPDATE,

View File

@ -48,6 +48,7 @@ typedef enum MonitorEvent {
QEVENT_BALLOON_CHANGE,
QEVENT_SPICE_MIGRATE_COMPLETED,
QEVENT_GUEST_PANICKED,
QEVENT_BLOCK_IMAGE_CORRUPTED,
/* Add to 'monitor_event_names' array in monitor.c when
* defining new events here */

View File

@ -46,6 +46,7 @@ typedef struct QEMUOptionParameter {
char* s;
} value;
const char *help;
bool assigned;
} QEMUOptionParameter;

View File

@ -506,6 +506,7 @@ static const char *monitor_event_names[] = {
[QEVENT_BALLOON_CHANGE] = "BALLOON_CHANGE",
[QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED",
[QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
[QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED",
};
QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)

View File

@ -57,12 +57,8 @@ typedef struct %(name)sList
''',
name=name)
def generate_struct(structname, fieldname, members):
ret = mcgen('''
struct %(name)s
{
''',
name=structname)
def generate_struct_fields(members):
ret = ''
for argname, argentry, optional, structured in parse_args(members):
if optional:
@ -80,6 +76,17 @@ struct %(name)s
''',
c_type=c_type(argentry), c_name=c_var(argname))
return ret
def generate_struct(structname, fieldname, members):
ret = mcgen('''
struct %(name)s
{
''',
name=structname)
ret += generate_struct_fields(members)
if len(fieldname):
fieldname = " " + fieldname
ret += mcgen('''

View File

@ -54,7 +54,7 @@ header_length 72
Header extension:
magic 0x6803f857
length 96
length 144
data <binary>
Header extension:
@ -68,7 +68,7 @@ No errors were found on the image.
magic 0x514649fb
version 2
backing_file_offset 0xf8
backing_file_offset 0x128
backing_file_size 0x17
cluster_bits 16
size 67108864
@ -92,7 +92,7 @@ data 'host_device'
Header extension:
magic 0x6803f857
length 96
length 144
data <binary>
Header extension:
@ -155,7 +155,7 @@ header_length 104
Header extension:
magic 0x6803f857
length 96
length 144
data <binary>
Header extension:
@ -169,7 +169,7 @@ No errors were found on the image.
magic 0x514649fb
version 3
backing_file_offset 0x118
backing_file_offset 0x148
backing_file_size 0x17
cluster_bits 16
size 67108864
@ -193,7 +193,7 @@ data 'host_device'
Header extension:
magic 0x6803f857
length 96
length 144
data <binary>
Header extension:

View File

@ -46,7 +46,7 @@ header_length 104
Header extension:
magic 0x6803f857
length 96
length 144
data <binary>
*** done

View File

@ -85,7 +85,6 @@ QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized
Testing: -drive if=scsi
QEMU X.Y.Z monitor - type 'help' for more information
(qemu) QEMU_PROG: -drive if=scsi: Device needs media, but drive is empty
QEMU_PROG: -drive if=scsi: Device initialization failed.
QEMU_PROG: Device initialization failed.
QEMU_PROG: Initialization of device lsi53c895a failed

111
tests/qemu-iotests/060 Executable file
View File

@ -0,0 +1,111 @@
#!/bin/bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=mreitz@redhat.com
seq=`basename $0`
echo "QA output created by $seq"
here=`pwd`
tmp=/tmp/$$
status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
# This tests qocw2-specific low-level functionality
_supported_fmt qcow2
_supported_proto generic
_supported_os Linux
rt_offset=65536 # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
IMGOPTS="compat=1.1"
echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img
# The corrupt bit should not be set anyway
./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io
# The corrupt bit must now be set
./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to open the image R/W (which should fail)
$QEMU_IO -c "read 0 512" "$TEST_IMG" 2>&1 | _filter_qemu_io | sed -e "s/can't open device .*$/can't open device/"
# Try to open it RO (which should succeed)
$QEMU_IO -c "read 0 512" -r "$TEST_IMG" | _filter_qemu_io
# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io
./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to fix it
_check_test_img -r all
# The corrupt bit should be cleared
./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Look if it's really really fixed
$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io
./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# success, all done
echo "*** done"
rm -f $seq.full
status=0

View File

@ -0,0 +1,44 @@
QA output created by 060
=== Testing L2 reference into L1 ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
ERROR cluster 3 refcount=1 reference=3
1 errors were found on the image.
Data may be corrupted, or further writes to the image may corrupt it.
incompatible_features 0x0
qcow2: Preventing invalid write on metadata (overlaps with active L1 table); image marked as corrupt.
write failed: Input/output error
incompatible_features 0x2
qcow2: Image is corrupt; cannot be opened read/write.
qemu-io: can't open device
no file open, try 'help open'
read 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
=== Testing cluster data reference into refcount block ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
ERROR refcount block 0 refcount=2
ERROR cluster 2 refcount=1 reference=2
2 errors were found on the image.
Data may be corrupted, or further writes to the image may corrupt it.
incompatible_features 0x0
qcow2: Preventing invalid write on metadata (overlaps with refcount block); image marked as corrupt.
write failed: Input/output error
incompatible_features 0x2
Repairing refcount block 0 refcount=2
The following inconsistencies were found and repaired:
0 leaked clusters
1 corruptions
Double checking the fixed image now...
No errors were found on the image.
incompatible_features 0x0
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
incompatible_features 0x0
*** done

64
tests/qemu-iotests/062 Executable file
View File

@ -0,0 +1,64 @@
#!/bin/bash
#
# Test case for snapshotting images with unallocated zero clusters in
# qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=mreitz@redhat.com
seq=`basename $0`
echo "QA output created by $seq"
here=`pwd`
tmp=/tmp/$$
status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
# This tests qocw2-specific low-level functionality
_supported_fmt qcow2
_supported_proto generic
_supported_os Linux
IMGOPTS="compat=1.1"
IMG_SIZE=64M
echo
echo "=== Testing snapshotting an image with zero clusters ==="
echo
_make_test_img $IMG_SIZE
# Write some zero clusters
$QEMU_IO -c "write -z 0 256k" "$TEST_IMG" | _filter_qemu_io
# Create a snapshot
$QEMU_IMG snapshot -c foo "$TEST_IMG"
# Check the image (there shouldn't be any errors or leaks)
_check_test_img
# success, all done
echo "*** done"
rm -f $seq.full
status=0

View File

@ -0,0 +1,9 @@
QA output created by 062
=== Testing snapshotting an image with zero clusters ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
wrote 262144/262144 bytes at offset 0
256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
No errors were found on the image.
*** done

View File

@ -57,10 +57,12 @@
048 img auto quick
049 rw auto
050 rw auto backing quick
#051 rw auto
051 rw auto
052 rw auto backing
053 rw auto
054 rw auto
055 rw auto
056 rw auto backing
059 rw auto
060 rw auto
062 rw auto

View File

@ -275,6 +275,8 @@ int set_option_parameter(QEMUOptionParameter *list, const char *name,
return -1;
}
list->assigned = true;
return 0;
}
@ -306,6 +308,8 @@ int set_option_parameter_int(QEMUOptionParameter *list, const char *name,
return -1;
}
list->assigned = true;
return 0;
}
@ -397,6 +401,7 @@ QEMUOptionParameter *parse_option_parameters(const char *param,
char value[256];
char *param_delim, *value_delim;
char next_delim;
int i;
if (list == NULL) {
return NULL;
@ -406,6 +411,10 @@ QEMUOptionParameter *parse_option_parameters(const char *param,
dest = allocated = append_option_parameters(NULL, list);
}
for (i = 0; dest[i].name; i++) {
dest[i].assigned = false;
}
while (*param) {
// Find parameter name and value in the string