From 1b7c801b40ce90795397bb566d019c9b76ef9c13 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 13 Apr 2010 11:43:27 +0200 Subject: [PATCH 01/14] qcow2: Clear L2 table cache after write error If the L2 table was already updated in cache, but writing it to disk has failed, we must not continue using the changed version in the cache to stay consistent with what's on the disk. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index c11680d12a..ed5c4b2a67 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -696,6 +696,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters); if (ret < 0) { + qcow2_l2_cache_reset(bs); goto err; } From 175e11526e2613b3dc031c23fec3107aa4a80307 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 12 May 2010 16:23:26 +0200 Subject: [PATCH 02/14] qcow2: Fix error handling in l2_allocate l2_allocate has some intermediate states in which the image is inconsistent. Change the order to write to the L1 table only after the new L2 table has successfully been initialized. Also reset the L2 cache in failure case, it's very likely wrong. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index ed5c4b2a67..244b4a786d 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -239,14 +239,6 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) return l2_offset; } - /* update the L1 entry */ - - s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; - ret = write_l1_entry(bs, l1_index); - if (ret < 0) { - return ret; - } - /* allocate a new entry in the l2 cache */ min_index = l2_cache_new_entry(bs); @@ -261,7 +253,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) ret = bdrv_pread(bs->file, old_l2_offset, l2_table, s->l2_size * sizeof(uint64_t)); if (ret < 0) { - return ret; + goto fail; } } /* write the l2 table to the file */ @@ -269,7 +261,14 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)); if (ret < 0) { - return ret; + goto fail; + } + + /* update the L1 entry */ + s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; + ret = write_l1_entry(bs, l1_index); + if (ret < 0) { + goto fail; } /* update the l2 cache entry */ @@ -279,6 +278,10 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) *table = l2_table; return 0; + +fail: + qcow2_l2_cache_reset(bs); + return ret; } static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size, From cbf1dff2f1033cadcb15c0ffc9c0a3d039d8ed42 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 21 May 2010 11:09:42 +0200 Subject: [PATCH 03/14] block: Fix multiwrite with overlapping requests With overlapping requests, the total number of sectors is smaller than the sum of the nb_sectors of both requests. Signed-off-by: Kevin Wolf --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index cd70730a3e..47be5ba125 100644 --- a/block.c +++ b/block.c @@ -2019,7 +2019,7 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, // Add the second request qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size); - reqs[outidx].nb_sectors += reqs[i].nb_sectors; + reqs[outidx].nb_sectors = qiov->size >> 9; reqs[outidx].qiov = qiov; mcb->callbacks[i].free_qiov = reqs[outidx].qiov; From 776cbbbd788686e9735e8d1c008d8bc105fb1fab Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 21 May 2010 11:37:26 +0200 Subject: [PATCH 04/14] qemu-io: Add multiwrite command The new multiwrite commands allows to use qemu-io for testing bdrv_aio_multiwrite. Signed-off-by: Kevin Wolf --- qemu-io.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/qemu-io.c b/qemu-io.c index f39109e231..72a45247da 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -267,6 +267,47 @@ static int do_aio_writev(QEMUIOVector *qiov, int64_t offset, int *total) return async_ret < 0 ? async_ret : 1; } +struct multiwrite_async_ret { + int num_done; + int error; +}; + +static void multiwrite_cb(void *opaque, int ret) +{ + struct multiwrite_async_ret *async_ret = opaque; + + async_ret->num_done++; + if (ret < 0) { + async_ret->error = ret; + } +} + +static int do_aio_multiwrite(BlockRequest* reqs, int num_reqs, int *total) +{ + int i, ret; + struct multiwrite_async_ret async_ret = { + .num_done = 0, + .error = 0, + }; + + *total = 0; + for (i = 0; i < num_reqs; i++) { + reqs[i].cb = multiwrite_cb; + reqs[i].opaque = &async_ret; + *total += reqs[i].qiov->size; + } + + ret = bdrv_aio_multiwrite(bs, reqs, num_reqs); + if (ret < 0) { + return ret; + } + + while (async_ret.num_done < num_reqs) { + qemu_aio_wait(); + } + + return async_ret.error < 0 ? async_ret.error : 1; +} static void read_help(void) @@ -811,6 +852,156 @@ out: return 0; } +static void +multiwrite_help(void) +{ + printf( +"\n" +" writes a range of bytes from the given offset source from multiple buffers,\n" +" in a batch of requests that may be merged by qemu\n" +"\n" +" Example:\n" +" 'multiwrite 512 1k 1k ; 4k 1k' \n" +" writes 2 kB at 512 bytes and 1 kB at 4 kB into the open file\n" +"\n" +" Writes into a segment of the currently open file, using a buffer\n" +" filled with a set pattern (0xcdcdcdcd). The pattern byte is increased\n" +" by one for each request contained in the multiwrite command.\n" +" -P, -- use different pattern to fill file\n" +" -C, -- report statistics in a machine parsable format\n" +" -q, -- quiet mode, do not show I/O statistics\n" +"\n"); +} + +static int multiwrite_f(int argc, char **argv); + +static const cmdinfo_t multiwrite_cmd = { + .name = "multiwrite", + .cfunc = multiwrite_f, + .argmin = 2, + .argmax = -1, + .args = "[-Cq] [-P pattern ] off len [len..] [; off len [len..]..]", + .oneline = "issues multiple write requests at once", + .help = multiwrite_help, +}; + +static int +multiwrite_f(int argc, char **argv) +{ + struct timeval t1, t2; + int Cflag = 0, qflag = 0; + int c, cnt; + char **buf; + int64_t offset, first_offset = 0; + /* Some compilers get confused and warn if this is not initialized. */ + int total = 0; + int nr_iov; + int nr_reqs; + int pattern = 0xcd; + QEMUIOVector *qiovs; + int i; + BlockRequest *reqs; + + while ((c = getopt(argc, argv, "CqP:")) != EOF) { + switch (c) { + case 'C': + Cflag = 1; + break; + case 'q': + qflag = 1; + break; + case 'P': + pattern = parse_pattern(optarg); + if (pattern < 0) + return 0; + break; + default: + return command_usage(&writev_cmd); + } + } + + if (optind > argc - 2) + return command_usage(&writev_cmd); + + nr_reqs = 1; + for (i = optind; i < argc; i++) { + if (!strcmp(argv[i], ";")) { + nr_reqs++; + } + } + + reqs = qemu_malloc(nr_reqs * sizeof(*reqs)); + buf = qemu_malloc(nr_reqs * sizeof(*buf)); + qiovs = qemu_malloc(nr_reqs * sizeof(*qiovs)); + + for (i = 0; i < nr_reqs; i++) { + int j; + + /* Read the offset of the request */ + offset = cvtnum(argv[optind]); + if (offset < 0) { + printf("non-numeric offset argument -- %s\n", argv[optind]); + return 0; + } + optind++; + + if (offset & 0x1ff) { + printf("offset %lld is not sector aligned\n", + (long long)offset); + return 0; + } + + if (i == 0) { + first_offset = offset; + } + + /* Read lengths for qiov entries */ + for (j = optind; j < argc; j++) { + if (!strcmp(argv[j], ";")) { + break; + } + } + + nr_iov = j - optind; + + /* Build request */ + reqs[i].qiov = &qiovs[i]; + buf[i] = create_iovec(reqs[i].qiov, &argv[optind], nr_iov, pattern); + reqs[i].sector = offset >> 9; + reqs[i].nb_sectors = reqs[i].qiov->size >> 9; + + optind = j + 1; + + offset += reqs[i].qiov->size; + pattern++; + } + + gettimeofday(&t1, NULL); + cnt = do_aio_multiwrite(reqs, nr_reqs, &total); + gettimeofday(&t2, NULL); + + if (cnt < 0) { + printf("aio_multiwrite failed: %s\n", strerror(-cnt)); + goto out; + } + + if (qflag) + goto out; + + /* Finally, report back -- -C gives a parsable format */ + t2 = tsub(t2, t1); + print_report("wrote", &t2, first_offset, total, total, cnt, Cflag); +out: + for (i = 0; i < nr_reqs; i++) { + qemu_io_free(buf[i]); + qemu_iovec_destroy(&qiovs[i]); + } + qemu_free(buf); + qemu_free(reqs); + qemu_free(qiovs); + return 0; +} + struct aio_ctx { QEMUIOVector qiov; int64_t offset; @@ -1483,6 +1674,7 @@ int main(int argc, char **argv) add_command(&readv_cmd); add_command(&write_cmd); add_command(&writev_cmd); + add_command(&multiwrite_cmd); add_command(&aio_read_cmd); add_command(&aio_write_cmd); add_command(&aio_flush_cmd); From b50cbabc1bc12e6b0082089c70015c1b97db86a1 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Wed, 26 May 2010 11:35:36 +0900 Subject: [PATCH 05/14] add support for protocol driver create_options This patch enables protocol drivers to use their create options which are not supported by the format. For example, protcol drivers can use a backing_file option with raw format. Signed-off-by: MORITA Kazutaka Signed-off-by: Kevin Wolf --- block.c | 7 +++---- block.h | 1 + qemu-img.c | 49 ++++++++++++++++++++++++++++++++--------------- qemu-option.c | 53 ++++++++++++++++++++++++++++++++++++++++++++------- qemu-option.h | 2 ++ 5 files changed, 86 insertions(+), 26 deletions(-) diff --git a/block.c b/block.c index 47be5ba125..24c63f6338 100644 --- a/block.c +++ b/block.c @@ -56,7 +56,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); -static BlockDriver *find_protocol(const char *filename); static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -210,7 +209,7 @@ int bdrv_create_file(const char* filename, QEMUOptionParameter *options) { BlockDriver *drv; - drv = find_protocol(filename); + drv = bdrv_find_protocol(filename); if (drv == NULL) { drv = bdrv_find_format("file"); } @@ -283,7 +282,7 @@ static BlockDriver *find_hdev_driver(const char *filename) return drv; } -static BlockDriver *find_protocol(const char *filename) +BlockDriver *bdrv_find_protocol(const char *filename) { BlockDriver *drv1; char protocol[128]; @@ -478,7 +477,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags) BlockDriver *drv; int ret; - drv = find_protocol(filename); + drv = bdrv_find_protocol(filename); if (!drv) { return -ENOENT; } diff --git a/block.h b/block.h index 24efeb68b1..9034ebb924 100644 --- a/block.h +++ b/block.h @@ -54,6 +54,7 @@ void bdrv_info_stats(Monitor *mon, QObject **ret_data); void bdrv_init(void); void bdrv_init_with_whitelist(void); +BlockDriver *bdrv_find_protocol(const char *filename); BlockDriver *bdrv_find_format(const char *format_name); BlockDriver *bdrv_find_whitelisted_format(const char *format_name); int bdrv_create(BlockDriver *drv, const char* filename, diff --git a/qemu-img.c b/qemu-img.c index cb007b757d..ea091f00ca 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -252,8 +252,8 @@ static int img_create(int argc, char **argv) const char *base_fmt = NULL; const char *filename; const char *base_filename = NULL; - BlockDriver *drv; - QEMUOptionParameter *param = NULL; + BlockDriver *drv, *proto_drv; + QEMUOptionParameter *param = NULL, *create_options = NULL; char *options = NULL; flags = 0; @@ -286,33 +286,42 @@ static int img_create(int argc, char **argv) } } + /* Get the filename */ + if (optind >= argc) + help(); + filename = argv[optind++]; + /* Find driver and parse its options */ drv = bdrv_find_format(fmt); if (!drv) error("Unknown file format '%s'", fmt); + proto_drv = bdrv_find_protocol(filename); + if (!proto_drv) + error("Unknown protocol '%s'", filename); + + create_options = append_option_parameters(create_options, + drv->create_options); + create_options = append_option_parameters(create_options, + proto_drv->create_options); + if (options && !strcmp(options, "?")) { - print_option_help(drv->create_options); + print_option_help(create_options); return 0; } /* Create parameter list with default values */ - param = parse_option_parameters("", drv->create_options, param); + param = parse_option_parameters("", create_options, param); set_option_parameter_int(param, BLOCK_OPT_SIZE, -1); /* Parse -o options */ if (options) { - param = parse_option_parameters(options, drv->create_options, param); + param = parse_option_parameters(options, create_options, param); if (param == NULL) { error("Invalid options for file format '%s'.", fmt); } } - /* Get the filename */ - if (optind >= argc) - help(); - filename = argv[optind++]; - /* Add size to parameters */ if (optind < argc) { set_option_parameter(param, BLOCK_OPT_SIZE, argv[optind++]); @@ -362,6 +371,7 @@ static int img_create(int argc, char **argv) puts(""); ret = bdrv_create(drv, filename, param); + free_option_parameters(create_options); free_option_parameters(param); if (ret < 0) { @@ -543,14 +553,14 @@ static int img_convert(int argc, char **argv) { int c, ret, n, n1, bs_n, bs_i, flags, cluster_size, cluster_sectors; const char *fmt, *out_fmt, *out_baseimg, *out_filename; - BlockDriver *drv; + BlockDriver *drv, *proto_drv; BlockDriverState **bs, *out_bs; int64_t total_sectors, nb_sectors, sector_num, bs_offset; uint64_t bs_sectors; uint8_t * buf; const uint8_t *buf1; BlockDriverInfo bdi; - QEMUOptionParameter *param = NULL; + QEMUOptionParameter *param = NULL, *create_options = NULL; char *options = NULL; fmt = NULL; @@ -615,19 +625,27 @@ static int img_convert(int argc, char **argv) if (!drv) error("Unknown file format '%s'", out_fmt); + proto_drv = bdrv_find_protocol(out_filename); + if (!proto_drv) + error("Unknown protocol '%s'", out_filename); + + create_options = append_option_parameters(create_options, + drv->create_options); + create_options = append_option_parameters(create_options, + proto_drv->create_options); if (options && !strcmp(options, "?")) { - print_option_help(drv->create_options); + print_option_help(create_options); free(bs); return 0; } if (options) { - param = parse_option_parameters(options, drv->create_options, param); + param = parse_option_parameters(options, create_options, param); if (param == NULL) { error("Invalid options for file format '%s'.", out_fmt); } } else { - param = parse_option_parameters("", drv->create_options, param); + param = parse_option_parameters("", create_options, param); } set_option_parameter_int(param, BLOCK_OPT_SIZE, total_sectors * 512); @@ -649,6 +667,7 @@ static int img_convert(int argc, char **argv) /* Create the new image */ ret = bdrv_create(drv, out_filename, param); + free_option_parameters(create_options); free_option_parameters(param); if (ret < 0) { diff --git a/qemu-option.c b/qemu-option.c index 076dddfc02..acd74f9124 100644 --- a/qemu-option.c +++ b/qemu-option.c @@ -345,6 +345,51 @@ void free_option_parameters(QEMUOptionParameter *list) qemu_free(list); } +/* + * Count valid options in list + */ +static size_t count_option_parameters(QEMUOptionParameter *list) +{ + size_t num_options = 0; + + while (list && list->name) { + num_options++; + list++; + } + + return num_options; +} + +/* + * Append an option list (list) to an option list (dest). + * + * If dest is NULL, a new copy of list is created. + * + * Returns a pointer to the first element of dest (or the newly allocated copy) + */ +QEMUOptionParameter *append_option_parameters(QEMUOptionParameter *dest, + QEMUOptionParameter *list) +{ + size_t num_options, num_dest_options; + + num_options = count_option_parameters(dest); + num_dest_options = num_options; + + num_options += count_option_parameters(list); + + dest = qemu_realloc(dest, (num_options + 1) * sizeof(QEMUOptionParameter)); + + while (list && list->name) { + if (get_option_parameter(dest, list->name) == NULL) { + dest[num_dest_options++] = *list; + dest[num_dest_options].name = NULL; + } + list++; + } + + return dest; +} + /* * Parses a parameter string (param) into an option list (dest). * @@ -365,7 +410,6 @@ void free_option_parameters(QEMUOptionParameter *list) QEMUOptionParameter *parse_option_parameters(const char *param, QEMUOptionParameter *list, QEMUOptionParameter *dest) { - QEMUOptionParameter *cur; QEMUOptionParameter *allocated = NULL; char name[256]; char value[256]; @@ -379,12 +423,7 @@ QEMUOptionParameter *parse_option_parameters(const char *param, if (dest == NULL) { // Count valid options - num_options = 0; - cur = list; - while (cur->name) { - num_options++; - cur++; - } + num_options = count_option_parameters(list); // Create a copy of the option list to fill in values dest = qemu_mallocz((num_options + 1) * sizeof(QEMUOptionParameter)); diff --git a/qemu-option.h b/qemu-option.h index 58136f3032..4823219a18 100644 --- a/qemu-option.h +++ b/qemu-option.h @@ -70,6 +70,8 @@ int set_option_parameter(QEMUOptionParameter *list, const char *name, const char *value); int set_option_parameter_int(QEMUOptionParameter *list, const char *name, uint64_t value); +QEMUOptionParameter *append_option_parameters(QEMUOptionParameter *dest, + QEMUOptionParameter *list); QEMUOptionParameter *parse_option_parameters(const char *param, QEMUOptionParameter *list, QEMUOptionParameter *dest); void free_option_parameters(QEMUOptionParameter *list); From dc33bb341122623105573d47fa6d71a0ba5ff15c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 26 May 2010 10:44:44 +0200 Subject: [PATCH 06/14] drive: allow rerror, werror and readonly for if=none When creating guest disks the qdev way using ... -drive if=none,id=$name,args -device $driver,drive=$name it is not possible to specify rerror, werror and readonly arguments for drive as drive_init allows/blocks them based on the interface (if=) specified and none isn't white-listed there. Signed-off-by: Gerd Hoffmann Signed-off-by: Kevin Wolf --- vl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vl.c b/vl.c index 417554fc64..7121cd0264 100644 --- a/vl.c +++ b/vl.c @@ -953,7 +953,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, on_write_error = BLOCK_ERR_STOP_ENOSPC; if ((buf = qemu_opt_get(opts, "werror")) != NULL) { - if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO) { + if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) { fprintf(stderr, "werror is no supported by this format\n"); return NULL; } @@ -966,7 +966,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, on_read_error = BLOCK_ERR_REPORT; if ((buf = qemu_opt_get(opts, "rerror")) != NULL) { - if (type != IF_IDE && type != IF_VIRTIO) { + if (type != IF_IDE && type != IF_VIRTIO && type != IF_NONE) { fprintf(stderr, "rerror is no supported by this format\n"); return NULL; } @@ -1114,7 +1114,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, /* CDROM is fine for any interface, don't check. */ ro = 1; } else if (ro == 1) { - if (type != IF_SCSI && type != IF_VIRTIO && type != IF_FLOPPY) { + if (type != IF_SCSI && type != IF_VIRTIO && type != IF_FLOPPY && type != IF_NONE) { fprintf(stderr, "qemu: readonly flag not supported for drive with this interface\n"); return NULL; } From b587a52c007efb9e100d99fa6c78461ad1901973 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 27 May 2010 12:52:08 +0100 Subject: [PATCH 07/14] posix-aio-compat: Expand tabs that have crept in This patch expands tabs on a few lines so the code formats nicely and follows the QEMU coding style. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- posix-aio-compat.c | 54 +++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/posix-aio-compat.c b/posix-aio-compat.c index b43c5315a8..a67ffe3113 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -35,7 +35,7 @@ struct qemu_paiocb { int aio_fildes; union { struct iovec *aio_iov; - void *aio_ioctl_buf; + void *aio_ioctl_buf; }; int aio_niov; size_t aio_nbytes; @@ -119,21 +119,21 @@ static void thread_create(pthread_t *thread, pthread_attr_t *attr, static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb) { - int ret; + int ret; - ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); - if (ret == -1) - return -errno; + ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); + if (ret == -1) + return -errno; - /* - * This looks weird, but the aio code only consideres a request - * successfull if it has written the number full number of bytes. - * - * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command, - * so in fact we return the ioctl command here to make posix_aio_read() - * happy.. - */ - return aiocb->aio_nbytes; + /* + * This looks weird, but the aio code only consideres a request + * successfull if it has written the number full number of bytes. + * + * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command, + * so in fact we return the ioctl command here to make posix_aio_read() + * happy.. + */ + return aiocb->aio_nbytes; } static ssize_t handle_aiocb_flush(struct qemu_paiocb *aiocb) @@ -249,10 +249,10 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb) * Try preadv/pwritev first and fall back to linearizing the * buffer if it's not supported. */ - if (preadv_present) { + if (preadv_present) { nbytes = handle_aiocb_rw_vector(aiocb); if (nbytes == aiocb->aio_nbytes) - return nbytes; + return nbytes; if (nbytes < 0 && nbytes != -ENOSYS) return nbytes; preadv_present = 0; @@ -335,19 +335,19 @@ static void *aio_thread(void *unused) switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { case QEMU_AIO_READ: case QEMU_AIO_WRITE: - ret = handle_aiocb_rw(aiocb); - break; + ret = handle_aiocb_rw(aiocb); + break; case QEMU_AIO_FLUSH: - ret = handle_aiocb_flush(aiocb); - break; + ret = handle_aiocb_flush(aiocb); + break; case QEMU_AIO_IOCTL: - ret = handle_aiocb_ioctl(aiocb); - break; - default: - fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); - ret = -EINVAL; - break; - } + ret = handle_aiocb_ioctl(aiocb); + break; + default: + fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); + ret = -EINVAL; + break; + } mutex_lock(&lock); aiocb->ret = ret; From c63782cbe8bdc2c401ea710cef427de0214c5900 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Thu, 27 May 2010 15:46:55 +0200 Subject: [PATCH 08/14] block.h: Make BDRV_SECTOR_SIZE 64 bit safe C defaults to int, so make definition of BDRV_SECTOR_SIZE 64 bit safe as it and BDRV_SECTOR_MASK may be used against 64 bit addresses. Signed-off-by: Jes Sorensen Signed-off-by: Kevin Wolf --- block.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.h b/block.h index 9034ebb924..756670d22f 100644 --- a/block.h +++ b/block.h @@ -38,7 +38,7 @@ typedef struct QEMUSnapshotInfo { #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB) #define BDRV_SECTOR_BITS 9 -#define BDRV_SECTOR_SIZE (1 << BDRV_SECTOR_BITS) +#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) typedef enum { From 1c46efaa0a175e468772405385ca26a1e35dd94c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 21 May 2010 17:59:36 +0200 Subject: [PATCH 09/14] qcow2: Allow qcow2_get_cluster_offset to return errors qcow2_get_cluster_offset() looks up a given virtual disk offset and returns the offset of the corresponding cluster in the image file. Errors (e.g. L2 table can't be read) are currenctly indicated by a return value of 0, which is unfortuately the same as for any unallocated cluster. So in effect we can't check for errors. This makes the old return value a by-reference parameter and returns the usual 0/-errno error code. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 36 ++++++++++++++++++++++-------------- block/qcow2.c | 16 +++++++++++++--- block/qcow2.h | 4 ++-- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 244b4a786d..ea98afc365 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -345,7 +345,13 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num, while (nb_sectors > 0) { n = nb_sectors; - cluster_offset = qcow2_get_cluster_offset(bs, sector_num << 9, &n); + + ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n, + &cluster_offset); + if (ret < 0) { + return ret; + } + index_in_cluster = sector_num & (s->cluster_sectors - 1); if (!cluster_offset) { if (bs->backing_hd) { @@ -412,25 +418,25 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, /* * get_cluster_offset * - * For a given offset of the disk image, return cluster offset in - * qcow2 file. + * For a given offset of the disk image, find the cluster offset in + * qcow2 file. The offset is stored in *cluster_offset. * * on entry, *num is the number of contiguous clusters we'd like to * access following offset. * * on exit, *num is the number of contiguous clusters we can read. * - * Return 1, if the offset is found - * Return 0, otherwise. + * Return 0, if the offset is found + * Return -errno, otherwise. * */ -uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, - int *num) +int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, + int *num, uint64_t *cluster_offset) { BDRVQcowState *s = bs->opaque; unsigned int l1_index, l2_index; - uint64_t l2_offset, *l2_table, cluster_offset; + uint64_t l2_offset, *l2_table; int l1_bits, c; unsigned int index_in_cluster, nb_clusters; uint64_t nb_available, nb_needed; @@ -454,7 +460,7 @@ uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, nb_needed = nb_available; } - cluster_offset = 0; + *cluster_offset = 0; /* seek the the l2 offset in the l1 table */ @@ -473,16 +479,17 @@ uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, l2_offset &= ~QCOW_OFLAG_COPIED; l2_table = l2_load(bs, l2_offset); - if (l2_table == NULL) - return 0; + if (l2_table == NULL) { + return -EIO; + } /* find the cluster offset for the given disk offset */ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); - cluster_offset = be64_to_cpu(l2_table[l2_index]); + *cluster_offset = be64_to_cpu(l2_table[l2_index]); nb_clusters = size_to_clusters(s, nb_needed << 9); - if (!cluster_offset) { + if (!*cluster_offset) { /* how many empty clusters ? */ c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]); } else { @@ -498,7 +505,8 @@ out: *num = nb_available - index_in_cluster; - return cluster_offset & ~QCOW_OFLAG_COPIED; + *cluster_offset &=~QCOW_OFLAG_COPIED; + return 0; } /* diff --git a/block/qcow2.c b/block/qcow2.c index 5b72758915..33fa9a9012 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -297,9 +297,15 @@ static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { uint64_t cluster_offset; + int ret; *pnum = nb_sectors; - cluster_offset = qcow2_get_cluster_offset(bs, sector_num << 9, pnum); + /* FIXME We can get errors here, but the bdrv_is_allocated interface can't + * pass them on today */ + ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); + if (ret < 0) { + *pnum = 0; + } return (cluster_offset != 0); } @@ -409,8 +415,12 @@ static void qcow_aio_read_cb(void *opaque, int ret) /* prepare next AIO request */ acb->cur_nr_sectors = acb->remaining_sectors; - acb->cluster_offset = qcow2_get_cluster_offset(bs, acb->sector_num << 9, - &acb->cur_nr_sectors); + ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9, + &acb->cur_nr_sectors, &acb->cluster_offset); + if (ret < 0) { + goto done; + } + index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); if (!acb->cluster_offset) { diff --git a/block/qcow2.h b/block/qcow2.h index 01053b79d9..c59b827da8 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -196,8 +196,8 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, int nb_sectors, int enc, const AES_KEY *key); -uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, - int *num); +int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, + int *num, uint64_t *cluster_offset); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, int n_start, int n_end, int *num, QCowL2Meta *m); uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, From 55c17e9821c474d5fcdebdc82ed2fc096777d611 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 21 May 2010 18:25:20 +0200 Subject: [PATCH 10/14] qcow2: Change l2_load to return 0/-errno Provide the error code to the caller instead of just indicating success/error. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index ea98afc365..03a9f25799 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -157,31 +157,36 @@ static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset) * the image file failed. */ -static uint64_t *l2_load(BlockDriverState *bs, uint64_t l2_offset) +static int l2_load(BlockDriverState *bs, uint64_t l2_offset, + uint64_t **l2_table) { BDRVQcowState *s = bs->opaque; int min_index; - uint64_t *l2_table; + int ret; /* seek if the table for the given offset is in the cache */ - l2_table = seek_l2_table(s, l2_offset); - if (l2_table != NULL) - return l2_table; + *l2_table = seek_l2_table(s, l2_offset); + if (*l2_table != NULL) { + return 0; + } /* not found: load a new entry in the least used one */ min_index = l2_cache_new_entry(bs); - l2_table = s->l2_cache + (min_index << s->l2_bits); + *l2_table = s->l2_cache + (min_index << s->l2_bits); BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); - if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != - s->l2_size * sizeof(uint64_t)) - return NULL; + ret = bdrv_pread(bs->file, l2_offset, *l2_table, + s->l2_size * sizeof(uint64_t)); + if (ret < 0) { + return ret; + } + s->l2_cache_offsets[min_index] = l2_offset; s->l2_cache_counts[min_index] = 1; - return l2_table; + return 0; } /* @@ -440,6 +445,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int l1_bits, c; unsigned int index_in_cluster, nb_clusters; uint64_t nb_available, nb_needed; + int ret; index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1); nb_needed = *num + index_in_cluster; @@ -478,9 +484,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, /* load the l2 table in memory */ l2_offset &= ~QCOW_OFLAG_COPIED; - l2_table = l2_load(bs, l2_offset); - if (l2_table == NULL) { - return -EIO; + ret = l2_load(bs, l2_offset, &l2_table); + if (ret < 0) { + return ret; } /* find the cluster offset for the given disk offset */ @@ -547,9 +553,9 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, if (l2_offset & QCOW_OFLAG_COPIED) { /* load the l2 table in memory */ l2_offset &= ~QCOW_OFLAG_COPIED; - l2_table = l2_load(bs, l2_offset); - if (l2_table == NULL) { - return -EIO; + ret = l2_load(bs, l2_offset, &l2_table); + if (ret < 0) { + return ret; } } else { if (l2_offset) From ed0df867d93341289d085ed9e9d44907e342c7ff Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 28 May 2010 09:47:44 +0200 Subject: [PATCH 11/14] qcow2: Return right error code in write_refcount_block_entries write_refcount_block_entries used to return -EIO for any errors. Change this to return the real error code. Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 744107cd1d..a7f295d933 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -404,6 +404,7 @@ static int write_refcount_block_entries(BlockDriverState *bs, { BDRVQcowState *s = bs->opaque; size_t size; + int ret; if (cache_refcount_updates) { return 0; @@ -414,12 +415,13 @@ static int write_refcount_block_entries(BlockDriverState *bs, & ~(REFCOUNTS_PER_SECTOR - 1); size = (last_index - first_index) << REFCOUNT_SHIFT; + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); - if (bdrv_pwrite(bs->file, + ret = bdrv_pwrite(bs->file, refcount_block_offset + (first_index << REFCOUNT_SHIFT), - &s->refcount_block_cache[first_index], size) != size) - { - return -EIO; + &s->refcount_block_cache[first_index], size); + if (ret < 0) { + return ret; } return 0; @@ -460,10 +462,10 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); if ((old_table_index >= 0) && (table_index != old_table_index)) { - if (write_refcount_block_entries(bs, refcount_block_offset, - first_index, last_index) < 0) - { - return -EIO; + ret = write_refcount_block_entries(bs, refcount_block_offset, + first_index, last_index); + if (ret < 0) { + return ret; } first_index = -1; @@ -505,10 +507,11 @@ fail: /* Write last changed block to disk */ if (refcount_block_offset != 0) { - if (write_refcount_block_entries(bs, refcount_block_offset, - first_index, last_index) < 0) - { - return ret < 0 ? ret : -EIO; + int wret; + wret = write_refcount_block_entries(bs, refcount_block_offset, + first_index, last_index); + if (wret < 0) { + return ret < 0 ? ret : wret; } } From 25408c09502be036e5575754fe54019ed4ed5dfa Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 28 May 2010 12:05:45 +0200 Subject: [PATCH 12/14] qcow2: Fix corruption after refblock allocation Refblock allocation code needs to take into consideration that update_refcount will load a different refcount block into the cache, so it must initialize the cache for a new refcount block only afterwards. Not doing this means that not only the refcount in the wrong block is updated, but also that the caller will work on the wrong block. Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index a7f295d933..5b7cda4774 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -221,8 +221,6 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) /* Allocate the refcount block itself and mark it as used */ uint64_t new_block = alloc_clusters_noref(bs, s->cluster_size); - memset(s->refcount_block_cache, 0, s->cluster_size); - s->refcount_block_cache_offset = new_block; #ifdef DEBUG_ALLOC2 fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64 @@ -231,6 +229,10 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) #endif if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) { + /* Zero the new refcount block before updating it */ + memset(s->refcount_block_cache, 0, s->cluster_size); + s->refcount_block_cache_offset = new_block; + /* The block describes itself, need to update the cache */ int block_index = (new_block >> s->cluster_bits) & ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); @@ -242,6 +244,11 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) if (ret < 0) { goto fail_block; } + + /* Initialize the new refcount block only after updating its refcount, + * update_refcount uses the refcount cache itself */ + memset(s->refcount_block_cache, 0, s->cluster_size); + s->refcount_block_cache_offset = new_block; } /* Now the new refcount block needs to be written to disk */ From 86fa8da83771238de55dc44819a1a27bafef5353 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 28 May 2010 12:21:27 +0200 Subject: [PATCH 13/14] qcow2: Fix corruption after error in update_refcount After it is done with updating refcounts in the cache, update_refcount writes all changed entries to disk. If a refcount block allocation fails, however, there was no change yet and therefore first_index = last_index = -1. Don't treat -1 as a normal sector index (resulting in a 512 byte write!) but return without updating anything in this case. Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 5b7cda4774..22b0b45ff7 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -417,6 +417,10 @@ static int write_refcount_block_entries(BlockDriverState *bs, return 0; } + if (first_index < 0) { + return 0; + } + first_index &= ~(REFCOUNTS_PER_SECTOR - 1); last_index = (last_index + REFCOUNTS_PER_SECTOR) & ~(REFCOUNTS_PER_SECTOR - 1); From 1a396859105c4c27fdec08180be26288b8a629a3 Mon Sep 17 00:00:00 2001 From: "Nicholas A. Bellinger" Date: Thu, 27 May 2010 08:56:28 -0700 Subject: [PATCH 14/14] block: Add missing bdrv_delete() for SG_IO BlockDriver in find_image_format() This patch adds a missing bdrv_delete() call in find_image_format() so that a SG_IO BlockDriver properly releases the temporary BlockDriverState *bs created from bdrv_file_open() Signed-off-by: Nicholas A. Bellinger Reported-by: Chris Krumme Signed-off-by: Kevin Wolf --- block.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 24c63f6338..296de890b6 100644 --- a/block.c +++ b/block.c @@ -332,8 +332,10 @@ static BlockDriver *find_image_format(const char *filename) return NULL; /* Return the raw BlockDriver * to scsi-generic devices */ - if (bs->sg) + if (bs->sg) { + bdrv_delete(bs); return bdrv_find_format("raw"); + } ret = bdrv_pread(bs, 0, buf, sizeof(buf)); bdrv_delete(bs);