From c53a7285b4377e91f30b7742c7e12c16d6bf86f0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 16 May 2010 14:59:57 +0300 Subject: [PATCH 1/8] block: fix aio_flush segfaults for read-only protocols (e.g. curl) Not all block format drivers expose an io_flush method (reasonable for read-only protocols), so calling io_flush there will immediately segfault. Fix by checking for the method's existence before calling it. Signed-off-by: Avi Kivity Signed-off-by: Kevin Wolf --- aio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aio.c b/aio.c index f164a478c5..2f086557b6 100644 --- a/aio.c +++ b/aio.c @@ -113,7 +113,9 @@ void qemu_aio_flush(void) qemu_aio_wait(); QLIST_FOREACH(node, &aio_handlers, node) { - ret |= node->io_flush(node->opaque); + if (node->io_flush) { + ret |= node->io_flush(node->opaque); + } } } while (qemu_bh_poll() || ret > 0); } From de6c8042ec55da18702fa51f09072fcaa315edc3 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 14 May 2010 22:52:30 +0100 Subject: [PATCH 2/8] virtio-blk: Avoid zeroing every request structure The VirtIOBlockRequest structure is about 40 KB in size. This patch avoids zeroing every request by only initializing fields that are read. The other fields are either written to or may not be used at all. Oprofile shows about 10% of CPU samples in memset called by virtio_blk_alloc_request(). The workload is dd if=/dev/vda of=/dev/null iflag=direct bs=8k running concurrently 4 times. This patch makes memset disappear to the bottom of the profile. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/virtio-blk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index b05d15ecdd..d270225020 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -105,8 +105,10 @@ static void virtio_blk_flush_complete(void *opaque, int ret) static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) { - VirtIOBlockReq *req = qemu_mallocz(sizeof(*req)); + VirtIOBlockReq *req = qemu_malloc(sizeof(*req)); req->dev = s; + req->qiov.size = 0; + req->next = NULL; return req; } From 618fbb84299780af96e3d4c4b6f2148656fe3708 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 May 2010 12:40:09 +0200 Subject: [PATCH 3/8] virtio-blk: fix barrier support Before issuing the barrier to the block driver we need to flush our oustanding queue of write requests, as the flush is supposed to be issued after them. Signed-off-by: Christoph Hellwig Signed-off-by: Kevin Wolf --- hw/virtio-blk.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index d270225020..5d7f1a2200 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -240,10 +240,20 @@ static void do_multiwrite(BlockDriverState *bs, BlockRequest *blkreq, } } -static void virtio_blk_handle_flush(VirtIOBlockReq *req) +static void virtio_blk_handle_flush(BlockRequest *blkreq, int *num_writes, + VirtIOBlockReq *req, BlockDriverState **old_bs) { BlockDriverAIOCB *acb; + /* + * Make sure all outstanding writes are posted to the backing device. + */ + if (*old_bs != NULL) { + do_multiwrite(*old_bs, blkreq, *num_writes); + } + *num_writes = 0; + *old_bs = req->dev->bs; + acb = bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req); if (!acb) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); @@ -316,7 +326,8 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req, req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base; if (req->out->type & VIRTIO_BLK_T_FLUSH) { - virtio_blk_handle_flush(req); + virtio_blk_handle_flush(mrb->blkreq, &mrb->num_writes, + req, &mrb->old_bs); } else if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) { virtio_blk_handle_scsi(req); } else if (req->out->type & VIRTIO_BLK_T_OUT) { From 77be4366baface6613cfc312ba281f8e5860997c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 May 2010 20:53:10 +0200 Subject: [PATCH 4/8] block: fix sector comparism in multiwrite_req_compare The difference between the start sectors of two requests can be larger than the size of the "int" type, which can lead to a not correctly sorted multiwrite array and thus spurious I/O errors and filesystem corruption due to incorrect request merges. So instead of doing the cute sector arithmetics trick spell out the exact comparisms. Spotted by Kevin Wolf based on a testcase from Michael Tokarev. Signed-off-by: Christoph Hellwig Signed-off-by: Kevin Wolf --- block.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index bfe46e3765..89eece7b7b 100644 --- a/block.c +++ b/block.c @@ -1929,7 +1929,19 @@ static void multiwrite_cb(void *opaque, int ret) static int multiwrite_req_compare(const void *a, const void *b) { - return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector); + const BlockRequest *req1 = a, *req2 = b; + + /* + * Note that we can't simply subtract req2->sector from req1->sector + * here as that could overflow the return value. + */ + if (req1->sector > req2->sector) { + return 1; + } else if (req1->sector < req2->sector) { + return -1; + } else { + return 0; + } } /* From f8ea0b00e087380fa0c7309f843f67b7e2d0126a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 17 May 2010 09:45:57 -0700 Subject: [PATCH 5/8] block: Make find_image_format() return 'raw' BlockDriver for SG_IO devices This patch adds a special BlockDriverState->sg check in block.c:find_image_format() after bdrv_file_open() -> block/raw-posix.c:hdev_open() has been called to determine if we are dealing with a Linux host scsi-generic device. The patch then returns the BlockDriver * from bdrv_find_format("raw"), skipping the subsequent bdrv_read() and rest of find_image_format(). Signed-off-by: Nicholas A. Bellinger Signed-off-by: Kevin Wolf --- block.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block.c b/block.c index 89eece7b7b..6a95768e81 100644 --- a/block.c +++ b/block.c @@ -329,6 +329,11 @@ static BlockDriver *find_image_format(const char *filename) ret = bdrv_file_open(&bs, filename, 0); if (ret < 0) return NULL; + + /* Return the raw BlockDriver * to scsi-generic devices */ + if (bs->sg) + return bdrv_find_format("raw"); + ret = bdrv_pread(bs, 0, buf, sizeof(buf)); bdrv_delete(bs); if (ret < 0) { From 396759ad4ad5289623eb7e1993c433ad4e7b13a1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 17 May 2010 09:46:04 -0700 Subject: [PATCH 6/8] block: Add SG_IO device check in refresh_total_sectors() This patch adds a special case check for scsi-generic devices in refresh_total_sectors() to skip the subsequent BlockDriver->bdrv_getlength() that will be returning -ESPIPE from block/raw-posic.c:raw_getlength() for BlockDriverState->sg=1 devices. Signed-off-by: Nicholas A. Bellinger Signed-off-by: Kevin Wolf --- block.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block.c b/block.c index 6a95768e81..0b0966c571 100644 --- a/block.c +++ b/block.c @@ -361,6 +361,10 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) { BlockDriver *drv = bs->drv; + /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ + if (bs->sg) + return 0; + /* query actual device if possible, otherwise just trust the hint */ if (drv->bdrv_getlength) { int64_t length = drv->bdrv_getlength(bs); From 792b45b142e6b901e1de20886bc3369211582b8c Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Wed, 19 May 2010 22:53:44 +0200 Subject: [PATCH 7/8] vvfat: Fix compilation with DEBUG defined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc does not like passing a NULL where an int value is expected: block/vvfat.c: In function ‘checkpoint’: block/vvfat.c:2868: error: passing argument 2 of ‘remove_mapping’ makes integer from pointer without a cast Signed-off-by: Riccardo Magliocchetti Signed-off-by: Kevin Wolf --- block/vvfat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/vvfat.c b/block/vvfat.c index ce16bbd8c4..13c31fadd9 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2865,7 +2865,7 @@ static void checkpoint(void) { return; /* avoid compiler warnings: */ hexdump(NULL, 100); - remove_mapping(vvv, NULL); + remove_mapping(vvv, 0); print_mapping(NULL); print_direntry(NULL); } From 3e89cb0419cf5ff8f97fe219c6faa58f4c0c8728 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 20 May 2010 10:34:50 +0200 Subject: [PATCH 8/8] vvfat: More build fixes with DEBUG Casting a pointer to an int doesn't work on 64 bit platforms. Use the %p printf conversion specifier instead. Signed-off-by: Kevin Wolf --- block/vvfat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 13c31fadd9..6d61c2e6c3 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1244,7 +1244,7 @@ static void print_direntry(const direntry_t* direntry) int j = 0; char buffer[1024]; - fprintf(stderr, "direntry 0x%x: ", (int)direntry); + fprintf(stderr, "direntry %p: ", direntry); if(!direntry) return; if(is_long_name(direntry)) { @@ -1273,7 +1273,11 @@ static void print_direntry(const direntry_t* direntry) static void print_mapping(const mapping_t* mapping) { - fprintf(stderr, "mapping (0x%x): begin, end = %d, %d, dir_index = %d, first_mapping_index = %d, name = %s, mode = 0x%x, " , (int)mapping, mapping->begin, mapping->end, mapping->dir_index, mapping->first_mapping_index, mapping->path, mapping->mode); + fprintf(stderr, "mapping (%p): begin, end = %d, %d, dir_index = %d, " + "first_mapping_index = %d, name = %s, mode = 0x%x, " , + mapping, mapping->begin, mapping->end, mapping->dir_index, + mapping->first_mapping_index, mapping->path, mapping->mode); + if (mapping->mode & MODE_DIRECTORY) fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index); else