diff --git a/async.c b/async.c index 6930185e64..5b6fe6b4cc 100644 --- a/async.c +++ b/async.c @@ -117,15 +117,21 @@ void qemu_bh_schedule_idle(QEMUBH *bh) void qemu_bh_schedule(QEMUBH *bh) { + AioContext *ctx; + if (bh->scheduled) return; + ctx = bh->ctx; bh->idle = 0; - /* Make sure that idle & any writes needed by the callback are done - * before the locations are read in the aio_bh_poll. + /* Make sure that: + * 1. idle & any writes needed by the callback are done before the + * locations are read in the aio_bh_poll. + * 2. ctx is loaded before scheduled is set and the callback has a chance + * to execute. */ - smp_wmb(); + smp_mb(); bh->scheduled = 1; - aio_notify(bh->ctx); + aio_notify(ctx); } diff --git a/block-migration.c b/block-migration.c index 16562709c8..25a03889f4 100644 --- a/block-migration.c +++ b/block-migration.c @@ -629,6 +629,7 @@ static int block_save_setup(QEMUFile *f, void *opaque) block_mig_state.submitted, block_mig_state.transferred); qemu_mutex_lock_iothread(); + init_blk_migration(f); /* start track dirty blocks */ ret = set_dirty_tracking(); @@ -638,8 +639,6 @@ static int block_save_setup(QEMUFile *f, void *opaque) return ret; } - init_blk_migration(f); - qemu_mutex_unlock_iothread(); ret = flush_blks(f); diff --git a/block.c b/block.c index 310ea89fce..17f763db79 100644 --- a/block.c +++ b/block.c @@ -179,6 +179,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs) { assert(!bs->io_limits_enabled); throttle_init(&bs->throttle_state, + bdrv_get_aio_context(bs), QEMU_CLOCK_VIRTUAL, bdrv_throttle_read_timer_cb, bdrv_throttle_write_timer_cb, @@ -363,6 +364,7 @@ BlockDriverState *bdrv_new(const char *device_name, Error **errp) qemu_co_queue_init(&bs->throttled_reqs[0]); qemu_co_queue_init(&bs->throttled_reqs[1]); bs->refcnt = 1; + bs->aio_context = qemu_get_aio_context(); return bs; } @@ -1856,7 +1858,11 @@ void bdrv_close_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); bdrv_close(bs); + aio_context_release(aio_context); } } @@ -1881,17 +1887,6 @@ static bool bdrv_requests_pending(BlockDriverState *bs) return false; } -static bool bdrv_requests_pending_all(void) -{ - BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - if (bdrv_requests_pending(bs)) { - return true; - } - } - return false; -} - /* * Wait for pending requests to complete across all BlockDriverStates * @@ -1911,12 +1906,20 @@ void bdrv_drain_all(void) BlockDriverState *bs; while (busy) { - QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - bdrv_start_throttled_reqs(bs); - } + busy = false; - busy = bdrv_requests_pending_all(); - busy |= aio_poll(qemu_get_aio_context(), busy); + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + bool bs_busy; + + aio_context_acquire(aio_context); + bdrv_start_throttled_reqs(bs); + bs_busy = bdrv_requests_pending(bs); + bs_busy |= aio_poll(aio_context, bs_busy); + aio_context_release(aio_context); + + busy |= bs_busy; + } } } @@ -2352,12 +2355,17 @@ int bdrv_commit_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); if (bs->drv && bs->backing_hd) { int ret = bdrv_commit(bs); if (ret < 0) { + aio_context_release(aio_context); return ret; } } + aio_context_release(aio_context); } return 0; } @@ -2775,10 +2783,12 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, /* Fast-path if already in coroutine context */ bdrv_rw_co_entry(&rwco); } else { + AioContext *aio_context = bdrv_get_aio_context(bs); + co = qemu_coroutine_create(bdrv_rw_co_entry); qemu_coroutine_enter(co, &rwco); while (rwco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } return rwco.ret; @@ -3831,10 +3841,15 @@ int bdrv_flush_all(void) int result = 0; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - int ret = bdrv_flush(bs); + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + + aio_context_acquire(aio_context); + ret = bdrv_flush(bs); if (ret < 0 && !result) { result = ret; } + aio_context_release(aio_context); } return result; @@ -4025,10 +4040,12 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, /* Fast-path if already in coroutine context */ bdrv_get_block_status_co_entry(&data); } else { + AioContext *aio_context = bdrv_get_aio_context(bs); + co = qemu_coroutine_create(bdrv_get_block_status_co_entry); qemu_coroutine_enter(co, &data); while (!data.done) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } return data.ret; @@ -4621,7 +4638,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, acb->is_write = is_write; acb->qiov = qiov; acb->bounce = qemu_blockalign(bs, qiov->size); - acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb); if (is_write) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); @@ -4660,13 +4677,14 @@ typedef struct BlockDriverAIOCBCoroutine { static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) { + AioContext *aio_context = bdrv_get_aio_context(blockacb->bs); BlockDriverAIOCBCoroutine *acb = container_of(blockacb, BlockDriverAIOCBCoroutine, common); bool done = false; acb->done = &done; while (!done) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -4703,7 +4721,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) acb->req.nb_sectors, acb->req.qiov, acb->req.flags); } - acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); } @@ -4739,7 +4757,7 @@ static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) BlockDriverState *bs = acb->common.bs; acb->req.error = bdrv_co_flush(bs); - acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); } @@ -4766,7 +4784,7 @@ static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) BlockDriverState *bs = acb->common.bs; acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); - acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); } @@ -4977,7 +4995,11 @@ void bdrv_invalidate_cache_all(Error **errp) Error *local_err = NULL; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); bdrv_invalidate_cache(bs, &local_err); + aio_context_release(aio_context); if (local_err) { error_propagate(errp, local_err); return; @@ -4990,7 +5012,11 @@ void bdrv_clear_incoming_migration_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING); + aio_context_release(aio_context); } } @@ -5006,10 +5032,12 @@ int bdrv_flush(BlockDriverState *bs) /* Fast-path if already in coroutine context */ bdrv_flush_co_entry(&rwco); } else { + AioContext *aio_context = bdrv_get_aio_context(bs); + co = qemu_coroutine_create(bdrv_flush_co_entry); qemu_coroutine_enter(co, &rwco); while (rwco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -5119,10 +5147,12 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) /* Fast-path if already in coroutine context */ bdrv_discard_co_entry(&rwco); } else { + AioContext *aio_context = bdrv_get_aio_context(bs); + co = qemu_coroutine_create(bdrv_discard_co_entry); qemu_coroutine_enter(co, &rwco); while (rwco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -5633,8 +5663,66 @@ out: AioContext *bdrv_get_aio_context(BlockDriverState *bs) { - /* Currently BlockDriverState always uses the main loop AioContext */ - return qemu_get_aio_context(); + return bs->aio_context; +} + +void bdrv_detach_aio_context(BlockDriverState *bs) +{ + if (!bs->drv) { + return; + } + + if (bs->io_limits_enabled) { + throttle_detach_aio_context(&bs->throttle_state); + } + if (bs->drv->bdrv_detach_aio_context) { + bs->drv->bdrv_detach_aio_context(bs); + } + if (bs->file) { + bdrv_detach_aio_context(bs->file); + } + if (bs->backing_hd) { + bdrv_detach_aio_context(bs->backing_hd); + } + + bs->aio_context = NULL; +} + +void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + if (!bs->drv) { + return; + } + + bs->aio_context = new_context; + + if (bs->backing_hd) { + bdrv_attach_aio_context(bs->backing_hd, new_context); + } + if (bs->file) { + bdrv_attach_aio_context(bs->file, new_context); + } + if (bs->drv->bdrv_attach_aio_context) { + bs->drv->bdrv_attach_aio_context(bs, new_context); + } + if (bs->io_limits_enabled) { + throttle_attach_aio_context(&bs->throttle_state, new_context); + } +} + +void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) +{ + bdrv_drain_all(); /* ensure there are no in-flight requests */ + + bdrv_detach_aio_context(bs); + + /* This function executes in the old AioContext so acquire the new one in + * case it runs in a different thread. + */ + aio_context_acquire(new_context); + bdrv_attach_aio_context(bs, new_context); + aio_context_release(new_context); } void bdrv_add_before_write_notifier(BlockDriverState *bs, diff --git a/block/blkdebug.c b/block/blkdebug.c index 380c736101..f51407de3f 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -471,7 +471,7 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs, acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque); acb->ret = -error; - bh = qemu_bh_new(error_callback_bh, acb); + bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb); acb->bh = bh; qemu_bh_schedule(bh); diff --git a/block/blkverify.c b/block/blkverify.c index e1c31171c3..621b78593b 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -39,12 +39,13 @@ struct BlkverifyAIOCB { static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb) { BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb; + AioContext *aio_context = bdrv_get_aio_context(blockacb->bs); bool finished = false; /* Wait until request completes, invokes its callback, and frees itself */ acb->finished = &finished; while (!finished) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -228,7 +229,8 @@ static void blkverify_aio_cb(void *opaque, int ret) acb->verify(acb); } - acb->bh = qemu_bh_new(blkverify_aio_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs), + blkverify_aio_bh, acb); qemu_bh_schedule(acb->bh); break; } @@ -302,21 +304,40 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs, return bdrv_recurse_is_first_non_filter(s->test_file, candidate); } +/* Propagate AioContext changes to ->test_file */ +static void blkverify_detach_aio_context(BlockDriverState *bs) +{ + BDRVBlkverifyState *s = bs->opaque; + + bdrv_detach_aio_context(s->test_file); +} + +static void blkverify_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVBlkverifyState *s = bs->opaque; + + bdrv_attach_aio_context(s->test_file, new_context); +} + static BlockDriver bdrv_blkverify = { - .format_name = "blkverify", - .protocol_name = "blkverify", - .instance_size = sizeof(BDRVBlkverifyState), + .format_name = "blkverify", + .protocol_name = "blkverify", + .instance_size = sizeof(BDRVBlkverifyState), - .bdrv_parse_filename = blkverify_parse_filename, - .bdrv_file_open = blkverify_open, - .bdrv_close = blkverify_close, - .bdrv_getlength = blkverify_getlength, + .bdrv_parse_filename = blkverify_parse_filename, + .bdrv_file_open = blkverify_open, + .bdrv_close = blkverify_close, + .bdrv_getlength = blkverify_getlength, - .bdrv_aio_readv = blkverify_aio_readv, - .bdrv_aio_writev = blkverify_aio_writev, - .bdrv_aio_flush = blkverify_aio_flush, + .bdrv_aio_readv = blkverify_aio_readv, + .bdrv_aio_writev = blkverify_aio_writev, + .bdrv_aio_flush = blkverify_aio_flush, - .is_filter = true, + .bdrv_attach_aio_context = blkverify_attach_aio_context, + .bdrv_detach_aio_context = blkverify_detach_aio_context, + + .is_filter = true, .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter, }; diff --git a/block/curl.c b/block/curl.c index f491b0ba4c..8c84141ced 100644 --- a/block/curl.c +++ b/block/curl.c @@ -110,6 +110,7 @@ typedef struct BDRVCURLState { size_t readahead_size; bool sslverify; bool accept_range; + AioContext *aio_context; } BDRVCURLState; static void curl_clean_state(CURLState *s); @@ -134,25 +135,29 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque) #endif static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, - void *s, void *sp) + void *userp, void *sp) { + BDRVCURLState *s; CURLState *state = NULL; curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state); state->sock_fd = fd; + s = state->s; DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd); switch (action) { case CURL_POLL_IN: - qemu_aio_set_fd_handler(fd, curl_multi_read, NULL, state); + aio_set_fd_handler(s->aio_context, fd, curl_multi_read, + NULL, state); break; case CURL_POLL_OUT: - qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, state); + aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state); break; case CURL_POLL_INOUT: - qemu_aio_set_fd_handler(fd, curl_multi_read, curl_multi_do, state); + aio_set_fd_handler(s->aio_context, fd, curl_multi_read, + curl_multi_do, state); break; case CURL_POLL_REMOVE: - qemu_aio_set_fd_handler(fd, NULL, NULL, NULL); + aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL); break; } @@ -365,7 +370,7 @@ static CURLState *curl_init_state(BDRVCURLState *s) break; } if (!state) { - qemu_aio_wait(); + aio_poll(state->s->aio_context, true); } } while(!state); @@ -422,6 +427,51 @@ static void curl_parse_filename(const char *filename, QDict *options, qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename)); } +static void curl_detach_aio_context(BlockDriverState *bs) +{ + BDRVCURLState *s = bs->opaque; + int i; + + for (i = 0; i < CURL_NUM_STATES; i++) { + if (s->states[i].in_use) { + curl_clean_state(&s->states[i]); + } + if (s->states[i].curl) { + curl_easy_cleanup(s->states[i].curl); + s->states[i].curl = NULL; + } + if (s->states[i].orig_buf) { + g_free(s->states[i].orig_buf); + s->states[i].orig_buf = NULL; + } + } + if (s->multi) { + curl_multi_cleanup(s->multi); + s->multi = NULL; + } + + timer_del(&s->timer); +} + +static void curl_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVCURLState *s = bs->opaque; + + aio_timer_init(new_context, &s->timer, + QEMU_CLOCK_REALTIME, SCALE_NS, + curl_multi_timeout_do, s); + + assert(!s->multi); + s->multi = curl_multi_init(); + s->aio_context = new_context; + curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb); +#ifdef NEED_CURL_TIMER_CALLBACK + curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s); + curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb); +#endif +} + static QemuOptsList runtime_opts = { .name = "curl", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), @@ -491,6 +541,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, } DPRINTF("CURL: Opening %s\n", file); + s->aio_context = bdrv_get_aio_context(bs); s->url = g_strdup(file); state = curl_init_state(s); if (!state) @@ -523,19 +574,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, curl_easy_cleanup(state->curl); state->curl = NULL; - aio_timer_init(bdrv_get_aio_context(bs), &s->timer, - QEMU_CLOCK_REALTIME, SCALE_NS, - curl_multi_timeout_do, s); - - // Now we know the file exists and its size, so let's - // initialize the multi interface! - - s->multi = curl_multi_init(); - curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb); -#ifdef NEED_CURL_TIMER_CALLBACK - curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s); - curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb); -#endif + curl_attach_aio_context(bs, bdrv_get_aio_context(bs)); qemu_opts_del(opts); return 0; @@ -630,7 +669,7 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs, acb->sector_num = sector_num; acb->nb_sectors = nb_sectors; - acb->bh = qemu_bh_new(curl_readv_bh_cb, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb); qemu_bh_schedule(acb->bh); return &acb->common; } @@ -638,25 +677,9 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs, static void curl_close(BlockDriverState *bs) { BDRVCURLState *s = bs->opaque; - int i; DPRINTF("CURL: Close\n"); - for (i=0; istates[i].in_use) - curl_clean_state(&s->states[i]); - if (s->states[i].curl) { - curl_easy_cleanup(s->states[i].curl); - s->states[i].curl = NULL; - } - if (s->states[i].orig_buf) { - g_free(s->states[i].orig_buf); - s->states[i].orig_buf = NULL; - } - } - if (s->multi) - curl_multi_cleanup(s->multi); - - timer_del(&s->timer); + curl_detach_aio_context(bs); g_free(s->url); } @@ -668,68 +691,83 @@ static int64_t curl_getlength(BlockDriverState *bs) } static BlockDriver bdrv_http = { - .format_name = "http", - .protocol_name = "http", + .format_name = "http", + .protocol_name = "http", - .instance_size = sizeof(BDRVCURLState), - .bdrv_parse_filename = curl_parse_filename, - .bdrv_file_open = curl_open, - .bdrv_close = curl_close, - .bdrv_getlength = curl_getlength, + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_aio_readv = curl_aio_readv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, }; static BlockDriver bdrv_https = { - .format_name = "https", - .protocol_name = "https", + .format_name = "https", + .protocol_name = "https", - .instance_size = sizeof(BDRVCURLState), - .bdrv_parse_filename = curl_parse_filename, - .bdrv_file_open = curl_open, - .bdrv_close = curl_close, - .bdrv_getlength = curl_getlength, + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_aio_readv = curl_aio_readv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, }; static BlockDriver bdrv_ftp = { - .format_name = "ftp", - .protocol_name = "ftp", + .format_name = "ftp", + .protocol_name = "ftp", - .instance_size = sizeof(BDRVCURLState), - .bdrv_parse_filename = curl_parse_filename, - .bdrv_file_open = curl_open, - .bdrv_close = curl_close, - .bdrv_getlength = curl_getlength, + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_aio_readv = curl_aio_readv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, }; static BlockDriver bdrv_ftps = { - .format_name = "ftps", - .protocol_name = "ftps", + .format_name = "ftps", + .protocol_name = "ftps", - .instance_size = sizeof(BDRVCURLState), - .bdrv_parse_filename = curl_parse_filename, - .bdrv_file_open = curl_open, - .bdrv_close = curl_close, - .bdrv_getlength = curl_getlength, + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_aio_readv = curl_aio_readv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, }; static BlockDriver bdrv_tftp = { - .format_name = "tftp", - .protocol_name = "tftp", + .format_name = "tftp", + .protocol_name = "tftp", - .instance_size = sizeof(BDRVCURLState), - .bdrv_parse_filename = curl_parse_filename, - .bdrv_file_open = curl_open, - .bdrv_close = curl_close, - .bdrv_getlength = curl_getlength, + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_aio_readv = curl_aio_readv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, }; static void curl_block_init(void) diff --git a/block/gluster.c b/block/gluster.c index d0726ec92c..114689e441 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -16,6 +16,7 @@ typedef struct GlusterAIOCB { int ret; QEMUBH *bh; Coroutine *coroutine; + AioContext *aio_context; } GlusterAIOCB; typedef struct BDRVGlusterState { @@ -249,7 +250,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) acb->ret = -EIO; /* Partial read/write - fail it */ } - acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb); + acb->bh = aio_bh_new(acb->aio_context, qemu_gluster_complete_aio, acb); qemu_bh_schedule(acb->bh); } @@ -436,6 +437,7 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs, acb->size = size; acb->ret = 0; acb->coroutine = qemu_coroutine_self(); + acb->aio_context = bdrv_get_aio_context(bs); ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb); if (ret < 0) { @@ -549,6 +551,7 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, acb->size = size; acb->ret = 0; acb->coroutine = qemu_coroutine_self(); + acb->aio_context = bdrv_get_aio_context(bs); if (write) { ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, @@ -605,6 +608,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) acb->size = 0; acb->ret = 0; acb->coroutine = qemu_coroutine_self(); + acb->aio_context = bdrv_get_aio_context(bs); ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); if (ret < 0) { @@ -633,6 +637,7 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs, acb->size = 0; acb->ret = 0; acb->coroutine = qemu_coroutine_self(); + acb->aio_context = bdrv_get_aio_context(bs); ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb); if (ret < 0) { diff --git a/block/iscsi.c b/block/iscsi.c index 3892cc551e..877b877cf2 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -49,6 +49,7 @@ typedef struct IscsiLun { struct iscsi_context *iscsi; + AioContext *aio_context; int lun; enum scsi_inquiry_peripheral_device_type type; int block_size; @@ -73,6 +74,7 @@ typedef struct IscsiTask { struct scsi_task *task; Coroutine *co; QEMUBH *bh; + IscsiLun *iscsilun; } IscsiTask; typedef struct IscsiAIOCB { @@ -133,7 +135,7 @@ iscsi_schedule_bh(IscsiAIOCB *acb) if (acb->bh) { return; } - acb->bh = qemu_bh_new(iscsi_bh_cb, acb); + acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb); qemu_bh_schedule(acb->bh); } @@ -169,7 +171,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, out: if (iTask->co) { - iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask); + iTask->bh = aio_bh_new(iTask->iscsilun->aio_context, + iscsi_co_generic_bh_cb, iTask); qemu_bh_schedule(iTask->bh); } } @@ -177,8 +180,9 @@ out: static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) { *iTask = (struct IscsiTask) { - .co = qemu_coroutine_self(), - .retries = ISCSI_CMD_RETRIES, + .co = qemu_coroutine_self(), + .retries = ISCSI_CMD_RETRIES, + .iscsilun = iscsilun, }; } @@ -209,7 +213,7 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb) iscsi_abort_task_cb, acb); while (acb->status == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(iscsilun->aio_context, true); } } @@ -232,10 +236,11 @@ iscsi_set_events(IscsiLun *iscsilun) ev = POLLIN; ev |= iscsi_which_events(iscsi); if (ev != iscsilun->events) { - qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), - iscsi_process_read, - (ev & POLLOUT) ? iscsi_process_write : NULL, - iscsilun); + aio_set_fd_handler(iscsilun->aio_context, + iscsi_get_fd(iscsi), + iscsi_process_read, + (ev & POLLOUT) ? iscsi_process_write : NULL, + iscsilun); } @@ -791,7 +796,7 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status); while (status == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(iscsilun->aio_context, true); } return 0; @@ -1195,6 +1200,40 @@ fail_with_err: return NULL; } +static void iscsi_detach_aio_context(BlockDriverState *bs) +{ + IscsiLun *iscsilun = bs->opaque; + + aio_set_fd_handler(iscsilun->aio_context, + iscsi_get_fd(iscsilun->iscsi), + NULL, NULL, NULL); + iscsilun->events = 0; + + if (iscsilun->nop_timer) { + timer_del(iscsilun->nop_timer); + timer_free(iscsilun->nop_timer); + iscsilun->nop_timer = NULL; + } +} + +static void iscsi_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + IscsiLun *iscsilun = bs->opaque; + + iscsilun->aio_context = new_context; + iscsi_set_events(iscsilun); + +#if defined(LIBISCSI_FEATURE_NOP_COUNTER) + /* Set up a timer for sending out iSCSI NOPs */ + iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context, + QEMU_CLOCK_REALTIME, SCALE_MS, + iscsi_nop_timed_event, iscsilun); + timer_mod(iscsilun->nop_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); +#endif +} + /* * We support iscsi url's on the form * iscsi://[%@][:]// @@ -1301,6 +1340,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, } iscsilun->iscsi = iscsi; + iscsilun->aio_context = bdrv_get_aio_context(bs); iscsilun->lun = iscsi_url->lun; iscsilun->has_write_same = true; @@ -1374,11 +1414,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, scsi_free_scsi_task(task); task = NULL; -#if defined(LIBISCSI_FEATURE_NOP_COUNTER) - /* Set up a timer for sending out iSCSI NOPs */ - iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun); - timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); -#endif + iscsi_attach_aio_context(bs, iscsilun->aio_context); /* Guess the internal cluster (page) size of the iscsi target by the means * of opt_unmap_gran. Transfer the unmap granularity only if it has a @@ -1422,11 +1458,7 @@ static void iscsi_close(BlockDriverState *bs) IscsiLun *iscsilun = bs->opaque; struct iscsi_context *iscsi = iscsilun->iscsi; - if (iscsilun->nop_timer) { - timer_del(iscsilun->nop_timer); - timer_free(iscsilun->nop_timer); - } - qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL); + iscsi_detach_aio_context(bs); iscsi_destroy_context(iscsi); g_free(iscsilun->zeroblock); g_free(iscsilun->allocationmap); @@ -1530,10 +1562,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options, if (ret != 0) { goto out; } - if (iscsilun->nop_timer) { - timer_del(iscsilun->nop_timer); - timer_free(iscsilun->nop_timer); - } + iscsi_detach_aio_context(bs); if (iscsilun->type != TYPE_DISK) { ret = -ENODEV; goto out; @@ -1604,6 +1633,9 @@ static BlockDriver bdrv_iscsi = { .bdrv_ioctl = iscsi_ioctl, .bdrv_aio_ioctl = iscsi_aio_ioctl, #endif + + .bdrv_detach_aio_context = iscsi_detach_aio_context, + .bdrv_attach_aio_context = iscsi_attach_aio_context, }; static QemuOptsList qemu_iscsi_opts = { diff --git a/block/linux-aio.c b/block/linux-aio.c index 53434e2df5..f0a2c087b2 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -177,6 +177,20 @@ out_free_aiocb: return NULL; } +void laio_detach_aio_context(void *s_, AioContext *old_context) +{ + struct qemu_laio_state *s = s_; + + aio_set_event_notifier(old_context, &s->e, NULL); +} + +void laio_attach_aio_context(void *s_, AioContext *new_context) +{ + struct qemu_laio_state *s = s_; + + aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb); +} + void *laio_init(void) { struct qemu_laio_state *s; @@ -190,8 +204,6 @@ void *laio_init(void) goto out_close_efd; } - qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb); - return s; out_close_efd: @@ -200,3 +212,11 @@ out_free_state: g_free(s); return NULL; } + +void laio_cleanup(void *s_) +{ + struct qemu_laio_state *s = s_; + + event_notifier_cleanup(&s->e); + g_free(s); +} diff --git a/block/nbd-client.c b/block/nbd-client.c index 7d698cb619..6e1c97cad0 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -49,7 +49,7 @@ static void nbd_teardown_connection(NbdClientSession *client) shutdown(client->sock, 2); nbd_recv_coroutines_enter_all(client); - qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL); + nbd_client_session_detach_aio_context(client); closesocket(client->sock); client->sock = -1; } @@ -103,11 +103,14 @@ static int nbd_co_send_request(NbdClientSession *s, struct nbd_request *request, QEMUIOVector *qiov, int offset) { + AioContext *aio_context; int rc, ret; qemu_co_mutex_lock(&s->send_mutex); s->send_coroutine = qemu_coroutine_self(); - qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s); + aio_context = bdrv_get_aio_context(s->bs); + aio_set_fd_handler(aio_context, s->sock, + nbd_reply_ready, nbd_restart_write, s); if (qiov) { if (!s->is_unix) { socket_set_cork(s->sock, 1); @@ -126,7 +129,7 @@ static int nbd_co_send_request(NbdClientSession *s, } else { rc = nbd_send_request(s->sock, request); } - qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s); + aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, s); s->send_coroutine = NULL; qemu_co_mutex_unlock(&s->send_mutex); return rc; @@ -335,6 +338,19 @@ int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num, } +void nbd_client_session_detach_aio_context(NbdClientSession *client) +{ + aio_set_fd_handler(bdrv_get_aio_context(client->bs), client->sock, + NULL, NULL, NULL); +} + +void nbd_client_session_attach_aio_context(NbdClientSession *client, + AioContext *new_context) +{ + aio_set_fd_handler(new_context, client->sock, + nbd_reply_ready, NULL, client); +} + void nbd_client_session_close(NbdClientSession *client) { struct nbd_request request = { @@ -381,7 +397,7 @@ int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs, /* Now that we're connected, set the socket to be non-blocking and * kick the reply mechanism. */ qemu_set_nonblock(sock); - qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, client); + nbd_client_session_attach_aio_context(client, bdrv_get_aio_context(bs)); logout("Established connection with NBD server\n"); return 0; diff --git a/block/nbd-client.h b/block/nbd-client.h index f2a63378bb..cd478f3a98 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -47,4 +47,8 @@ int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num, int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +void nbd_client_session_detach_aio_context(NbdClientSession *client); +void nbd_client_session_attach_aio_context(NbdClientSession *client, + AioContext *new_context); + #endif /* NBD_CLIENT_H */ diff --git a/block/nbd.c b/block/nbd.c index 613f2581ae..4eda0958d7 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -323,46 +323,67 @@ static int64_t nbd_getlength(BlockDriverState *bs) return s->client.size; } +static void nbd_detach_aio_context(BlockDriverState *bs) +{ + BDRVNBDState *s = bs->opaque; + + nbd_client_session_detach_aio_context(&s->client); +} + +static void nbd_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVNBDState *s = bs->opaque; + + nbd_client_session_attach_aio_context(&s->client, new_context); +} + static BlockDriver bdrv_nbd = { - .format_name = "nbd", - .protocol_name = "nbd", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, - .bdrv_file_open = nbd_open, - .bdrv_co_readv = nbd_co_readv, - .bdrv_co_writev = nbd_co_writev, - .bdrv_close = nbd_close, - .bdrv_co_flush_to_os = nbd_co_flush, - .bdrv_co_discard = nbd_co_discard, - .bdrv_getlength = nbd_getlength, + .format_name = "nbd", + .protocol_name = "nbd", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, + .bdrv_file_open = nbd_open, + .bdrv_co_readv = nbd_co_readv, + .bdrv_co_writev = nbd_co_writev, + .bdrv_close = nbd_close, + .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_discard = nbd_co_discard, + .bdrv_getlength = nbd_getlength, + .bdrv_detach_aio_context = nbd_detach_aio_context, + .bdrv_attach_aio_context = nbd_attach_aio_context, }; static BlockDriver bdrv_nbd_tcp = { - .format_name = "nbd", - .protocol_name = "nbd+tcp", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, - .bdrv_file_open = nbd_open, - .bdrv_co_readv = nbd_co_readv, - .bdrv_co_writev = nbd_co_writev, - .bdrv_close = nbd_close, - .bdrv_co_flush_to_os = nbd_co_flush, - .bdrv_co_discard = nbd_co_discard, - .bdrv_getlength = nbd_getlength, + .format_name = "nbd", + .protocol_name = "nbd+tcp", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, + .bdrv_file_open = nbd_open, + .bdrv_co_readv = nbd_co_readv, + .bdrv_co_writev = nbd_co_writev, + .bdrv_close = nbd_close, + .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_discard = nbd_co_discard, + .bdrv_getlength = nbd_getlength, + .bdrv_detach_aio_context = nbd_detach_aio_context, + .bdrv_attach_aio_context = nbd_attach_aio_context, }; static BlockDriver bdrv_nbd_unix = { - .format_name = "nbd", - .protocol_name = "nbd+unix", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, - .bdrv_file_open = nbd_open, - .bdrv_co_readv = nbd_co_readv, - .bdrv_co_writev = nbd_co_writev, - .bdrv_close = nbd_close, - .bdrv_co_flush_to_os = nbd_co_flush, - .bdrv_co_discard = nbd_co_discard, - .bdrv_getlength = nbd_getlength, + .format_name = "nbd", + .protocol_name = "nbd+unix", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, + .bdrv_file_open = nbd_open, + .bdrv_co_readv = nbd_co_readv, + .bdrv_co_writev = nbd_co_writev, + .bdrv_close = nbd_close, + .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_discard = nbd_co_discard, + .bdrv_getlength = nbd_getlength, + .bdrv_detach_aio_context = nbd_detach_aio_context, + .bdrv_attach_aio_context = nbd_attach_aio_context, }; static void bdrv_nbd_init(void) diff --git a/block/nfs.c b/block/nfs.c index 539bd951df..bd9177f3ae 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -40,6 +40,7 @@ typedef struct NFSClient { struct nfsfh *fh; int events; bool has_zero_init; + AioContext *aio_context; } NFSClient; typedef struct NFSRPC { @@ -49,6 +50,7 @@ typedef struct NFSRPC { struct stat *st; Coroutine *co; QEMUBH *bh; + NFSClient *client; } NFSRPC; static void nfs_process_read(void *arg); @@ -58,10 +60,11 @@ static void nfs_set_events(NFSClient *client) { int ev = nfs_which_events(client->context); if (ev != client->events) { - qemu_aio_set_fd_handler(nfs_get_fd(client->context), - (ev & POLLIN) ? nfs_process_read : NULL, - (ev & POLLOUT) ? nfs_process_write : NULL, - client); + aio_set_fd_handler(client->aio_context, + nfs_get_fd(client->context), + (ev & POLLIN) ? nfs_process_read : NULL, + (ev & POLLOUT) ? nfs_process_write : NULL, + client); } client->events = ev; @@ -84,7 +87,8 @@ static void nfs_process_write(void *arg) static void nfs_co_init_task(NFSClient *client, NFSRPC *task) { *task = (NFSRPC) { - .co = qemu_coroutine_self(), + .co = qemu_coroutine_self(), + .client = client, }; } @@ -116,7 +120,8 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, error_report("NFS Error: %s", nfs_get_error(nfs)); } if (task->co) { - task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task); + task->bh = aio_bh_new(task->client->aio_context, + nfs_co_generic_bh_cb, task); qemu_bh_schedule(task->bh); } } @@ -224,13 +229,34 @@ static QemuOptsList runtime_opts = { }, }; +static void nfs_detach_aio_context(BlockDriverState *bs) +{ + NFSClient *client = bs->opaque; + + aio_set_fd_handler(client->aio_context, + nfs_get_fd(client->context), + NULL, NULL, NULL); + client->events = 0; +} + +static void nfs_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + NFSClient *client = bs->opaque; + + client->aio_context = new_context; + nfs_set_events(client); +} + static void nfs_client_close(NFSClient *client) { if (client->context) { if (client->fh) { nfs_close(client->context, client->fh); } - qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL); + aio_set_fd_handler(client->aio_context, + nfs_get_fd(client->context), + NULL, NULL, NULL); nfs_destroy_context(client->context); } memset(client, 0, sizeof(NFSClient)); @@ -345,6 +371,8 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; Error *local_err = NULL; + client->aio_context = bdrv_get_aio_context(bs); + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -368,6 +396,8 @@ static int nfs_file_create(const char *url, QEMUOptionParameter *options, int64_t total_size = 0; NFSClient *client = g_malloc0(sizeof(NFSClient)); + client->aio_context = qemu_get_aio_context(); + /* Read out options */ while (options && options->name) { if (!strcmp(options->name, "size")) { @@ -407,7 +437,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) while (!task.complete) { nfs_set_events(client); - qemu_aio_wait(); + aio_poll(client->aio_context, true); } return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize); @@ -420,22 +450,25 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset) } static BlockDriver bdrv_nfs = { - .format_name = "nfs", - .protocol_name = "nfs", + .format_name = "nfs", + .protocol_name = "nfs", - .instance_size = sizeof(NFSClient), - .bdrv_needs_filename = true, - .bdrv_has_zero_init = nfs_has_zero_init, - .bdrv_get_allocated_file_size = nfs_get_allocated_file_size, - .bdrv_truncate = nfs_file_truncate, + .instance_size = sizeof(NFSClient), + .bdrv_needs_filename = true, + .bdrv_has_zero_init = nfs_has_zero_init, + .bdrv_get_allocated_file_size = nfs_get_allocated_file_size, + .bdrv_truncate = nfs_file_truncate, - .bdrv_file_open = nfs_file_open, - .bdrv_close = nfs_file_close, - .bdrv_create = nfs_file_create, + .bdrv_file_open = nfs_file_open, + .bdrv_close = nfs_file_close, + .bdrv_create = nfs_file_create, - .bdrv_co_readv = nfs_co_readv, - .bdrv_co_writev = nfs_co_writev, - .bdrv_co_flush_to_disk = nfs_co_flush, + .bdrv_co_readv = nfs_co_readv, + .bdrv_co_writev = nfs_co_writev, + .bdrv_co_flush_to_disk = nfs_co_flush, + + .bdrv_detach_aio_context = nfs_detach_aio_context, + .bdrv_attach_aio_context = nfs_attach_aio_context, }; static void nfs_block_init(void) diff --git a/block/qed-table.c b/block/qed-table.c index 76d2dcccf8..f61107a1cf 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -173,7 +173,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s) qed_read_table(s, s->header.l1_table_offset, s->l1_table, qed_sync_cb, &ret); while (ret == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(s->bs), true); } return ret; @@ -194,7 +194,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, qed_write_l1_table(s, index, n, qed_sync_cb, &ret); while (ret == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(s->bs), true); } return ret; @@ -267,7 +267,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset qed_read_l2_table(s, request, offset, qed_sync_cb, &ret); while (ret == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(s->bs), true); } return ret; @@ -289,7 +289,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret); while (ret == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(s->bs), true); } return ret; diff --git a/block/qed.c b/block/qed.c index c130e42d0d..79f5bd392a 100644 --- a/block/qed.c +++ b/block/qed.c @@ -21,12 +21,13 @@ static void qed_aio_cancel(BlockDriverAIOCB *blockacb) { QEDAIOCB *acb = (QEDAIOCB *)blockacb; + AioContext *aio_context = bdrv_get_aio_context(blockacb->bs); bool finished = false; /* Wait for the request to finish */ acb->finished = &finished; while (!finished) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -373,6 +374,27 @@ static void bdrv_qed_rebind(BlockDriverState *bs) s->bs = bs; } +static void bdrv_qed_detach_aio_context(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + qed_cancel_need_check_timer(s); + timer_free(s->need_check_timer); +} + +static void bdrv_qed_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVQEDState *s = bs->opaque; + + s->need_check_timer = aio_timer_new(new_context, + QEMU_CLOCK_VIRTUAL, SCALE_NS, + qed_need_check_timer_cb, s); + if (s->header.features & QED_F_NEED_CHECK) { + qed_start_need_check_timer(s); + } +} + static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -496,8 +518,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, } } - s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - qed_need_check_timer_cb, s); + bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs)); out: if (ret) { @@ -528,8 +549,7 @@ static void bdrv_qed_close(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; - qed_cancel_need_check_timer(s); - timer_free(s->need_check_timer); + bdrv_qed_detach_aio_context(bs); /* Ensure writes reach stable storage */ bdrv_flush(bs->file); @@ -919,7 +939,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) /* Arrange for a bh to invoke the completion function */ acb->bh_ret = ret; - acb->bh = qemu_bh_new(qed_aio_complete_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs), + qed_aio_complete_bh, acb); qemu_bh_schedule(acb->bh); /* Start next allocating write request waiting behind this one. Note that @@ -1644,6 +1665,8 @@ static BlockDriver bdrv_qed = { .bdrv_change_backing_file = bdrv_qed_change_backing_file, .bdrv_invalidate_cache = bdrv_qed_invalidate_cache, .bdrv_check = bdrv_qed_check, + .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, + .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, }; static void bdrv_qed_init(void) diff --git a/block/quorum.c b/block/quorum.c index ecec3a5407..426077a520 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -848,25 +848,49 @@ static void quorum_close(BlockDriverState *bs) g_free(s->bs); } +static void quorum_detach_aio_context(BlockDriverState *bs) +{ + BDRVQuorumState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_children; i++) { + bdrv_detach_aio_context(s->bs[i]); + } +} + +static void quorum_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVQuorumState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_children; i++) { + bdrv_attach_aio_context(s->bs[i], new_context); + } +} + static BlockDriver bdrv_quorum = { - .format_name = "quorum", - .protocol_name = "quorum", + .format_name = "quorum", + .protocol_name = "quorum", - .instance_size = sizeof(BDRVQuorumState), + .instance_size = sizeof(BDRVQuorumState), - .bdrv_file_open = quorum_open, - .bdrv_close = quorum_close, + .bdrv_file_open = quorum_open, + .bdrv_close = quorum_close, - .bdrv_co_flush_to_disk = quorum_co_flush, + .bdrv_co_flush_to_disk = quorum_co_flush, - .bdrv_getlength = quorum_getlength, + .bdrv_getlength = quorum_getlength, - .bdrv_aio_readv = quorum_aio_readv, - .bdrv_aio_writev = quorum_aio_writev, - .bdrv_invalidate_cache = quorum_invalidate_cache, + .bdrv_aio_readv = quorum_aio_readv, + .bdrv_aio_writev = quorum_aio_writev, + .bdrv_invalidate_cache = quorum_invalidate_cache, - .is_filter = true, - .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, + .bdrv_detach_aio_context = quorum_detach_aio_context, + .bdrv_attach_aio_context = quorum_attach_aio_context, + + .is_filter = true, + .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, }; static void bdrv_quorum_init(void) diff --git a/block/raw-aio.h b/block/raw-aio.h index 7ad0a8a0a7..8cf084eeb5 100644 --- a/block/raw-aio.h +++ b/block/raw-aio.h @@ -34,19 +34,27 @@ /* linux-aio.c - Linux native implementation */ #ifdef CONFIG_LINUX_AIO void *laio_init(void); +void laio_cleanup(void *s); BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type); +void laio_detach_aio_context(void *s, AioContext *old_context); +void laio_attach_aio_context(void *s, AioContext *new_context); #endif #ifdef _WIN32 typedef struct QEMUWin32AIOState QEMUWin32AIOState; QEMUWin32AIOState *win32_aio_init(void); +void win32_aio_cleanup(QEMUWin32AIOState *aio); int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile); BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, QEMUWin32AIOState *aio, HANDLE hfile, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type); +void win32_aio_detach_aio_context(QEMUWin32AIOState *aio, + AioContext *old_context); +void win32_aio_attach_aio_context(QEMUWin32AIOState *aio, + AioContext *new_context); #endif #endif /* QEMU_RAW_AIO_H */ diff --git a/block/raw-posix.c b/block/raw-posix.c index b7f0f2624b..c2b30be3d3 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -307,6 +307,29 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags) } } +static void raw_detach_aio_context(BlockDriverState *bs) +{ +#ifdef CONFIG_LINUX_AIO + BDRVRawState *s = bs->opaque; + + if (s->use_aio) { + laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs)); + } +#endif +} + +static void raw_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ +#ifdef CONFIG_LINUX_AIO + BDRVRawState *s = bs->opaque; + + if (s->use_aio) { + laio_attach_aio_context(s->aio_ctx, new_context); + } +#endif +} + #ifdef CONFIG_LINUX_AIO static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags) { @@ -447,6 +470,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } #endif + raw_attach_aio_context(bs, bdrv_get_aio_context(bs)); + ret = 0; fail: if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { @@ -1059,6 +1084,14 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, static void raw_close(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; + + raw_detach_aio_context(bs); + +#ifdef CONFIG_LINUX_AIO + if (s->use_aio) { + laio_cleanup(s->aio_ctx); + } +#endif if (s->fd >= 0) { qemu_close(s->fd); s->fd = -1; @@ -1478,6 +1511,9 @@ static BlockDriver bdrv_file = { .bdrv_get_allocated_file_size = raw_get_allocated_file_size, + .bdrv_detach_aio_context = raw_detach_aio_context, + .bdrv_attach_aio_context = raw_attach_aio_context, + .create_options = raw_create_options, }; @@ -1878,6 +1914,9 @@ static BlockDriver bdrv_host_device = { .bdrv_get_allocated_file_size = raw_get_allocated_file_size, + .bdrv_detach_aio_context = raw_detach_aio_context, + .bdrv_attach_aio_context = raw_attach_aio_context, + /* generic scsi device */ #ifdef __linux__ .bdrv_ioctl = hdev_ioctl, @@ -2020,6 +2059,9 @@ static BlockDriver bdrv_host_floppy = { .bdrv_get_allocated_file_size = raw_get_allocated_file_size, + .bdrv_detach_aio_context = raw_detach_aio_context, + .bdrv_attach_aio_context = raw_attach_aio_context, + /* removable device support */ .bdrv_is_inserted = floppy_is_inserted, .bdrv_media_changed = floppy_media_changed, @@ -2145,6 +2187,9 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_get_allocated_file_size = raw_get_allocated_file_size, + .bdrv_detach_aio_context = raw_detach_aio_context, + .bdrv_attach_aio_context = raw_attach_aio_context, + /* removable device support */ .bdrv_is_inserted = cdrom_is_inserted, .bdrv_eject = cdrom_eject, @@ -2276,6 +2321,9 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_get_allocated_file_size = raw_get_allocated_file_size, + .bdrv_detach_aio_context = raw_detach_aio_context, + .bdrv_attach_aio_context = raw_attach_aio_context, + /* removable device support */ .bdrv_is_inserted = cdrom_is_inserted, .bdrv_eject = cdrom_eject, @@ -2283,40 +2331,6 @@ static BlockDriver bdrv_host_cdrom = { }; #endif /* __FreeBSD__ */ -#ifdef CONFIG_LINUX_AIO -/** - * Return the file descriptor for Linux AIO - * - * This function is a layering violation and should be removed when it becomes - * possible to call the block layer outside the global mutex. It allows the - * caller to hijack the file descriptor so I/O can be performed outside the - * block layer. - */ -int raw_get_aio_fd(BlockDriverState *bs) -{ - BDRVRawState *s; - - if (!bs->drv) { - return -ENOMEDIUM; - } - - if (bs->drv == bdrv_find_format("raw")) { - bs = bs->file; - } - - /* raw-posix has several protocols so just check for raw_aio_readv */ - if (bs->drv->bdrv_aio_readv != raw_aio_readv) { - return -ENOTSUP; - } - - s = bs->opaque; - if (!s->use_aio) { - return -ENOTSUP; - } - return s->fd; -} -#endif /* CONFIG_LINUX_AIO */ - static void bdrv_file_init(void) { /* diff --git a/block/raw-win32.c b/block/raw-win32.c index 064ea3123c..324e8187f5 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -36,8 +36,6 @@ #define FTYPE_CD 1 #define FTYPE_HARDDISK 2 -static QEMUWin32AIOState *aio; - typedef struct RawWin32AIOData { BlockDriverState *bs; HANDLE hfile; @@ -202,6 +200,25 @@ static int set_sparse(int fd) NULL, 0, NULL, 0, &returned, NULL); } +static void raw_detach_aio_context(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if (s->aio) { + win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs)); + } +} + +static void raw_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVRawState *s = bs->opaque; + + if (s->aio) { + win32_aio_attach_aio_context(s->aio, new_context); + } +} + static void raw_probe_alignment(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; @@ -300,15 +317,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, raw_parse_flags(flags, &access_flags, &overlapped); - if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) { - aio = win32_aio_init(); - if (aio == NULL) { - error_setg(errp, "Could not initialize AIO"); - ret = -EINVAL; - goto fail; - } - } - if (filename[0] && filename[1] == ':') { snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]); } else if (filename[0] == '\\' && filename[1] == '\\') { @@ -335,13 +343,23 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } if (flags & BDRV_O_NATIVE_AIO) { - ret = win32_aio_attach(aio, s->hfile); + s->aio = win32_aio_init(); + if (s->aio == NULL) { + CloseHandle(s->hfile); + error_setg(errp, "Could not initialize AIO"); + ret = -EINVAL; + goto fail; + } + + ret = win32_aio_attach(s->aio, s->hfile); if (ret < 0) { + win32_aio_cleanup(s->aio); CloseHandle(s->hfile); error_setg_errno(errp, -ret, "Could not enable AIO"); goto fail; } - s->aio = aio; + + win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs)); } raw_probe_alignment(bs); @@ -389,6 +407,13 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, static void raw_close(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; + + if (s->aio) { + win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs)); + win32_aio_cleanup(s->aio); + s->aio = NULL; + } + CloseHandle(s->hfile); if (bs->open_flags & BDRV_O_TEMPORARY) { unlink(bs->filename); @@ -684,6 +709,9 @@ static BlockDriver bdrv_host_device = { .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_detach_aio_context = raw_detach_aio_context, + .bdrv_attach_aio_context = raw_attach_aio_context, + .bdrv_getlength = raw_getlength, .has_variable_length = true, diff --git a/block/rbd.c b/block/rbd.c index 09af48426e..93639f783c 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -555,7 +555,7 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb) acb->cancelled = 1; while (acb->status == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(acb->common.bs), true); } qemu_aio_release(acb); @@ -588,7 +588,8 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) rcb->ret = rbd_aio_get_return_value(c); rbd_aio_release(c); - acb->bh = qemu_bh_new(rbd_finish_bh, rcb); + acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs), + rbd_finish_bh, rcb); qemu_bh_schedule(acb->bh); } @@ -684,13 +685,16 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, } if (r < 0) { - goto failed; + goto failed_completion; } return &acb->common; +failed_completion: + rbd_aio_release(c); failed: g_free(rcb); + qemu_vfree(acb->bounce); qemu_aio_release(acb); return NULL; } diff --git a/block/sheepdog.c b/block/sheepdog.c index 4ecbf5f498..1fa19399f0 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -200,6 +200,8 @@ typedef struct SheepdogInode { uint32_t data_vdi_id[MAX_DATA_OBJS]; } SheepdogInode; +#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id) + /* * 64 bit FNV-1a non-zero initial basis */ @@ -282,6 +284,7 @@ typedef struct AIOReq { unsigned int data_len; uint8_t flags; uint32_t id; + bool create; QLIST_ENTRY(AIOReq) aio_siblings; } AIOReq; @@ -314,6 +317,7 @@ struct SheepdogAIOCB { typedef struct BDRVSheepdogState { BlockDriverState *bs; + AioContext *aio_context; SheepdogInode inode; @@ -404,7 +408,7 @@ static const char * sd_strerror(int err) static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, uint64_t oid, unsigned int data_len, - uint64_t offset, uint8_t flags, + uint64_t offset, uint8_t flags, bool create, uint64_t base_oid, unsigned int iov_offset) { AIOReq *aio_req; @@ -418,6 +422,7 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, aio_req->data_len = data_len; aio_req->flags = flags; aio_req->id = s->aioreq_seq_num++; + aio_req->create = create; acb->nr_pending++; return aio_req; @@ -496,7 +501,7 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb) sd_finish_aiocb(acb); return; } - qemu_aio_wait(); + aio_poll(s->aio_context, true); } } @@ -578,6 +583,7 @@ static void restart_co_req(void *opaque) typedef struct SheepdogReqCo { int sockfd; + AioContext *aio_context; SheepdogReq *hdr; void *data; unsigned int *wlen; @@ -598,14 +604,14 @@ static coroutine_fn void do_co_req(void *opaque) unsigned int *rlen = srco->rlen; co = qemu_coroutine_self(); - qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co); + aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co); ret = send_co_req(sockfd, hdr, data, wlen); if (ret < 0) { goto out; } - qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co); + aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co); ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); if (ret != sizeof(*hdr)) { @@ -630,18 +636,19 @@ static coroutine_fn void do_co_req(void *opaque) out: /* there is at most one request for this sockfd, so it is safe to * set each handler to NULL. */ - qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL); + aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL); srco->ret = ret; srco->finished = true; } -static int do_req(int sockfd, SheepdogReq *hdr, void *data, - unsigned int *wlen, unsigned int *rlen) +static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr, + void *data, unsigned int *wlen, unsigned int *rlen) { Coroutine *co; SheepdogReqCo srco = { .sockfd = sockfd, + .aio_context = aio_context, .hdr = hdr, .data = data, .wlen = wlen, @@ -656,7 +663,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, co = qemu_coroutine_create(do_co_req); qemu_coroutine_enter(co, &srco); while (!srco.finished) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -664,8 +671,8 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, } static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, bool create, - enum AIOCBState aiocb_type); + struct iovec *iov, int niov, + enum AIOCBState aiocb_type); static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); static int get_sheep_fd(BDRVSheepdogState *s, Error **errp); @@ -698,7 +705,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) /* move aio_req from pending list to inflight one */ QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false, + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, acb->aiocb_type); } } @@ -709,7 +716,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque) BDRVSheepdogState *s = opaque; AIOReq *aio_req, *next; - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); + aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL); close(s->fd); s->fd = -1; @@ -797,7 +804,7 @@ static void coroutine_fn aio_read_response(void *opaque) } idx = data_oid_to_idx(aio_req->oid); - if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) { + if (aio_req->create) { /* * If the object is newly created one, we need to update * the vdi object (metadata object). min_dirty_data_idx @@ -922,7 +929,7 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp) return fd; } - qemu_aio_set_fd_handler(fd, co_read_response, NULL, s); + aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s); return fd; } @@ -1092,7 +1099,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename, hdr.snapid = snapid; hdr.flags = SD_FLAG_CMD_WRITE; - ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); if (ret) { error_setg_errno(errp, -ret, "cannot get vdi info"); goto out; @@ -1117,8 +1124,8 @@ out: } static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, bool create, - enum AIOCBState aiocb_type) + struct iovec *iov, int niov, + enum AIOCBState aiocb_type) { int nr_copies = s->inode.nr_copies; SheepdogObjReq hdr; @@ -1129,6 +1136,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, uint64_t offset = aio_req->offset; uint8_t flags = aio_req->flags; uint64_t old_oid = aio_req->base_oid; + bool create = aio_req->create; if (!nr_copies) { error_report("bug"); @@ -1173,7 +1181,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, qemu_co_mutex_lock(&s->lock); s->co_send = qemu_coroutine_self(); - qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s); + aio_set_fd_handler(s->aio_context, s->fd, + co_read_response, co_write_request, s); socket_set_cork(s->fd, 1); /* send a header */ @@ -1191,12 +1200,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, } out: socket_set_cork(s->fd, 0); - qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s); + aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s); s->co_send = NULL; qemu_co_mutex_unlock(&s->lock); } -static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, +static int read_write_object(int fd, AioContext *aio_context, char *buf, + uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool write, bool create, uint32_t cache_flags) { @@ -1229,7 +1239,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, hdr.offset = offset; hdr.copies = copies; - ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); if (ret) { error_report("failed to send a request to the sheep"); return ret; @@ -1244,19 +1254,23 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, } } -static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies, +static int read_object(int fd, AioContext *aio_context, char *buf, + uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, uint32_t cache_flags) { - return read_write_object(fd, buf, oid, copies, datalen, offset, false, + return read_write_object(fd, aio_context, buf, oid, copies, + datalen, offset, false, false, cache_flags); } -static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies, +static int write_object(int fd, AioContext *aio_context, char *buf, + uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool create, uint32_t cache_flags) { - return read_write_object(fd, buf, oid, copies, datalen, offset, true, + return read_write_object(fd, aio_context, buf, oid, copies, + datalen, offset, true, create, cache_flags); } @@ -1275,7 +1289,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) return -EIO; } - inode = g_malloc(sizeof(s->inode)); + inode = g_malloc(SD_INODE_HEADER_SIZE); ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err); if (ret) { @@ -1284,14 +1298,15 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) goto out; } - ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid), - s->inode.nr_copies, sizeof(*inode), 0, s->cache_flags); + ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid), + s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0, + s->cache_flags); if (ret < 0) { goto out; } if (inode->vdi_id != s->inode.vdi_id) { - memcpy(&s->inode, inode, sizeof(s->inode)); + memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE); } out: @@ -1315,6 +1330,7 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req) DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid); aio_req->flags = 0; aio_req->base_oid = 0; + aio_req->create = false; QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); return true; @@ -1327,7 +1343,8 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req) static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; - bool create = false; + + aio_req->create = false; /* check whether this request becomes a CoW one */ if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { @@ -1345,20 +1362,36 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); aio_req->flags |= SD_FLAG_CMD_COW; } - create = true; + aio_req->create = true; } out: if (is_data_obj(aio_req->oid)) { - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, acb->aiocb_type); } else { struct iovec iov; iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); - add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); + add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); } } +static void sd_detach_aio_context(BlockDriverState *bs) +{ + BDRVSheepdogState *s = bs->opaque; + + aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL); +} + +static void sd_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVSheepdogState *s = bs->opaque; + + s->aio_context = new_context; + aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s); +} + /* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "sheepdog", @@ -1387,6 +1420,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, const char *filename; s->bs = bs; + s->aio_context = bdrv_get_aio_context(bs); opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -1448,8 +1482,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, } buf = g_malloc(SD_INODE_SIZE); - ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0, - s->cache_flags); + ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid), + 0, SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1469,7 +1503,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, g_free(buf); return 0; out: - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL); if (s->fd >= 0) { closesocket(s->fd); } @@ -1512,7 +1546,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.copy_policy = s->inode.copy_policy; hdr.copies = s->inode.nr_copies; - ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); closesocket(fd); @@ -1766,7 +1800,8 @@ static void sd_close(BlockDriverState *bs) hdr.data_length = wlen; hdr.flags = SD_FLAG_CMD_WRITE; - ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + s->name, &wlen, &rlen); closesocket(fd); @@ -1775,7 +1810,7 @@ static void sd_close(BlockDriverState *bs) error_report("%s, %s", sd_strerror(rsp->result), s->name); } - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL); closesocket(s->fd); g_free(s->host_spec); } @@ -1812,8 +1847,9 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) /* we don't need to update entire object */ datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); s->inode.vdi_size = offset; - ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_flags); + ret = write_object(fd, s->aio_context, (char *)&s->inode, + vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, + datalen, 0, false, s->cache_flags); close(fd); if (ret < 0) { @@ -1849,9 +1885,9 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), - data_len, offset, 0, 0, offset); + data_len, offset, 0, false, 0, offset); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); + add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); acb->aio_done_func = sd_finish_aiocb; acb->aiocb_type = AIOCB_WRITE_UDATA; @@ -1882,7 +1918,8 @@ static bool sd_delete(BDRVSheepdogState *s) return false; } - ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + s->name, &wlen, &rlen); closesocket(fd); if (ret) { return false; @@ -1939,8 +1976,8 @@ static int sd_create_branch(BDRVSheepdogState *s) goto out; } - ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0, s->cache_flags); + ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid), + s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -2049,7 +2086,8 @@ static int coroutine_fn sd_co_rw_vector(void *p) DPRINTF("new oid %" PRIx64 "\n", oid); } - aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done); + aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create, + old_oid, done); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); if (create) { @@ -2058,7 +2096,7 @@ static int coroutine_fn sd_co_rw_vector(void *p) } } - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, acb->aiocb_type); done: offset = 0; @@ -2138,9 +2176,9 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) acb->aio_done_func = sd_finish_aiocb; aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), - 0, 0, 0, 0, 0); + 0, 0, 0, false, 0, 0); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type); + add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type); qemu_coroutine_yield(); return acb->ret; @@ -2187,8 +2225,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_flags); + ret = write_object(fd, s->aio_context, (char *)&s->inode, + vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, + datalen, 0, false, s->cache_flags); if (ret < 0) { error_report("failed to write snapshot's inode."); goto cleanup; @@ -2203,8 +2242,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid), - s->inode.nr_copies, datalen, 0, s->cache_flags); + ret = read_object(fd, s->aio_context, (char *)inode, + vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0, + s->cache_flags); if (ret < 0) { error_report("failed to read new inode info. %s", strerror(errno)); @@ -2311,7 +2351,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) req.opcode = SD_OP_READ_VDIS; req.data_length = max; - ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&req, + vdi_inuse, &wlen, &rlen); closesocket(fd); if (ret) { @@ -2338,7 +2379,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) } /* we don't need to read entire object */ - ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid), + ret = read_object(fd, s->aio_context, (char *)&inode, + vid_to_vdi_oid(vid), 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, s->cache_flags); @@ -2403,11 +2445,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, create = (offset == 0); if (load) { - ret = read_object(fd, (char *)data, vmstate_oid, + ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, s->cache_flags); } else { - ret = write_object(fd, (char *)data, vmstate_oid, + ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, create, s->cache_flags); } @@ -2580,6 +2622,9 @@ static BlockDriver bdrv_sheepdog = { .bdrv_save_vmstate = sd_save_vmstate, .bdrv_load_vmstate = sd_load_vmstate, + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + .create_options = sd_create_options, }; @@ -2610,6 +2655,9 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_save_vmstate = sd_save_vmstate, .bdrv_load_vmstate = sd_load_vmstate, + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + .create_options = sd_create_options, }; @@ -2640,6 +2688,9 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_save_vmstate = sd_save_vmstate, .bdrv_load_vmstate = sd_load_vmstate, + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + .create_options = sd_create_options, }; diff --git a/block/ssh.c b/block/ssh.c index b2129714bc..9779eac2bd 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -773,7 +773,7 @@ static void restart_coroutine(void *opaque) qemu_coroutine_enter(co, NULL); } -static coroutine_fn void set_fd_handler(BDRVSSHState *s) +static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs) { int r; IOHandler *rd_handler = NULL, *wr_handler = NULL; @@ -791,24 +791,26 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s) DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock, rd_handler, wr_handler); - qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, + rd_handler, wr_handler, co); } -static coroutine_fn void clear_fd_handler(BDRVSSHState *s) +static coroutine_fn void clear_fd_handler(BDRVSSHState *s, + BlockDriverState *bs) { DPRINTF("s->sock=%d", s->sock); - qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL); } /* A non-blocking call returned EAGAIN, so yield, ensuring the * handlers are set up so that we'll be rescheduled when there is an * interesting event on the socket. */ -static coroutine_fn void co_yield(BDRVSSHState *s) +static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs) { - set_fd_handler(s); + set_fd_handler(s, bs); qemu_coroutine_yield(); - clear_fd_handler(s); + clear_fd_handler(s, bs); } /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position @@ -838,7 +840,7 @@ static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags) } } -static coroutine_fn int ssh_read(BDRVSSHState *s, +static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs, int64_t offset, size_t size, QEMUIOVector *qiov) { @@ -871,7 +873,7 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, DPRINTF("sftp_read returned %zd", r); if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) { - co_yield(s); + co_yield(s, bs); goto again; } if (r < 0) { @@ -906,14 +908,14 @@ static coroutine_fn int ssh_co_readv(BlockDriverState *bs, int ret; qemu_co_mutex_lock(&s->lock); - ret = ssh_read(s, sector_num * BDRV_SECTOR_SIZE, + ret = ssh_read(s, bs, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, qiov); qemu_co_mutex_unlock(&s->lock); return ret; } -static int ssh_write(BDRVSSHState *s, +static int ssh_write(BDRVSSHState *s, BlockDriverState *bs, int64_t offset, size_t size, QEMUIOVector *qiov) { @@ -941,7 +943,7 @@ static int ssh_write(BDRVSSHState *s, DPRINTF("sftp_write returned %zd", r); if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) { - co_yield(s); + co_yield(s, bs); goto again; } if (r < 0) { @@ -960,7 +962,7 @@ static int ssh_write(BDRVSSHState *s, */ if (r == 0) { ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE); - co_yield(s); + co_yield(s, bs); goto again; } @@ -988,7 +990,7 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs, int ret; qemu_co_mutex_lock(&s->lock); - ret = ssh_write(s, sector_num * BDRV_SECTOR_SIZE, + ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, qiov); qemu_co_mutex_unlock(&s->lock); @@ -1009,7 +1011,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what) #ifdef HAS_LIBSSH2_SFTP_FSYNC -static coroutine_fn int ssh_flush(BDRVSSHState *s) +static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs) { int r; @@ -1017,7 +1019,7 @@ static coroutine_fn int ssh_flush(BDRVSSHState *s) again: r = libssh2_sftp_fsync(s->sftp_handle); if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) { - co_yield(s); + co_yield(s, bs); goto again; } if (r == LIBSSH2_ERROR_SFTP_PROTOCOL && @@ -1039,7 +1041,7 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs) int ret; qemu_co_mutex_lock(&s->lock); - ret = ssh_flush(s); + ret = ssh_flush(s, bs); qemu_co_mutex_unlock(&s->lock); return ret; diff --git a/block/vmdk.c b/block/vmdk.c index 2b38f61fcd..b8a476278a 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2096,6 +2096,27 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) return 0; } +static void vmdk_detach_aio_context(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_extents; i++) { + bdrv_detach_aio_context(s->extents[i].file); + } +} + +static void vmdk_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVVmdkState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_extents; i++) { + bdrv_attach_aio_context(s->extents[i].file, new_context); + } +} + static QEMUOptionParameter vmdk_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -2153,6 +2174,8 @@ static BlockDriver bdrv_vmdk = { .bdrv_get_specific_info = vmdk_get_specific_info, .bdrv_refresh_limits = vmdk_refresh_limits, .bdrv_get_info = vmdk_get_info, + .bdrv_detach_aio_context = vmdk_detach_aio_context, + .bdrv_attach_aio_context = vmdk_attach_aio_context, .create_options = vmdk_create_options, }; diff --git a/block/win32-aio.c b/block/win32-aio.c index 5d1d199b61..8e417f70ae 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -40,6 +40,7 @@ struct QEMUWin32AIOState { HANDLE hIOCP; EventNotifier e; int count; + bool is_aio_context_attached; }; typedef struct QEMUWin32AIOCB { @@ -114,7 +115,7 @@ static void win32_aio_cancel(BlockDriverAIOCB *blockacb) * wait for completion. */ while (!HasOverlappedIoCompleted(&waiocb->ov)) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(blockacb->bs), true); } } @@ -180,6 +181,20 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) } } +void win32_aio_detach_aio_context(QEMUWin32AIOState *aio, + AioContext *old_context) +{ + aio_set_event_notifier(old_context, &aio->e, NULL); + aio->is_aio_context_attached = false; +} + +void win32_aio_attach_aio_context(QEMUWin32AIOState *aio, + AioContext *new_context) +{ + aio->is_aio_context_attached = true; + aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb); +} + QEMUWin32AIOState *win32_aio_init(void) { QEMUWin32AIOState *s; @@ -194,8 +209,6 @@ QEMUWin32AIOState *win32_aio_init(void) goto out_close_efd; } - qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb); - return s; out_close_efd: @@ -204,3 +217,11 @@ out_free_state: g_free(s); return NULL; } + +void win32_aio_cleanup(QEMUWin32AIOState *aio) +{ + assert(!aio->is_aio_context_attached); + CloseHandle(aio->hIOCP); + event_notifier_cleanup(&aio->e); + g_free(aio); +} diff --git a/blockdev.c b/blockdev.c index 9b5261b765..4cbcc56b5e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1703,6 +1703,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, { ThrottleConfig cfg; BlockDriverState *bs; + AioContext *aio_context; bs = bdrv_find(device); if (!bs) { @@ -1746,6 +1747,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, return; } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + if (!bs->io_limits_enabled && throttle_enabled(&cfg)) { bdrv_io_limits_enable(bs); } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) { @@ -1755,6 +1759,8 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, if (bs->io_limits_enabled) { bdrv_set_io_limits(bs, &cfg); } + + aio_context_release(aio_context); } int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data) diff --git a/hw/block/dataplane/Makefile.objs b/hw/block/dataplane/Makefile.objs index 9da2eb82ba..e786f66421 100644 --- a/hw/block/dataplane/Makefile.objs +++ b/hw/block/dataplane/Makefile.objs @@ -1 +1 @@ -obj-y += ioq.o virtio-blk.o +obj-y += virtio-blk.o diff --git a/hw/block/dataplane/ioq.c b/hw/block/dataplane/ioq.c deleted file mode 100644 index f709f87ed6..0000000000 --- a/hw/block/dataplane/ioq.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Linux AIO request queue - * - * Copyright 2012 IBM, Corp. - * Copyright 2012 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "ioq.h" - -void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs) -{ - int rc; - - ioq->fd = fd; - ioq->max_reqs = max_reqs; - - memset(&ioq->io_ctx, 0, sizeof ioq->io_ctx); - rc = io_setup(max_reqs, &ioq->io_ctx); - if (rc != 0) { - fprintf(stderr, "ioq io_setup failed %d\n", rc); - exit(1); - } - - rc = event_notifier_init(&ioq->io_notifier, 0); - if (rc != 0) { - fprintf(stderr, "ioq io event notifier creation failed %d\n", rc); - exit(1); - } - - ioq->freelist = g_malloc0(sizeof ioq->freelist[0] * max_reqs); - ioq->freelist_idx = 0; - - ioq->queue = g_malloc0(sizeof ioq->queue[0] * max_reqs); - ioq->queue_idx = 0; -} - -void ioq_cleanup(IOQueue *ioq) -{ - g_free(ioq->freelist); - g_free(ioq->queue); - - event_notifier_cleanup(&ioq->io_notifier); - io_destroy(ioq->io_ctx); -} - -EventNotifier *ioq_get_notifier(IOQueue *ioq) -{ - return &ioq->io_notifier; -} - -struct iocb *ioq_get_iocb(IOQueue *ioq) -{ - /* Underflow cannot happen since ioq is sized for max_reqs */ - assert(ioq->freelist_idx != 0); - - struct iocb *iocb = ioq->freelist[--ioq->freelist_idx]; - ioq->queue[ioq->queue_idx++] = iocb; - return iocb; -} - -void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb) -{ - /* Overflow cannot happen since ioq is sized for max_reqs */ - assert(ioq->freelist_idx != ioq->max_reqs); - - ioq->freelist[ioq->freelist_idx++] = iocb; -} - -struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov, - unsigned int count, long long offset) -{ - struct iocb *iocb = ioq_get_iocb(ioq); - - if (read) { - io_prep_preadv(iocb, ioq->fd, iov, count, offset); - } else { - io_prep_pwritev(iocb, ioq->fd, iov, count, offset); - } - io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier)); - return iocb; -} - -int ioq_submit(IOQueue *ioq) -{ - int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue); - ioq->queue_idx = 0; /* reset */ - return rc; -} - -int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion, - void *opaque) -{ - struct io_event events[ioq->max_reqs]; - int nevents, i; - - do { - nevents = io_getevents(ioq->io_ctx, 0, ioq->max_reqs, events, NULL); - } while (nevents < 0 && errno == EINTR); - if (nevents < 0) { - return nevents; - } - - for (i = 0; i < nevents; i++) { - ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res; - - completion(events[i].obj, ret, opaque); - ioq_put_iocb(ioq, events[i].obj); - } - return nevents; -} diff --git a/hw/block/dataplane/ioq.h b/hw/block/dataplane/ioq.h deleted file mode 100644 index b49b5de7f4..0000000000 --- a/hw/block/dataplane/ioq.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Linux AIO request queue - * - * Copyright 2012 IBM, Corp. - * Copyright 2012 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef IOQ_H -#define IOQ_H - -#include -#include "qemu/event_notifier.h" - -typedef struct { - int fd; /* file descriptor */ - unsigned int max_reqs; /* max length of freelist and queue */ - - io_context_t io_ctx; /* Linux AIO context */ - EventNotifier io_notifier; /* Linux AIO eventfd */ - - /* Requests can complete in any order so a free list is necessary to manage - * available iocbs. - */ - struct iocb **freelist; /* free iocbs */ - unsigned int freelist_idx; - - /* Multiple requests are queued up before submitting them all in one go */ - struct iocb **queue; /* queued iocbs */ - unsigned int queue_idx; -} IOQueue; - -void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs); -void ioq_cleanup(IOQueue *ioq); -EventNotifier *ioq_get_notifier(IOQueue *ioq); -struct iocb *ioq_get_iocb(IOQueue *ioq); -void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb); -struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov, - unsigned int count, long long offset); -int ioq_submit(IOQueue *ioq); - -static inline unsigned int ioq_num_queued(IOQueue *ioq) -{ - return ioq->queue_idx; -} - -typedef void IOQueueCompletion(struct iocb *iocb, ssize_t ret, void *opaque); -int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion, - void *opaque); - -#endif /* IOQ_H */ diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index e49c2536b1..c10b7b70fb 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -17,7 +17,6 @@ #include "qemu/thread.h" #include "qemu/error-report.h" #include "hw/virtio/dataplane/vring.h" -#include "ioq.h" #include "block/block.h" #include "hw/virtio/virtio-blk.h" #include "virtio-blk.h" @@ -25,20 +24,14 @@ #include "hw/virtio/virtio-bus.h" #include "qom/object_interfaces.h" -enum { - SEG_MAX = 126, /* maximum number of I/O segments */ - VRING_MAX = SEG_MAX + 2, /* maximum number of vring descriptors */ - REQ_MAX = VRING_MAX, /* maximum number of requests in the vring, - * is VRING_MAX / 2 with traditional and - * VRING_MAX with indirect descriptors */ -}; - typedef struct { - struct iocb iocb; /* Linux AIO control block */ + VirtIOBlockDataPlane *s; QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */ VirtQueueElement *elem; /* saved data from the virtqueue */ - struct iovec *bounce_iov; /* used if guest buffers are unaligned */ - QEMUIOVector *read_qiov; /* for read completion /w bounce buffer */ + QEMUIOVector qiov; /* original request iovecs */ + struct iovec bounce_iov; /* used if guest buffers are unaligned */ + QEMUIOVector bounce_qiov; /* bounce buffer iovecs */ + bool read; /* read or write? */ } VirtIOBlockRequest; struct VirtIOBlockDataPlane { @@ -47,7 +40,6 @@ struct VirtIOBlockDataPlane { bool stopping; VirtIOBlkConf *blk; - int fd; /* image file descriptor */ VirtIODevice *vdev; Vring vring; /* virtqueue vring */ @@ -61,16 +53,8 @@ struct VirtIOBlockDataPlane { IOThread *iothread; IOThread internal_iothread_obj; AioContext *ctx; - EventNotifier io_notifier; /* Linux AIO completion */ EventNotifier host_notifier; /* doorbell */ - IOQueue ioqueue; /* Linux AIO queue (should really be per - IOThread) */ - VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the - queue */ - - unsigned int num_reqs; - /* Operation blocker on BDS */ Error *blocker; }; @@ -85,33 +69,28 @@ static void notify_guest(VirtIOBlockDataPlane *s) event_notifier_set(s->guest_notifier); } -static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque) +static void complete_rdwr(void *opaque, int ret) { - VirtIOBlockDataPlane *s = opaque; - VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb); + VirtIOBlockRequest *req = opaque; struct virtio_blk_inhdr hdr; int len; - if (likely(ret >= 0)) { + if (likely(ret == 0)) { hdr.status = VIRTIO_BLK_S_OK; - len = ret; + len = req->qiov.size; } else { hdr.status = VIRTIO_BLK_S_IOERR; len = 0; } - trace_virtio_blk_data_plane_complete_request(s, req->elem->index, ret); + trace_virtio_blk_data_plane_complete_request(req->s, req->elem->index, ret); - if (req->read_qiov) { - assert(req->bounce_iov); - qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len); - qemu_iovec_destroy(req->read_qiov); - g_slice_free(QEMUIOVector, req->read_qiov); + if (req->read && req->bounce_iov.iov_base) { + qemu_iovec_from_buf(&req->qiov, 0, req->bounce_iov.iov_base, len); } - if (req->bounce_iov) { - qemu_vfree(req->bounce_iov->iov_base); - g_slice_free(struct iovec, req->bounce_iov); + if (req->bounce_iov.iov_base) { + qemu_vfree(req->bounce_iov.iov_base); } qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr)); @@ -122,9 +101,9 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque) * written to, but for virtio-blk it seems to be the number of bytes * transferred plus the status bytes. */ - vring_push(&s->vring, req->elem, len + sizeof(hdr)); - req->elem = NULL; - s->num_reqs--; + vring_push(&req->s->vring, req->elem, len + sizeof(hdr)); + notify_guest(req->s); + g_slice_free(VirtIOBlockRequest, req); } static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem, @@ -155,51 +134,87 @@ static void do_get_id_cmd(VirtIOBlockDataPlane *s, complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK); } -static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read, - struct iovec *iov, unsigned iov_cnt, - long long offset, VirtQueueElement *elem, - QEMUIOVector *inhdr) +static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read, + struct iovec *iov, unsigned iov_cnt, + int64_t sector_num, VirtQueueElement *elem, + QEMUIOVector *inhdr) { - struct iocb *iocb; - QEMUIOVector qiov; - struct iovec *bounce_iov = NULL; - QEMUIOVector *read_qiov = NULL; + VirtIOBlockRequest *req = g_slice_new0(VirtIOBlockRequest); + QEMUIOVector *qiov; + int nb_sectors; - qemu_iovec_init_external(&qiov, iov, iov_cnt); - if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) { - void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size); + /* Fill in virtio block metadata needed for completion */ + req->s = s; + req->elem = elem; + req->inhdr = inhdr; + req->read = read; + qemu_iovec_init_external(&req->qiov, iov, iov_cnt); - if (read) { - /* Need to copy back from bounce buffer on completion */ - read_qiov = g_slice_new(QEMUIOVector); - qemu_iovec_init(read_qiov, iov_cnt); - qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size); - } else { - qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size); + qiov = &req->qiov; + + if (!bdrv_qiov_is_aligned(s->blk->conf.bs, qiov)) { + void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov->size); + + /* Populate bounce buffer with data for writes */ + if (!read) { + qemu_iovec_to_buf(qiov, 0, bounce_buffer, qiov->size); } /* Redirect I/O to aligned bounce buffer */ - bounce_iov = g_slice_new(struct iovec); - bounce_iov->iov_base = bounce_buffer; - bounce_iov->iov_len = qiov.size; - iov = bounce_iov; - iov_cnt = 1; + req->bounce_iov.iov_base = bounce_buffer; + req->bounce_iov.iov_len = qiov->size; + qemu_iovec_init_external(&req->bounce_qiov, &req->bounce_iov, 1); + qiov = &req->bounce_qiov; } - iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset); + nb_sectors = qiov->size / BDRV_SECTOR_SIZE; - /* Fill in virtio block metadata needed for completion */ - VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb); - req->elem = elem; - req->inhdr = inhdr; - req->bounce_iov = bounce_iov; - req->read_qiov = read_qiov; - return 0; + if (read) { + bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors, + complete_rdwr, req); + } else { + bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors, + complete_rdwr, req); + } } -static int process_request(IOQueue *ioq, VirtQueueElement *elem) +static void complete_flush(void *opaque, int ret) +{ + VirtIOBlockRequest *req = opaque; + unsigned char status; + + if (ret == 0) { + status = VIRTIO_BLK_S_OK; + } else { + status = VIRTIO_BLK_S_IOERR; + } + + complete_request_early(req->s, req->elem, req->inhdr, status); + g_slice_free(VirtIOBlockRequest, req); +} + +static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem, + QEMUIOVector *inhdr) +{ + VirtIOBlockRequest *req = g_slice_new(VirtIOBlockRequest); + req->s = s; + req->elem = elem; + req->inhdr = inhdr; + + bdrv_aio_flush(s->blk->conf.bs, complete_flush, req); +} + +static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem, + QEMUIOVector *inhdr) +{ + int status; + + status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem); + complete_request_early(s, elem, inhdr, status); +} + +static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem) { - VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue); struct iovec *iov = elem->out_sg; struct iovec *in_iov = elem->in_sg; unsigned out_num = elem->out_num; @@ -234,25 +249,23 @@ static int process_request(IOQueue *ioq, VirtQueueElement *elem) switch (outhdr.type) { case VIRTIO_BLK_T_IN: - do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, elem, inhdr); + do_rdwr_cmd(s, true, in_iov, in_num, + outhdr.sector * 512 / BDRV_SECTOR_SIZE, + elem, inhdr); return 0; case VIRTIO_BLK_T_OUT: - do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, elem, inhdr); + do_rdwr_cmd(s, false, iov, out_num, + outhdr.sector * 512 / BDRV_SECTOR_SIZE, + elem, inhdr); return 0; case VIRTIO_BLK_T_SCSI_CMD: - /* TODO support SCSI commands */ - complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_UNSUPP); + do_scsi_cmd(s, elem, inhdr); return 0; case VIRTIO_BLK_T_FLUSH: - /* TODO fdsync not supported by Linux AIO, do it synchronously here! */ - if (qemu_fdatasync(s->fd) < 0) { - complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_IOERR); - } else { - complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK); - } + do_flush_cmd(s, elem, inhdr); return 0; case VIRTIO_BLK_T_GET_ID: @@ -274,7 +287,6 @@ static void handle_notify(EventNotifier *e) VirtQueueElement *elem; int ret; - unsigned int num_queued; event_notifier_test_and_clear(&s->host_notifier); for (;;) { @@ -291,7 +303,7 @@ static void handle_notify(EventNotifier *e) trace_virtio_blk_data_plane_process_request(s, elem->out_num, elem->in_num, elem->index); - if (process_request(&s->ioqueue, elem) < 0) { + if (process_request(s, elem) < 0) { vring_set_broken(&s->vring); vring_free_element(elem); ret = -EFAULT; @@ -306,44 +318,10 @@ static void handle_notify(EventNotifier *e) if (vring_enable_notification(s->vdev, &s->vring)) { break; } - } else { /* ret == -ENOBUFS or fatal error, iovecs[] is depleted */ - /* Since there are no iovecs[] left, stop processing for now. Do - * not re-enable guest->host notifies since the I/O completion - * handler knows to check for more vring descriptors anyway. - */ + } else { /* fatal error */ break; } } - - num_queued = ioq_num_queued(&s->ioqueue); - if (num_queued > 0) { - s->num_reqs += num_queued; - - int rc = ioq_submit(&s->ioqueue); - if (unlikely(rc < 0)) { - fprintf(stderr, "ioq_submit failed %d\n", rc); - exit(1); - } - } -} - -static void handle_io(EventNotifier *e) -{ - VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, - io_notifier); - - event_notifier_test_and_clear(&s->io_notifier); - if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) { - notify_guest(s); - } - - /* If there were more requests than iovecs, the vring will not be empty yet - * so check again. There should now be enough resources to process more - * requests. - */ - if (unlikely(vring_more_avail(&s->vring))) { - handle_notify(&s->host_notifier); - } } /* Context: QEMU global mutex held */ @@ -352,7 +330,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, Error **errp) { VirtIOBlockDataPlane *s; - int fd; Error *local_err = NULL; *dataplane = NULL; @@ -361,18 +338,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, return; } - if (blk->scsi) { - error_setg(errp, - "device is incompatible with x-data-plane, use scsi=off"); - return; - } - - if (blk->config_wce) { - error_setg(errp, "device is incompatible with x-data-plane, " - "use config-wce=off"); - return; - } - /* If dataplane is (re-)enabled while the guest is running there could be * block jobs that can conflict. */ @@ -383,16 +348,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, return; } - fd = raw_get_aio_fd(blk->conf.bs); - if (fd < 0) { - error_setg(errp, "drive is incompatible with x-data-plane, " - "use format=raw,cache=none,aio=native"); - return; - } - s = g_new0(VirtIOBlockDataPlane, 1); s->vdev = vdev; - s->fd = fd; s->blk = blk; if (blk->iothread) { @@ -437,7 +394,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); VirtQueue *vq; - int i; if (s->started) { return; @@ -470,24 +426,18 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) } s->host_notifier = *virtio_queue_get_host_notifier(vq); - /* Set up ioqueue */ - ioq_init(&s->ioqueue, s->fd, REQ_MAX); - for (i = 0; i < ARRAY_SIZE(s->requests); i++) { - ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb); - } - s->io_notifier = *ioq_get_notifier(&s->ioqueue); - s->starting = false; s->started = true; trace_virtio_blk_data_plane_start(s); + bdrv_set_aio_context(s->blk->conf.bs, s->ctx); + /* Kick right away to begin processing requests already in vring */ event_notifier_set(virtio_queue_get_host_notifier(vq)); /* Get this show started by hooking up our callbacks */ aio_context_acquire(s->ctx); aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify); - aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io); aio_context_release(s->ctx); } @@ -507,13 +457,8 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) /* Stop notifications for new requests from guest */ aio_set_event_notifier(s->ctx, &s->host_notifier, NULL); - /* Complete pending requests */ - while (s->num_reqs > 0) { - aio_poll(s->ctx, true); - } - - /* Stop ioq callbacks (there are no pending requests left) */ - aio_set_event_notifier(s->ctx, &s->io_notifier, NULL); + /* Drain and switch bs back to the QEMU main loop */ + bdrv_set_aio_context(s->blk->conf.bs, qemu_get_aio_context()); aio_context_release(s->ctx); @@ -522,7 +467,6 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) */ vring_teardown(&s->vring, s->vdev, 0); - ioq_cleanup(&s->ioqueue); k->set_host_notifier(qbus->parent, 0, false); /* Clean up guest notifier (irq) */ diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index b1fc1de0dc..85aa8715ba 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -33,7 +33,6 @@ typedef struct VirtIOBlockReq VirtQueueElement elem; struct virtio_blk_inhdr *in; struct virtio_blk_outhdr *out; - struct virtio_scsi_inhdr *scsi; QEMUIOVector qiov; struct VirtIOBlockReq *next; BlockAcctCookie acct; @@ -125,13 +124,15 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) return req; } -static void virtio_blk_handle_scsi(VirtIOBlockReq *req) +int virtio_blk_handle_scsi_req(VirtIOBlock *blk, + VirtQueueElement *elem) { -#ifdef __linux__ - int ret; - int i; -#endif int status = VIRTIO_BLK_S_OK; + struct virtio_scsi_inhdr *scsi = NULL; +#ifdef __linux__ + int i; + struct sg_io_hdr hdr; +#endif /* * We require at least one output segment each for the virtio_blk_outhdr @@ -140,19 +141,18 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr * and the sense buffer pointer in the input segments. */ - if (req->elem.out_num < 2 || req->elem.in_num < 3) { - virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); - g_free(req); - return; + if (elem->out_num < 2 || elem->in_num < 3) { + status = VIRTIO_BLK_S_IOERR; + goto fail; } /* * The scsi inhdr is placed in the second-to-last input segment, just * before the regular inhdr. */ - req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base; + scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base; - if (!req->dev->blk.scsi) { + if (!blk->blk.scsi) { status = VIRTIO_BLK_S_UNSUPP; goto fail; } @@ -160,43 +160,42 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) /* * No support for bidirection commands yet. */ - if (req->elem.out_num > 2 && req->elem.in_num > 3) { + if (elem->out_num > 2 && elem->in_num > 3) { status = VIRTIO_BLK_S_UNSUPP; goto fail; } #ifdef __linux__ - struct sg_io_hdr hdr; memset(&hdr, 0, sizeof(struct sg_io_hdr)); hdr.interface_id = 'S'; - hdr.cmd_len = req->elem.out_sg[1].iov_len; - hdr.cmdp = req->elem.out_sg[1].iov_base; + hdr.cmd_len = elem->out_sg[1].iov_len; + hdr.cmdp = elem->out_sg[1].iov_base; hdr.dxfer_len = 0; - if (req->elem.out_num > 2) { + if (elem->out_num > 2) { /* * If there are more than the minimally required 2 output segments * there is write payload starting from the third iovec. */ hdr.dxfer_direction = SG_DXFER_TO_DEV; - hdr.iovec_count = req->elem.out_num - 2; + hdr.iovec_count = elem->out_num - 2; for (i = 0; i < hdr.iovec_count; i++) - hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len; + hdr.dxfer_len += elem->out_sg[i + 2].iov_len; - hdr.dxferp = req->elem.out_sg + 2; + hdr.dxferp = elem->out_sg + 2; - } else if (req->elem.in_num > 3) { + } else if (elem->in_num > 3) { /* * If we have more than 3 input segments the guest wants to actually * read data. */ hdr.dxfer_direction = SG_DXFER_FROM_DEV; - hdr.iovec_count = req->elem.in_num - 3; + hdr.iovec_count = elem->in_num - 3; for (i = 0; i < hdr.iovec_count; i++) - hdr.dxfer_len += req->elem.in_sg[i].iov_len; + hdr.dxfer_len += elem->in_sg[i].iov_len; - hdr.dxferp = req->elem.in_sg; + hdr.dxferp = elem->in_sg; } else { /* * Some SCSI commands don't actually transfer any data. @@ -204,11 +203,11 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) hdr.dxfer_direction = SG_DXFER_NONE; } - hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base; - hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len; + hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base; + hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len; - ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr); - if (ret) { + status = bdrv_ioctl(blk->bs, SG_IO, &hdr); + if (status) { status = VIRTIO_BLK_S_UNSUPP; goto fail; } @@ -224,23 +223,31 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) hdr.status = CHECK_CONDITION; } - stl_p(&req->scsi->errors, + stl_p(&scsi->errors, hdr.status | (hdr.msg_status << 8) | (hdr.host_status << 16) | (hdr.driver_status << 24)); - stl_p(&req->scsi->residual, hdr.resid); - stl_p(&req->scsi->sense_len, hdr.sb_len_wr); - stl_p(&req->scsi->data_len, hdr.dxfer_len); + stl_p(&scsi->residual, hdr.resid); + stl_p(&scsi->sense_len, hdr.sb_len_wr); + stl_p(&scsi->data_len, hdr.dxfer_len); - virtio_blk_req_complete(req, status); - g_free(req); - return; + return status; #else abort(); #endif fail: /* Just put anything nonzero so that the ioctl fails in the guest. */ - stl_p(&req->scsi->errors, 255); + if (scsi) { + stl_p(&scsi->errors, 255); + } + return status; +} + +static void virtio_blk_handle_scsi(VirtIOBlockReq *req) +{ + int status; + + status = virtio_blk_handle_scsi_req(req->dev, &req->elem); virtio_blk_req_complete(req, status); g_free(req); } @@ -523,7 +530,10 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) struct virtio_blk_config blkcfg; memcpy(&blkcfg, config, sizeof(blkcfg)); + + aio_context_acquire(bdrv_get_aio_context(s->bs)); bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0); + aio_context_release(bdrv_get_aio_context(s->bs)); } static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features) @@ -582,7 +592,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) * s->bs would erroneously be placed in writethrough mode. */ if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) { - bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE))); + aio_context_acquire(bdrv_get_aio_context(s->bs)); + bdrv_set_enable_write_cache(s->bs, + !!(features & (1 << VIRTIO_BLK_F_WCE))); + aio_context_release(bdrv_get_aio_context(s->bs)); } } diff --git a/include/block/block.h b/include/block/block.h index faee3aa246..7d86e29cf4 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -481,15 +481,6 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason); void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); bool bdrv_op_blocker_is_empty(BlockDriverState *bs); -#ifdef CONFIG_LINUX_AIO -int raw_get_aio_fd(BlockDriverState *bs); -#else -static inline int raw_get_aio_fd(BlockDriverState *bs) -{ - return -ENOTSUP; -} -#endif - enum BlockAcctType { BDRV_ACCT_READ, BDRV_ACCT_WRITE, @@ -574,4 +565,22 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); int bdrv_debug_resume(BlockDriverState *bs, const char *tag); bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); +/** + * bdrv_get_aio_context: + * + * Returns: the currently bound #AioContext + */ +AioContext *bdrv_get_aio_context(BlockDriverState *bs); + +/** + * bdrv_set_aio_context: + * + * Changes the #AioContext used for fd handlers, timers, and BHs by this + * BlockDriverState and all its children. + * + * This function must be called from the old #AioContext or with a lock held so + * the old #AioContext is not executing. + */ +void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); + #endif diff --git a/include/block/block_int.h b/include/block/block_int.h index f2e753f632..8d58334c1d 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -247,6 +247,19 @@ struct BlockDriver { */ int (*bdrv_has_zero_init)(BlockDriverState *bs); + /* Remove fd handlers, timers, and other event loop callbacks so the event + * loop is no longer in use. Called with no in-flight requests and in + * depth-first traversal order with parents before child nodes. + */ + void (*bdrv_detach_aio_context)(BlockDriverState *bs); + + /* Add fd handlers, timers, and other event loop callbacks so I/O requests + * can be processed again. Called with no in-flight requests and in + * depth-first traversal order with child nodes before parent nodes. + */ + void (*bdrv_attach_aio_context)(BlockDriverState *bs, + AioContext *new_context); + QLIST_ENTRY(BlockDriver) list; }; @@ -297,6 +310,8 @@ struct BlockDriverState { const BlockDevOps *dev_ops; void *dev_opaque; + AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ + char filename[1024]; char backing_file[1024]; /* if non zero, the image is a diff of this file image */ @@ -390,11 +405,25 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs, NotifierWithReturn *notifier); /** - * bdrv_get_aio_context: + * bdrv_detach_aio_context: * - * Returns: the currently bound #AioContext + * May be called from .bdrv_detach_aio_context() to detach children from the + * current #AioContext. This is only needed by block drivers that manage their + * own children. Both ->file and ->backing_hd are automatically handled and + * block drivers should not call this function on them explicitly. */ -AioContext *bdrv_get_aio_context(BlockDriverState *bs); +void bdrv_detach_aio_context(BlockDriverState *bs); + +/** + * bdrv_attach_aio_context: + * + * May be called from .bdrv_attach_aio_context() to attach children to the new + * #AioContext. This is only needed by block drivers that manage their own + * children. Both ->file and ->backing_hd are automatically handled and block + * drivers should not call this function on them explicitly. + */ +void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context); #ifdef _WIN32 int is_windows_drive(const char *filename); diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index e4c41ff2ef..4bc9b549ad 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -155,4 +155,7 @@ typedef struct VirtIOBlock { void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk); +int virtio_blk_handle_scsi_req(VirtIOBlock *blk, + VirtQueueElement *elem); + #endif diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h index ab29b0b918..b890613a9c 100644 --- a/include/qemu/throttle.h +++ b/include/qemu/throttle.h @@ -67,6 +67,11 @@ typedef struct ThrottleState { int64_t previous_leak; /* timestamp of the last leak done */ QEMUTimer * timers[2]; /* timers used to do the throttling */ QEMUClockType clock_type; /* the clock used */ + + /* Callbacks */ + QEMUTimerCB *read_timer_cb; + QEMUTimerCB *write_timer_cb; + void *timer_opaque; } ThrottleState; /* operations on single leaky buckets */ @@ -82,6 +87,7 @@ bool throttle_compute_timer(ThrottleState *ts, /* init/destroy cycle */ void throttle_init(ThrottleState *ts, + AioContext *aio_context, QEMUClockType clock_type, void (read_timer)(void *), void (write_timer)(void *), @@ -89,6 +95,10 @@ void throttle_init(ThrottleState *ts, void throttle_destroy(ThrottleState *ts); +void throttle_detach_aio_context(ThrottleState *ts); + +void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context); + bool throttle_have_timer(ThrottleState *ts); /* configuration */ diff --git a/qapi-schema.json b/qapi-schema.json index 7bc33ea717..14b498b442 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2,32 +2,11 @@ # # QAPI Schema -## -# @ErrorClass -# -# QEMU error classes -# -# @GenericError: this is used for errors that don't require a specific error -# class. This should be the default case for most errors -# -# @CommandNotFound: the requested command has not been found -# -# @DeviceEncrypted: the requested operation can't be fulfilled because the -# selected device is encrypted -# -# @DeviceNotActive: a device has failed to be become active -# -# @DeviceNotFound: the requested device has not been found -# -# @KVMMissingCap: the requested operation can't be fulfilled because a -# required KVM capability is missing -# -# Since: 1.2 -## -{ 'enum': 'ErrorClass', - 'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted', - 'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] } +# QAPI common definitions +{ 'include': 'qapi/common.json' } +# QAPI block definitions +{ 'include': 'qapi/block.json' } ## # LostTickPolicy: @@ -53,40 +32,6 @@ { 'enum': 'LostTickPolicy', 'data': ['discard', 'delay', 'merge', 'slew' ] } -## -# BiosAtaTranslation: -# -# Policy that BIOS should use to interpret cylinder/head/sector -# addresses. Note that Bochs BIOS and SeaBIOS will not actually -# translate logical CHS to physical; instead, they will use logical -# block addressing. -# -# @auto: If cylinder/heads/sizes are passed, choose between none and LBA -# depending on the size of the disk. If they are not passed, -# choose none if QEMU can guess that the disk had 16 or fewer -# heads, large if QEMU can guess that the disk had 131072 or -# fewer tracks across all heads (i.e. cylinders*heads<131072), -# otherwise LBA. -# -# @none: The physical disk geometry is equal to the logical geometry. -# -# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255 -# heads (if fewer than 255 are enough to cover the whole disk -# with 1024 cylinders/head). The number of cylinders/head is -# then computed based on the number of sectors and heads. -# -# @large: The number of cylinders per head is scaled down to 1024 -# by correspondingly scaling up the number of heads. -# -# @rechs: Same as @large, but first convert a 16-head geometry to -# 15-head, by proportionally scaling up the number of -# cylinders/head. -# -# Since: 2.0 -## -{ 'enum': 'BiosAtaTranslation', - 'data': ['auto', 'none', 'lba', 'large', 'rechs']} - # @add_client # # Allow client connections for VNC, Spice and socket based @@ -133,43 +78,6 @@ ## { 'command': 'query-name', 'returns': 'NameInfo' } -## -# @VersionInfo: -# -# A description of QEMU's version. -# -# @qemu.major: The major version of QEMU -# -# @qemu.minor: The minor version of QEMU -# -# @qemu.micro: The micro version of QEMU. By current convention, a micro -# version of 50 signifies a development branch. A micro version -# greater than or equal to 90 signifies a release candidate for -# the next minor version. A micro version of less than 50 -# signifies a stable release. -# -# @package: QEMU will always set this field to an empty string. Downstream -# versions of QEMU should set this to a non-empty string. The -# exact format depends on the downstream however it highly -# recommended that a unique name is used. -# -# Since: 0.14.0 -## -{ 'type': 'VersionInfo', - 'data': {'qemu': {'major': 'int', 'minor': 'int', 'micro': 'int'}, - 'package': 'str'} } - -## -# @query-version: -# -# Returns the current version of QEMU. -# -# Returns: A @VersionInfo object describing the current version of QEMU. -# -# Since: 0.14.0 -## -{ 'command': 'query-version', 'returns': 'VersionInfo' } - ## # @KvmInfo: # @@ -241,179 +149,6 @@ 'running', 'save-vm', 'shutdown', 'suspended', 'watchdog', 'guest-panicked' ] } -## -# @SnapshotInfo -# -# @id: unique snapshot id -# -# @name: user chosen name -# -# @vm-state-size: size of the VM state -# -# @date-sec: UTC date of the snapshot in seconds -# -# @date-nsec: fractional part in nano seconds to be used with date-sec -# -# @vm-clock-sec: VM clock relative to boot in seconds -# -# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec -# -# Since: 1.3 -# -## - -{ 'type': 'SnapshotInfo', - 'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int', - 'date-sec': 'int', 'date-nsec': 'int', - 'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } } - -## -# @ImageInfoSpecificQCow2: -# -# @compat: compatibility level -# -# @lazy-refcounts: #optional on or off; only valid for compat >= 1.1 -# -# Since: 1.7 -## -{ 'type': 'ImageInfoSpecificQCow2', - 'data': { - 'compat': 'str', - '*lazy-refcounts': 'bool' - } } - -## -# @ImageInfoSpecificVmdk: -# -# @create-type: The create type of VMDK image -# -# @cid: Content id of image -# -# @parent-cid: Parent VMDK image's cid -# -# @extents: List of extent files -# -# Since: 1.7 -## -{ 'type': 'ImageInfoSpecificVmdk', - 'data': { - 'create-type': 'str', - 'cid': 'int', - 'parent-cid': 'int', - 'extents': ['ImageInfo'] - } } - -## -# @ImageInfoSpecific: -# -# A discriminated record of image format specific information structures. -# -# Since: 1.7 -## - -{ 'union': 'ImageInfoSpecific', - 'data': { - 'qcow2': 'ImageInfoSpecificQCow2', - 'vmdk': 'ImageInfoSpecificVmdk' - } } - -## -# @ImageInfo: -# -# Information about a QEMU image file -# -# @filename: name of the image file -# -# @format: format of the image file -# -# @virtual-size: maximum capacity in bytes of the image -# -# @actual-size: #optional actual size on disk in bytes of the image -# -# @dirty-flag: #optional true if image is not cleanly closed -# -# @cluster-size: #optional size of a cluster in bytes -# -# @encrypted: #optional true if the image is encrypted -# -# @compressed: #optional true if the image is compressed (Since 1.7) -# -# @backing-filename: #optional name of the backing file -# -# @full-backing-filename: #optional full path of the backing file -# -# @backing-filename-format: #optional the format of the backing file -# -# @snapshots: #optional list of VM snapshots -# -# @backing-image: #optional info of the backing image (since 1.6) -# -# @format-specific: #optional structure supplying additional format-specific -# information (since 1.7) -# -# Since: 1.3 -# -## - -{ 'type': 'ImageInfo', - 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', - '*actual-size': 'int', 'virtual-size': 'int', - '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool', - '*backing-filename': 'str', '*full-backing-filename': 'str', - '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], - '*backing-image': 'ImageInfo', - '*format-specific': 'ImageInfoSpecific' } } - -## -# @ImageCheck: -# -# Information about a QEMU image file check -# -# @filename: name of the image file checked -# -# @format: format of the image file checked -# -# @check-errors: number of unexpected errors occurred during check -# -# @image-end-offset: #optional offset (in bytes) where the image ends, this -# field is present if the driver for the image format -# supports it -# -# @corruptions: #optional number of corruptions found during the check if any -# -# @leaks: #optional number of leaks found during the check if any -# -# @corruptions-fixed: #optional number of corruptions fixed during the check -# if any -# -# @leaks-fixed: #optional number of leaks fixed during the check if any -# -# @total-clusters: #optional total number of clusters, this field is present -# if the driver for the image format supports it -# -# @allocated-clusters: #optional total number of allocated clusters, this -# field is present if the driver for the image format -# supports it -# -# @fragmented-clusters: #optional total number of fragmented clusters, this -# field is present if the driver for the image format -# supports it -# -# @compressed-clusters: #optional total number of compressed clusters, this -# field is present if the driver for the image format -# supports it -# -# Since: 1.4 -# -## - -{ 'type': 'ImageCheck', - 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', - '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int', - '*corruptions-fixed': 'int', '*leaks-fixed': 'int', - '*total-clusters': 'int', '*allocated-clusters': 'int', - '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } } - ## # @StatusInfo: # @@ -583,28 +318,6 @@ 'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'}, 'returns': 'str' } -## -# @CommandInfo: -# -# Information about a QMP command -# -# @name: The command name -# -# Since: 0.14.0 -## -{ 'type': 'CommandInfo', 'data': {'name': 'str'} } - -## -# @query-commands: -# -# Return a list of supported QMP commands by this server -# -# Returns: A list of @CommandInfo for all supported commands -# -# Since: 0.14.0 -## -{ 'command': 'query-commands', 'returns': ['CommandInfo'] } - ## # @EventInfo: # @@ -916,252 +629,6 @@ ## { 'command': 'query-iothreads', 'returns': ['IOThreadInfo'] } -## -# @BlockDeviceInfo: -# -# Information about the backing device for a block device. -# -# @file: the filename of the backing device -# -# @node-name: #optional the name of the block driver node (Since 2.0) -# -# @ro: true if the backing device was open read-only -# -# @drv: the name of the block format used to open the backing device. As of -# 0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg', -# 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device', -# 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow', -# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat' -# -# @backing_file: #optional the name of the backing file (for copy-on-write) -# -# @backing_file_depth: number of files in the backing file chain (since: 1.2) -# -# @encrypted: true if the backing device is encrypted -# -# @encryption_key_missing: true if the backing device is encrypted but an -# valid encryption key is missing -# -# @detect_zeroes: detect and optimize zero writes (Since 2.1) -# -# @bps: total throughput limit in bytes per second is specified -# -# @bps_rd: read throughput limit in bytes per second is specified -# -# @bps_wr: write throughput limit in bytes per second is specified -# -# @iops: total I/O operations per second is specified -# -# @iops_rd: read I/O operations per second is specified -# -# @iops_wr: write I/O operations per second is specified -# -# @image: the info of image used (since: 1.6) -# -# @bps_max: #optional total max in bytes (Since 1.7) -# -# @bps_rd_max: #optional read max in bytes (Since 1.7) -# -# @bps_wr_max: #optional write max in bytes (Since 1.7) -# -# @iops_max: #optional total I/O operations max (Since 1.7) -# -# @iops_rd_max: #optional read I/O operations max (Since 1.7) -# -# @iops_wr_max: #optional write I/O operations max (Since 1.7) -# -# @iops_size: #optional an I/O size in bytes (Since 1.7) -# -# Since: 0.14.0 -# -## -{ 'type': 'BlockDeviceInfo', - 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str', - '*backing_file': 'str', 'backing_file_depth': 'int', - 'encrypted': 'bool', 'encryption_key_missing': 'bool', - 'detect_zeroes': 'BlockdevDetectZeroesOptions', - 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', - 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', - 'image': 'ImageInfo', - '*bps_max': 'int', '*bps_rd_max': 'int', - '*bps_wr_max': 'int', '*iops_max': 'int', - '*iops_rd_max': 'int', '*iops_wr_max': 'int', - '*iops_size': 'int' } } - -## -# @BlockDeviceIoStatus: -# -# An enumeration of block device I/O status. -# -# @ok: The last I/O operation has succeeded -# -# @failed: The last I/O operation has failed -# -# @nospace: The last I/O operation has failed due to a no-space condition -# -# Since: 1.0 -## -{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] } - -## -# @BlockDeviceMapEntry: -# -# Entry in the metadata map of the device (returned by "qemu-img map") -# -# @start: Offset in the image of the first byte described by this entry -# (in bytes) -# -# @length: Length of the range described by this entry (in bytes) -# -# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.) -# before reaching one for which the range is allocated. The value is -# in the range 0 to the depth of the image chain - 1. -# -# @zero: the sectors in this range read as zeros -# -# @data: reading the image will actually read data from a file (in particular, -# if @offset is present this means that the sectors are not simply -# preallocated, but contain actual data in raw format) -# -# @offset: if present, the image file stores the data for this range in -# raw format at the given offset. -# -# Since 1.7 -## -{ 'type': 'BlockDeviceMapEntry', - 'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool', - 'data': 'bool', '*offset': 'int' } } - -## -# @BlockDirtyInfo: -# -# Block dirty bitmap information. -# -# @count: number of dirty bytes according to the dirty bitmap -# -# @granularity: granularity of the dirty bitmap in bytes (since 1.4) -# -# Since: 1.3 -## -{ 'type': 'BlockDirtyInfo', - 'data': {'count': 'int', 'granularity': 'int'} } - -## -# @BlockInfo: -# -# Block device information. This structure describes a virtual device and -# the backing device associated with it. -# -# @device: The device name associated with the virtual device. -# -# @type: This field is returned only for compatibility reasons, it should -# not be used (always returns 'unknown') -# -# @removable: True if the device supports removable media. -# -# @locked: True if the guest has locked this device from having its media -# removed -# -# @tray_open: #optional True if the device has a tray and it is open -# (only present if removable is true) -# -# @dirty-bitmaps: #optional dirty bitmaps information (only present if the -# driver has one or more dirty bitmaps) (Since 2.0) -# -# @io-status: #optional @BlockDeviceIoStatus. Only present if the device -# supports it and the VM is configured to stop on errors -# -# @inserted: #optional @BlockDeviceInfo describing the device if media is -# present -# -# Since: 0.14.0 -## -{ 'type': 'BlockInfo', - 'data': {'device': 'str', 'type': 'str', 'removable': 'bool', - 'locked': 'bool', '*inserted': 'BlockDeviceInfo', - '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus', - '*dirty-bitmaps': ['BlockDirtyInfo'] } } - -## -# @query-block: -# -# Get a list of BlockInfo for all virtual block devices. -# -# Returns: a list of @BlockInfo describing each virtual block device -# -# Since: 0.14.0 -## -{ 'command': 'query-block', 'returns': ['BlockInfo'] } - -## -# @BlockDeviceStats: -# -# Statistics of a virtual block device or a block backing device. -# -# @rd_bytes: The number of bytes read by the device. -# -# @wr_bytes: The number of bytes written by the device. -# -# @rd_operations: The number of read operations performed by the device. -# -# @wr_operations: The number of write operations performed by the device. -# -# @flush_operations: The number of cache flush operations performed by the -# device (since 0.15.0) -# -# @flush_total_time_ns: Total time spend on cache flushes in nano-seconds -# (since 0.15.0). -# -# @wr_total_time_ns: Total time spend on writes in nano-seconds (since 0.15.0). -# -# @rd_total_time_ns: Total_time_spend on reads in nano-seconds (since 0.15.0). -# -# @wr_highest_offset: The offset after the greatest byte written to the -# device. The intended use of this information is for -# growable sparse files (like qcow2) that are used on top -# of a physical device. -# -# Since: 0.14.0 -## -{ 'type': 'BlockDeviceStats', - 'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int', - 'wr_operations': 'int', 'flush_operations': 'int', - 'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int', - 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } } - -## -# @BlockStats: -# -# Statistics of a virtual block device or a block backing device. -# -# @device: #optional If the stats are for a virtual block device, the name -# corresponding to the virtual block device. -# -# @stats: A @BlockDeviceStats for the device. -# -# @parent: #optional This describes the file block device if it has one. -# -# @backing: #optional This describes the backing block device if it has one. -# (Since 2.0) -# -# Since: 0.14.0 -## -{ 'type': 'BlockStats', - 'data': {'*device': 'str', 'stats': 'BlockDeviceStats', - '*parent': 'BlockStats', - '*backing': 'BlockStats'} } - -## -# @query-blockstats: -# -# Query the @BlockStats for all virtual block devices. -# -# Returns: A list of @BlockStats for each virtual block devices. -# -# Since: 0.14.0 -## -{ 'command': 'query-blockstats', 'returns': ['BlockStats'] } - ## # @VncClientInfo: # @@ -1500,105 +967,6 @@ ## { 'command': 'query-pci', 'returns': ['PciInfo'] } -## -# @BlockdevOnError: -# -# An enumeration of possible behaviors for errors on I/O operations. -# The exact meaning depends on whether the I/O was initiated by a guest -# or by a block job -# -# @report: for guest operations, report the error to the guest; -# for jobs, cancel the job -# -# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR -# or BLOCK_JOB_ERROR) -# -# @enospc: same as @stop on ENOSPC, same as @report otherwise. -# -# @stop: for guest operations, stop the virtual machine; -# for jobs, pause the job -# -# Since: 1.3 -## -{ 'enum': 'BlockdevOnError', - 'data': ['report', 'ignore', 'enospc', 'stop'] } - -## -# @MirrorSyncMode: -# -# An enumeration of possible behaviors for the initial synchronization -# phase of storage mirroring. -# -# @top: copies data in the topmost image to the destination -# -# @full: copies data from all images to the destination -# -# @none: only copy data written from now on -# -# Since: 1.3 -## -{ 'enum': 'MirrorSyncMode', - 'data': ['top', 'full', 'none'] } - -## -# @BlockJobType: -# -# Type of a block job. -# -# @commit: block commit job type, see "block-commit" -# -# @stream: block stream job type, see "block-stream" -# -# @mirror: drive mirror job type, see "drive-mirror" -# -# @backup: drive backup job type, see "drive-backup" -# -# Since: 1.7 -## -{ 'enum': 'BlockJobType', - 'data': ['commit', 'stream', 'mirror', 'backup'] } - -## -# @BlockJobInfo: -# -# Information about a long-running block device operation. -# -# @type: the job type ('stream' for image streaming) -# -# @device: the block device name -# -# @len: the maximum progress value -# -# @busy: false if the job is known to be in a quiescent state, with -# no pending I/O. Since 1.3. -# -# @paused: whether the job is paused or, if @busy is true, will -# pause itself as soon as possible. Since 1.3. -# -# @offset: the current progress value -# -# @speed: the rate limit, bytes per second -# -# @io-status: the status of the job (since 1.3) -# -# Since: 1.1 -## -{ 'type': 'BlockJobInfo', - 'data': {'type': 'str', 'device': 'str', 'len': 'int', - 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int', - 'io-status': 'BlockDeviceIoStatus'} } - -## -# @query-block-jobs: -# -# Return information about long-running block device operations. -# -# Returns: a list of @BlockJobInfo for each active block job -# -# Since: 1.1 -## -{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] } - ## # @quit: # @@ -1778,43 +1146,6 @@ ## { 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} } -## -# @block_passwd: -# -# This command sets the password of a block device that has not been open -# with a password and requires one. -# -# The two cases where this can happen are a block device is created through -# QEMU's initial command line or a block device is changed through the legacy -# @change interface. -# -# In the event that the block device is created through the initial command -# line, the VM will start in the stopped state regardless of whether '-S' is -# used. The intention is for a management tool to query the block devices to -# determine which ones are encrypted, set the passwords with this command, and -# then start the guest with the @cont command. -# -# Either @device or @node-name must be set but not both. -# -# @device: #optional the name of the block backend device to set the password on -# -# @node-name: #optional graph node name to set the password on (Since 2.0) -# -# @password: the password to use for the device -# -# Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound -# If @device is not encrypted, DeviceNotEncrypted -# -# Notes: Not all block formats support encryption and some that do are not -# able to validate that a password is correct. Disk corruption may -# occur if an invalid password is specified. -# -# Since: 0.14.0 -## -{ 'command': 'block_passwd', 'data': {'*device': 'str', - '*node-name': 'str', 'password': 'str'} } - ## # @balloon: # @@ -1835,126 +1166,6 @@ ## { 'command': 'balloon', 'data': {'value': 'int'} } -## -# @block_resize -# -# Resize a block image while a guest is running. -# -# Either @device or @node-name must be set but not both. -# -# @device: #optional the name of the device to get the image resized -# -# @node-name: #optional graph node name to get the image resized (Since 2.0) -# -# @size: new image size in bytes -# -# Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound -# -# Since: 0.14.0 -## -{ 'command': 'block_resize', 'data': { '*device': 'str', - '*node-name': 'str', - 'size': 'int' }} - -## -# @NewImageMode -# -# An enumeration that tells QEMU how to set the backing file path in -# a new image file. -# -# @existing: QEMU should look for an existing image file. -# -# @absolute-paths: QEMU should create a new image with absolute paths -# for the backing file. If there is no backing file available, the new -# image will not be backed either. -# -# Since: 1.1 -## -{ 'enum': 'NewImageMode', - 'data': [ 'existing', 'absolute-paths' ] } - -## -# @BlockdevSnapshot -# -# Either @device or @node-name must be set but not both. -# -# @device: #optional the name of the device to generate the snapshot from. -# -# @node-name: #optional graph node name to generate the snapshot from (Since 2.0) -# -# @snapshot-file: the target of the new image. A new file will be created. -# -# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0) -# -# @format: #optional the format of the snapshot image, default is 'qcow2'. -# -# @mode: #optional whether and how QEMU should create a new image, default is -# 'absolute-paths'. -## -{ 'type': 'BlockdevSnapshot', - 'data': { '*device': 'str', '*node-name': 'str', - 'snapshot-file': 'str', '*snapshot-node-name': 'str', - '*format': 'str', '*mode': 'NewImageMode' } } - -## -# @BlockdevSnapshotInternal -# -# @device: the name of the device to generate the snapshot from -# -# @name: the name of the internal snapshot to be created -# -# Notes: In transaction, if @name is empty, or any snapshot matching @name -# exists, the operation will fail. Only some image formats support it, -# for example, qcow2, rbd, and sheepdog. -# -# Since: 1.7 -## -{ 'type': 'BlockdevSnapshotInternal', - 'data': { 'device': 'str', 'name': 'str' } } - -## -# @DriveBackup -# -# @device: the name of the device which should be copied. -# -# @target: the target of the new image. If the file exists, or if it -# is a device, the existing file/device will be used as the new -# destination. If it does not exist, a new file will be created. -# -# @format: #optional the format of the new destination, default is to -# probe if @mode is 'existing', else the format of the source -# -# @sync: what parts of the disk image should be copied to the destination -# (all the disk, only the sectors allocated in the topmost image, or -# only new I/O). -# -# @mode: #optional whether and how QEMU should create a new image, default is -# 'absolute-paths'. -# -# @speed: #optional the maximum speed, in bytes per second -# -# @on-source-error: #optional the action to take on an error on the source, -# default 'report'. 'stop' and 'enospc' can only be used -# if the block device supports io-status (see BlockInfo). -# -# @on-target-error: #optional the action to take on an error on the target, -# default 'report' (no limitations, since this applies to -# a different block device than @device). -# -# Note that @on-source-error and @on-target-error only affect background I/O. -# If an error occurs during a guest write request, the device's rerror/werror -# actions will be used. -# -# Since: 1.6 -## -{ 'type': 'DriveBackup', - 'data': { 'device': 'str', 'target': 'str', '*format': 'str', - 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', - '*speed': 'int', - '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError' } } - ## # @Abort # @@ -2001,68 +1212,6 @@ { 'command': 'transaction', 'data': { 'actions': [ 'TransactionAction' ] } } -## -# @blockdev-snapshot-sync -# -# Generates a synchronous snapshot of a block device. -# -# For the arguments, see the documentation of BlockdevSnapshot. -# -# Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound -# -# Since 0.14.0 -## -{ 'command': 'blockdev-snapshot-sync', - 'data': 'BlockdevSnapshot' } - -## -# @blockdev-snapshot-internal-sync -# -# Synchronously take an internal snapshot of a block device, when the format -# of the image used supports it. -# -# For the arguments, see the documentation of BlockdevSnapshotInternal. -# -# Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound -# If any snapshot matching @name exists, or @name is empty, -# GenericError -# If the format of the image used does not support it, -# BlockFormatFeatureNotSupported -# -# Since 1.7 -## -{ 'command': 'blockdev-snapshot-internal-sync', - 'data': 'BlockdevSnapshotInternal' } - -## -# @blockdev-snapshot-delete-internal-sync -# -# Synchronously delete an internal snapshot of a block device, when the format -# of the image used support it. The snapshot is identified by name or id or -# both. One of the name or id is required. Return SnapshotInfo for the -# successfully deleted snapshot. -# -# @device: the name of the device to delete the snapshot from -# -# @id: optional the snapshot's ID to be deleted -# -# @name: optional the snapshot's name to be deleted -# -# Returns: SnapshotInfo on success -# If @device is not a valid block device, DeviceNotFound -# If snapshot not found, GenericError -# If the format of the image used does not support it, -# BlockFormatFeatureNotSupported -# If @id and @name are both not specified, GenericError -# -# Since 1.7 -## -{ 'command': 'blockdev-snapshot-delete-internal-sync', - 'data': { 'device': 'str', '*id': 'str', '*name': 'str'}, - 'returns': 'SnapshotInfo' } - ## # @human-monitor-command: # @@ -2091,129 +1240,6 @@ 'data': {'command-line': 'str', '*cpu-index': 'int'}, 'returns': 'str' } -## -# @block-commit -# -# Live commit of data from overlay image nodes into backing nodes - i.e., -# writes data between 'top' and 'base' into 'base'. -# -# @device: the name of the device -# -# @base: #optional The file name of the backing image to write data into. -# If not specified, this is the deepest backing image -# -# @top: The file name of the backing image within the image chain, -# which contains the topmost data to be committed down. -# -# If top == base, that is an error. -# If top == active, the job will not be completed by itself, -# user needs to complete the job with the block-job-complete -# command after getting the ready event. (Since 2.0) -# -# If the base image is smaller than top, then the base image -# will be resized to be the same size as top. If top is -# smaller than the base image, the base will not be -# truncated. If you want the base image size to match the -# size of the smaller top, you can safely truncate it -# yourself once the commit operation successfully completes. -# -# -# @speed: #optional the maximum speed, in bytes per second -# -# Returns: Nothing on success -# If commit or stream is already active on this device, DeviceInUse -# If @device does not exist, DeviceNotFound -# If image commit is not supported by this device, NotSupported -# If @base or @top is invalid, a generic error is returned -# If @speed is invalid, InvalidParameter -# -# Since: 1.3 -# -## -{ 'command': 'block-commit', - 'data': { 'device': 'str', '*base': 'str', 'top': 'str', - '*speed': 'int' } } - -## -# @drive-backup -# -# Start a point-in-time copy of a block device to a new destination. The -# status of ongoing drive-backup operations can be checked with -# query-block-jobs where the BlockJobInfo.type field has the value 'backup'. -# The operation can be stopped before it has completed using the -# block-job-cancel command. -# -# For the arguments, see the documentation of DriveBackup. -# -# Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound -# -# Since 1.6 -## -{ 'command': 'drive-backup', 'data': 'DriveBackup' } - -## -# @query-named-block-nodes -# -# Get the named block driver list -# -# Returns: the list of BlockDeviceInfo -# -# Since 2.0 -## -{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] } - -## -# @drive-mirror -# -# Start mirroring a block device's writes to a new destination. -# -# @device: the name of the device whose writes should be mirrored. -# -# @target: the target of the new image. If the file exists, or if it -# is a device, the existing file/device will be used as the new -# destination. If it does not exist, a new file will be created. -# -# @format: #optional the format of the new destination, default is to -# probe if @mode is 'existing', else the format of the source -# -# @mode: #optional whether and how QEMU should create a new image, default is -# 'absolute-paths'. -# -# @speed: #optional the maximum speed, in bytes per second -# -# @sync: what parts of the disk image should be copied to the destination -# (all the disk, only the sectors allocated in the topmost image, or -# only new I/O). -# -# @granularity: #optional granularity of the dirty bitmap, default is 64K -# if the image format doesn't have clusters, 4K if the clusters -# are smaller than that, else the cluster size. Must be a -# power of 2 between 512 and 64M (since 1.4). -# -# @buf-size: #optional maximum amount of data in flight from source to -# target (since 1.4). -# -# @on-source-error: #optional the action to take on an error on the source, -# default 'report'. 'stop' and 'enospc' can only be used -# if the block device supports io-status (see BlockInfo). -# -# @on-target-error: #optional the action to take on an error on the target, -# default 'report' (no limitations, since this applies to -# a different block device than @device). -# -# Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound -# -# Since 1.3 -## -{ 'command': 'drive-mirror', - 'data': { 'device': 'str', 'target': 'str', '*format': 'str', - 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', - '*speed': 'int', '*granularity': 'uint32', - '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError' } } - ## # @migrate_cancel # @@ -2429,25 +1455,6 @@ ## { 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} } -## -# @eject: -# -# Ejects a device from a removable drive. -# -# @device: The name of the device -# -# @force: @optional If true, eject regardless of whether the drive is locked. -# If not specified, the default value is false. -# -# Returns: Nothing on success -# If @device is not a valid block device, DeviceNotFound -# -# Notes: Ejecting a device will no media results in success -# -# Since: 0.14.0 -## -{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} } - ## # @change-vnc-password: # @@ -2497,211 +1504,6 @@ { 'command': 'change', 'data': {'device': 'str', 'target': 'str', '*arg': 'str'} } -## -# @block_set_io_throttle: -# -# Change I/O throttle limits for a block drive. -# -# @device: The name of the device -# -# @bps: total throughput limit in bytes per second -# -# @bps_rd: read throughput limit in bytes per second -# -# @bps_wr: write throughput limit in bytes per second -# -# @iops: total I/O operations per second -# -# @ops_rd: read I/O operations per second -# -# @iops_wr: write I/O operations per second -# -# @bps_max: #optional total max in bytes (Since 1.7) -# -# @bps_rd_max: #optional read max in bytes (Since 1.7) -# -# @bps_wr_max: #optional write max in bytes (Since 1.7) -# -# @iops_max: #optional total I/O operations max (Since 1.7) -# -# @iops_rd_max: #optional read I/O operations max (Since 1.7) -# -# @iops_wr_max: #optional write I/O operations max (Since 1.7) -# -# @iops_size: #optional an I/O size in bytes (Since 1.7) -# -# Returns: Nothing on success -# If @device is not a valid block device, DeviceNotFound -# -# Since: 1.1 -## -{ 'command': 'block_set_io_throttle', - 'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', - 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', - '*bps_max': 'int', '*bps_rd_max': 'int', - '*bps_wr_max': 'int', '*iops_max': 'int', - '*iops_rd_max': 'int', '*iops_wr_max': 'int', - '*iops_size': 'int' } } - -## -# @block-stream: -# -# Copy data from a backing file into a block device. -# -# The block streaming operation is performed in the background until the entire -# backing file has been copied. This command returns immediately once streaming -# has started. The status of ongoing block streaming operations can be checked -# with query-block-jobs. The operation can be stopped before it has completed -# using the block-job-cancel command. -# -# If a base file is specified then sectors are not copied from that base file and -# its backing chain. When streaming completes the image file will have the base -# file as its backing file. This can be used to stream a subset of the backing -# file chain instead of flattening the entire image. -# -# On successful completion the image file is updated to drop the backing file -# and the BLOCK_JOB_COMPLETED event is emitted. -# -# @device: the device name -# -# @base: #optional the common backing file name -# -# @speed: #optional the maximum speed, in bytes per second -# -# @on-error: #optional the action to take on an error (default report). -# 'stop' and 'enospc' can only be used if the block device -# supports io-status (see BlockInfo). Since 1.3. -# -# Returns: Nothing on success -# If @device does not exist, DeviceNotFound -# -# Since: 1.1 -## -{ 'command': 'block-stream', - 'data': { 'device': 'str', '*base': 'str', '*speed': 'int', - '*on-error': 'BlockdevOnError' } } - -## -# @block-job-set-speed: -# -# Set maximum speed for a background block operation. -# -# This command can only be issued when there is an active block job. -# -# Throttling can be disabled by setting the speed to 0. -# -# @device: the device name -# -# @speed: the maximum speed, in bytes per second, or 0 for unlimited. -# Defaults to 0. -# -# Returns: Nothing on success -# If no background operation is active on this device, DeviceNotActive -# -# Since: 1.1 -## -{ 'command': 'block-job-set-speed', - 'data': { 'device': 'str', 'speed': 'int' } } - -## -# @block-job-cancel: -# -# Stop an active background block operation. -# -# This command returns immediately after marking the active background block -# operation for cancellation. It is an error to call this command if no -# operation is in progress. -# -# The operation will cancel as soon as possible and then emit the -# BLOCK_JOB_CANCELLED event. Before that happens the job is still visible when -# enumerated using query-block-jobs. -# -# For streaming, the image file retains its backing file unless the streaming -# operation happens to complete just as it is being cancelled. A new streaming -# operation can be started at a later time to finish copying all data from the -# backing file. -# -# @device: the device name -# -# @force: #optional whether to allow cancellation of a paused job (default -# false). Since 1.3. -# -# Returns: Nothing on success -# If no background operation is active on this device, DeviceNotActive -# -# Since: 1.1 -## -{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } } - -## -# @block-job-pause: -# -# Pause an active background block operation. -# -# This command returns immediately after marking the active background block -# operation for pausing. It is an error to call this command if no -# operation is in progress. Pausing an already paused job has no cumulative -# effect; a single block-job-resume command will resume the job. -# -# The operation will pause as soon as possible. No event is emitted when -# the operation is actually paused. Cancelling a paused job automatically -# resumes it. -# -# @device: the device name -# -# Returns: Nothing on success -# If no background operation is active on this device, DeviceNotActive -# -# Since: 1.3 -## -{ 'command': 'block-job-pause', 'data': { 'device': 'str' } } - -## -# @block-job-resume: -# -# Resume an active background block operation. -# -# This command returns immediately after resuming a paused background block -# operation. It is an error to call this command if no operation is in -# progress. Resuming an already running job is not an error. -# -# This command also clears the error status of the job. -# -# @device: the device name -# -# Returns: Nothing on success -# If no background operation is active on this device, DeviceNotActive -# -# Since: 1.3 -## -{ 'command': 'block-job-resume', 'data': { 'device': 'str' } } - -## -# @block-job-complete: -# -# Manually trigger completion of an active background block operation. This -# is supported for drive mirroring, where it also switches the device to -# write to the target path only. The ability to complete is signaled with -# a BLOCK_JOB_READY event. -# -# This command completes an active background block operation synchronously. -# The ordering of this command's return with the BLOCK_JOB_COMPLETED event -# is not defined. Note that if an I/O error occurs during the processing of -# this command: 1) the command itself will fail; 2) the error will be processed -# according to the rerror/werror arguments that were specified when starting -# the operation. -# -# A cancelled or paused job cannot be completed. -# -# @device: the device name -# -# Returns: Nothing on success -# If no background operation is active on this device, DeviceNotActive -# -# Since: 1.3 -## -{ 'command': 'block-job-complete', 'data': { 'device': 'str' } } - ## # @ObjectTypeInfo: # @@ -3660,49 +2462,6 @@ ## { 'command': 'screendump', 'data': {'filename': 'str'} } -## -# @nbd-server-start: -# -# Start an NBD server listening on the given host and port. Block -# devices can then be exported using @nbd-server-add. The NBD -# server will present them as named exports; for example, another -# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME". -# -# @addr: Address on which to listen. -# -# Returns: error if the server is already running. -# -# Since: 1.3.0 -## -{ 'command': 'nbd-server-start', - 'data': { 'addr': 'SocketAddress' } } - -## -# @nbd-server-add: -# -# Export a device to QEMU's embedded NBD server. -# -# @device: Block device to be exported -# -# @writable: Whether clients should be able to write to the device via the -# NBD connection (default false). #optional -# -# Returns: error if the device is already marked for export. -# -# Since: 1.3.0 -## -{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} } - -## -# @nbd-server-stop: -# -# Stop QEMU's embedded NBD server, and unregister all devices previously -# added via @nbd-server-add. -# -# Since: 1.3.0 -## -{ 'command': 'nbd-server-stop' } - ## # @ChardevFile: # @@ -4243,410 +3002,6 @@ { 'command': 'query-rx-filter', 'data': { '*name': 'str' }, 'returns': ['RxFilterInfo'] } - -## -# @BlockdevDiscardOptions -# -# Determines how to handle discard requests. -# -# @ignore: Ignore the request -# @unmap: Forward as an unmap request -# -# Since: 1.7 -## -{ 'enum': 'BlockdevDiscardOptions', - 'data': [ 'ignore', 'unmap' ] } - -## -# @BlockdevDetectZeroesOptions -# -# Describes the operation mode for the automatic conversion of plain -# zero writes by the OS to driver specific optimized zero write commands. -# -# @off: Disabled (default) -# @on: Enabled -# @unmap: Enabled and even try to unmap blocks if possible. This requires -# also that @BlockdevDiscardOptions is set to unmap for this device. -# -# Since: 2.1 -## -{ 'enum': 'BlockdevDetectZeroesOptions', - 'data': [ 'off', 'on', 'unmap' ] } - -## -# @BlockdevAioOptions -# -# Selects the AIO backend to handle I/O requests -# -# @threads: Use qemu's thread pool -# @native: Use native AIO backend (only Linux and Windows) -# -# Since: 1.7 -## -{ 'enum': 'BlockdevAioOptions', - 'data': [ 'threads', 'native' ] } - -## -# @BlockdevCacheOptions -# -# Includes cache-related options for block devices -# -# @writeback: #optional enables writeback mode for any caches (default: true) -# @direct: #optional enables use of O_DIRECT (bypass the host page cache; -# default: false) -# @no-flush: #optional ignore any flush requests for the device (default: -# false) -# -# Since: 1.7 -## -{ 'type': 'BlockdevCacheOptions', - 'data': { '*writeback': 'bool', - '*direct': 'bool', - '*no-flush': 'bool' } } - -## -# @BlockdevDriver -# -# Drivers that are supported in block device operations. -# -# @host_device, @host_cdrom, @host_floppy: Since 2.1 -# -# Since: 2.0 -## -{ 'enum': 'BlockdevDriver', - 'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy', - 'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug', - 'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow', - 'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] } - -## -# @BlockdevOptionsBase -# -# Options that are available for all block devices, independent of the block -# driver. -# -# @driver: block driver name -# @id: #optional id by which the new block device can be referred to. -# This is a required option on the top level of blockdev-add, and -# currently not allowed on any other level. -# @node-name: #optional the name of a block driver state node (Since 2.0) -# @discard: #optional discard-related options (default: ignore) -# @cache: #optional cache-related options -# @aio: #optional AIO backend (default: threads) -# @rerror: #optional how to handle read errors on the device -# (default: report) -# @werror: #optional how to handle write errors on the device -# (default: enospc) -# @read-only: #optional whether the block device should be read-only -# (default: false) -# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1) -# (default: off) -# -# Since: 1.7 -## -{ 'type': 'BlockdevOptionsBase', - 'data': { 'driver': 'BlockdevDriver', - '*id': 'str', - '*node-name': 'str', - '*discard': 'BlockdevDiscardOptions', - '*cache': 'BlockdevCacheOptions', - '*aio': 'BlockdevAioOptions', - '*rerror': 'BlockdevOnError', - '*werror': 'BlockdevOnError', - '*read-only': 'bool', - '*detect-zeroes': 'BlockdevDetectZeroesOptions' } } - -## -# @BlockdevOptionsFile -# -# Driver specific block device options for the file backend and similar -# protocols. -# -# @filename: path to the image file -# -# Since: 1.7 -## -{ 'type': 'BlockdevOptionsFile', - 'data': { 'filename': 'str' } } - -## -# @BlockdevOptionsVVFAT -# -# Driver specific block device options for the vvfat protocol. -# -# @dir: directory to be exported as FAT image -# @fat-type: #optional FAT type: 12, 16 or 32 -# @floppy: #optional whether to export a floppy image (true) or -# partitioned hard disk (false; default) -# @rw: #optional whether to allow write operations (default: false) -# -# Since: 1.7 -## -{ 'type': 'BlockdevOptionsVVFAT', - 'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool', - '*rw': 'bool' } } - -## -# @BlockdevOptionsGenericFormat -# -# Driver specific block device options for image format that have no option -# besides their data source. -# -# @file: reference to or definition of the data source block device -# -# Since: 1.7 -## -{ 'type': 'BlockdevOptionsGenericFormat', - 'data': { 'file': 'BlockdevRef' } } - -## -# @BlockdevOptionsGenericCOWFormat -# -# Driver specific block device options for image format that have no option -# besides their data source and an optional backing file. -# -# @backing: #optional reference to or definition of the backing file block -# device (if missing, taken from the image file content). It is -# allowed to pass an empty string here in order to disable the -# default backing file. -# -# Since: 1.7 -## -{ 'type': 'BlockdevOptionsGenericCOWFormat', - 'base': 'BlockdevOptionsGenericFormat', - 'data': { '*backing': 'BlockdevRef' } } - -## -# @BlockdevOptionsQcow2 -# -# Driver specific block device options for qcow2. -# -# @lazy-refcounts: #optional whether to enable the lazy refcounts -# feature (default is taken from the image file) -# -# @pass-discard-request: #optional whether discard requests to the qcow2 -# device should be forwarded to the data source -# -# @pass-discard-snapshot: #optional whether discard requests for the data source -# should be issued when a snapshot operation (e.g. -# deleting a snapshot) frees clusters in the qcow2 file -# -# @pass-discard-other: #optional whether discard requests for the data source -# should be issued on other occasions where a cluster -# gets freed -# -# Since: 1.7 -## -{ 'type': 'BlockdevOptionsQcow2', - 'base': 'BlockdevOptionsGenericCOWFormat', - 'data': { '*lazy-refcounts': 'bool', - '*pass-discard-request': 'bool', - '*pass-discard-snapshot': 'bool', - '*pass-discard-other': 'bool' } } - -## -# @BlkdebugEvent -# -# Trigger events supported by blkdebug. -## -{ 'enum': 'BlkdebugEvent', - 'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table', - 'l1_grow.activate_table', 'l2_load', 'l2_update', - 'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write', - 'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio', - 'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read', - 'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update', - 'refblock_load', 'refblock_update', 'refblock_update_part', - 'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write', - 'refblock_alloc.write_blocks', 'refblock_alloc.write_table', - 'refblock_alloc.switch_table', 'cluster_alloc', - 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os', - 'flush_to_disk' ] } - -## -# @BlkdebugInjectErrorOptions -# -# Describes a single error injection for blkdebug. -# -# @event: trigger event -# -# @state: #optional the state identifier blkdebug needs to be in to -# actually trigger the event; defaults to "any" -# -# @errno: #optional error identifier (errno) to be returned; defaults to -# EIO -# -# @sector: #optional specifies the sector index which has to be affected -# in order to actually trigger the event; defaults to "any -# sector" -# -# @once: #optional disables further events after this one has been -# triggered; defaults to false -# -# @immediately: #optional fail immediately; defaults to false -# -# Since: 2.0 -## -{ 'type': 'BlkdebugInjectErrorOptions', - 'data': { 'event': 'BlkdebugEvent', - '*state': 'int', - '*errno': 'int', - '*sector': 'int', - '*once': 'bool', - '*immediately': 'bool' } } - -## -# @BlkdebugSetStateOptions -# -# Describes a single state-change event for blkdebug. -# -# @event: trigger event -# -# @state: #optional the current state identifier blkdebug needs to be in; -# defaults to "any" -# -# @new_state: the state identifier blkdebug is supposed to assume if -# this event is triggered -# -# Since: 2.0 -## -{ 'type': 'BlkdebugSetStateOptions', - 'data': { 'event': 'BlkdebugEvent', - '*state': 'int', - 'new_state': 'int' } } - -## -# @BlockdevOptionsBlkdebug -# -# Driver specific block device options for blkdebug. -# -# @image: underlying raw block device (or image file) -# -# @config: #optional filename of the configuration file -# -# @align: #optional required alignment for requests in bytes -# -# @inject-error: #optional array of error injection descriptions -# -# @set-state: #optional array of state-change descriptions -# -# Since: 2.0 -## -{ 'type': 'BlockdevOptionsBlkdebug', - 'data': { 'image': 'BlockdevRef', - '*config': 'str', - '*align': 'int', - '*inject-error': ['BlkdebugInjectErrorOptions'], - '*set-state': ['BlkdebugSetStateOptions'] } } - -## -# @BlockdevOptionsBlkverify -# -# Driver specific block device options for blkverify. -# -# @test: block device to be tested -# -# @raw: raw image used for verification -# -# Since: 2.0 -## -{ 'type': 'BlockdevOptionsBlkverify', - 'data': { 'test': 'BlockdevRef', - 'raw': 'BlockdevRef' } } - -## -# @BlockdevOptionsQuorum -# -# Driver specific block device options for Quorum -# -# @blkverify: #optional true if the driver must print content mismatch -# set to false by default -# -# @children: the children block devices to use -# -# @vote-threshold: the vote limit under which a read will fail -# -# Since: 2.0 -## -{ 'type': 'BlockdevOptionsQuorum', - 'data': { '*blkverify': 'bool', - 'children': [ 'BlockdevRef' ], - 'vote-threshold': 'int' } } - -## -# @BlockdevOptions -# -# Options for creating a block device. -# -# Since: 1.7 -## -{ 'union': 'BlockdevOptions', - 'base': 'BlockdevOptionsBase', - 'discriminator': 'driver', - 'data': { - 'file': 'BlockdevOptionsFile', - 'host_device':'BlockdevOptionsFile', - 'host_cdrom': 'BlockdevOptionsFile', - 'host_floppy':'BlockdevOptionsFile', - 'http': 'BlockdevOptionsFile', - 'https': 'BlockdevOptionsFile', - 'ftp': 'BlockdevOptionsFile', - 'ftps': 'BlockdevOptionsFile', - 'tftp': 'BlockdevOptionsFile', -# TODO gluster: Wait for structured options -# TODO iscsi: Wait for structured options -# TODO nbd: Should take InetSocketAddress for 'host'? -# TODO nfs: Wait for structured options -# TODO rbd: Wait for structured options -# TODO sheepdog: Wait for structured options -# TODO ssh: Should take InetSocketAddress for 'host'? - 'vvfat': 'BlockdevOptionsVVFAT', - 'blkdebug': 'BlockdevOptionsBlkdebug', - 'blkverify': 'BlockdevOptionsBlkverify', - 'bochs': 'BlockdevOptionsGenericFormat', - 'cloop': 'BlockdevOptionsGenericFormat', - 'cow': 'BlockdevOptionsGenericCOWFormat', - 'dmg': 'BlockdevOptionsGenericFormat', - 'parallels': 'BlockdevOptionsGenericFormat', - 'qcow': 'BlockdevOptionsGenericCOWFormat', - 'qcow2': 'BlockdevOptionsQcow2', - 'qed': 'BlockdevOptionsGenericCOWFormat', - 'raw': 'BlockdevOptionsGenericFormat', - 'vdi': 'BlockdevOptionsGenericFormat', - 'vhdx': 'BlockdevOptionsGenericFormat', - 'vmdk': 'BlockdevOptionsGenericCOWFormat', - 'vpc': 'BlockdevOptionsGenericFormat', - 'quorum': 'BlockdevOptionsQuorum' - } } - -## -# @BlockdevRef -# -# Reference to a block device. -# -# @definition: defines a new block device inline -# @reference: references the ID of an existing block device. An -# empty string means that no block device should be -# referenced. -# -# Since: 1.7 -## -{ 'union': 'BlockdevRef', - 'discriminator': {}, - 'data': { 'definition': 'BlockdevOptions', - 'reference': 'str' } } - -## -# @blockdev-add: -# -# Creates a new block device. -# -# @options: block device options for the new device -# -# Since: 1.7 -## -{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } } - ## # @InputButton # diff --git a/qapi/block-core.json b/qapi/block-core.json new file mode 100644 index 0000000000..7215e48130 --- /dev/null +++ b/qapi/block-core.json @@ -0,0 +1,1412 @@ +# -*- Mode: Python -*- +# +# QAPI block core definitions (vm unrelated) + +# QAPI common definitions +{ 'include': 'common.json' } + +## +# @SnapshotInfo +# +# @id: unique snapshot id +# +# @name: user chosen name +# +# @vm-state-size: size of the VM state +# +# @date-sec: UTC date of the snapshot in seconds +# +# @date-nsec: fractional part in nano seconds to be used with date-sec +# +# @vm-clock-sec: VM clock relative to boot in seconds +# +# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec +# +# Since: 1.3 +# +## + +{ 'type': 'SnapshotInfo', + 'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int', + 'date-sec': 'int', 'date-nsec': 'int', + 'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } } + +## +# @ImageInfoSpecificQCow2: +# +# @compat: compatibility level +# +# @lazy-refcounts: #optional on or off; only valid for compat >= 1.1 +# +# Since: 1.7 +## +{ 'type': 'ImageInfoSpecificQCow2', + 'data': { + 'compat': 'str', + '*lazy-refcounts': 'bool' + } } + +## +# @ImageInfoSpecificVmdk: +# +# @create-type: The create type of VMDK image +# +# @cid: Content id of image +# +# @parent-cid: Parent VMDK image's cid +# +# @extents: List of extent files +# +# Since: 1.7 +## +{ 'type': 'ImageInfoSpecificVmdk', + 'data': { + 'create-type': 'str', + 'cid': 'int', + 'parent-cid': 'int', + 'extents': ['ImageInfo'] + } } + +## +# @ImageInfoSpecific: +# +# A discriminated record of image format specific information structures. +# +# Since: 1.7 +## + +{ 'union': 'ImageInfoSpecific', + 'data': { + 'qcow2': 'ImageInfoSpecificQCow2', + 'vmdk': 'ImageInfoSpecificVmdk' + } } + +## +# @ImageInfo: +# +# Information about a QEMU image file +# +# @filename: name of the image file +# +# @format: format of the image file +# +# @virtual-size: maximum capacity in bytes of the image +# +# @actual-size: #optional actual size on disk in bytes of the image +# +# @dirty-flag: #optional true if image is not cleanly closed +# +# @cluster-size: #optional size of a cluster in bytes +# +# @encrypted: #optional true if the image is encrypted +# +# @compressed: #optional true if the image is compressed (Since 1.7) +# +# @backing-filename: #optional name of the backing file +# +# @full-backing-filename: #optional full path of the backing file +# +# @backing-filename-format: #optional the format of the backing file +# +# @snapshots: #optional list of VM snapshots +# +# @backing-image: #optional info of the backing image (since 1.6) +# +# @format-specific: #optional structure supplying additional format-specific +# information (since 1.7) +# +# Since: 1.3 +# +## + +{ 'type': 'ImageInfo', + 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', + '*actual-size': 'int', 'virtual-size': 'int', + '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool', + '*backing-filename': 'str', '*full-backing-filename': 'str', + '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], + '*backing-image': 'ImageInfo', + '*format-specific': 'ImageInfoSpecific' } } + +## +# @ImageCheck: +# +# Information about a QEMU image file check +# +# @filename: name of the image file checked +# +# @format: format of the image file checked +# +# @check-errors: number of unexpected errors occurred during check +# +# @image-end-offset: #optional offset (in bytes) where the image ends, this +# field is present if the driver for the image format +# supports it +# +# @corruptions: #optional number of corruptions found during the check if any +# +# @leaks: #optional number of leaks found during the check if any +# +# @corruptions-fixed: #optional number of corruptions fixed during the check +# if any +# +# @leaks-fixed: #optional number of leaks fixed during the check if any +# +# @total-clusters: #optional total number of clusters, this field is present +# if the driver for the image format supports it +# +# @allocated-clusters: #optional total number of allocated clusters, this +# field is present if the driver for the image format +# supports it +# +# @fragmented-clusters: #optional total number of fragmented clusters, this +# field is present if the driver for the image format +# supports it +# +# @compressed-clusters: #optional total number of compressed clusters, this +# field is present if the driver for the image format +# supports it +# +# Since: 1.4 +# +## + +{ 'type': 'ImageCheck', + 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', + '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int', + '*corruptions-fixed': 'int', '*leaks-fixed': 'int', + '*total-clusters': 'int', '*allocated-clusters': 'int', + '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } } + +## +# @BlockDeviceInfo: +# +# Information about the backing device for a block device. +# +# @file: the filename of the backing device +# +# @node-name: #optional the name of the block driver node (Since 2.0) +# +# @ro: true if the backing device was open read-only +# +# @drv: the name of the block format used to open the backing device. As of +# 0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg', +# 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device', +# 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow', +# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat' +# +# @backing_file: #optional the name of the backing file (for copy-on-write) +# +# @backing_file_depth: number of files in the backing file chain (since: 1.2) +# +# @encrypted: true if the backing device is encrypted +# +# @encryption_key_missing: true if the backing device is encrypted but an +# valid encryption key is missing +# +# @detect_zeroes: detect and optimize zero writes (Since 2.1) +# +# @bps: total throughput limit in bytes per second is specified +# +# @bps_rd: read throughput limit in bytes per second is specified +# +# @bps_wr: write throughput limit in bytes per second is specified +# +# @iops: total I/O operations per second is specified +# +# @iops_rd: read I/O operations per second is specified +# +# @iops_wr: write I/O operations per second is specified +# +# @image: the info of image used (since: 1.6) +# +# @bps_max: #optional total max in bytes (Since 1.7) +# +# @bps_rd_max: #optional read max in bytes (Since 1.7) +# +# @bps_wr_max: #optional write max in bytes (Since 1.7) +# +# @iops_max: #optional total I/O operations max (Since 1.7) +# +# @iops_rd_max: #optional read I/O operations max (Since 1.7) +# +# @iops_wr_max: #optional write I/O operations max (Since 1.7) +# +# @iops_size: #optional an I/O size in bytes (Since 1.7) +# +# Since: 0.14.0 +# +## +{ 'type': 'BlockDeviceInfo', + 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str', + '*backing_file': 'str', 'backing_file_depth': 'int', + 'encrypted': 'bool', 'encryption_key_missing': 'bool', + 'detect_zeroes': 'BlockdevDetectZeroesOptions', + 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', + 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', + 'image': 'ImageInfo', + '*bps_max': 'int', '*bps_rd_max': 'int', + '*bps_wr_max': 'int', '*iops_max': 'int', + '*iops_rd_max': 'int', '*iops_wr_max': 'int', + '*iops_size': 'int' } } + +## +# @BlockDeviceIoStatus: +# +# An enumeration of block device I/O status. +# +# @ok: The last I/O operation has succeeded +# +# @failed: The last I/O operation has failed +# +# @nospace: The last I/O operation has failed due to a no-space condition +# +# Since: 1.0 +## +{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] } + +## +# @BlockDeviceMapEntry: +# +# Entry in the metadata map of the device (returned by "qemu-img map") +# +# @start: Offset in the image of the first byte described by this entry +# (in bytes) +# +# @length: Length of the range described by this entry (in bytes) +# +# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.) +# before reaching one for which the range is allocated. The value is +# in the range 0 to the depth of the image chain - 1. +# +# @zero: the sectors in this range read as zeros +# +# @data: reading the image will actually read data from a file (in particular, +# if @offset is present this means that the sectors are not simply +# preallocated, but contain actual data in raw format) +# +# @offset: if present, the image file stores the data for this range in +# raw format at the given offset. +# +# Since 1.7 +## +{ 'type': 'BlockDeviceMapEntry', + 'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool', + 'data': 'bool', '*offset': 'int' } } + +## +# @BlockDirtyInfo: +# +# Block dirty bitmap information. +# +# @count: number of dirty bytes according to the dirty bitmap +# +# @granularity: granularity of the dirty bitmap in bytes (since 1.4) +# +# Since: 1.3 +## +{ 'type': 'BlockDirtyInfo', + 'data': {'count': 'int', 'granularity': 'int'} } + +## +# @BlockInfo: +# +# Block device information. This structure describes a virtual device and +# the backing device associated with it. +# +# @device: The device name associated with the virtual device. +# +# @type: This field is returned only for compatibility reasons, it should +# not be used (always returns 'unknown') +# +# @removable: True if the device supports removable media. +# +# @locked: True if the guest has locked this device from having its media +# removed +# +# @tray_open: #optional True if the device has a tray and it is open +# (only present if removable is true) +# +# @dirty-bitmaps: #optional dirty bitmaps information (only present if the +# driver has one or more dirty bitmaps) (Since 2.0) +# +# @io-status: #optional @BlockDeviceIoStatus. Only present if the device +# supports it and the VM is configured to stop on errors +# +# @inserted: #optional @BlockDeviceInfo describing the device if media is +# present +# +# Since: 0.14.0 +## +{ 'type': 'BlockInfo', + 'data': {'device': 'str', 'type': 'str', 'removable': 'bool', + 'locked': 'bool', '*inserted': 'BlockDeviceInfo', + '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus', + '*dirty-bitmaps': ['BlockDirtyInfo'] } } + +## +# @query-block: +# +# Get a list of BlockInfo for all virtual block devices. +# +# Returns: a list of @BlockInfo describing each virtual block device +# +# Since: 0.14.0 +## +{ 'command': 'query-block', 'returns': ['BlockInfo'] } + +## +# @BlockDeviceStats: +# +# Statistics of a virtual block device or a block backing device. +# +# @rd_bytes: The number of bytes read by the device. +# +# @wr_bytes: The number of bytes written by the device. +# +# @rd_operations: The number of read operations performed by the device. +# +# @wr_operations: The number of write operations performed by the device. +# +# @flush_operations: The number of cache flush operations performed by the +# device (since 0.15.0) +# +# @flush_total_time_ns: Total time spend on cache flushes in nano-seconds +# (since 0.15.0). +# +# @wr_total_time_ns: Total time spend on writes in nano-seconds (since 0.15.0). +# +# @rd_total_time_ns: Total_time_spend on reads in nano-seconds (since 0.15.0). +# +# @wr_highest_offset: The offset after the greatest byte written to the +# device. The intended use of this information is for +# growable sparse files (like qcow2) that are used on top +# of a physical device. +# +# Since: 0.14.0 +## +{ 'type': 'BlockDeviceStats', + 'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int', + 'wr_operations': 'int', 'flush_operations': 'int', + 'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int', + 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } } + +## +# @BlockStats: +# +# Statistics of a virtual block device or a block backing device. +# +# @device: #optional If the stats are for a virtual block device, the name +# corresponding to the virtual block device. +# +# @stats: A @BlockDeviceStats for the device. +# +# @parent: #optional This describes the file block device if it has one. +# +# @backing: #optional This describes the backing block device if it has one. +# (Since 2.0) +# +# Since: 0.14.0 +## +{ 'type': 'BlockStats', + 'data': {'*device': 'str', 'stats': 'BlockDeviceStats', + '*parent': 'BlockStats', + '*backing': 'BlockStats'} } + +## +# @query-blockstats: +# +# Query the @BlockStats for all virtual block devices. +# +# Returns: A list of @BlockStats for each virtual block devices. +# +# Since: 0.14.0 +## +{ 'command': 'query-blockstats', 'returns': ['BlockStats'] } + +## +# @BlockdevOnError: +# +# An enumeration of possible behaviors for errors on I/O operations. +# The exact meaning depends on whether the I/O was initiated by a guest +# or by a block job +# +# @report: for guest operations, report the error to the guest; +# for jobs, cancel the job +# +# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR +# or BLOCK_JOB_ERROR) +# +# @enospc: same as @stop on ENOSPC, same as @report otherwise. +# +# @stop: for guest operations, stop the virtual machine; +# for jobs, pause the job +# +# Since: 1.3 +## +{ 'enum': 'BlockdevOnError', + 'data': ['report', 'ignore', 'enospc', 'stop'] } + +## +# @MirrorSyncMode: +# +# An enumeration of possible behaviors for the initial synchronization +# phase of storage mirroring. +# +# @top: copies data in the topmost image to the destination +# +# @full: copies data from all images to the destination +# +# @none: only copy data written from now on +# +# Since: 1.3 +## +{ 'enum': 'MirrorSyncMode', + 'data': ['top', 'full', 'none'] } + +## +# @BlockJobType: +# +# Type of a block job. +# +# @commit: block commit job type, see "block-commit" +# +# @stream: block stream job type, see "block-stream" +# +# @mirror: drive mirror job type, see "drive-mirror" +# +# @backup: drive backup job type, see "drive-backup" +# +# Since: 1.7 +## +{ 'enum': 'BlockJobType', + 'data': ['commit', 'stream', 'mirror', 'backup'] } + +## +# @BlockJobInfo: +# +# Information about a long-running block device operation. +# +# @type: the job type ('stream' for image streaming) +# +# @device: the block device name +# +# @len: the maximum progress value +# +# @busy: false if the job is known to be in a quiescent state, with +# no pending I/O. Since 1.3. +# +# @paused: whether the job is paused or, if @busy is true, will +# pause itself as soon as possible. Since 1.3. +# +# @offset: the current progress value +# +# @speed: the rate limit, bytes per second +# +# @io-status: the status of the job (since 1.3) +# +# Since: 1.1 +## +{ 'type': 'BlockJobInfo', + 'data': {'type': 'str', 'device': 'str', 'len': 'int', + 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int', + 'io-status': 'BlockDeviceIoStatus'} } + +## +# @query-block-jobs: +# +# Return information about long-running block device operations. +# +# Returns: a list of @BlockJobInfo for each active block job +# +# Since: 1.1 +## +{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] } + +## +# @block_passwd: +# +# This command sets the password of a block device that has not been open +# with a password and requires one. +# +# The two cases where this can happen are a block device is created through +# QEMU's initial command line or a block device is changed through the legacy +# @change interface. +# +# In the event that the block device is created through the initial command +# line, the VM will start in the stopped state regardless of whether '-S' is +# used. The intention is for a management tool to query the block devices to +# determine which ones are encrypted, set the passwords with this command, and +# then start the guest with the @cont command. +# +# Either @device or @node-name must be set but not both. +# +# @device: #optional the name of the block backend device to set the password on +# +# @node-name: #optional graph node name to set the password on (Since 2.0) +# +# @password: the password to use for the device +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# If @device is not encrypted, DeviceNotEncrypted +# +# Notes: Not all block formats support encryption and some that do are not +# able to validate that a password is correct. Disk corruption may +# occur if an invalid password is specified. +# +# Since: 0.14.0 +## +{ 'command': 'block_passwd', 'data': {'*device': 'str', + '*node-name': 'str', 'password': 'str'} } + +## +# @block_resize +# +# Resize a block image while a guest is running. +# +# Either @device or @node-name must be set but not both. +# +# @device: #optional the name of the device to get the image resized +# +# @node-name: #optional graph node name to get the image resized (Since 2.0) +# +# @size: new image size in bytes +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since: 0.14.0 +## +{ 'command': 'block_resize', 'data': { '*device': 'str', + '*node-name': 'str', + 'size': 'int' }} + +## +# @NewImageMode +# +# An enumeration that tells QEMU how to set the backing file path in +# a new image file. +# +# @existing: QEMU should look for an existing image file. +# +# @absolute-paths: QEMU should create a new image with absolute paths +# for the backing file. If there is no backing file available, the new +# image will not be backed either. +# +# Since: 1.1 +## +{ 'enum': 'NewImageMode', + 'data': [ 'existing', 'absolute-paths' ] } + +## +# @BlockdevSnapshot +# +# Either @device or @node-name must be set but not both. +# +# @device: #optional the name of the device to generate the snapshot from. +# +# @node-name: #optional graph node name to generate the snapshot from (Since 2.0) +# +# @snapshot-file: the target of the new image. A new file will be created. +# +# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0) +# +# @format: #optional the format of the snapshot image, default is 'qcow2'. +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +## +{ 'type': 'BlockdevSnapshot', + 'data': { '*device': 'str', '*node-name': 'str', + 'snapshot-file': 'str', '*snapshot-node-name': 'str', + '*format': 'str', '*mode': 'NewImageMode' } } + +## +# @DriveBackup +# +# @device: the name of the device which should be copied. +# +# @target: the target of the new image. If the file exists, or if it +# is a device, the existing file/device will be used as the new +# destination. If it does not exist, a new file will be created. +# +# @format: #optional the format of the new destination, default is to +# probe if @mode is 'existing', else the format of the source +# +# @sync: what parts of the disk image should be copied to the destination +# (all the disk, only the sectors allocated in the topmost image, or +# only new I/O). +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +# +# @speed: #optional the maximum speed, in bytes per second +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# +# Note that @on-source-error and @on-target-error only affect background I/O. +# If an error occurs during a guest write request, the device's rerror/werror +# actions will be used. +# +# Since: 1.6 +## +{ 'type': 'DriveBackup', + 'data': { 'device': 'str', 'target': 'str', '*format': 'str', + 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', + '*speed': 'int', + '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } + +## +# @blockdev-snapshot-sync +# +# Generates a synchronous snapshot of a block device. +# +# For the arguments, see the documentation of BlockdevSnapshot. +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since 0.14.0 +## +{ 'command': 'blockdev-snapshot-sync', + 'data': 'BlockdevSnapshot' } + +## +# @block-commit +# +# Live commit of data from overlay image nodes into backing nodes - i.e., +# writes data between 'top' and 'base' into 'base'. +# +# @device: the name of the device +# +# @base: #optional The file name of the backing image to write data into. +# If not specified, this is the deepest backing image +# +# @top: The file name of the backing image within the image chain, +# which contains the topmost data to be committed down. +# +# If top == base, that is an error. +# If top == active, the job will not be completed by itself, +# user needs to complete the job with the block-job-complete +# command after getting the ready event. (Since 2.0) +# +# If the base image is smaller than top, then the base image +# will be resized to be the same size as top. If top is +# smaller than the base image, the base will not be +# truncated. If you want the base image size to match the +# size of the smaller top, you can safely truncate it +# yourself once the commit operation successfully completes. +# +# +# @speed: #optional the maximum speed, in bytes per second +# +# Returns: Nothing on success +# If commit or stream is already active on this device, DeviceInUse +# If @device does not exist, DeviceNotFound +# If image commit is not supported by this device, NotSupported +# If @base or @top is invalid, a generic error is returned +# If @speed is invalid, InvalidParameter +# +# Since: 1.3 +# +## +{ 'command': 'block-commit', + 'data': { 'device': 'str', '*base': 'str', 'top': 'str', + '*speed': 'int' } } + +## +# @drive-backup +# +# Start a point-in-time copy of a block device to a new destination. The +# status of ongoing drive-backup operations can be checked with +# query-block-jobs where the BlockJobInfo.type field has the value 'backup'. +# The operation can be stopped before it has completed using the +# block-job-cancel command. +# +# For the arguments, see the documentation of DriveBackup. +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since 1.6 +## +{ 'command': 'drive-backup', 'data': 'DriveBackup' } + +## +# @query-named-block-nodes +# +# Get the named block driver list +# +# Returns: the list of BlockDeviceInfo +# +# Since 2.0 +## +{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] } + +## +# @drive-mirror +# +# Start mirroring a block device's writes to a new destination. +# +# @device: the name of the device whose writes should be mirrored. +# +# @target: the target of the new image. If the file exists, or if it +# is a device, the existing file/device will be used as the new +# destination. If it does not exist, a new file will be created. +# +# @format: #optional the format of the new destination, default is to +# probe if @mode is 'existing', else the format of the source +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +# +# @speed: #optional the maximum speed, in bytes per second +# +# @sync: what parts of the disk image should be copied to the destination +# (all the disk, only the sectors allocated in the topmost image, or +# only new I/O). +# +# @granularity: #optional granularity of the dirty bitmap, default is 64K +# if the image format doesn't have clusters, 4K if the clusters +# are smaller than that, else the cluster size. Must be a +# power of 2 between 512 and 64M (since 1.4). +# +# @buf-size: #optional maximum amount of data in flight from source to +# target (since 1.4). +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since 1.3 +## +{ 'command': 'drive-mirror', + 'data': { 'device': 'str', 'target': 'str', '*format': 'str', + 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', + '*speed': 'int', '*granularity': 'uint32', + '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } + +## +# @block_set_io_throttle: +# +# Change I/O throttle limits for a block drive. +# +# @device: The name of the device +# +# @bps: total throughput limit in bytes per second +# +# @bps_rd: read throughput limit in bytes per second +# +# @bps_wr: write throughput limit in bytes per second +# +# @iops: total I/O operations per second +# +# @ops_rd: read I/O operations per second +# +# @iops_wr: write I/O operations per second +# +# @bps_max: #optional total max in bytes (Since 1.7) +# +# @bps_rd_max: #optional read max in bytes (Since 1.7) +# +# @bps_wr_max: #optional write max in bytes (Since 1.7) +# +# @iops_max: #optional total I/O operations max (Since 1.7) +# +# @iops_rd_max: #optional read I/O operations max (Since 1.7) +# +# @iops_wr_max: #optional write I/O operations max (Since 1.7) +# +# @iops_size: #optional an I/O size in bytes (Since 1.7) +# +# Returns: Nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since: 1.1 +## +{ 'command': 'block_set_io_throttle', + 'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', + 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', + '*bps_max': 'int', '*bps_rd_max': 'int', + '*bps_wr_max': 'int', '*iops_max': 'int', + '*iops_rd_max': 'int', '*iops_wr_max': 'int', + '*iops_size': 'int' } } + +## +# @block-stream: +# +# Copy data from a backing file into a block device. +# +# The block streaming operation is performed in the background until the entire +# backing file has been copied. This command returns immediately once streaming +# has started. The status of ongoing block streaming operations can be checked +# with query-block-jobs. The operation can be stopped before it has completed +# using the block-job-cancel command. +# +# If a base file is specified then sectors are not copied from that base file and +# its backing chain. When streaming completes the image file will have the base +# file as its backing file. This can be used to stream a subset of the backing +# file chain instead of flattening the entire image. +# +# On successful completion the image file is updated to drop the backing file +# and the BLOCK_JOB_COMPLETED event is emitted. +# +# @device: the device name +# +# @base: #optional the common backing file name +# +# @speed: #optional the maximum speed, in bytes per second +# +# @on-error: #optional the action to take on an error (default report). +# 'stop' and 'enospc' can only be used if the block device +# supports io-status (see BlockInfo). Since 1.3. +# +# Returns: Nothing on success +# If @device does not exist, DeviceNotFound +# +# Since: 1.1 +## +{ 'command': 'block-stream', + 'data': { 'device': 'str', '*base': 'str', '*speed': 'int', + '*on-error': 'BlockdevOnError' } } + +## +# @block-job-set-speed: +# +# Set maximum speed for a background block operation. +# +# This command can only be issued when there is an active block job. +# +# Throttling can be disabled by setting the speed to 0. +# +# @device: the device name +# +# @speed: the maximum speed, in bytes per second, or 0 for unlimited. +# Defaults to 0. +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.1 +## +{ 'command': 'block-job-set-speed', + 'data': { 'device': 'str', 'speed': 'int' } } + +## +# @block-job-cancel: +# +# Stop an active background block operation. +# +# This command returns immediately after marking the active background block +# operation for cancellation. It is an error to call this command if no +# operation is in progress. +# +# The operation will cancel as soon as possible and then emit the +# BLOCK_JOB_CANCELLED event. Before that happens the job is still visible when +# enumerated using query-block-jobs. +# +# For streaming, the image file retains its backing file unless the streaming +# operation happens to complete just as it is being cancelled. A new streaming +# operation can be started at a later time to finish copying all data from the +# backing file. +# +# @device: the device name +# +# @force: #optional whether to allow cancellation of a paused job (default +# false). Since 1.3. +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.1 +## +{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } } + +## +# @block-job-pause: +# +# Pause an active background block operation. +# +# This command returns immediately after marking the active background block +# operation for pausing. It is an error to call this command if no +# operation is in progress. Pausing an already paused job has no cumulative +# effect; a single block-job-resume command will resume the job. +# +# The operation will pause as soon as possible. No event is emitted when +# the operation is actually paused. Cancelling a paused job automatically +# resumes it. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-pause', 'data': { 'device': 'str' } } + +## +# @block-job-resume: +# +# Resume an active background block operation. +# +# This command returns immediately after resuming a paused background block +# operation. It is an error to call this command if no operation is in +# progress. Resuming an already running job is not an error. +# +# This command also clears the error status of the job. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-resume', 'data': { 'device': 'str' } } + +## +# @block-job-complete: +# +# Manually trigger completion of an active background block operation. This +# is supported for drive mirroring, where it also switches the device to +# write to the target path only. The ability to complete is signaled with +# a BLOCK_JOB_READY event. +# +# This command completes an active background block operation synchronously. +# The ordering of this command's return with the BLOCK_JOB_COMPLETED event +# is not defined. Note that if an I/O error occurs during the processing of +# this command: 1) the command itself will fail; 2) the error will be processed +# according to the rerror/werror arguments that were specified when starting +# the operation. +# +# A cancelled or paused job cannot be completed. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-complete', 'data': { 'device': 'str' } } + +## +# @BlockdevDiscardOptions +# +# Determines how to handle discard requests. +# +# @ignore: Ignore the request +# @unmap: Forward as an unmap request +# +# Since: 1.7 +## +{ 'enum': 'BlockdevDiscardOptions', + 'data': [ 'ignore', 'unmap' ] } + +## +# @BlockdevDetectZeroesOptions +# +# Describes the operation mode for the automatic conversion of plain +# zero writes by the OS to driver specific optimized zero write commands. +# +# @off: Disabled (default) +# @on: Enabled +# @unmap: Enabled and even try to unmap blocks if possible. This requires +# also that @BlockdevDiscardOptions is set to unmap for this device. +# +# Since: 2.1 +## +{ 'enum': 'BlockdevDetectZeroesOptions', + 'data': [ 'off', 'on', 'unmap' ] } + +## +# @BlockdevAioOptions +# +# Selects the AIO backend to handle I/O requests +# +# @threads: Use qemu's thread pool +# @native: Use native AIO backend (only Linux and Windows) +# +# Since: 1.7 +## +{ 'enum': 'BlockdevAioOptions', + 'data': [ 'threads', 'native' ] } + +## +# @BlockdevCacheOptions +# +# Includes cache-related options for block devices +# +# @writeback: #optional enables writeback mode for any caches (default: true) +# @direct: #optional enables use of O_DIRECT (bypass the host page cache; +# default: false) +# @no-flush: #optional ignore any flush requests for the device (default: +# false) +# +# Since: 1.7 +## +{ 'type': 'BlockdevCacheOptions', + 'data': { '*writeback': 'bool', + '*direct': 'bool', + '*no-flush': 'bool' } } + +## +# @BlockdevDriver +# +# Drivers that are supported in block device operations. +# +# @host_device, @host_cdrom, @host_floppy: Since 2.1 +# +# Since: 2.0 +## +{ 'enum': 'BlockdevDriver', + 'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy', + 'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug', + 'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow', + 'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] } + +## +# @BlockdevOptionsBase +# +# Options that are available for all block devices, independent of the block +# driver. +# +# @driver: block driver name +# @id: #optional id by which the new block device can be referred to. +# This is a required option on the top level of blockdev-add, and +# currently not allowed on any other level. +# @node-name: #optional the name of a block driver state node (Since 2.0) +# @discard: #optional discard-related options (default: ignore) +# @cache: #optional cache-related options +# @aio: #optional AIO backend (default: threads) +# @rerror: #optional how to handle read errors on the device +# (default: report) +# @werror: #optional how to handle write errors on the device +# (default: enospc) +# @read-only: #optional whether the block device should be read-only +# (default: false) +# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1) +# (default: off) +# +# Since: 1.7 +## +{ 'type': 'BlockdevOptionsBase', + 'data': { 'driver': 'BlockdevDriver', + '*id': 'str', + '*node-name': 'str', + '*discard': 'BlockdevDiscardOptions', + '*cache': 'BlockdevCacheOptions', + '*aio': 'BlockdevAioOptions', + '*rerror': 'BlockdevOnError', + '*werror': 'BlockdevOnError', + '*read-only': 'bool', + '*detect-zeroes': 'BlockdevDetectZeroesOptions' } } + +## +# @BlockdevOptionsFile +# +# Driver specific block device options for the file backend and similar +# protocols. +# +# @filename: path to the image file +# +# Since: 1.7 +## +{ 'type': 'BlockdevOptionsFile', + 'data': { 'filename': 'str' } } + +## +# @BlockdevOptionsVVFAT +# +# Driver specific block device options for the vvfat protocol. +# +# @dir: directory to be exported as FAT image +# @fat-type: #optional FAT type: 12, 16 or 32 +# @floppy: #optional whether to export a floppy image (true) or +# partitioned hard disk (false; default) +# @rw: #optional whether to allow write operations (default: false) +# +# Since: 1.7 +## +{ 'type': 'BlockdevOptionsVVFAT', + 'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool', + '*rw': 'bool' } } + +## +# @BlockdevOptionsGenericFormat +# +# Driver specific block device options for image format that have no option +# besides their data source. +# +# @file: reference to or definition of the data source block device +# +# Since: 1.7 +## +{ 'type': 'BlockdevOptionsGenericFormat', + 'data': { 'file': 'BlockdevRef' } } + +## +# @BlockdevOptionsGenericCOWFormat +# +# Driver specific block device options for image format that have no option +# besides their data source and an optional backing file. +# +# @backing: #optional reference to or definition of the backing file block +# device (if missing, taken from the image file content). It is +# allowed to pass an empty string here in order to disable the +# default backing file. +# +# Since: 1.7 +## +{ 'type': 'BlockdevOptionsGenericCOWFormat', + 'base': 'BlockdevOptionsGenericFormat', + 'data': { '*backing': 'BlockdevRef' } } + +## +# @BlockdevOptionsQcow2 +# +# Driver specific block device options for qcow2. +# +# @lazy-refcounts: #optional whether to enable the lazy refcounts +# feature (default is taken from the image file) +# +# @pass-discard-request: #optional whether discard requests to the qcow2 +# device should be forwarded to the data source +# +# @pass-discard-snapshot: #optional whether discard requests for the data source +# should be issued when a snapshot operation (e.g. +# deleting a snapshot) frees clusters in the qcow2 file +# +# @pass-discard-other: #optional whether discard requests for the data source +# should be issued on other occasions where a cluster +# gets freed +# +# Since: 1.7 +## +{ 'type': 'BlockdevOptionsQcow2', + 'base': 'BlockdevOptionsGenericCOWFormat', + 'data': { '*lazy-refcounts': 'bool', + '*pass-discard-request': 'bool', + '*pass-discard-snapshot': 'bool', + '*pass-discard-other': 'bool' } } + +## +# @BlkdebugEvent +# +# Trigger events supported by blkdebug. +## +{ 'enum': 'BlkdebugEvent', + 'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table', + 'l1_grow.activate_table', 'l2_load', 'l2_update', + 'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write', + 'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio', + 'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read', + 'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update', + 'refblock_load', 'refblock_update', 'refblock_update_part', + 'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write', + 'refblock_alloc.write_blocks', 'refblock_alloc.write_table', + 'refblock_alloc.switch_table', 'cluster_alloc', + 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os', + 'flush_to_disk' ] } + +## +# @BlkdebugInjectErrorOptions +# +# Describes a single error injection for blkdebug. +# +# @event: trigger event +# +# @state: #optional the state identifier blkdebug needs to be in to +# actually trigger the event; defaults to "any" +# +# @errno: #optional error identifier (errno) to be returned; defaults to +# EIO +# +# @sector: #optional specifies the sector index which has to be affected +# in order to actually trigger the event; defaults to "any +# sector" +# +# @once: #optional disables further events after this one has been +# triggered; defaults to false +# +# @immediately: #optional fail immediately; defaults to false +# +# Since: 2.0 +## +{ 'type': 'BlkdebugInjectErrorOptions', + 'data': { 'event': 'BlkdebugEvent', + '*state': 'int', + '*errno': 'int', + '*sector': 'int', + '*once': 'bool', + '*immediately': 'bool' } } + +## +# @BlkdebugSetStateOptions +# +# Describes a single state-change event for blkdebug. +# +# @event: trigger event +# +# @state: #optional the current state identifier blkdebug needs to be in; +# defaults to "any" +# +# @new_state: the state identifier blkdebug is supposed to assume if +# this event is triggered +# +# Since: 2.0 +## +{ 'type': 'BlkdebugSetStateOptions', + 'data': { 'event': 'BlkdebugEvent', + '*state': 'int', + 'new_state': 'int' } } + +## +# @BlockdevOptionsBlkdebug +# +# Driver specific block device options for blkdebug. +# +# @image: underlying raw block device (or image file) +# +# @config: #optional filename of the configuration file +# +# @align: #optional required alignment for requests in bytes +# +# @inject-error: #optional array of error injection descriptions +# +# @set-state: #optional array of state-change descriptions +# +# Since: 2.0 +## +{ 'type': 'BlockdevOptionsBlkdebug', + 'data': { 'image': 'BlockdevRef', + '*config': 'str', + '*align': 'int', + '*inject-error': ['BlkdebugInjectErrorOptions'], + '*set-state': ['BlkdebugSetStateOptions'] } } + +## +# @BlockdevOptionsBlkverify +# +# Driver specific block device options for blkverify. +# +# @test: block device to be tested +# +# @raw: raw image used for verification +# +# Since: 2.0 +## +{ 'type': 'BlockdevOptionsBlkverify', + 'data': { 'test': 'BlockdevRef', + 'raw': 'BlockdevRef' } } + +## +# @BlockdevOptionsQuorum +# +# Driver specific block device options for Quorum +# +# @blkverify: #optional true if the driver must print content mismatch +# set to false by default +# +# @children: the children block devices to use +# +# @vote-threshold: the vote limit under which a read will fail +# +# Since: 2.0 +## +{ 'type': 'BlockdevOptionsQuorum', + 'data': { '*blkverify': 'bool', + 'children': [ 'BlockdevRef' ], + 'vote-threshold': 'int' } } + +## +# @BlockdevOptions +# +# Options for creating a block device. +# +# Since: 1.7 +## +{ 'union': 'BlockdevOptions', + 'base': 'BlockdevOptionsBase', + 'discriminator': 'driver', + 'data': { + 'file': 'BlockdevOptionsFile', + 'host_device':'BlockdevOptionsFile', + 'host_cdrom': 'BlockdevOptionsFile', + 'host_floppy':'BlockdevOptionsFile', + 'http': 'BlockdevOptionsFile', + 'https': 'BlockdevOptionsFile', + 'ftp': 'BlockdevOptionsFile', + 'ftps': 'BlockdevOptionsFile', + 'tftp': 'BlockdevOptionsFile', +# TODO gluster: Wait for structured options +# TODO iscsi: Wait for structured options +# TODO nbd: Should take InetSocketAddress for 'host'? +# TODO nfs: Wait for structured options +# TODO rbd: Wait for structured options +# TODO sheepdog: Wait for structured options +# TODO ssh: Should take InetSocketAddress for 'host'? + 'vvfat': 'BlockdevOptionsVVFAT', + 'blkdebug': 'BlockdevOptionsBlkdebug', + 'blkverify': 'BlockdevOptionsBlkverify', + 'bochs': 'BlockdevOptionsGenericFormat', + 'cloop': 'BlockdevOptionsGenericFormat', + 'cow': 'BlockdevOptionsGenericCOWFormat', + 'dmg': 'BlockdevOptionsGenericFormat', + 'parallels': 'BlockdevOptionsGenericFormat', + 'qcow': 'BlockdevOptionsGenericCOWFormat', + 'qcow2': 'BlockdevOptionsQcow2', + 'qed': 'BlockdevOptionsGenericCOWFormat', + 'raw': 'BlockdevOptionsGenericFormat', + 'vdi': 'BlockdevOptionsGenericFormat', + 'vhdx': 'BlockdevOptionsGenericFormat', + 'vmdk': 'BlockdevOptionsGenericCOWFormat', + 'vpc': 'BlockdevOptionsGenericFormat', + 'quorum': 'BlockdevOptionsQuorum' + } } + +## +# @BlockdevRef +# +# Reference to a block device. +# +# @definition: defines a new block device inline +# @reference: references the ID of an existing block device. An +# empty string means that no block device should be +# referenced. +# +# Since: 1.7 +## +{ 'union': 'BlockdevRef', + 'discriminator': {}, + 'data': { 'definition': 'BlockdevOptions', + 'reference': 'str' } } + +## +# @blockdev-add: +# +# Creates a new block device. +# +# @options: block device options for the new device +# +# Since: 1.7 +## +{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } } + diff --git a/qapi/block.json b/qapi/block.json new file mode 100644 index 0000000000..61c463ab05 --- /dev/null +++ b/qapi/block.json @@ -0,0 +1,166 @@ +# -*- Mode: Python -*- +# +# QAPI block definitions (vm related) + +# QAPI block core definitions +{ 'include': 'block-core.json' } + +## +# BiosAtaTranslation: +# +# Policy that BIOS should use to interpret cylinder/head/sector +# addresses. Note that Bochs BIOS and SeaBIOS will not actually +# translate logical CHS to physical; instead, they will use logical +# block addressing. +# +# @auto: If cylinder/heads/sizes are passed, choose between none and LBA +# depending on the size of the disk. If they are not passed, +# choose none if QEMU can guess that the disk had 16 or fewer +# heads, large if QEMU can guess that the disk had 131072 or +# fewer tracks across all heads (i.e. cylinders*heads<131072), +# otherwise LBA. +# +# @none: The physical disk geometry is equal to the logical geometry. +# +# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255 +# heads (if fewer than 255 are enough to cover the whole disk +# with 1024 cylinders/head). The number of cylinders/head is +# then computed based on the number of sectors and heads. +# +# @large: The number of cylinders per head is scaled down to 1024 +# by correspondingly scaling up the number of heads. +# +# @rechs: Same as @large, but first convert a 16-head geometry to +# 15-head, by proportionally scaling up the number of +# cylinders/head. +# +# Since: 2.0 +## +{ 'enum': 'BiosAtaTranslation', + 'data': ['auto', 'none', 'lba', 'large', 'rechs']} + +## +# @BlockdevSnapshotInternal +# +# @device: the name of the device to generate the snapshot from +# +# @name: the name of the internal snapshot to be created +# +# Notes: In transaction, if @name is empty, or any snapshot matching @name +# exists, the operation will fail. Only some image formats support it, +# for example, qcow2, rbd, and sheepdog. +# +# Since: 1.7 +## +{ 'type': 'BlockdevSnapshotInternal', + 'data': { 'device': 'str', 'name': 'str' } } + +## +# @blockdev-snapshot-internal-sync +# +# Synchronously take an internal snapshot of a block device, when the format +# of the image used supports it. +# +# For the arguments, see the documentation of BlockdevSnapshotInternal. +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# If any snapshot matching @name exists, or @name is empty, +# GenericError +# If the format of the image used does not support it, +# BlockFormatFeatureNotSupported +# +# Since 1.7 +## +{ 'command': 'blockdev-snapshot-internal-sync', + 'data': 'BlockdevSnapshotInternal' } + +## +# @blockdev-snapshot-delete-internal-sync +# +# Synchronously delete an internal snapshot of a block device, when the format +# of the image used support it. The snapshot is identified by name or id or +# both. One of the name or id is required. Return SnapshotInfo for the +# successfully deleted snapshot. +# +# @device: the name of the device to delete the snapshot from +# +# @id: optional the snapshot's ID to be deleted +# +# @name: optional the snapshot's name to be deleted +# +# Returns: SnapshotInfo on success +# If @device is not a valid block device, DeviceNotFound +# If snapshot not found, GenericError +# If the format of the image used does not support it, +# BlockFormatFeatureNotSupported +# If @id and @name are both not specified, GenericError +# +# Since 1.7 +## +{ 'command': 'blockdev-snapshot-delete-internal-sync', + 'data': { 'device': 'str', '*id': 'str', '*name': 'str'}, + 'returns': 'SnapshotInfo' } + +## +# @eject: +# +# Ejects a device from a removable drive. +# +# @device: The name of the device +# +# @force: @optional If true, eject regardless of whether the drive is locked. +# If not specified, the default value is false. +# +# Returns: Nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Notes: Ejecting a device will no media results in success +# +# Since: 0.14.0 +## +{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} } + +## +# @nbd-server-start: +# +# Start an NBD server listening on the given host and port. Block +# devices can then be exported using @nbd-server-add. The NBD +# server will present them as named exports; for example, another +# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME". +# +# @addr: Address on which to listen. +# +# Returns: error if the server is already running. +# +# Since: 1.3.0 +## +{ 'command': 'nbd-server-start', + 'data': { 'addr': 'SocketAddress' } } + +## +# @nbd-server-add: +# +# Export a device to QEMU's embedded NBD server. +# +# @device: Block device to be exported +# +# @writable: Whether clients should be able to write to the device via the +# NBD connection (default false). #optional +# +# Returns: error if the device is already marked for export. +# +# Since: 1.3.0 +## +{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} } + +## +# @nbd-server-stop: +# +# Stop QEMU's embedded NBD server, and unregister all devices previously +# added via @nbd-server-add. +# +# Since: 1.3.0 +## +{ 'command': 'nbd-server-stop' } + diff --git a/qapi/common.json b/qapi/common.json new file mode 100644 index 0000000000..4e9a21f2f6 --- /dev/null +++ b/qapi/common.json @@ -0,0 +1,89 @@ +# -*- Mode: Python -*- +# +# QAPI common definitions + +## +# @ErrorClass +# +# QEMU error classes +# +# @GenericError: this is used for errors that don't require a specific error +# class. This should be the default case for most errors +# +# @CommandNotFound: the requested command has not been found +# +# @DeviceEncrypted: the requested operation can't be fulfilled because the +# selected device is encrypted +# +# @DeviceNotActive: a device has failed to be become active +# +# @DeviceNotFound: the requested device has not been found +# +# @KVMMissingCap: the requested operation can't be fulfilled because a +# required KVM capability is missing +# +# Since: 1.2 +## +{ 'enum': 'ErrorClass', + 'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted', + 'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] } + +## +# @VersionInfo: +# +# A description of QEMU's version. +# +# @qemu.major: The major version of QEMU +# +# @qemu.minor: The minor version of QEMU +# +# @qemu.micro: The micro version of QEMU. By current convention, a micro +# version of 50 signifies a development branch. A micro version +# greater than or equal to 90 signifies a release candidate for +# the next minor version. A micro version of less than 50 +# signifies a stable release. +# +# @package: QEMU will always set this field to an empty string. Downstream +# versions of QEMU should set this to a non-empty string. The +# exact format depends on the downstream however it highly +# recommended that a unique name is used. +# +# Since: 0.14.0 +## +{ 'type': 'VersionInfo', + 'data': {'qemu': {'major': 'int', 'minor': 'int', 'micro': 'int'}, + 'package': 'str'} } + +## +# @query-version: +# +# Returns the current version of QEMU. +# +# Returns: A @VersionInfo object describing the current version of QEMU. +# +# Since: 0.14.0 +## +{ 'command': 'query-version', 'returns': 'VersionInfo' } + +## +# @CommandInfo: +# +# Information about a QMP command +# +# @name: The command name +# +# Since: 0.14.0 +## +{ 'type': 'CommandInfo', 'data': {'name': 'str'} } + +## +# @query-commands: +# +# Return a list of supported QMP commands by this server +# +# Returns: A list of @CommandInfo for all supported commands +# +# Since: 0.14.0 +## +{ 'command': 'query-commands', 'returns': ['CommandInfo'] } + diff --git a/qemu-img.c b/qemu-img.c index b3d2bc6f02..aa89ba21fd 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -580,10 +580,11 @@ static int collect_image_check(BlockDriverState *bs, /* * Checks an image for consistency. Exit codes: * - * 0 - Check completed, image is good - * 1 - Check not completed because of internal errors - * 2 - Check completed, image is corrupted - * 3 - Check completed, image has leaked clusters, but is good otherwise + * 0 - Check completed, image is good + * 1 - Check not completed because of internal errors + * 2 - Check completed, image is corrupted + * 3 - Check completed, image has leaked clusters, but is good otherwise + * 63 - Checks are not supported by the image format */ static int img_check(int argc, char **argv) { diff --git a/qemu-img.texi b/qemu-img.texi index f84590ebf0..c68b54148a 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -126,6 +126,29 @@ wrong fix or hiding corruption that has already occurred. Only the formats @code{qcow2}, @code{qed} and @code{vdi} support consistency checks. +In case the image does not have any inconsistencies, check exits with @code{0}. +Other exit codes indicate the kind of inconsistency found or if another error +occurred. The following table summarizes all exit codes of the check subcommand: + +@table @option + +@item 0 +Check completed, the image is (now) consistent +@item 1 +Check not completed because of internal errors +@item 2 +Check completed, image is corrupted +@item 3 +Check completed, image has leaked clusters, but is not corrupted +@item 63 +Checks are not supported by the image format + +@end table + +If @code{-r} is specified, exit codes representing the image state refer to the +state after (the attempt at) repairing it. That is, a successful @code{-r all} +will yield the exit code 0, independently of the image state before. + @item create [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}] Create the new disk image @var{filename} of size @var{size} and format diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 1d4ffd3603..3de6ab80e0 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -12,8 +12,10 @@ #include #include +#include "block/aio.h" #include "qemu/throttle.h" +AioContext *ctx; LeakyBucket bkt; ThrottleConfig cfg; ThrottleState ts; @@ -104,7 +106,8 @@ static void test_init(void) memset(&ts, 1, sizeof(ts)); /* init the structure */ - throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* check initialized fields */ g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL); @@ -126,7 +129,8 @@ static void test_init(void) static void test_destroy(void) { int i; - throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); throttle_destroy(&ts); for (i = 0; i < 2; i++) { g_assert(!ts.timers[i]); @@ -165,7 +169,8 @@ static void test_config_functions(void) orig_cfg.op_size = 1; - throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* structure reset by throttle_init previous_leak should be null */ g_assert(!ts.previous_leak); throttle_config(&ts, &orig_cfg); @@ -324,7 +329,8 @@ static void test_have_timer(void) g_assert(!throttle_have_timer(&ts)); /* init the structure */ - throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* timer set by init should return true */ g_assert(throttle_have_timer(&ts)); @@ -332,6 +338,29 @@ static void test_have_timer(void) throttle_destroy(&ts); } +static void test_detach_attach(void) +{ + /* zero the structure */ + memset(&ts, 0, sizeof(ts)); + + /* init the structure */ + throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); + + /* timer set by init should return true */ + g_assert(throttle_have_timer(&ts)); + + /* timer should no longer exist after detaching */ + throttle_detach_aio_context(&ts); + g_assert(!throttle_have_timer(&ts)); + + /* timer should exist again after attaching */ + throttle_attach_aio_context(&ts, ctx); + g_assert(throttle_have_timer(&ts)); + + throttle_destroy(&ts); +} + static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ int size, /* size of the operation to do */ double avg, /* io limit */ @@ -357,7 +386,8 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ cfg.op_size = op_size; - throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); throttle_config(&ts, &cfg); /* account a read */ @@ -461,7 +491,15 @@ static void test_accounting(void) int main(int argc, char **argv) { + GSource *src; + init_clocks(); + + ctx = aio_context_new(); + src = aio_get_g_source(ctx); + g_source_attach(src, NULL); + g_source_unref(src); + do {} while (g_main_context_iteration(NULL, false)); /* tests in the same order as the header function declarations */ @@ -471,6 +509,7 @@ int main(int argc, char **argv) g_test_add_func("/throttle/init", test_init); g_test_add_func("/throttle/destroy", test_destroy); g_test_add_func("/throttle/have_timer", test_have_timer); + g_test_add_func("/throttle/detach_attach", test_detach_attach); g_test_add_func("/throttle/config/enabled", test_enabled); g_test_add_func("/throttle/config/conflicting", test_conflicting_config); g_test_add_func("/throttle/config/is_valid", test_is_valid); diff --git a/util/throttle.c b/util/throttle.c index 02e6f15587..f976ac7de5 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -22,6 +22,7 @@ #include "qemu/throttle.h" #include "qemu/timer.h" +#include "block/aio.h" /* This function make a bucket leak * @@ -157,8 +158,18 @@ bool throttle_compute_timer(ThrottleState *ts, return false; } +/* Add timers to event loop */ +void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context) +{ + ts->timers[0] = aio_timer_new(new_context, ts->clock_type, SCALE_NS, + ts->read_timer_cb, ts->timer_opaque); + ts->timers[1] = aio_timer_new(new_context, ts->clock_type, SCALE_NS, + ts->write_timer_cb, ts->timer_opaque); +} + /* To be called first on the ThrottleState */ void throttle_init(ThrottleState *ts, + AioContext *aio_context, QEMUClockType clock_type, QEMUTimerCB *read_timer_cb, QEMUTimerCB *write_timer_cb, @@ -167,8 +178,10 @@ void throttle_init(ThrottleState *ts, memset(ts, 0, sizeof(ThrottleState)); ts->clock_type = clock_type; - ts->timers[0] = timer_new_ns(clock_type, read_timer_cb, timer_opaque); - ts->timers[1] = timer_new_ns(clock_type, write_timer_cb, timer_opaque); + ts->read_timer_cb = read_timer_cb; + ts->write_timer_cb = write_timer_cb; + ts->timer_opaque = timer_opaque; + throttle_attach_aio_context(ts, aio_context); } /* destroy a timer */ @@ -181,8 +194,8 @@ static void throttle_timer_destroy(QEMUTimer **timer) *timer = NULL; } -/* To be called last on the ThrottleState */ -void throttle_destroy(ThrottleState *ts) +/* Remove timers from event loop */ +void throttle_detach_aio_context(ThrottleState *ts) { int i; @@ -191,6 +204,12 @@ void throttle_destroy(ThrottleState *ts) } } +/* To be called last on the ThrottleState */ +void throttle_destroy(ThrottleState *ts) +{ + throttle_detach_aio_context(ts); +} + /* is any throttling timer configured */ bool throttle_have_timer(ThrottleState *ts) {