diff --git a/async.c b/async.c
index 6930185e64..5b6fe6b4cc 100644
--- a/async.c
+++ b/async.c
@@ -117,15 +117,21 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
 
 void qemu_bh_schedule(QEMUBH *bh)
 {
+    AioContext *ctx;
+
     if (bh->scheduled)
         return;
+    ctx = bh->ctx;
     bh->idle = 0;
-    /* Make sure that idle & any writes needed by the callback are done
-     * before the locations are read in the aio_bh_poll.
+    /* Make sure that:
+     * 1. idle & any writes needed by the callback are done before the
+     *    locations are read in the aio_bh_poll.
+     * 2. ctx is loaded before scheduled is set and the callback has a chance
+     *    to execute.
      */
-    smp_wmb();
+    smp_mb();
     bh->scheduled = 1;
-    aio_notify(bh->ctx);
+    aio_notify(ctx);
 }
 
 
diff --git a/block-migration.c b/block-migration.c
index 16562709c8..25a03889f4 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -629,6 +629,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
             block_mig_state.submitted, block_mig_state.transferred);
 
     qemu_mutex_lock_iothread();
+    init_blk_migration(f);
 
     /* start track dirty blocks */
     ret = set_dirty_tracking();
@@ -638,8 +639,6 @@ static int block_save_setup(QEMUFile *f, void *opaque)
         return ret;
     }
 
-    init_blk_migration(f);
-
     qemu_mutex_unlock_iothread();
 
     ret = flush_blks(f);
diff --git a/block.c b/block.c
index 310ea89fce..17f763db79 100644
--- a/block.c
+++ b/block.c
@@ -179,6 +179,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs)
 {
     assert(!bs->io_limits_enabled);
     throttle_init(&bs->throttle_state,
+                  bdrv_get_aio_context(bs),
                   QEMU_CLOCK_VIRTUAL,
                   bdrv_throttle_read_timer_cb,
                   bdrv_throttle_write_timer_cb,
@@ -363,6 +364,7 @@ BlockDriverState *bdrv_new(const char *device_name, Error **errp)
     qemu_co_queue_init(&bs->throttled_reqs[0]);
     qemu_co_queue_init(&bs->throttled_reqs[1]);
     bs->refcnt = 1;
+    bs->aio_context = qemu_get_aio_context();
 
     return bs;
 }
@@ -1856,7 +1858,11 @@ void bdrv_close_all(void)
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         bdrv_close(bs);
+        aio_context_release(aio_context);
     }
 }
 
@@ -1881,17 +1887,6 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
     return false;
 }
 
-static bool bdrv_requests_pending_all(void)
-{
-    BlockDriverState *bs;
-    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
-        if (bdrv_requests_pending(bs)) {
-            return true;
-        }
-    }
-    return false;
-}
-
 /*
  * Wait for pending requests to complete across all BlockDriverStates
  *
@@ -1911,12 +1906,20 @@ void bdrv_drain_all(void)
     BlockDriverState *bs;
 
     while (busy) {
-        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
-            bdrv_start_throttled_reqs(bs);
-        }
+        busy = false;
 
-        busy = bdrv_requests_pending_all();
-        busy |= aio_poll(qemu_get_aio_context(), busy);
+        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+            AioContext *aio_context = bdrv_get_aio_context(bs);
+            bool bs_busy;
+
+            aio_context_acquire(aio_context);
+            bdrv_start_throttled_reqs(bs);
+            bs_busy = bdrv_requests_pending(bs);
+            bs_busy |= aio_poll(aio_context, bs_busy);
+            aio_context_release(aio_context);
+
+            busy |= bs_busy;
+        }
     }
 }
 
@@ -2352,12 +2355,17 @@ int bdrv_commit_all(void)
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         if (bs->drv && bs->backing_hd) {
             int ret = bdrv_commit(bs);
             if (ret < 0) {
+                aio_context_release(aio_context);
                 return ret;
             }
         }
+        aio_context_release(aio_context);
     }
     return 0;
 }
@@ -2775,10 +2783,12 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
         /* Fast-path if already in coroutine context */
         bdrv_rw_co_entry(&rwco);
     } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
         co = qemu_coroutine_create(bdrv_rw_co_entry);
         qemu_coroutine_enter(co, &rwco);
         while (rwco.ret == NOT_DONE) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
     return rwco.ret;
@@ -3831,10 +3841,15 @@ int bdrv_flush_all(void)
     int result = 0;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
-        int ret = bdrv_flush(bs);
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        int ret;
+
+        aio_context_acquire(aio_context);
+        ret = bdrv_flush(bs);
         if (ret < 0 && !result) {
             result = ret;
         }
+        aio_context_release(aio_context);
     }
 
     return result;
@@ -4025,10 +4040,12 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
         /* Fast-path if already in coroutine context */
         bdrv_get_block_status_co_entry(&data);
     } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
         co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
         qemu_coroutine_enter(co, &data);
         while (!data.done) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
     return data.ret;
@@ -4621,7 +4638,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
     acb->is_write = is_write;
     acb->qiov = qiov;
     acb->bounce = qemu_blockalign(bs, qiov->size);
-    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
 
     if (is_write) {
         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@@ -4660,13 +4677,14 @@ typedef struct BlockDriverAIOCBCoroutine {
 
 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
 {
+    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
     BlockDriverAIOCBCoroutine *acb =
         container_of(blockacb, BlockDriverAIOCBCoroutine, common);
     bool done = false;
 
     acb->done = &done;
     while (!done) {
-        qemu_aio_wait();
+        aio_poll(aio_context, true);
     }
 }
 
@@ -4703,7 +4721,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
     }
 
-    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4739,7 +4757,7 @@ static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
     BlockDriverState *bs = acb->common.bs;
 
     acb->req.error = bdrv_co_flush(bs);
-    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4766,7 +4784,7 @@ static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
     BlockDriverState *bs = acb->common.bs;
 
     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
-    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4977,7 +4995,11 @@ void bdrv_invalidate_cache_all(Error **errp)
     Error *local_err = NULL;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         bdrv_invalidate_cache(bs, &local_err);
+        aio_context_release(aio_context);
         if (local_err) {
             error_propagate(errp, local_err);
             return;
@@ -4990,7 +5012,11 @@ void bdrv_clear_incoming_migration_all(void)
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
+        aio_context_release(aio_context);
     }
 }
 
@@ -5006,10 +5032,12 @@ int bdrv_flush(BlockDriverState *bs)
         /* Fast-path if already in coroutine context */
         bdrv_flush_co_entry(&rwco);
     } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
         co = qemu_coroutine_create(bdrv_flush_co_entry);
         qemu_coroutine_enter(co, &rwco);
         while (rwco.ret == NOT_DONE) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
 
@@ -5119,10 +5147,12 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
         /* Fast-path if already in coroutine context */
         bdrv_discard_co_entry(&rwco);
     } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
         co = qemu_coroutine_create(bdrv_discard_co_entry);
         qemu_coroutine_enter(co, &rwco);
         while (rwco.ret == NOT_DONE) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
 
@@ -5633,8 +5663,66 @@ out:
 
 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
 {
-    /* Currently BlockDriverState always uses the main loop AioContext */
-    return qemu_get_aio_context();
+    return bs->aio_context;
+}
+
+void bdrv_detach_aio_context(BlockDriverState *bs)
+{
+    if (!bs->drv) {
+        return;
+    }
+
+    if (bs->io_limits_enabled) {
+        throttle_detach_aio_context(&bs->throttle_state);
+    }
+    if (bs->drv->bdrv_detach_aio_context) {
+        bs->drv->bdrv_detach_aio_context(bs);
+    }
+    if (bs->file) {
+        bdrv_detach_aio_context(bs->file);
+    }
+    if (bs->backing_hd) {
+        bdrv_detach_aio_context(bs->backing_hd);
+    }
+
+    bs->aio_context = NULL;
+}
+
+void bdrv_attach_aio_context(BlockDriverState *bs,
+                             AioContext *new_context)
+{
+    if (!bs->drv) {
+        return;
+    }
+
+    bs->aio_context = new_context;
+
+    if (bs->backing_hd) {
+        bdrv_attach_aio_context(bs->backing_hd, new_context);
+    }
+    if (bs->file) {
+        bdrv_attach_aio_context(bs->file, new_context);
+    }
+    if (bs->drv->bdrv_attach_aio_context) {
+        bs->drv->bdrv_attach_aio_context(bs, new_context);
+    }
+    if (bs->io_limits_enabled) {
+        throttle_attach_aio_context(&bs->throttle_state, new_context);
+    }
+}
+
+void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
+{
+    bdrv_drain_all(); /* ensure there are no in-flight requests */
+
+    bdrv_detach_aio_context(bs);
+
+    /* This function executes in the old AioContext so acquire the new one in
+     * case it runs in a different thread.
+     */
+    aio_context_acquire(new_context);
+    bdrv_attach_aio_context(bs, new_context);
+    aio_context_release(new_context);
 }
 
 void bdrv_add_before_write_notifier(BlockDriverState *bs,
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 380c736101..f51407de3f 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -471,7 +471,7 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
     acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
     acb->ret = -error;
 
-    bh = qemu_bh_new(error_callback_bh, acb);
+    bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb);
     acb->bh = bh;
     qemu_bh_schedule(bh);
 
diff --git a/block/blkverify.c b/block/blkverify.c
index e1c31171c3..621b78593b 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -39,12 +39,13 @@ struct BlkverifyAIOCB {
 static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
+    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
     bool finished = false;
 
     /* Wait until request completes, invokes its callback, and frees itself */
     acb->finished = &finished;
     while (!finished) {
-        qemu_aio_wait();
+        aio_poll(aio_context, true);
     }
 }
 
@@ -228,7 +229,8 @@ static void blkverify_aio_cb(void *opaque, int ret)
             acb->verify(acb);
         }
 
-        acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
+        acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+                             blkverify_aio_bh, acb);
         qemu_bh_schedule(acb->bh);
         break;
     }
@@ -302,21 +304,40 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
     return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
 }
 
+/* Propagate AioContext changes to ->test_file */
+static void blkverify_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+
+    bdrv_detach_aio_context(s->test_file);
+}
+
+static void blkverify_attach_aio_context(BlockDriverState *bs,
+                                         AioContext *new_context)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+
+    bdrv_attach_aio_context(s->test_file, new_context);
+}
+
 static BlockDriver bdrv_blkverify = {
-    .format_name            = "blkverify",
-    .protocol_name          = "blkverify",
-    .instance_size          = sizeof(BDRVBlkverifyState),
+    .format_name                      = "blkverify",
+    .protocol_name                    = "blkverify",
+    .instance_size                    = sizeof(BDRVBlkverifyState),
 
-    .bdrv_parse_filename    = blkverify_parse_filename,
-    .bdrv_file_open         = blkverify_open,
-    .bdrv_close             = blkverify_close,
-    .bdrv_getlength         = blkverify_getlength,
+    .bdrv_parse_filename              = blkverify_parse_filename,
+    .bdrv_file_open                   = blkverify_open,
+    .bdrv_close                       = blkverify_close,
+    .bdrv_getlength                   = blkverify_getlength,
 
-    .bdrv_aio_readv         = blkverify_aio_readv,
-    .bdrv_aio_writev        = blkverify_aio_writev,
-    .bdrv_aio_flush         = blkverify_aio_flush,
+    .bdrv_aio_readv                   = blkverify_aio_readv,
+    .bdrv_aio_writev                  = blkverify_aio_writev,
+    .bdrv_aio_flush                   = blkverify_aio_flush,
 
-    .is_filter              = true,
+    .bdrv_attach_aio_context          = blkverify_attach_aio_context,
+    .bdrv_detach_aio_context          = blkverify_detach_aio_context,
+
+    .is_filter                        = true,
     .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
 };
 
diff --git a/block/curl.c b/block/curl.c
index f491b0ba4c..8c84141ced 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -110,6 +110,7 @@ typedef struct BDRVCURLState {
     size_t readahead_size;
     bool sslverify;
     bool accept_range;
+    AioContext *aio_context;
 } BDRVCURLState;
 
 static void curl_clean_state(CURLState *s);
@@ -134,25 +135,29 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 #endif
 
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
-                        void *s, void *sp)
+                        void *userp, void *sp)
 {
+    BDRVCURLState *s;
     CURLState *state = NULL;
     curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
     state->sock_fd = fd;
+    s = state->s;
 
     DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
     switch (action) {
         case CURL_POLL_IN:
-            qemu_aio_set_fd_handler(fd, curl_multi_read, NULL, state);
+            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
+                               NULL, state);
             break;
         case CURL_POLL_OUT:
-            qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
             break;
         case CURL_POLL_INOUT:
-            qemu_aio_set_fd_handler(fd, curl_multi_read, curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
+                               curl_multi_do, state);
             break;
         case CURL_POLL_REMOVE:
-            qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
+            aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
             break;
     }
 
@@ -365,7 +370,7 @@ static CURLState *curl_init_state(BDRVCURLState *s)
             break;
         }
         if (!state) {
-            qemu_aio_wait();
+            aio_poll(state->s->aio_context, true);
         }
     } while(!state);
 
@@ -422,6 +427,51 @@ static void curl_parse_filename(const char *filename, QDict *options,
     qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
 }
 
+static void curl_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVCURLState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < CURL_NUM_STATES; i++) {
+        if (s->states[i].in_use) {
+            curl_clean_state(&s->states[i]);
+        }
+        if (s->states[i].curl) {
+            curl_easy_cleanup(s->states[i].curl);
+            s->states[i].curl = NULL;
+        }
+        if (s->states[i].orig_buf) {
+            g_free(s->states[i].orig_buf);
+            s->states[i].orig_buf = NULL;
+        }
+    }
+    if (s->multi) {
+        curl_multi_cleanup(s->multi);
+        s->multi = NULL;
+    }
+
+    timer_del(&s->timer);
+}
+
+static void curl_attach_aio_context(BlockDriverState *bs,
+                                    AioContext *new_context)
+{
+    BDRVCURLState *s = bs->opaque;
+
+    aio_timer_init(new_context, &s->timer,
+                   QEMU_CLOCK_REALTIME, SCALE_NS,
+                   curl_multi_timeout_do, s);
+
+    assert(!s->multi);
+    s->multi = curl_multi_init();
+    s->aio_context = new_context;
+    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+#ifdef NEED_CURL_TIMER_CALLBACK
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
+#endif
+}
+
 static QemuOptsList runtime_opts = {
     .name = "curl",
     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -491,6 +541,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     DPRINTF("CURL: Opening %s\n", file);
+    s->aio_context = bdrv_get_aio_context(bs);
     s->url = g_strdup(file);
     state = curl_init_state(s);
     if (!state)
@@ -523,19 +574,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     curl_easy_cleanup(state->curl);
     state->curl = NULL;
 
-    aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
-                   QEMU_CLOCK_REALTIME, SCALE_NS,
-                   curl_multi_timeout_do, s);
-
-    // Now we know the file exists and its size, so let's
-    // initialize the multi interface!
-
-    s->multi = curl_multi_init();
-    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
-#ifdef NEED_CURL_TIMER_CALLBACK
-    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
-    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
-#endif
+    curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
     qemu_opts_del(opts);
     return 0;
@@ -630,7 +669,7 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
     acb->sector_num = sector_num;
     acb->nb_sectors = nb_sectors;
 
-    acb->bh = qemu_bh_new(curl_readv_bh_cb, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
     qemu_bh_schedule(acb->bh);
     return &acb->common;
 }
@@ -638,25 +677,9 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
 static void curl_close(BlockDriverState *bs)
 {
     BDRVCURLState *s = bs->opaque;
-    int i;
 
     DPRINTF("CURL: Close\n");
-    for (i=0; i<CURL_NUM_STATES; i++) {
-        if (s->states[i].in_use)
-            curl_clean_state(&s->states[i]);
-        if (s->states[i].curl) {
-            curl_easy_cleanup(s->states[i].curl);
-            s->states[i].curl = NULL;
-        }
-        if (s->states[i].orig_buf) {
-            g_free(s->states[i].orig_buf);
-            s->states[i].orig_buf = NULL;
-        }
-    }
-    if (s->multi)
-        curl_multi_cleanup(s->multi);
-
-    timer_del(&s->timer);
+    curl_detach_aio_context(bs);
 
     g_free(s->url);
 }
@@ -668,68 +691,83 @@ static int64_t curl_getlength(BlockDriverState *bs)
 }
 
 static BlockDriver bdrv_http = {
-    .format_name            = "http",
-    .protocol_name          = "http",
+    .format_name                = "http",
+    .protocol_name              = "http",
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_aio_readv             = curl_aio_readv,
+
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static BlockDriver bdrv_https = {
-    .format_name            = "https",
-    .protocol_name          = "https",
+    .format_name                = "https",
+    .protocol_name              = "https",
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_aio_readv             = curl_aio_readv,
+
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static BlockDriver bdrv_ftp = {
-    .format_name            = "ftp",
-    .protocol_name          = "ftp",
+    .format_name                = "ftp",
+    .protocol_name              = "ftp",
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_aio_readv             = curl_aio_readv,
+
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static BlockDriver bdrv_ftps = {
-    .format_name            = "ftps",
-    .protocol_name          = "ftps",
+    .format_name                = "ftps",
+    .protocol_name              = "ftps",
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_aio_readv             = curl_aio_readv,
+
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static BlockDriver bdrv_tftp = {
-    .format_name            = "tftp",
-    .protocol_name          = "tftp",
+    .format_name                = "tftp",
+    .protocol_name              = "tftp",
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_aio_readv             = curl_aio_readv,
+
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static void curl_block_init(void)
diff --git a/block/gluster.c b/block/gluster.c
index d0726ec92c..114689e441 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -16,6 +16,7 @@ typedef struct GlusterAIOCB {
     int ret;
     QEMUBH *bh;
     Coroutine *coroutine;
+    AioContext *aio_context;
 } GlusterAIOCB;
 
 typedef struct BDRVGlusterState {
@@ -249,7 +250,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
         acb->ret = -EIO; /* Partial read/write - fail it */
     }
 
-    acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+    acb->bh = aio_bh_new(acb->aio_context, qemu_gluster_complete_aio, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -436,6 +437,7 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
     acb->size = size;
     acb->ret = 0;
     acb->coroutine = qemu_coroutine_self();
+    acb->aio_context = bdrv_get_aio_context(bs);
 
     ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
     if (ret < 0) {
@@ -549,6 +551,7 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
     acb->size = size;
     acb->ret = 0;
     acb->coroutine = qemu_coroutine_self();
+    acb->aio_context = bdrv_get_aio_context(bs);
 
     if (write) {
         ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -605,6 +608,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
     acb->size = 0;
     acb->ret = 0;
     acb->coroutine = qemu_coroutine_self();
+    acb->aio_context = bdrv_get_aio_context(bs);
 
     ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
     if (ret < 0) {
@@ -633,6 +637,7 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
     acb->size = 0;
     acb->ret = 0;
     acb->coroutine = qemu_coroutine_self();
+    acb->aio_context = bdrv_get_aio_context(bs);
 
     ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
     if (ret < 0) {
diff --git a/block/iscsi.c b/block/iscsi.c
index 3892cc551e..877b877cf2 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -49,6 +49,7 @@
 
 typedef struct IscsiLun {
     struct iscsi_context *iscsi;
+    AioContext *aio_context;
     int lun;
     enum scsi_inquiry_peripheral_device_type type;
     int block_size;
@@ -73,6 +74,7 @@ typedef struct IscsiTask {
     struct scsi_task *task;
     Coroutine *co;
     QEMUBH *bh;
+    IscsiLun *iscsilun;
 } IscsiTask;
 
 typedef struct IscsiAIOCB {
@@ -133,7 +135,7 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
     if (acb->bh) {
         return;
     }
-    acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
+    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -169,7 +171,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 
 out:
     if (iTask->co) {
-        iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
+        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
+                               iscsi_co_generic_bh_cb, iTask);
         qemu_bh_schedule(iTask->bh);
     }
 }
@@ -177,8 +180,9 @@ out:
 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 {
     *iTask = (struct IscsiTask) {
-        .co         = qemu_coroutine_self(),
-        .retries    = ISCSI_CMD_RETRIES,
+        .co             = qemu_coroutine_self(),
+        .retries        = ISCSI_CMD_RETRIES,
+        .iscsilun       = iscsilun,
     };
 }
 
@@ -209,7 +213,7 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
                                      iscsi_abort_task_cb, acb);
 
     while (acb->status == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(iscsilun->aio_context, true);
     }
 }
 
@@ -232,10 +236,11 @@ iscsi_set_events(IscsiLun *iscsilun)
     ev = POLLIN;
     ev |= iscsi_which_events(iscsi);
     if (ev != iscsilun->events) {
-        qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
-                      iscsi_process_read,
-                      (ev & POLLOUT) ? iscsi_process_write : NULL,
-                      iscsilun);
+        aio_set_fd_handler(iscsilun->aio_context,
+                           iscsi_get_fd(iscsi),
+                           iscsi_process_read,
+                           (ev & POLLOUT) ? iscsi_process_write : NULL,
+                           iscsilun);
 
     }
 
@@ -791,7 +796,7 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
 
         while (status == -EINPROGRESS) {
-            qemu_aio_wait();
+            aio_poll(iscsilun->aio_context, true);
         }
 
         return 0;
@@ -1195,6 +1200,40 @@ fail_with_err:
     return NULL;
 }
 
+static void iscsi_detach_aio_context(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+
+    aio_set_fd_handler(iscsilun->aio_context,
+                       iscsi_get_fd(iscsilun->iscsi),
+                       NULL, NULL, NULL);
+    iscsilun->events = 0;
+
+    if (iscsilun->nop_timer) {
+        timer_del(iscsilun->nop_timer);
+        timer_free(iscsilun->nop_timer);
+        iscsilun->nop_timer = NULL;
+    }
+}
+
+static void iscsi_attach_aio_context(BlockDriverState *bs,
+                                     AioContext *new_context)
+{
+    IscsiLun *iscsilun = bs->opaque;
+
+    iscsilun->aio_context = new_context;
+    iscsi_set_events(iscsilun);
+
+#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
+    /* Set up a timer for sending out iSCSI NOPs */
+    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
+                                        QEMU_CLOCK_REALTIME, SCALE_MS,
+                                        iscsi_nop_timed_event, iscsilun);
+    timer_mod(iscsilun->nop_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
+#endif
+}
+
 /*
  * We support iscsi url's on the form
  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
@@ -1301,6 +1340,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     iscsilun->iscsi = iscsi;
+    iscsilun->aio_context = bdrv_get_aio_context(bs);
     iscsilun->lun   = iscsi_url->lun;
     iscsilun->has_write_same = true;
 
@@ -1374,11 +1414,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     scsi_free_scsi_task(task);
     task = NULL;
 
-#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
-    /* Set up a timer for sending out iSCSI NOPs */
-    iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
-    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
-#endif
+    iscsi_attach_aio_context(bs, iscsilun->aio_context);
 
     /* Guess the internal cluster (page) size of the iscsi target by the means
      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
@@ -1422,11 +1458,7 @@ static void iscsi_close(BlockDriverState *bs)
     IscsiLun *iscsilun = bs->opaque;
     struct iscsi_context *iscsi = iscsilun->iscsi;
 
-    if (iscsilun->nop_timer) {
-        timer_del(iscsilun->nop_timer);
-        timer_free(iscsilun->nop_timer);
-    }
-    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
+    iscsi_detach_aio_context(bs);
     iscsi_destroy_context(iscsi);
     g_free(iscsilun->zeroblock);
     g_free(iscsilun->allocationmap);
@@ -1530,10 +1562,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options,
     if (ret != 0) {
         goto out;
     }
-    if (iscsilun->nop_timer) {
-        timer_del(iscsilun->nop_timer);
-        timer_free(iscsilun->nop_timer);
-    }
+    iscsi_detach_aio_context(bs);
     if (iscsilun->type != TYPE_DISK) {
         ret = -ENODEV;
         goto out;
@@ -1604,6 +1633,9 @@ static BlockDriver bdrv_iscsi = {
     .bdrv_ioctl       = iscsi_ioctl,
     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
 #endif
+
+    .bdrv_detach_aio_context = iscsi_detach_aio_context,
+    .bdrv_attach_aio_context = iscsi_attach_aio_context,
 };
 
 static QemuOptsList qemu_iscsi_opts = {
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 53434e2df5..f0a2c087b2 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -177,6 +177,20 @@ out_free_aiocb:
     return NULL;
 }
 
+void laio_detach_aio_context(void *s_, AioContext *old_context)
+{
+    struct qemu_laio_state *s = s_;
+
+    aio_set_event_notifier(old_context, &s->e, NULL);
+}
+
+void laio_attach_aio_context(void *s_, AioContext *new_context)
+{
+    struct qemu_laio_state *s = s_;
+
+    aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+}
+
 void *laio_init(void)
 {
     struct qemu_laio_state *s;
@@ -190,8 +204,6 @@ void *laio_init(void)
         goto out_close_efd;
     }
 
-    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);
-
     return s;
 
 out_close_efd:
@@ -200,3 +212,11 @@ out_free_state:
     g_free(s);
     return NULL;
 }
+
+void laio_cleanup(void *s_)
+{
+    struct qemu_laio_state *s = s_;
+
+    event_notifier_cleanup(&s->e);
+    g_free(s);
+}
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 7d698cb619..6e1c97cad0 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,7 +49,7 @@ static void nbd_teardown_connection(NbdClientSession *client)
     shutdown(client->sock, 2);
     nbd_recv_coroutines_enter_all(client);
 
-    qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
+    nbd_client_session_detach_aio_context(client);
     closesocket(client->sock);
     client->sock = -1;
 }
@@ -103,11 +103,14 @@ static int nbd_co_send_request(NbdClientSession *s,
     struct nbd_request *request,
     QEMUIOVector *qiov, int offset)
 {
+    AioContext *aio_context;
     int rc, ret;
 
     qemu_co_mutex_lock(&s->send_mutex);
     s->send_coroutine = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
+    aio_context = bdrv_get_aio_context(s->bs);
+    aio_set_fd_handler(aio_context, s->sock,
+                       nbd_reply_ready, nbd_restart_write, s);
     if (qiov) {
         if (!s->is_unix) {
             socket_set_cork(s->sock, 1);
@@ -126,7 +129,7 @@ static int nbd_co_send_request(NbdClientSession *s,
     } else {
         rc = nbd_send_request(s->sock, request);
     }
-    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
+    aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, s);
     s->send_coroutine = NULL;
     qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
@@ -335,6 +338,19 @@ int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
 
 }
 
+void nbd_client_session_detach_aio_context(NbdClientSession *client)
+{
+    aio_set_fd_handler(bdrv_get_aio_context(client->bs), client->sock,
+                       NULL, NULL, NULL);
+}
+
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+                                           AioContext *new_context)
+{
+    aio_set_fd_handler(new_context, client->sock,
+                       nbd_reply_ready, NULL, client);
+}
+
 void nbd_client_session_close(NbdClientSession *client)
 {
     struct nbd_request request = {
@@ -381,7 +397,7 @@ int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
     /* Now that we're connected, set the socket to be non-blocking and
      * kick the reply mechanism.  */
     qemu_set_nonblock(sock);
-    qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, client);
+    nbd_client_session_attach_aio_context(client, bdrv_get_aio_context(bs));
 
     logout("Established connection with NBD server\n");
     return 0;
diff --git a/block/nbd-client.h b/block/nbd-client.h
index f2a63378bb..cd478f3a98 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -47,4 +47,8 @@ int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
 int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
                                 int nb_sectors, QEMUIOVector *qiov);
 
+void nbd_client_session_detach_aio_context(NbdClientSession *client);
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+                                           AioContext *new_context);
+
 #endif /* NBD_CLIENT_H */
diff --git a/block/nbd.c b/block/nbd.c
index 613f2581ae..4eda0958d7 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -323,46 +323,67 @@ static int64_t nbd_getlength(BlockDriverState *bs)
     return s->client.size;
 }
 
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    nbd_client_session_detach_aio_context(&s->client);
+}
+
+static void nbd_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    nbd_client_session_attach_aio_context(&s->client, new_context);
+}
+
 static BlockDriver bdrv_nbd = {
-    .format_name         = "nbd",
-    .protocol_name       = "nbd",
-    .instance_size       = sizeof(BDRVNBDState),
-    .bdrv_parse_filename = nbd_parse_filename,
-    .bdrv_file_open      = nbd_open,
-    .bdrv_co_readv       = nbd_co_readv,
-    .bdrv_co_writev      = nbd_co_writev,
-    .bdrv_close          = nbd_close,
-    .bdrv_co_flush_to_os = nbd_co_flush,
-    .bdrv_co_discard     = nbd_co_discard,
-    .bdrv_getlength      = nbd_getlength,
+    .format_name                = "nbd",
+    .protocol_name              = "nbd",
+    .instance_size              = sizeof(BDRVNBDState),
+    .bdrv_parse_filename        = nbd_parse_filename,
+    .bdrv_file_open             = nbd_open,
+    .bdrv_co_readv              = nbd_co_readv,
+    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_close                 = nbd_close,
+    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
 };
 
 static BlockDriver bdrv_nbd_tcp = {
-    .format_name         = "nbd",
-    .protocol_name       = "nbd+tcp",
-    .instance_size       = sizeof(BDRVNBDState),
-    .bdrv_parse_filename = nbd_parse_filename,
-    .bdrv_file_open      = nbd_open,
-    .bdrv_co_readv       = nbd_co_readv,
-    .bdrv_co_writev      = nbd_co_writev,
-    .bdrv_close          = nbd_close,
-    .bdrv_co_flush_to_os = nbd_co_flush,
-    .bdrv_co_discard     = nbd_co_discard,
-    .bdrv_getlength      = nbd_getlength,
+    .format_name                = "nbd",
+    .protocol_name              = "nbd+tcp",
+    .instance_size              = sizeof(BDRVNBDState),
+    .bdrv_parse_filename        = nbd_parse_filename,
+    .bdrv_file_open             = nbd_open,
+    .bdrv_co_readv              = nbd_co_readv,
+    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_close                 = nbd_close,
+    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
 };
 
 static BlockDriver bdrv_nbd_unix = {
-    .format_name         = "nbd",
-    .protocol_name       = "nbd+unix",
-    .instance_size       = sizeof(BDRVNBDState),
-    .bdrv_parse_filename = nbd_parse_filename,
-    .bdrv_file_open      = nbd_open,
-    .bdrv_co_readv       = nbd_co_readv,
-    .bdrv_co_writev      = nbd_co_writev,
-    .bdrv_close          = nbd_close,
-    .bdrv_co_flush_to_os = nbd_co_flush,
-    .bdrv_co_discard     = nbd_co_discard,
-    .bdrv_getlength      = nbd_getlength,
+    .format_name                = "nbd",
+    .protocol_name              = "nbd+unix",
+    .instance_size              = sizeof(BDRVNBDState),
+    .bdrv_parse_filename        = nbd_parse_filename,
+    .bdrv_file_open             = nbd_open,
+    .bdrv_co_readv              = nbd_co_readv,
+    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_close                 = nbd_close,
+    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
 };
 
 static void bdrv_nbd_init(void)
diff --git a/block/nfs.c b/block/nfs.c
index 539bd951df..bd9177f3ae 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -40,6 +40,7 @@ typedef struct NFSClient {
     struct nfsfh *fh;
     int events;
     bool has_zero_init;
+    AioContext *aio_context;
 } NFSClient;
 
 typedef struct NFSRPC {
@@ -49,6 +50,7 @@ typedef struct NFSRPC {
     struct stat *st;
     Coroutine *co;
     QEMUBH *bh;
+    NFSClient *client;
 } NFSRPC;
 
 static void nfs_process_read(void *arg);
@@ -58,10 +60,11 @@ static void nfs_set_events(NFSClient *client)
 {
     int ev = nfs_which_events(client->context);
     if (ev != client->events) {
-        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
-                      (ev & POLLIN) ? nfs_process_read : NULL,
-                      (ev & POLLOUT) ? nfs_process_write : NULL,
-                      client);
+        aio_set_fd_handler(client->aio_context,
+                           nfs_get_fd(client->context),
+                           (ev & POLLIN) ? nfs_process_read : NULL,
+                           (ev & POLLOUT) ? nfs_process_write : NULL,
+                           client);
 
     }
     client->events = ev;
@@ -84,7 +87,8 @@ static void nfs_process_write(void *arg)
 static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
 {
     *task = (NFSRPC) {
-        .co         = qemu_coroutine_self(),
+        .co             = qemu_coroutine_self(),
+        .client         = client,
     };
 }
 
@@ -116,7 +120,8 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
         error_report("NFS Error: %s", nfs_get_error(nfs));
     }
     if (task->co) {
-        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
+        task->bh = aio_bh_new(task->client->aio_context,
+                              nfs_co_generic_bh_cb, task);
         qemu_bh_schedule(task->bh);
     }
 }
@@ -224,13 +229,34 @@ static QemuOptsList runtime_opts = {
     },
 };
 
+static void nfs_detach_aio_context(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+
+    aio_set_fd_handler(client->aio_context,
+                       nfs_get_fd(client->context),
+                       NULL, NULL, NULL);
+    client->events = 0;
+}
+
+static void nfs_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    NFSClient *client = bs->opaque;
+
+    client->aio_context = new_context;
+    nfs_set_events(client);
+}
+
 static void nfs_client_close(NFSClient *client)
 {
     if (client->context) {
         if (client->fh) {
             nfs_close(client->context, client->fh);
         }
-        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
+        aio_set_fd_handler(client->aio_context,
+                           nfs_get_fd(client->context),
+                           NULL, NULL, NULL);
         nfs_destroy_context(client->context);
     }
     memset(client, 0, sizeof(NFSClient));
@@ -345,6 +371,8 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
     QemuOpts *opts;
     Error *local_err = NULL;
 
+    client->aio_context = bdrv_get_aio_context(bs);
+
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
@@ -368,6 +396,8 @@ static int nfs_file_create(const char *url, QEMUOptionParameter *options,
     int64_t total_size = 0;
     NFSClient *client = g_malloc0(sizeof(NFSClient));
 
+    client->aio_context = qemu_get_aio_context();
+
     /* Read out options */
     while (options && options->name) {
         if (!strcmp(options->name, "size")) {
@@ -407,7 +437,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
 
     while (!task.complete) {
         nfs_set_events(client);
-        qemu_aio_wait();
+        aio_poll(client->aio_context, true);
     }
 
     return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
@@ -420,22 +450,25 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
 }
 
 static BlockDriver bdrv_nfs = {
-    .format_name     = "nfs",
-    .protocol_name   = "nfs",
+    .format_name                    = "nfs",
+    .protocol_name                  = "nfs",
 
-    .instance_size   = sizeof(NFSClient),
-    .bdrv_needs_filename = true,
-    .bdrv_has_zero_init = nfs_has_zero_init,
-    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
-    .bdrv_truncate = nfs_file_truncate,
+    .instance_size                  = sizeof(NFSClient),
+    .bdrv_needs_filename            = true,
+    .bdrv_has_zero_init             = nfs_has_zero_init,
+    .bdrv_get_allocated_file_size   = nfs_get_allocated_file_size,
+    .bdrv_truncate                  = nfs_file_truncate,
 
-    .bdrv_file_open  = nfs_file_open,
-    .bdrv_close      = nfs_file_close,
-    .bdrv_create     = nfs_file_create,
+    .bdrv_file_open                 = nfs_file_open,
+    .bdrv_close                     = nfs_file_close,
+    .bdrv_create                    = nfs_file_create,
 
-    .bdrv_co_readv         = nfs_co_readv,
-    .bdrv_co_writev        = nfs_co_writev,
-    .bdrv_co_flush_to_disk = nfs_co_flush,
+    .bdrv_co_readv                  = nfs_co_readv,
+    .bdrv_co_writev                 = nfs_co_writev,
+    .bdrv_co_flush_to_disk          = nfs_co_flush,
+
+    .bdrv_detach_aio_context        = nfs_detach_aio_context,
+    .bdrv_attach_aio_context        = nfs_attach_aio_context,
 };
 
 static void nfs_block_init(void)
diff --git a/block/qed-table.c b/block/qed-table.c
index 76d2dcccf8..f61107a1cf 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -173,7 +173,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
     qed_read_table(s, s->header.l1_table_offset,
                    s->l1_table, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(s->bs), true);
     }
 
     return ret;
@@ -194,7 +194,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
 
     qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(s->bs), true);
     }
 
     return ret;
@@ -267,7 +267,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
 
     qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(s->bs), true);
     }
 
     return ret;
@@ -289,7 +289,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
 
     qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(s->bs), true);
     }
 
     return ret;
diff --git a/block/qed.c b/block/qed.c
index c130e42d0d..79f5bd392a 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -21,12 +21,13 @@
 static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
     bool finished = false;
 
     /* Wait for the request to finish */
     acb->finished = &finished;
     while (!finished) {
-        qemu_aio_wait();
+        aio_poll(aio_context, true);
     }
 }
 
@@ -373,6 +374,27 @@ static void bdrv_qed_rebind(BlockDriverState *bs)
     s->bs = bs;
 }
 
+static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    qed_cancel_need_check_timer(s);
+    timer_free(s->need_check_timer);
+}
+
+static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
+                                        AioContext *new_context)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    s->need_check_timer = aio_timer_new(new_context,
+                                        QEMU_CLOCK_VIRTUAL, SCALE_NS,
+                                        qed_need_check_timer_cb, s);
+    if (s->header.features & QED_F_NEED_CHECK) {
+        qed_start_need_check_timer(s);
+    }
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
@@ -496,8 +518,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
-    s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                            qed_need_check_timer_cb, s);
+    bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
 out:
     if (ret) {
@@ -528,8 +549,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
-    qed_cancel_need_check_timer(s);
-    timer_free(s->need_check_timer);
+    bdrv_qed_detach_aio_context(bs);
 
     /* Ensure writes reach stable storage */
     bdrv_flush(bs->file);
@@ -919,7 +939,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
 
     /* Arrange for a bh to invoke the completion function */
     acb->bh_ret = ret;
-    acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+                         qed_aio_complete_bh, acb);
     qemu_bh_schedule(acb->bh);
 
     /* Start next allocating write request waiting behind this one.  Note that
@@ -1644,6 +1665,8 @@ static BlockDriver bdrv_qed = {
     .bdrv_change_backing_file = bdrv_qed_change_backing_file,
     .bdrv_invalidate_cache    = bdrv_qed_invalidate_cache,
     .bdrv_check               = bdrv_qed_check,
+    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
+    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
 };
 
 static void bdrv_qed_init(void)
diff --git a/block/quorum.c b/block/quorum.c
index ecec3a5407..426077a520 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -848,25 +848,49 @@ static void quorum_close(BlockDriverState *bs)
     g_free(s->bs);
 }
 
+static void quorum_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_detach_aio_context(s->bs[i]);
+    }
+}
+
+static void quorum_attach_aio_context(BlockDriverState *bs,
+                                      AioContext *new_context)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_attach_aio_context(s->bs[i], new_context);
+    }
+}
+
 static BlockDriver bdrv_quorum = {
-    .format_name        = "quorum",
-    .protocol_name      = "quorum",
+    .format_name                        = "quorum",
+    .protocol_name                      = "quorum",
 
-    .instance_size      = sizeof(BDRVQuorumState),
+    .instance_size                      = sizeof(BDRVQuorumState),
 
-    .bdrv_file_open     = quorum_open,
-    .bdrv_close         = quorum_close,
+    .bdrv_file_open                     = quorum_open,
+    .bdrv_close                         = quorum_close,
 
-    .bdrv_co_flush_to_disk = quorum_co_flush,
+    .bdrv_co_flush_to_disk              = quorum_co_flush,
 
-    .bdrv_getlength     = quorum_getlength,
+    .bdrv_getlength                     = quorum_getlength,
 
-    .bdrv_aio_readv     = quorum_aio_readv,
-    .bdrv_aio_writev    = quorum_aio_writev,
-    .bdrv_invalidate_cache = quorum_invalidate_cache,
+    .bdrv_aio_readv                     = quorum_aio_readv,
+    .bdrv_aio_writev                    = quorum_aio_writev,
+    .bdrv_invalidate_cache              = quorum_invalidate_cache,
 
-    .is_filter           = true,
-    .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
+    .bdrv_detach_aio_context            = quorum_detach_aio_context,
+    .bdrv_attach_aio_context            = quorum_attach_aio_context,
+
+    .is_filter                          = true,
+    .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
 };
 
 static void bdrv_quorum_init(void)
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 7ad0a8a0a7..8cf084eeb5 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -34,19 +34,27 @@
 /* linux-aio.c - Linux native implementation */
 #ifdef CONFIG_LINUX_AIO
 void *laio_init(void);
+void laio_cleanup(void *s);
 BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type);
+void laio_detach_aio_context(void *s, AioContext *old_context);
+void laio_attach_aio_context(void *s, AioContext *new_context);
 #endif
 
 #ifdef _WIN32
 typedef struct QEMUWin32AIOState QEMUWin32AIOState;
 QEMUWin32AIOState *win32_aio_init(void);
+void win32_aio_cleanup(QEMUWin32AIOState *aio);
 int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
 BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
         QEMUWin32AIOState *aio, HANDLE hfile,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type);
+void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
+                                  AioContext *old_context);
+void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
+                                  AioContext *new_context);
 #endif
 
 #endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index b7f0f2624b..c2b30be3d3 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -307,6 +307,29 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
     }
 }
 
+static void raw_detach_aio_context(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+    BDRVRawState *s = bs->opaque;
+
+    if (s->use_aio) {
+        laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
+    }
+#endif
+}
+
+static void raw_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+#ifdef CONFIG_LINUX_AIO
+    BDRVRawState *s = bs->opaque;
+
+    if (s->use_aio) {
+        laio_attach_aio_context(s->aio_ctx, new_context);
+    }
+#endif
+}
+
 #ifdef CONFIG_LINUX_AIO
 static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
 {
@@ -447,6 +470,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
 #endif
 
+    raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
+
     ret = 0;
 fail:
     if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -1059,6 +1084,14 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
 static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
+
+    raw_detach_aio_context(bs);
+
+#ifdef CONFIG_LINUX_AIO
+    if (s->use_aio) {
+        laio_cleanup(s->aio_ctx);
+    }
+#endif
     if (s->fd >= 0) {
         qemu_close(s->fd);
         s->fd = -1;
@@ -1478,6 +1511,9 @@ static BlockDriver bdrv_file = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     .create_options = raw_create_options,
 };
 
@@ -1878,6 +1914,9 @@ static BlockDriver bdrv_host_device = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     /* generic scsi device */
 #ifdef __linux__
     .bdrv_ioctl         = hdev_ioctl,
@@ -2020,6 +2059,9 @@ static BlockDriver bdrv_host_floppy = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     /* removable device support */
     .bdrv_is_inserted   = floppy_is_inserted,
     .bdrv_media_changed = floppy_media_changed,
@@ -2145,6 +2187,9 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     /* removable device support */
     .bdrv_is_inserted   = cdrom_is_inserted,
     .bdrv_eject         = cdrom_eject,
@@ -2276,6 +2321,9 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     /* removable device support */
     .bdrv_is_inserted   = cdrom_is_inserted,
     .bdrv_eject         = cdrom_eject,
@@ -2283,40 +2331,6 @@ static BlockDriver bdrv_host_cdrom = {
 };
 #endif /* __FreeBSD__ */
 
-#ifdef CONFIG_LINUX_AIO
-/**
- * Return the file descriptor for Linux AIO
- *
- * This function is a layering violation and should be removed when it becomes
- * possible to call the block layer outside the global mutex.  It allows the
- * caller to hijack the file descriptor so I/O can be performed outside the
- * block layer.
- */
-int raw_get_aio_fd(BlockDriverState *bs)
-{
-    BDRVRawState *s;
-
-    if (!bs->drv) {
-        return -ENOMEDIUM;
-    }
-
-    if (bs->drv == bdrv_find_format("raw")) {
-        bs = bs->file;
-    }
-
-    /* raw-posix has several protocols so just check for raw_aio_readv */
-    if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
-        return -ENOTSUP;
-    }
-
-    s = bs->opaque;
-    if (!s->use_aio) {
-        return -ENOTSUP;
-    }
-    return s->fd;
-}
-#endif /* CONFIG_LINUX_AIO */
-
 static void bdrv_file_init(void)
 {
     /*
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 064ea3123c..324e8187f5 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -36,8 +36,6 @@
 #define FTYPE_CD     1
 #define FTYPE_HARDDISK 2
 
-static QEMUWin32AIOState *aio;
-
 typedef struct RawWin32AIOData {
     BlockDriverState *bs;
     HANDLE hfile;
@@ -202,6 +200,25 @@ static int set_sparse(int fd)
 				 NULL, 0, NULL, 0, &returned, NULL);
 }
 
+static void raw_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->aio) {
+        win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
+    }
+}
+
+static void raw_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->aio) {
+        win32_aio_attach_aio_context(s->aio, new_context);
+    }
+}
+
 static void raw_probe_alignment(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -300,15 +317,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
 
     raw_parse_flags(flags, &access_flags, &overlapped);
 
-    if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
-        aio = win32_aio_init();
-        if (aio == NULL) {
-            error_setg(errp, "Could not initialize AIO");
-            ret = -EINVAL;
-            goto fail;
-        }
-    }
-
     if (filename[0] && filename[1] == ':') {
         snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
     } else if (filename[0] == '\\' && filename[1] == '\\') {
@@ -335,13 +343,23 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     if (flags & BDRV_O_NATIVE_AIO) {
-        ret = win32_aio_attach(aio, s->hfile);
+        s->aio = win32_aio_init();
+        if (s->aio == NULL) {
+            CloseHandle(s->hfile);
+            error_setg(errp, "Could not initialize AIO");
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        ret = win32_aio_attach(s->aio, s->hfile);
         if (ret < 0) {
+            win32_aio_cleanup(s->aio);
             CloseHandle(s->hfile);
             error_setg_errno(errp, -ret, "Could not enable AIO");
             goto fail;
         }
-        s->aio = aio;
+
+        win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
     }
 
     raw_probe_alignment(bs);
@@ -389,6 +407,13 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
 static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
+
+    if (s->aio) {
+        win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
+        win32_aio_cleanup(s->aio);
+        s->aio = NULL;
+    }
+
     CloseHandle(s->hfile);
     if (bs->open_flags & BDRV_O_TEMPORARY) {
         unlink(bs->filename);
@@ -684,6 +709,9 @@ static BlockDriver bdrv_host_device = {
     .bdrv_aio_writev    = raw_aio_writev,
     .bdrv_aio_flush     = raw_aio_flush,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     .bdrv_getlength      = raw_getlength,
     .has_variable_length = true,
 
diff --git a/block/rbd.c b/block/rbd.c
index 09af48426e..93639f783c 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -555,7 +555,7 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
     acb->cancelled = 1;
 
     while (acb->status == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(acb->common.bs), true);
     }
 
     qemu_aio_release(acb);
@@ -588,7 +588,8 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
     rcb->ret = rbd_aio_get_return_value(c);
     rbd_aio_release(c);
 
-    acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+                         rbd_finish_bh, rcb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -684,13 +685,16 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     }
 
     if (r < 0) {
-        goto failed;
+        goto failed_completion;
     }
 
     return &acb->common;
 
+failed_completion:
+    rbd_aio_release(c);
 failed:
     g_free(rcb);
+    qemu_vfree(acb->bounce);
     qemu_aio_release(acb);
     return NULL;
 }
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 4ecbf5f498..1fa19399f0 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -200,6 +200,8 @@ typedef struct SheepdogInode {
     uint32_t data_vdi_id[MAX_DATA_OBJS];
 } SheepdogInode;
 
+#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id)
+
 /*
  * 64 bit FNV-1a non-zero initial basis
  */
@@ -282,6 +284,7 @@ typedef struct AIOReq {
     unsigned int data_len;
     uint8_t flags;
     uint32_t id;
+    bool create;
 
     QLIST_ENTRY(AIOReq) aio_siblings;
 } AIOReq;
@@ -314,6 +317,7 @@ struct SheepdogAIOCB {
 
 typedef struct BDRVSheepdogState {
     BlockDriverState *bs;
+    AioContext *aio_context;
 
     SheepdogInode inode;
 
@@ -404,7 +408,7 @@ static const char * sd_strerror(int err)
 
 static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
                                     uint64_t oid, unsigned int data_len,
-                                    uint64_t offset, uint8_t flags,
+                                    uint64_t offset, uint8_t flags, bool create,
                                     uint64_t base_oid, unsigned int iov_offset)
 {
     AIOReq *aio_req;
@@ -418,6 +422,7 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
     aio_req->data_len = data_len;
     aio_req->flags = flags;
     aio_req->id = s->aioreq_seq_num++;
+    aio_req->create = create;
 
     acb->nr_pending++;
     return aio_req;
@@ -496,7 +501,7 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
             sd_finish_aiocb(acb);
             return;
         }
-        qemu_aio_wait();
+        aio_poll(s->aio_context, true);
     }
 }
 
@@ -578,6 +583,7 @@ static void restart_co_req(void *opaque)
 
 typedef struct SheepdogReqCo {
     int sockfd;
+    AioContext *aio_context;
     SheepdogReq *hdr;
     void *data;
     unsigned int *wlen;
@@ -598,14 +604,14 @@ static coroutine_fn void do_co_req(void *opaque)
     unsigned int *rlen = srco->rlen;
 
     co = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
 
     ret = send_co_req(sockfd, hdr, data, wlen);
     if (ret < 0) {
         goto out;
     }
 
-    qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
 
     ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
     if (ret != sizeof(*hdr)) {
@@ -630,18 +636,19 @@ static coroutine_fn void do_co_req(void *opaque)
 out:
     /* there is at most one request for this sockfd, so it is safe to
      * set each handler to NULL. */
-    qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL);
+    aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
 
     srco->ret = ret;
     srco->finished = true;
 }
 
-static int do_req(int sockfd, SheepdogReq *hdr, void *data,
-                  unsigned int *wlen, unsigned int *rlen)
+static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
+                  void *data, unsigned int *wlen, unsigned int *rlen)
 {
     Coroutine *co;
     SheepdogReqCo srco = {
         .sockfd = sockfd,
+        .aio_context = aio_context,
         .hdr = hdr,
         .data = data,
         .wlen = wlen,
@@ -656,7 +663,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
         co = qemu_coroutine_create(do_co_req);
         qemu_coroutine_enter(co, &srco);
         while (!srco.finished) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
 
@@ -664,8 +671,8 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
 }
 
 static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                           struct iovec *iov, int niov, bool create,
-                           enum AIOCBState aiocb_type);
+                                         struct iovec *iov, int niov,
+                                         enum AIOCBState aiocb_type);
 static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
 static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
 static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
@@ -698,7 +705,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
         /* move aio_req from pending list to inflight one */
         QLIST_REMOVE(aio_req, aio_siblings);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false,
+        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                         acb->aiocb_type);
     }
 }
@@ -709,7 +716,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
     BDRVSheepdogState *s = opaque;
     AIOReq *aio_req, *next;
 
-    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
     close(s->fd);
     s->fd = -1;
 
@@ -797,7 +804,7 @@ static void coroutine_fn aio_read_response(void *opaque)
         }
         idx = data_oid_to_idx(aio_req->oid);
 
-        if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
+        if (aio_req->create) {
             /*
              * If the object is newly created one, we need to update
              * the vdi object (metadata object).  min_dirty_data_idx
@@ -922,7 +929,7 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
         return fd;
     }
 
-    qemu_aio_set_fd_handler(fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
     return fd;
 }
 
@@ -1092,7 +1099,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
     hdr.snapid = snapid;
     hdr.flags = SD_FLAG_CMD_WRITE;
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
     if (ret) {
         error_setg_errno(errp, -ret, "cannot get vdi info");
         goto out;
@@ -1117,8 +1124,8 @@ out:
 }
 
 static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                           struct iovec *iov, int niov, bool create,
-                           enum AIOCBState aiocb_type)
+                                         struct iovec *iov, int niov,
+                                         enum AIOCBState aiocb_type)
 {
     int nr_copies = s->inode.nr_copies;
     SheepdogObjReq hdr;
@@ -1129,6 +1136,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     uint64_t offset = aio_req->offset;
     uint8_t flags = aio_req->flags;
     uint64_t old_oid = aio_req->base_oid;
+    bool create = aio_req->create;
 
     if (!nr_copies) {
         error_report("bug");
@@ -1173,7 +1181,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 
     qemu_co_mutex_lock(&s->lock);
     s->co_send = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s);
+    aio_set_fd_handler(s->aio_context, s->fd,
+                       co_read_response, co_write_request, s);
     socket_set_cork(s->fd, 1);
 
     /* send a header */
@@ -1191,12 +1200,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     }
 out:
     socket_set_cork(s->fd, 0);
-    qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
     s->co_send = NULL;
     qemu_co_mutex_unlock(&s->lock);
 }
 
-static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int read_write_object(int fd, AioContext *aio_context, char *buf,
+                             uint64_t oid, uint8_t copies,
                              unsigned int datalen, uint64_t offset,
                              bool write, bool create, uint32_t cache_flags)
 {
@@ -1229,7 +1239,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
     hdr.offset = offset;
     hdr.copies = copies;
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
     if (ret) {
         error_report("failed to send a request to the sheep");
         return ret;
@@ -1244,19 +1254,23 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
     }
 }
 
-static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int read_object(int fd, AioContext *aio_context, char *buf,
+                       uint64_t oid, uint8_t copies,
                        unsigned int datalen, uint64_t offset,
                        uint32_t cache_flags)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, false,
+    return read_write_object(fd, aio_context, buf, oid, copies,
+                             datalen, offset, false,
                              false, cache_flags);
 }
 
-static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int write_object(int fd, AioContext *aio_context, char *buf,
+                        uint64_t oid, uint8_t copies,
                         unsigned int datalen, uint64_t offset, bool create,
                         uint32_t cache_flags)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, true,
+    return read_write_object(fd, aio_context, buf, oid, copies,
+                             datalen, offset, true,
                              create, cache_flags);
 }
 
@@ -1275,7 +1289,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
         return -EIO;
     }
 
-    inode = g_malloc(sizeof(s->inode));
+    inode = g_malloc(SD_INODE_HEADER_SIZE);
 
     ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err);
     if (ret) {
@@ -1284,14 +1298,15 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
         goto out;
     }
 
-    ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid),
-                      s->inode.nr_copies, sizeof(*inode), 0, s->cache_flags);
+    ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid),
+                      s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0,
+                      s->cache_flags);
     if (ret < 0) {
         goto out;
     }
 
     if (inode->vdi_id != s->inode.vdi_id) {
-        memcpy(&s->inode, inode, sizeof(s->inode));
+        memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE);
     }
 
 out:
@@ -1315,6 +1330,7 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
             DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid);
             aio_req->flags = 0;
             aio_req->base_oid = 0;
+            aio_req->create = false;
             QLIST_REMOVE(aio_req, aio_siblings);
             QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
             return true;
@@ -1327,7 +1343,8 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
 static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
 {
     SheepdogAIOCB *acb = aio_req->aiocb;
-    bool create = false;
+
+    aio_req->create = false;
 
     /* check whether this request becomes a CoW one */
     if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) {
@@ -1345,20 +1362,36 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
             aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
             aio_req->flags |= SD_FLAG_CMD_COW;
         }
-        create = true;
+        aio_req->create = true;
     }
 out:
     if (is_data_obj(aio_req->oid)) {
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                         acb->aiocb_type);
     } else {
         struct iovec iov;
         iov.iov_base = &s->inode;
         iov.iov_len = sizeof(s->inode);
-        add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+        add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
     }
 }
 
+static void sd_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVSheepdogState *s = bs->opaque;
+
+    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+}
+
+static void sd_attach_aio_context(BlockDriverState *bs,
+                                  AioContext *new_context)
+{
+    BDRVSheepdogState *s = bs->opaque;
+
+    s->aio_context = new_context;
+    aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+}
+
 /* TODO Convert to fine grained options */
 static QemuOptsList runtime_opts = {
     .name = "sheepdog",
@@ -1387,6 +1420,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     const char *filename;
 
     s->bs = bs;
+    s->aio_context = bdrv_get_aio_context(bs);
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1448,8 +1482,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     buf = g_malloc(SD_INODE_SIZE);
-    ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
-                      s->cache_flags);
+    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+                      0, SD_INODE_SIZE, 0, s->cache_flags);
 
     closesocket(fd);
 
@@ -1469,7 +1503,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     g_free(buf);
     return 0;
 out:
-    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
     if (s->fd >= 0) {
         closesocket(s->fd);
     }
@@ -1512,7 +1546,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
     hdr.copy_policy = s->inode.copy_policy;
     hdr.copies = s->inode.nr_copies;
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
 
     closesocket(fd);
 
@@ -1766,7 +1800,8 @@ static void sd_close(BlockDriverState *bs)
     hdr.data_length = wlen;
     hdr.flags = SD_FLAG_CMD_WRITE;
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+                 s->name, &wlen, &rlen);
 
     closesocket(fd);
 
@@ -1775,7 +1810,7 @@ static void sd_close(BlockDriverState *bs)
         error_report("%s, %s", sd_strerror(rsp->result), s->name);
     }
 
-    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
     closesocket(s->fd);
     g_free(s->host_spec);
 }
@@ -1812,8 +1847,9 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
     /* we don't need to update entire object */
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
     s->inode.vdi_size = offset;
-    ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, false, s->cache_flags);
+    ret = write_object(fd, s->aio_context, (char *)&s->inode,
+                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
+                       datalen, 0, false, s->cache_flags);
     close(fd);
 
     if (ret < 0) {
@@ -1849,9 +1885,9 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
         iov.iov_base = &s->inode;
         iov.iov_len = sizeof(s->inode);
         aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
-                                data_len, offset, 0, 0, offset);
+                                data_len, offset, 0, false, 0, offset);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+        add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
 
         acb->aio_done_func = sd_finish_aiocb;
         acb->aiocb_type = AIOCB_WRITE_UDATA;
@@ -1882,7 +1918,8 @@ static bool sd_delete(BDRVSheepdogState *s)
         return false;
     }
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+                 s->name, &wlen, &rlen);
     closesocket(fd);
     if (ret) {
         return false;
@@ -1939,8 +1976,8 @@ static int sd_create_branch(BDRVSheepdogState *s)
         goto out;
     }
 
-    ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
-                      SD_INODE_SIZE, 0, s->cache_flags);
+    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+                      s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags);
 
     closesocket(fd);
 
@@ -2049,7 +2086,8 @@ static int coroutine_fn sd_co_rw_vector(void *p)
             DPRINTF("new oid %" PRIx64 "\n", oid);
         }
 
-        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
+        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
+                                old_oid, done);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
 
         if (create) {
@@ -2058,7 +2096,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
             }
         }
 
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                         acb->aiocb_type);
     done:
         offset = 0;
@@ -2138,9 +2176,9 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
     acb->aio_done_func = sd_finish_aiocb;
 
     aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
-                            0, 0, 0, 0, 0);
+                            0, 0, 0, false, 0, 0);
     QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-    add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type);
+    add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
 
     qemu_coroutine_yield();
     return acb->ret;
@@ -2187,8 +2225,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         goto cleanup;
     }
 
-    ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, false, s->cache_flags);
+    ret = write_object(fd, s->aio_context, (char *)&s->inode,
+                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
+                       datalen, 0, false, s->cache_flags);
     if (ret < 0) {
         error_report("failed to write snapshot's inode.");
         goto cleanup;
@@ -2203,8 +2242,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         goto cleanup;
     }
 
-    ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
-                      s->inode.nr_copies, datalen, 0, s->cache_flags);
+    ret = read_object(fd, s->aio_context, (char *)inode,
+                      vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0,
+                      s->cache_flags);
 
     if (ret < 0) {
         error_report("failed to read new inode info. %s", strerror(errno));
@@ -2311,7 +2351,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
     req.opcode = SD_OP_READ_VDIS;
     req.data_length = max;
 
-    ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&req,
+                 vdi_inuse, &wlen, &rlen);
 
     closesocket(fd);
     if (ret) {
@@ -2338,7 +2379,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
         }
 
         /* we don't need to read entire object */
-        ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
+        ret = read_object(fd, s->aio_context, (char *)&inode,
+                          vid_to_vdi_oid(vid),
                           0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
                           s->cache_flags);
 
@@ -2403,11 +2445,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
 
         create = (offset == 0);
         if (load) {
-            ret = read_object(fd, (char *)data, vmstate_oid,
+            ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid,
                               s->inode.nr_copies, data_len, offset,
                               s->cache_flags);
         } else {
-            ret = write_object(fd, (char *)data, vmstate_oid,
+            ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid,
                                s->inode.nr_copies, data_len, offset, create,
                                s->cache_flags);
         }
@@ -2580,6 +2622,9 @@ static BlockDriver bdrv_sheepdog = {
     .bdrv_save_vmstate  = sd_save_vmstate,
     .bdrv_load_vmstate  = sd_load_vmstate,
 
+    .bdrv_detach_aio_context = sd_detach_aio_context,
+    .bdrv_attach_aio_context = sd_attach_aio_context,
+
     .create_options = sd_create_options,
 };
 
@@ -2610,6 +2655,9 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_save_vmstate  = sd_save_vmstate,
     .bdrv_load_vmstate  = sd_load_vmstate,
 
+    .bdrv_detach_aio_context = sd_detach_aio_context,
+    .bdrv_attach_aio_context = sd_attach_aio_context,
+
     .create_options = sd_create_options,
 };
 
@@ -2640,6 +2688,9 @@ static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_save_vmstate  = sd_save_vmstate,
     .bdrv_load_vmstate  = sd_load_vmstate,
 
+    .bdrv_detach_aio_context = sd_detach_aio_context,
+    .bdrv_attach_aio_context = sd_attach_aio_context,
+
     .create_options = sd_create_options,
 };
 
diff --git a/block/ssh.c b/block/ssh.c
index b2129714bc..9779eac2bd 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -773,7 +773,7 @@ static void restart_coroutine(void *opaque)
     qemu_coroutine_enter(co, NULL);
 }
 
-static coroutine_fn void set_fd_handler(BDRVSSHState *s)
+static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
     IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -791,24 +791,26 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s)
     DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock,
             rd_handler, wr_handler);
 
-    qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+                       rd_handler, wr_handler, co);
 }
 
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s)
+static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
+                                          BlockDriverState *bs)
 {
     DPRINTF("s->sock=%d", s->sock);
-    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
 }
 
 /* A non-blocking call returned EAGAIN, so yield, ensuring the
  * handlers are set up so that we'll be rescheduled when there is an
  * interesting event on the socket.
  */
-static coroutine_fn void co_yield(BDRVSSHState *s)
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
 {
-    set_fd_handler(s);
+    set_fd_handler(s, bs);
     qemu_coroutine_yield();
-    clear_fd_handler(s);
+    clear_fd_handler(s, bs);
 }
 
 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
@@ -838,7 +840,7 @@ static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
     }
 }
 
-static coroutine_fn int ssh_read(BDRVSSHState *s,
+static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
                                  int64_t offset, size_t size,
                                  QEMUIOVector *qiov)
 {
@@ -871,7 +873,7 @@ static coroutine_fn int ssh_read(BDRVSSHState *s,
         DPRINTF("sftp_read returned %zd", r);
 
         if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
-            co_yield(s);
+            co_yield(s, bs);
             goto again;
         }
         if (r < 0) {
@@ -906,14 +908,14 @@ static coroutine_fn int ssh_co_readv(BlockDriverState *bs,
     int ret;
 
     qemu_co_mutex_lock(&s->lock);
-    ret = ssh_read(s, sector_num * BDRV_SECTOR_SIZE,
+    ret = ssh_read(s, bs, sector_num * BDRV_SECTOR_SIZE,
                    nb_sectors * BDRV_SECTOR_SIZE, qiov);
     qemu_co_mutex_unlock(&s->lock);
 
     return ret;
 }
 
-static int ssh_write(BDRVSSHState *s,
+static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
                      int64_t offset, size_t size,
                      QEMUIOVector *qiov)
 {
@@ -941,7 +943,7 @@ static int ssh_write(BDRVSSHState *s,
         DPRINTF("sftp_write returned %zd", r);
 
         if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
-            co_yield(s);
+            co_yield(s, bs);
             goto again;
         }
         if (r < 0) {
@@ -960,7 +962,7 @@ static int ssh_write(BDRVSSHState *s,
          */
         if (r == 0) {
             ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
-            co_yield(s);
+            co_yield(s, bs);
             goto again;
         }
 
@@ -988,7 +990,7 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
     int ret;
 
     qemu_co_mutex_lock(&s->lock);
-    ret = ssh_write(s, sector_num * BDRV_SECTOR_SIZE,
+    ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
                     nb_sectors * BDRV_SECTOR_SIZE, qiov);
     qemu_co_mutex_unlock(&s->lock);
 
@@ -1009,7 +1011,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
 
 #ifdef HAS_LIBSSH2_SFTP_FSYNC
 
-static coroutine_fn int ssh_flush(BDRVSSHState *s)
+static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
 
@@ -1017,7 +1019,7 @@ static coroutine_fn int ssh_flush(BDRVSSHState *s)
  again:
     r = libssh2_sftp_fsync(s->sftp_handle);
     if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
-        co_yield(s);
+        co_yield(s, bs);
         goto again;
     }
     if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
@@ -1039,7 +1041,7 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
     int ret;
 
     qemu_co_mutex_lock(&s->lock);
-    ret = ssh_flush(s);
+    ret = ssh_flush(s, bs);
     qemu_co_mutex_unlock(&s->lock);
 
     return ret;
diff --git a/block/vmdk.c b/block/vmdk.c
index 2b38f61fcd..b8a476278a 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2096,6 +2096,27 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     return 0;
 }
 
+static void vmdk_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_extents; i++) {
+        bdrv_detach_aio_context(s->extents[i].file);
+    }
+}
+
+static void vmdk_attach_aio_context(BlockDriverState *bs,
+                                    AioContext *new_context)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_extents; i++) {
+        bdrv_attach_aio_context(s->extents[i].file, new_context);
+    }
+}
+
 static QEMUOptionParameter vmdk_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -2153,6 +2174,8 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_get_specific_info       = vmdk_get_specific_info,
     .bdrv_refresh_limits          = vmdk_refresh_limits,
     .bdrv_get_info                = vmdk_get_info,
+    .bdrv_detach_aio_context      = vmdk_detach_aio_context,
+    .bdrv_attach_aio_context      = vmdk_attach_aio_context,
 
     .create_options               = vmdk_create_options,
 };
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 5d1d199b61..8e417f70ae 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -40,6 +40,7 @@ struct QEMUWin32AIOState {
     HANDLE hIOCP;
     EventNotifier e;
     int count;
+    bool is_aio_context_attached;
 };
 
 typedef struct QEMUWin32AIOCB {
@@ -114,7 +115,7 @@ static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
      * wait for completion.
      */
     while (!HasOverlappedIoCompleted(&waiocb->ov)) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(blockacb->bs), true);
     }
 }
 
@@ -180,6 +181,20 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
     }
 }
 
+void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
+                                  AioContext *old_context)
+{
+    aio_set_event_notifier(old_context, &aio->e, NULL);
+    aio->is_aio_context_attached = false;
+}
+
+void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
+                                  AioContext *new_context)
+{
+    aio->is_aio_context_attached = true;
+    aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+}
+
 QEMUWin32AIOState *win32_aio_init(void)
 {
     QEMUWin32AIOState *s;
@@ -194,8 +209,6 @@ QEMUWin32AIOState *win32_aio_init(void)
         goto out_close_efd;
     }
 
-    qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb);
-
     return s;
 
 out_close_efd:
@@ -204,3 +217,11 @@ out_free_state:
     g_free(s);
     return NULL;
 }
+
+void win32_aio_cleanup(QEMUWin32AIOState *aio)
+{
+    assert(!aio->is_aio_context_attached);
+    CloseHandle(aio->hIOCP);
+    event_notifier_cleanup(&aio->e);
+    g_free(aio);
+}
diff --git a/blockdev.c b/blockdev.c
index 9b5261b765..4cbcc56b5e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1703,6 +1703,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
 {
     ThrottleConfig cfg;
     BlockDriverState *bs;
+    AioContext *aio_context;
 
     bs = bdrv_find(device);
     if (!bs) {
@@ -1746,6 +1747,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
         return;
     }
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     if (!bs->io_limits_enabled && throttle_enabled(&cfg)) {
         bdrv_io_limits_enable(bs);
     } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) {
@@ -1755,6 +1759,8 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
     if (bs->io_limits_enabled) {
         bdrv_set_io_limits(bs, &cfg);
     }
+
+    aio_context_release(aio_context);
 }
 
 int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
diff --git a/hw/block/dataplane/Makefile.objs b/hw/block/dataplane/Makefile.objs
index 9da2eb82ba..e786f66421 100644
--- a/hw/block/dataplane/Makefile.objs
+++ b/hw/block/dataplane/Makefile.objs
@@ -1 +1 @@
-obj-y += ioq.o virtio-blk.o
+obj-y += virtio-blk.o
diff --git a/hw/block/dataplane/ioq.c b/hw/block/dataplane/ioq.c
deleted file mode 100644
index f709f87ed6..0000000000
--- a/hw/block/dataplane/ioq.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Linux AIO request queue
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "ioq.h"
-
-void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs)
-{
-    int rc;
-
-    ioq->fd = fd;
-    ioq->max_reqs = max_reqs;
-
-    memset(&ioq->io_ctx, 0, sizeof ioq->io_ctx);
-    rc = io_setup(max_reqs, &ioq->io_ctx);
-    if (rc != 0) {
-        fprintf(stderr, "ioq io_setup failed %d\n", rc);
-        exit(1);
-    }
-
-    rc = event_notifier_init(&ioq->io_notifier, 0);
-    if (rc != 0) {
-        fprintf(stderr, "ioq io event notifier creation failed %d\n", rc);
-        exit(1);
-    }
-
-    ioq->freelist = g_malloc0(sizeof ioq->freelist[0] * max_reqs);
-    ioq->freelist_idx = 0;
-
-    ioq->queue = g_malloc0(sizeof ioq->queue[0] * max_reqs);
-    ioq->queue_idx = 0;
-}
-
-void ioq_cleanup(IOQueue *ioq)
-{
-    g_free(ioq->freelist);
-    g_free(ioq->queue);
-
-    event_notifier_cleanup(&ioq->io_notifier);
-    io_destroy(ioq->io_ctx);
-}
-
-EventNotifier *ioq_get_notifier(IOQueue *ioq)
-{
-    return &ioq->io_notifier;
-}
-
-struct iocb *ioq_get_iocb(IOQueue *ioq)
-{
-    /* Underflow cannot happen since ioq is sized for max_reqs */
-    assert(ioq->freelist_idx != 0);
-
-    struct iocb *iocb = ioq->freelist[--ioq->freelist_idx];
-    ioq->queue[ioq->queue_idx++] = iocb;
-    return iocb;
-}
-
-void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb)
-{
-    /* Overflow cannot happen since ioq is sized for max_reqs */
-    assert(ioq->freelist_idx != ioq->max_reqs);
-
-    ioq->freelist[ioq->freelist_idx++] = iocb;
-}
-
-struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
-                      unsigned int count, long long offset)
-{
-    struct iocb *iocb = ioq_get_iocb(ioq);
-
-    if (read) {
-        io_prep_preadv(iocb, ioq->fd, iov, count, offset);
-    } else {
-        io_prep_pwritev(iocb, ioq->fd, iov, count, offset);
-    }
-    io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier));
-    return iocb;
-}
-
-int ioq_submit(IOQueue *ioq)
-{
-    int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
-    ioq->queue_idx = 0; /* reset */
-    return rc;
-}
-
-int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
-                       void *opaque)
-{
-    struct io_event events[ioq->max_reqs];
-    int nevents, i;
-
-    do {
-        nevents = io_getevents(ioq->io_ctx, 0, ioq->max_reqs, events, NULL);
-    } while (nevents < 0 && errno == EINTR);
-    if (nevents < 0) {
-        return nevents;
-    }
-
-    for (i = 0; i < nevents; i++) {
-        ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res;
-
-        completion(events[i].obj, ret, opaque);
-        ioq_put_iocb(ioq, events[i].obj);
-    }
-    return nevents;
-}
diff --git a/hw/block/dataplane/ioq.h b/hw/block/dataplane/ioq.h
deleted file mode 100644
index b49b5de7f4..0000000000
--- a/hw/block/dataplane/ioq.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Linux AIO request queue
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef IOQ_H
-#define IOQ_H
-
-#include <libaio.h>
-#include "qemu/event_notifier.h"
-
-typedef struct {
-    int fd;                         /* file descriptor */
-    unsigned int max_reqs;          /* max length of freelist and queue */
-
-    io_context_t io_ctx;            /* Linux AIO context */
-    EventNotifier io_notifier;      /* Linux AIO eventfd */
-
-    /* Requests can complete in any order so a free list is necessary to manage
-     * available iocbs.
-     */
-    struct iocb **freelist;         /* free iocbs */
-    unsigned int freelist_idx;
-
-    /* Multiple requests are queued up before submitting them all in one go */
-    struct iocb **queue;            /* queued iocbs */
-    unsigned int queue_idx;
-} IOQueue;
-
-void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs);
-void ioq_cleanup(IOQueue *ioq);
-EventNotifier *ioq_get_notifier(IOQueue *ioq);
-struct iocb *ioq_get_iocb(IOQueue *ioq);
-void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb);
-struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
-                      unsigned int count, long long offset);
-int ioq_submit(IOQueue *ioq);
-
-static inline unsigned int ioq_num_queued(IOQueue *ioq)
-{
-    return ioq->queue_idx;
-}
-
-typedef void IOQueueCompletion(struct iocb *iocb, ssize_t ret, void *opaque);
-int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
-                       void *opaque);
-
-#endif /* IOQ_H */
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index e49c2536b1..c10b7b70fb 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -17,7 +17,6 @@
 #include "qemu/thread.h"
 #include "qemu/error-report.h"
 #include "hw/virtio/dataplane/vring.h"
-#include "ioq.h"
 #include "block/block.h"
 #include "hw/virtio/virtio-blk.h"
 #include "virtio-blk.h"
@@ -25,20 +24,14 @@
 #include "hw/virtio/virtio-bus.h"
 #include "qom/object_interfaces.h"
 
-enum {
-    SEG_MAX = 126,                  /* maximum number of I/O segments */
-    VRING_MAX = SEG_MAX + 2,        /* maximum number of vring descriptors */
-    REQ_MAX = VRING_MAX,            /* maximum number of requests in the vring,
-                                     * is VRING_MAX / 2 with traditional and
-                                     * VRING_MAX with indirect descriptors */
-};
-
 typedef struct {
-    struct iocb iocb;               /* Linux AIO control block */
+    VirtIOBlockDataPlane *s;
     QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
     VirtQueueElement *elem;         /* saved data from the virtqueue */
-    struct iovec *bounce_iov;       /* used if guest buffers are unaligned */
-    QEMUIOVector *read_qiov;        /* for read completion /w bounce buffer */
+    QEMUIOVector qiov;              /* original request iovecs */
+    struct iovec bounce_iov;        /* used if guest buffers are unaligned */
+    QEMUIOVector bounce_qiov;       /* bounce buffer iovecs */
+    bool read;                      /* read or write? */
 } VirtIOBlockRequest;
 
 struct VirtIOBlockDataPlane {
@@ -47,7 +40,6 @@ struct VirtIOBlockDataPlane {
     bool stopping;
 
     VirtIOBlkConf *blk;
-    int fd;                         /* image file descriptor */
 
     VirtIODevice *vdev;
     Vring vring;                    /* virtqueue vring */
@@ -61,16 +53,8 @@ struct VirtIOBlockDataPlane {
     IOThread *iothread;
     IOThread internal_iothread_obj;
     AioContext *ctx;
-    EventNotifier io_notifier;      /* Linux AIO completion */
     EventNotifier host_notifier;    /* doorbell */
 
-    IOQueue ioqueue;                /* Linux AIO queue (should really be per
-                                       IOThread) */
-    VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the
-                                             queue */
-
-    unsigned int num_reqs;
-
     /* Operation blocker on BDS */
     Error *blocker;
 };
@@ -85,33 +69,28 @@ static void notify_guest(VirtIOBlockDataPlane *s)
     event_notifier_set(s->guest_notifier);
 }
 
-static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+static void complete_rdwr(void *opaque, int ret)
 {
-    VirtIOBlockDataPlane *s = opaque;
-    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+    VirtIOBlockRequest *req = opaque;
     struct virtio_blk_inhdr hdr;
     int len;
 
-    if (likely(ret >= 0)) {
+    if (likely(ret == 0)) {
         hdr.status = VIRTIO_BLK_S_OK;
-        len = ret;
+        len = req->qiov.size;
     } else {
         hdr.status = VIRTIO_BLK_S_IOERR;
         len = 0;
     }
 
-    trace_virtio_blk_data_plane_complete_request(s, req->elem->index, ret);
+    trace_virtio_blk_data_plane_complete_request(req->s, req->elem->index, ret);
 
-    if (req->read_qiov) {
-        assert(req->bounce_iov);
-        qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len);
-        qemu_iovec_destroy(req->read_qiov);
-        g_slice_free(QEMUIOVector, req->read_qiov);
+    if (req->read && req->bounce_iov.iov_base) {
+        qemu_iovec_from_buf(&req->qiov, 0, req->bounce_iov.iov_base, len);
     }
 
-    if (req->bounce_iov) {
-        qemu_vfree(req->bounce_iov->iov_base);
-        g_slice_free(struct iovec, req->bounce_iov);
+    if (req->bounce_iov.iov_base) {
+        qemu_vfree(req->bounce_iov.iov_base);
     }
 
     qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
@@ -122,9 +101,9 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
      * written to, but for virtio-blk it seems to be the number of bytes
      * transferred plus the status bytes.
      */
-    vring_push(&s->vring, req->elem, len + sizeof(hdr));
-    req->elem = NULL;
-    s->num_reqs--;
+    vring_push(&req->s->vring, req->elem, len + sizeof(hdr));
+    notify_guest(req->s);
+    g_slice_free(VirtIOBlockRequest, req);
 }
 
 static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
@@ -155,51 +134,87 @@ static void do_get_id_cmd(VirtIOBlockDataPlane *s,
     complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
 }
 
-static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
-                       struct iovec *iov, unsigned iov_cnt,
-                       long long offset, VirtQueueElement *elem,
-                       QEMUIOVector *inhdr)
+static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
+                        struct iovec *iov, unsigned iov_cnt,
+                        int64_t sector_num, VirtQueueElement *elem,
+                        QEMUIOVector *inhdr)
 {
-    struct iocb *iocb;
-    QEMUIOVector qiov;
-    struct iovec *bounce_iov = NULL;
-    QEMUIOVector *read_qiov = NULL;
+    VirtIOBlockRequest *req = g_slice_new0(VirtIOBlockRequest);
+    QEMUIOVector *qiov;
+    int nb_sectors;
 
-    qemu_iovec_init_external(&qiov, iov, iov_cnt);
-    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
-        void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
+    /* Fill in virtio block metadata needed for completion */
+    req->s = s;
+    req->elem = elem;
+    req->inhdr = inhdr;
+    req->read = read;
+    qemu_iovec_init_external(&req->qiov, iov, iov_cnt);
 
-        if (read) {
-            /* Need to copy back from bounce buffer on completion */
-            read_qiov = g_slice_new(QEMUIOVector);
-            qemu_iovec_init(read_qiov, iov_cnt);
-            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
-        } else {
-            qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
+    qiov = &req->qiov;
+
+    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, qiov)) {
+        void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov->size);
+
+        /* Populate bounce buffer with data for writes */
+        if (!read) {
+            qemu_iovec_to_buf(qiov, 0, bounce_buffer, qiov->size);
         }
 
         /* Redirect I/O to aligned bounce buffer */
-        bounce_iov = g_slice_new(struct iovec);
-        bounce_iov->iov_base = bounce_buffer;
-        bounce_iov->iov_len = qiov.size;
-        iov = bounce_iov;
-        iov_cnt = 1;
+        req->bounce_iov.iov_base = bounce_buffer;
+        req->bounce_iov.iov_len = qiov->size;
+        qemu_iovec_init_external(&req->bounce_qiov, &req->bounce_iov, 1);
+        qiov = &req->bounce_qiov;
     }
 
-    iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
+    nb_sectors = qiov->size / BDRV_SECTOR_SIZE;
 
-    /* Fill in virtio block metadata needed for completion */
-    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
-    req->elem = elem;
-    req->inhdr = inhdr;
-    req->bounce_iov = bounce_iov;
-    req->read_qiov = read_qiov;
-    return 0;
+    if (read) {
+        bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors,
+                       complete_rdwr, req);
+    } else {
+        bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors,
+                        complete_rdwr, req);
+    }
 }
 
-static int process_request(IOQueue *ioq, VirtQueueElement *elem)
+static void complete_flush(void *opaque, int ret)
+{
+    VirtIOBlockRequest *req = opaque;
+    unsigned char status;
+
+    if (ret == 0) {
+        status = VIRTIO_BLK_S_OK;
+    } else {
+        status = VIRTIO_BLK_S_IOERR;
+    }
+
+    complete_request_early(req->s, req->elem, req->inhdr, status);
+    g_slice_free(VirtIOBlockRequest, req);
+}
+
+static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+                         QEMUIOVector *inhdr)
+{
+    VirtIOBlockRequest *req = g_slice_new(VirtIOBlockRequest);
+    req->s = s;
+    req->elem = elem;
+    req->inhdr = inhdr;
+
+    bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
+}
+
+static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+                        QEMUIOVector *inhdr)
+{
+    int status;
+
+    status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem);
+    complete_request_early(s, elem, inhdr, status);
+}
+
+static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
 {
-    VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue);
     struct iovec *iov = elem->out_sg;
     struct iovec *in_iov = elem->in_sg;
     unsigned out_num = elem->out_num;
@@ -234,25 +249,23 @@ static int process_request(IOQueue *ioq, VirtQueueElement *elem)
 
     switch (outhdr.type) {
     case VIRTIO_BLK_T_IN:
-        do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, elem, inhdr);
+        do_rdwr_cmd(s, true, in_iov, in_num,
+                    outhdr.sector * 512 / BDRV_SECTOR_SIZE,
+                    elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_OUT:
-        do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, elem, inhdr);
+        do_rdwr_cmd(s, false, iov, out_num,
+                    outhdr.sector * 512 / BDRV_SECTOR_SIZE,
+                    elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_SCSI_CMD:
-        /* TODO support SCSI commands */
-        complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_UNSUPP);
+        do_scsi_cmd(s, elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_FLUSH:
-        /* TODO fdsync not supported by Linux AIO, do it synchronously here! */
-        if (qemu_fdatasync(s->fd) < 0) {
-            complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_IOERR);
-        } else {
-            complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
-        }
+        do_flush_cmd(s, elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_GET_ID:
@@ -274,7 +287,6 @@ static void handle_notify(EventNotifier *e)
 
     VirtQueueElement *elem;
     int ret;
-    unsigned int num_queued;
 
     event_notifier_test_and_clear(&s->host_notifier);
     for (;;) {
@@ -291,7 +303,7 @@ static void handle_notify(EventNotifier *e)
             trace_virtio_blk_data_plane_process_request(s, elem->out_num,
                                                         elem->in_num, elem->index);
 
-            if (process_request(&s->ioqueue, elem) < 0) {
+            if (process_request(s, elem) < 0) {
                 vring_set_broken(&s->vring);
                 vring_free_element(elem);
                 ret = -EFAULT;
@@ -306,44 +318,10 @@ static void handle_notify(EventNotifier *e)
             if (vring_enable_notification(s->vdev, &s->vring)) {
                 break;
             }
-        } else { /* ret == -ENOBUFS or fatal error, iovecs[] is depleted */
-            /* Since there are no iovecs[] left, stop processing for now.  Do
-             * not re-enable guest->host notifies since the I/O completion
-             * handler knows to check for more vring descriptors anyway.
-             */
+        } else { /* fatal error */
             break;
         }
     }
-
-    num_queued = ioq_num_queued(&s->ioqueue);
-    if (num_queued > 0) {
-        s->num_reqs += num_queued;
-
-        int rc = ioq_submit(&s->ioqueue);
-        if (unlikely(rc < 0)) {
-            fprintf(stderr, "ioq_submit failed %d\n", rc);
-            exit(1);
-        }
-    }
-}
-
-static void handle_io(EventNotifier *e)
-{
-    VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
-                                           io_notifier);
-
-    event_notifier_test_and_clear(&s->io_notifier);
-    if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
-        notify_guest(s);
-    }
-
-    /* If there were more requests than iovecs, the vring will not be empty yet
-     * so check again.  There should now be enough resources to process more
-     * requests.
-     */
-    if (unlikely(vring_more_avail(&s->vring))) {
-        handle_notify(&s->host_notifier);
-    }
 }
 
 /* Context: QEMU global mutex held */
@@ -352,7 +330,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
                                   Error **errp)
 {
     VirtIOBlockDataPlane *s;
-    int fd;
     Error *local_err = NULL;
 
     *dataplane = NULL;
@@ -361,18 +338,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
         return;
     }
 
-    if (blk->scsi) {
-        error_setg(errp,
-                   "device is incompatible with x-data-plane, use scsi=off");
-        return;
-    }
-
-    if (blk->config_wce) {
-        error_setg(errp, "device is incompatible with x-data-plane, "
-                         "use config-wce=off");
-        return;
-    }
-
     /* If dataplane is (re-)enabled while the guest is running there could be
      * block jobs that can conflict.
      */
@@ -383,16 +348,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
         return;
     }
 
-    fd = raw_get_aio_fd(blk->conf.bs);
-    if (fd < 0) {
-        error_setg(errp, "drive is incompatible with x-data-plane, "
-                         "use format=raw,cache=none,aio=native");
-        return;
-    }
-
     s = g_new0(VirtIOBlockDataPlane, 1);
     s->vdev = vdev;
-    s->fd = fd;
     s->blk = blk;
 
     if (blk->iothread) {
@@ -437,7 +394,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
     VirtQueue *vq;
-    int i;
 
     if (s->started) {
         return;
@@ -470,24 +426,18 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
     }
     s->host_notifier = *virtio_queue_get_host_notifier(vq);
 
-    /* Set up ioqueue */
-    ioq_init(&s->ioqueue, s->fd, REQ_MAX);
-    for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
-        ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
-    }
-    s->io_notifier = *ioq_get_notifier(&s->ioqueue);
-
     s->starting = false;
     s->started = true;
     trace_virtio_blk_data_plane_start(s);
 
+    bdrv_set_aio_context(s->blk->conf.bs, s->ctx);
+
     /* Kick right away to begin processing requests already in vring */
     event_notifier_set(virtio_queue_get_host_notifier(vq));
 
     /* Get this show started by hooking up our callbacks */
     aio_context_acquire(s->ctx);
     aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
-    aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io);
     aio_context_release(s->ctx);
 }
 
@@ -507,13 +457,8 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
     /* Stop notifications for new requests from guest */
     aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
 
-    /* Complete pending requests */
-    while (s->num_reqs > 0) {
-        aio_poll(s->ctx, true);
-    }
-
-    /* Stop ioq callbacks (there are no pending requests left) */
-    aio_set_event_notifier(s->ctx, &s->io_notifier, NULL);
+    /* Drain and switch bs back to the QEMU main loop */
+    bdrv_set_aio_context(s->blk->conf.bs, qemu_get_aio_context());
 
     aio_context_release(s->ctx);
 
@@ -522,7 +467,6 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
      */
     vring_teardown(&s->vring, s->vdev, 0);
 
-    ioq_cleanup(&s->ioqueue);
     k->set_host_notifier(qbus->parent, 0, false);
 
     /* Clean up guest notifier (irq) */
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index b1fc1de0dc..85aa8715ba 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,7 +33,6 @@ typedef struct VirtIOBlockReq
     VirtQueueElement elem;
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr *out;
-    struct virtio_scsi_inhdr *scsi;
     QEMUIOVector qiov;
     struct VirtIOBlockReq *next;
     BlockAcctCookie acct;
@@ -125,13 +124,15 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
     return req;
 }
 
-static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
+                               VirtQueueElement *elem)
 {
-#ifdef __linux__
-    int ret;
-    int i;
-#endif
     int status = VIRTIO_BLK_S_OK;
+    struct virtio_scsi_inhdr *scsi = NULL;
+#ifdef __linux__
+    int i;
+    struct sg_io_hdr hdr;
+#endif
 
     /*
      * We require at least one output segment each for the virtio_blk_outhdr
@@ -140,19 +141,18 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
      * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
      * and the sense buffer pointer in the input segments.
      */
-    if (req->elem.out_num < 2 || req->elem.in_num < 3) {
-        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-        g_free(req);
-        return;
+    if (elem->out_num < 2 || elem->in_num < 3) {
+        status = VIRTIO_BLK_S_IOERR;
+        goto fail;
     }
 
     /*
      * The scsi inhdr is placed in the second-to-last input segment, just
      * before the regular inhdr.
      */
-    req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+    scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
 
-    if (!req->dev->blk.scsi) {
+    if (!blk->blk.scsi) {
         status = VIRTIO_BLK_S_UNSUPP;
         goto fail;
     }
@@ -160,43 +160,42 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
     /*
      * No support for bidirection commands yet.
      */
-    if (req->elem.out_num > 2 && req->elem.in_num > 3) {
+    if (elem->out_num > 2 && elem->in_num > 3) {
         status = VIRTIO_BLK_S_UNSUPP;
         goto fail;
     }
 
 #ifdef __linux__
-    struct sg_io_hdr hdr;
     memset(&hdr, 0, sizeof(struct sg_io_hdr));
     hdr.interface_id = 'S';
-    hdr.cmd_len = req->elem.out_sg[1].iov_len;
-    hdr.cmdp = req->elem.out_sg[1].iov_base;
+    hdr.cmd_len = elem->out_sg[1].iov_len;
+    hdr.cmdp = elem->out_sg[1].iov_base;
     hdr.dxfer_len = 0;
 
-    if (req->elem.out_num > 2) {
+    if (elem->out_num > 2) {
         /*
          * If there are more than the minimally required 2 output segments
          * there is write payload starting from the third iovec.
          */
         hdr.dxfer_direction = SG_DXFER_TO_DEV;
-        hdr.iovec_count = req->elem.out_num - 2;
+        hdr.iovec_count = elem->out_num - 2;
 
         for (i = 0; i < hdr.iovec_count; i++)
-            hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
+            hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
 
-        hdr.dxferp = req->elem.out_sg + 2;
+        hdr.dxferp = elem->out_sg + 2;
 
-    } else if (req->elem.in_num > 3) {
+    } else if (elem->in_num > 3) {
         /*
          * If we have more than 3 input segments the guest wants to actually
          * read data.
          */
         hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-        hdr.iovec_count = req->elem.in_num - 3;
+        hdr.iovec_count = elem->in_num - 3;
         for (i = 0; i < hdr.iovec_count; i++)
-            hdr.dxfer_len += req->elem.in_sg[i].iov_len;
+            hdr.dxfer_len += elem->in_sg[i].iov_len;
 
-        hdr.dxferp = req->elem.in_sg;
+        hdr.dxferp = elem->in_sg;
     } else {
         /*
          * Some SCSI commands don't actually transfer any data.
@@ -204,11 +203,11 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
         hdr.dxfer_direction = SG_DXFER_NONE;
     }
 
-    hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
-    hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
+    hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
+    hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
 
-    ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
-    if (ret) {
+    status = bdrv_ioctl(blk->bs, SG_IO, &hdr);
+    if (status) {
         status = VIRTIO_BLK_S_UNSUPP;
         goto fail;
     }
@@ -224,23 +223,31 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
         hdr.status = CHECK_CONDITION;
     }
 
-    stl_p(&req->scsi->errors,
+    stl_p(&scsi->errors,
           hdr.status | (hdr.msg_status << 8) |
           (hdr.host_status << 16) | (hdr.driver_status << 24));
-    stl_p(&req->scsi->residual, hdr.resid);
-    stl_p(&req->scsi->sense_len, hdr.sb_len_wr);
-    stl_p(&req->scsi->data_len, hdr.dxfer_len);
+    stl_p(&scsi->residual, hdr.resid);
+    stl_p(&scsi->sense_len, hdr.sb_len_wr);
+    stl_p(&scsi->data_len, hdr.dxfer_len);
 
-    virtio_blk_req_complete(req, status);
-    g_free(req);
-    return;
+    return status;
 #else
     abort();
 #endif
 
 fail:
     /* Just put anything nonzero so that the ioctl fails in the guest.  */
-    stl_p(&req->scsi->errors, 255);
+    if (scsi) {
+        stl_p(&scsi->errors, 255);
+    }
+    return status;
+}
+
+static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+{
+    int status;
+
+    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
     virtio_blk_req_complete(req, status);
     g_free(req);
 }
@@ -523,7 +530,10 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
     struct virtio_blk_config blkcfg;
 
     memcpy(&blkcfg, config, sizeof(blkcfg));
+
+    aio_context_acquire(bdrv_get_aio_context(s->bs));
     bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0);
+    aio_context_release(bdrv_get_aio_context(s->bs));
 }
 
 static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
@@ -582,7 +592,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
      * s->bs would erroneously be placed in writethrough mode.
      */
     if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
-        bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
+        aio_context_acquire(bdrv_get_aio_context(s->bs));
+        bdrv_set_enable_write_cache(s->bs,
+                                    !!(features & (1 << VIRTIO_BLK_F_WCE)));
+        aio_context_release(bdrv_get_aio_context(s->bs));
     }
 }
 
diff --git a/include/block/block.h b/include/block/block.h
index faee3aa246..7d86e29cf4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -481,15 +481,6 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
 bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
 
-#ifdef CONFIG_LINUX_AIO
-int raw_get_aio_fd(BlockDriverState *bs);
-#else
-static inline int raw_get_aio_fd(BlockDriverState *bs)
-{
-    return -ENOTSUP;
-}
-#endif
-
 enum BlockAcctType {
     BDRV_ACCT_READ,
     BDRV_ACCT_WRITE,
@@ -574,4 +565,22 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
 int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
 
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+/**
+ * bdrv_set_aio_context:
+ *
+ * Changes the #AioContext used for fd handlers, timers, and BHs by this
+ * BlockDriverState and all its children.
+ *
+ * This function must be called from the old #AioContext or with a lock held so
+ * the old #AioContext is not executing.
+ */
+void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f2e753f632..8d58334c1d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -247,6 +247,19 @@ struct BlockDriver {
      */
     int (*bdrv_has_zero_init)(BlockDriverState *bs);
 
+    /* Remove fd handlers, timers, and other event loop callbacks so the event
+     * loop is no longer in use.  Called with no in-flight requests and in
+     * depth-first traversal order with parents before child nodes.
+     */
+    void (*bdrv_detach_aio_context)(BlockDriverState *bs);
+
+    /* Add fd handlers, timers, and other event loop callbacks so I/O requests
+     * can be processed again.  Called with no in-flight requests and in
+     * depth-first traversal order with child nodes before parent nodes.
+     */
+    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
+                                    AioContext *new_context);
+
     QLIST_ENTRY(BlockDriver) list;
 };
 
@@ -297,6 +310,8 @@ struct BlockDriverState {
     const BlockDevOps *dev_ops;
     void *dev_opaque;
 
+    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+
     char filename[1024];
     char backing_file[1024]; /* if non zero, the image is a diff of
                                 this file image */
@@ -390,11 +405,25 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                     NotifierWithReturn *notifier);
 
 /**
- * bdrv_get_aio_context:
+ * bdrv_detach_aio_context:
  *
- * Returns: the currently bound #AioContext
+ * May be called from .bdrv_detach_aio_context() to detach children from the
+ * current #AioContext.  This is only needed by block drivers that manage their
+ * own children.  Both ->file and ->backing_hd are automatically handled and
+ * block drivers should not call this function on them explicitly.
  */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+void bdrv_detach_aio_context(BlockDriverState *bs);
+
+/**
+ * bdrv_attach_aio_context:
+ *
+ * May be called from .bdrv_attach_aio_context() to attach children to the new
+ * #AioContext.  This is only needed by block drivers that manage their own
+ * children.  Both ->file and ->backing_hd are automatically handled and block
+ * drivers should not call this function on them explicitly.
+ */
+void bdrv_attach_aio_context(BlockDriverState *bs,
+                             AioContext *new_context);
 
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index e4c41ff2ef..4bc9b549ad 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -155,4 +155,7 @@ typedef struct VirtIOBlock {
 
 void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk);
 
+int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
+                               VirtQueueElement *elem);
+
 #endif
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index ab29b0b918..b890613a9c 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -67,6 +67,11 @@ typedef struct ThrottleState {
     int64_t previous_leak;    /* timestamp of the last leak done */
     QEMUTimer * timers[2];    /* timers used to do the throttling */
     QEMUClockType clock_type; /* the clock used */
+
+    /* Callbacks */
+    QEMUTimerCB *read_timer_cb;
+    QEMUTimerCB *write_timer_cb;
+    void *timer_opaque;
 } ThrottleState;
 
 /* operations on single leaky buckets */
@@ -82,6 +87,7 @@ bool throttle_compute_timer(ThrottleState *ts,
 
 /* init/destroy cycle */
 void throttle_init(ThrottleState *ts,
+                   AioContext *aio_context,
                    QEMUClockType clock_type,
                    void (read_timer)(void *),
                    void (write_timer)(void *),
@@ -89,6 +95,10 @@ void throttle_init(ThrottleState *ts,
 
 void throttle_destroy(ThrottleState *ts);
 
+void throttle_detach_aio_context(ThrottleState *ts);
+
+void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context);
+
 bool throttle_have_timer(ThrottleState *ts);
 
 /* configuration */
diff --git a/qapi-schema.json b/qapi-schema.json
index 7bc33ea717..14b498b442 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2,32 +2,11 @@
 #
 # QAPI Schema
 
-##
-# @ErrorClass
-#
-# QEMU error classes
-#
-# @GenericError: this is used for errors that don't require a specific error
-#                class. This should be the default case for most errors
-#
-# @CommandNotFound: the requested command has not been found
-#
-# @DeviceEncrypted: the requested operation can't be fulfilled because the
-#                   selected device is encrypted
-#
-# @DeviceNotActive: a device has failed to be become active
-#
-# @DeviceNotFound: the requested device has not been found
-#
-# @KVMMissingCap: the requested operation can't be fulfilled because a
-#                 required KVM capability is missing
-#
-# Since: 1.2
-##
-{ 'enum': 'ErrorClass',
-  'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted',
-            'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] }
+# QAPI common definitions
+{ 'include': 'qapi/common.json' }
 
+# QAPI block definitions
+{ 'include': 'qapi/block.json' }
 
 ##
 # LostTickPolicy:
@@ -53,40 +32,6 @@
 { 'enum': 'LostTickPolicy',
   'data': ['discard', 'delay', 'merge', 'slew' ] }
 
-##
-# BiosAtaTranslation:
-#
-# Policy that BIOS should use to interpret cylinder/head/sector
-# addresses.  Note that Bochs BIOS and SeaBIOS will not actually
-# translate logical CHS to physical; instead, they will use logical
-# block addressing.
-#
-# @auto: If cylinder/heads/sizes are passed, choose between none and LBA
-#        depending on the size of the disk.  If they are not passed,
-#        choose none if QEMU can guess that the disk had 16 or fewer
-#        heads, large if QEMU can guess that the disk had 131072 or
-#        fewer tracks across all heads (i.e. cylinders*heads<131072),
-#        otherwise LBA.
-#
-# @none: The physical disk geometry is equal to the logical geometry.
-#
-# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255
-#       heads (if fewer than 255 are enough to cover the whole disk
-#       with 1024 cylinders/head).  The number of cylinders/head is
-#       then computed based on the number of sectors and heads.
-#
-# @large: The number of cylinders per head is scaled down to 1024
-#         by correspondingly scaling up the number of heads.
-#
-# @rechs: Same as @large, but first convert a 16-head geometry to
-#         15-head, by proportionally scaling up the number of
-#         cylinders/head.
-#
-# Since: 2.0
-##
-{ 'enum': 'BiosAtaTranslation',
-  'data': ['auto', 'none', 'lba', 'large', 'rechs']}
-
 # @add_client
 #
 # Allow client connections for VNC, Spice and socket based
@@ -133,43 +78,6 @@
 ##
 { 'command': 'query-name', 'returns': 'NameInfo' }
 
-##
-# @VersionInfo:
-#
-# A description of QEMU's version.
-#
-# @qemu.major:  The major version of QEMU
-#
-# @qemu.minor:  The minor version of QEMU
-#
-# @qemu.micro:  The micro version of QEMU.  By current convention, a micro
-#               version of 50 signifies a development branch.  A micro version
-#               greater than or equal to 90 signifies a release candidate for
-#               the next minor version.  A micro version of less than 50
-#               signifies a stable release.
-#
-# @package:     QEMU will always set this field to an empty string.  Downstream
-#               versions of QEMU should set this to a non-empty string.  The
-#               exact format depends on the downstream however it highly
-#               recommended that a unique name is used.
-#
-# Since: 0.14.0
-##
-{ 'type': 'VersionInfo',
-  'data': {'qemu': {'major': 'int', 'minor': 'int', 'micro': 'int'},
-           'package': 'str'} }
-
-##
-# @query-version:
-#
-# Returns the current version of QEMU.
-#
-# Returns:  A @VersionInfo object describing the current version of QEMU.
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-version', 'returns': 'VersionInfo' }
-
 ##
 # @KvmInfo:
 #
@@ -241,179 +149,6 @@
             'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
             'guest-panicked' ] }
 
-##
-# @SnapshotInfo
-#
-# @id: unique snapshot id
-#
-# @name: user chosen name
-#
-# @vm-state-size: size of the VM state
-#
-# @date-sec: UTC date of the snapshot in seconds
-#
-# @date-nsec: fractional part in nano seconds to be used with date-sec
-#
-# @vm-clock-sec: VM clock relative to boot in seconds
-#
-# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec
-#
-# Since: 1.3
-#
-##
-
-{ 'type': 'SnapshotInfo',
-  'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
-            'date-sec': 'int', 'date-nsec': 'int',
-            'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } }
-
-##
-# @ImageInfoSpecificQCow2:
-#
-# @compat: compatibility level
-#
-# @lazy-refcounts: #optional on or off; only valid for compat >= 1.1
-#
-# Since: 1.7
-##
-{ 'type': 'ImageInfoSpecificQCow2',
-  'data': {
-      'compat': 'str',
-      '*lazy-refcounts': 'bool'
-  } }
-
-##
-# @ImageInfoSpecificVmdk:
-#
-# @create-type: The create type of VMDK image
-#
-# @cid: Content id of image
-#
-# @parent-cid: Parent VMDK image's cid
-#
-# @extents: List of extent files
-#
-# Since: 1.7
-##
-{ 'type': 'ImageInfoSpecificVmdk',
-  'data': {
-      'create-type': 'str',
-      'cid': 'int',
-      'parent-cid': 'int',
-      'extents': ['ImageInfo']
-  } }
-
-##
-# @ImageInfoSpecific:
-#
-# A discriminated record of image format specific information structures.
-#
-# Since: 1.7
-##
-
-{ 'union': 'ImageInfoSpecific',
-  'data': {
-      'qcow2': 'ImageInfoSpecificQCow2',
-      'vmdk': 'ImageInfoSpecificVmdk'
-  } }
-
-##
-# @ImageInfo:
-#
-# Information about a QEMU image file
-#
-# @filename: name of the image file
-#
-# @format: format of the image file
-#
-# @virtual-size: maximum capacity in bytes of the image
-#
-# @actual-size: #optional actual size on disk in bytes of the image
-#
-# @dirty-flag: #optional true if image is not cleanly closed
-#
-# @cluster-size: #optional size of a cluster in bytes
-#
-# @encrypted: #optional true if the image is encrypted
-#
-# @compressed: #optional true if the image is compressed (Since 1.7)
-#
-# @backing-filename: #optional name of the backing file
-#
-# @full-backing-filename: #optional full path of the backing file
-#
-# @backing-filename-format: #optional the format of the backing file
-#
-# @snapshots: #optional list of VM snapshots
-#
-# @backing-image: #optional info of the backing image (since 1.6)
-#
-# @format-specific: #optional structure supplying additional format-specific
-# information (since 1.7)
-#
-# Since: 1.3
-#
-##
-
-{ 'type': 'ImageInfo',
-  'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
-           '*actual-size': 'int', 'virtual-size': 'int',
-           '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
-           '*backing-filename': 'str', '*full-backing-filename': 'str',
-           '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
-           '*backing-image': 'ImageInfo',
-           '*format-specific': 'ImageInfoSpecific' } }
-
-##
-# @ImageCheck:
-#
-# Information about a QEMU image file check
-#
-# @filename: name of the image file checked
-#
-# @format: format of the image file checked
-#
-# @check-errors: number of unexpected errors occurred during check
-#
-# @image-end-offset: #optional offset (in bytes) where the image ends, this
-#                    field is present if the driver for the image format
-#                    supports it
-#
-# @corruptions: #optional number of corruptions found during the check if any
-#
-# @leaks: #optional number of leaks found during the check if any
-#
-# @corruptions-fixed: #optional number of corruptions fixed during the check
-#                     if any
-#
-# @leaks-fixed: #optional number of leaks fixed during the check if any
-#
-# @total-clusters: #optional total number of clusters, this field is present
-#                  if the driver for the image format supports it
-#
-# @allocated-clusters: #optional total number of allocated clusters, this
-#                      field is present if the driver for the image format
-#                      supports it
-#
-# @fragmented-clusters: #optional total number of fragmented clusters, this
-#                       field is present if the driver for the image format
-#                       supports it
-#
-# @compressed-clusters: #optional total number of compressed clusters, this
-#                       field is present if the driver for the image format
-#                       supports it
-#
-# Since: 1.4
-#
-##
-
-{ 'type': 'ImageCheck',
-  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
-           '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
-           '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
-           '*total-clusters': 'int', '*allocated-clusters': 'int',
-           '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } }
-
 ##
 # @StatusInfo:
 #
@@ -583,28 +318,6 @@
   'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'},
   'returns': 'str' }
 
-##
-# @CommandInfo:
-#
-# Information about a QMP command
-#
-# @name: The command name
-#
-# Since: 0.14.0
-##
-{ 'type': 'CommandInfo', 'data': {'name': 'str'} }
-
-##
-# @query-commands:
-#
-# Return a list of supported QMP commands by this server
-#
-# Returns: A list of @CommandInfo for all supported commands
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-commands', 'returns': ['CommandInfo'] }
-
 ##
 # @EventInfo:
 #
@@ -916,252 +629,6 @@
 ##
 { 'command': 'query-iothreads', 'returns': ['IOThreadInfo'] }
 
-##
-# @BlockDeviceInfo:
-#
-# Information about the backing device for a block device.
-#
-# @file: the filename of the backing device
-#
-# @node-name: #optional the name of the block driver node (Since 2.0)
-#
-# @ro: true if the backing device was open read-only
-#
-# @drv: the name of the block format used to open the backing device. As of
-#       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
-#       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-#       'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
-#       'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
-#
-# @backing_file: #optional the name of the backing file (for copy-on-write)
-#
-# @backing_file_depth: number of files in the backing file chain (since: 1.2)
-#
-# @encrypted: true if the backing device is encrypted
-#
-# @encryption_key_missing: true if the backing device is encrypted but an
-#                          valid encryption key is missing
-#
-# @detect_zeroes: detect and optimize zero writes (Since 2.1)
-#
-# @bps: total throughput limit in bytes per second is specified
-#
-# @bps_rd: read throughput limit in bytes per second is specified
-#
-# @bps_wr: write throughput limit in bytes per second is specified
-#
-# @iops: total I/O operations per second is specified
-#
-# @iops_rd: read I/O operations per second is specified
-#
-# @iops_wr: write I/O operations per second is specified
-#
-# @image: the info of image used (since: 1.6)
-#
-# @bps_max: #optional total max in bytes (Since 1.7)
-#
-# @bps_rd_max: #optional read max in bytes (Since 1.7)
-#
-# @bps_wr_max: #optional write max in bytes (Since 1.7)
-#
-# @iops_max: #optional total I/O operations max (Since 1.7)
-#
-# @iops_rd_max: #optional read I/O operations max (Since 1.7)
-#
-# @iops_wr_max: #optional write I/O operations max (Since 1.7)
-#
-# @iops_size: #optional an I/O size in bytes (Since 1.7)
-#
-# Since: 0.14.0
-#
-##
-{ 'type': 'BlockDeviceInfo',
-  'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
-            '*backing_file': 'str', 'backing_file_depth': 'int',
-            'encrypted': 'bool', 'encryption_key_missing': 'bool',
-            'detect_zeroes': 'BlockdevDetectZeroesOptions',
-            'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
-            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
-            'image': 'ImageInfo',
-            '*bps_max': 'int', '*bps_rd_max': 'int',
-            '*bps_wr_max': 'int', '*iops_max': 'int',
-            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
-            '*iops_size': 'int' } }
-
-##
-# @BlockDeviceIoStatus:
-#
-# An enumeration of block device I/O status.
-#
-# @ok: The last I/O operation has succeeded
-#
-# @failed: The last I/O operation has failed
-#
-# @nospace: The last I/O operation has failed due to a no-space condition
-#
-# Since: 1.0
-##
-{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
-
-##
-# @BlockDeviceMapEntry:
-#
-# Entry in the metadata map of the device (returned by "qemu-img map")
-#
-# @start: Offset in the image of the first byte described by this entry
-#         (in bytes)
-#
-# @length: Length of the range described by this entry (in bytes)
-#
-# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.)
-#         before reaching one for which the range is allocated.  The value is
-#         in the range 0 to the depth of the image chain - 1.
-#
-# @zero: the sectors in this range read as zeros
-#
-# @data: reading the image will actually read data from a file (in particular,
-#        if @offset is present this means that the sectors are not simply
-#        preallocated, but contain actual data in raw format)
-#
-# @offset: if present, the image file stores the data for this range in
-#          raw format at the given offset.
-#
-# Since 1.7
-##
-{ 'type': 'BlockDeviceMapEntry',
-  'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool',
-            'data': 'bool', '*offset': 'int' } }
-
-##
-# @BlockDirtyInfo:
-#
-# Block dirty bitmap information.
-#
-# @count: number of dirty bytes according to the dirty bitmap
-#
-# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
-#
-# Since: 1.3
-##
-{ 'type': 'BlockDirtyInfo',
-  'data': {'count': 'int', 'granularity': 'int'} }
-
-##
-# @BlockInfo:
-#
-# Block device information.  This structure describes a virtual device and
-# the backing device associated with it.
-#
-# @device: The device name associated with the virtual device.
-#
-# @type: This field is returned only for compatibility reasons, it should
-#        not be used (always returns 'unknown')
-#
-# @removable: True if the device supports removable media.
-#
-# @locked: True if the guest has locked this device from having its media
-#          removed
-#
-# @tray_open: #optional True if the device has a tray and it is open
-#             (only present if removable is true)
-#
-# @dirty-bitmaps: #optional dirty bitmaps information (only present if the
-#                 driver has one or more dirty bitmaps) (Since 2.0)
-#
-# @io-status: #optional @BlockDeviceIoStatus. Only present if the device
-#             supports it and the VM is configured to stop on errors
-#
-# @inserted: #optional @BlockDeviceInfo describing the device if media is
-#            present
-#
-# Since:  0.14.0
-##
-{ 'type': 'BlockInfo',
-  'data': {'device': 'str', 'type': 'str', 'removable': 'bool',
-           'locked': 'bool', '*inserted': 'BlockDeviceInfo',
-           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
-           '*dirty-bitmaps': ['BlockDirtyInfo'] } }
-
-##
-# @query-block:
-#
-# Get a list of BlockInfo for all virtual block devices.
-#
-# Returns: a list of @BlockInfo describing each virtual block device
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-block', 'returns': ['BlockInfo'] }
-
-##
-# @BlockDeviceStats:
-#
-# Statistics of a virtual block device or a block backing device.
-#
-# @rd_bytes:      The number of bytes read by the device.
-#
-# @wr_bytes:      The number of bytes written by the device.
-#
-# @rd_operations: The number of read operations performed by the device.
-#
-# @wr_operations: The number of write operations performed by the device.
-#
-# @flush_operations: The number of cache flush operations performed by the
-#                    device (since 0.15.0)
-#
-# @flush_total_time_ns: Total time spend on cache flushes in nano-seconds
-#                       (since 0.15.0).
-#
-# @wr_total_time_ns: Total time spend on writes in nano-seconds (since 0.15.0).
-#
-# @rd_total_time_ns: Total_time_spend on reads in nano-seconds (since 0.15.0).
-#
-# @wr_highest_offset: The offset after the greatest byte written to the
-#                     device.  The intended use of this information is for
-#                     growable sparse files (like qcow2) that are used on top
-#                     of a physical device.
-#
-# Since: 0.14.0
-##
-{ 'type': 'BlockDeviceStats',
-  'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int',
-           'wr_operations': 'int', 'flush_operations': 'int',
-           'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int',
-           'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } }
-
-##
-# @BlockStats:
-#
-# Statistics of a virtual block device or a block backing device.
-#
-# @device: #optional If the stats are for a virtual block device, the name
-#          corresponding to the virtual block device.
-#
-# @stats:  A @BlockDeviceStats for the device.
-#
-# @parent: #optional This describes the file block device if it has one.
-#
-# @backing: #optional This describes the backing block device if it has one.
-#           (Since 2.0)
-#
-# Since: 0.14.0
-##
-{ 'type': 'BlockStats',
-  'data': {'*device': 'str', 'stats': 'BlockDeviceStats',
-           '*parent': 'BlockStats',
-           '*backing': 'BlockStats'} }
-
-##
-# @query-blockstats:
-#
-# Query the @BlockStats for all virtual block devices.
-#
-# Returns: A list of @BlockStats for each virtual block devices.
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-blockstats', 'returns': ['BlockStats'] }
-
 ##
 # @VncClientInfo:
 #
@@ -1500,105 +967,6 @@
 ##
 { 'command': 'query-pci', 'returns': ['PciInfo'] }
 
-##
-# @BlockdevOnError:
-#
-# An enumeration of possible behaviors for errors on I/O operations.
-# The exact meaning depends on whether the I/O was initiated by a guest
-# or by a block job
-#
-# @report: for guest operations, report the error to the guest;
-#          for jobs, cancel the job
-#
-# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
-#          or BLOCK_JOB_ERROR)
-#
-# @enospc: same as @stop on ENOSPC, same as @report otherwise.
-#
-# @stop: for guest operations, stop the virtual machine;
-#        for jobs, pause the job
-#
-# Since: 1.3
-##
-{ 'enum': 'BlockdevOnError',
-  'data': ['report', 'ignore', 'enospc', 'stop'] }
-
-##
-# @MirrorSyncMode:
-#
-# An enumeration of possible behaviors for the initial synchronization
-# phase of storage mirroring.
-#
-# @top: copies data in the topmost image to the destination
-#
-# @full: copies data from all images to the destination
-#
-# @none: only copy data written from now on
-#
-# Since: 1.3
-##
-{ 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none'] }
-
-##
-# @BlockJobType:
-#
-# Type of a block job.
-#
-# @commit: block commit job type, see "block-commit"
-#
-# @stream: block stream job type, see "block-stream"
-#
-# @mirror: drive mirror job type, see "drive-mirror"
-#
-# @backup: drive backup job type, see "drive-backup"
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockJobType',
-  'data': ['commit', 'stream', 'mirror', 'backup'] }
-
-##
-# @BlockJobInfo:
-#
-# Information about a long-running block device operation.
-#
-# @type: the job type ('stream' for image streaming)
-#
-# @device: the block device name
-#
-# @len: the maximum progress value
-#
-# @busy: false if the job is known to be in a quiescent state, with
-#        no pending I/O.  Since 1.3.
-#
-# @paused: whether the job is paused or, if @busy is true, will
-#          pause itself as soon as possible.  Since 1.3.
-#
-# @offset: the current progress value
-#
-# @speed: the rate limit, bytes per second
-#
-# @io-status: the status of the job (since 1.3)
-#
-# Since: 1.1
-##
-{ 'type': 'BlockJobInfo',
-  'data': {'type': 'str', 'device': 'str', 'len': 'int',
-           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
-           'io-status': 'BlockDeviceIoStatus'} }
-
-##
-# @query-block-jobs:
-#
-# Return information about long-running block device operations.
-#
-# Returns: a list of @BlockJobInfo for each active block job
-#
-# Since: 1.1
-##
-{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
-
 ##
 # @quit:
 #
@@ -1778,43 +1146,6 @@
 ##
 { 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
 
-##
-# @block_passwd:
-#
-# This command sets the password of a block device that has not been open
-# with a password and requires one.
-#
-# The two cases where this can happen are a block device is created through
-# QEMU's initial command line or a block device is changed through the legacy
-# @change interface.
-#
-# In the event that the block device is created through the initial command
-# line, the VM will start in the stopped state regardless of whether '-S' is
-# used.  The intention is for a management tool to query the block devices to
-# determine which ones are encrypted, set the passwords with this command, and
-# then start the guest with the @cont command.
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the block backend device to set the password on
-#
-# @node-name: #optional graph node name to set the password on (Since 2.0)
-#
-# @password: the password to use for the device
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#          If @device is not encrypted, DeviceNotEncrypted
-#
-# Notes:  Not all block formats support encryption and some that do are not
-#         able to validate that a password is correct.  Disk corruption may
-#         occur if an invalid password is specified.
-#
-# Since: 0.14.0
-##
-{ 'command': 'block_passwd', 'data': {'*device': 'str',
-                                      '*node-name': 'str', 'password': 'str'} }
-
 ##
 # @balloon:
 #
@@ -1835,126 +1166,6 @@
 ##
 { 'command': 'balloon', 'data': {'value': 'int'} }
 
-##
-# @block_resize
-#
-# Resize a block image while a guest is running.
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the device to get the image resized
-#
-# @node-name: #optional graph node name to get the image resized (Since 2.0)
-#
-# @size:  new image size in bytes
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since: 0.14.0
-##
-{ 'command': 'block_resize', 'data': { '*device': 'str',
-                                       '*node-name': 'str',
-                                       'size': 'int' }}
-
-##
-# @NewImageMode
-#
-# An enumeration that tells QEMU how to set the backing file path in
-# a new image file.
-#
-# @existing: QEMU should look for an existing image file.
-#
-# @absolute-paths: QEMU should create a new image with absolute paths
-# for the backing file. If there is no backing file available, the new
-# image will not be backed either.
-#
-# Since: 1.1
-##
-{ 'enum': 'NewImageMode',
-  'data': [ 'existing', 'absolute-paths' ] }
-
-##
-# @BlockdevSnapshot
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the device to generate the snapshot from.
-#
-# @node-name: #optional graph node name to generate the snapshot from (Since 2.0)
-#
-# @snapshot-file: the target of the new image. A new file will be created.
-#
-# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0)
-#
-# @format: #optional the format of the snapshot image, default is 'qcow2'.
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
-##
-{ 'type': 'BlockdevSnapshot',
-  'data': { '*device': 'str', '*node-name': 'str',
-            'snapshot-file': 'str', '*snapshot-node-name': 'str',
-            '*format': 'str', '*mode': 'NewImageMode' } }
-
-##
-# @BlockdevSnapshotInternal
-#
-# @device: the name of the device to generate the snapshot from
-#
-# @name: the name of the internal snapshot to be created
-#
-# Notes: In transaction, if @name is empty, or any snapshot matching @name
-#        exists, the operation will fail. Only some image formats support it,
-#        for example, qcow2, rbd, and sheepdog.
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevSnapshotInternal',
-  'data': { 'device': 'str', 'name': 'str' } }
-
-##
-# @DriveBackup
-#
-# @device: the name of the device which should be copied.
-#
-# @target: the target of the new image. If the file exists, or if it
-#          is a device, the existing file/device will be used as the new
-#          destination.  If it does not exist, a new file will be created.
-#
-# @format: #optional the format of the new destination, default is to
-#          probe if @mode is 'existing', else the format of the source
-#
-# @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
-#
-# @speed: #optional the maximum speed, in bytes per second
-#
-# @on-source-error: #optional the action to take on an error on the source,
-#                   default 'report'.  'stop' and 'enospc' can only be used
-#                   if the block device supports io-status (see BlockInfo).
-#
-# @on-target-error: #optional the action to take on an error on the target,
-#                   default 'report' (no limitations, since this applies to
-#                   a different block device than @device).
-#
-# Note that @on-source-error and @on-target-error only affect background I/O.
-# If an error occurs during a guest write request, the device's rerror/werror
-# actions will be used.
-#
-# Since: 1.6
-##
-{ 'type': 'DriveBackup',
-  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
-            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int',
-            '*on-source-error': 'BlockdevOnError',
-            '*on-target-error': 'BlockdevOnError' } }
-
 ##
 # @Abort
 #
@@ -2001,68 +1212,6 @@
 { 'command': 'transaction',
   'data': { 'actions': [ 'TransactionAction' ] } }
 
-##
-# @blockdev-snapshot-sync
-#
-# Generates a synchronous snapshot of a block device.
-#
-# For the arguments, see the documentation of BlockdevSnapshot.
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since 0.14.0
-##
-{ 'command': 'blockdev-snapshot-sync',
-  'data': 'BlockdevSnapshot' }
-
-##
-# @blockdev-snapshot-internal-sync
-#
-# Synchronously take an internal snapshot of a block device, when the format
-# of the image used supports it.
-#
-# For the arguments, see the documentation of BlockdevSnapshotInternal.
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#          If any snapshot matching @name exists, or @name is empty,
-#          GenericError
-#          If the format of the image used does not support it,
-#          BlockFormatFeatureNotSupported
-#
-# Since 1.7
-##
-{ 'command': 'blockdev-snapshot-internal-sync',
-  'data': 'BlockdevSnapshotInternal' }
-
-##
-# @blockdev-snapshot-delete-internal-sync
-#
-# Synchronously delete an internal snapshot of a block device, when the format
-# of the image used support it. The snapshot is identified by name or id or
-# both. One of the name or id is required. Return SnapshotInfo for the
-# successfully deleted snapshot.
-#
-# @device: the name of the device to delete the snapshot from
-#
-# @id: optional the snapshot's ID to be deleted
-#
-# @name: optional the snapshot's name to be deleted
-#
-# Returns: SnapshotInfo on success
-#          If @device is not a valid block device, DeviceNotFound
-#          If snapshot not found, GenericError
-#          If the format of the image used does not support it,
-#          BlockFormatFeatureNotSupported
-#          If @id and @name are both not specified, GenericError
-#
-# Since 1.7
-##
-{ 'command': 'blockdev-snapshot-delete-internal-sync',
-  'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
-  'returns': 'SnapshotInfo' }
-
 ##
 # @human-monitor-command:
 #
@@ -2091,129 +1240,6 @@
   'data': {'command-line': 'str', '*cpu-index': 'int'},
   'returns': 'str' }
 
-##
-# @block-commit
-#
-# Live commit of data from overlay image nodes into backing nodes - i.e.,
-# writes data between 'top' and 'base' into 'base'.
-#
-# @device:  the name of the device
-#
-# @base:   #optional The file name of the backing image to write data into.
-#                    If not specified, this is the deepest backing image
-#
-# @top:              The file name of the backing image within the image chain,
-#                    which contains the topmost data to be committed down.
-#
-#                    If top == base, that is an error.
-#                    If top == active, the job will not be completed by itself,
-#                    user needs to complete the job with the block-job-complete
-#                    command after getting the ready event. (Since 2.0)
-#
-#                    If the base image is smaller than top, then the base image
-#                    will be resized to be the same size as top.  If top is
-#                    smaller than the base image, the base will not be
-#                    truncated.  If you want the base image size to match the
-#                    size of the smaller top, you can safely truncate it
-#                    yourself once the commit operation successfully completes.
-#
-#
-# @speed:  #optional the maximum speed, in bytes per second
-#
-# Returns: Nothing on success
-#          If commit or stream is already active on this device, DeviceInUse
-#          If @device does not exist, DeviceNotFound
-#          If image commit is not supported by this device, NotSupported
-#          If @base or @top is invalid, a generic error is returned
-#          If @speed is invalid, InvalidParameter
-#
-# Since: 1.3
-#
-##
-{ 'command': 'block-commit',
-  'data': { 'device': 'str', '*base': 'str', 'top': 'str',
-            '*speed': 'int' } }
-
-##
-# @drive-backup
-#
-# Start a point-in-time copy of a block device to a new destination.  The
-# status of ongoing drive-backup operations can be checked with
-# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
-# The operation can be stopped before it has completed using the
-# block-job-cancel command.
-#
-# For the arguments, see the documentation of DriveBackup.
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since 1.6
-##
-{ 'command': 'drive-backup', 'data': 'DriveBackup' }
-
-##
-# @query-named-block-nodes
-#
-# Get the named block driver list
-#
-# Returns: the list of BlockDeviceInfo
-#
-# Since 2.0
-##
-{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] }
-
-##
-# @drive-mirror
-#
-# Start mirroring a block device's writes to a new destination.
-#
-# @device:  the name of the device whose writes should be mirrored.
-#
-# @target: the target of the new image. If the file exists, or if it
-#          is a device, the existing file/device will be used as the new
-#          destination.  If it does not exist, a new file will be created.
-#
-# @format: #optional the format of the new destination, default is to
-#          probe if @mode is 'existing', else the format of the source
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
-#
-# @speed:  #optional the maximum speed, in bytes per second
-#
-# @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
-#
-# @granularity: #optional granularity of the dirty bitmap, default is 64K
-#               if the image format doesn't have clusters, 4K if the clusters
-#               are smaller than that, else the cluster size.  Must be a
-#               power of 2 between 512 and 64M (since 1.4).
-#
-# @buf-size: #optional maximum amount of data in flight from source to
-#            target (since 1.4).
-#
-# @on-source-error: #optional the action to take on an error on the source,
-#                   default 'report'.  'stop' and 'enospc' can only be used
-#                   if the block device supports io-status (see BlockInfo).
-#
-# @on-target-error: #optional the action to take on an error on the target,
-#                   default 'report' (no limitations, since this applies to
-#                   a different block device than @device).
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since 1.3
-##
-{ 'command': 'drive-mirror',
-  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
-            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int', '*granularity': 'uint32',
-            '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
-            '*on-target-error': 'BlockdevOnError' } }
-
 ##
 # @migrate_cancel
 #
@@ -2429,25 +1455,6 @@
 ##
 { 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} }
 
-##
-# @eject:
-#
-# Ejects a device from a removable drive.
-#
-# @device:  The name of the device
-#
-# @force:   @optional If true, eject regardless of whether the drive is locked.
-#           If not specified, the default value is false.
-#
-# Returns:  Nothing on success
-#           If @device is not a valid block device, DeviceNotFound
-#
-# Notes:    Ejecting a device will no media results in success
-#
-# Since: 0.14.0
-##
-{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} }
-
 ##
 # @change-vnc-password:
 #
@@ -2497,211 +1504,6 @@
 { 'command': 'change',
   'data': {'device': 'str', 'target': 'str', '*arg': 'str'} }
 
-##
-# @block_set_io_throttle:
-#
-# Change I/O throttle limits for a block drive.
-#
-# @device: The name of the device
-#
-# @bps: total throughput limit in bytes per second
-#
-# @bps_rd: read throughput limit in bytes per second
-#
-# @bps_wr: write throughput limit in bytes per second
-#
-# @iops: total I/O operations per second
-#
-# @ops_rd: read I/O operations per second
-#
-# @iops_wr: write I/O operations per second
-#
-# @bps_max: #optional total max in bytes (Since 1.7)
-#
-# @bps_rd_max: #optional read max in bytes (Since 1.7)
-#
-# @bps_wr_max: #optional write max in bytes (Since 1.7)
-#
-# @iops_max: #optional total I/O operations max (Since 1.7)
-#
-# @iops_rd_max: #optional read I/O operations max (Since 1.7)
-#
-# @iops_wr_max: #optional write I/O operations max (Since 1.7)
-#
-# @iops_size: #optional an I/O size in bytes (Since 1.7)
-#
-# Returns: Nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since: 1.1
-##
-{ 'command': 'block_set_io_throttle',
-  'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
-            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
-            '*bps_max': 'int', '*bps_rd_max': 'int',
-            '*bps_wr_max': 'int', '*iops_max': 'int',
-            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
-            '*iops_size': 'int' } }
-
-##
-# @block-stream:
-#
-# Copy data from a backing file into a block device.
-#
-# The block streaming operation is performed in the background until the entire
-# backing file has been copied.  This command returns immediately once streaming
-# has started.  The status of ongoing block streaming operations can be checked
-# with query-block-jobs.  The operation can be stopped before it has completed
-# using the block-job-cancel command.
-#
-# If a base file is specified then sectors are not copied from that base file and
-# its backing chain.  When streaming completes the image file will have the base
-# file as its backing file.  This can be used to stream a subset of the backing
-# file chain instead of flattening the entire image.
-#
-# On successful completion the image file is updated to drop the backing file
-# and the BLOCK_JOB_COMPLETED event is emitted.
-#
-# @device: the device name
-#
-# @base:   #optional the common backing file name
-#
-# @speed:  #optional the maximum speed, in bytes per second
-#
-# @on-error: #optional the action to take on an error (default report).
-#            'stop' and 'enospc' can only be used if the block device
-#            supports io-status (see BlockInfo).  Since 1.3.
-#
-# Returns: Nothing on success
-#          If @device does not exist, DeviceNotFound
-#
-# Since: 1.1
-##
-{ 'command': 'block-stream',
-  'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
-            '*on-error': 'BlockdevOnError' } }
-
-##
-# @block-job-set-speed:
-#
-# Set maximum speed for a background block operation.
-#
-# This command can only be issued when there is an active block job.
-#
-# Throttling can be disabled by setting the speed to 0.
-#
-# @device: the device name
-#
-# @speed:  the maximum speed, in bytes per second, or 0 for unlimited.
-#          Defaults to 0.
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.1
-##
-{ 'command': 'block-job-set-speed',
-  'data': { 'device': 'str', 'speed': 'int' } }
-
-##
-# @block-job-cancel:
-#
-# Stop an active background block operation.
-#
-# This command returns immediately after marking the active background block
-# operation for cancellation.  It is an error to call this command if no
-# operation is in progress.
-#
-# The operation will cancel as soon as possible and then emit the
-# BLOCK_JOB_CANCELLED event.  Before that happens the job is still visible when
-# enumerated using query-block-jobs.
-#
-# For streaming, the image file retains its backing file unless the streaming
-# operation happens to complete just as it is being cancelled.  A new streaming
-# operation can be started at a later time to finish copying all data from the
-# backing file.
-#
-# @device: the device name
-#
-# @force: #optional whether to allow cancellation of a paused job (default
-#         false).  Since 1.3.
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.1
-##
-{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
-
-##
-# @block-job-pause:
-#
-# Pause an active background block operation.
-#
-# This command returns immediately after marking the active background block
-# operation for pausing.  It is an error to call this command if no
-# operation is in progress.  Pausing an already paused job has no cumulative
-# effect; a single block-job-resume command will resume the job.
-#
-# The operation will pause as soon as possible.  No event is emitted when
-# the operation is actually paused.  Cancelling a paused job automatically
-# resumes it.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
-
-##
-# @block-job-resume:
-#
-# Resume an active background block operation.
-#
-# This command returns immediately after resuming a paused background block
-# operation.  It is an error to call this command if no operation is in
-# progress.  Resuming an already running job is not an error.
-#
-# This command also clears the error status of the job.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
-
-##
-# @block-job-complete:
-#
-# Manually trigger completion of an active background block operation.  This
-# is supported for drive mirroring, where it also switches the device to
-# write to the target path only.  The ability to complete is signaled with
-# a BLOCK_JOB_READY event.
-#
-# This command completes an active background block operation synchronously.
-# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
-# is not defined.  Note that if an I/O error occurs during the processing of
-# this command: 1) the command itself will fail; 2) the error will be processed
-# according to the rerror/werror arguments that were specified when starting
-# the operation.
-#
-# A cancelled or paused job cannot be completed.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
-
 ##
 # @ObjectTypeInfo:
 #
@@ -3660,49 +2462,6 @@
 ##
 { 'command': 'screendump', 'data': {'filename': 'str'} }
 
-##
-# @nbd-server-start:
-#
-# Start an NBD server listening on the given host and port.  Block
-# devices can then be exported using @nbd-server-add.  The NBD
-# server will present them as named exports; for example, another
-# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
-#
-# @addr: Address on which to listen.
-#
-# Returns: error if the server is already running.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-start',
-  'data': { 'addr': 'SocketAddress' } }
-
-##
-# @nbd-server-add:
-#
-# Export a device to QEMU's embedded NBD server.
-#
-# @device: Block device to be exported
-#
-# @writable: Whether clients should be able to write to the device via the
-#     NBD connection (default false). #optional
-#
-# Returns: error if the device is already marked for export.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} }
-
-##
-# @nbd-server-stop:
-#
-# Stop QEMU's embedded NBD server, and unregister all devices previously
-# added via @nbd-server-add.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-stop' }
-
 ##
 # @ChardevFile:
 #
@@ -4243,410 +3002,6 @@
 { 'command': 'query-rx-filter', 'data': { '*name': 'str' },
   'returns': ['RxFilterInfo'] }
 
-
-##
-# @BlockdevDiscardOptions
-#
-# Determines how to handle discard requests.
-#
-# @ignore:      Ignore the request
-# @unmap:       Forward as an unmap request
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockdevDiscardOptions',
-  'data': [ 'ignore', 'unmap' ] }
-
-##
-# @BlockdevDetectZeroesOptions
-#
-# Describes the operation mode for the automatic conversion of plain
-# zero writes by the OS to driver specific optimized zero write commands.
-#
-# @off:      Disabled (default)
-# @on:       Enabled
-# @unmap:    Enabled and even try to unmap blocks if possible. This requires
-#            also that @BlockdevDiscardOptions is set to unmap for this device.
-#
-# Since: 2.1
-##
-{ 'enum': 'BlockdevDetectZeroesOptions',
-  'data': [ 'off', 'on', 'unmap' ] }
-
-##
-# @BlockdevAioOptions
-#
-# Selects the AIO backend to handle I/O requests
-#
-# @threads:     Use qemu's thread pool
-# @native:      Use native AIO backend (only Linux and Windows)
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockdevAioOptions',
-  'data': [ 'threads', 'native' ] }
-
-##
-# @BlockdevCacheOptions
-#
-# Includes cache-related options for block devices
-#
-# @writeback:   #optional enables writeback mode for any caches (default: true)
-# @direct:      #optional enables use of O_DIRECT (bypass the host page cache;
-#               default: false)
-# @no-flush:    #optional ignore any flush requests for the device (default:
-#               false)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevCacheOptions',
-  'data': { '*writeback': 'bool',
-            '*direct': 'bool',
-            '*no-flush': 'bool' } }
-
-##
-# @BlockdevDriver
-#
-# Drivers that are supported in block device operations.
-#
-# @host_device, @host_cdrom, @host_floppy: Since 2.1
-#
-# Since: 2.0
-##
-{ 'enum': 'BlockdevDriver',
-  'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
-            'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
-            'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
-            'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
-
-##
-# @BlockdevOptionsBase
-#
-# Options that are available for all block devices, independent of the block
-# driver.
-#
-# @driver:        block driver name
-# @id:            #optional id by which the new block device can be referred to.
-#                 This is a required option on the top level of blockdev-add, and
-#                 currently not allowed on any other level.
-# @node-name:     #optional the name of a block driver state node (Since 2.0)
-# @discard:       #optional discard-related options (default: ignore)
-# @cache:         #optional cache-related options
-# @aio:           #optional AIO backend (default: threads)
-# @rerror:        #optional how to handle read errors on the device
-#                 (default: report)
-# @werror:        #optional how to handle write errors on the device
-#                 (default: enospc)
-# @read-only:     #optional whether the block device should be read-only
-#                 (default: false)
-# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
-#                 (default: off)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsBase',
-  'data': { 'driver': 'BlockdevDriver',
-            '*id': 'str',
-            '*node-name': 'str',
-            '*discard': 'BlockdevDiscardOptions',
-            '*cache': 'BlockdevCacheOptions',
-            '*aio': 'BlockdevAioOptions',
-            '*rerror': 'BlockdevOnError',
-            '*werror': 'BlockdevOnError',
-            '*read-only': 'bool',
-            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
-
-##
-# @BlockdevOptionsFile
-#
-# Driver specific block device options for the file backend and similar
-# protocols.
-#
-# @filename:    path to the image file
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsFile',
-  'data': { 'filename': 'str' } }
-
-##
-# @BlockdevOptionsVVFAT
-#
-# Driver specific block device options for the vvfat protocol.
-#
-# @dir:         directory to be exported as FAT image
-# @fat-type:    #optional FAT type: 12, 16 or 32
-# @floppy:      #optional whether to export a floppy image (true) or
-#               partitioned hard disk (false; default)
-# @rw:          #optional whether to allow write operations (default: false)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsVVFAT',
-  'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool',
-            '*rw': 'bool' } }
-
-##
-# @BlockdevOptionsGenericFormat
-#
-# Driver specific block device options for image format that have no option
-# besides their data source.
-#
-# @file:        reference to or definition of the data source block device
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsGenericFormat',
-  'data': { 'file': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsGenericCOWFormat
-#
-# Driver specific block device options for image format that have no option
-# besides their data source and an optional backing file.
-#
-# @backing:     #optional reference to or definition of the backing file block
-#               device (if missing, taken from the image file content). It is
-#               allowed to pass an empty string here in order to disable the
-#               default backing file.
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsGenericCOWFormat',
-  'base': 'BlockdevOptionsGenericFormat',
-  'data': { '*backing': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsQcow2
-#
-# Driver specific block device options for qcow2.
-#
-# @lazy-refcounts:        #optional whether to enable the lazy refcounts
-#                         feature (default is taken from the image file)
-#
-# @pass-discard-request:  #optional whether discard requests to the qcow2
-#                         device should be forwarded to the data source
-#
-# @pass-discard-snapshot: #optional whether discard requests for the data source
-#                         should be issued when a snapshot operation (e.g.
-#                         deleting a snapshot) frees clusters in the qcow2 file
-#
-# @pass-discard-other:    #optional whether discard requests for the data source
-#                         should be issued on other occasions where a cluster
-#                         gets freed
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsQcow2',
-  'base': 'BlockdevOptionsGenericCOWFormat',
-  'data': { '*lazy-refcounts': 'bool',
-            '*pass-discard-request': 'bool',
-            '*pass-discard-snapshot': 'bool',
-            '*pass-discard-other': 'bool' } }
-
-##
-# @BlkdebugEvent
-#
-# Trigger events supported by blkdebug.
-##
-{ 'enum': 'BlkdebugEvent',
-  'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table',
-            'l1_grow.activate_table', 'l2_load', 'l2_update',
-            'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write',
-            'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio',
-            'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read',
-            'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update',
-            'refblock_load', 'refblock_update', 'refblock_update_part',
-            'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write',
-            'refblock_alloc.write_blocks', 'refblock_alloc.write_table',
-            'refblock_alloc.switch_table', 'cluster_alloc',
-            'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
-            'flush_to_disk' ] }
-
-##
-# @BlkdebugInjectErrorOptions
-#
-# Describes a single error injection for blkdebug.
-#
-# @event:       trigger event
-#
-# @state:       #optional the state identifier blkdebug needs to be in to
-#               actually trigger the event; defaults to "any"
-#
-# @errno:       #optional error identifier (errno) to be returned; defaults to
-#               EIO
-#
-# @sector:      #optional specifies the sector index which has to be affected
-#               in order to actually trigger the event; defaults to "any
-#               sector"
-#
-# @once:        #optional disables further events after this one has been
-#               triggered; defaults to false
-#
-# @immediately: #optional fail immediately; defaults to false
-#
-# Since: 2.0
-##
-{ 'type': 'BlkdebugInjectErrorOptions',
-  'data': { 'event': 'BlkdebugEvent',
-            '*state': 'int',
-            '*errno': 'int',
-            '*sector': 'int',
-            '*once': 'bool',
-            '*immediately': 'bool' } }
-
-##
-# @BlkdebugSetStateOptions
-#
-# Describes a single state-change event for blkdebug.
-#
-# @event:       trigger event
-#
-# @state:       #optional the current state identifier blkdebug needs to be in;
-#               defaults to "any"
-#
-# @new_state:   the state identifier blkdebug is supposed to assume if
-#               this event is triggered
-#
-# Since: 2.0
-##
-{ 'type': 'BlkdebugSetStateOptions',
-  'data': { 'event': 'BlkdebugEvent',
-            '*state': 'int',
-            'new_state': 'int' } }
-
-##
-# @BlockdevOptionsBlkdebug
-#
-# Driver specific block device options for blkdebug.
-#
-# @image:           underlying raw block device (or image file)
-#
-# @config:          #optional filename of the configuration file
-#
-# @align:           #optional required alignment for requests in bytes
-#
-# @inject-error:    #optional array of error injection descriptions
-#
-# @set-state:       #optional array of state-change descriptions
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsBlkdebug',
-  'data': { 'image': 'BlockdevRef',
-            '*config': 'str',
-            '*align': 'int',
-            '*inject-error': ['BlkdebugInjectErrorOptions'],
-            '*set-state': ['BlkdebugSetStateOptions'] } }
-
-##
-# @BlockdevOptionsBlkverify
-#
-# Driver specific block device options for blkverify.
-#
-# @test:    block device to be tested
-#
-# @raw:     raw image used for verification
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsBlkverify',
-  'data': { 'test': 'BlockdevRef',
-            'raw': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsQuorum
-#
-# Driver specific block device options for Quorum
-#
-# @blkverify:      #optional true if the driver must print content mismatch
-#                  set to false by default
-#
-# @children:       the children block devices to use
-#
-# @vote-threshold: the vote limit under which a read will fail
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsQuorum',
-  'data': { '*blkverify': 'bool',
-            'children': [ 'BlockdevRef' ],
-            'vote-threshold': 'int' } }
-
-##
-# @BlockdevOptions
-#
-# Options for creating a block device.
-#
-# Since: 1.7
-##
-{ 'union': 'BlockdevOptions',
-  'base': 'BlockdevOptionsBase',
-  'discriminator': 'driver',
-  'data': {
-      'file':       'BlockdevOptionsFile',
-      'host_device':'BlockdevOptionsFile',
-      'host_cdrom': 'BlockdevOptionsFile',
-      'host_floppy':'BlockdevOptionsFile',
-      'http':       'BlockdevOptionsFile',
-      'https':      'BlockdevOptionsFile',
-      'ftp':        'BlockdevOptionsFile',
-      'ftps':       'BlockdevOptionsFile',
-      'tftp':       'BlockdevOptionsFile',
-# TODO gluster: Wait for structured options
-# TODO iscsi: Wait for structured options
-# TODO nbd: Should take InetSocketAddress for 'host'?
-# TODO nfs: Wait for structured options
-# TODO rbd: Wait for structured options
-# TODO sheepdog: Wait for structured options
-# TODO ssh: Should take InetSocketAddress for 'host'?
-      'vvfat':      'BlockdevOptionsVVFAT',
-      'blkdebug':   'BlockdevOptionsBlkdebug',
-      'blkverify':  'BlockdevOptionsBlkverify',
-      'bochs':      'BlockdevOptionsGenericFormat',
-      'cloop':      'BlockdevOptionsGenericFormat',
-      'cow':        'BlockdevOptionsGenericCOWFormat',
-      'dmg':        'BlockdevOptionsGenericFormat',
-      'parallels':  'BlockdevOptionsGenericFormat',
-      'qcow':       'BlockdevOptionsGenericCOWFormat',
-      'qcow2':      'BlockdevOptionsQcow2',
-      'qed':        'BlockdevOptionsGenericCOWFormat',
-      'raw':        'BlockdevOptionsGenericFormat',
-      'vdi':        'BlockdevOptionsGenericFormat',
-      'vhdx':       'BlockdevOptionsGenericFormat',
-      'vmdk':       'BlockdevOptionsGenericCOWFormat',
-      'vpc':        'BlockdevOptionsGenericFormat',
-      'quorum':     'BlockdevOptionsQuorum'
-  } }
-
-##
-# @BlockdevRef
-#
-# Reference to a block device.
-#
-# @definition:      defines a new block device inline
-# @reference:       references the ID of an existing block device. An
-#                   empty string means that no block device should be
-#                   referenced.
-#
-# Since: 1.7
-##
-{ 'union': 'BlockdevRef',
-  'discriminator': {},
-  'data': { 'definition': 'BlockdevOptions',
-            'reference': 'str' } }
-
-##
-# @blockdev-add:
-#
-# Creates a new block device.
-#
-# @options: block device options for the new device
-#
-# Since: 1.7
-##
-{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
-
 ##
 # @InputButton
 #
diff --git a/qapi/block-core.json b/qapi/block-core.json
new file mode 100644
index 0000000000..7215e48130
--- /dev/null
+++ b/qapi/block-core.json
@@ -0,0 +1,1412 @@
+# -*- Mode: Python -*-
+#
+# QAPI block core definitions (vm unrelated)
+
+# QAPI common definitions
+{ 'include': 'common.json' }
+
+##
+# @SnapshotInfo
+#
+# @id: unique snapshot id
+#
+# @name: user chosen name
+#
+# @vm-state-size: size of the VM state
+#
+# @date-sec: UTC date of the snapshot in seconds
+#
+# @date-nsec: fractional part in nano seconds to be used with date-sec
+#
+# @vm-clock-sec: VM clock relative to boot in seconds
+#
+# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec
+#
+# Since: 1.3
+#
+##
+
+{ 'type': 'SnapshotInfo',
+  'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
+            'date-sec': 'int', 'date-nsec': 'int',
+            'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } }
+
+##
+# @ImageInfoSpecificQCow2:
+#
+# @compat: compatibility level
+#
+# @lazy-refcounts: #optional on or off; only valid for compat >= 1.1
+#
+# Since: 1.7
+##
+{ 'type': 'ImageInfoSpecificQCow2',
+  'data': {
+      'compat': 'str',
+      '*lazy-refcounts': 'bool'
+  } }
+
+##
+# @ImageInfoSpecificVmdk:
+#
+# @create-type: The create type of VMDK image
+#
+# @cid: Content id of image
+#
+# @parent-cid: Parent VMDK image's cid
+#
+# @extents: List of extent files
+#
+# Since: 1.7
+##
+{ 'type': 'ImageInfoSpecificVmdk',
+  'data': {
+      'create-type': 'str',
+      'cid': 'int',
+      'parent-cid': 'int',
+      'extents': ['ImageInfo']
+  } }
+
+##
+# @ImageInfoSpecific:
+#
+# A discriminated record of image format specific information structures.
+#
+# Since: 1.7
+##
+
+{ 'union': 'ImageInfoSpecific',
+  'data': {
+      'qcow2': 'ImageInfoSpecificQCow2',
+      'vmdk': 'ImageInfoSpecificVmdk'
+  } }
+
+##
+# @ImageInfo:
+#
+# Information about a QEMU image file
+#
+# @filename: name of the image file
+#
+# @format: format of the image file
+#
+# @virtual-size: maximum capacity in bytes of the image
+#
+# @actual-size: #optional actual size on disk in bytes of the image
+#
+# @dirty-flag: #optional true if image is not cleanly closed
+#
+# @cluster-size: #optional size of a cluster in bytes
+#
+# @encrypted: #optional true if the image is encrypted
+#
+# @compressed: #optional true if the image is compressed (Since 1.7)
+#
+# @backing-filename: #optional name of the backing file
+#
+# @full-backing-filename: #optional full path of the backing file
+#
+# @backing-filename-format: #optional the format of the backing file
+#
+# @snapshots: #optional list of VM snapshots
+#
+# @backing-image: #optional info of the backing image (since 1.6)
+#
+# @format-specific: #optional structure supplying additional format-specific
+# information (since 1.7)
+#
+# Since: 1.3
+#
+##
+
+{ 'type': 'ImageInfo',
+  'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
+           '*actual-size': 'int', 'virtual-size': 'int',
+           '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
+           '*backing-filename': 'str', '*full-backing-filename': 'str',
+           '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
+           '*backing-image': 'ImageInfo',
+           '*format-specific': 'ImageInfoSpecific' } }
+
+##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @image-end-offset: #optional offset (in bytes) where the image ends, this
+#                    field is present if the driver for the image format
+#                    supports it
+#
+# @corruptions: #optional number of corruptions found during the check if any
+#
+# @leaks: #optional number of leaks found during the check if any
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+#                     if any
+#
+# @leaks-fixed: #optional number of leaks fixed during the check if any
+#
+# @total-clusters: #optional total number of clusters, this field is present
+#                  if the driver for the image format supports it
+#
+# @allocated-clusters: #optional total number of allocated clusters, this
+#                      field is present if the driver for the image format
+#                      supports it
+#
+# @fragmented-clusters: #optional total number of fragmented clusters, this
+#                       field is present if the driver for the image format
+#                       supports it
+#
+# @compressed-clusters: #optional total number of compressed clusters, this
+#                       field is present if the driver for the image format
+#                       supports it
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+           '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+           '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+           '*total-clusters': 'int', '*allocated-clusters': 'int',
+           '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } }
+
+##
+# @BlockDeviceInfo:
+#
+# Information about the backing device for a block device.
+#
+# @file: the filename of the backing device
+#
+# @node-name: #optional the name of the block driver node (Since 2.0)
+#
+# @ro: true if the backing device was open read-only
+#
+# @drv: the name of the block format used to open the backing device. As of
+#       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
+#       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
+#       'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
+#       'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
+#
+# @backing_file: #optional the name of the backing file (for copy-on-write)
+#
+# @backing_file_depth: number of files in the backing file chain (since: 1.2)
+#
+# @encrypted: true if the backing device is encrypted
+#
+# @encryption_key_missing: true if the backing device is encrypted but an
+#                          valid encryption key is missing
+#
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
+# @bps: total throughput limit in bytes per second is specified
+#
+# @bps_rd: read throughput limit in bytes per second is specified
+#
+# @bps_wr: write throughput limit in bytes per second is specified
+#
+# @iops: total I/O operations per second is specified
+#
+# @iops_rd: read I/O operations per second is specified
+#
+# @iops_wr: write I/O operations per second is specified
+#
+# @image: the info of image used (since: 1.6)
+#
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
+# Since: 0.14.0
+#
+##
+{ 'type': 'BlockDeviceInfo',
+  'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
+            '*backing_file': 'str', 'backing_file_depth': 'int',
+            'encrypted': 'bool', 'encryption_key_missing': 'bool',
+            'detect_zeroes': 'BlockdevDetectZeroesOptions',
+            'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
+            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+            'image': 'ImageInfo',
+            '*bps_max': 'int', '*bps_rd_max': 'int',
+            '*bps_wr_max': 'int', '*iops_max': 'int',
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+            '*iops_size': 'int' } }
+
+##
+# @BlockDeviceIoStatus:
+#
+# An enumeration of block device I/O status.
+#
+# @ok: The last I/O operation has succeeded
+#
+# @failed: The last I/O operation has failed
+#
+# @nospace: The last I/O operation has failed due to a no-space condition
+#
+# Since: 1.0
+##
+{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
+
+##
+# @BlockDeviceMapEntry:
+#
+# Entry in the metadata map of the device (returned by "qemu-img map")
+#
+# @start: Offset in the image of the first byte described by this entry
+#         (in bytes)
+#
+# @length: Length of the range described by this entry (in bytes)
+#
+# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.)
+#         before reaching one for which the range is allocated.  The value is
+#         in the range 0 to the depth of the image chain - 1.
+#
+# @zero: the sectors in this range read as zeros
+#
+# @data: reading the image will actually read data from a file (in particular,
+#        if @offset is present this means that the sectors are not simply
+#        preallocated, but contain actual data in raw format)
+#
+# @offset: if present, the image file stores the data for this range in
+#          raw format at the given offset.
+#
+# Since 1.7
+##
+{ 'type': 'BlockDeviceMapEntry',
+  'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool',
+            'data': 'bool', '*offset': 'int' } }
+
+##
+# @BlockDirtyInfo:
+#
+# Block dirty bitmap information.
+#
+# @count: number of dirty bytes according to the dirty bitmap
+#
+# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
+#
+# Since: 1.3
+##
+{ 'type': 'BlockDirtyInfo',
+  'data': {'count': 'int', 'granularity': 'int'} }
+
+##
+# @BlockInfo:
+#
+# Block device information.  This structure describes a virtual device and
+# the backing device associated with it.
+#
+# @device: The device name associated with the virtual device.
+#
+# @type: This field is returned only for compatibility reasons, it should
+#        not be used (always returns 'unknown')
+#
+# @removable: True if the device supports removable media.
+#
+# @locked: True if the guest has locked this device from having its media
+#          removed
+#
+# @tray_open: #optional True if the device has a tray and it is open
+#             (only present if removable is true)
+#
+# @dirty-bitmaps: #optional dirty bitmaps information (only present if the
+#                 driver has one or more dirty bitmaps) (Since 2.0)
+#
+# @io-status: #optional @BlockDeviceIoStatus. Only present if the device
+#             supports it and the VM is configured to stop on errors
+#
+# @inserted: #optional @BlockDeviceInfo describing the device if media is
+#            present
+#
+# Since:  0.14.0
+##
+{ 'type': 'BlockInfo',
+  'data': {'device': 'str', 'type': 'str', 'removable': 'bool',
+           'locked': 'bool', '*inserted': 'BlockDeviceInfo',
+           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
+           '*dirty-bitmaps': ['BlockDirtyInfo'] } }
+
+##
+# @query-block:
+#
+# Get a list of BlockInfo for all virtual block devices.
+#
+# Returns: a list of @BlockInfo describing each virtual block device
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-block', 'returns': ['BlockInfo'] }
+
+##
+# @BlockDeviceStats:
+#
+# Statistics of a virtual block device or a block backing device.
+#
+# @rd_bytes:      The number of bytes read by the device.
+#
+# @wr_bytes:      The number of bytes written by the device.
+#
+# @rd_operations: The number of read operations performed by the device.
+#
+# @wr_operations: The number of write operations performed by the device.
+#
+# @flush_operations: The number of cache flush operations performed by the
+#                    device (since 0.15.0)
+#
+# @flush_total_time_ns: Total time spend on cache flushes in nano-seconds
+#                       (since 0.15.0).
+#
+# @wr_total_time_ns: Total time spend on writes in nano-seconds (since 0.15.0).
+#
+# @rd_total_time_ns: Total_time_spend on reads in nano-seconds (since 0.15.0).
+#
+# @wr_highest_offset: The offset after the greatest byte written to the
+#                     device.  The intended use of this information is for
+#                     growable sparse files (like qcow2) that are used on top
+#                     of a physical device.
+#
+# Since: 0.14.0
+##
+{ 'type': 'BlockDeviceStats',
+  'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int',
+           'wr_operations': 'int', 'flush_operations': 'int',
+           'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int',
+           'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } }
+
+##
+# @BlockStats:
+#
+# Statistics of a virtual block device or a block backing device.
+#
+# @device: #optional If the stats are for a virtual block device, the name
+#          corresponding to the virtual block device.
+#
+# @stats:  A @BlockDeviceStats for the device.
+#
+# @parent: #optional This describes the file block device if it has one.
+#
+# @backing: #optional This describes the backing block device if it has one.
+#           (Since 2.0)
+#
+# Since: 0.14.0
+##
+{ 'type': 'BlockStats',
+  'data': {'*device': 'str', 'stats': 'BlockDeviceStats',
+           '*parent': 'BlockStats',
+           '*backing': 'BlockStats'} }
+
+##
+# @query-blockstats:
+#
+# Query the @BlockStats for all virtual block devices.
+#
+# Returns: A list of @BlockStats for each virtual block devices.
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-blockstats', 'returns': ['BlockStats'] }
+
+##
+# @BlockdevOnError:
+#
+# An enumeration of possible behaviors for errors on I/O operations.
+# The exact meaning depends on whether the I/O was initiated by a guest
+# or by a block job
+#
+# @report: for guest operations, report the error to the guest;
+#          for jobs, cancel the job
+#
+# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
+#          or BLOCK_JOB_ERROR)
+#
+# @enospc: same as @stop on ENOSPC, same as @report otherwise.
+#
+# @stop: for guest operations, stop the virtual machine;
+#        for jobs, pause the job
+#
+# Since: 1.3
+##
+{ 'enum': 'BlockdevOnError',
+  'data': ['report', 'ignore', 'enospc', 'stop'] }
+
+##
+# @MirrorSyncMode:
+#
+# An enumeration of possible behaviors for the initial synchronization
+# phase of storage mirroring.
+#
+# @top: copies data in the topmost image to the destination
+#
+# @full: copies data from all images to the destination
+#
+# @none: only copy data written from now on
+#
+# Since: 1.3
+##
+{ 'enum': 'MirrorSyncMode',
+  'data': ['top', 'full', 'none'] }
+
+##
+# @BlockJobType:
+#
+# Type of a block job.
+#
+# @commit: block commit job type, see "block-commit"
+#
+# @stream: block stream job type, see "block-stream"
+#
+# @mirror: drive mirror job type, see "drive-mirror"
+#
+# @backup: drive backup job type, see "drive-backup"
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockJobType',
+  'data': ['commit', 'stream', 'mirror', 'backup'] }
+
+##
+# @BlockJobInfo:
+#
+# Information about a long-running block device operation.
+#
+# @type: the job type ('stream' for image streaming)
+#
+# @device: the block device name
+#
+# @len: the maximum progress value
+#
+# @busy: false if the job is known to be in a quiescent state, with
+#        no pending I/O.  Since 1.3.
+#
+# @paused: whether the job is paused or, if @busy is true, will
+#          pause itself as soon as possible.  Since 1.3.
+#
+# @offset: the current progress value
+#
+# @speed: the rate limit, bytes per second
+#
+# @io-status: the status of the job (since 1.3)
+#
+# Since: 1.1
+##
+{ 'type': 'BlockJobInfo',
+  'data': {'type': 'str', 'device': 'str', 'len': 'int',
+           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
+           'io-status': 'BlockDeviceIoStatus'} }
+
+##
+# @query-block-jobs:
+#
+# Return information about long-running block device operations.
+#
+# Returns: a list of @BlockJobInfo for each active block job
+#
+# Since: 1.1
+##
+{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
+
+##
+# @block_passwd:
+#
+# This command sets the password of a block device that has not been open
+# with a password and requires one.
+#
+# The two cases where this can happen are a block device is created through
+# QEMU's initial command line or a block device is changed through the legacy
+# @change interface.
+#
+# In the event that the block device is created through the initial command
+# line, the VM will start in the stopped state regardless of whether '-S' is
+# used.  The intention is for a management tool to query the block devices to
+# determine which ones are encrypted, set the passwords with this command, and
+# then start the guest with the @cont command.
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the block backend device to set the password on
+#
+# @node-name: #optional graph node name to set the password on (Since 2.0)
+#
+# @password: the password to use for the device
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#          If @device is not encrypted, DeviceNotEncrypted
+#
+# Notes:  Not all block formats support encryption and some that do are not
+#         able to validate that a password is correct.  Disk corruption may
+#         occur if an invalid password is specified.
+#
+# Since: 0.14.0
+##
+{ 'command': 'block_passwd', 'data': {'*device': 'str',
+                                      '*node-name': 'str', 'password': 'str'} }
+
+##
+# @block_resize
+#
+# Resize a block image while a guest is running.
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the device to get the image resized
+#
+# @node-name: #optional graph node name to get the image resized (Since 2.0)
+#
+# @size:  new image size in bytes
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since: 0.14.0
+##
+{ 'command': 'block_resize', 'data': { '*device': 'str',
+                                       '*node-name': 'str',
+                                       'size': 'int' }}
+
+##
+# @NewImageMode
+#
+# An enumeration that tells QEMU how to set the backing file path in
+# a new image file.
+#
+# @existing: QEMU should look for an existing image file.
+#
+# @absolute-paths: QEMU should create a new image with absolute paths
+# for the backing file. If there is no backing file available, the new
+# image will not be backed either.
+#
+# Since: 1.1
+##
+{ 'enum': 'NewImageMode',
+  'data': [ 'existing', 'absolute-paths' ] }
+
+##
+# @BlockdevSnapshot
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the device to generate the snapshot from.
+#
+# @node-name: #optional graph node name to generate the snapshot from (Since 2.0)
+#
+# @snapshot-file: the target of the new image. A new file will be created.
+#
+# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0)
+#
+# @format: #optional the format of the snapshot image, default is 'qcow2'.
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+##
+{ 'type': 'BlockdevSnapshot',
+  'data': { '*device': 'str', '*node-name': 'str',
+            'snapshot-file': 'str', '*snapshot-node-name': 'str',
+            '*format': 'str', '*mode': 'NewImageMode' } }
+
+##
+# @DriveBackup
+#
+# @device: the name of the device which should be copied.
+#
+# @target: the target of the new image. If the file exists, or if it
+#          is a device, the existing file/device will be used as the new
+#          destination.  If it does not exist, a new file will be created.
+#
+# @format: #optional the format of the new destination, default is to
+#          probe if @mode is 'existing', else the format of the source
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+#
+# @speed: #optional the maximum speed, in bytes per second
+#
+# @on-source-error: #optional the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
+# Note that @on-source-error and @on-target-error only affect background I/O.
+# If an error occurs during a guest write request, the device's rerror/werror
+# actions will be used.
+#
+# Since: 1.6
+##
+{ 'type': 'DriveBackup',
+  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
+            '*speed': 'int',
+            '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError' } }
+
+##
+# @blockdev-snapshot-sync
+#
+# Generates a synchronous snapshot of a block device.
+#
+# For the arguments, see the documentation of BlockdevSnapshot.
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since 0.14.0
+##
+{ 'command': 'blockdev-snapshot-sync',
+  'data': 'BlockdevSnapshot' }
+
+##
+# @block-commit
+#
+# Live commit of data from overlay image nodes into backing nodes - i.e.,
+# writes data between 'top' and 'base' into 'base'.
+#
+# @device:  the name of the device
+#
+# @base:   #optional The file name of the backing image to write data into.
+#                    If not specified, this is the deepest backing image
+#
+# @top:              The file name of the backing image within the image chain,
+#                    which contains the topmost data to be committed down.
+#
+#                    If top == base, that is an error.
+#                    If top == active, the job will not be completed by itself,
+#                    user needs to complete the job with the block-job-complete
+#                    command after getting the ready event. (Since 2.0)
+#
+#                    If the base image is smaller than top, then the base image
+#                    will be resized to be the same size as top.  If top is
+#                    smaller than the base image, the base will not be
+#                    truncated.  If you want the base image size to match the
+#                    size of the smaller top, you can safely truncate it
+#                    yourself once the commit operation successfully completes.
+#
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# Returns: Nothing on success
+#          If commit or stream is already active on this device, DeviceInUse
+#          If @device does not exist, DeviceNotFound
+#          If image commit is not supported by this device, NotSupported
+#          If @base or @top is invalid, a generic error is returned
+#          If @speed is invalid, InvalidParameter
+#
+# Since: 1.3
+#
+##
+{ 'command': 'block-commit',
+  'data': { 'device': 'str', '*base': 'str', 'top': 'str',
+            '*speed': 'int' } }
+
+##
+# @drive-backup
+#
+# Start a point-in-time copy of a block device to a new destination.  The
+# status of ongoing drive-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
+# The operation can be stopped before it has completed using the
+# block-job-cancel command.
+#
+# For the arguments, see the documentation of DriveBackup.
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since 1.6
+##
+{ 'command': 'drive-backup', 'data': 'DriveBackup' }
+
+##
+# @query-named-block-nodes
+#
+# Get the named block driver list
+#
+# Returns: the list of BlockDeviceInfo
+#
+# Since 2.0
+##
+{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] }
+
+##
+# @drive-mirror
+#
+# Start mirroring a block device's writes to a new destination.
+#
+# @device:  the name of the device whose writes should be mirrored.
+#
+# @target: the target of the new image. If the file exists, or if it
+#          is a device, the existing file/device will be used as the new
+#          destination.  If it does not exist, a new file will be created.
+#
+# @format: #optional the format of the new destination, default is to
+#          probe if @mode is 'existing', else the format of the source
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# @granularity: #optional granularity of the dirty bitmap, default is 64K
+#               if the image format doesn't have clusters, 4K if the clusters
+#               are smaller than that, else the cluster size.  Must be a
+#               power of 2 between 512 and 64M (since 1.4).
+#
+# @buf-size: #optional maximum amount of data in flight from source to
+#            target (since 1.4).
+#
+# @on-source-error: #optional the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since 1.3
+##
+{ 'command': 'drive-mirror',
+  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
+            '*speed': 'int', '*granularity': 'uint32',
+            '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError' } }
+
+##
+# @block_set_io_throttle:
+#
+# Change I/O throttle limits for a block drive.
+#
+# @device: The name of the device
+#
+# @bps: total throughput limit in bytes per second
+#
+# @bps_rd: read throughput limit in bytes per second
+#
+# @bps_wr: write throughput limit in bytes per second
+#
+# @iops: total I/O operations per second
+#
+# @ops_rd: read I/O operations per second
+#
+# @iops_wr: write I/O operations per second
+#
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
+# Returns: Nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since: 1.1
+##
+{ 'command': 'block_set_io_throttle',
+  'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
+            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+            '*bps_max': 'int', '*bps_rd_max': 'int',
+            '*bps_wr_max': 'int', '*iops_max': 'int',
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+            '*iops_size': 'int' } }
+
+##
+# @block-stream:
+#
+# Copy data from a backing file into a block device.
+#
+# The block streaming operation is performed in the background until the entire
+# backing file has been copied.  This command returns immediately once streaming
+# has started.  The status of ongoing block streaming operations can be checked
+# with query-block-jobs.  The operation can be stopped before it has completed
+# using the block-job-cancel command.
+#
+# If a base file is specified then sectors are not copied from that base file and
+# its backing chain.  When streaming completes the image file will have the base
+# file as its backing file.  This can be used to stream a subset of the backing
+# file chain instead of flattening the entire image.
+#
+# On successful completion the image file is updated to drop the backing file
+# and the BLOCK_JOB_COMPLETED event is emitted.
+#
+# @device: the device name
+#
+# @base:   #optional the common backing file name
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# @on-error: #optional the action to take on an error (default report).
+#            'stop' and 'enospc' can only be used if the block device
+#            supports io-status (see BlockInfo).  Since 1.3.
+#
+# Returns: Nothing on success
+#          If @device does not exist, DeviceNotFound
+#
+# Since: 1.1
+##
+{ 'command': 'block-stream',
+  'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
+            '*on-error': 'BlockdevOnError' } }
+
+##
+# @block-job-set-speed:
+#
+# Set maximum speed for a background block operation.
+#
+# This command can only be issued when there is an active block job.
+#
+# Throttling can be disabled by setting the speed to 0.
+#
+# @device: the device name
+#
+# @speed:  the maximum speed, in bytes per second, or 0 for unlimited.
+#          Defaults to 0.
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block-job-set-speed',
+  'data': { 'device': 'str', 'speed': 'int' } }
+
+##
+# @block-job-cancel:
+#
+# Stop an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for cancellation.  It is an error to call this command if no
+# operation is in progress.
+#
+# The operation will cancel as soon as possible and then emit the
+# BLOCK_JOB_CANCELLED event.  Before that happens the job is still visible when
+# enumerated using query-block-jobs.
+#
+# For streaming, the image file retains its backing file unless the streaming
+# operation happens to complete just as it is being cancelled.  A new streaming
+# operation can be started at a later time to finish copying all data from the
+# backing file.
+#
+# @device: the device name
+#
+# @force: #optional whether to allow cancellation of a paused job (default
+#         false).  Since 1.3.
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+
+##
+# @block-job-pause:
+#
+# Pause an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for pausing.  It is an error to call this command if no
+# operation is in progress.  Pausing an already paused job has no cumulative
+# effect; a single block-job-resume command will resume the job.
+#
+# The operation will pause as soon as possible.  No event is emitted when
+# the operation is actually paused.  Cancelling a paused job automatically
+# resumes it.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+
+##
+# @block-job-resume:
+#
+# Resume an active background block operation.
+#
+# This command returns immediately after resuming a paused background block
+# operation.  It is an error to call this command if no operation is in
+# progress.  Resuming an already running job is not an error.
+#
+# This command also clears the error status of the job.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
+
+##
+# @block-job-complete:
+#
+# Manually trigger completion of an active background block operation.  This
+# is supported for drive mirroring, where it also switches the device to
+# write to the target path only.  The ability to complete is signaled with
+# a BLOCK_JOB_READY event.
+#
+# This command completes an active background block operation synchronously.
+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
+# is not defined.  Note that if an I/O error occurs during the processing of
+# this command: 1) the command itself will fail; 2) the error will be processed
+# according to the rerror/werror arguments that were specified when starting
+# the operation.
+#
+# A cancelled or paused job cannot be completed.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
+
+##
+# @BlockdevDiscardOptions
+#
+# Determines how to handle discard requests.
+#
+# @ignore:      Ignore the request
+# @unmap:       Forward as an unmap request
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockdevDiscardOptions',
+  'data': [ 'ignore', 'unmap' ] }
+
+##
+# @BlockdevDetectZeroesOptions
+#
+# Describes the operation mode for the automatic conversion of plain
+# zero writes by the OS to driver specific optimized zero write commands.
+#
+# @off:      Disabled (default)
+# @on:       Enabled
+# @unmap:    Enabled and even try to unmap blocks if possible. This requires
+#            also that @BlockdevDiscardOptions is set to unmap for this device.
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+  'data': [ 'off', 'on', 'unmap' ] }
+
+##
+# @BlockdevAioOptions
+#
+# Selects the AIO backend to handle I/O requests
+#
+# @threads:     Use qemu's thread pool
+# @native:      Use native AIO backend (only Linux and Windows)
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockdevAioOptions',
+  'data': [ 'threads', 'native' ] }
+
+##
+# @BlockdevCacheOptions
+#
+# Includes cache-related options for block devices
+#
+# @writeback:   #optional enables writeback mode for any caches (default: true)
+# @direct:      #optional enables use of O_DIRECT (bypass the host page cache;
+#               default: false)
+# @no-flush:    #optional ignore any flush requests for the device (default:
+#               false)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevCacheOptions',
+  'data': { '*writeback': 'bool',
+            '*direct': 'bool',
+            '*no-flush': 'bool' } }
+
+##
+# @BlockdevDriver
+#
+# Drivers that are supported in block device operations.
+#
+# @host_device, @host_cdrom, @host_floppy: Since 2.1
+#
+# Since: 2.0
+##
+{ 'enum': 'BlockdevDriver',
+  'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
+            'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
+            'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
+            'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
+
+##
+# @BlockdevOptionsBase
+#
+# Options that are available for all block devices, independent of the block
+# driver.
+#
+# @driver:        block driver name
+# @id:            #optional id by which the new block device can be referred to.
+#                 This is a required option on the top level of blockdev-add, and
+#                 currently not allowed on any other level.
+# @node-name:     #optional the name of a block driver state node (Since 2.0)
+# @discard:       #optional discard-related options (default: ignore)
+# @cache:         #optional cache-related options
+# @aio:           #optional AIO backend (default: threads)
+# @rerror:        #optional how to handle read errors on the device
+#                 (default: report)
+# @werror:        #optional how to handle write errors on the device
+#                 (default: enospc)
+# @read-only:     #optional whether the block device should be read-only
+#                 (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+#                 (default: off)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsBase',
+  'data': { 'driver': 'BlockdevDriver',
+            '*id': 'str',
+            '*node-name': 'str',
+            '*discard': 'BlockdevDiscardOptions',
+            '*cache': 'BlockdevCacheOptions',
+            '*aio': 'BlockdevAioOptions',
+            '*rerror': 'BlockdevOnError',
+            '*werror': 'BlockdevOnError',
+            '*read-only': 'bool',
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
+
+##
+# @BlockdevOptionsFile
+#
+# Driver specific block device options for the file backend and similar
+# protocols.
+#
+# @filename:    path to the image file
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsFile',
+  'data': { 'filename': 'str' } }
+
+##
+# @BlockdevOptionsVVFAT
+#
+# Driver specific block device options for the vvfat protocol.
+#
+# @dir:         directory to be exported as FAT image
+# @fat-type:    #optional FAT type: 12, 16 or 32
+# @floppy:      #optional whether to export a floppy image (true) or
+#               partitioned hard disk (false; default)
+# @rw:          #optional whether to allow write operations (default: false)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsVVFAT',
+  'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool',
+            '*rw': 'bool' } }
+
+##
+# @BlockdevOptionsGenericFormat
+#
+# Driver specific block device options for image format that have no option
+# besides their data source.
+#
+# @file:        reference to or definition of the data source block device
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsGenericFormat',
+  'data': { 'file': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsGenericCOWFormat
+#
+# Driver specific block device options for image format that have no option
+# besides their data source and an optional backing file.
+#
+# @backing:     #optional reference to or definition of the backing file block
+#               device (if missing, taken from the image file content). It is
+#               allowed to pass an empty string here in order to disable the
+#               default backing file.
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsGenericCOWFormat',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*backing': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsQcow2
+#
+# Driver specific block device options for qcow2.
+#
+# @lazy-refcounts:        #optional whether to enable the lazy refcounts
+#                         feature (default is taken from the image file)
+#
+# @pass-discard-request:  #optional whether discard requests to the qcow2
+#                         device should be forwarded to the data source
+#
+# @pass-discard-snapshot: #optional whether discard requests for the data source
+#                         should be issued when a snapshot operation (e.g.
+#                         deleting a snapshot) frees clusters in the qcow2 file
+#
+# @pass-discard-other:    #optional whether discard requests for the data source
+#                         should be issued on other occasions where a cluster
+#                         gets freed
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsQcow2',
+  'base': 'BlockdevOptionsGenericCOWFormat',
+  'data': { '*lazy-refcounts': 'bool',
+            '*pass-discard-request': 'bool',
+            '*pass-discard-snapshot': 'bool',
+            '*pass-discard-other': 'bool' } }
+
+##
+# @BlkdebugEvent
+#
+# Trigger events supported by blkdebug.
+##
+{ 'enum': 'BlkdebugEvent',
+  'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table',
+            'l1_grow.activate_table', 'l2_load', 'l2_update',
+            'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write',
+            'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio',
+            'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read',
+            'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update',
+            'refblock_load', 'refblock_update', 'refblock_update_part',
+            'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write',
+            'refblock_alloc.write_blocks', 'refblock_alloc.write_table',
+            'refblock_alloc.switch_table', 'cluster_alloc',
+            'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
+            'flush_to_disk' ] }
+
+##
+# @BlkdebugInjectErrorOptions
+#
+# Describes a single error injection for blkdebug.
+#
+# @event:       trigger event
+#
+# @state:       #optional the state identifier blkdebug needs to be in to
+#               actually trigger the event; defaults to "any"
+#
+# @errno:       #optional error identifier (errno) to be returned; defaults to
+#               EIO
+#
+# @sector:      #optional specifies the sector index which has to be affected
+#               in order to actually trigger the event; defaults to "any
+#               sector"
+#
+# @once:        #optional disables further events after this one has been
+#               triggered; defaults to false
+#
+# @immediately: #optional fail immediately; defaults to false
+#
+# Since: 2.0
+##
+{ 'type': 'BlkdebugInjectErrorOptions',
+  'data': { 'event': 'BlkdebugEvent',
+            '*state': 'int',
+            '*errno': 'int',
+            '*sector': 'int',
+            '*once': 'bool',
+            '*immediately': 'bool' } }
+
+##
+# @BlkdebugSetStateOptions
+#
+# Describes a single state-change event for blkdebug.
+#
+# @event:       trigger event
+#
+# @state:       #optional the current state identifier blkdebug needs to be in;
+#               defaults to "any"
+#
+# @new_state:   the state identifier blkdebug is supposed to assume if
+#               this event is triggered
+#
+# Since: 2.0
+##
+{ 'type': 'BlkdebugSetStateOptions',
+  'data': { 'event': 'BlkdebugEvent',
+            '*state': 'int',
+            'new_state': 'int' } }
+
+##
+# @BlockdevOptionsBlkdebug
+#
+# Driver specific block device options for blkdebug.
+#
+# @image:           underlying raw block device (or image file)
+#
+# @config:          #optional filename of the configuration file
+#
+# @align:           #optional required alignment for requests in bytes
+#
+# @inject-error:    #optional array of error injection descriptions
+#
+# @set-state:       #optional array of state-change descriptions
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsBlkdebug',
+  'data': { 'image': 'BlockdevRef',
+            '*config': 'str',
+            '*align': 'int',
+            '*inject-error': ['BlkdebugInjectErrorOptions'],
+            '*set-state': ['BlkdebugSetStateOptions'] } }
+
+##
+# @BlockdevOptionsBlkverify
+#
+# Driver specific block device options for blkverify.
+#
+# @test:    block device to be tested
+#
+# @raw:     raw image used for verification
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsBlkverify',
+  'data': { 'test': 'BlockdevRef',
+            'raw': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsQuorum
+#
+# Driver specific block device options for Quorum
+#
+# @blkverify:      #optional true if the driver must print content mismatch
+#                  set to false by default
+#
+# @children:       the children block devices to use
+#
+# @vote-threshold: the vote limit under which a read will fail
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsQuorum',
+  'data': { '*blkverify': 'bool',
+            'children': [ 'BlockdevRef' ],
+            'vote-threshold': 'int' } }
+
+##
+# @BlockdevOptions
+#
+# Options for creating a block device.
+#
+# Since: 1.7
+##
+{ 'union': 'BlockdevOptions',
+  'base': 'BlockdevOptionsBase',
+  'discriminator': 'driver',
+  'data': {
+      'file':       'BlockdevOptionsFile',
+      'host_device':'BlockdevOptionsFile',
+      'host_cdrom': 'BlockdevOptionsFile',
+      'host_floppy':'BlockdevOptionsFile',
+      'http':       'BlockdevOptionsFile',
+      'https':      'BlockdevOptionsFile',
+      'ftp':        'BlockdevOptionsFile',
+      'ftps':       'BlockdevOptionsFile',
+      'tftp':       'BlockdevOptionsFile',
+# TODO gluster: Wait for structured options
+# TODO iscsi: Wait for structured options
+# TODO nbd: Should take InetSocketAddress for 'host'?
+# TODO nfs: Wait for structured options
+# TODO rbd: Wait for structured options
+# TODO sheepdog: Wait for structured options
+# TODO ssh: Should take InetSocketAddress for 'host'?
+      'vvfat':      'BlockdevOptionsVVFAT',
+      'blkdebug':   'BlockdevOptionsBlkdebug',
+      'blkverify':  'BlockdevOptionsBlkverify',
+      'bochs':      'BlockdevOptionsGenericFormat',
+      'cloop':      'BlockdevOptionsGenericFormat',
+      'cow':        'BlockdevOptionsGenericCOWFormat',
+      'dmg':        'BlockdevOptionsGenericFormat',
+      'parallels':  'BlockdevOptionsGenericFormat',
+      'qcow':       'BlockdevOptionsGenericCOWFormat',
+      'qcow2':      'BlockdevOptionsQcow2',
+      'qed':        'BlockdevOptionsGenericCOWFormat',
+      'raw':        'BlockdevOptionsGenericFormat',
+      'vdi':        'BlockdevOptionsGenericFormat',
+      'vhdx':       'BlockdevOptionsGenericFormat',
+      'vmdk':       'BlockdevOptionsGenericCOWFormat',
+      'vpc':        'BlockdevOptionsGenericFormat',
+      'quorum':     'BlockdevOptionsQuorum'
+  } }
+
+##
+# @BlockdevRef
+#
+# Reference to a block device.
+#
+# @definition:      defines a new block device inline
+# @reference:       references the ID of an existing block device. An
+#                   empty string means that no block device should be
+#                   referenced.
+#
+# Since: 1.7
+##
+{ 'union': 'BlockdevRef',
+  'discriminator': {},
+  'data': { 'definition': 'BlockdevOptions',
+            'reference': 'str' } }
+
+##
+# @blockdev-add:
+#
+# Creates a new block device.
+#
+# @options: block device options for the new device
+#
+# Since: 1.7
+##
+{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
+
diff --git a/qapi/block.json b/qapi/block.json
new file mode 100644
index 0000000000..61c463ab05
--- /dev/null
+++ b/qapi/block.json
@@ -0,0 +1,166 @@
+# -*- Mode: Python -*-
+#
+# QAPI block definitions (vm related)
+
+# QAPI block core definitions
+{ 'include': 'block-core.json' }
+
+##
+# BiosAtaTranslation:
+#
+# Policy that BIOS should use to interpret cylinder/head/sector
+# addresses.  Note that Bochs BIOS and SeaBIOS will not actually
+# translate logical CHS to physical; instead, they will use logical
+# block addressing.
+#
+# @auto: If cylinder/heads/sizes are passed, choose between none and LBA
+#        depending on the size of the disk.  If they are not passed,
+#        choose none if QEMU can guess that the disk had 16 or fewer
+#        heads, large if QEMU can guess that the disk had 131072 or
+#        fewer tracks across all heads (i.e. cylinders*heads<131072),
+#        otherwise LBA.
+#
+# @none: The physical disk geometry is equal to the logical geometry.
+#
+# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255
+#       heads (if fewer than 255 are enough to cover the whole disk
+#       with 1024 cylinders/head).  The number of cylinders/head is
+#       then computed based on the number of sectors and heads.
+#
+# @large: The number of cylinders per head is scaled down to 1024
+#         by correspondingly scaling up the number of heads.
+#
+# @rechs: Same as @large, but first convert a 16-head geometry to
+#         15-head, by proportionally scaling up the number of
+#         cylinders/head.
+#
+# Since: 2.0
+##
+{ 'enum': 'BiosAtaTranslation',
+  'data': ['auto', 'none', 'lba', 'large', 'rechs']}
+
+##
+# @BlockdevSnapshotInternal
+#
+# @device: the name of the device to generate the snapshot from
+#
+# @name: the name of the internal snapshot to be created
+#
+# Notes: In transaction, if @name is empty, or any snapshot matching @name
+#        exists, the operation will fail. Only some image formats support it,
+#        for example, qcow2, rbd, and sheepdog.
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevSnapshotInternal',
+  'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @blockdev-snapshot-internal-sync
+#
+# Synchronously take an internal snapshot of a block device, when the format
+# of the image used supports it.
+#
+# For the arguments, see the documentation of BlockdevSnapshotInternal.
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#          If any snapshot matching @name exists, or @name is empty,
+#          GenericError
+#          If the format of the image used does not support it,
+#          BlockFormatFeatureNotSupported
+#
+# Since 1.7
+##
+{ 'command': 'blockdev-snapshot-internal-sync',
+  'data': 'BlockdevSnapshotInternal' }
+
+##
+# @blockdev-snapshot-delete-internal-sync
+#
+# Synchronously delete an internal snapshot of a block device, when the format
+# of the image used support it. The snapshot is identified by name or id or
+# both. One of the name or id is required. Return SnapshotInfo for the
+# successfully deleted snapshot.
+#
+# @device: the name of the device to delete the snapshot from
+#
+# @id: optional the snapshot's ID to be deleted
+#
+# @name: optional the snapshot's name to be deleted
+#
+# Returns: SnapshotInfo on success
+#          If @device is not a valid block device, DeviceNotFound
+#          If snapshot not found, GenericError
+#          If the format of the image used does not support it,
+#          BlockFormatFeatureNotSupported
+#          If @id and @name are both not specified, GenericError
+#
+# Since 1.7
+##
+{ 'command': 'blockdev-snapshot-delete-internal-sync',
+  'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
+  'returns': 'SnapshotInfo' }
+
+##
+# @eject:
+#
+# Ejects a device from a removable drive.
+#
+# @device:  The name of the device
+#
+# @force:   @optional If true, eject regardless of whether the drive is locked.
+#           If not specified, the default value is false.
+#
+# Returns:  Nothing on success
+#           If @device is not a valid block device, DeviceNotFound
+#
+# Notes:    Ejecting a device will no media results in success
+#
+# Since: 0.14.0
+##
+{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} }
+
+##
+# @nbd-server-start:
+#
+# Start an NBD server listening on the given host and port.  Block
+# devices can then be exported using @nbd-server-add.  The NBD
+# server will present them as named exports; for example, another
+# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
+#
+# @addr: Address on which to listen.
+#
+# Returns: error if the server is already running.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-start',
+  'data': { 'addr': 'SocketAddress' } }
+
+##
+# @nbd-server-add:
+#
+# Export a device to QEMU's embedded NBD server.
+#
+# @device: Block device to be exported
+#
+# @writable: Whether clients should be able to write to the device via the
+#     NBD connection (default false). #optional
+#
+# Returns: error if the device is already marked for export.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} }
+
+##
+# @nbd-server-stop:
+#
+# Stop QEMU's embedded NBD server, and unregister all devices previously
+# added via @nbd-server-add.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-stop' }
+
diff --git a/qapi/common.json b/qapi/common.json
new file mode 100644
index 0000000000..4e9a21f2f6
--- /dev/null
+++ b/qapi/common.json
@@ -0,0 +1,89 @@
+# -*- Mode: Python -*-
+#
+# QAPI common definitions
+
+##
+# @ErrorClass
+#
+# QEMU error classes
+#
+# @GenericError: this is used for errors that don't require a specific error
+#                class. This should be the default case for most errors
+#
+# @CommandNotFound: the requested command has not been found
+#
+# @DeviceEncrypted: the requested operation can't be fulfilled because the
+#                   selected device is encrypted
+#
+# @DeviceNotActive: a device has failed to be become active
+#
+# @DeviceNotFound: the requested device has not been found
+#
+# @KVMMissingCap: the requested operation can't be fulfilled because a
+#                 required KVM capability is missing
+#
+# Since: 1.2
+##
+{ 'enum': 'ErrorClass',
+  'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted',
+            'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] }
+
+##
+# @VersionInfo:
+#
+# A description of QEMU's version.
+#
+# @qemu.major:  The major version of QEMU
+#
+# @qemu.minor:  The minor version of QEMU
+#
+# @qemu.micro:  The micro version of QEMU.  By current convention, a micro
+#               version of 50 signifies a development branch.  A micro version
+#               greater than or equal to 90 signifies a release candidate for
+#               the next minor version.  A micro version of less than 50
+#               signifies a stable release.
+#
+# @package:     QEMU will always set this field to an empty string.  Downstream
+#               versions of QEMU should set this to a non-empty string.  The
+#               exact format depends on the downstream however it highly
+#               recommended that a unique name is used.
+#
+# Since: 0.14.0
+##
+{ 'type': 'VersionInfo',
+  'data': {'qemu': {'major': 'int', 'minor': 'int', 'micro': 'int'},
+           'package': 'str'} }
+
+##
+# @query-version:
+#
+# Returns the current version of QEMU.
+#
+# Returns:  A @VersionInfo object describing the current version of QEMU.
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-version', 'returns': 'VersionInfo' }
+
+##
+# @CommandInfo:
+#
+# Information about a QMP command
+#
+# @name: The command name
+#
+# Since: 0.14.0
+##
+{ 'type': 'CommandInfo', 'data': {'name': 'str'} }
+
+##
+# @query-commands:
+#
+# Return a list of supported QMP commands by this server
+#
+# Returns: A list of @CommandInfo for all supported commands
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-commands', 'returns': ['CommandInfo'] }
+
diff --git a/qemu-img.c b/qemu-img.c
index b3d2bc6f02..aa89ba21fd 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -580,10 +580,11 @@ static int collect_image_check(BlockDriverState *bs,
 /*
  * Checks an image for consistency. Exit codes:
  *
- * 0 - Check completed, image is good
- * 1 - Check not completed because of internal errors
- * 2 - Check completed, image is corrupted
- * 3 - Check completed, image has leaked clusters, but is good otherwise
+ *  0 - Check completed, image is good
+ *  1 - Check not completed because of internal errors
+ *  2 - Check completed, image is corrupted
+ *  3 - Check completed, image has leaked clusters, but is good otherwise
+ * 63 - Checks are not supported by the image format
  */
 static int img_check(int argc, char **argv)
 {
diff --git a/qemu-img.texi b/qemu-img.texi
index f84590ebf0..c68b54148a 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -126,6 +126,29 @@ wrong fix or hiding corruption that has already occurred.
 Only the formats @code{qcow2}, @code{qed} and @code{vdi} support
 consistency checks.
 
+In case the image does not have any inconsistencies, check exits with @code{0}.
+Other exit codes indicate the kind of inconsistency found or if another error
+occurred. The following table summarizes all exit codes of the check subcommand:
+
+@table @option
+
+@item 0
+Check completed, the image is (now) consistent
+@item 1
+Check not completed because of internal errors
+@item 2
+Check completed, image is corrupted
+@item 3
+Check completed, image has leaked clusters, but is not corrupted
+@item 63
+Checks are not supported by the image format
+
+@end table
+
+If @code{-r} is specified, exit codes representing the image state refer to the
+state after (the attempt at) repairing it. That is, a successful @code{-r all}
+will yield the exit code 0, independently of the image state before.
+
 @item create [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
 
 Create the new disk image @var{filename} of size @var{size} and format
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 1d4ffd3603..3de6ab80e0 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -12,8 +12,10 @@
 
 #include <glib.h>
 #include <math.h>
+#include "block/aio.h"
 #include "qemu/throttle.h"
 
+AioContext     *ctx;
 LeakyBucket    bkt;
 ThrottleConfig cfg;
 ThrottleState  ts;
@@ -104,7 +106,8 @@ static void test_init(void)
     memset(&ts, 1, sizeof(ts));
 
     /* init the structure */
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
 
     /* check initialized fields */
     g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL);
@@ -126,7 +129,8 @@ static void test_init(void)
 static void test_destroy(void)
 {
     int i;
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
     throttle_destroy(&ts);
     for (i = 0; i < 2; i++) {
         g_assert(!ts.timers[i]);
@@ -165,7 +169,8 @@ static void test_config_functions(void)
 
     orig_cfg.op_size = 1;
 
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
     /* structure reset by throttle_init previous_leak should be null */
     g_assert(!ts.previous_leak);
     throttle_config(&ts, &orig_cfg);
@@ -324,7 +329,8 @@ static void test_have_timer(void)
     g_assert(!throttle_have_timer(&ts));
 
     /* init the structure */
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
 
     /* timer set by init should return true */
     g_assert(throttle_have_timer(&ts));
@@ -332,6 +338,29 @@ static void test_have_timer(void)
     throttle_destroy(&ts);
 }
 
+static void test_detach_attach(void)
+{
+    /* zero the structure */
+    memset(&ts, 0, sizeof(ts));
+
+    /* init the structure */
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
+
+    /* timer set by init should return true */
+    g_assert(throttle_have_timer(&ts));
+
+    /* timer should no longer exist after detaching */
+    throttle_detach_aio_context(&ts);
+    g_assert(!throttle_have_timer(&ts));
+
+    /* timer should exist again after attaching */
+    throttle_attach_aio_context(&ts, ctx);
+    g_assert(throttle_have_timer(&ts));
+
+    throttle_destroy(&ts);
+}
+
 static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
                 int size,                   /* size of the operation to do */
                 double avg,                 /* io limit */
@@ -357,7 +386,8 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
 
     cfg.op_size = op_size;
 
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
     throttle_config(&ts, &cfg);
 
     /* account a read */
@@ -461,7 +491,15 @@ static void test_accounting(void)
 
 int main(int argc, char **argv)
 {
+    GSource *src;
+
     init_clocks();
+
+    ctx = aio_context_new();
+    src = aio_get_g_source(ctx);
+    g_source_attach(src, NULL);
+    g_source_unref(src);
+
     do {} while (g_main_context_iteration(NULL, false));
 
     /* tests in the same order as the header function declarations */
@@ -471,6 +509,7 @@ int main(int argc, char **argv)
     g_test_add_func("/throttle/init",               test_init);
     g_test_add_func("/throttle/destroy",            test_destroy);
     g_test_add_func("/throttle/have_timer",         test_have_timer);
+    g_test_add_func("/throttle/detach_attach",      test_detach_attach);
     g_test_add_func("/throttle/config/enabled",     test_enabled);
     g_test_add_func("/throttle/config/conflicting", test_conflicting_config);
     g_test_add_func("/throttle/config/is_valid",    test_is_valid);
diff --git a/util/throttle.c b/util/throttle.c
index 02e6f15587..f976ac7de5 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -22,6 +22,7 @@
 
 #include "qemu/throttle.h"
 #include "qemu/timer.h"
+#include "block/aio.h"
 
 /* This function make a bucket leak
  *
@@ -157,8 +158,18 @@ bool throttle_compute_timer(ThrottleState *ts,
     return false;
 }
 
+/* Add timers to event loop */
+void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context)
+{
+    ts->timers[0] = aio_timer_new(new_context, ts->clock_type, SCALE_NS,
+                                  ts->read_timer_cb, ts->timer_opaque);
+    ts->timers[1] = aio_timer_new(new_context, ts->clock_type, SCALE_NS,
+                                  ts->write_timer_cb, ts->timer_opaque);
+}
+
 /* To be called first on the ThrottleState */
 void throttle_init(ThrottleState *ts,
+                   AioContext *aio_context,
                    QEMUClockType clock_type,
                    QEMUTimerCB *read_timer_cb,
                    QEMUTimerCB *write_timer_cb,
@@ -167,8 +178,10 @@ void throttle_init(ThrottleState *ts,
     memset(ts, 0, sizeof(ThrottleState));
 
     ts->clock_type = clock_type;
-    ts->timers[0] = timer_new_ns(clock_type, read_timer_cb, timer_opaque);
-    ts->timers[1] = timer_new_ns(clock_type, write_timer_cb, timer_opaque);
+    ts->read_timer_cb = read_timer_cb;
+    ts->write_timer_cb = write_timer_cb;
+    ts->timer_opaque = timer_opaque;
+    throttle_attach_aio_context(ts, aio_context);
 }
 
 /* destroy a timer */
@@ -181,8 +194,8 @@ static void throttle_timer_destroy(QEMUTimer **timer)
     *timer = NULL;
 }
 
-/* To be called last on the ThrottleState */
-void throttle_destroy(ThrottleState *ts)
+/* Remove timers from event loop */
+void throttle_detach_aio_context(ThrottleState *ts)
 {
     int i;
 
@@ -191,6 +204,12 @@ void throttle_destroy(ThrottleState *ts)
     }
 }
 
+/* To be called last on the ThrottleState */
+void throttle_destroy(ThrottleState *ts)
+{
+    throttle_detach_aio_context(ts);
+}
+
 /* is any throttling timer configured */
 bool throttle_have_timer(ThrottleState *ts)
 {