From d79df2a2ceb3cb0771146587e9a4bfb312577f46 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 21 Mar 2017 18:48:10 +0100
Subject: [PATCH 1/4] blockjob: avoid recursive AioContext locking

Streaming or any other block job hangs when performed on a block device
that has a non-default iothread.  This happens because the AioContext
is acquired twice by block_job_defer_to_main_loop_bh and then released
only once by BDRV_POLL_WHILE.  (Insert rants on recursive mutexes, which
unfortunately are a temporary but necessary evil for iothreads at the
moment).

Luckily, the reason for the double acquisition is simple; the function
acquires the AioContext for both the job iothread and the BDS iothread,
in case the BDS iothread was changed while the job was running.  It
is therefore enough to skip the second acquisition when the two
AioContexts are one and the same.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1490118490-5597-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 blockjob.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 69126af97f..2159df776b 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -755,12 +755,16 @@ static void block_job_defer_to_main_loop_bh(void *opaque)
 
     /* Fetch BDS AioContext again, in case it has changed */
     aio_context = blk_get_aio_context(data->job->blk);
-    aio_context_acquire(aio_context);
+    if (aio_context != data->aio_context) {
+        aio_context_acquire(aio_context);
+    }
 
     data->job->deferred_to_main_loop = false;
     data->fn(data->job, data->opaque);
 
-    aio_context_release(aio_context);
+    if (aio_context != data->aio_context) {
+        aio_context_release(aio_context);
+    }
 
     aio_context_release(data->aio_context);
 

From e3796a245ad0efa65ca8d2dc6424562a8fbaeb6a Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 16 Mar 2017 17:23:49 -0400
Subject: [PATCH 2/4] blockjob: add block_job_start_shim

The purpose of this shim is to allow us to pause pre-started jobs.
The purpose of *that* is to allow us to buffer a pause request that
will be able to take effect before the job ever does any work, allowing
us to create jobs during a quiescent state (under which they will be
automatically paused), then resuming the jobs after the critical section
in any order, either:

(1) -block_job_start
    -block_job_resume (via e.g. drained_end)

(2) -block_job_resume (via e.g. drained_end)
    -block_job_start

The problem that requires a startup wrapper is the idea that a job must
start in the busy=true state only its first time-- all subsequent entries
require busy to be false, and the toggling of this state is otherwise
handled during existing pause and yield points.

The wrapper simply allows us to mandate that a job can "start," set busy
to true, then immediately pause only if necessary. We could avoid
requiring a wrapper, but all jobs would need to do it, so it's been
factored out here.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20170316212351.13797-2-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 blockjob.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 2159df776b..0e9ed0336d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -250,16 +250,28 @@ static bool block_job_started(BlockJob *job)
     return job->co;
 }
 
+/**
+ * All jobs must allow a pause point before entering their job proper. This
+ * ensures that jobs can be paused prior to being started, then resumed later.
+ */
+static void coroutine_fn block_job_co_entry(void *opaque)
+{
+    BlockJob *job = opaque;
+
+    assert(job && job->driver && job->driver->start);
+    block_job_pause_point(job);
+    job->driver->start(job);
+}
+
 void block_job_start(BlockJob *job)
 {
     assert(job && !block_job_started(job) && job->paused &&
-           !job->busy && job->driver->start);
-    job->co = qemu_coroutine_create(job->driver->start, job);
-    if (--job->pause_count == 0) {
-        job->paused = false;
-        job->busy = true;
-        qemu_coroutine_enter(job->co);
-    }
+           job->driver && job->driver->start);
+    job->co = qemu_coroutine_create(block_job_co_entry, job);
+    job->pause_count--;
+    job->busy = true;
+    job->paused = false;
+    qemu_coroutine_enter(job->co);
 }
 
 void block_job_ref(BlockJob *job)

From f4d9cc88ee69a5b04a843424e50f466e36fcad4e Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 16 Mar 2017 17:23:50 -0400
Subject: [PATCH 3/4] block-backend: add drained_begin / drained_end ops

Allow block backends to forward drain requests to their devices/users.
The initial intended purpose for this patch is to allow BBs to forward
requests along to BlockJobs, which will want to pause if their associated
BB has entered a drained region.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20170316212351.13797-3-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/block-backend.c          | 24 ++++++++++++++++++++++--
 include/sysemu/block-backend.h |  8 ++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 5742c09c2c..0b6377332c 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -65,6 +65,8 @@ struct BlockBackend {
     bool allow_write_beyond_eof;
 
     NotifierList remove_bs_notifiers, insert_bs_notifiers;
+
+    int quiesce_counter;
 };
 
 typedef struct BlockBackendAIOCB {
@@ -699,12 +701,17 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                      void *opaque)
 {
     /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
-     * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
-     * is set. */
+     * it that way, so we can assume blk->dev, if present, is a DeviceState if
+     * blk->dev_ops is set. Non-device users may use dev_ops without device. */
     assert(!blk->legacy_dev);
 
     blk->dev_ops = ops;
     blk->dev_opaque = opaque;
+
+    /* Are we currently quiesced? Should we enforce this right now? */
+    if (blk->quiesce_counter && ops->drained_begin) {
+        ops->drained_begin(opaque);
+    }
 }
 
 /*
@@ -1870,6 +1877,12 @@ static void blk_root_drained_begin(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
 
+    if (++blk->quiesce_counter == 1) {
+        if (blk->dev_ops && blk->dev_ops->drained_begin) {
+            blk->dev_ops->drained_begin(blk->dev_opaque);
+        }
+    }
+
     /* Note that blk->root may not be accessible here yet if we are just
      * attaching to a BlockDriverState that is drained. Use child instead. */
 
@@ -1881,7 +1894,14 @@ static void blk_root_drained_begin(BdrvChild *child)
 static void blk_root_drained_end(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
+    assert(blk->quiesce_counter);
 
     assert(blk->public.io_limits_disabled);
     --blk->public.io_limits_disabled;
+
+    if (--blk->quiesce_counter == 0) {
+        if (blk->dev_ops && blk->dev_ops->drained_end) {
+            blk->dev_ops->drained_end(blk->dev_opaque);
+        }
+    }
 }
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 096c17fce0..7462228ac1 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -58,6 +58,14 @@ typedef struct BlockDevOps {
      * Runs when the size changed (e.g. monitor command block_resize)
      */
     void (*resize_cb)(void *opaque);
+    /*
+     * Runs when the backend receives a drain request.
+     */
+    void (*drained_begin)(void *opaque);
+    /*
+     * Runs when the backend's last drain request ends.
+     */
+    void (*drained_end)(void *opaque);
 } BlockDevOps;
 
 /* This struct is embedded in (the private) BlockBackend struct and contains

From 600ac6a0ef5c06418446ef2f37407bddcc51b21c Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 16 Mar 2017 17:23:51 -0400
Subject: [PATCH 4/4] blockjob: add devops to blockjob backends

This lets us hook into drained_begin and drained_end requests from the
backend level, which is particularly useful for making sure that all
jobs associated with a particular node (whether the source or the target)
receive a drain request.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20170316212351.13797-4-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 blockjob.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 0e9ed0336d..9b619f385a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -68,6 +68,23 @@ static const BdrvChildRole child_job = {
     .stay_at_node       = true,
 };
 
+static void block_job_drained_begin(void *opaque)
+{
+    BlockJob *job = opaque;
+    block_job_pause(job);
+}
+
+static void block_job_drained_end(void *opaque)
+{
+    BlockJob *job = opaque;
+    block_job_resume(job);
+}
+
+static const BlockDevOps block_job_dev_ops = {
+    .drained_begin = block_job_drained_begin,
+    .drained_end = block_job_drained_end,
+};
+
 BlockJob *block_job_next(BlockJob *job)
 {
     if (!job) {
@@ -205,11 +222,6 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     }
 
     job = g_malloc0(driver->instance_size);
-    error_setg(&job->blocker, "block device is in use by block job: %s",
-               BlockJobType_lookup[driver->job_type]);
-    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
-    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
-
     job->driver        = driver;
     job->id            = g_strdup(job_id);
     job->blk           = blk;
@@ -219,8 +231,15 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     job->paused        = true;
     job->pause_count   = 1;
     job->refcnt        = 1;
+
+    error_setg(&job->blocker, "block device is in use by block job: %s",
+               BlockJobType_lookup[driver->job_type]);
+    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
     bs->job = job;
 
+    blk_set_dev_ops(blk, &block_job_dev_ops, job);
+    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
+
     QLIST_INSERT_HEAD(&block_jobs, job, job_list);
 
     blk_add_aio_context_notifier(blk, block_job_attached_aio_context,