Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging

Block layer patches

- Code cleanups around block graph modification
- Simplify drain
- coroutine_fn correctness fixes, including splitting generated
  coroutine wrappers into co_wrapper (to be called only from
  non-coroutine context) and co_wrapper_mixed (both coroutine and
  non-coroutine context)
- Introduce a block graph rwlock
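
As a rough illustration of the co_wrapper split described above (editor's sketch; these declarations are hypothetical and not APIs added by this series), a header annotates the coroutine implementation and lets scripts/block-coroutine-wrapper.py generate the synchronous entry point:

/* Illustrative declarations; bdrv_example()/bdrv_co_example() are made up. */
int coroutine_fn bdrv_co_example(BlockDriverState *bs, Error **errp);

/* The generated bdrv_example() may only be called outside coroutine context;
 * declaring it with co_wrapper_mixed instead would also allow calls from
 * coroutine context, where it calls bdrv_co_example() directly. */
int co_wrapper bdrv_example(BlockDriverState *bs, Error **errp);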

# gpg: Signature made Thu 15 Dec 2022 15:08:34 GMT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin: (50 commits)
  block: GRAPH_RDLOCK for functions only called by co_wrappers
  block: use co_wrapper_mixed_bdrv_rdlock in functions taking the rdlock
  block-coroutine-wrapper.py: introduce annotations that take the graph rdlock
  Mark assert_bdrv_graph_readable/writable() GRAPH_RD/WRLOCK
  graph-lock: TSA annotations for lock/unlock functions
  block: assert that graph read and writes are performed correctly
  block: remove unnecessary assert_bdrv_graph_writable()
  block: wrlock in bdrv_replace_child_noperm
  block: Fix locking in external_snapshot_prepare()
  test-bdrv-drain: Fix incorrect drain assumptions
  clang-tsa: Add macros for shared locks
  clang-tsa: Add TSA_ASSERT() macro
  Import clang-tsa.h
  async: Register/unregister aiocontext in graph lock list
  graph-lock: Implement guard macros
  graph-lock: Introduce a lock to protect block graph operations
  block: Factor out bdrv_drain_all_begin_nopoll()
  block/dirty-bitmap: convert coroutine-only functions to co_wrapper
  block: convert bdrv_create to co_wrapper
  block-coroutine-wrapper.py: support also basic return types
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 2022-12-16 13:26:09 +00:00
commit d038d2645a
45 changed files with 1577 additions and 1130 deletions

block.c

@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs);
static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
Transaction *tran);
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue,
@ -528,65 +526,24 @@ typedef struct CreateCo {
Error *err;
} CreateCo;
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
Error *local_err = NULL;
int ret;
CreateCo *cco = opaque;
assert(cco->drv);
GLOBAL_STATE_CODE();
ret = cco->drv->bdrv_co_create_opts(cco->drv,
cco->filename, cco->opts, &local_err);
error_propagate(&cco->err, local_err);
cco->ret = ret;
}
int bdrv_create(BlockDriver *drv, const char* filename,
QemuOpts *opts, Error **errp)
int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
int ret;
GLOBAL_STATE_CODE();
Coroutine *co;
CreateCo cco = {
.drv = drv,
.filename = g_strdup(filename),
.opts = opts,
.ret = NOT_DONE,
.err = NULL,
};
ERRP_GUARD();
if (!drv->bdrv_co_create_opts) {
error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
ret = -ENOTSUP;
goto out;
error_setg(errp, "Driver '%s' does not support image creation",
drv->format_name);
return -ENOTSUP;
}
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_create_co_entry(&cco);
} else {
co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
qemu_coroutine_enter(co);
while (cco.ret == NOT_DONE) {
aio_poll(qemu_get_aio_context(), true);
}
ret = drv->bdrv_co_create_opts(drv, filename, opts, errp);
if (ret < 0 && !*errp) {
error_setg_errno(errp, -ret, "Could not create image");
}
ret = cco.ret;
if (ret < 0) {
if (cco.err) {
error_propagate(errp, cco.err);
} else {
error_setg_errno(errp, -ret, "Could not create image");
}
}
out:
g_free(cco.filename);
return ret;
}
@ -725,7 +682,8 @@ out:
return ret;
}
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
Error **errp)
{
QemuOpts *protocol_opts;
BlockDriver *drv;
@ -766,7 +724,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
goto out;
}
ret = bdrv_create(drv, filename, protocol_opts, errp);
ret = bdrv_co_create(drv, filename, protocol_opts, errp);
out:
qemu_opts_del(protocol_opts);
qobject_unref(qdict);
@ -1228,20 +1186,19 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
bdrv_do_drained_begin_quiesce(bs, NULL, false);
bdrv_do_drained_begin_quiesce(bs, NULL);
}
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
return bdrv_drain_poll(bs, false, NULL, false);
return bdrv_drain_poll(bs, NULL, false);
}
static void bdrv_child_cb_drained_end(BdrvChild *child,
int *drained_end_counter)
static void bdrv_child_cb_drained_end(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
bdrv_drained_end_no_poll(bs, drained_end_counter);
bdrv_drained_end(bs);
}
static int bdrv_child_cb_inactivate(BdrvChild *child)
@ -1445,11 +1402,11 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
*child_flags = flags;
}
static void bdrv_child_cb_attach(BdrvChild *child)
static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
assert_bdrv_graph_writable(bs);
assert_bdrv_graph_writable();
QLIST_INSERT_HEAD(&bs->children, child, next);
if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) {
/*
@ -1485,11 +1442,9 @@ static void bdrv_child_cb_attach(BdrvChild *child)
assert(!bs->file);
bs->file = child;
}
bdrv_apply_subtree_drain(child, bs);
}
static void bdrv_child_cb_detach(BdrvChild *child)
static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
@ -1497,9 +1452,7 @@ static void bdrv_child_cb_detach(BdrvChild *child)
bdrv_backing_detach(child);
}
bdrv_unapply_subtree_drain(child, bs);
assert_bdrv_graph_writable(bs);
assert_bdrv_graph_writable();
QLIST_REMOVE(child, next);
if (child == bs->backing) {
assert(child != bs->file);
@ -1715,8 +1668,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
assert(is_power_of_2(bs->bl.request_alignment));
for (i = 0; i < bs->quiesce_counter; i++) {
if (drv->bdrv_co_drain_begin) {
drv->bdrv_co_drain_begin(bs);
if (drv->bdrv_drain_begin) {
drv->bdrv_drain_begin(bs);
}
}
@ -2414,6 +2367,20 @@ static void bdrv_replace_child_abort(void *opaque)
GLOBAL_STATE_CODE();
/* old_bs reference is transparently moved from @s to @s->child */
if (!s->child->bs) {
/*
* The parents were undrained when removing old_bs from the child. New
* requests can't have been made, though, because the child was empty.
*
* TODO Make bdrv_replace_child_noperm() transactionable to avoid
* undraining the parent in the first place. Once this is done, having
* new_bs drained when calling bdrv_replace_child_tran() is not a
* requirement any more.
*/
bdrv_parent_drained_begin_single(s->child);
assert(!bdrv_parent_drained_poll_single(s->child));
}
assert(s->child->quiesced_parent);
bdrv_replace_child_noperm(s->child, s->old_bs);
bdrv_unref(new_bs);
}
@ -2429,12 +2396,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
*
* Note: real unref of old_bs is done only on commit.
*
* Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
* kept drained until the transaction is completed.
*
* The function doesn't update permissions, caller is responsible for this.
*/
static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
Transaction *tran)
{
BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
assert(child->quiesced_parent);
assert(!new_bs || new_bs->quiesce_counter);
*s = (BdrvReplaceChildState) {
.child = child,
.old_bs = child->bs,
@ -2523,8 +2497,12 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
return 0;
}
static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
Transaction *tran, Error **errp)
/*
* @list is a product of bdrv_topological_dfs() (may be called several times) -
* a topologically sorted subgraph.
*/
static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q,
Transaction *tran, Error **errp)
{
int ret;
BlockDriverState *bs;
@ -2546,6 +2524,24 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
return 0;
}
/*
* @list is any list of nodes. The list is completed with all subtrees and
* topologically sorted. It's not a problem if some node occurs in @list
* several times.
*/
static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
Transaction *tran, Error **errp)
{
g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL);
g_autoptr(GSList) refresh_list = NULL;
for ( ; list; list = list->next) {
refresh_list = bdrv_topological_dfs(refresh_list, found, list->data);
}
return bdrv_do_refresh_perms(refresh_list, q, tran, errp);
}
void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
uint64_t *shared_perm)
{
@ -2593,15 +2589,24 @@ char *bdrv_perm_names(uint64_t perm)
}
static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
/* @tran is allowed to be NULL. In this case no rollback is possible */
static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran,
Error **errp)
{
int ret;
Transaction *tran = tran_new();
Transaction *local_tran = NULL;
g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
GLOBAL_STATE_CODE();
ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
tran_finalize(tran, ret);
if (!tran) {
tran = local_tran = tran_new();
}
ret = bdrv_do_refresh_perms(list, NULL, tran, errp);
if (local_tran) {
tran_finalize(local_tran, ret);
}
return ret;
}
@ -2617,7 +2622,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
bdrv_child_set_perm(c, perm, shared, tran);
ret = bdrv_refresh_perms(c->bs, &local_err);
ret = bdrv_refresh_perms(c->bs, tran, &local_err);
tran_finalize(tran, ret);
@ -2826,14 +2831,41 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
return permissions[qapi_perm];
}
/*
* Replaces the node that a BdrvChild points to without updating permissions.
*
* If @new_bs is non-NULL, the parent of @child must already be drained through
* @child.
*/
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs)
{
BlockDriverState *old_bs = child->bs;
int new_bs_quiesce_counter;
int drain_saldo;
assert(!child->frozen);
/*
* If we want to change the BdrvChild to point to a drained node as its new
* child->bs, we need to make sure that its new parent is drained, too. In
* other words, either child->quiesced_parent must already be true or we must
* be able to set it and keep the parent's quiesce_counter consistent with
* that, but without polling or starting new requests (this function
* guarantees that it doesn't poll, and starting new requests would be
* against the invariants of drain sections).
*
* To keep things simple, we pick the first option (child->quiesced_parent
* must already be true). We also generalise the rule a bit to make it
* easier to verify in callers and more likely to be covered in test cases:
* The parent must be quiesced through this child even if new_bs isn't
* currently drained.
*
* The only exception is for callers that always pass new_bs == NULL. In
* this case, we obviously never need to consider the case of a drained
* new_bs, so we can keep the callers simpler by allowing them not to drain
* the parent.
*/
assert(!new_bs || child->quiesced_parent);
assert(old_bs != new_bs);
GLOBAL_STATE_CODE();
@ -2841,59 +2873,33 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
/*
* If the new child node is drained but the old one was not, flush
* all outstanding requests to the old child node.
*/
while (drain_saldo > 0 && child->klass->drained_begin) {
bdrv_parent_drained_begin_single(child, true);
drain_saldo--;
}
/* TODO Pull this up into the callers to avoid polling here */
bdrv_graph_wrlock();
if (old_bs) {
/* Detach first so that the recursive drain sections coming from @child
* are already gone and we only end the drain sections that came from
* elsewhere. */
if (child->klass->detach) {
child->klass->detach(child);
}
assert_bdrv_graph_writable(old_bs);
QLIST_REMOVE(child, next_parent);
}
child->bs = new_bs;
if (new_bs) {
assert_bdrv_graph_writable(new_bs);
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
/*
* Detaching the old node may have led to the new node's
* quiesce_counter having been decreased. Not a problem, we
* just need to recognize this here and then invoke
* drained_end appropriately more often.
*/
assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
/* Attach only after starting new drained sections, so that recursive
* drain sections coming from @child don't get an extra .drained_begin
* callback. */
if (child->klass->attach) {
child->klass->attach(child);
}
}
bdrv_graph_wrunlock();
/*
* If the old child node was drained but the new one is not, allow
* requests to come in only after the new node has been attached.
* If the parent was drained through this BdrvChild previously, but new_bs
* is not drained, allow requests to come in only after the new node has
* been attached.
*/
while (drain_saldo < 0 && child->klass->drained_end) {
new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
if (!new_bs_quiesce_counter && child->quiesced_parent) {
bdrv_parent_drained_end_single(child);
drain_saldo++;
}
}
@ -3026,6 +3032,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
}
bdrv_ref(child_bs);
/*
* Let every new BdrvChild start with a drained parent. Inserting the child
* in the graph with bdrv_replace_child_noperm() will undrain it if
* @child_bs is not drained.
*
* The child was only just created and is not yet visible in global state
* until bdrv_replace_child_noperm() inserts it into the graph, so nobody
* could have sent requests and polling is not necessary.
*
* Note that this means that the parent isn't fully drained yet, we only
* stop new requests from coming in. This is fine, we don't care about the
* old requests here, they are not for this child. If another place enters a
* drain section for the same parent, but wants it to be fully quiesced, it
* will not run most of the code in .drained_begin() again (which is not
* a problem, we already did this), but it will still poll until the parent
* is fully quiesced, so it will not be negatively affected either.
*/
bdrv_parent_drained_begin_single(new_child);
bdrv_replace_child_noperm(new_child, child_bs);
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
@ -3070,30 +3094,6 @@ static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs,
tran, errp);
}
static void bdrv_detach_child(BdrvChild *child)
{
BlockDriverState *old_bs = child->bs;
GLOBAL_STATE_CODE();
bdrv_replace_child_noperm(child, NULL);
bdrv_child_free(child);
if (old_bs) {
/*
* Update permissions for old node. We're just taking a parent away, so
* we're loosening restrictions. Errors of permission update are not
* fatal in this case, ignore them.
*/
bdrv_refresh_perms(old_bs, NULL);
/*
* When the parent requiring a non-default AioContext is removed, the
* node moves back to the main AioContext
*/
bdrv_try_change_aio_context(old_bs, qemu_get_aio_context(), NULL, NULL);
}
}
/*
* This function steals the reference to child_bs from the caller.
* That reference is later dropped by bdrv_root_unref_child().
@ -3125,7 +3125,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
goto out;
}
ret = bdrv_refresh_perms(child_bs, errp);
ret = bdrv_refresh_perms(child_bs, tran, errp);
out:
tran_finalize(tran, ret);
@ -3166,7 +3166,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
goto out;
}
ret = bdrv_refresh_perms(parent_bs, errp);
ret = bdrv_refresh_perms(parent_bs, tran, errp);
if (ret < 0) {
goto out;
}
@ -3182,12 +3182,28 @@ out:
/* Callers must ensure that child->frozen is false. */
void bdrv_root_unref_child(BdrvChild *child)
{
BlockDriverState *child_bs;
BlockDriverState *child_bs = child->bs;
GLOBAL_STATE_CODE();
bdrv_replace_child_noperm(child, NULL);
bdrv_child_free(child);
if (child_bs) {
/*
* Update permissions for old node. We're just taking a parent away, so
* we're loosening restrictions. Errors of permission update are not
* fatal in this case, ignore them.
*/
bdrv_refresh_perms(child_bs, NULL, NULL);
/*
* When the parent requiring a non-default AioContext is removed, the
* node moves back to the main AioContext
*/
bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL,
NULL);
}
child_bs = child->bs;
bdrv_detach_child(child);
bdrv_unref(child_bs);
}
@ -3406,24 +3422,35 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs,
return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
}
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp)
int bdrv_set_backing_hd_drained(BlockDriverState *bs,
BlockDriverState *backing_hd,
Error **errp)
{
int ret;
Transaction *tran = tran_new();
GLOBAL_STATE_CODE();
bdrv_drained_begin(bs);
assert(bs->quiesce_counter > 0);
ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
if (ret < 0) {
goto out;
}
ret = bdrv_refresh_perms(bs, errp);
ret = bdrv_refresh_perms(bs, tran, errp);
out:
tran_finalize(tran, ret);
return ret;
}
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp)
{
int ret;
GLOBAL_STATE_CODE();
bdrv_drained_begin(bs);
ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
bdrv_drained_end(bs);
return ret;
@ -4153,7 +4180,9 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
* returns a pointer to bs_queue, which is either the newly allocated
* bs_queue, or the existing bs_queue being used.
*
* bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
* bs is drained here and undrained by bdrv_reopen_queue_free().
*
* To be called with bs->aio_context locked.
*/
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
@ -4173,12 +4202,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
int flags;
QemuOpts *opts;
/* Make sure that the caller remembered to use a drained section. This is
* important to avoid graph changes between the recursive queuing here and
* bdrv_reopen_multiple(). */
assert(bs->quiesce_counter > 0);
GLOBAL_STATE_CODE();
bdrv_drained_begin(bs);
if (bs_queue == NULL) {
bs_queue = g_new0(BlockReopenQueue, 1);
QTAILQ_INIT(bs_queue);
@ -4312,6 +4339,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
return bs_queue;
}
/* To be called with bs->aio_context locked */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
QDict *options, bool keep_old_opts)
@ -4328,6 +4356,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
if (bs_queue) {
BlockReopenQueueEntry *bs_entry, *next;
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
aio_context_acquire(ctx);
bdrv_drained_end(bs_entry->state.bs);
aio_context_release(ctx);
qobject_unref(bs_entry->state.explicit_options);
qobject_unref(bs_entry->state.options);
g_free(bs_entry);
@ -4361,7 +4395,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
BlockReopenQueueEntry *bs_entry, *next;
AioContext *ctx;
Transaction *tran = tran_new();
g_autoptr(GHashTable) found = NULL;
g_autoptr(GSList) refresh_list = NULL;
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
@ -4391,18 +4424,15 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
bs_entry->prepared = true;
}
found = g_hash_table_new(NULL, NULL);
QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
BDRVReopenState *state = &bs_entry->state;
refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs);
refresh_list = g_slist_prepend(refresh_list, state->bs);
if (state->old_backing_bs) {
refresh_list = bdrv_topological_dfs(refresh_list, found,
state->old_backing_bs);
refresh_list = g_slist_prepend(refresh_list, state->old_backing_bs);
}
if (state->old_file_bs) {
refresh_list = bdrv_topological_dfs(refresh_list, found,
state->old_file_bs);
refresh_list = g_slist_prepend(refresh_list, state->old_file_bs);
}
}
@ -4475,18 +4505,16 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
GLOBAL_STATE_CODE();
bdrv_subtree_drained_begin(bs);
queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
if (ctx != qemu_get_aio_context()) {
aio_context_release(ctx);
}
queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
ret = bdrv_reopen_multiple(queue, errp);
if (ctx != qemu_get_aio_context()) {
aio_context_acquire(ctx);
}
bdrv_subtree_drained_end(bs);
return ret;
}
@ -5067,23 +5095,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
}
if (child->bs) {
BlockDriverState *bs = child->bs;
bdrv_drained_begin(bs);
bdrv_replace_child_tran(child, NULL, tran);
bdrv_drained_end(bs);
}
tran_add(tran, &bdrv_remove_child_drv, child);
}
/*
* A function to remove backing-chain child of @bs if exists: cow child for
* format nodes (always .backing) and filter child for filters (may be .file or
* .backing)
*/
static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
Transaction *tran)
static void undrain_on_clean_cb(void *opaque)
{
bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
bdrv_drained_end(opaque);
}
static TransactionActionDrv undrain_on_clean = {
.clean = undrain_on_clean_cb,
};
static int bdrv_replace_node_noperm(BlockDriverState *from,
BlockDriverState *to,
bool auto_skip, Transaction *tran,
@ -5093,6 +5122,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
GLOBAL_STATE_CODE();
bdrv_drained_begin(from);
bdrv_drained_begin(to);
tran_add(tran, &undrain_on_clean, from);
tran_add(tran, &undrain_on_clean, to);
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->bs == from);
if (!should_update_child(c, to)) {
@ -5130,7 +5164,6 @@ static int bdrv_replace_node_common(BlockDriverState *from,
Error **errp)
{
Transaction *tran = tran_new();
g_autoptr(GHashTable) found = NULL;
g_autoptr(GSList) refresh_list = NULL;
BlockDriverState *to_cow_parent = NULL;
int ret;
@ -5168,13 +5201,11 @@ static int bdrv_replace_node_common(BlockDriverState *from,
}
if (detach_subchain) {
bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
}
found = g_hash_table_new(NULL, NULL);
refresh_list = bdrv_topological_dfs(refresh_list, found, to);
refresh_list = bdrv_topological_dfs(refresh_list, found, from);
refresh_list = g_slist_prepend(refresh_list, to);
refresh_list = g_slist_prepend(refresh_list, from);
ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
if (ret < 0) {
@ -5244,7 +5275,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
goto out;
}
ret = bdrv_refresh_perms(bs_new, errp);
ret = bdrv_refresh_perms(bs_new, tran, errp);
out:
tran_finalize(tran, ret);
@ -5259,7 +5290,6 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
{
int ret;
Transaction *tran = tran_new();
g_autoptr(GHashTable) found = NULL;
g_autoptr(GSList) refresh_list = NULL;
BlockDriverState *old_bs = child->bs;
@ -5271,9 +5301,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
bdrv_replace_child_tran(child, new_bs, tran);
found = g_hash_table_new(NULL, NULL);
refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs);
refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs);
refresh_list = g_slist_prepend(refresh_list, old_bs);
refresh_list = g_slist_prepend(refresh_list, new_bs);
ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
@ -5373,6 +5402,7 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix)
{
IO_CODE();
assert_bdrv_graph_readable();
if (bs->drv == NULL) {
return -ENOMEDIUM;
}
@ -5595,7 +5625,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
GLOBAL_STATE_CODE();
bdrv_ref(top);
bdrv_subtree_drained_begin(top);
bdrv_drained_begin(base);
if (!top->drv || !base->drv) {
goto exit;
@ -5668,7 +5698,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
ret = 0;
exit:
bdrv_subtree_drained_end(top);
bdrv_drained_end(base);
bdrv_unref(top);
return ret;
}
@ -6544,7 +6574,7 @@ int bdrv_activate(BlockDriverState *bs, Error **errp)
*/
if (bs->open_flags & BDRV_O_INACTIVE) {
bs->open_flags &= ~BDRV_O_INACTIVE;
ret = bdrv_refresh_perms(bs, errp);
ret = bdrv_refresh_perms(bs, NULL, errp);
if (ret < 0) {
bs->open_flags |= BDRV_O_INACTIVE;
return ret;
@ -6588,6 +6618,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
IO_CODE();
assert(!(bs->open_flags & BDRV_O_INACTIVE));
assert_bdrv_graph_readable();
if (bs->drv->bdrv_co_invalidate_cache) {
bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
@ -6689,7 +6720,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
* We only tried to loosen restrictions, so errors are not fatal, ignore
* them.
*/
bdrv_refresh_perms(bs, NULL);
bdrv_refresh_perms(bs, NULL, NULL);
/* Recursively inactivate children */
QLIST_FOREACH(child, &bs->children, next) {
@ -6894,6 +6925,10 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
return true;
}
/*
* Must not be called while holding the lock of an AioContext other than the
* current one.
*/
void bdrv_img_create(const char *filename, const char *fmt,
const char *base_filename, const char *base_fmt,
char *options, uint64_t img_size, int flags, bool quiet,
@ -7181,7 +7216,6 @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
if (bs->quiesce_counter) {
aio_enable_external(bs->aio_context);
}
assert_bdrv_graph_writable(bs);
bs->aio_context = NULL;
}
@ -7195,7 +7229,6 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
aio_disable_external(new_context);
}
assert_bdrv_graph_writable(bs);
bs->aio_context = new_context;
if (bs->drv && bs->drv->bdrv_attach_aio_context) {
@ -7276,7 +7309,6 @@ static void bdrv_set_aio_context_commit(void *opaque)
BlockDriverState *bs = (BlockDriverState *) state->bs;
AioContext *new_context = state->new_ctx;
AioContext *old_context = bdrv_get_aio_context(bs);
assert_bdrv_graph_writable(bs);
/*
* Take the old AioContext when detaching it from bs.

block/block-backend.c

@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
}
static void blk_root_drained_begin(BdrvChild *child);
static bool blk_root_drained_poll(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
@ -1424,6 +1424,27 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
}
int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
BlockDriverState *base,
int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
IO_CODE();
return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum,
map, file);
}
int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
BlockDriverState *base,
bool include_base, int64_t offset,
int64_t bytes, int64_t *pnum)
{
IO_CODE();
return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset,
bytes, pnum);
}
typedef struct BlkRwCo {
BlockBackend *blk;
int64_t offset;
@ -2556,7 +2577,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
return busy || !!blk->in_flight;
}
static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
assert(blk->quiesce_counter);

block/block-copy.c

@ -577,8 +577,9 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
return ret;
}
static int block_copy_block_status(BlockCopyState *s, int64_t offset,
int64_t bytes, int64_t *pnum)
static coroutine_fn int block_copy_block_status(BlockCopyState *s,
int64_t offset,
int64_t bytes, int64_t *pnum)
{
int64_t num;
BlockDriverState *base;
@ -590,8 +591,8 @@ static int block_copy_block_status(BlockCopyState *s, int64_t offset,
base = NULL;
}
ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
NULL, NULL);
ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num,
NULL, NULL);
if (ret < 0 || num < s->cluster_size) {
/*
* On error or if failed to obtain large enough chunk just fallback to
@ -613,8 +614,9 @@ static int block_copy_block_status(BlockCopyState *s, int64_t offset,
* Check if the cluster starting at offset is allocated or not.
* return via pnum the number of contiguous clusters sharing this allocation.
*/
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
int64_t *pnum)
static int coroutine_fn block_copy_is_cluster_allocated(BlockCopyState *s,
int64_t offset,
int64_t *pnum)
{
BlockDriverState *bs = s->source->bs;
int64_t count, total_count = 0;
@ -624,7 +626,7 @@ static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
while (true) {
ret = bdrv_is_allocated(bs, offset, bytes, &count);
ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
if (ret < 0) {
return ret;
}
@ -669,8 +671,9 @@ void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes)
* @return 0 when the cluster at @offset was unallocated,
* 1 otherwise, and -ret on error.
*/
int64_t block_copy_reset_unallocated(BlockCopyState *s,
int64_t offset, int64_t *count)
int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
int64_t offset,
int64_t *count)
{
int ret;
int64_t clusters, bytes;

include/block/block-gen.h

@ -30,20 +30,17 @@
/* Base structure for argument packing structures */
typedef struct BdrvPollCo {
BlockDriverState *bs;
AioContext *ctx;
bool in_progress;
int ret;
Coroutine *co; /* Keep pointer here for debugging */
} BdrvPollCo;
static inline int bdrv_poll_co(BdrvPollCo *s)
static inline void bdrv_poll_co(BdrvPollCo *s)
{
assert(!qemu_in_coroutine());
bdrv_coroutine_enter(s->bs, s->co);
BDRV_POLL_WHILE(s->bs, s->in_progress);
return s->ret;
aio_co_enter(s->ctx, s->co);
AIO_WAIT_WHILE(s->ctx, s->in_progress);
}
#endif /* BLOCK_BLOCK_GEN_H */
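
For context, a wrapper emitted by scripts/block-coroutine-wrapper.py roughly follows the pattern below (a simplified editor's sketch, not the generator's verbatim output, assuming the usual QEMU block headers; BdrvExampleCo, bdrv_example() and bdrv_co_example() are illustrative names):

int coroutine_fn bdrv_co_example(BlockDriverState *bs, Error **errp);

typedef struct BdrvExampleCo {
    BdrvPollCo poll_state;
    BlockDriverState *bs;
    Error **errp;
    int ret;
} BdrvExampleCo;

static void coroutine_fn bdrv_example_co_entry(void *opaque)
{
    BdrvExampleCo *s = opaque;

    s->ret = bdrv_co_example(s->bs, s->errp);   /* the real coroutine_fn */
    s->poll_state.in_progress = false;
    aio_wait_kick();
}

int bdrv_example(BlockDriverState *bs, Error **errp)
{
    BdrvExampleCo s = {
        .poll_state = {
            .ctx         = qemu_get_current_aio_context(),
            .in_progress = true,
        },
        .bs   = bs,
        .errp = errp,
    };

    assert(!qemu_in_coroutine());   /* plain co_wrapper: non-coroutine callers only */

    s.poll_state.co = qemu_coroutine_create(bdrv_example_co_entry, &s);
    bdrv_poll_co(&s.poll_state);    /* enter the coroutine and poll until it finishes */

    return s.ret;
}

A co_wrapper_mixed variant would additionally short-circuit to bdrv_co_example() when qemu_in_coroutine() is true, instead of asserting.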

block/commit.c

@ -155,8 +155,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
break;
}
/* Copy if allocated above the base */
ret = bdrv_is_allocated_above(blk_bs(s->top), s->base_overlay, true,
offset, COMMIT_BUFFER_SIZE, &n);
ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
offset, COMMIT_BUFFER_SIZE, &n);
copy = (ret > 0);
trace_commit_one_iteration(s, offset, n, ret);
if (copy) {

block/coroutines.h

@ -37,9 +37,11 @@
* the I/O API.
*/
int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix);
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
@ -53,10 +55,11 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState **file,
int *depth);
int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int coroutine_fn
nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking,
@ -71,7 +74,7 @@ nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking,
* the "I/O or GS" API.
*/
int generated_co_wrapper
int co_wrapper_mixed_bdrv_rdlock
bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool include_base,
@ -82,7 +85,7 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
int generated_co_wrapper
int co_wrapper_mixed
nbd_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp);
#endif /* BLOCK_COROUTINES_H */
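
Per the "introduce annotations that take the graph rdlock" commit in the list above, the _bdrv_rdlock wrapper flavours additionally make the generated code hold the block graph reader lock (added in block/graph-lock.c below) around the coroutine call. Conceptually, reusing the hypothetical names from the earlier wrapper sketch:

static void coroutine_fn bdrv_example_co_entry(void *opaque)
{
    BdrvExampleCo *s = opaque;

    bdrv_graph_co_rdlock();
    s->ret = bdrv_co_example(s->bs, s->errp);
    bdrv_graph_co_rdunlock();

    s->poll_state.in_progress = false;
    aio_wait_kick();
}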

block/crypto.c

@ -703,7 +703,7 @@ static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv,
}
/* Create protocol layer */
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto fail;
}

block/dirty-bitmap.c

@ -388,7 +388,7 @@ void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
* not fail.
* This function doesn't release corresponding BdrvDirtyBitmap.
*/
static int coroutine_fn
int coroutine_fn
bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
Error **errp)
{
@ -399,45 +399,6 @@ bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
return 0;
}
typedef struct BdrvRemovePersistentDirtyBitmapCo {
BlockDriverState *bs;
const char *name;
Error **errp;
int ret;
} BdrvRemovePersistentDirtyBitmapCo;
static void coroutine_fn
bdrv_co_remove_persistent_dirty_bitmap_entry(void *opaque)
{
BdrvRemovePersistentDirtyBitmapCo *s = opaque;
s->ret = bdrv_co_remove_persistent_dirty_bitmap(s->bs, s->name, s->errp);
aio_wait_kick();
}
int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
Error **errp)
{
if (qemu_in_coroutine()) {
return bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp);
} else {
Coroutine *co;
BdrvRemovePersistentDirtyBitmapCo s = {
.bs = bs,
.name = name,
.errp = errp,
.ret = -EINPROGRESS,
};
co = qemu_coroutine_create(bdrv_co_remove_persistent_dirty_bitmap_entry,
&s);
bdrv_coroutine_enter(bs, co);
BDRV_POLL_WHILE(bs, s.ret == -EINPROGRESS);
return s.ret;
}
}
bool
bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs)
{
@ -447,7 +408,7 @@ bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs)
return false;
}
static bool coroutine_fn
bool coroutine_fn
bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
uint32_t granularity, Error **errp)
{
@ -470,51 +431,6 @@ bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
return drv->bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp);
}
typedef struct BdrvCanStoreNewDirtyBitmapCo {
BlockDriverState *bs;
const char *name;
uint32_t granularity;
Error **errp;
bool ret;
bool in_progress;
} BdrvCanStoreNewDirtyBitmapCo;
static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque)
{
BdrvCanStoreNewDirtyBitmapCo *s = opaque;
s->ret = bdrv_co_can_store_new_dirty_bitmap(s->bs, s->name, s->granularity,
s->errp);
s->in_progress = false;
aio_wait_kick();
}
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
uint32_t granularity, Error **errp)
{
IO_CODE();
if (qemu_in_coroutine()) {
return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp);
} else {
Coroutine *co;
BdrvCanStoreNewDirtyBitmapCo s = {
.bs = bs,
.name = name,
.granularity = granularity,
.errp = errp,
.in_progress = true,
};
co = qemu_coroutine_create(bdrv_co_can_store_new_dirty_bitmap_entry,
&s);
bdrv_coroutine_enter(bs, co);
BDRV_POLL_WHILE(bs, s.in_progress);
return s.ret;
}
}
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
bdrv_dirty_bitmaps_lock(bitmap->bs);

block/graph-lock.c (new file)

@ -0,0 +1,275 @@
/*
* Graph lock: rwlock to protect block layer graph manipulations (add/remove
* edges and nodes)
*
* Copyright (c) 2022 Red Hat
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "block/graph-lock.h"
#include "block/block.h"
#include "block/block_int.h"
/* Dummy lock object to use for Thread Safety Analysis (TSA) */
BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
/* Written and read with atomic operations. */
static int has_writer;
/*
* A reader coroutine could move from an AioContext to another.
* If this happens, there is no problem from the point of view of
* counters. The problem is that the total count becomes
* unbalanced if one of the two AioContexts gets deleted.
* The count of readers must remain correct, so the AioContext's
* balance is transferred to this global variable.
* Protected by aio_context_list_lock.
*/
static uint32_t orphaned_reader_count;
/* Queue of readers waiting for the writer to finish */
static CoQueue reader_queue;
struct BdrvGraphRWlock {
/* How many readers are currently reading the graph. */
uint32_t reader_count;
/*
* List of BdrvGraphRWlock kept in graph-lock.c
* Protected by aio_context_list_lock
*/
QTAILQ_ENTRY(BdrvGraphRWlock) next_aio;
};
/*
* List of BdrvGraphRWlock. This list ensures that each BdrvGraphRWlock
* can safely modify only its own counter, avoiding reads and writes of the
* others and thus improving performance by avoiding cacheline bounces.
*/
static QTAILQ_HEAD(, BdrvGraphRWlock) aio_context_list =
QTAILQ_HEAD_INITIALIZER(aio_context_list);
static void __attribute__((__constructor__)) bdrv_init_graph_lock(void)
{
qemu_mutex_init(&aio_context_list_lock);
qemu_co_queue_init(&reader_queue);
}
void register_aiocontext(AioContext *ctx)
{
ctx->bdrv_graph = g_new0(BdrvGraphRWlock, 1);
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(ctx->bdrv_graph->reader_count == 0);
QTAILQ_INSERT_TAIL(&aio_context_list, ctx->bdrv_graph, next_aio);
}
void unregister_aiocontext(AioContext *ctx)
{
QEMU_LOCK_GUARD(&aio_context_list_lock);
orphaned_reader_count += ctx->bdrv_graph->reader_count;
QTAILQ_REMOVE(&aio_context_list, ctx->bdrv_graph, next_aio);
g_free(ctx->bdrv_graph);
}
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
uint32_t rd;
QEMU_LOCK_GUARD(&aio_context_list_lock);
/* rd can temporarily be negative, but the total will *always* be >= 0 */
rd = orphaned_reader_count;
QTAILQ_FOREACH(brdv_graph, &aio_context_list, next_aio) {
rd += qatomic_read(&brdv_graph->reader_count);
}
/* shouldn't overflow unless there are 2^31 readers */
assert((int32_t)rd >= 0);
return rd;
}
void bdrv_graph_wrlock(void)
{
GLOBAL_STATE_CODE();
assert(!qatomic_read(&has_writer));
/* Make sure that constantly arriving new I/O doesn't cause starvation */
bdrv_drain_all_begin_nopoll();
/*
* reader_count == 0: this means that a reader coming in now will read
* has_writer as 1 and wait.
* reader_count >= 1: we don't know if those readers read has_writer == 0 or 1,
* but we need to wait for them to finish.
* Wait by allowing other coroutines (and possible readers) to continue.
*/
do {
/*
* has_writer must be 0 while polling, otherwise we get a deadlock if
* any callback involved during AIO_WAIT_WHILE() tries to acquire the
* reader lock.
*/
qatomic_set(&has_writer, 0);
AIO_WAIT_WHILE(qemu_get_aio_context(), reader_count() >= 1);
qatomic_set(&has_writer, 1);
/*
* We want to only check reader_count() after has_writer = 1 is visible
* to other threads. That way no more readers can sneak in after we've
* determined reader_count() == 0.
*/
smp_mb();
} while (reader_count() >= 1);
bdrv_drain_all_end();
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
/*
* No need for memory barriers, this works in pair with
* the slow path of rdlock() and both take the lock.
*/
qatomic_store_release(&has_writer, 0);
/* Wake up all coroutines that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
/* Do not lock if in main thread */
if (qemu_in_main_thread()) {
return;
}
for (;;) {
qatomic_set(&bdrv_graph->reader_count,
bdrv_graph->reader_count + 1);
/* make sure writer sees reader_count before we check has_writer */
smp_mb();
/*
* has_writer == 0: this means writer will read reader_count as >= 1
* has_writer == 1: we don't know if writer read reader_count == 0
* or > 0, but we need to wait anyways because
* it will write.
*/
if (!qatomic_read(&has_writer)) {
break;
}
/*
* Synchronize access with reader_count() in bdrv_graph_wrlock().
* Case 1:
* If this critical section gets executed first, reader_count will
* decrease and the reader will go to sleep.
* Then the writer will read reader_count that does not take into
* account this reader, and if there's no other reader it will
* enter the write section.
* Case 2:
* If reader_count() critical section gets executed first,
* then writer will read reader_count >= 1.
* It will wait in AIO_WAIT_WHILE(), but once it releases the lock
* we will enter this critical section and call aio_wait_kick().
*/
WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) {
/*
* Additional check when we use the above lock to synchronize
* with bdrv_graph_wrunlock().
* Case 1:
* If this gets executed first, has_writer is still 1, so we reduce
* reader_count and go to sleep.
* Then the writer will set has_writer to 0 and wake up all readers,
* us included.
* Case 2:
* If bdrv_graph_wrunlock() critical section gets executed first,
* then it will set has_writer to 0 and wake up all other readers.
* Then we execute this critical section, and therefore must check
* again for has_writer, otherwise we sleep without any writer
* actually running.
*/
if (!qatomic_read(&has_writer)) {
return;
}
/* slow path where reader sleeps */
bdrv_graph->reader_count--;
aio_wait_kick();
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
/* Do not lock if in main thread */
if (qemu_in_main_thread()) {
return;
}
qatomic_store_release(&bdrv_graph->reader_count,
bdrv_graph->reader_count - 1);
/* make sure writer sees reader_count before we check has_writer */
smp_mb();
/*
* has_writer == 0: the writer, if any, will read the decreased reader_count
* has_writer == 1: we don't know if writer read reader_count old or
* new. Therefore, kick again so on next iteration
* writer will for sure read the updated value.
*/
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
}
void bdrv_graph_rdlock_main_loop(void)
{
GLOBAL_STATE_CODE();
assert(!qemu_in_coroutine());
}
void bdrv_graph_rdunlock_main_loop(void)
{
GLOBAL_STATE_CODE();
assert(!qemu_in_coroutine());
}
void assert_bdrv_graph_readable(void)
{
assert(qemu_in_main_thread() || reader_count());
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
assert(qatomic_read(&has_writer));
}
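
To give a rough idea of how the new lock is meant to be used (an editor's sketch with hypothetical function names; the GRAPH_RDLOCK_GUARD() macros from the "Implement guard macros" commit can replace the explicit lock/unlock calls):

/* Reader side: coroutine code traversing graph edges takes the rdlock. */
static int coroutine_fn example_co_count_children(BlockDriverState *bs)
{
    BdrvChild *child;
    int n = 0;

    bdrv_graph_co_rdlock();
    QLIST_FOREACH(child, &bs->children, next) {
        n++;    /* edges cannot change while the read lock is held */
    }
    bdrv_graph_co_rdunlock();

    return n;
}

/* Writer side: only main-loop code may modify edges, under the wrlock.
 * Simplified; a real caller also updates child->bs and permissions. */
static void example_modify_graph(BdrvChild *child, BlockDriverState *new_parent)
{
    bdrv_graph_wrlock();    /* drains all nodes and waits for all readers */
    QLIST_INSERT_HEAD(&new_parent->parents, child, next_parent);
    bdrv_graph_wrunlock();
}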

block/io.c

@ -45,53 +45,43 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int64_t bytes, BdrvRequestFlags flags);
static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
bool ignore_bds_parents)
static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c, *next;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
if (c == ignore) {
continue;
}
bdrv_parent_drained_begin_single(c, false);
}
}
static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
int *drained_end_counter)
{
assert(c->parent_quiesce_counter > 0);
c->parent_quiesce_counter--;
if (c->klass->drained_end) {
c->klass->drained_end(c, drained_end_counter);
bdrv_parent_drained_begin_single(c);
}
}
void bdrv_parent_drained_end_single(BdrvChild *c)
{
int drained_end_counter = 0;
AioContext *ctx = bdrv_child_get_parent_aio_context(c);
IO_OR_GS_CODE();
bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0);
assert(c->quiesced_parent);
c->quiesced_parent = false;
if (c->klass->drained_end) {
c->klass->drained_end(c);
}
}
static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
bool ignore_bds_parents,
int *drained_end_counter)
static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c;
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
if (c == ignore) {
continue;
}
bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
bdrv_parent_drained_end_single(c);
}
}
static bool bdrv_parent_drained_poll_single(BdrvChild *c)
bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
if (c->klass->drained_poll) {
return c->klass->drained_poll(c);
@ -115,17 +105,16 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
return busy;
}
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
void bdrv_parent_drained_begin_single(BdrvChild *c)
{
AioContext *ctx = bdrv_child_get_parent_aio_context(c);
IO_OR_GS_CODE();
c->parent_quiesce_counter++;
assert(!c->quiesced_parent);
c->quiesced_parent = true;
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
}
if (poll) {
AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
}
}
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
@ -245,69 +234,14 @@ typedef struct {
BlockDriverState *bs;
bool done;
bool begin;
bool recursive;
bool poll;
BdrvChild *parent;
bool ignore_bds_parents;
int *drained_end_counter;
} BdrvCoDrainData;
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
BdrvCoDrainData *data = opaque;
BlockDriverState *bs = data->bs;
if (data->begin) {
bs->drv->bdrv_co_drain_begin(bs);
} else {
bs->drv->bdrv_co_drain_end(bs);
}
/* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
qatomic_mb_set(&data->done, true);
if (!data->begin) {
qatomic_dec(data->drained_end_counter);
}
bdrv_dec_in_flight(bs);
g_free(data);
}
/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
int *drained_end_counter)
{
BdrvCoDrainData *data;
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
(!begin && !bs->drv->bdrv_co_drain_end)) {
return;
}
data = g_new(BdrvCoDrainData, 1);
*data = (BdrvCoDrainData) {
.bs = bs,
.done = false,
.begin = begin,
.drained_end_counter = drained_end_counter,
};
if (!begin) {
qatomic_inc(drained_end_counter);
}
/* Make sure the driver callback completes during the polling phase for
* drain_begin. */
bdrv_inc_in_flight(bs);
data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents)
bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
bool ignore_bds_parents)
{
BdrvChild *child, *next;
IO_OR_GS_CODE();
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
@ -318,30 +252,18 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
return true;
}
if (recursive) {
assert(!ignore_bds_parents);
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
if (bdrv_drain_poll(child->bs, recursive, child, false)) {
return true;
}
}
}
return false;
}
static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
BdrvChild *ignore_parent)
{
return bdrv_drain_poll(bs, recursive, ignore_parent, false);
return bdrv_drain_poll(bs, ignore_parent, false);
}
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
BdrvChild *parent, bool ignore_bds_parents,
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
BdrvChild *parent, bool ignore_bds_parents,
int *drained_end_counter);
static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
static void bdrv_co_drain_bh_cb(void *opaque)
{
@ -354,14 +276,10 @@ static void bdrv_co_drain_bh_cb(void *opaque)
aio_context_acquire(ctx);
bdrv_dec_in_flight(bs);
if (data->begin) {
assert(!data->drained_end_counter);
bdrv_do_drained_begin(bs, data->recursive, data->parent,
data->ignore_bds_parents, data->poll);
bdrv_do_drained_begin(bs, data->parent, data->poll);
} else {
assert(!data->poll);
bdrv_do_drained_end(bs, data->recursive, data->parent,
data->ignore_bds_parents,
data->drained_end_counter);
bdrv_do_drained_end(bs, data->parent);
}
aio_context_release(ctx);
} else {
@ -374,11 +292,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
bool begin, bool recursive,
bool begin,
BdrvChild *parent,
bool ignore_bds_parents,
bool poll,
int *drained_end_counter)
bool poll)
{
BdrvCoDrainData data;
Coroutine *self = qemu_coroutine_self();
@ -394,11 +310,8 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.bs = bs,
.done = false,
.begin = begin,
.recursive = recursive,
.parent = parent,
.ignore_bds_parents = ignore_bds_parents,
.poll = poll,
.drained_end_counter = drained_end_counter,
};
if (bs) {
@ -429,41 +342,22 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
}
}
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents)
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
bool poll)
{
IO_OR_GS_CODE();
assert(!qemu_in_coroutine());
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, true, parent, poll);
return;
}
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
}
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
bdrv_drain_invoke(bs, true, NULL);
}
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
BdrvChild *parent, bool ignore_bds_parents,
bool poll)
{
BdrvChild *child, *next;
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
poll, NULL);
return;
}
bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
if (recursive) {
assert(!ignore_bds_parents);
bs->recursive_quiesce_counter++;
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
false);
bdrv_parent_drained_begin(bs, parent);
if (bs->drv && bs->drv->bdrv_drain_begin) {
bs->drv->bdrv_drain_begin(bs);
}
}
@ -477,117 +371,50 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
* nodes.
*/
if (poll) {
assert(!ignore_bds_parents);
BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
}
}
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
{
bdrv_do_drained_begin(bs, parent, false);
}
void bdrv_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
bdrv_do_drained_begin(bs, false, NULL, false, true);
}
void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
bdrv_do_drained_begin(bs, true, NULL, false, true);
bdrv_do_drained_begin(bs, NULL, true);
}
/**
* This function does not poll, nor must any of its recursively called
* functions. The *drained_end_counter pointee will be incremented
* once for every background operation scheduled, and decremented once
* the operation settles. Therefore, the pointer must remain valid
* until the pointee reaches 0. That implies that whoever sets up the
* pointee has to poll until it is 0.
*
* We use atomic operations to access *drained_end_counter, because
* (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
* @bs may contain nodes in different AioContexts,
* (2) bdrv_drain_all_end() uses the same counter for all nodes,
* regardless of which AioContext they are in.
* functions.
*/
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
BdrvChild *parent, bool ignore_bds_parents,
int *drained_end_counter)
static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
{
BdrvChild *child;
int old_quiesce_counter;
assert(drained_end_counter != NULL);
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
false, drained_end_counter);
bdrv_co_yield_to_drain(bs, false, parent, false);
return;
}
assert(bs->quiesce_counter > 0);
/* Re-enable things in child-to-parent order */
bdrv_drain_invoke(bs, false, drained_end_counter);
bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
drained_end_counter);
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
if (old_quiesce_counter == 1) {
aio_enable_external(bdrv_get_aio_context(bs));
}
if (recursive) {
assert(!ignore_bds_parents);
bs->recursive_quiesce_counter--;
QLIST_FOREACH(child, &bs->children, next) {
bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
drained_end_counter);
if (bs->drv && bs->drv->bdrv_drain_end) {
bs->drv->bdrv_drain_end(bs);
}
bdrv_parent_drained_end(bs, parent);
aio_enable_external(bdrv_get_aio_context(bs));
}
}
void bdrv_drained_end(BlockDriverState *bs)
{
int drained_end_counter = 0;
IO_OR_GS_CODE();
bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
IO_CODE();
bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}
void bdrv_subtree_drained_end(BlockDriverState *bs)
{
int drained_end_counter = 0;
IO_OR_GS_CODE();
bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
int i;
IO_OR_GS_CODE();
for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
bdrv_do_drained_begin(child->bs, true, child, false, true);
}
}
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
int drained_end_counter = 0;
int i;
IO_OR_GS_CODE();
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
bdrv_do_drained_end(child->bs, true, child, false,
&drained_end_counter);
}
BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
bdrv_do_drained_end(bs, NULL);
}
void bdrv_drain(BlockDriverState *bs)
@ -620,7 +447,7 @@ static bool bdrv_drain_all_poll(void)
while ((bs = bdrv_next_all_states(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
result |= bdrv_drain_poll(bs, false, NULL, true);
result |= bdrv_drain_poll(bs, NULL, true);
aio_context_release(aio_context);
}
@ -639,16 +466,11 @@ static bool bdrv_drain_all_poll(void)
* NOTE: no new block jobs or BlockDriverStates can be created between
* the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
*/
void bdrv_drain_all_begin(void)
void bdrv_drain_all_begin_nopoll(void)
{
BlockDriverState *bs = NULL;
GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
return;
}
/*
* bdrv queue is managed by record/replay,
* waiting for finishing the I/O requests may
@ -670,9 +492,21 @@ void bdrv_drain_all_begin(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
bdrv_do_drained_begin(bs, false, NULL, true, false);
bdrv_do_drained_begin(bs, NULL, false);
aio_context_release(aio_context);
}
}
void bdrv_drain_all_begin(void)
{
BlockDriverState *bs = NULL;
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(NULL, true, NULL, true);
return;
}
bdrv_drain_all_begin_nopoll();
/* Now poll the in-flight requests */
AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
@ -684,22 +518,19 @@ void bdrv_drain_all_begin(void)
void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
int drained_end_counter = 0;
GLOBAL_STATE_CODE();
g_assert(bs->quiesce_counter > 0);
g_assert(!bs->refcnt);
while (bs->quiesce_counter) {
bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
bdrv_do_drained_end(bs, NULL);
}
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_drain_all_end(void)
{
BlockDriverState *bs = NULL;
int drained_end_counter = 0;
GLOBAL_STATE_CODE();
/*
@ -715,13 +546,11 @@ void bdrv_drain_all_end(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
bdrv_do_drained_end(bs, NULL);
aio_context_release(aio_context);
}
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
assert(bdrv_drain_all_count > 0);
bdrv_drain_all_count--;
}
@ -2711,6 +2540,17 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
return ret;
}
int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
IO_CODE();
return bdrv_co_common_block_status_above(bs, base, false, true, offset,
bytes, pnum, map, file, NULL);
}
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
@ -2756,6 +2596,22 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
}
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum)
{
int ret;
int64_t dummy;
IO_CODE();
ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset,
bytes, pnum ? pnum : &dummy, NULL,
NULL, NULL);
if (ret < 0) {
return ret;
}
return !!(ret & BDRV_BLOCK_ALLOCATED);
}
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum)
{
@ -2772,6 +2628,29 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
return !!(ret & BDRV_BLOCK_ALLOCATED);
}
/* See bdrv_is_allocated_above for documentation */
int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
BlockDriverState *base,
bool include_base, int64_t offset,
int64_t bytes, int64_t *pnum)
{
int depth;
int ret;
IO_CODE();
ret = bdrv_co_common_block_status_above(top, base, include_base, false,
offset, bytes, pnum, NULL, NULL,
&depth);
if (ret < 0) {
return ret;
}
if (ret & BDRV_BLOCK_ALLOCATED) {
return depth;
}
return 0;
}
/*
* Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
*
@ -2795,10 +2674,12 @@ int bdrv_is_allocated_above(BlockDriverState *top,
int64_t bytes, int64_t *pnum)
{
int depth;
int ret = bdrv_common_block_status_above(top, base, include_base, false,
offset, bytes, pnum, NULL, NULL,
&depth);
int ret;
IO_CODE();
ret = bdrv_common_block_status_above(top, base, include_base, false,
offset, bytes, pnum, NULL, NULL,
&depth);
if (ret < 0) {
return ret;
}
@ -2816,6 +2697,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
IO_CODE();
assert_bdrv_graph_readable();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
@ -2848,6 +2730,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
IO_CODE();
assert_bdrv_graph_readable();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {


@ -10,6 +10,7 @@ block_ss.add(files(
'blkverify.c',
'block-backend.c',
'block-copy.c',
'graph-lock.c',
'commit.c',
'copy-on-read.c',
'preallocate.c',
@ -137,6 +138,7 @@ block_gen_c = custom_target('block-gen.c',
output: 'block-gen.c',
input: files(
'../include/block/block-io.h',
'../include/block/dirty-bitmap.h',
'../include/block/block-global-state.h',
'../include/sysemu/block-backend-io.h',
'coroutines.h'


@ -646,7 +646,7 @@ static int coroutine_fn parallels_co_create_opts(BlockDriver *drv,
}
/* Create and open the file (protocol layer) */
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto done;
}


@ -973,7 +973,7 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv,
}
/* Create and open the file (protocol layer) */
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto fail;
}


@ -3871,7 +3871,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
}
/* Create and open the file (protocol layer) */
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto finish;
}
@ -3886,7 +3886,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
/* Create and open an external data file (protocol layer) */
val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
if (val) {
ret = bdrv_create_file(val, opts, errp);
ret = bdrv_co_create_file(val, opts, errp);
if (ret < 0) {
goto finish;
}


@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s)
assert(!s->allocating_write_reqs_plugged);
if (s->allocating_acb != NULL) {
/* Another allocating write came concurrently. This cannot happen
* from bdrv_qed_co_drain_begin, but it can happen when the timer runs.
* from bdrv_qed_drain_begin, but it can happen when the timer runs.
*/
qemu_co_mutex_unlock(&s->table_lock);
return false;
@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s)
qemu_co_mutex_unlock(&s->table_lock);
}
static void coroutine_fn qed_need_check_timer_entry(void *opaque)
static void coroutine_fn qed_need_check_timer(BDRVQEDState *s)
{
BDRVQEDState *s = opaque;
int ret;
trace_qed_need_check_timer_cb(s);
@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
(void) ret;
}
static void coroutine_fn qed_need_check_timer_entry(void *opaque)
{
BDRVQEDState *s = opaque;
qed_need_check_timer(opaque);
bdrv_dec_in_flight(s->bs);
}
static void qed_need_check_timer_cb(void *opaque)
{
BDRVQEDState *s = opaque;
Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque);
bdrv_inc_in_flight(s->bs);
qemu_coroutine_enter(co);
}
@ -355,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
}
}
static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
static void bdrv_qed_drain_begin(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
* header is flushed.
*/
if (s->need_check_timer && timer_pending(s->need_check_timer)) {
Coroutine *co;
qed_cancel_need_check_timer(s);
qed_need_check_timer_entry(s);
co = qemu_coroutine_create(qed_need_check_timer_entry, s);
bdrv_inc_in_flight(bs);
aio_co_enter(bdrv_get_aio_context(bs), co);
}
}
@ -764,7 +778,7 @@ static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv,
}
/* Create and open the file (protocol layer) */
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto fail;
}
@ -1647,7 +1661,7 @@ static BlockDriver bdrv_qed = {
.bdrv_co_check = bdrv_qed_co_check,
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
.bdrv_co_drain_begin = bdrv_qed_co_drain_begin,
.bdrv_drain_begin = bdrv_qed_drain_begin,
};
static void bdrv_qed_init(void)


@ -433,7 +433,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
QemuOpts *opts,
Error **errp)
{
return bdrv_create_file(filename, opts, errp);
return bdrv_co_create_file(filename, opts, errp);
}
static int raw_open(BlockDriverState *bs, QDict *options, int flags,


@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
}
bdrv_subtree_drained_begin(hidden_disk->bs);
bdrv_subtree_drained_begin(secondary_disk->bs);
if (s->orig_hidden_read_only) {
QDict *opts = qdict_new();
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
aio_context_acquire(ctx);
}
}
bdrv_subtree_drained_end(hidden_disk->bs);
bdrv_subtree_drained_end(secondary_disk->bs);
}
static void backup_job_cleanup(BlockDriverState *bs)


@ -64,13 +64,16 @@ static int stream_prepare(Job *job)
bdrv_cor_filter_drop(s->cor_filter_bs);
s->cor_filter_bs = NULL;
bdrv_subtree_drained_begin(s->above_base);
/*
* bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain
* already here and use bdrv_set_backing_hd_drained() instead because
* the polling during drained_begin() might change the graph, and if we do
* this only later, we may end up working with the wrong base node (or it
* might even have gone away by the time we want to use it).
*/
bdrv_drained_begin(unfiltered_bs);
base = bdrv_filter_or_cow_bs(s->above_base);
if (base) {
bdrv_ref(base);
}
unfiltered_base = bdrv_skip_filters(base);
if (bdrv_cow_child(unfiltered_bs)) {
@ -82,7 +85,13 @@ static int stream_prepare(Job *job)
}
}
bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
/*
* This call will do I/O, so the graph can change again from here on.
* We have already completed the graph change, so we are not in danger
* of operating on the wrong node any more if this happens.
*/
ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false);
if (local_err) {
error_report_err(local_err);
@ -92,10 +101,7 @@ static int stream_prepare(Job *job)
}
out:
if (base) {
bdrv_unref(base);
}
bdrv_subtree_drained_end(s->above_base);
bdrv_drained_end(unfiltered_bs);
return ret;
}


@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
reopen_state->opaque = NULL;
}
static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
static void throttle_drain_begin(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
}
}
static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs)
static void throttle_drain_end(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
assert(tgm->io_limits_disabled);
@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = {
.bdrv_reopen_commit = throttle_reopen_commit,
.bdrv_reopen_abort = throttle_reopen_abort,
.bdrv_co_drain_begin = throttle_co_drain_begin,
.bdrv_co_drain_end = throttle_co_drain_end,
.bdrv_drain_begin = throttle_drain_begin,
.bdrv_drain_end = throttle_drain_end,
.is_filter = true,
.strong_runtime_opts = throttle_strong_runtime_opts,


@ -934,7 +934,7 @@ static int coroutine_fn vdi_co_create_opts(BlockDriver *drv,
qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vdi_create_opts, true);
/* Create and open the file (protocol layer) */
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto done;
}


@ -2084,7 +2084,7 @@ static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv,
}
/* Create and open the file (protocol layer) */
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto fail;
}


@ -2285,15 +2285,16 @@ exit:
return ret;
}
static int vmdk_create_extent(const char *filename, int64_t filesize,
bool flat, bool compress, bool zeroed_grain,
BlockBackend **pbb,
QemuOpts *opts, Error **errp)
static int coroutine_fn vmdk_create_extent(const char *filename,
int64_t filesize, bool flat,
bool compress, bool zeroed_grain,
BlockBackend **pbb,
QemuOpts *opts, Error **errp)
{
int ret;
BlockBackend *blk = NULL;
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto exit;
}
@ -2366,14 +2367,14 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
* non-split format.
* idx >= 1: get the n-th extent if in a split subformat
*/
typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size,
int idx,
bool flat,
bool split,
bool compress,
bool zeroed_grain,
void *opaque,
Error **errp);
typedef BlockBackend * coroutine_fn (*vmdk_create_extent_fn)(int64_t size,
int idx,
bool flat,
bool split,
bool compress,
bool zeroed_grain,
void *opaque,
Error **errp);
static void vmdk_desc_add_extent(GString *desc,
const char *extent_line_fmt,
@ -2616,7 +2617,7 @@ typedef struct {
QemuOpts *opts;
} VMDKCreateOptsData;
static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx,
static BlockBackend * coroutine_fn vmdk_co_create_opts_cb(int64_t size, int idx,
bool flat, bool split, bool compress,
bool zeroed_grain, void *opaque,
Error **errp)
@ -2768,10 +2769,11 @@ exit:
return ret;
}
static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
bool flat, bool split, bool compress,
bool zeroed_grain, void *opaque,
Error **errp)
static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
bool flat, bool split,
bool compress,
bool zeroed_grain,
void *opaque, Error **errp)
{
int ret;
BlockDriverState *bs;


@ -1111,7 +1111,7 @@ static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
}
/* Create and open the file (protocol layer) */
ret = bdrv_create_file(filename, opts, errp);
ret = bdrv_co_create_file(filename, opts, errp);
if (ret < 0) {
goto fail;
}


@ -1507,10 +1507,14 @@ static void external_snapshot_prepare(BlkActionState *common,
goto out;
}
bdrv_refresh_filename(state->old_bs);
aio_context_release(aio_context);
bdrv_img_create(new_image_file, format,
state->old_bs->filename,
state->old_bs->drv->format_name,
NULL, size, flags, false, &local_err);
aio_context_acquire(aio_context);
if (local_err) {
error_propagate(errp, local_err);
goto out;
@ -3515,8 +3519,6 @@ fail:
void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
{
BlockReopenQueue *queue = NULL;
GSList *drained = NULL;
GSList *p;
/* Add each one of the BDS that we want to reopen to the queue */
for (; reopen_list != NULL; reopen_list = reopen_list->next) {
@ -3553,9 +3555,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
bdrv_subtree_drained_begin(bs);
queue = bdrv_reopen_queue(queue, bs, qdict, false);
drained = g_slist_prepend(drained, bs);
aio_context_release(ctx);
}
@ -3566,15 +3566,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
fail:
bdrv_reopen_queue_free(queue);
for (p = drained; p; p = p->next) {
BlockDriverState *bs = p->data;
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
bdrv_subtree_drained_end(bs);
aio_context_release(ctx);
}
g_slist_free(drained);
}
void qmp_blockdev_del(const char *node_name, Error **errp)


@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c)
}
}
static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
static void child_job_drained_end(BdrvChild *c)
{
BlockJob *job = c->opaque;
job_resume(&job->job);


@ -26,12 +26,12 @@ called ``bdrv_foo(<same args>)``. In this case the script can help. To
trigger the generation:
1. You need ``bdrv_foo`` declaration somewhere (for example, in
``block/coroutines.h``) with the ``generated_co_wrapper`` mark,
``block/coroutines.h``) with the ``co_wrapper`` mark,
like this:
.. code-block:: c
int generated_co_wrapper bdrv_foo(<some args>);
int co_wrapper bdrv_foo(<some args>);
2. You need to feed this declaration to block-coroutine-wrapper script.
For this, add the .h (or .c) file with the declaration to the
@ -46,7 +46,7 @@ Links
1. The script location is ``scripts/block-coroutine-wrapper.py``.
2. Generic place for private ``generated_co_wrapper`` declarations is
2. Generic place for private ``co_wrapper`` declarations is
``block/coroutines.h``, for public declarations:
``include/block/block.h``
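As a rough illustration (an editor's sketch, not the script's literal output; ``bdrv_foo``/``bdrv_co_foo`` are the same hypothetical names used above), a ``co_wrapper_mixed`` wrapper behaves approximately like this:

.. code-block:: c

   typedef struct BdrvFooCo {
       BlockDriverState *bs;
       int ret;
       bool done;
   } BdrvFooCo;

   static void coroutine_fn bdrv_foo_co_entry(void *opaque)
   {
       BdrvFooCo *s = opaque;
       s->ret = bdrv_co_foo(s->bs);   /* the hand-written coroutine version */
       s->done = true;
       aio_wait_kick();
   }

   int bdrv_foo(BlockDriverState *bs)
   {
       if (qemu_in_coroutine()) {
           /* mixed wrappers may be entered from coroutine context directly */
           return bdrv_co_foo(bs);
       } else {
           BdrvFooCo s = { .bs = bs };
           Coroutine *co = qemu_coroutine_create(bdrv_foo_co_entry, &s);
           bdrv_coroutine_enter(bs, co);
           BDRV_POLL_WHILE(bs, !s.done);
           return s.ret;
       }
   }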


@ -22,6 +22,7 @@
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
#include "qemu/timer.h"
#include "block/graph-lock.h"
typedef struct BlockAIOCB BlockAIOCB;
typedef void BlockCompletionFunc(void *opaque, int ret);
@ -127,6 +128,14 @@ struct AioContext {
/* Used by AioContext users to protect from multi-threaded access. */
QemuRecMutex lock;
/*
* Keep track of readers and writers of the block layer graph.
* This is essential to avoid performing additions and removals
* of nodes and edges from the block graph while some
* other thread is traversing it.
*/
BdrvGraphRWlock *bdrv_graph;
/* The list of registered AIO handlers. Protected by ctx->list_lock. */
AioHandlerList aio_handlers;


@ -29,20 +29,35 @@
#include "qemu/iov.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
#include "block/dirty-bitmap.h"
#include "block/blockjob.h"
#include "qemu/hbitmap.h"
#include "qemu/transactions.h"
/*
* generated_co_wrapper
* co_wrapper{*}: Function specifiers used by block-coroutine-wrapper.py
*
* Function specifier, which does nothing but mark functions to be
* Function specifiers, which do nothing but mark functions to be
* generated by scripts/block-coroutine-wrapper.py
*
* Read more in docs/devel/block-coroutine-wrapper.rst
* Usage: read docs/devel/block-coroutine-wrapper.rst
*
* There are 4 kinds of specifiers:
* - co_wrapper functions can be called only from non-coroutine context, because
* they always generate a new coroutine.
* - co_wrapper_mixed functions can be called from both coroutine and
* non-coroutine context.
* - co_wrapper_bdrv_rdlock are co_wrapper functions but automatically take and
* release the graph rdlock when creating a new coroutine
* - co_wrapper_mixed_bdrv_rdlock are co_wrapper_mixed functions but
* automatically take and release the graph rdlock when creating a new
* coroutine.
*/
#define generated_co_wrapper
#define co_wrapper
#define co_wrapper_mixed
#define co_wrapper_bdrv_rdlock
#define co_wrapper_mixed_bdrv_rdlock
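As a brief illustration (editor's sketch with a hypothetical bdrv_foo, not a real QEMU API): the hand-written implementation is the coroutine_fn variant, and the wrapper emitted by scripts/block-coroutine-wrapper.py is merely declared with one of the specifiers above, e.g.:

    int coroutine_fn bdrv_co_foo(BlockDriverState *bs, int64_t offset);
    int co_wrapper_mixed bdrv_foo(BlockDriverState *bs, int64_t offset);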
#include "block/dirty-bitmap.h"
#include "block/blockjob.h"
/* block.c */
typedef struct BlockDriver BlockDriver;


@ -36,8 +36,9 @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
void block_copy_state_free(BlockCopyState *s);
void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes);
int64_t block_copy_reset_unallocated(BlockCopyState *s,
int64_t offset, int64_t *count);
int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
int64_t offset,
int64_t *count);
int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
bool ignore_ratelimit, uint64_t timeout_ns,


@ -55,9 +55,14 @@ BlockDriver *bdrv_find_protocol(const char *filename,
bool allow_protocol_prefix,
Error **errp);
BlockDriver *bdrv_find_format(const char *format_name);
int bdrv_create(BlockDriver *drv, const char* filename,
QemuOpts *opts, Error **errp);
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp);
int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp);
int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
Error **errp);
BlockDriverState *bdrv_new(void);
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
@ -82,6 +87,9 @@ int bdrv_open_file_child(const char *filename,
BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp);
int bdrv_set_backing_hd_drained(BlockDriverState *bs,
BlockDriverState *backing_hd,
Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
BlockDriverState *bdrv_open(const char *filename, const char *reference,
@ -144,6 +152,7 @@ int bdrv_inactivate_all(void);
int bdrv_flush_all(void);
void bdrv_close_all(void);
void bdrv_drain_all_begin(void);
void bdrv_drain_all_begin_nopoll(void);
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);


@ -39,19 +39,24 @@
* to catch when they are accidentally called by the wrong API.
*/
int generated_co_wrapper bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes,
BdrvRequestFlags flags);
int co_wrapper_mixed_bdrv_rdlock
bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes,
BdrvRequestFlags flags);
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
int generated_co_wrapper bdrv_pread(BdrvChild *child, int64_t offset,
int64_t bytes, void *buf,
BdrvRequestFlags flags);
int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset,
int64_t bytes, const void *buf,
BdrvRequestFlags flags);
int generated_co_wrapper bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
int64_t bytes, const void *buf,
BdrvRequestFlags flags);
int co_wrapper_mixed_bdrv_rdlock
bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
BdrvRequestFlags flags);
int co_wrapper_mixed_bdrv_rdlock
bdrv_pwrite(BdrvChild *child, int64_t offset,int64_t bytes,
const void *buf, BdrvRequestFlags flags);
int co_wrapper_mixed_bdrv_rdlock
bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
const void *buf, BdrvRequestFlags flags);
int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
int64_t bytes, const void *buf,
BdrvRequestFlags flags);
@ -94,14 +99,29 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
int bdrv_block_status(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum, int64_t *map,
BlockDriverState **file);
int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
BlockDriverState **file);
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum);
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum);
int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
BlockDriverState *base,
bool include_base, int64_t offset,
int64_t bytes, int64_t *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
bool include_base, int64_t offset, int64_t bytes,
int64_t *pnum);
int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
int64_t bytes);
@ -200,8 +220,14 @@ AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
void bdrv_io_plug(BlockDriverState *bs);
void bdrv_io_unplug(BlockDriverState *bs);
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
uint32_t granularity, Error **errp);
bool coroutine_fn bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs,
const char *name,
uint32_t granularity,
Error **errp);
bool co_wrapper bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs,
const char *name,
uint32_t granularity,
Error **errp);
/**
*
@ -237,21 +263,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
/**
* bdrv_drained_end_no_poll:
*
* Same as bdrv_drained_end(), but do not poll for the subgraph to
* actually become unquiesced. Therefore, no graph changes will occur
* with this function.
*
* *drained_end_counter is incremented for every background operation
* that is scheduled, and will be decremented for every operation once
* it settles. The caller must poll until it reaches 0. The counter
* should be accessed using atomic operations only.
*/
void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
/*
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
@ -281,47 +292,54 @@ void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
void bdrv_drain(BlockDriverState *bs);
int generated_co_wrapper
int co_wrapper_mixed_bdrv_rdlock
bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
int co_wrapper_mixed_bdrv_rdlock
bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
/* Invalidate any cached metadata used by image formats */
int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
Error **errp);
int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes);
int generated_co_wrapper
int co_wrapper_mixed_bdrv_rdlock
bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
int co_wrapper_mixed_bdrv_rdlock bdrv_flush(BlockDriverState *bs);
int co_wrapper_mixed_bdrv_rdlock
bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
int co_wrapper_mixed_bdrv_rdlock
bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int generated_co_wrapper
int co_wrapper_mixed_bdrv_rdlock
bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
/**
* bdrv_parent_drained_begin_single:
*
* Begin a quiesced section for the parent of @c. If @poll is true, wait for
* any pending activity to cease.
* Begin a quiesced section for the parent of @c.
*/
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
void bdrv_parent_drained_begin_single(BdrvChild *c);
/**
* bdrv_parent_drained_poll_single:
*
* Returns true if there is any pending activity to cease before @c can be
* called quiesced, false otherwise.
*/
bool bdrv_parent_drained_poll_single(BdrvChild *c);
/**
* bdrv_parent_drained_end_single:
*
* End a quiesced section for the parent of @c.
*
* This polls @bs's AioContext until all scheduled sub-drained_ends
* have settled, which may result in graph changes.
*/
void bdrv_parent_drained_end_single(BdrvChild *c);
/**
* bdrv_drain_poll:
*
* Poll for pending requests in @bs, its parents (except for @ignore_parent),
* and if @recursive is true its children as well (used for subtree drain).
* Poll for pending requests in @bs and its parents (except for @ignore_parent).
*
* If @ignore_bds_parents is true, parents that are BlockDriverStates must
* ignore the drain request because they will be drained separately (used for
@ -329,8 +347,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
*
* This is part of bdrv_drained_begin.
*/
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents);
bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
bool ignore_bds_parents);
/**
* bdrv_drained_begin:
@ -348,31 +366,13 @@ void bdrv_drained_begin(BlockDriverState *bs);
* Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
* running requests to complete.
*/
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents);
/**
* Like bdrv_drained_begin, but recursively begins a quiesced section for
* exclusive access to all child nodes as well.
*/
void bdrv_subtree_drained_begin(BlockDriverState *bs);
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent);
/**
* bdrv_drained_end:
*
* End a quiescent section started by bdrv_drained_begin().
*
* This polls @bs's AioContext until all scheduled sub-drained_ends
* have settled. On one hand, that may result in graph changes. On
* the other, this requires that the caller either runs in the main
* loop; or that all involved nodes (@bs and all of its parents) are
* in the caller's AioContext.
*/
void bdrv_drained_end(BlockDriverState *bs);
/**
* End a quiescent section started by bdrv_subtree_drained_begin().
*/
void bdrv_subtree_drained_end(BlockDriverState *bs);
#endif /* BLOCK_IO_H */
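A usage sketch for the simplified drain API (editor's example; the helper name is hypothetical, but the begin/end pairing mirrors the stream_prepare() change shown earlier): a graph change is wrapped in a drained section so that no new requests reach the node while its edges are being modified.

    static int example_switch_backing(BlockDriverState *bs,
                                      BlockDriverState *new_backing,
                                      Error **errp)
    {
        int ret;

        bdrv_drained_begin(bs);      /* quiesce bs and its parents */
        ret = bdrv_set_backing_hd_drained(bs, new_backing, errp);  /* graph change */
        bdrv_drained_end(bs);        /* let requests flow again */
        return ret;
    }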


@ -641,8 +641,8 @@ struct BlockDriver {
/*
* Invalidate any cached meta-data.
*/
void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
Error **errp);
void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_invalidate_cache)(
BlockDriverState *bs, Error **errp);
/*
* Flushes all data for all layers by calling bdrv_co_flush for underlying
@ -701,12 +701,11 @@ struct BlockDriver {
Error **errp);
BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
QEMUIOVector *qiov,
int64_t pos);
int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
QEMUIOVector *qiov,
int64_t pos);
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_save_vmstate)(
BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_load_vmstate)(
BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
/* removable device specific */
bool (*bdrv_is_inserted)(BlockDriverState *bs);
@ -724,9 +723,8 @@ struct BlockDriver {
* Returns 0 for completed check, -errno for internal errors.
* The check results are stored in result.
*/
int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
BdrvCheckResult *result,
BdrvCheckMode fix);
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)(
BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix);
void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
@ -735,17 +733,19 @@ struct BlockDriver {
void (*bdrv_io_unplug)(BlockDriverState *bs);
/**
* bdrv_co_drain_begin is called if implemented in the beginning of a
* bdrv_drain_begin is called if implemented in the beginning of a
* drain operation to drain and stop any internal sources of requests in
* the driver.
* bdrv_co_drain_end is called if implemented at the end of the drain.
* bdrv_drain_end is called if implemented at the end of the drain.
*
* They should be used by the driver to e.g. manage scheduled I/O
* requests, or toggle an internal state. After the end of the drain new
* requests will continue normally.
*
* Implementations of both functions must not call aio_poll().
*/
void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
void (*bdrv_drain_begin)(BlockDriverState *bs);
void (*bdrv_drain_end)(BlockDriverState *bs);
bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
@ -896,8 +896,8 @@ struct BdrvChildClass {
void (*activate)(BdrvChild *child, Error **errp);
int (*inactivate)(BdrvChild *child);
void (*attach)(BdrvChild *child);
void (*detach)(BdrvChild *child);
void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child);
void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child);
/*
* Notifies the parent that the filename of its child has changed (e.g.
@ -937,15 +937,11 @@ struct BdrvChildClass {
* These functions must not change the graph (and therefore also must not
* call aio_poll(), which could change the graph indirectly).
*
* If drained_end() schedules background operations, it must atomically
* increment *drained_end_counter for each such operation and atomically
* decrement it once the operation has settled.
*
* Note that this can be nested. If drained_begin() was called twice, new
* I/O is allowed only after drained_end() was called twice, too.
*/
void (*drained_begin)(BdrvChild *child);
void (*drained_end)(BdrvChild *child, int *drained_end_counter);
void (*drained_end)(BdrvChild *child);
/*
* Returns whether the parent has pending requests for the child. This
@ -982,13 +978,13 @@ struct BdrvChild {
bool frozen;
/*
* How many times the parent of this child has been drained
* True if the parent of this child has been drained by this BdrvChild
* (through klass->drained_*).
* Usually, this is equal to bs->quiesce_counter (potentially
* reduced by bdrv_drain_all_count). It may differ while the
*
* It is generally true if bs->quiesce_counter > 0. It may differ while the
* child is entering or leaving a drained section.
*/
int parent_quiesce_counter;
bool quiesced_parent;
QLIST_ENTRY(BdrvChild) next;
QLIST_ENTRY(BdrvChild) next_parent;
@ -1186,7 +1182,6 @@ struct BlockDriverState {
/* Accessed with atomic ops. */
int quiesce_counter;
int recursive_quiesce_counter;
unsigned int write_gen; /* Current data generation */


@ -310,21 +310,4 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
*/
void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
/**
* Make sure that the function is running under both drain and BQL.
* The latter protects from concurrent writings
* from the GS API, while the former prevents concurrent reads
* from I/O.
*/
static inline void assert_bdrv_graph_writable(BlockDriverState *bs)
{
/*
* TODO: this function is incomplete. Because the users of this
* assert lack the necessary drains, check only for BQL.
* Once the necessary drains are added,
* assert also for qatomic_read(&bs->quiesce_counter) > 0
*/
assert(qemu_in_main_thread());
}
#endif /* BLOCK_INT_GLOBAL_STATE_H */


@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs,
*/
void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
/*
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
*
* See include/block/block-io.h for more information about
* the "I/O or GS" API.
*/
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
#endif /* BLOCK_INT_IO_H */


@ -26,6 +26,7 @@
#include "block_int-global-state.h"
#include "block_int-io.h"
#include "block/graph-lock.h"
/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */


@ -34,8 +34,14 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags,
Error **errp);
void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap);
void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
Error **errp);
int coroutine_fn bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs,
const char *name,
Error **errp);
int co_wrapper bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
const char *name,
Error **errp);
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
void bdrv_enable_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap);

include/block/graph-lock.h (new file)

@ -0,0 +1,280 @@
/*
* Graph lock: rwlock to protect block layer graph manipulations (add/remove
* edges and nodes)
*
* Copyright (c) 2022 Red Hat
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GRAPH_LOCK_H
#define GRAPH_LOCK_H
#include "qemu/osdep.h"
#include "qemu/clang-tsa.h"
#include "qemu/coroutine.h"
/**
* Graph Lock API
* This API provides a rwlock used to protect block layer
* graph modifications like edge (BdrvChild) and node (BlockDriverState)
* addition and removal.
* Currently we have only one writer, the main loop, and many
* readers, mostly coroutines running in other AioContexts and thus in other threads.
*
* We distinguish between writer (main loop, under BQL) that modifies the
* graph, and readers (all other coroutines running in various AioContext),
* that go through the graph edges, reading
* BlockDriverState ->parents and ->children.
*
* The writer (main loop) has an "exclusive" access, so it first waits for
* current read to finish, and then prevents incoming ones from
* entering while it has the exclusive access.
*
* The readers (coroutines in multiple AioContext) are free to
* access the graph as long as the writer is not modifying the graph.
* In case it is, they go in a CoQueue and sleep until the writer
* is done.
*
* If a coroutine changes AioContext, the counter in the original and new
* AioContext are left intact, since the writer does not care where the
* reader is, but only whether there is one.
* As a result, some AioContexts might have a negative reader count, to
* balance the positive count of the AioContext that took the lock.
* This also means that when an AioContext is deleted it may have a nonzero
* reader count. In that case we transfer the count to a global shared counter
* so that the writer is always aware of all readers.
*/
typedef struct BdrvGraphRWlock BdrvGraphRWlock;
/* Dummy lock object to use for Thread Safety Analysis (TSA) */
typedef struct TSA_CAPABILITY("mutex") BdrvGraphLock {
} BdrvGraphLock;
extern BdrvGraphLock graph_lock;
/*
* clang doesn't check consistency in locking annotations between forward
* declarations and the function definition. Having the annotation on the
* definition, but not the declaration in a header file, may give the reader
* a false sense of security because the condition actually remains unchecked
* for callers in other source files.
*
* Therefore, as a convention, for public functions, GRAPH_RDLOCK and
* GRAPH_WRLOCK annotations should be present only in the header file.
*/
#define GRAPH_WRLOCK TSA_REQUIRES(graph_lock)
#define GRAPH_RDLOCK TSA_REQUIRES_SHARED(graph_lock)
/*
* TSA annotations are not part of function types, so checks are defeated when
* using a function pointer. As a workaround, annotate function pointers with
* this macro that will require that the lock is at least taken while reading
* the pointer. In most cases this is equivalent to actually protecting the
* function call.
*/
#define GRAPH_RDLOCK_PTR TSA_GUARDED_BY(graph_lock)
#define GRAPH_WRLOCK_PTR TSA_GUARDED_BY(graph_lock)
/*
* register_aiocontext:
* Add AioContext @ctx to the list of AioContext.
* This list is used to obtain the total number of readers
* currently reading the graph.
*/
void register_aiocontext(AioContext *ctx);
/*
* unregister_aiocontext:
* Removes AioContext @ctx from the list of AioContext.
*/
void unregister_aiocontext(AioContext *ctx);
/*
* bdrv_graph_wrlock:
* Start an exclusive write operation to modify the graph. This means we are
* adding or removing an edge or a node in the block layer graph. Nobody else
* is allowed to access the graph.
*
* Must only be called from outside bdrv_graph_co_rdlock.
*
* The wrlock can only be taken from the main loop, with BQL held, as only the
* main loop is allowed to modify the graph.
*
* This function polls. Callers must not hold the lock of any AioContext other
* than the current one.
*/
void bdrv_graph_wrlock(void) TSA_ACQUIRE(graph_lock) TSA_NO_TSA;
/*
* bdrv_graph_wrunlock:
* Write finished, reset global has_writer to 0 and restart
* all readers that are waiting.
*/
void bdrv_graph_wrunlock(void) TSA_RELEASE(graph_lock) TSA_NO_TSA;
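A minimal writer-side sketch (editor's example, hypothetical function): the writer runs in the main loop with the BQL held and has exclusive access to the graph between lock and unlock.

    static void example_modify_graph(void)
    {
        bdrv_graph_wrlock();
        /* ... add or remove an edge (BdrvChild) or node (BlockDriverState) ... */
        bdrv_graph_wrunlock();
    }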
/*
* bdrv_graph_co_rdlock:
* Read the bs graph. This usually means traversing all nodes in
* the graph, therefore it can't happen while another thread is
* modifying it.
* Increases the reader counter of the current aiocontext,
* and if has_writer is set, it means that the writer is modifying
* the graph, therefore wait in a coroutine queue.
* The writer will then wake this coroutine once it is done.
*
* This lock should be taken from Iothreads (IO_CODE() class of functions)
* because it signals the writer that there are some
* readers currently running, or waits until the current
* write is finished before continuing.
* Calling this function from the Main Loop with BQL held
* is not necessary, since the Main Loop itself is the only
* writer, thus won't be able to read and write at the same time.
* The only exception to that is when we can't take the lock in the
* function/coroutine itself, and need to delegate to the caller (usually the
* main loop) to take it and wait for the coroutine to end, so that
* we always signal that a reader is running.
*/
void coroutine_fn TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA
bdrv_graph_co_rdlock(void);
/*
* bdrv_graph_rdunlock:
* Read terminated, decrease the count of readers in the current aiocontext.
* If the writer is waiting for reads to finish (has_writer == 1), signal
* the writer that we are done via aio_wait_kick() to let it continue.
*/
void coroutine_fn TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA
bdrv_graph_co_rdunlock(void);
/*
* bdrv_graph_rd{un}lock_main_loop:
* Just a placeholder to mark where the graph rdlock should be taken
* in the main loop. It just asserts that we are not
* in a coroutine and that we are in GLOBAL_STATE_CODE.
*/
void TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA
bdrv_graph_rdlock_main_loop(void);
void TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA
bdrv_graph_rdunlock_main_loop(void);
/*
* assert_bdrv_graph_readable:
* Make sure that the reader is either the main loop,
* or there is at least one reader holding the rdlock.
* In this way an incoming writer is aware of the read and waits.
*/
void GRAPH_RDLOCK assert_bdrv_graph_readable(void);
/*
* assert_bdrv_graph_writable:
* Make sure that the writer is the main loop and has set @has_writer,
* so that incoming readers will pause.
*/
void GRAPH_WRLOCK assert_bdrv_graph_writable(void);
/*
* Calling this function tells TSA that we know that the lock is effectively
* taken even though we cannot prove it (yet) with GRAPH_RDLOCK. This can be
* useful in intermediate stages of a conversion to using the GRAPH_RDLOCK
* macro.
*/
static inline void TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA
assume_graph_lock(void)
{
}
typedef struct GraphLockable { } GraphLockable;
/*
* In C, compound literals have the lifetime of an automatic variable.
* In C++ it would be different, but then C++ wouldn't need QemuLockable
* either...
*/
#define GML_OBJ_() (&(GraphLockable) { })
/*
* This is not marked as TSA_ACQUIRE() because TSA doesn't understand the
* cleanup attribute and would therefore complain that the graph is never
* unlocked. TSA_ASSERT() makes sure that the following calls know that we
* hold the lock while unlocking is left unchecked.
*/
static inline GraphLockable * TSA_ASSERT(graph_lock) TSA_NO_TSA
graph_lockable_auto_lock(GraphLockable *x)
{
bdrv_graph_co_rdlock();
return x;
}
static inline void TSA_NO_TSA
graph_lockable_auto_unlock(GraphLockable *x)
{
bdrv_graph_co_rdunlock();
}
G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockable, graph_lockable_auto_unlock)
#define WITH_GRAPH_RDLOCK_GUARD_(var) \
for (g_autoptr(GraphLockable) var = graph_lockable_auto_lock(GML_OBJ_()); \
var; \
graph_lockable_auto_unlock(var), var = NULL)
#define WITH_GRAPH_RDLOCK_GUARD() \
WITH_GRAPH_RDLOCK_GUARD_(glue(graph_lockable_auto, __COUNTER__))
#define GRAPH_RDLOCK_GUARD(x) \
g_autoptr(GraphLockable) \
glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED = \
graph_lockable_auto_lock(GML_OBJ_())
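And a reader-side sketch (editor's example, hypothetical function): a coroutine traverses graph edges under the shared lock via the guard macro, which drops the lock automatically when the block is left.

    static int coroutine_fn example_count_children(BlockDriverState *bs)
    {
        int n = 0;

        WITH_GRAPH_RDLOCK_GUARD() {
            BdrvChild *child;
            /* bs->children may be traversed safely while the rdlock is held */
            QLIST_FOREACH(child, &bs->children, next) {
                n++;
            }
        }
        return n;
    }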
typedef struct GraphLockableMainloop { } GraphLockableMainloop;
/*
* In C, compound literals have the lifetime of an automatic variable.
* In C++ it would be different, but then C++ wouldn't need QemuLockable
* either...
*/
#define GMLML_OBJ_() (&(GraphLockableMainloop) { })
/*
* This is not marked as TSA_ACQUIRE() because TSA doesn't understand the
* cleanup attribute and would therefore complain that the graph is never
* unlocked. TSA_ASSERT() makes sure that the following calls know that we
* hold the lock while unlocking is left unchecked.
*/
static inline GraphLockableMainloop * TSA_ASSERT(graph_lock) TSA_NO_TSA
graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x)
{
bdrv_graph_rdlock_main_loop();
return x;
}
static inline void TSA_NO_TSA
graph_lockable_auto_unlock_mainloop(GraphLockableMainloop *x)
{
bdrv_graph_rdunlock_main_loop();
}
G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockableMainloop,
graph_lockable_auto_unlock_mainloop)
#define GRAPH_RDLOCK_GUARD_MAINLOOP(x) \
g_autoptr(GraphLockableMainloop) \
glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED = \
graph_lockable_auto_lock_mainloop(GMLML_OBJ_())
#endif /* GRAPH_LOCK_H */

include/qemu/clang-tsa.h (new file)

@ -0,0 +1,114 @@
#ifndef CLANG_TSA_H
#define CLANG_TSA_H
/*
* Copyright 2018 Jarkko Hietaniemi <jhi@iki.fi>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* http://clang.llvm.org/docs/ThreadSafetyAnalysis.html
*
* TSA is available since clang 3.6-ish.
*/
#ifdef __clang__
# define TSA(x) __attribute__((x))
#else
# define TSA(x) /* No TSA, make TSA attributes no-ops. */
#endif
/* TSA_CAPABILITY() is used to annotate typedefs:
*
* typedef pthread_mutex_t TSA_CAPABILITY("mutex") tsa_mutex;
*/
#define TSA_CAPABILITY(x) TSA(capability(x))
/* TSA_GUARDED_BY() is used to annotate global variables,
* the data is guarded:
*
* Foo foo TSA_GUARDED_BY(mutex);
*/
#define TSA_GUARDED_BY(x) TSA(guarded_by(x))
/* TSA_PT_GUARDED_BY() is used to annotate global pointers, the data
* behind the pointer is guarded.
*
* Foo* ptr TSA_PT_GUARDED_BY(mutex);
*/
#define TSA_PT_GUARDED_BY(x) TSA(pt_guarded_by(x))
/* The TSA_REQUIRES() is used to annotate functions: the caller of the
* function MUST hold the resource, the function will NOT release it.
*
* More than one mutex may be specified, comma-separated.
*
* void Foo(void) TSA_REQUIRES(mutex);
*/
#define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__))
#define TSA_REQUIRES_SHARED(...) TSA(requires_shared_capability(__VA_ARGS__))
/* TSA_EXCLUDES() is used to annotate functions: the caller of the
* function MUST NOT hold resource, the function first acquires the
* resource, and then releases it.
*
* More than one mutex may be specified, comma-separated.
*
* void Foo(void) TSA_EXCLUDES(mutex);
*/
#define TSA_EXCLUDES(...) TSA(locks_excluded(__VA_ARGS__))
/* TSA_ACQUIRE() is used to annotate functions: the caller of the
* function MUST NOT hold the resource, the function will acquire the
* resource, but NOT release it.
*
* More than one mutex may be specified, comma-separated.
*
* void Foo(void) TSA_ACQUIRE(mutex);
*/
#define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__))
#define TSA_ACQUIRE_SHARED(...) TSA(acquire_shared_capability(__VA_ARGS__))
/* TSA_RELEASE() is used to annotate functions: the caller of the
* function MUST hold the resource, but the function will then release it.
*
* More than one mutex may be specified, comma-separated.
*
* void Foo(void) TSA_RELEASE(mutex);
*/
#define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__))
#define TSA_RELEASE_SHARED(...) TSA(release_shared_capability(__VA_ARGS__))
/* TSA_NO_TSA is used to annotate functions. Use only when you need to.
*
* void Foo(void) TSA_NO_TSA;
*/
#define TSA_NO_TSA TSA(no_thread_safety_analysis)
/*
* TSA_ASSERT() is used to annotate functions: This function will assert that
* the lock is held. When it returns, the caller of the function is assumed to
* already hold the resource.
*
* More than one mutex may be specified, comma-separated.
*/
#define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__))
#define TSA_ASSERT_SHARED(...) TSA(assert_shared_capability(__VA_ARGS__))
#endif /* #ifndef CLANG_TSA_H */
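Taken together, a minimal sketch (editor's example, hypothetical names) of how these annotations combine on a lock type, a guarded variable and a function that requires the lock:

    typedef struct TSA_CAPABILITY("mutex") ExampleLock { int dummy; } ExampleLock;

    extern ExampleLock example_lock;
    extern int example_counter TSA_GUARDED_BY(example_lock);

    /* Caller must already hold example_lock; clang warns on unlocked calls. */
    void example_bump(void) TSA_REQUIRES(example_lock);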


@ -92,6 +92,15 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
BlockDriverState *base,
int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
BlockDriverState **file);
int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
BlockDriverState *base,
bool include_base, int64_t offset,
int64_t bytes, int64_t *pnum);
/*
* "I/O or GS" API functions. These functions can run without
@ -101,77 +110,77 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
* the "I/O or GS" API.
*/
int generated_co_wrapper blk_pread(BlockBackend *blk, int64_t offset,
int64_t bytes, void *buf,
BdrvRequestFlags flags);
int co_wrapper_mixed blk_pread(BlockBackend *blk, int64_t offset,
int64_t bytes, void *buf,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes,
void *buf, BdrvRequestFlags flags);
int generated_co_wrapper blk_preadv(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int co_wrapper_mixed blk_preadv(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int generated_co_wrapper blk_preadv_part(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset,
BdrvRequestFlags flags);
int co_wrapper_mixed blk_preadv_part(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset,
BdrvRequestFlags flags);
int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset, BdrvRequestFlags flags);
int generated_co_wrapper blk_pwrite(BlockBackend *blk, int64_t offset,
int64_t bytes, const void *buf,
BdrvRequestFlags flags);
int co_wrapper_mixed blk_pwrite(BlockBackend *blk, int64_t offset,
int64_t bytes, const void *buf,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes,
const void *buf, BdrvRequestFlags flags);
int generated_co_wrapper blk_pwritev(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int co_wrapper_mixed blk_pwritev(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int generated_co_wrapper blk_pwritev_part(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset,
BdrvRequestFlags flags);
int co_wrapper_mixed blk_pwritev_part(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
int generated_co_wrapper blk_pwrite_compressed(BlockBackend *blk,
int64_t offset, int64_t bytes,
const void *buf);
int co_wrapper_mixed blk_pwrite_compressed(BlockBackend *blk,
int64_t offset, int64_t bytes,
const void *buf);
int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset,
int64_t bytes, const void *buf);
int generated_co_wrapper blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes,
BdrvRequestFlags flags);
int co_wrapper_mixed blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int generated_co_wrapper blk_pdiscard(BlockBackend *blk, int64_t offset,
int64_t bytes);
int co_wrapper_mixed blk_pdiscard(BlockBackend *blk, int64_t offset,
int64_t bytes);
int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
int64_t bytes);
int generated_co_wrapper blk_flush(BlockBackend *blk);
int co_wrapper_mixed blk_flush(BlockBackend *blk);
int coroutine_fn blk_co_flush(BlockBackend *blk);
int generated_co_wrapper blk_ioctl(BlockBackend *blk, unsigned long int req,
void *buf);
int co_wrapper_mixed blk_ioctl(BlockBackend *blk, unsigned long int req,
void *buf);
int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req,
void *buf);
int generated_co_wrapper blk_truncate(BlockBackend *blk, int64_t offset,
bool exact, PreallocMode prealloc,
BdrvRequestFlags flags, Error **errp);
int co_wrapper_mixed blk_truncate(BlockBackend *blk, int64_t offset,
bool exact, PreallocMode prealloc,
BdrvRequestFlags flags, Error **errp);
int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags,
Error **errp);


@ -1988,7 +1988,7 @@ static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
}
/* Do a sparse read and send the structured reply to the client.
* Returns -errno if sending fails. bdrv_block_status_above() failure is
* Returns -errno if sending fails. blk_co_block_status_above() failure is
* reported to the client, at which point this function succeeds.
*/
static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
@ -2004,10 +2004,10 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
while (progress < size) {
int64_t pnum;
int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
offset + progress,
size - progress, &pnum, NULL,
NULL);
int status = blk_co_block_status_above(exp->common.blk, NULL,
offset + progress,
size - progress, &pnum, NULL,
NULL);
bool final;
if (status < 0) {
@ -2138,14 +2138,15 @@ static int nbd_extent_array_add(NBDExtentArray *ea,
return 0;
}
static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, NBDExtentArray *ea)
static int coroutine_fn blockstatus_to_extents(BlockBackend *blk,
uint64_t offset, uint64_t bytes,
NBDExtentArray *ea)
{
while (bytes) {
uint32_t flags;
int64_t num;
int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
NULL, NULL);
int ret = blk_co_block_status_above(blk, NULL, offset, bytes, &num,
NULL, NULL);
if (ret < 0) {
return ret;
@ -2165,13 +2166,14 @@ static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
return 0;
}
static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, NBDExtentArray *ea)
static int coroutine_fn blockalloc_to_extents(BlockBackend *blk,
uint64_t offset, uint64_t bytes,
NBDExtentArray *ea)
{
while (bytes) {
int64_t num;
int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
&num);
int ret = blk_co_is_allocated_above(blk, NULL, false, offset, bytes,
&num);
if (ret < 0) {
return ret;
@ -2217,20 +2219,21 @@ static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
}
/* Get block status from the exported device and send it to the client */
static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
BlockDriverState *bs, uint64_t offset,
uint32_t length, bool dont_fragment,
bool last, uint32_t context_id,
Error **errp)
static int
coroutine_fn nbd_co_send_block_status(NBDClient *client, uint64_t handle,
BlockBackend *blk, uint64_t offset,
uint32_t length, bool dont_fragment,
bool last, uint32_t context_id,
Error **errp)
{
int ret;
unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
if (context_id == NBD_META_ID_BASE_ALLOCATION) {
ret = blockstatus_to_extents(bs, offset, length, ea);
ret = blockstatus_to_extents(blk, offset, length, ea);
} else {
ret = blockalloc_to_extents(bs, offset, length, ea);
ret = blockalloc_to_extents(blk, offset, length, ea);
}
if (ret < 0) {
return nbd_co_send_structured_error(
@ -2557,7 +2560,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
if (client->export_meta.base_allocation) {
ret = nbd_co_send_block_status(client, request->handle,
blk_bs(exp->common.blk),
exp->common.blk,
request->from,
request->len, dont_fragment,
!--contexts_remaining,
@ -2570,7 +2573,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
if (client->export_meta.allocation_depth) {
ret = nbd_co_send_block_status(client, request->handle,
blk_bs(exp->common.blk),
exp->common.blk,
request->from, request->len,
dont_fragment,
!--contexts_remaining,


@ -2,7 +2,7 @@
"""Generate coroutine wrappers for block subsystem.
The program parses one or several concatenated c files from stdin,
searches for functions with the 'generated_co_wrapper' specifier
searches for functions with the 'co_wrapper' specifier
and generates corresponding wrappers on stdout.
Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]...
@ -62,10 +62,28 @@ class ParamDecl:
class FuncDecl:
def __init__(self, return_type: str, name: str, args: str) -> None:
def __init__(self, return_type: str, name: str, args: str,
variant: str) -> None:
self.return_type = return_type.strip()
self.name = name.strip()
self.struct_name = snake_to_camel(self.name)
self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
self.create_only_co = 'mixed' not in variant
self.graph_rdlock = 'bdrv_rdlock' in variant
subsystem, subname = self.name.split('_', 1)
self.co_name = f'{subsystem}_co_{subname}'
t = self.args[0].type
if t == 'BlockDriverState *':
ctx = 'bdrv_get_aio_context(bs)'
elif t == 'BdrvChild *':
ctx = 'bdrv_get_aio_context(child->bs)'
elif t == 'BlockBackend *':
ctx = 'blk_get_aio_context(blk)'
else:
ctx = 'qemu_get_aio_context()'
self.ctx = ctx
def gen_list(self, format: str) -> str:
return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
@ -74,17 +92,20 @@ class FuncDecl:
return '\n'.join(format.format_map(arg.__dict__) for arg in self.args)
# Match wrappers declared with a generated_co_wrapper mark
func_decl_re = re.compile(r'^int\s*generated_co_wrapper\s*'
# Match wrappers declared with a co_wrapper mark
func_decl_re = re.compile(r'^(?P<return_type>[a-zA-Z][a-zA-Z0-9_]* [\*]?)'
r'\s*co_wrapper'
r'(?P<variant>(_[a-z][a-z0-9_]*)?)\s*'
r'(?P<wrapper_name>[a-z][a-z0-9_]*)'
r'\((?P<args>[^)]*)\);$', re.MULTILINE)
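
For example, the blk_flush declaration from the header hunk above should be matched by this regex, with return_type "int", variant "_mixed" and wrapper_name "blk_flush":

/* declaration recognized by func_decl_re */
int co_wrapper_mixed blk_flush(BlockBackend *blk);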
def func_decl_iter(text: str) -> Iterator:
for m in func_decl_re.finditer(text):
yield FuncDecl(return_type='int',
yield FuncDecl(return_type=m.group('return_type'),
name=m.group('wrapper_name'),
args=m.group('args'))
args=m.group('args'),
variant=m.group('variant'))
def snake_to_camel(func_name: str) -> str:
@ -97,24 +118,75 @@ def snake_to_camel(func_name: str) -> str:
return ''.join(words)
def create_mixed_wrapper(func: FuncDecl) -> str:
"""
Checks if we are already in coroutine
"""
name = func.co_name
struct_name = func.struct_name
graph_assume_lock = 'assume_graph_lock();' if func.graph_rdlock else ''
return f"""\
{func.return_type} {func.name}({ func.gen_list('{decl}') })
{{
if (qemu_in_coroutine()) {{
{graph_assume_lock}
return {name}({ func.gen_list('{name}') });
}} else {{
{struct_name} s = {{
.poll_state.ctx = {func.ctx},
.poll_state.in_progress = true,
{ func.gen_block(' .{name} = {name},') }
}};
s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
bdrv_poll_co(&s.poll_state);
return s.ret;
}}
}}"""
def create_co_wrapper(func: FuncDecl) -> str:
"""
Assumes we are not in coroutine, and creates one
"""
name = func.co_name
struct_name = func.struct_name
return f"""\
{func.return_type} {func.name}({ func.gen_list('{decl}') })
{{
{struct_name} s = {{
.poll_state.ctx = {func.ctx},
.poll_state.in_progress = true,
{ func.gen_block(' .{name} = {name},') }
}};
assert(!qemu_in_coroutine());
s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
bdrv_poll_co(&s.poll_state);
return s.ret;
}}"""
def gen_wrapper(func: FuncDecl) -> str:
assert not '_co_' in func.name
assert func.return_type == 'int'
assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *',
'BlockBackend *']
subsystem, subname = func.name.split('_', 1)
name = func.co_name
struct_name = func.struct_name
name = f'{subsystem}_co_{subname}'
graph_lock=''
graph_unlock=''
if func.graph_rdlock:
graph_lock=' bdrv_graph_co_rdlock();'
graph_unlock=' bdrv_graph_co_rdunlock();'
t = func.args[0].type
if t == 'BlockDriverState *':
bs = 'bs'
elif t == 'BdrvChild *':
bs = 'child->bs'
else:
bs = 'blk_bs(blk)'
struct_name = snake_to_camel(name)
creation_function = create_mixed_wrapper
if func.create_only_co:
creation_function = create_co_wrapper
return f"""\
/*
@ -123,6 +195,7 @@ def gen_wrapper(func: FuncDecl) -> str:
typedef struct {struct_name} {{
BdrvPollCo poll_state;
{func.return_type} ret;
{ func.gen_block(' {decl};') }
}} {struct_name};
@ -130,29 +203,15 @@ static void coroutine_fn {name}_entry(void *opaque)
{{
{struct_name} *s = opaque;
s->poll_state.ret = {name}({ func.gen_list('s->{name}') });
{graph_lock}
s->ret = {name}({ func.gen_list('s->{name}') });
{graph_unlock}
s->poll_state.in_progress = false;
aio_wait_kick();
}}
int {func.name}({ func.gen_list('{decl}') })
{{
if (qemu_in_coroutine()) {{
return {name}({ func.gen_list('{name}') });
}} else {{
{struct_name} s = {{
.poll_state.bs = {bs},
.poll_state.in_progress = true,
{ func.gen_block(' .{name} = {name},') }
}};
s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
return bdrv_poll_co(&s.poll_state);
}}
}}"""
{creation_function(func)}"""
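
Sticking with the blk_flush example, the per-function state struct and entry coroutine produced by the template above would look roughly like this; for the *_bdrv_rdlock variants, the call in the entry function is additionally bracketed by bdrv_graph_co_rdlock()/bdrv_graph_co_rdunlock():

typedef struct BlkFlush {
    BdrvPollCo poll_state;
    int ret;
    BlockBackend *blk;
} BlkFlush;

static void coroutine_fn blk_co_flush_entry(void *opaque)
{
    BlkFlush *s = opaque;

    s->ret = blk_co_flush(s->blk);
    s->poll_state.in_progress = false;

    aio_wait_kick();
}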
def gen_wrappers(input_code: str) -> str:

stubs/graph-lock.c (new file, +10 lines)

@ -0,0 +1,10 @@
#include "qemu/osdep.h"
#include "block/graph-lock.h"
void register_aiocontext(AioContext *ctx)
{
}
void unregister_aiocontext(AioContext *ctx)
{
}


@ -13,6 +13,7 @@ stub_ss.add(files('error-printf.c'))
stub_ss.add(files('fdset.c'))
stub_ss.add(files('gdbstub.c'))
stub_ss.add(files('get-vm-name.c'))
stub_ss.add(files('graph-lock.c'))
if linux_io_uring.found()
stub_ss.add(files('io_uring.c'))
endif


@ -38,16 +38,26 @@ typedef struct BDRVTestState {
bool sleep_in_drain_begin;
} BDRVTestState;
static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
static void coroutine_fn sleep_in_drain_begin(void *opaque)
{
BlockDriverState *bs = opaque;
qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
bdrv_dec_in_flight(bs);
}
static void bdrv_test_drain_begin(BlockDriverState *bs)
{
BDRVTestState *s = bs->opaque;
s->drain_count++;
if (s->sleep_in_drain_begin) {
qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs);
bdrv_inc_in_flight(bs);
aio_co_enter(bdrv_get_aio_context(bs), co);
}
}
static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
static void bdrv_test_drain_end(BlockDriverState *bs)
{
BDRVTestState *s = bs->opaque;
s->drain_count--;
@ -101,8 +111,8 @@ static BlockDriver bdrv_test = {
.bdrv_close = bdrv_test_close,
.bdrv_co_preadv = bdrv_test_co_preadv,
.bdrv_co_drain_begin = bdrv_test_co_drain_begin,
.bdrv_co_drain_end = bdrv_test_co_drain_end,
.bdrv_drain_begin = bdrv_test_drain_begin,
.bdrv_drain_end = bdrv_test_drain_end,
.bdrv_child_perm = bdrv_default_perms,
@ -146,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void))
enum drain_type {
BDRV_DRAIN_ALL,
BDRV_DRAIN,
BDRV_SUBTREE_DRAIN,
DRAIN_TYPE_MAX,
};
@ -155,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
switch (drain_type) {
case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
case BDRV_DRAIN: bdrv_drained_begin(bs); break;
case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
default: g_assert_not_reached();
}
}
@ -165,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
switch (drain_type) {
case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
case BDRV_DRAIN: bdrv_drained_end(bs); break;
case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
default: g_assert_not_reached();
}
}
@ -261,11 +268,6 @@ static void test_drv_cb_drain(void)
test_drv_cb_common(BDRV_DRAIN, false);
}
static void test_drv_cb_drain_subtree(void)
{
test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
}
static void test_drv_cb_co_drain_all(void)
{
call_in_coroutine(test_drv_cb_drain_all);
@ -276,11 +278,6 @@ static void test_drv_cb_co_drain(void)
call_in_coroutine(test_drv_cb_drain);
}
static void test_drv_cb_co_drain_subtree(void)
{
call_in_coroutine(test_drv_cb_drain_subtree);
}
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
{
BlockBackend *blk;
@ -299,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
do_drain_begin(drain_type, bs);
g_assert_cmpint(bs->quiesce_counter, ==, 1);
if (drain_type == BDRV_DRAIN_ALL) {
g_assert_cmpint(bs->quiesce_counter, ==, 2);
} else {
g_assert_cmpint(bs->quiesce_counter, ==, 1);
}
g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
do_drain_end(drain_type, bs);
@ -322,11 +323,6 @@ static void test_quiesce_drain(void)
test_quiesce_common(BDRV_DRAIN, false);
}
static void test_quiesce_drain_subtree(void)
{
test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
}
static void test_quiesce_co_drain_all(void)
{
call_in_coroutine(test_quiesce_drain_all);
@ -337,11 +333,6 @@ static void test_quiesce_co_drain(void)
call_in_coroutine(test_quiesce_drain);
}
static void test_quiesce_co_drain_subtree(void)
{
call_in_coroutine(test_quiesce_drain_subtree);
}
static void test_nested(void)
{
BlockBackend *blk;
@ -361,8 +352,8 @@ static void test_nested(void)
for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
int backing_quiesce = (outer != BDRV_DRAIN) +
(inner != BDRV_DRAIN);
int backing_quiesce = (outer == BDRV_DRAIN_ALL) +
(inner == BDRV_DRAIN_ALL);
g_assert_cmpint(bs->quiesce_counter, ==, 0);
g_assert_cmpint(backing->quiesce_counter, ==, 0);
@ -372,10 +363,10 @@ static void test_nested(void)
do_drain_begin(outer, bs);
do_drain_begin(inner, bs);
g_assert_cmpint(bs->quiesce_counter, ==, 2);
g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce);
g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
g_assert_cmpint(s->drain_count, ==, 2);
g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
g_assert_cmpint(s->drain_count, ==, 1);
g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce);
do_drain_end(inner, bs);
do_drain_end(outer, bs);
@ -392,158 +383,6 @@ static void test_nested(void)
blk_unref(blk);
}
static void test_multiparent(void)
{
BlockBackend *blk_a, *blk_b;
BlockDriverState *bs_a, *bs_b, *backing;
BDRVTestState *a_s, *b_s, *backing_s;
blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
&error_abort);
a_s = bs_a->opaque;
blk_insert_bs(blk_a, bs_a, &error_abort);
blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
&error_abort);
b_s = bs_b->opaque;
blk_insert_bs(blk_b, bs_b, &error_abort);
backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
backing_s = backing->opaque;
bdrv_set_backing_hd(bs_a, backing, &error_abort);
bdrv_set_backing_hd(bs_b, backing, &error_abort);
g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
g_assert_cmpint(backing->quiesce_counter, ==, 0);
g_assert_cmpint(a_s->drain_count, ==, 0);
g_assert_cmpint(b_s->drain_count, ==, 0);
g_assert_cmpint(backing_s->drain_count, ==, 0);
do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
g_assert_cmpint(backing->quiesce_counter, ==, 1);
g_assert_cmpint(a_s->drain_count, ==, 1);
g_assert_cmpint(b_s->drain_count, ==, 1);
g_assert_cmpint(backing_s->drain_count, ==, 1);
do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
g_assert_cmpint(backing->quiesce_counter, ==, 2);
g_assert_cmpint(a_s->drain_count, ==, 2);
g_assert_cmpint(b_s->drain_count, ==, 2);
g_assert_cmpint(backing_s->drain_count, ==, 2);
do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
g_assert_cmpint(backing->quiesce_counter, ==, 1);
g_assert_cmpint(a_s->drain_count, ==, 1);
g_assert_cmpint(b_s->drain_count, ==, 1);
g_assert_cmpint(backing_s->drain_count, ==, 1);
do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
g_assert_cmpint(backing->quiesce_counter, ==, 0);
g_assert_cmpint(a_s->drain_count, ==, 0);
g_assert_cmpint(b_s->drain_count, ==, 0);
g_assert_cmpint(backing_s->drain_count, ==, 0);
bdrv_unref(backing);
bdrv_unref(bs_a);
bdrv_unref(bs_b);
blk_unref(blk_a);
blk_unref(blk_b);
}
static void test_graph_change_drain_subtree(void)
{
BlockBackend *blk_a, *blk_b;
BlockDriverState *bs_a, *bs_b, *backing;
BDRVTestState *a_s, *b_s, *backing_s;
blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
&error_abort);
a_s = bs_a->opaque;
blk_insert_bs(blk_a, bs_a, &error_abort);
blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
&error_abort);
b_s = bs_b->opaque;
blk_insert_bs(blk_b, bs_b, &error_abort);
backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
backing_s = backing->opaque;
bdrv_set_backing_hd(bs_a, backing, &error_abort);
g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
g_assert_cmpint(backing->quiesce_counter, ==, 0);
g_assert_cmpint(a_s->drain_count, ==, 0);
g_assert_cmpint(b_s->drain_count, ==, 0);
g_assert_cmpint(backing_s->drain_count, ==, 0);
do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
bdrv_set_backing_hd(bs_b, backing, &error_abort);
g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
g_assert_cmpint(backing->quiesce_counter, ==, 5);
g_assert_cmpint(a_s->drain_count, ==, 5);
g_assert_cmpint(b_s->drain_count, ==, 5);
g_assert_cmpint(backing_s->drain_count, ==, 5);
bdrv_set_backing_hd(bs_b, NULL, &error_abort);
g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
g_assert_cmpint(backing->quiesce_counter, ==, 3);
g_assert_cmpint(a_s->drain_count, ==, 3);
g_assert_cmpint(b_s->drain_count, ==, 2);
g_assert_cmpint(backing_s->drain_count, ==, 3);
bdrv_set_backing_hd(bs_b, backing, &error_abort);
g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
g_assert_cmpint(backing->quiesce_counter, ==, 5);
g_assert_cmpint(a_s->drain_count, ==, 5);
g_assert_cmpint(b_s->drain_count, ==, 5);
g_assert_cmpint(backing_s->drain_count, ==, 5);
do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
g_assert_cmpint(backing->quiesce_counter, ==, 0);
g_assert_cmpint(a_s->drain_count, ==, 0);
g_assert_cmpint(b_s->drain_count, ==, 0);
g_assert_cmpint(backing_s->drain_count, ==, 0);
bdrv_unref(backing);
bdrv_unref(bs_a);
bdrv_unref(bs_b);
blk_unref(blk_a);
blk_unref(blk_b);
}
static void test_graph_change_drain_all(void)
{
BlockBackend *blk_a, *blk_b;
@ -763,12 +602,6 @@ static void test_iothread_drain(void)
test_iothread_common(BDRV_DRAIN, 1);
}
static void test_iothread_drain_subtree(void)
{
test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
}
typedef struct TestBlockJob {
BlockJob common;
@ -853,7 +686,6 @@ enum test_job_result {
enum test_job_drain_node {
TEST_JOB_DRAIN_SRC,
TEST_JOB_DRAIN_SRC_CHILD,
TEST_JOB_DRAIN_SRC_PARENT,
};
static void test_blockjob_common_drain_node(enum drain_type drain_type,
@ -891,9 +723,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
case TEST_JOB_DRAIN_SRC_CHILD:
drain_bs = src_backing;
break;
case TEST_JOB_DRAIN_SRC_PARENT:
drain_bs = src_overlay;
break;
default:
g_assert_not_reached();
}
@ -1045,10 +874,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
TEST_JOB_DRAIN_SRC);
test_blockjob_common_drain_node(drain_type, use_iothread, result,
TEST_JOB_DRAIN_SRC_CHILD);
if (drain_type == BDRV_SUBTREE_DRAIN) {
test_blockjob_common_drain_node(drain_type, use_iothread, result,
TEST_JOB_DRAIN_SRC_PARENT);
}
}
static void test_blockjob_drain_all(void)
@ -1061,11 +886,6 @@ static void test_blockjob_drain(void)
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
}
static void test_blockjob_drain_subtree(void)
{
test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
}
static void test_blockjob_error_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
@ -1078,12 +898,6 @@ static void test_blockjob_error_drain(void)
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
}
static void test_blockjob_error_drain_subtree(void)
{
test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
}
static void test_blockjob_iothread_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
@ -1094,11 +908,6 @@ static void test_blockjob_iothread_drain(void)
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
}
static void test_blockjob_iothread_drain_subtree(void)
{
test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
}
static void test_blockjob_iothread_error_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
@ -1111,12 +920,6 @@ static void test_blockjob_iothread_error_drain(void)
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
}
static void test_blockjob_iothread_error_drain_subtree(void)
{
test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
}
typedef struct BDRVTestTopState {
BdrvChild *wait_child;
@ -1263,14 +1066,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
bdrv_drain(child_bs);
bdrv_unref(child_bs);
break;
case BDRV_SUBTREE_DRAIN:
/* Would have to ref/unref bs here for !detach_instead_of_delete, but
* then the whole test becomes pointless because the graph changes
* don't occur during the drain any more. */
assert(detach_instead_of_delete);
bdrv_subtree_drained_begin(bs);
bdrv_subtree_drained_end(bs);
break;
case BDRV_DRAIN_ALL:
bdrv_drain_all_begin();
bdrv_drain_all_end();
@ -1305,11 +1100,6 @@ static void test_detach_by_drain(void)
do_test_delete_by_drain(true, BDRV_DRAIN);
}
static void test_detach_by_drain_subtree(void)
{
do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
}
struct detach_by_parent_data {
BlockDriverState *parent_b;
@ -1317,6 +1107,7 @@ struct detach_by_parent_data {
BlockDriverState *c;
BdrvChild *child_c;
bool by_parent_cb;
bool detach_on_drain;
};
static struct detach_by_parent_data detach_by_parent_data;
@ -1324,6 +1115,7 @@ static void detach_indirect_bh(void *opaque)
{
struct detach_by_parent_data *data = opaque;
bdrv_dec_in_flight(data->child_b->bs);
bdrv_unref_child(data->parent_b, data->child_b);
bdrv_ref(data->c);
@ -1338,12 +1130,21 @@ static void detach_by_parent_aio_cb(void *opaque, int ret)
g_assert_cmpint(ret, ==, 0);
if (data->by_parent_cb) {
bdrv_inc_in_flight(data->child_b->bs);
detach_indirect_bh(data);
}
}
static void detach_by_driver_cb_drained_begin(BdrvChild *child)
{
struct detach_by_parent_data *data = &detach_by_parent_data;
if (!data->detach_on_drain) {
return;
}
data->detach_on_drain = false;
bdrv_inc_in_flight(data->child_b->bs);
aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
detach_indirect_bh, &detach_by_parent_data);
child_of_bds.drained_begin(child);
@ -1384,8 +1185,14 @@ static void test_detach_indirect(bool by_parent_cb)
detach_by_driver_cb_class = child_of_bds;
detach_by_driver_cb_class.drained_begin =
detach_by_driver_cb_drained_begin;
detach_by_driver_cb_class.drained_end = NULL;
detach_by_driver_cb_class.drained_poll = NULL;
}
detach_by_parent_data = (struct detach_by_parent_data) {
.detach_on_drain = false,
};
/* Create all involved nodes */
parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR,
&error_abort);
@ -1437,12 +1244,16 @@ static void test_detach_indirect(bool by_parent_cb)
.child_b = child_b,
.c = c,
.by_parent_cb = by_parent_cb,
.detach_on_drain = true,
};
acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL);
g_assert(acb != NULL);
/* Drain and check the expected result */
bdrv_subtree_drained_begin(parent_b);
bdrv_drained_begin(parent_b);
bdrv_drained_begin(a);
bdrv_drained_begin(b);
bdrv_drained_begin(c);
g_assert(detach_by_parent_data.child_c != NULL);
@ -1457,12 +1268,15 @@ static void test_detach_indirect(bool by_parent_cb)
g_assert(QLIST_NEXT(child_a, next) == NULL);
g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
g_assert_cmpint(parent_b->quiesce_counter, ==, 3);
g_assert_cmpint(a->quiesce_counter, ==, 1);
g_assert_cmpint(b->quiesce_counter, ==, 0);
g_assert_cmpint(b->quiesce_counter, ==, 1);
g_assert_cmpint(c->quiesce_counter, ==, 1);
bdrv_subtree_drained_end(parent_b);
bdrv_drained_end(parent_b);
bdrv_drained_end(a);
bdrv_drained_end(b);
bdrv_drained_end(c);
bdrv_unref(parent_b);
blk_unref(blk);
@ -1693,6 +1507,7 @@ static void test_blockjob_commit_by_drained_end(void)
bdrv_drained_begin(bs_child);
g_assert(!job_has_completed);
bdrv_drained_end(bs_child);
aio_poll(qemu_get_aio_context(), false);
g_assert(job_has_completed);
bdrv_unref(bs_parents[0]);
@ -1848,6 +1663,7 @@ static void test_drop_intermediate_poll(void)
g_assert(!job_has_completed);
ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
aio_poll(qemu_get_aio_context(), false);
g_assert(ret == 0);
g_assert(job_has_completed);
@ -1856,6 +1672,7 @@ static void test_drop_intermediate_poll(void)
typedef struct BDRVReplaceTestState {
bool setup_completed;
bool was_drained;
bool was_undrained;
bool has_read;
@ -1916,52 +1733,78 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
return 0;
}
static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
{
BlockDriverState *bs = opaque;
BDRVReplaceTestState *s = bs->opaque;
/* Keep waking io_co up until it is done */
while (s->io_co) {
aio_co_wake(s->io_co);
s->io_co = NULL;
qemu_coroutine_yield();
}
s->drain_co = NULL;
bdrv_dec_in_flight(bs);
}
/**
* If .drain_count is 0, wake up .io_co if there is one; and set
* .was_drained.
* Increment .drain_count.
*/
static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
if (!s->drain_count) {
/* Keep waking io_co up until it is done */
s->drain_co = qemu_coroutine_self();
while (s->io_co) {
aio_co_wake(s->io_co);
s->io_co = NULL;
qemu_coroutine_yield();
}
s->drain_co = NULL;
if (!s->setup_completed) {
return;
}
if (!s->drain_count) {
s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
bdrv_inc_in_flight(bs);
aio_co_enter(bdrv_get_aio_context(bs), s->drain_co);
s->was_drained = true;
}
s->drain_count++;
}
static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
{
BlockDriverState *bs = opaque;
char data;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
int ret;
/* Queue a read request post-drain */
ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
g_assert(ret >= 0);
bdrv_dec_in_flight(bs);
}
/**
* Reduce .drain_count, set .was_undrained once it reaches 0.
* If .drain_count reaches 0 and the node has a backing file, issue a
* read request.
*/
static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
static void bdrv_replace_test_drain_end(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
if (!s->setup_completed) {
return;
}
g_assert(s->drain_count > 0);
if (!--s->drain_count) {
int ret;
s->was_undrained = true;
if (bs->backing) {
char data;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
/* Queue a read request post-drain */
ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
g_assert(ret >= 0);
Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry,
bs);
bdrv_inc_in_flight(bs);
aio_co_enter(bdrv_get_aio_context(bs), co);
}
}
}
@ -1974,8 +1817,8 @@ static BlockDriver bdrv_replace_test = {
.bdrv_close = bdrv_replace_test_close,
.bdrv_co_preadv = bdrv_replace_test_co_preadv,
.bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin,
.bdrv_co_drain_end = bdrv_replace_test_co_drain_end,
.bdrv_drain_begin = bdrv_replace_test_drain_begin,
.bdrv_drain_end = bdrv_replace_test_drain_end,
.bdrv_child_perm = bdrv_default_perms,
};
@ -2051,6 +1894,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
bdrv_ref(old_child_bs);
bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
BDRV_CHILD_COW, &error_abort);
parent_s->setup_completed = true;
for (i = 0; i < old_drain_count; i++) {
bdrv_drained_begin(old_child_bs);
@ -2172,70 +2016,47 @@ int main(int argc, char **argv)
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
test_drv_cb_drain_subtree);
g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
test_drv_cb_co_drain_all);
g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
test_drv_cb_co_drain_subtree);
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
test_quiesce_drain_subtree);
g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
test_quiesce_co_drain_all);
g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
test_quiesce_co_drain_subtree);
g_test_add_func("/bdrv-drain/nested", test_nested);
g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
test_graph_change_drain_subtree);
g_test_add_func("/bdrv-drain/graph-change/drain_all",
test_graph_change_drain_all);
g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
g_test_add_func("/bdrv-drain/iothread/drain_subtree",
test_iothread_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
test_blockjob_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
test_blockjob_error_drain_all);
g_test_add_func("/bdrv-drain/blockjob/error/drain",
test_blockjob_error_drain);
g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
test_blockjob_error_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
test_blockjob_iothread_drain_all);
g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
test_blockjob_iothread_drain);
g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
test_blockjob_iothread_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
test_blockjob_iothread_error_drain_all);
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
test_blockjob_iothread_error_drain);
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
test_blockjob_iothread_error_drain_subtree);
g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);


@ -27,6 +27,7 @@
#include "qapi/error.h"
#include "block/aio.h"
#include "block/thread-pool.h"
#include "block/graph-lock.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
#include "qemu/rcu_queue.h"
@ -376,6 +377,7 @@ aio_ctx_finalize(GSource *source)
qemu_rec_mutex_destroy(&ctx->lock);
qemu_lockcnt_destroy(&ctx->list_lock);
timerlistgroup_deinit(&ctx->tlg);
unregister_aiocontext(ctx);
aio_context_destroy(ctx);
}
@ -574,6 +576,8 @@ AioContext *aio_context_new(Error **errp)
ctx->thread_pool_min = 0;
ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
register_aiocontext(ctx);
return ctx;
fail:
g_source_destroy(&ctx->source);