udpate and rebase to QEMU v6.0.0
Mostly minor changes, bigger ones summarized: * QEMU's internal backup code now uses a new async system, which allows parallel requests - the default max_workers settings is 64, I chose less, since 64 put enough stress on QEMU that the guest became practically unusable during the backup, and 16 still shows quite a nice measureable performance improvement. Little code changes for us though. * 'malformed' QAPI parameters/functions are now a build error (i.e. using '_' vs '-'), I chose to just whitelist our calls in the name of backwards compatibility. * monitor OOB race fix now uses the upstream variant, cherry-picked from origin/master since it's not in 6.0 by default * last patch fixes a bug with snapshot rollback related to the new yank system Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>master
parent
1cbf147251
commit
8dca018b68
|
@ -1,3 +1,9 @@
|
|||
pve-qemu-kvm (6.0.0-1) UNRELEASED; urgency=medium
|
||||
|
||||
* Non-maintainer upload.
|
||||
|
||||
-- Stefan Reiter <sreiter@mila.proxmox.com> Wed, 26 May 2021 15:39:15 +0200
|
||||
|
||||
pve-qemu-kvm (5.2.0-11) bullseye; urgency=medium
|
||||
|
||||
* re-build for Proxmox VE 7 / Debian Bullseye
|
||||
|
|
|
@ -27,15 +27,15 @@ Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
|
|||
Signed-off-by: John Snow <jsnow@redhat.com>
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
block/mirror.c | 98 ++++++++++++++++++++++++++++++-------
|
||||
blockdev.c | 39 +++++++++++++--
|
||||
include/block/block_int.h | 4 +-
|
||||
qapi/block-core.json | 29 +++++++++--
|
||||
tests/test-block-iothread.c | 4 +-
|
||||
block/mirror.c | 98 +++++++++++++++++++++++++-------
|
||||
blockdev.c | 39 ++++++++++++-
|
||||
include/block/block_int.h | 4 +-
|
||||
qapi/block-core.json | 29 ++++++++--
|
||||
tests/unit/test-block-iothread.c | 4 +-
|
||||
5 files changed, 145 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/block/mirror.c b/block/mirror.c
|
||||
index 8e1ad6eceb..97843992c2 100644
|
||||
index 5a71bd8bbc..f1416d96f3 100644
|
||||
--- a/block/mirror.c
|
||||
+++ b/block/mirror.c
|
||||
@@ -50,7 +50,7 @@ typedef struct MirrorBlockJob {
|
||||
|
@ -56,7 +56,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
BdrvDirtyBitmap *dirty_bitmap;
|
||||
BdrvDirtyBitmapIter *dbi;
|
||||
uint8_t *buf;
|
||||
@@ -677,7 +679,8 @@ static int mirror_exit_common(Job *job)
|
||||
@@ -678,7 +680,8 @@ static int mirror_exit_common(Job *job)
|
||||
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
|
||||
&error_abort);
|
||||
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
|
||||
|
@ -66,7 +66,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
|
||||
|
||||
if (bdrv_cow_bs(unfiltered_target) != backing) {
|
||||
@@ -774,6 +777,16 @@ static void mirror_abort(Job *job)
|
||||
@@ -783,6 +786,16 @@ static void mirror_abort(Job *job)
|
||||
assert(ret == 0);
|
||||
}
|
||||
|
||||
|
@ -83,7 +83,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
|
||||
{
|
||||
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
|
||||
@@ -955,7 +968,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
|
||||
@@ -964,7 +977,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
|
||||
mirror_free_init(s);
|
||||
|
||||
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
|
||||
|
@ -93,7 +93,15 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
ret = mirror_dirty_init(s);
|
||||
if (ret < 0 || job_is_cancelled(&s->common.job)) {
|
||||
goto immediate_exit;
|
||||
@@ -1188,6 +1202,7 @@ static const BlockJobDriver mirror_job_driver = {
|
||||
@@ -1195,6 +1209,7 @@ static const BlockJobDriver mirror_job_driver = {
|
||||
.run = mirror_run,
|
||||
.prepare = mirror_prepare,
|
||||
.abort = mirror_abort,
|
||||
+ .clean = mirror_clean,
|
||||
.pause = mirror_pause,
|
||||
.complete = mirror_complete,
|
||||
.cancel = mirror_cancel,
|
||||
@@ -1211,6 +1226,7 @@ static const BlockJobDriver commit_active_job_driver = {
|
||||
.run = mirror_run,
|
||||
.prepare = mirror_prepare,
|
||||
.abort = mirror_abort,
|
||||
|
@ -101,15 +109,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
.pause = mirror_pause,
|
||||
.complete = mirror_complete,
|
||||
},
|
||||
@@ -1203,6 +1218,7 @@ static const BlockJobDriver commit_active_job_driver = {
|
||||
.run = mirror_run,
|
||||
.prepare = mirror_prepare,
|
||||
.abort = mirror_abort,
|
||||
+ .clean = mirror_clean,
|
||||
.pause = mirror_pause,
|
||||
.complete = mirror_complete,
|
||||
},
|
||||
@@ -1550,7 +1566,10 @@ static BlockJob *mirror_start_job(
|
||||
@@ -1572,7 +1588,10 @@ static BlockJob *mirror_start_job(
|
||||
BlockCompletionFunc *cb,
|
||||
void *opaque,
|
||||
const BlockJobDriver *driver,
|
||||
|
@ -121,8 +121,8 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
bool auto_complete, const char *filter_node_name,
|
||||
bool is_mirror, MirrorCopyMode copy_mode,
|
||||
Error **errp)
|
||||
@@ -1563,10 +1582,39 @@ static BlockJob *mirror_start_job(
|
||||
Error *local_err = NULL;
|
||||
@@ -1584,10 +1603,39 @@ static BlockJob *mirror_start_job(
|
||||
uint64_t target_perms, target_shared_perms;
|
||||
int ret;
|
||||
|
||||
- if (granularity == 0) {
|
||||
|
@ -163,7 +163,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
assert(is_power_of_2(granularity));
|
||||
|
||||
if (buf_size < 0) {
|
||||
@@ -1705,7 +1753,9 @@ static BlockJob *mirror_start_job(
|
||||
@@ -1728,7 +1776,9 @@ static BlockJob *mirror_start_job(
|
||||
s->replaces = g_strdup(replaces);
|
||||
s->on_source_error = on_source_error;
|
||||
s->on_target_error = on_target_error;
|
||||
|
@ -174,7 +174,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
s->backing_mode = backing_mode;
|
||||
s->zero_target = zero_target;
|
||||
s->copy_mode = copy_mode;
|
||||
@@ -1726,6 +1776,18 @@ static BlockJob *mirror_start_job(
|
||||
@@ -1749,6 +1799,18 @@ static BlockJob *mirror_start_job(
|
||||
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
|
||||
}
|
||||
|
||||
|
@ -193,7 +193,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
|
||||
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
|
||||
BLK_PERM_CONSISTENT_READ,
|
||||
@@ -1803,6 +1865,9 @@ fail:
|
||||
@@ -1826,6 +1888,9 @@ fail:
|
||||
if (s->dirty_bitmap) {
|
||||
bdrv_release_dirty_bitmap(s->dirty_bitmap);
|
||||
}
|
||||
|
@ -203,7 +203,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
job_early_fail(&s->common.job);
|
||||
}
|
||||
|
||||
@@ -1820,29 +1885,23 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
|
||||
@@ -1843,29 +1908,23 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
|
||||
BlockDriverState *target, const char *replaces,
|
||||
int creation_flags, int64_t speed,
|
||||
uint32_t granularity, int64_t buf_size,
|
||||
|
@ -238,7 +238,7 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
}
|
||||
|
||||
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
|
||||
@@ -1868,7 +1927,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
|
||||
@@ -1890,7 +1949,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
|
||||
job_id, bs, creation_flags, base, NULL, speed, 0, 0,
|
||||
MIRROR_LEAVE_BACKING_CHAIN, false,
|
||||
on_error, on_error, true, cb, opaque,
|
||||
|
@ -246,13 +246,13 @@ index 8e1ad6eceb..97843992c2 100644
|
|||
+ &commit_active_job_driver, MIRROR_SYNC_MODE_FULL,
|
||||
+ NULL, 0, base, auto_complete,
|
||||
filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
|
||||
&local_err);
|
||||
if (local_err) {
|
||||
errp);
|
||||
if (!job) {
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index fe6fb5dc1d..394920613d 100644
|
||||
index a57590aae4..798d02704f 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2930,6 +2930,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
@@ -2963,6 +2963,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
BlockDriverState *target,
|
||||
bool has_replaces, const char *replaces,
|
||||
enum MirrorSyncMode sync,
|
||||
|
@ -263,7 +263,7 @@ index fe6fb5dc1d..394920613d 100644
|
|||
BlockMirrorBackingMode backing_mode,
|
||||
bool zero_target,
|
||||
bool has_speed, int64_t speed,
|
||||
@@ -2949,6 +2953,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
@@ -2982,6 +2986,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
{
|
||||
BlockDriverState *unfiltered_bs;
|
||||
int job_flags = JOB_DEFAULT;
|
||||
|
@ -271,7 +271,7 @@ index fe6fb5dc1d..394920613d 100644
|
|||
|
||||
if (!has_speed) {
|
||||
speed = 0;
|
||||
@@ -3003,6 +3008,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
@@ -3036,6 +3041,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
sync = MIRROR_SYNC_MODE_FULL;
|
||||
}
|
||||
|
||||
|
@ -301,7 +301,7 @@ index fe6fb5dc1d..394920613d 100644
|
|||
if (!has_replaces) {
|
||||
/* We want to mirror from @bs, but keep implicit filters on top */
|
||||
unfiltered_bs = bdrv_skip_implicit_filters(bs);
|
||||
@@ -3049,8 +3077,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
@@ -3082,8 +3110,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
* and will allow to check whether the node still exist at mirror completion
|
||||
*/
|
||||
mirror_start(job_id, bs, target,
|
||||
|
@ -312,7 +312,7 @@ index fe6fb5dc1d..394920613d 100644
|
|||
on_source_error, on_target_error, unmap, filter_node_name,
|
||||
copy_mode, errp);
|
||||
}
|
||||
@@ -3195,6 +3223,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
|
||||
@@ -3228,6 +3256,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
|
||||
|
||||
blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
|
||||
arg->has_replaces, arg->replaces, arg->sync,
|
||||
|
@ -321,7 +321,7 @@ index fe6fb5dc1d..394920613d 100644
|
|||
backing_mode, zero_target,
|
||||
arg->has_speed, arg->speed,
|
||||
arg->has_granularity, arg->granularity,
|
||||
@@ -3216,6 +3246,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
|
||||
@@ -3249,6 +3279,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
|
||||
const char *device, const char *target,
|
||||
bool has_replaces, const char *replaces,
|
||||
MirrorSyncMode sync,
|
||||
|
@ -330,7 +330,7 @@ index fe6fb5dc1d..394920613d 100644
|
|||
bool has_speed, int64_t speed,
|
||||
bool has_granularity, uint32_t granularity,
|
||||
bool has_buf_size, int64_t buf_size,
|
||||
@@ -3265,7 +3297,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
|
||||
@@ -3298,7 +3330,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
|
||||
}
|
||||
|
||||
blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
|
||||
|
@ -341,10 +341,10 @@ index fe6fb5dc1d..394920613d 100644
|
|||
has_granularity, granularity,
|
||||
has_buf_size, buf_size,
|
||||
diff --git a/include/block/block_int.h b/include/block/block_int.h
|
||||
index 95d9333be1..6f8eda629a 100644
|
||||
index 88e4111939..1c399bdb16 100644
|
||||
--- a/include/block/block_int.h
|
||||
+++ b/include/block/block_int.h
|
||||
@@ -1230,7 +1230,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
|
||||
@@ -1258,7 +1258,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
|
||||
BlockDriverState *target, const char *replaces,
|
||||
int creation_flags, int64_t speed,
|
||||
uint32_t granularity, int64_t buf_size,
|
||||
|
@ -356,10 +356,10 @@ index 95d9333be1..6f8eda629a 100644
|
|||
BlockdevOnError on_source_error,
|
||||
BlockdevOnError on_target_error,
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 04ad80bc1e..9db3120716 100644
|
||||
index 6d227924d0..4b6fb6ca44 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -1971,10 +1971,19 @@
|
||||
@@ -1921,10 +1921,19 @@
|
||||
# (all the disk, only the sectors allocated in the topmost image, or
|
||||
# only new I/O).
|
||||
#
|
||||
|
@ -380,7 +380,7 @@ index 04ad80bc1e..9db3120716 100644
|
|||
#
|
||||
# @buf-size: maximum amount of data in flight from source to
|
||||
# target (since 1.4).
|
||||
@@ -2012,7 +2021,9 @@
|
||||
@@ -1962,7 +1971,9 @@
|
||||
{ 'struct': 'DriveMirror',
|
||||
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
|
||||
'*format': 'str', '*node-name': 'str', '*replaces': 'str',
|
||||
|
@ -391,7 +391,7 @@ index 04ad80bc1e..9db3120716 100644
|
|||
'*speed': 'int', '*granularity': 'uint32',
|
||||
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
|
||||
'*on-target-error': 'BlockdevOnError',
|
||||
@@ -2280,10 +2291,19 @@
|
||||
@@ -2230,10 +2241,19 @@
|
||||
# (all the disk, only the sectors allocated in the topmost image, or
|
||||
# only new I/O).
|
||||
#
|
||||
|
@ -412,7 +412,7 @@ index 04ad80bc1e..9db3120716 100644
|
|||
#
|
||||
# @buf-size: maximum amount of data in flight from source to
|
||||
# target
|
||||
@@ -2332,7 +2352,8 @@
|
||||
@@ -2282,7 +2302,8 @@
|
||||
{ 'command': 'blockdev-mirror',
|
||||
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
|
||||
'*replaces': 'str',
|
||||
|
@ -422,10 +422,10 @@ index 04ad80bc1e..9db3120716 100644
|
|||
'*speed': 'int', '*granularity': 'uint32',
|
||||
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
|
||||
'*on-target-error': 'BlockdevOnError',
|
||||
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
|
||||
index 3f866a35c6..500ede71c8 100644
|
||||
--- a/tests/test-block-iothread.c
|
||||
+++ b/tests/test-block-iothread.c
|
||||
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
|
||||
index 8cf172cb7a..65f652819d 100644
|
||||
--- a/tests/unit/test-block-iothread.c
|
||||
+++ b/tests/unit/test-block-iothread.c
|
||||
@@ -623,8 +623,8 @@ static void test_propagate_mirror(void)
|
||||
|
||||
/* Start a mirror job */
|
||||
|
|
|
@ -23,10 +23,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||
1 file changed, 18 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/block/mirror.c b/block/mirror.c
|
||||
index 97843992c2..d1cce079da 100644
|
||||
index f1416d96f3..3e37967365 100644
|
||||
--- a/block/mirror.c
|
||||
+++ b/block/mirror.c
|
||||
@@ -654,8 +654,6 @@ static int mirror_exit_common(Job *job)
|
||||
@@ -655,8 +655,6 @@ static int mirror_exit_common(Job *job)
|
||||
bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,7 @@ index 97843992c2..d1cce079da 100644
|
|||
/* Make sure that the source BDS doesn't go away during bdrv_replace_node,
|
||||
* before we can call bdrv_drained_end */
|
||||
bdrv_ref(src);
|
||||
@@ -755,6 +753,18 @@ static int mirror_exit_common(Job *job)
|
||||
@@ -764,6 +762,18 @@ static int mirror_exit_common(Job *job)
|
||||
blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
|
||||
blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
|
||||
|
||||
|
@ -54,7 +54,7 @@ index 97843992c2..d1cce079da 100644
|
|||
bs_opaque->job = NULL;
|
||||
|
||||
bdrv_drained_end(src);
|
||||
@@ -1592,10 +1602,6 @@ static BlockJob *mirror_start_job(
|
||||
@@ -1613,10 +1623,6 @@ static BlockJob *mirror_start_job(
|
||||
" sync mode",
|
||||
MirrorSyncMode_str(sync_mode));
|
||||
return NULL;
|
||||
|
@ -65,7 +65,7 @@ index 97843992c2..d1cce079da 100644
|
|||
}
|
||||
} else if (bitmap) {
|
||||
error_setg(errp,
|
||||
@@ -1612,6 +1618,12 @@ static BlockJob *mirror_start_job(
|
||||
@@ -1633,6 +1639,12 @@ static BlockJob *mirror_start_job(
|
||||
return NULL;
|
||||
}
|
||||
granularity = bdrv_dirty_bitmap_granularity(bitmap);
|
||||
|
|
|
@ -15,10 +15,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index 394920613d..4f8bd38b58 100644
|
||||
index 798d02704f..c025fbf567 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -3029,6 +3029,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
@@ -3062,6 +3062,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -11,14 +11,14 @@ beforehand.
|
|||
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
block/mirror.c | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
block/mirror.c | 11 ++++-------
|
||||
1 file changed, 4 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/block/mirror.c b/block/mirror.c
|
||||
index d1cce079da..e6140cf018 100644
|
||||
index 3e37967365..c6f759e279 100644
|
||||
--- a/block/mirror.c
|
||||
+++ b/block/mirror.c
|
||||
@@ -759,8 +759,8 @@ static int mirror_exit_common(Job *job)
|
||||
@@ -768,8 +768,8 @@ static int mirror_exit_common(Job *job)
|
||||
job->ret == 0 && ret == 0)) {
|
||||
/* Success; synchronize copy back to sync. */
|
||||
bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
|
||||
|
@ -29,14 +29,17 @@ index d1cce079da..e6140cf018 100644
|
|||
}
|
||||
}
|
||||
bdrv_release_dirty_bitmap(s->dirty_bitmap);
|
||||
@@ -1793,8 +1793,8 @@ static BlockJob *mirror_start_job(
|
||||
@@ -1816,11 +1816,8 @@ static BlockJob *mirror_start_job(
|
||||
}
|
||||
|
||||
if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
|
||||
- bdrv_merge_dirty_bitmap(s->dirty_bitmap, s->sync_bitmap,
|
||||
- NULL, &local_err);
|
||||
- if (local_err) {
|
||||
- goto fail;
|
||||
- }
|
||||
+ bdrv_dirty_bitmap_merge_internal(s->dirty_bitmap, s->sync_bitmap,
|
||||
+ NULL, true);
|
||||
if (local_err) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
|
||||
|
|
|
@ -23,8 +23,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||
---
|
||||
tests/qemu-iotests/384 | 547 +++++++
|
||||
tests/qemu-iotests/384.out | 2846 ++++++++++++++++++++++++++++++++++++
|
||||
tests/qemu-iotests/group | 1 +
|
||||
3 files changed, 3394 insertions(+)
|
||||
2 files changed, 3393 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/384
|
||||
create mode 100644 tests/qemu-iotests/384.out
|
||||
|
||||
|
@ -3433,15 +3432,3 @@ index 0000000000..9b7408b6d6
|
|||
+{"execute": "blockdev-mirror", "arguments": {"bitmap": "bitmap0", "device": "drive0", "filter-node-name": "mirror-top", "job-id": "api_job", "sync": "none", "target": "mirror_target"}}
|
||||
+{"error": {"class": "GenericError", "desc": "bitmap-mode must be specified if a bitmap is provided"}}
|
||||
+
|
||||
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
|
||||
index 2960dff728..952dceba1f 100644
|
||||
--- a/tests/qemu-iotests/group
|
||||
+++ b/tests/qemu-iotests/group
|
||||
@@ -270,6 +270,7 @@
|
||||
253 rw quick
|
||||
254 rw backing quick
|
||||
255 rw quick
|
||||
+384 rw
|
||||
256 rw auto quick
|
||||
257 rw
|
||||
258 rw quick
|
||||
|
|
|
@ -18,11 +18,11 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||
3 files changed, 70 insertions(+), 59 deletions(-)
|
||||
|
||||
diff --git a/block/mirror.c b/block/mirror.c
|
||||
index e6140cf018..3a08239a78 100644
|
||||
index c6f759e279..ce2b13b4d8 100644
|
||||
--- a/block/mirror.c
|
||||
+++ b/block/mirror.c
|
||||
@@ -1592,31 +1592,13 @@ static BlockJob *mirror_start_job(
|
||||
Error *local_err = NULL;
|
||||
@@ -1613,31 +1613,13 @@ static BlockJob *mirror_start_job(
|
||||
uint64_t target_perms, target_shared_perms;
|
||||
int ret;
|
||||
|
||||
- if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
|
||||
|
@ -59,10 +59,10 @@ index e6140cf018..3a08239a78 100644
|
|||
|
||||
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index 4f8bd38b58..a40c6fd0f6 100644
|
||||
index c025fbf567..9cab29e567 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -3008,7 +3008,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
@@ -3041,7 +3041,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
|
||||
sync = MIRROR_SYNC_MODE_FULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
Date: Mon, 14 Sep 2020 19:32:21 +0200
|
||||
Subject: [PATCH] Revert "qemu-img convert: Don't pre-zero images"
|
||||
|
||||
This reverts commit edafc70c0c8510862f2f213a3acf7067113bcd08.
|
||||
|
||||
As it correlates with causing issues on LVM allocation
|
||||
https://bugzilla.proxmox.com/show_bug.cgi?id=3002
|
||||
---
|
||||
qemu-img.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
diff --git a/qemu-img.c b/qemu-img.c
|
||||
index 8bdea40b58..f9050bfaad 100644
|
||||
--- a/qemu-img.c
|
||||
+++ b/qemu-img.c
|
||||
@@ -2104,6 +2104,15 @@ static int convert_do_copy(ImgConvertState *s)
|
||||
s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
|
||||
}
|
||||
|
||||
+ if (!s->has_zero_init && !s->target_has_backing &&
|
||||
+ bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
|
||||
+ {
|
||||
+ ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
|
||||
+ if (ret == 0) {
|
||||
+ s->has_zero_init = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* Allocate buffer for copied data. For compressed images, only one cluster
|
||||
* can be copied at a time. */
|
||||
if (s->compressed) {
|
84
debian/patches/extra/0001-monitor-qmp-fix-race-on-CHR_EVENT_CLOSED-without-OOB.patch
vendored
Normal file
84
debian/patches/extra/0001-monitor-qmp-fix-race-on-CHR_EVENT_CLOSED-without-OOB.patch
vendored
Normal file
|
@ -0,0 +1,84 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Reiter <s.reiter@proxmox.com>
|
||||
Date: Mon, 22 Mar 2021 16:40:24 +0100
|
||||
Subject: [PATCH] monitor/qmp: fix race on CHR_EVENT_CLOSED without OOB
|
||||
|
||||
The QMP dispatcher coroutine holds the qmp_queue_lock over a yield
|
||||
point, where it expects to be rescheduled from the main context. If a
|
||||
CHR_EVENT_CLOSED event is received just then, it can race and block the
|
||||
main thread on the mutex in monitor_qmp_cleanup_queue_and_resume.
|
||||
|
||||
monitor_resume does not need to be called from main context, so we can
|
||||
call it immediately after popping a request from the queue, which allows
|
||||
us to drop the qmp_queue_lock mutex before yielding.
|
||||
|
||||
Suggested-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
||||
Message-Id: <20210322154024.15011-1-s.reiter@proxmox.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Markus Armbruster <armbru@redhat.com>
|
||||
---
|
||||
monitor/qmp.c | 40 ++++++++++++++++++++++------------------
|
||||
1 file changed, 22 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/monitor/qmp.c b/monitor/qmp.c
|
||||
index 2b0308f933..092c527b6f 100644
|
||||
--- a/monitor/qmp.c
|
||||
+++ b/monitor/qmp.c
|
||||
@@ -257,24 +257,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data)
|
||||
trace_monitor_qmp_in_band_dequeue(req_obj,
|
||||
req_obj->mon->qmp_requests->length);
|
||||
|
||||
- if (qatomic_xchg(&qmp_dispatcher_co_busy, true) == true) {
|
||||
- /*
|
||||
- * Someone rescheduled us (probably because a new requests
|
||||
- * came in), but we didn't actually yield. Do that now,
|
||||
- * only to be immediately reentered and removed from the
|
||||
- * list of scheduled coroutines.
|
||||
- */
|
||||
- qemu_coroutine_yield();
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Move the coroutine from iohandler_ctx to qemu_aio_context for
|
||||
- * executing the command handler so that it can make progress if it
|
||||
- * involves an AIO_WAIT_WHILE().
|
||||
- */
|
||||
- aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co);
|
||||
- qemu_coroutine_yield();
|
||||
-
|
||||
/*
|
||||
* @req_obj has a request, we hold req_obj->mon->qmp_queue_lock
|
||||
*/
|
||||
@@ -298,8 +280,30 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data)
|
||||
monitor_resume(&mon->common);
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Drop the queue mutex now, before yielding, otherwise we might
|
||||
+ * deadlock if the main thread tries to lock it.
|
||||
+ */
|
||||
qemu_mutex_unlock(&mon->qmp_queue_lock);
|
||||
|
||||
+ if (qatomic_xchg(&qmp_dispatcher_co_busy, true) == true) {
|
||||
+ /*
|
||||
+ * Someone rescheduled us (probably because a new requests
|
||||
+ * came in), but we didn't actually yield. Do that now,
|
||||
+ * only to be immediately reentered and removed from the
|
||||
+ * list of scheduled coroutines.
|
||||
+ */
|
||||
+ qemu_coroutine_yield();
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Move the coroutine from iohandler_ctx to qemu_aio_context for
|
||||
+ * executing the command handler so that it can make progress if it
|
||||
+ * involves an AIO_WAIT_WHILE().
|
||||
+ */
|
||||
+ aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co);
|
||||
+ qemu_coroutine_yield();
|
||||
+
|
||||
/* Process request */
|
||||
if (req_obj->req) {
|
||||
if (trace_event_get_state(TRACE_MONITOR_QMP_CMD_IN_BAND)) {
|
|
@ -1,38 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Reiter <s.reiter@proxmox.com>
|
||||
Date: Thu, 28 Jan 2021 15:19:51 +0100
|
||||
Subject: [PATCH] docs: don't install man page if guest agent is disabled
|
||||
|
||||
No sense outputting the qemu-ga and qemu-ga-ref man pages when the guest
|
||||
agent binary itself is disabled. This mirrors behaviour from before the
|
||||
meson switch.
|
||||
|
||||
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
||||
---
|
||||
docs/meson.build | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/docs/meson.build b/docs/meson.build
|
||||
index ebd85d59f9..cc6f5007f8 100644
|
||||
--- a/docs/meson.build
|
||||
+++ b/docs/meson.build
|
||||
@@ -46,6 +46,8 @@ if build_docs
|
||||
meson.source_root() / 'docs/sphinx/qmp_lexer.py',
|
||||
qapi_gen_depends ]
|
||||
|
||||
+ have_ga = have_tools and config_host.has_key('CONFIG_GUEST_AGENT')
|
||||
+
|
||||
configure_file(output: 'index.html',
|
||||
input: files('index.html.in'),
|
||||
configuration: {'VERSION': meson.project_version()},
|
||||
@@ -53,8 +55,8 @@ if build_docs
|
||||
manuals = [ 'devel', 'interop', 'tools', 'specs', 'system', 'user' ]
|
||||
man_pages = {
|
||||
'interop' : {
|
||||
- 'qemu-ga.8': (have_tools ? 'man8' : ''),
|
||||
- 'qemu-ga-ref.7': 'man7',
|
||||
+ 'qemu-ga.8': (have_ga ? 'man8' : ''),
|
||||
+ 'qemu-ga-ref.7': (have_ga ? 'man7' : ''),
|
||||
'qemu-qmp-ref.7': 'man7',
|
||||
},
|
||||
'tools': {
|
|
@ -1,31 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Reiter <s.reiter@proxmox.com>
|
||||
Date: Thu, 4 Feb 2021 17:06:19 +0100
|
||||
Subject: [PATCH] migration: only check page size match if RAM postcopy is
|
||||
enabled
|
||||
|
||||
Postcopy may also be advised for dirty-bitmap migration only, in which
|
||||
case the remote page size will not be available and we'll instead read
|
||||
bogus data, blocking migration with a mismatch error if the VM uses
|
||||
hugepages.
|
||||
|
||||
Fixes: 58110f0acb ("migration: split common postcopy out of ram postcopy")
|
||||
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
migration/ram.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/migration/ram.c b/migration/ram.c
|
||||
index 7811cde643..6ace15261c 100644
|
||||
--- a/migration/ram.c
|
||||
+++ b/migration/ram.c
|
||||
@@ -3521,7 +3521,7 @@ static int ram_load_precopy(QEMUFile *f)
|
||||
}
|
||||
}
|
||||
/* For postcopy we need to check hugepage sizes match */
|
||||
- if (postcopy_advised &&
|
||||
+ if (postcopy_advised && migrate_postcopy_ram() &&
|
||||
block->page_size != qemu_host_page_size) {
|
||||
uint64_t remote_page_size = qemu_get_be64(f);
|
||||
if (remote_page_size != block->page_size) {
|
|
@ -1,143 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 4 Feb 2021 18:34:35 +0000
|
||||
Subject: [PATCH] virtiofsd: extract lo_do_open() from lo_open()
|
||||
|
||||
Both lo_open() and lo_create() have similar code to open a file. Extract
|
||||
a common lo_do_open() function from lo_open() that will be used by
|
||||
lo_create() in a later commit.
|
||||
|
||||
Since lo_do_open() does not otherwise need fuse_req_t req, convert
|
||||
lo_add_fd_mapping() to use struct lo_data *lo instead.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20210204150208.367837-2-stefanha@redhat.com>
|
||||
Reviewed-by: Greg Kurz <groug@kaod.org>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------
|
||||
1 file changed, 46 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||
index 97485b22b4..218e20e9d7 100644
|
||||
--- a/tools/virtiofsd/passthrough_ll.c
|
||||
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||
@@ -471,17 +471,17 @@ static void lo_map_remove(struct lo_map *map, size_t key)
|
||||
}
|
||||
|
||||
/* Assumes lo->mutex is held */
|
||||
-static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd)
|
||||
+static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd)
|
||||
{
|
||||
struct lo_map_elem *elem;
|
||||
|
||||
- elem = lo_map_alloc_elem(&lo_data(req)->fd_map);
|
||||
+ elem = lo_map_alloc_elem(&lo->fd_map);
|
||||
if (!elem) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
elem->fd = fd;
|
||||
- return elem - lo_data(req)->fd_map.elems;
|
||||
+ return elem - lo->fd_map.elems;
|
||||
}
|
||||
|
||||
/* Assumes lo->mutex is held */
|
||||
@@ -1661,6 +1661,38 @@ static void update_open_flags(int writeback, int allow_direct_io,
|
||||
}
|
||||
}
|
||||
|
||||
+static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
|
||||
+ struct fuse_file_info *fi)
|
||||
+{
|
||||
+ char buf[64];
|
||||
+ ssize_t fh;
|
||||
+ int fd;
|
||||
+
|
||||
+ update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||||
+
|
||||
+ sprintf(buf, "%i", inode->fd);
|
||||
+ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
|
||||
+ if (fd == -1) {
|
||||
+ return errno;
|
||||
+ }
|
||||
+
|
||||
+ pthread_mutex_lock(&lo->mutex);
|
||||
+ fh = lo_add_fd_mapping(lo, fd);
|
||||
+ pthread_mutex_unlock(&lo->mutex);
|
||||
+ if (fh == -1) {
|
||||
+ close(fd);
|
||||
+ return ENOMEM;
|
||||
+ }
|
||||
+
|
||||
+ fi->fh = fh;
|
||||
+ if (lo->cache == CACHE_NONE) {
|
||||
+ fi->direct_io = 1;
|
||||
+ } else if (lo->cache == CACHE_ALWAYS) {
|
||||
+ fi->keep_cache = 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
mode_t mode, struct fuse_file_info *fi)
|
||||
{
|
||||
@@ -1701,7 +1733,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
ssize_t fh;
|
||||
|
||||
pthread_mutex_lock(&lo->mutex);
|
||||
- fh = lo_add_fd_mapping(req, fd);
|
||||
+ fh = lo_add_fd_mapping(lo, fd);
|
||||
pthread_mutex_unlock(&lo->mutex);
|
||||
if (fh == -1) {
|
||||
close(fd);
|
||||
@@ -1892,38 +1924,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
|
||||
|
||||
static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
|
||||
{
|
||||
- int fd;
|
||||
- ssize_t fh;
|
||||
- char buf[64];
|
||||
struct lo_data *lo = lo_data(req);
|
||||
+ struct lo_inode *inode = lo_inode(req, ino);
|
||||
+ int err;
|
||||
|
||||
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
|
||||
fi->flags);
|
||||
|
||||
- update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||||
-
|
||||
- sprintf(buf, "%i", lo_fd(req, ino));
|
||||
- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
|
||||
- if (fd == -1) {
|
||||
- return (void)fuse_reply_err(req, errno);
|
||||
- }
|
||||
-
|
||||
- pthread_mutex_lock(&lo->mutex);
|
||||
- fh = lo_add_fd_mapping(req, fd);
|
||||
- pthread_mutex_unlock(&lo->mutex);
|
||||
- if (fh == -1) {
|
||||
- close(fd);
|
||||
- fuse_reply_err(req, ENOMEM);
|
||||
+ if (!inode) {
|
||||
+ fuse_reply_err(req, EBADF);
|
||||
return;
|
||||
}
|
||||
|
||||
- fi->fh = fh;
|
||||
- if (lo->cache == CACHE_NONE) {
|
||||
- fi->direct_io = 1;
|
||||
- } else if (lo->cache == CACHE_ALWAYS) {
|
||||
- fi->keep_cache = 1;
|
||||
+ err = lo_do_open(lo, inode, fi);
|
||||
+ lo_inode_put(lo, &inode);
|
||||
+ if (err) {
|
||||
+ fuse_reply_err(req, err);
|
||||
+ } else {
|
||||
+ fuse_reply_open(req, fi);
|
||||
}
|
||||
- fuse_reply_open(req, fi);
|
||||
}
|
||||
|
||||
static void lo_release(fuse_req_t req, fuse_ino_t ino,
|
|
@ -1,107 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 4 Feb 2021 18:34:36 +0000
|
||||
Subject: [PATCH] virtiofsd: optionally return inode pointer from
|
||||
lo_do_lookup()
|
||||
|
||||
lo_do_lookup() finds an existing inode or allocates a new one. It
|
||||
increments nlookup so that the inode stays alive until the client
|
||||
releases it.
|
||||
|
||||
Existing callers don't need the struct lo_inode so the function doesn't
|
||||
return it. Extend the function to optionally return the inode. The next
|
||||
commit will need it.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20210204150208.367837-3-stefanha@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++--------
|
||||
1 file changed, 21 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||
index 218e20e9d7..2bd050b620 100644
|
||||
--- a/tools/virtiofsd/passthrough_ll.c
|
||||
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||
@@ -843,11 +843,13 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
|
||||
}
|
||||
|
||||
/*
|
||||
- * Increments nlookup and caller must release refcount using
|
||||
- * lo_inode_put(&parent).
|
||||
+ * Increments nlookup on the inode on success. unref_inode_lolocked() must be
|
||||
+ * called eventually to decrement nlookup again. If inodep is non-NULL, the
|
||||
+ * inode pointer is stored and the caller must call lo_inode_put().
|
||||
*/
|
||||
static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
- struct fuse_entry_param *e)
|
||||
+ struct fuse_entry_param *e,
|
||||
+ struct lo_inode **inodep)
|
||||
{
|
||||
int newfd;
|
||||
int res;
|
||||
@@ -857,6 +859,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
struct lo_inode *inode = NULL;
|
||||
struct lo_inode *dir = lo_inode(req, parent);
|
||||
|
||||
+ if (inodep) {
|
||||
+ *inodep = NULL;
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* name_to_handle_at() and open_by_handle_at() can reach here with fuse
|
||||
* mount point in guest, but we don't have its inode info in the
|
||||
@@ -924,7 +930,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
pthread_mutex_unlock(&lo->mutex);
|
||||
}
|
||||
e->ino = inode->fuse_ino;
|
||||
- lo_inode_put(lo, &inode);
|
||||
+
|
||||
+ /* Transfer ownership of inode pointer to caller or drop it */
|
||||
+ if (inodep) {
|
||||
+ *inodep = inode;
|
||||
+ } else {
|
||||
+ lo_inode_put(lo, &inode);
|
||||
+ }
|
||||
+
|
||||
lo_inode_put(lo, &dir);
|
||||
|
||||
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
|
||||
@@ -959,7 +972,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
|
||||
return;
|
||||
}
|
||||
|
||||
- err = lo_do_lookup(req, parent, name, &e);
|
||||
+ err = lo_do_lookup(req, parent, name, &e, NULL);
|
||||
if (err) {
|
||||
fuse_reply_err(req, err);
|
||||
} else {
|
||||
@@ -1067,7 +1080,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
|
||||
goto out;
|
||||
}
|
||||
|
||||
- saverr = lo_do_lookup(req, parent, name, &e);
|
||||
+ saverr = lo_do_lookup(req, parent, name, &e, NULL);
|
||||
if (saverr) {
|
||||
goto out;
|
||||
}
|
||||
@@ -1544,7 +1557,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
|
||||
|
||||
if (plus) {
|
||||
if (!is_dot_or_dotdot(name)) {
|
||||
- err = lo_do_lookup(req, ino, name, &e);
|
||||
+ err = lo_do_lookup(req, ino, name, &e, NULL);
|
||||
if (err) {
|
||||
goto error;
|
||||
}
|
||||
@@ -1742,7 +1755,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
}
|
||||
|
||||
fi->fh = fh;
|
||||
- err = lo_do_lookup(req, parent, name, &e);
|
||||
+ err = lo_do_lookup(req, parent, name, &e, NULL);
|
||||
}
|
||||
if (lo->cache == CACHE_NONE) {
|
||||
fi->direct_io = 1;
|
|
@ -1,296 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 4 Feb 2021 18:34:37 +0000
|
||||
Subject: [PATCH] virtiofsd: prevent opening of special files (CVE-2020-35517)
|
||||
|
||||
A well-behaved FUSE client does not attempt to open special files with
|
||||
FUSE_OPEN because they are handled on the client side (e.g. device nodes
|
||||
are handled by client-side device drivers).
|
||||
|
||||
The check to prevent virtiofsd from opening special files is missing in
|
||||
a few cases, most notably FUSE_OPEN. A malicious client can cause
|
||||
virtiofsd to open a device node, potentially allowing the guest to
|
||||
escape. This can be exploited by a modified guest device driver. It is
|
||||
not exploitable from guest userspace since the guest kernel will handle
|
||||
special files inside the guest instead of sending FUSE requests.
|
||||
|
||||
This patch fixes this issue by introducing the lo_inode_open() function
|
||||
to check the file type before opening it. This is a short-term solution
|
||||
because it does not prevent a compromised virtiofsd process from opening
|
||||
device nodes on the host.
|
||||
|
||||
Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT
|
||||
| O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not
|
||||
necessary here. If the file exists and the user did not specify O_EXCL,
|
||||
open it via lo_do_open().
|
||||
|
||||
Reported-by: Alex Xu <alex@alxu.ca>
|
||||
Fixes: CVE-2020-35517
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
|
||||
Reviewed-by: Greg Kurz <groug@kaod.org>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20210204150208.367837-4-stefanha@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++-----------
|
||||
1 file changed, 92 insertions(+), 52 deletions(-)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||
index 2bd050b620..03c5e0d13c 100644
|
||||
--- a/tools/virtiofsd/passthrough_ll.c
|
||||
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||
@@ -567,6 +567,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino)
|
||||
return fd;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a
|
||||
+ * regular file or a directory.
|
||||
+ *
|
||||
+ * Use this helper function instead of raw openat(2) to prevent security issues
|
||||
+ * when a malicious client opens special files such as block device nodes.
|
||||
+ * Symlink inodes are also rejected since symlinks must already have been
|
||||
+ * traversed on the client side.
|
||||
+ */
|
||||
+static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode,
|
||||
+ int open_flags)
|
||||
+{
|
||||
+ g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
|
||||
+ int fd;
|
||||
+
|
||||
+ if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
|
||||
+ return -EBADF;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
|
||||
+ * that the inode is not a special file but if an external process races
|
||||
+ * with us then symlinks are traversed here. It is not possible to escape
|
||||
+ * the shared directory since it is mounted as "/" though.
|
||||
+ */
|
||||
+ fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
|
||||
+ if (fd < 0) {
|
||||
+ return -errno;
|
||||
+ }
|
||||
+ return fd;
|
||||
+}
|
||||
+
|
||||
static void lo_init(void *userdata, struct fuse_conn_info *conn)
|
||||
{
|
||||
struct lo_data *lo = (struct lo_data *)userdata;
|
||||
@@ -696,9 +728,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
|
||||
if (fi) {
|
||||
truncfd = fd;
|
||||
} else {
|
||||
- sprintf(procname, "%i", ifd);
|
||||
- truncfd = openat(lo->proc_self_fd, procname, O_RDWR);
|
||||
+ truncfd = lo_inode_open(lo, inode, O_RDWR);
|
||||
if (truncfd < 0) {
|
||||
+ errno = -truncfd;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
@@ -860,7 +892,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
struct lo_inode *dir = lo_inode(req, parent);
|
||||
|
||||
if (inodep) {
|
||||
- *inodep = NULL;
|
||||
+ *inodep = NULL; /* in case there is an error */
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1674,19 +1706,26 @@ static void update_open_flags(int writeback, int allow_direct_io,
|
||||
}
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Open a regular file, set up an fd mapping, and fill out the struct
|
||||
+ * fuse_file_info for it. If existing_fd is not negative, use that fd instead
|
||||
+ * opening a new one. Takes ownership of existing_fd.
|
||||
+ *
|
||||
+ * Returns 0 on success or a positive errno.
|
||||
+ */
|
||||
static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
|
||||
- struct fuse_file_info *fi)
|
||||
+ int existing_fd, struct fuse_file_info *fi)
|
||||
{
|
||||
- char buf[64];
|
||||
ssize_t fh;
|
||||
- int fd;
|
||||
+ int fd = existing_fd;
|
||||
|
||||
update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||||
|
||||
- sprintf(buf, "%i", inode->fd);
|
||||
- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
|
||||
- if (fd == -1) {
|
||||
- return errno;
|
||||
+ if (fd < 0) {
|
||||
+ fd = lo_inode_open(lo, inode, fi->flags);
|
||||
+ if (fd < 0) {
|
||||
+ return -fd;
|
||||
+ }
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&lo->mutex);
|
||||
@@ -1709,9 +1748,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
|
||||
static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
mode_t mode, struct fuse_file_info *fi)
|
||||
{
|
||||
- int fd;
|
||||
+ int fd = -1;
|
||||
struct lo_data *lo = lo_data(req);
|
||||
struct lo_inode *parent_inode;
|
||||
+ struct lo_inode *inode = NULL;
|
||||
struct fuse_entry_param e;
|
||||
int err;
|
||||
struct lo_cred old = {};
|
||||
@@ -1737,36 +1777,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
|
||||
update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||||
|
||||
- fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
|
||||
- mode);
|
||||
+ /* Try to create a new file but don't open existing files */
|
||||
+ fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode);
|
||||
err = fd == -1 ? errno : 0;
|
||||
- lo_restore_cred(&old);
|
||||
|
||||
- if (!err) {
|
||||
- ssize_t fh;
|
||||
+ lo_restore_cred(&old);
|
||||
|
||||
- pthread_mutex_lock(&lo->mutex);
|
||||
- fh = lo_add_fd_mapping(lo, fd);
|
||||
- pthread_mutex_unlock(&lo->mutex);
|
||||
- if (fh == -1) {
|
||||
- close(fd);
|
||||
- err = ENOMEM;
|
||||
- goto out;
|
||||
- }
|
||||
+ /* Ignore the error if file exists and O_EXCL was not given */
|
||||
+ if (err && (err != EEXIST || (fi->flags & O_EXCL))) {
|
||||
+ goto out;
|
||||
+ }
|
||||
|
||||
- fi->fh = fh;
|
||||
- err = lo_do_lookup(req, parent, name, &e, NULL);
|
||||
+ err = lo_do_lookup(req, parent, name, &e, &inode);
|
||||
+ if (err) {
|
||||
+ goto out;
|
||||
}
|
||||
- if (lo->cache == CACHE_NONE) {
|
||||
- fi->direct_io = 1;
|
||||
- } else if (lo->cache == CACHE_ALWAYS) {
|
||||
- fi->keep_cache = 1;
|
||||
+
|
||||
+ err = lo_do_open(lo, inode, fd, fi);
|
||||
+ fd = -1; /* lo_do_open() takes ownership of fd */
|
||||
+ if (err) {
|
||||
+ /* Undo lo_do_lookup() nlookup ref */
|
||||
+ unref_inode_lolocked(lo, inode, 1);
|
||||
}
|
||||
|
||||
out:
|
||||
+ lo_inode_put(lo, &inode);
|
||||
lo_inode_put(lo, &parent_inode);
|
||||
|
||||
if (err) {
|
||||
+ if (fd >= 0) {
|
||||
+ close(fd);
|
||||
+ }
|
||||
+
|
||||
fuse_reply_err(req, err);
|
||||
} else {
|
||||
fuse_reply_create(req, &e, fi);
|
||||
@@ -1780,7 +1822,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
|
||||
pid_t pid, int *err)
|
||||
{
|
||||
struct lo_inode_plock *plock;
|
||||
- char procname[64];
|
||||
int fd;
|
||||
|
||||
plock =
|
||||
@@ -1797,12 +1838,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
|
||||
}
|
||||
|
||||
/* Open another instance of file which can be used for ofd locks. */
|
||||
- sprintf(procname, "%i", inode->fd);
|
||||
-
|
||||
/* TODO: What if file is not writable? */
|
||||
- fd = openat(lo->proc_self_fd, procname, O_RDWR);
|
||||
- if (fd == -1) {
|
||||
- *err = errno;
|
||||
+ fd = lo_inode_open(lo, inode, O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ *err = -fd;
|
||||
free(plock);
|
||||
return NULL;
|
||||
}
|
||||
@@ -1949,7 +1988,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
|
||||
return;
|
||||
}
|
||||
|
||||
- err = lo_do_open(lo, inode, fi);
|
||||
+ err = lo_do_open(lo, inode, -1, fi);
|
||||
lo_inode_put(lo, &inode);
|
||||
if (err) {
|
||||
fuse_reply_err(req, err);
|
||||
@@ -2005,39 +2044,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
|
||||
static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
|
||||
struct fuse_file_info *fi)
|
||||
{
|
||||
+ struct lo_inode *inode = lo_inode(req, ino);
|
||||
+ struct lo_data *lo = lo_data(req);
|
||||
int res;
|
||||
int fd;
|
||||
- char *buf;
|
||||
|
||||
fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
|
||||
(void *)fi);
|
||||
|
||||
- if (!fi) {
|
||||
- struct lo_data *lo = lo_data(req);
|
||||
-
|
||||
- res = asprintf(&buf, "%i", lo_fd(req, ino));
|
||||
- if (res == -1) {
|
||||
- return (void)fuse_reply_err(req, errno);
|
||||
- }
|
||||
+ if (!inode) {
|
||||
+ fuse_reply_err(req, EBADF);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
- fd = openat(lo->proc_self_fd, buf, O_RDWR);
|
||||
- free(buf);
|
||||
- if (fd == -1) {
|
||||
- return (void)fuse_reply_err(req, errno);
|
||||
+ if (!fi) {
|
||||
+ fd = lo_inode_open(lo, inode, O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ res = -fd;
|
||||
+ goto out;
|
||||
}
|
||||
} else {
|
||||
fd = lo_fi_fd(req, fi);
|
||||
}
|
||||
|
||||
if (datasync) {
|
||||
- res = fdatasync(fd);
|
||||
+ res = fdatasync(fd) == -1 ? errno : 0;
|
||||
} else {
|
||||
- res = fsync(fd);
|
||||
+ res = fsync(fd) == -1 ? errno : 0;
|
||||
}
|
||||
if (!fi) {
|
||||
close(fd);
|
||||
}
|
||||
- fuse_reply_err(req, res == -1 ? errno : 0);
|
||||
+out:
|
||||
+ lo_inode_put(lo, &inode);
|
||||
+ fuse_reply_err(req, res);
|
||||
}
|
||||
|
||||
static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
|
|
@ -1,29 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Greg Kurz <groug@kaod.org>
|
||||
Date: Thu, 4 Feb 2021 18:34:38 +0000
|
||||
Subject: [PATCH] virtiofsd: Add _llseek to the seccomp whitelist
|
||||
|
||||
This is how glibc implements lseek(2) on POWER.
|
||||
|
||||
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1917692
|
||||
Signed-off-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20210121171540.1449777-1-groug@kaod.org>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_seccomp.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
|
||||
index 11623f56f2..bb8ef5b17f 100644
|
||||
--- a/tools/virtiofsd/passthrough_seccomp.c
|
||||
+++ b/tools/virtiofsd/passthrough_seccomp.c
|
||||
@@ -68,6 +68,7 @@ static const int syscall_whitelist[] = {
|
||||
SCMP_SYS(linkat),
|
||||
SCMP_SYS(listxattr),
|
||||
SCMP_SYS(lseek),
|
||||
+ SCMP_SYS(_llseek), /* For POWER */
|
||||
SCMP_SYS(madvise),
|
||||
SCMP_SYS(mkdirat),
|
||||
SCMP_SYS(mknodat),
|
|
@ -1,31 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Greg Kurz <groug@kaod.org>
|
||||
Date: Thu, 4 Feb 2021 18:34:39 +0000
|
||||
Subject: [PATCH] virtiofsd: Add restart_syscall to the seccomp whitelist
|
||||
|
||||
This is how linux restarts some system calls after SIGSTOP/SIGCONT.
|
||||
This is needed to avoid virtiofsd termination when resuming execution
|
||||
under GDB for example.
|
||||
|
||||
Signed-off-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20210201193305.136390-1-groug@kaod.org>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_seccomp.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
|
||||
index bb8ef5b17f..44d75e0e36 100644
|
||||
--- a/tools/virtiofsd/passthrough_seccomp.c
|
||||
+++ b/tools/virtiofsd/passthrough_seccomp.c
|
||||
@@ -92,6 +92,7 @@ static const int syscall_whitelist[] = {
|
||||
SCMP_SYS(renameat),
|
||||
SCMP_SYS(renameat2),
|
||||
SCMP_SYS(removexattr),
|
||||
+ SCMP_SYS(restart_syscall),
|
||||
SCMP_SYS(rt_sigaction),
|
||||
SCMP_SYS(rt_sigprocmask),
|
||||
SCMP_SYS(rt_sigreturn),
|
|
@ -1,108 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Vitaly Cheptsov <cheptsov@ispras.ru>
|
||||
Date: Tue, 2 Mar 2021 09:21:10 -0500
|
||||
Subject: [PATCH] i386/acpi: restore device paths for pre-5.1 vms
|
||||
|
||||
After fixing the _UID value for the primary PCI root bridge in
|
||||
af1b80ae it was discovered that this change updates Windows
|
||||
configuration in an incompatible way causing network configuration
|
||||
failure unless DHCP is used. More details provided on the list:
|
||||
|
||||
https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
|
||||
|
||||
This change reverts the _UID update from 1 to 0 for q35 and i440fx
|
||||
VMs before version 5.2 to maintain the original behaviour when
|
||||
upgrading.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Cc: qemu-devel@nongnu.org
|
||||
Reported-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Vitaly Cheptsov <cheptsov@ispras.ru>
|
||||
Message-Id: <20210301195919.9333-1-cheptsov@ispras.ru>
|
||||
Tested-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Fixes: af1b80ae56c9 ("i386/acpi: fix inconsistent QEMU/OVMF device paths")
|
||||
---
|
||||
hw/i386/acpi-build.c | 4 ++--
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 2 ++
|
||||
include/hw/i386/pc.h | 1 +
|
||||
4 files changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
|
||||
index 1f5c211245..b5616582a5 100644
|
||||
--- a/hw/i386/acpi-build.c
|
||||
+++ b/hw/i386/acpi-build.c
|
||||
@@ -1513,7 +1513,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
|
||||
dev = aml_device("PCI0");
|
||||
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
|
||||
aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
|
||||
- aml_append(dev, aml_name_decl("_UID", aml_int(0)));
|
||||
+ aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
|
||||
aml_append(sb_scope, dev);
|
||||
aml_append(dsdt, sb_scope);
|
||||
|
||||
@@ -1530,7 +1530,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
|
||||
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
|
||||
aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
|
||||
aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
|
||||
- aml_append(dev, aml_name_decl("_UID", aml_int(0)));
|
||||
+ aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
|
||||
aml_append(dev, build_q35_osc_method());
|
||||
aml_append(sb_scope, dev);
|
||||
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 13d1628f13..2524c96216 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -417,6 +417,7 @@ static void pc_i440fx_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pcmc->default_nic_model = "e1000";
|
||||
+ pcmc->pci_root_uid = 0;
|
||||
|
||||
m->family = "pc_piix";
|
||||
m->desc = "Standard PC (i440FX + PIIX, 1996)";
|
||||
@@ -448,6 +449,7 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m)
|
||||
compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len);
|
||||
pcmc->kvmclock_create_always = false;
|
||||
+ pcmc->pci_root_uid = 1;
|
||||
}
|
||||
|
||||
DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index a3f4959c43..c58dad5ae3 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -329,6 +329,7 @@ static void pc_q35_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pcmc->default_nic_model = "e1000e";
|
||||
+ pcmc->pci_root_uid = 0;
|
||||
|
||||
m->family = "pc_q35";
|
||||
m->desc = "Standard PC (Q35 + ICH9, 2009)";
|
||||
@@ -364,6 +365,7 @@ static void pc_q35_5_1_machine_options(MachineClass *m)
|
||||
compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len);
|
||||
pcmc->kvmclock_create_always = false;
|
||||
+ pcmc->pci_root_uid = 1;
|
||||
}
|
||||
|
||||
DEFINE_Q35_MACHINE(v5_1, "pc-q35-5.1", NULL,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 911e460097..7f8e1a791f 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -99,6 +99,7 @@ struct PCMachineClass {
|
||||
int legacy_acpi_table_size;
|
||||
unsigned acpi_data_size;
|
||||
bool do_not_add_smb_acpi;
|
||||
+ int pci_root_uid;
|
||||
|
||||
/* SMBIOS compat: */
|
||||
bool smbios_defaults;
|
|
@ -1,48 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Reiter <s.reiter@proxmox.com>
|
||||
Date: Mon, 22 Mar 2021 15:20:04 +0100
|
||||
Subject: [PATCH] monitor/qmp: fix race on CHR_EVENT_CLOSED without OOB
|
||||
|
||||
The QMP dispatcher coroutine holds the qmp_queue_lock over a yield
|
||||
point, where it expects to be rescheduled from the main context. If a
|
||||
CHR_EVENT_CLOSED event is received just then, it can race and block the
|
||||
main thread on the mutex in monitor_qmp_cleanup_queue_and_resume.
|
||||
|
||||
Calculate need_resume immediately after we pop a request from the queue,
|
||||
so that we can release the mutex before yielding.
|
||||
|
||||
Suggested-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
||||
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
||||
---
|
||||
monitor/qmp.c | 11 ++++++-----
|
||||
1 file changed, 6 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/monitor/qmp.c b/monitor/qmp.c
|
||||
index 2e37d11bd3..2aff833f7a 100644
|
||||
--- a/monitor/qmp.c
|
||||
+++ b/monitor/qmp.c
|
||||
@@ -252,6 +252,12 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data)
|
||||
}
|
||||
}
|
||||
|
||||
+ mon = req_obj->mon;
|
||||
+ /* qmp_oob_enabled() might change after "qmp_capabilities" */
|
||||
+ need_resume = !qmp_oob_enabled(mon) ||
|
||||
+ mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX - 1;
|
||||
+ qemu_mutex_unlock(&mon->qmp_queue_lock);
|
||||
+
|
||||
if (qatomic_xchg(&qmp_dispatcher_co_busy, true) == true) {
|
||||
/*
|
||||
* Someone rescheduled us (probably because a new requests
|
||||
@@ -270,11 +276,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data)
|
||||
aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co);
|
||||
qemu_coroutine_yield();
|
||||
|
||||
- mon = req_obj->mon;
|
||||
- /* qmp_oob_enabled() might change after "qmp_capabilities" */
|
||||
- need_resume = !qmp_oob_enabled(mon) ||
|
||||
- mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX - 1;
|
||||
- qemu_mutex_unlock(&mon->qmp_queue_lock);
|
||||
if (req_obj->req) {
|
||||
QDict *qdict = qobject_to(QDict, req_obj->req);
|
||||
QObject *id = qdict ? qdict_get(qdict, "id") : NULL;
|
|
@ -1,42 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 3 Dec 2020 18:23:10 +0100
|
||||
Subject: [PATCH] block: Fix locking in qmp_block_resize()
|
||||
|
||||
The drain functions assume that we hold the AioContext lock of the
|
||||
drained block node. Make sure to actually take the lock.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20201203172311.68232-3-kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
||||
---
|
||||
blockdev.c | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index fe6fb5dc1d..9a86e9fb4b 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2481,14 +2481,17 @@ void coroutine_fn qmp_block_resize(bool has_device, const char *device,
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ bdrv_co_lock(bs);
|
||||
bdrv_drained_begin(bs);
|
||||
+ bdrv_co_unlock(bs);
|
||||
+
|
||||
old_ctx = bdrv_co_enter(bs);
|
||||
blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
|
||||
bdrv_co_leave(bs, old_ctx);
|
||||
- bdrv_drained_end(bs);
|
||||
|
||||
out:
|
||||
bdrv_co_lock(bs);
|
||||
+ bdrv_drained_end(bs);
|
||||
blk_unref(blk);
|
||||
bdrv_co_unlock(bs);
|
||||
}
|
|
@ -1,118 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 3 Dec 2020 18:23:11 +0100
|
||||
Subject: [PATCH] block: Fix deadlock in bdrv_co_yield_to_drain()
|
||||
|
||||
If bdrv_co_yield_to_drain() is called for draining a block node that
|
||||
runs in a different AioContext, it keeps that AioContext locked while it
|
||||
yields and schedules a BH in the AioContext to do the actual drain.
|
||||
|
||||
As long as executing the BH is the very next thing that the event loop
|
||||
of the node's AioContext does, this actually happens to work, but when
|
||||
it tries to execute something else that wants to take the AioContext
|
||||
lock, it will deadlock. (In the bug report, this other thing is a
|
||||
virtio-scsi device running virtio_scsi_data_plane_handle_cmd().)
|
||||
|
||||
Instead, always drop the AioContext lock across the yield and reacquire
|
||||
it only when the coroutine is reentered. The BH needs to unconditionally
|
||||
take the lock for itself now.
|
||||
|
||||
This fixes the 'block_resize' QMP command on a block node that runs in
|
||||
an iothread.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6
|
||||
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1903511
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20201203172311.68232-4-kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
||||
---
|
||||
block/io.c | 41 ++++++++++++++++++++++++-----------------
|
||||
1 file changed, 24 insertions(+), 17 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index ec5e152bb7..a9f56a9ab1 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -306,17 +306,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
|
||||
if (bs) {
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
- AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
|
||||
-
|
||||
- /*
|
||||
- * When the coroutine yielded, the lock for its home context was
|
||||
- * released, so we need to re-acquire it here. If it explicitly
|
||||
- * acquired a different context, the lock is still held and we don't
|
||||
- * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
|
||||
- */
|
||||
- if (ctx == co_ctx) {
|
||||
- aio_context_acquire(ctx);
|
||||
- }
|
||||
+ aio_context_acquire(ctx);
|
||||
bdrv_dec_in_flight(bs);
|
||||
if (data->begin) {
|
||||
assert(!data->drained_end_counter);
|
||||
@@ -328,9 +318,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
data->ignore_bds_parents,
|
||||
data->drained_end_counter);
|
||||
}
|
||||
- if (ctx == co_ctx) {
|
||||
- aio_context_release(ctx);
|
||||
- }
|
||||
+ aio_context_release(ctx);
|
||||
} else {
|
||||
assert(data->begin);
|
||||
bdrv_drain_all_begin();
|
||||
@@ -348,13 +336,16 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
int *drained_end_counter)
|
||||
{
|
||||
BdrvCoDrainData data;
|
||||
+ Coroutine *self = qemu_coroutine_self();
|
||||
+ AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
+ AioContext *co_ctx = qemu_coroutine_get_aio_context(self);
|
||||
|
||||
/* Calling bdrv_drain() from a BH ensures the current coroutine yields and
|
||||
* other coroutines run if they were queued by aio_co_enter(). */
|
||||
|
||||
assert(qemu_in_coroutine());
|
||||
data = (BdrvCoDrainData) {
|
||||
- .co = qemu_coroutine_self(),
|
||||
+ .co = self,
|
||||
.bs = bs,
|
||||
.done = false,
|
||||
.begin = begin,
|
||||
@@ -368,13 +359,29 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
if (bs) {
|
||||
bdrv_inc_in_flight(bs);
|
||||
}
|
||||
- replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs),
|
||||
- bdrv_co_drain_bh_cb, &data);
|
||||
+
|
||||
+ /*
|
||||
+ * Temporarily drop the lock across yield or we would get deadlocks.
|
||||
+ * bdrv_co_drain_bh_cb() reaquires the lock as needed.
|
||||
+ *
|
||||
+ * When we yield below, the lock for the current context will be
|
||||
+ * released, so if this is actually the lock that protects bs, don't drop
|
||||
+ * it a second time.
|
||||
+ */
|
||||
+ if (ctx != co_ctx) {
|
||||
+ aio_context_release(ctx);
|
||||
+ }
|
||||
+ replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
|
||||
|
||||
qemu_coroutine_yield();
|
||||
/* If we are resumed from some other event (such as an aio completion or a
|
||||
* timer callback), it is a bug in the caller that should be fixed. */
|
||||
assert(data.done);
|
||||
+
|
||||
+ /* Reaquire the AioContext of bs if we dropped it */
|
||||
+ if (ctx != co_ctx) {
|
||||
+ aio_context_acquire(ctx);
|
||||
+ }
|
||||
}
|
||||
|
||||
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|