Compare commits

..

1 Commits

Author SHA1 Message Date
Vitaliy Filippov a6885c7d11 Add Vitastor support 2024-02-07 01:11:30 +03:00
110 changed files with 2999 additions and 5498 deletions

View File

@ -24,7 +24,6 @@ endif
PC_BIOS_FW_PURGE_LIST_IN = \ PC_BIOS_FW_PURGE_LIST_IN = \
hppa-firmware.img \ hppa-firmware.img \
hppa-firmware64.img \
openbios-ppc \ openbios-ppc \
openbios-sparc32 \ openbios-sparc32 \
openbios-sparc64 \ openbios-sparc64 \
@ -32,8 +31,7 @@ PC_BIOS_FW_PURGE_LIST_IN = \
s390-ccw.img \ s390-ccw.img \
s390-netboot.img \ s390-netboot.img \
u-boot.e500 \ u-boot.e500 \
.*[a-zA-Z0-9]\.dtb \ .*\.dtb \
.*[a-zA-Z0-9]\.dts \
qemu_vga.ndrv \ qemu_vga.ndrv \
slof.bin \ slof.bin \
opensbi-riscv.*-generic-fw_dynamic.bin \ opensbi-riscv.*-generic-fw_dynamic.bin \
@ -58,7 +56,7 @@ $(BUILDDIR): submodule
deb kvm: $(DEBS) deb kvm: $(DEBS)
$(DEB_DBG): $(DEB) $(DEB_DBG): $(DEB)
$(DEB): $(BUILDDIR) $(DEB): $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j32
lintian $(DEBS) lintian $(DEBS)
sbuild: $(DSC) sbuild: $(DSC)

138
debian/changelog vendored
View File

@ -1,140 +1,8 @@
pve-qemu-kvm (9.0.2-4) bookworm; urgency=medium pve-qemu-kvm (8.1.5-2+vitastor1) bookworm; urgency=medium
* async snapshot: ensure any dynamic vCPU-throttling applied for * Add Vitastor support
auto-converge gets always disabled again after finishing the snapshot.
-- Proxmox Support Team <support@proxmox.com> Sun, 10 Nov 2024 11:23:09 +0100 -- Vitaliy Filippov <vitalif@yourcmc.ru> Wed, 07 Feb 2024 01:11:00 +0300
pve-qemu-kvm (9.0.2-3) bookworm; urgency=medium
* pick up fix for VirtIO PCI regressions
* pick up stable fixes for 9.0, including fixes for VirtIO-net, ARM and
x86(_64) emulation, CVEs to harden NBD server against malicious clients,
as well as a few others (VNC, physmem, Intel IOMMU, ...).
-- Proxmox Support Team <support@proxmox.com> Fri, 06 Sep 2024 16:21:42 +0200
pve-qemu-kvm (9.0.2-2) bookworm; urgency=medium
* actually update submodule to QEMU 9.0.2. The previous release was still
based on 9.0.0 by mistake.
-- Proxmox Support Team <support@proxmox.com> Wed, 07 Aug 2024 10:16:01 +0200
pve-qemu-kvm (9.0.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.0.2. While our version had most
stable fixes included already, there are new fixes for VirtIO and VGA
display screen blanking (#4786)
* backport fix for a regression with the LSI-53c895a controller and one for
the boot order getting ignored for USB storage
-- Proxmox Support Team <support@proxmox.com> Mon, 29 Jul 2024 18:59:40 +0200
pve-qemu-kvm (9.0.0-6) bookworm; urgency=medium
* fix a regression in the zeroinit block driver that prevented importing and
cloning disks to RBD storages which are not using the krbd setting
-- Proxmox Support Team <support@proxmox.com> Mon, 08 Jul 2024 16:11:15 +0200
pve-qemu-kvm (9.0.0-5) bookworm; urgency=medium
* backport fix for CVE-2024-4467 to prevent malicious qcow2 image files from
already causing bad effects if being queried via 'qemu-img info'. For
Proxmox VE, this is an additional safeguard, as currently it directly
creates and manages the qcow2 images used by VMs and does not allow
unprivileged users to import them
* fix #4726: code cleanup: avoid superfluous check in vma backup code
-- Proxmox Support Team <support@proxmox.com> Wed, 03 Jul 2024 13:13:35 +0200
pve-qemu-kvm (9.0.0-4) bookworm; urgency=medium
* fix crash after saving a snapshot without including VM state when a VirtIO
block device with iothread is configured.
* fix edge case in error handling when opening a block device from PBS fails
* minor code cleanup in backup code
-- Proxmox Support Team <support@proxmox.com> Mon, 01 Jul 2024 11:26:11 +0200
pve-qemu-kvm (9.0.0-3) bookworm; urgency=medium
* fix crash when doing resize after hotplugging a disk using io_uring
* fix some minor issues in software CPU emulation (i.e. non-KVM) for ARM and
x86(_64)
-- Proxmox Support Team <support@proxmox.com> Wed, 29 May 2024 15:55:44 +0200
pve-qemu-kvm (9.0.0-2) bookworm; urgency=medium
* fix #5409: backup: fix copy-before-write timeout
* backup: improve error when copy-before-write fails for fleecing
* fix forwards and backwards migration with VirtIO-GPU display
* fix a regression in pflash device introduced in 8.2
* revert a commit for VirtIO PCI devices that turned out to cause more
potential security issues than what it fixed
* move compatibility flags for a new VirtIO-net feature to the correct
machine type. The feature was introduced in QEMU 8.2, but the
compatibility flags got added to machine version 8.0 instead of 8.1. This
breaks backwards migration with machine version 8.1 from a 8.2/9.0 binary
to an 8.1 binary, in cases where the guest kernel enables the feature
(e.g. Ubuntu 23.10).
While that breaks migration with machine version 8.1 from an unpatched to
a patched binary, Proxmox VE only ever had 8.2 on the test repository and
9.0 not yet in any public repository.
-- Proxmox Support Team <support@proxmox.com> Fri, 17 May 2024 17:04:52 +0200
pve-qemu-kvm (9.0.0-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.0.0
-- Proxmox Support Team <support@proxmox.com> Mon, 29 Apr 2024 10:51:37 +0200
pve-qemu-kvm (8.2.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 8.2.2
-- Proxmox Support Team <support@proxmox.com> Sat, 27 Apr 2024 12:44:30 +0200
pve-qemu-kvm (8.1.5-5) bookworm; urgency=medium
* implement support for backup fleecing
-- Proxmox Support Team <support@proxmox.com> Thu, 11 Apr 2024 17:46:48 +0200
pve-qemu-kvm (8.1.5-4) bookworm; urgency=medium
* fix live-import for certain kinds of VMDK images that rely on padding
* backup: avoid bubbling up first error if it's an ECANCELED one, as those
are often a result of canceling the job due to running into an actual
issue.
* backup: factor out & clean up gathering device info into helper
-- Proxmox Support Team <support@proxmox.com> Tue, 12 Mar 2024 14:08:40 +0100
pve-qemu-kvm (8.1.5-3) bookworm; urgency=medium
* backport fix for potential deadlock during QMP stop command if the VM has
disks attached through VirtIO-Block and IO-Thread enabled
* fix #4507: add patch to automatically increase NOFILE soft limit
-- Proxmox Support Team <support@proxmox.com> Wed, 21 Feb 2024 20:11:23 +0100
pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium

1
debian/control vendored
View File

@ -59,6 +59,7 @@ Depends: ceph-common (>= 0.48),
libspice-server1 (>= 0.14.0~), libspice-server1 (>= 0.14.0~),
libusb-1.0-0 (>= 1.0.17-1), libusb-1.0-0 (>= 1.0.17-1),
libusbredirparser1 (>= 0.6-2), libusbredirparser1 (>= 0.6-2),
vitastor-client (>= 0.9.4),
libuuid1, libuuid1,
${misc:Depends}, ${misc:Depends},
${shlibs:Depends}, ${shlibs:Depends},

View File

@ -27,18 +27,18 @@ Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
Signed-off-by: John Snow <jsnow@redhat.com> Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebased for 8.2.2] [FE: rebased for 8.1.1]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
block/mirror.c | 99 ++++++++++++++++++++------ block/mirror.c | 98 +++++++++++++++++++++-----
blockdev.c | 38 +++++++++- blockdev.c | 38 +++++++++-
include/block/block_int-global-state.h | 4 +- include/block/block_int-global-state.h | 4 +-
qapi/block-core.json | 25 ++++++- qapi/block-core.json | 25 ++++++-
tests/unit/test-block-iothread.c | 4 +- tests/unit/test-block-iothread.c | 4 +-
5 files changed, 142 insertions(+), 28 deletions(-) 5 files changed, 142 insertions(+), 27 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c diff --git a/block/mirror.c b/block/mirror.c
index 1bdce3b657..0c5c72df2e 100644 index d3cacd1708..1ff42c8af1 100644
--- a/block/mirror.c --- a/block/mirror.c
+++ b/block/mirror.c +++ b/block/mirror.c
@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob { @@ -51,7 +51,7 @@ typedef struct MirrorBlockJob {
@ -50,7 +50,7 @@ index 1bdce3b657..0c5c72df2e 100644
BlockMirrorBackingMode backing_mode; BlockMirrorBackingMode backing_mode;
/* Whether the target image requires explicit zero-initialization */ /* Whether the target image requires explicit zero-initialization */
bool zero_target; bool zero_target;
@@ -73,6 +73,8 @@ typedef struct MirrorBlockJob { @@ -65,6 +65,8 @@ typedef struct MirrorBlockJob {
size_t buf_size; size_t buf_size;
int64_t bdev_length; int64_t bdev_length;
unsigned long *cow_bitmap; unsigned long *cow_bitmap;
@ -59,9 +59,9 @@ index 1bdce3b657..0c5c72df2e 100644
BdrvDirtyBitmap *dirty_bitmap; BdrvDirtyBitmap *dirty_bitmap;
BdrvDirtyBitmapIter *dbi; BdrvDirtyBitmapIter *dbi;
uint8_t *buf; uint8_t *buf;
@@ -722,7 +724,8 @@ static int mirror_exit_common(Job *job) @@ -705,7 +707,8 @@ static int mirror_exit_common(Job *job)
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort); &error_abort);
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
- BlockDriverState *backing = s->is_none_mode ? src : s->base; - BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ BlockDriverState *backing; + BlockDriverState *backing;
@ -69,7 +69,7 @@ index 1bdce3b657..0c5c72df2e 100644
BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs); BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
if (bdrv_cow_bs(unfiltered_target) != backing) { if (bdrv_cow_bs(unfiltered_target) != backing) {
@@ -819,6 +822,16 @@ static void mirror_abort(Job *job) @@ -809,6 +812,16 @@ static void mirror_abort(Job *job)
assert(ret == 0); assert(ret == 0);
} }
@ -86,7 +86,7 @@ index 1bdce3b657..0c5c72df2e 100644
static void coroutine_fn mirror_throttle(MirrorBlockJob *s) static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{ {
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -1015,7 +1028,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) @@ -997,7 +1010,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_free_init(s); mirror_free_init(s);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@ -96,7 +96,7 @@ index 1bdce3b657..0c5c72df2e 100644
ret = mirror_dirty_init(s); ret = mirror_dirty_init(s);
if (ret < 0 || job_is_cancelled(&s->common.job)) { if (ret < 0 || job_is_cancelled(&s->common.job)) {
goto immediate_exit; goto immediate_exit;
@@ -1304,6 +1318,7 @@ static const BlockJobDriver mirror_job_driver = { @@ -1251,6 +1265,7 @@ static const BlockJobDriver mirror_job_driver = {
.run = mirror_run, .run = mirror_run,
.prepare = mirror_prepare, .prepare = mirror_prepare,
.abort = mirror_abort, .abort = mirror_abort,
@ -104,7 +104,7 @@ index 1bdce3b657..0c5c72df2e 100644
.pause = mirror_pause, .pause = mirror_pause,
.complete = mirror_complete, .complete = mirror_complete,
.cancel = mirror_cancel, .cancel = mirror_cancel,
@@ -1322,6 +1337,7 @@ static const BlockJobDriver commit_active_job_driver = { @@ -1267,6 +1282,7 @@ static const BlockJobDriver commit_active_job_driver = {
.run = mirror_run, .run = mirror_run,
.prepare = mirror_prepare, .prepare = mirror_prepare,
.abort = mirror_abort, .abort = mirror_abort,
@ -112,7 +112,7 @@ index 1bdce3b657..0c5c72df2e 100644
.pause = mirror_pause, .pause = mirror_pause,
.complete = mirror_complete, .complete = mirror_complete,
.cancel = commit_active_cancel, .cancel = commit_active_cancel,
@@ -1714,7 +1730,10 @@ static BlockJob *mirror_start_job( @@ -1658,7 +1674,10 @@ static BlockJob *mirror_start_job(
BlockCompletionFunc *cb, BlockCompletionFunc *cb,
void *opaque, void *opaque,
const BlockJobDriver *driver, const BlockJobDriver *driver,
@ -124,9 +124,9 @@ index 1bdce3b657..0c5c72df2e 100644
bool auto_complete, const char *filter_node_name, bool auto_complete, const char *filter_node_name,
bool is_mirror, MirrorCopyMode copy_mode, bool is_mirror, MirrorCopyMode copy_mode,
Error **errp) Error **errp)
@@ -1728,10 +1747,39 @@ static BlockJob *mirror_start_job( @@ -1670,10 +1689,39 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
GLOBAL_STATE_CODE(); int ret;
- if (granularity == 0) { - if (granularity == 0) {
- granularity = bdrv_get_default_bitmap_granularity(target); - granularity = bdrv_get_default_bitmap_granularity(target);
@ -166,7 +166,7 @@ index 1bdce3b657..0c5c72df2e 100644
assert(is_power_of_2(granularity)); assert(is_power_of_2(granularity));
if (buf_size < 0) { if (buf_size < 0) {
@@ -1871,7 +1919,9 @@ static BlockJob *mirror_start_job( @@ -1804,7 +1852,9 @@ static BlockJob *mirror_start_job(
s->replaces = g_strdup(replaces); s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error; s->on_source_error = on_source_error;
s->on_target_error = on_target_error; s->on_target_error = on_target_error;
@ -176,10 +176,10 @@ index 1bdce3b657..0c5c72df2e 100644
+ s->bitmap_mode = bitmap_mode; + s->bitmap_mode = bitmap_mode;
s->backing_mode = backing_mode; s->backing_mode = backing_mode;
s->zero_target = zero_target; s->zero_target = zero_target;
qatomic_set(&s->copy_mode, copy_mode); s->copy_mode = copy_mode;
@@ -1897,6 +1947,18 @@ static BlockJob *mirror_start_job( @@ -1825,6 +1875,18 @@ static BlockJob *mirror_start_job(
*/
bdrv_disable_dirty_bitmap(s->dirty_bitmap); bdrv_disable_dirty_bitmap(s->dirty_bitmap);
}
+ if (s->sync_bitmap) { + if (s->sync_bitmap) {
+ bdrv_dirty_bitmap_set_busy(s->sync_bitmap, true); + bdrv_dirty_bitmap_set_busy(s->sync_bitmap, true);
@ -193,10 +193,10 @@ index 1bdce3b657..0c5c72df2e 100644
+ } + }
+ } + }
+ +
bdrv_graph_wrlock();
ret = block_job_add_bdrv(&s->common, "source", bs, 0, ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
@@ -1979,6 +2041,9 @@ fail: BLK_PERM_CONSISTENT_READ,
@@ -1902,6 +1964,9 @@ fail:
if (s->dirty_bitmap) { if (s->dirty_bitmap) {
bdrv_release_dirty_bitmap(s->dirty_bitmap); bdrv_release_dirty_bitmap(s->dirty_bitmap);
} }
@ -206,7 +206,7 @@ index 1bdce3b657..0c5c72df2e 100644
job_early_fail(&s->common.job); job_early_fail(&s->common.job);
} }
@@ -2001,35 +2066,28 @@ void mirror_start(const char *job_id, BlockDriverState *bs, @@ -1919,31 +1984,25 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces, BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed, int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size, uint32_t granularity, int64_t buf_size,
@ -231,12 +231,8 @@ index 1bdce3b657..0c5c72df2e 100644
- MirrorSyncMode_str(mode)); - MirrorSyncMode_str(mode));
- return; - return;
- } - }
-
bdrv_graph_rdlock_main_loop();
- is_none_mode = mode == MIRROR_SYNC_MODE_NONE; - is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL; base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
bdrv_graph_rdunlock_main_loop();
mirror_start_job(job_id, bs, creation_flags, target, replaces, mirror_start_job(job_id, bs, creation_flags, target, replaces,
speed, granularity, buf_size, backing_mode, zero_target, speed, granularity, buf_size, backing_mode, zero_target,
on_source_error, on_target_error, unmap, NULL, NULL, on_source_error, on_target_error, unmap, NULL, NULL,
@ -247,7 +243,7 @@ index 1bdce3b657..0c5c72df2e 100644
} }
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -2056,7 +2114,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, @@ -1970,7 +2029,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
job_id, bs, creation_flags, base, NULL, speed, 0, 0, job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN, false, MIRROR_LEAVE_BACKING_CHAIN, false,
on_error, on_error, true, cb, opaque, on_error, on_error, true, cb, opaque,
@ -258,10 +254,10 @@ index 1bdce3b657..0c5c72df2e 100644
errp); errp);
if (!job) { if (!job) {
diff --git a/blockdev.c b/blockdev.c diff --git a/blockdev.c b/blockdev.c
index 4c33c3f5f0..f3e508a6a7 100644 index c28462a633..a402fa4bf7 100644
--- a/blockdev.c --- a/blockdev.c
+++ b/blockdev.c +++ b/blockdev.c
@@ -2776,6 +2776,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, @@ -2849,6 +2849,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, BlockDriverState *target,
const char *replaces, const char *replaces,
enum MirrorSyncMode sync, enum MirrorSyncMode sync,
@ -271,7 +267,7 @@ index 4c33c3f5f0..f3e508a6a7 100644
BlockMirrorBackingMode backing_mode, BlockMirrorBackingMode backing_mode,
bool zero_target, bool zero_target,
bool has_speed, int64_t speed, bool has_speed, int64_t speed,
@@ -2794,6 +2797,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, @@ -2867,6 +2870,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
{ {
BlockDriverState *unfiltered_bs; BlockDriverState *unfiltered_bs;
int job_flags = JOB_DEFAULT; int job_flags = JOB_DEFAULT;
@ -279,7 +275,7 @@ index 4c33c3f5f0..f3e508a6a7 100644
GLOBAL_STATE_CODE(); GLOBAL_STATE_CODE();
GRAPH_RDLOCK_GUARD_MAINLOOP(); GRAPH_RDLOCK_GUARD_MAINLOOP();
@@ -2848,6 +2852,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, @@ -2921,6 +2925,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL; sync = MIRROR_SYNC_MODE_FULL;
} }
@ -309,7 +305,7 @@ index 4c33c3f5f0..f3e508a6a7 100644
if (!replaces) { if (!replaces) {
/* We want to mirror from @bs, but keep implicit filters on top */ /* We want to mirror from @bs, but keep implicit filters on top */
unfiltered_bs = bdrv_skip_implicit_filters(bs); unfiltered_bs = bdrv_skip_implicit_filters(bs);
@@ -2889,8 +2916,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, @@ -2966,8 +2993,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
* and will allow to check whether the node still exist at mirror completion * and will allow to check whether the node still exist at mirror completion
*/ */
mirror_start(job_id, bs, target, mirror_start(job_id, bs, target,
@ -320,7 +316,7 @@ index 4c33c3f5f0..f3e508a6a7 100644
on_source_error, on_target_error, unmap, filter_node_name, on_source_error, on_target_error, unmap, filter_node_name,
copy_mode, errp); copy_mode, errp);
} }
@@ -3034,6 +3061,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -3115,6 +3142,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
blockdev_mirror_common(arg->job_id, bs, target_bs, blockdev_mirror_common(arg->job_id, bs, target_bs,
arg->replaces, arg->sync, arg->replaces, arg->sync,
@ -329,7 +325,7 @@ index 4c33c3f5f0..f3e508a6a7 100644
backing_mode, zero_target, backing_mode, zero_target,
arg->has_speed, arg->speed, arg->has_speed, arg->speed,
arg->has_granularity, arg->granularity, arg->has_granularity, arg->granularity,
@@ -3053,6 +3082,8 @@ void qmp_blockdev_mirror(const char *job_id, @@ -3136,6 +3165,8 @@ void qmp_blockdev_mirror(const char *job_id,
const char *device, const char *target, const char *device, const char *target,
const char *replaces, const char *replaces,
MirrorSyncMode sync, MirrorSyncMode sync,
@ -338,7 +334,7 @@ index 4c33c3f5f0..f3e508a6a7 100644
bool has_speed, int64_t speed, bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity, bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size, bool has_buf_size, int64_t buf_size,
@@ -3093,7 +3124,8 @@ void qmp_blockdev_mirror(const char *job_id, @@ -3184,7 +3215,8 @@ void qmp_blockdev_mirror(const char *job_id,
} }
blockdev_mirror_common(job_id, bs, target_bs, blockdev_mirror_common(job_id, bs, target_bs,
@ -349,10 +345,10 @@ index 4c33c3f5f0..f3e508a6a7 100644
has_granularity, granularity, has_granularity, granularity,
has_buf_size, buf_size, has_buf_size, buf_size,
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index eb2d92a226..f0c642b194 100644 index da5fb31089..32f0f9858a 100644
--- a/include/block/block_int-global-state.h --- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h
@@ -158,7 +158,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs, @@ -152,7 +152,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces, BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed, int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size, uint32_t granularity, int64_t buf_size,
@ -364,10 +360,10 @@ index eb2d92a226..f0c642b194 100644
BlockdevOnError on_source_error, BlockdevOnError on_source_error,
BlockdevOnError on_target_error, BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json diff --git a/qapi/block-core.json b/qapi/block-core.json
index b179d65520..905da8be72 100644 index bca1a0c372..a5cea82139 100644
--- a/qapi/block-core.json --- a/qapi/block-core.json
+++ b/qapi/block-core.json +++ b/qapi/block-core.json
@@ -2174,6 +2174,15 @@ @@ -2145,6 +2145,15 @@
# destination (all the disk, only the sectors allocated in the # destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O). # topmost image, or only new I/O).
# #
@ -383,7 +379,7 @@ index b179d65520..905da8be72 100644
# @granularity: granularity of the dirty bitmap, default is 64K if the # @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are # image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2 # smaller than that, else the cluster size. Must be a power of 2
@@ -2216,7 +2225,9 @@ @@ -2187,7 +2196,9 @@
{ 'struct': 'DriveMirror', { 'struct': 'DriveMirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*format': 'str', '*node-name': 'str', '*replaces': 'str', '*format': 'str', '*node-name': 'str', '*replaces': 'str',
@ -394,7 +390,7 @@ index b179d65520..905da8be72 100644
'*speed': 'int', '*granularity': 'uint32', '*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError',
@@ -2496,6 +2507,15 @@ @@ -2471,6 +2482,15 @@
# destination (all the disk, only the sectors allocated in the # destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O). # topmost image, or only new I/O).
# #
@ -410,7 +406,7 @@ index b179d65520..905da8be72 100644
# @granularity: granularity of the dirty bitmap, default is 64K if the # @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are # image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2 # smaller than that, else the cluster size. Must be a power of 2
@@ -2544,7 +2564,8 @@ @@ -2521,7 +2541,8 @@
{ 'command': 'blockdev-mirror', { 'command': 'blockdev-mirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*replaces': 'str', '*replaces': 'str',
@ -421,10 +417,10 @@ index b179d65520..905da8be72 100644
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError',
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index 3766d5de6b..afa44cbd34 100644 index d727a5fee8..8a34aa2328 100644
--- a/tests/unit/test-block-iothread.c --- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c +++ b/tests/unit/test-block-iothread.c
@@ -755,8 +755,8 @@ static void test_propagate_mirror(void) @@ -757,8 +757,8 @@ static void test_propagate_mirror(void)
/* Start a mirror job */ /* Start a mirror job */
mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,
@ -434,4 +430,4 @@ index 3766d5de6b..afa44cbd34 100644
+ false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, + false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
&error_abort); &error_abort);
WITH_JOB_LOCK_GUARD() {

View File

@ -24,10 +24,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 18 insertions(+), 6 deletions(-) 1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c diff --git a/block/mirror.c b/block/mirror.c
index 0c5c72df2e..37fee3fa25 100644 index 1ff42c8af1..11b8a8e959 100644
--- a/block/mirror.c --- a/block/mirror.c
+++ b/block/mirror.c +++ b/block/mirror.c
@@ -693,8 +693,6 @@ static int mirror_exit_common(Job *job) @@ -682,8 +682,6 @@ static int mirror_exit_common(Job *job)
bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs); bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
} }
@ -36,9 +36,9 @@ index 0c5c72df2e..37fee3fa25 100644
/* Make sure that the source BDS doesn't go away during bdrv_replace_node, /* Make sure that the source BDS doesn't go away during bdrv_replace_node,
* before we can call bdrv_drained_end */ * before we can call bdrv_drained_end */
bdrv_ref(src); bdrv_ref(src);
@@ -800,6 +798,18 @@ static int mirror_exit_common(Job *job) @@ -788,6 +786,18 @@ static int mirror_exit_common(Job *job)
bdrv_drained_end(target_bs); block_job_remove_all_bdrv(bjob);
bdrv_unref(target_bs); bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
+ if (s->sync_bitmap) { + if (s->sync_bitmap) {
+ if (s->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS || + if (s->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS ||
@ -55,7 +55,7 @@ index 0c5c72df2e..37fee3fa25 100644
bs_opaque->job = NULL; bs_opaque->job = NULL;
bdrv_drained_end(src); bdrv_drained_end(src);
@@ -1757,10 +1767,6 @@ static BlockJob *mirror_start_job( @@ -1699,10 +1709,6 @@ static BlockJob *mirror_start_job(
" sync mode", " sync mode",
MirrorSyncMode_str(sync_mode)); MirrorSyncMode_str(sync_mode));
return NULL; return NULL;
@ -66,7 +66,7 @@ index 0c5c72df2e..37fee3fa25 100644
} }
} else if (bitmap) { } else if (bitmap) {
error_setg(errp, error_setg(errp,
@@ -1777,6 +1783,12 @@ static BlockJob *mirror_start_job( @@ -1719,6 +1725,12 @@ static BlockJob *mirror_start_job(
return NULL; return NULL;
} }
granularity = bdrv_dirty_bitmap_granularity(bitmap); granularity = bdrv_dirty_bitmap_granularity(bitmap);

View File

@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 3 insertions(+) 1 file changed, 3 insertions(+)
diff --git a/blockdev.c b/blockdev.c diff --git a/blockdev.c b/blockdev.c
index f3e508a6a7..37b8437f3e 100644 index a402fa4bf7..01b0ab0549 100644
--- a/blockdev.c --- a/blockdev.c
+++ b/blockdev.c +++ b/blockdev.c
@@ -2873,6 +2873,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, @@ -2946,6 +2946,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return; return;
} }

View File

@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+), 7 deletions(-) 1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c diff --git a/block/mirror.c b/block/mirror.c
index 37fee3fa25..6b3cce1007 100644 index 11b8a8e959..00f2665ca4 100644
--- a/block/mirror.c --- a/block/mirror.c
+++ b/block/mirror.c +++ b/block/mirror.c
@@ -804,8 +804,8 @@ static int mirror_exit_common(Job *job) @@ -792,8 +792,8 @@ static int mirror_exit_common(Job *job)
job->ret == 0 && ret == 0)) { job->ret == 0 && ret == 0)) {
/* Success; synchronize copy back to sync. */ /* Success; synchronize copy back to sync. */
bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL); bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
@ -30,7 +30,7 @@ index 37fee3fa25..6b3cce1007 100644
} }
} }
bdrv_release_dirty_bitmap(s->dirty_bitmap); bdrv_release_dirty_bitmap(s->dirty_bitmap);
@@ -1964,11 +1964,8 @@ static BlockJob *mirror_start_job( @@ -1892,11 +1892,8 @@ static BlockJob *mirror_start_job(
} }
if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) { if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
@ -43,4 +43,4 @@ index 37fee3fa25..6b3cce1007 100644
+ NULL, true); + NULL, true);
} }
bdrv_graph_wrlock(); ret = block_job_add_bdrv(&s->common, "source", bs, 0,

View File

@ -12,7 +12,7 @@ uniform w.r.t. backup block jobs.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebase for 8.2.2] [FE: rebase for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
block/mirror.c | 28 +++------------ block/mirror.c | 28 +++------------
@ -21,12 +21,12 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
3 files changed, 70 insertions(+), 59 deletions(-) 3 files changed, 70 insertions(+), 59 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c diff --git a/block/mirror.c b/block/mirror.c
index 6b3cce1007..2f1223852b 100644 index 00f2665ca4..60cf574de5 100644
--- a/block/mirror.c --- a/block/mirror.c
+++ b/block/mirror.c +++ b/block/mirror.c
@@ -1757,31 +1757,13 @@ static BlockJob *mirror_start_job( @@ -1699,31 +1699,13 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
GLOBAL_STATE_CODE(); int ret;
- if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { - if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
- error_setg(errp, "Sync mode '%s' not supported", - error_setg(errp, "Sync mode '%s' not supported",
@ -62,10 +62,10 @@ index 6b3cce1007..2f1223852b 100644
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) { if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
diff --git a/blockdev.c b/blockdev.c diff --git a/blockdev.c b/blockdev.c
index 37b8437f3e..ed8198f351 100644 index 01b0ab0549..cd5f205ad1 100644
--- a/blockdev.c --- a/blockdev.c
+++ b/blockdev.c +++ b/blockdev.c
@@ -2852,7 +2852,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, @@ -2925,7 +2925,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL; sync = MIRROR_SYNC_MODE_FULL;
} }

View File

@ -78,7 +78,7 @@ index 252de85681..8db28f9272 100644
/** /**
diff --git a/monitor/monitor.c b/monitor/monitor.c diff --git a/monitor/monitor.c b/monitor/monitor.c
index 01ede1babd..5681bca346 100644 index dc352f9e9d..56e1307014 100644
--- a/monitor/monitor.c --- a/monitor/monitor.c
+++ b/monitor/monitor.c +++ b/monitor/monitor.c
@@ -117,6 +117,21 @@ bool monitor_cur_is_qmp(void) @@ -117,6 +117,21 @@ bool monitor_cur_is_qmp(void)

View File

@ -22,7 +22,7 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
1 file changed, 2 insertions(+), 12 deletions(-) 1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 2d0c607177..97e51733af 100644 index 32c70c9e99..984b6a3145 100644
--- a/hw/scsi/megasas.c --- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c
@@ -1781,7 +1781,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) @@ -1781,7 +1781,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)

View File

@ -55,10 +55,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
1 file changed, 6 insertions(+), 6 deletions(-) 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/ide/core.c b/hw/ide/core.c diff --git a/hw/ide/core.c b/hw/ide/core.c
index e8cb2dac92..3b21acf651 100644 index c3508acbb1..289347af58 100644
--- a/hw/ide/core.c --- a/hw/ide/core.c
+++ b/hw/ide/core.c +++ b/hw/ide/core.c
@@ -456,7 +456,7 @@ static void ide_trim_bh_cb(void *opaque) @@ -444,7 +444,7 @@ static void ide_trim_bh_cb(void *opaque)
iocb->bh = NULL; iocb->bh = NULL;
qemu_aio_unref(iocb); qemu_aio_unref(iocb);
@ -67,7 +67,7 @@ index e8cb2dac92..3b21acf651 100644
blk_dec_in_flight(blk); blk_dec_in_flight(blk);
} }
@@ -516,6 +516,8 @@ static void ide_issue_trim_cb(void *opaque, int ret) @@ -504,6 +504,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
done: done:
iocb->aiocb = NULL; iocb->aiocb = NULL;
if (iocb->bh) { if (iocb->bh) {
@ -76,7 +76,7 @@ index e8cb2dac92..3b21acf651 100644
replay_bh_schedule_event(iocb->bh); replay_bh_schedule_event(iocb->bh);
} }
} }
@@ -528,9 +530,6 @@ BlockAIOCB *ide_issue_trim( @@ -516,9 +518,6 @@ BlockAIOCB *ide_issue_trim(
IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
TrimAIOCB *iocb; TrimAIOCB *iocb;
@ -86,7 +86,7 @@ index e8cb2dac92..3b21acf651 100644
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s; iocb->s = s;
iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
@@ -754,8 +753,9 @@ void ide_cancel_dma_sync(IDEState *s) @@ -742,8 +741,9 @@ void ide_cancel_dma_sync(IDEState *s)
*/ */
if (s->bus->dma->aiocb) { if (s->bus->dma->aiocb) {
trace_ide_cancel_dma_sync_remaining(); trace_ide_cancel_dma_sync_remaining();

View File

@ -0,0 +1,48 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 28 Jul 2023 10:47:48 +0200
Subject: [PATCH] migration/block-dirty-bitmap: fix loading bitmap when there
is an iothread
The bdrv_create_dirty_bitmap() function (which is also called by
bdrv_dirty_bitmap_create_successor()) uses bdrv_getlength(bs). This is
a wrapper around a coroutine, and thus uses bdrv_poll_co(). Polling
tries to release the AioContext which will trigger an assert() if it
hasn't been acquired before.
The issue does not happen for migration, because there we are in a
coroutine already, so the wrapper will just call bdrv_co_getlength()
directly without polling.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/block-dirty-bitmap.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 032fc5f405..e1ae3b7316 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -805,8 +805,11 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
"destination", bdrv_dirty_bitmap_name(s->bitmap));
return -EINVAL;
} else {
+ AioContext *ctx = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(ctx);
s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
s->bitmap_name, &local_err);
+ aio_context_release(ctx);
if (!s->bitmap) {
error_report_err(local_err);
return -EINVAL;
@@ -833,7 +836,10 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
bdrv_disable_dirty_bitmap(s->bitmap);
if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) {
+ AioContext *ctx = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(ctx);
bdrv_dirty_bitmap_create_successor(s->bitmap, &local_err);
+ aio_context_release(ctx);
if (local_err) {
error_report_err(local_err);
return -EINVAL;

View File

@ -0,0 +1,140 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 28 Sep 2023 10:07:03 +0200
Subject: [PATCH] Revert "Revert "graph-lock: Disable locking for now""
This reverts commit 3cce22defb4b0e47cf135444e30cc673cff5ebad.
There are still some issues with graph locking, e.g. deadlocks during
backup canceling [0]. Because the AioContext locks still exist, it
should be safe to disable locking again.
From the original 80fc5d2600 ("graph-lock: Disable locking for now"):
> We don't currently rely on graph locking yet. It is supposed to replace
> the AioContext lock eventually to enable multiqueue support, but as long
> as we still have the AioContext lock, it is sufficient without the graph
> lock. Once the AioContext lock goes away, the deadlock doesn't exist any
> more either and this commit can be reverted. (Of course, it can also be
> reverted while the AioContext lock still exists if the callers have been
> fixed.)
[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/graph-lock.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 5e66f01ae8..5c2873262a 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
+#if 0
/* Written and read with atomic operations. */
static int has_writer;
+#endif
/*
* A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
g_free(ctx->bdrv_graph);
}
+#if 0
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
@@ -105,12 +108,19 @@ static uint32_t reader_count(void)
assert((int32_t)rd >= 0);
return rd;
}
+#endif
void bdrv_graph_wrlock(BlockDriverState *bs)
{
+#if 0
AioContext *ctx = NULL;
GLOBAL_STATE_CODE();
+ /*
+ * TODO Some callers hold an AioContext lock when this is called, which
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+ * AioContext locks are gone).
+ */
assert(!qatomic_read(&has_writer));
/*
@@ -158,11 +168,13 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
if (ctx) {
aio_context_acquire(bdrv_get_aio_context(bs));
}
+#endif
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
+#if 0
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
@@ -174,10 +186,13 @@ void bdrv_graph_wrunlock(void)
/* Wake up all coroutine that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -237,10 +252,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
+#endif
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -258,6 +275,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
+#endif
}
void bdrv_graph_rdlock_main_loop(void)
@@ -275,13 +293,19 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
/* reader_count() is slow due to aio_context_list_lock lock contention */
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
#endif
+#endif
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
assert(qatomic_read(&has_writer));
+#endif
}

View File

@ -1,35 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Mon, 29 Apr 2024 15:41:11 +0200
Subject: [PATCH] block/copy-before-write: use uint64_t for timeout in
nanoseconds
rather than the uint32_t for which the maximum is slightly more than 4
seconds and larger values would overflow. The QAPI interface allows
specifying the number of seconds, so only values 0 to 4 are safe right
now, other values lead to a much lower timeout than a user expects.
The block_copy() call where this is used already takes a uint64_t for
the timeout, so no change required there.
Fixes: 6db7fd1ca9 ("block/copy-before-write: implement cbw-timeout option")
Reported-by: Friedrich Weber <f.weber@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Tested-by: Friedrich Weber <f.weber@proxmox.com>
---
block/copy-before-write.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 8aba27a71d..026fa9840f 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -43,7 +43,7 @@ typedef struct BDRVCopyBeforeWriteState {
BlockCopyState *bcs;
BdrvChild *target;
OnCbwError on_cbw_error;
- uint32_t cbw_timeout_ns;
+ uint64_t cbw_timeout_ns;
/*
* @lock: protects access to @access_bitmap, @done_bitmap and

View File

@ -1,55 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Date: Thu, 11 Apr 2024 11:29:22 +0200
Subject: [PATCH] block/copy-before-write: fix permission
In case when source node does not have any parents, the condition still
works as required: the backup job does create the parent by
block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child
Still, in this case checking @perm variable doesn't work, as backup job
creates the root blk with empty permissions (as it relies on the CBW filter
to require correct permissions and doesn't want to create extra
conflicts).
So, we should not check @perm.
The hack may be dropped entirely when transactional insertion of
filter (when we don't try to recalculate permissions in intermediate
state, when filter does conflict with original parent of the source
node) is merged (old big series
"[PATCH v5 00/45] Transactional block-graph modifying API"[1] and its
current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2])
[1] https://patchew.org/QEMU/20220330212902.590099-1-vsementsov@openvz.org/
[2] https://patchew.org/QEMU/20231017184444.932733-1-vsementsov@yandex-team.ru/
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/copy-before-write.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 026fa9840f..5a9456d426 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -364,9 +364,13 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
perm, shared, nperm, nshared);
if (!QLIST_EMPTY(&bs->parents)) {
- if (perm & BLK_PERM_WRITE) {
- *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
- }
+ /*
+ * Note, that source child may be shared with backup job. Backup job
+ * does create own blk parent on copy-before-write node, so this
+ * works even if source node does not have any parents before backup
+ * start
+ */
+ *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
*nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
}
}

View File

@ -0,0 +1,57 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 28 Sep 2023 11:19:14 +0200
Subject: [PATCH] migration states: workaround snapshot performance regression
Commit 813cd616 ("migration: Use migration_transferred_bytes() to
calculate rate_limit") introduced a prohibitive performance regression
when taking a snapshot [0]. The reason turns out to be the flushing
done by migration_transferred_bytes().
Just use a _noflush version of the relevant function as a workaround
until upstream fixes the issue. This is inspired by a not-applied
upstream series [1], but doing the very minimum to avoid the
regression.
[0]: https://gitlab.com/qemu-project/qemu/-/issues/1821
[1]: https://lists.nongnu.org/archive/html/qemu-devel/2023-05/msg07708.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/migration-stats.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/migration/migration-stats.c b/migration/migration-stats.c
index 095d6d75bb..8073c8ebaa 100644
--- a/migration/migration-stats.c
+++ b/migration/migration-stats.c
@@ -18,6 +18,20 @@
MigrationAtomicStats mig_stats;
+/*
+ * Same as migration_transferred_bytes below, but using the _noflush
+ * variant of qemu_file_transferred() to avoid a performance
+ * regression in migration_rate_exceeded().
+ */
+static uint64_t migration_transferred_bytes_noflush(QEMUFile *f)
+{
+ uint64_t multifd = stat64_get(&mig_stats.multifd_bytes);
+ uint64_t qemu_file = qemu_file_transferred_noflush(f);
+
+ trace_migration_transferred_bytes(qemu_file, multifd);
+ return qemu_file + multifd;
+}
+
bool migration_rate_exceeded(QEMUFile *f)
{
if (qemu_file_get_error(f)) {
@@ -25,7 +39,7 @@ bool migration_rate_exceeded(QEMUFile *f)
}
uint64_t rate_limit_start = stat64_get(&mig_stats.rate_limit_start);
- uint64_t rate_limit_current = migration_transferred_bytes(f);
+ uint64_t rate_limit_current = migration_transferred_bytes_noflush(f);
uint64_t rate_limit_used = rate_limit_current - rate_limit_start;
uint64_t rate_limit_max = stat64_get(&mig_stats.rate_limit_max);

View File

@ -24,10 +24,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
1 file changed, 2 insertions(+), 6 deletions(-) 1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 53f804ac16..9b1b9f0412 100644 index bb12b0ad43..de14d3c3da 100644
--- a/hw/i386/acpi-build.c --- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c
@@ -347,13 +347,9 @@ Aml *aml_pci_device_dsm(void) @@ -362,13 +362,9 @@ Aml *aml_pci_device_dsm(void)
{ {
Aml *params = aml_local(0); Aml *params = aml_local(0);
Aml *pkg = aml_package(2); Aml *pkg = aml_package(2);

View File

@ -1,48 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Date: Thu, 11 Apr 2024 11:29:23 +0200
Subject: [PATCH] block/copy-before-write: support unligned snapshot-discard
The first thing that crashes on unaligned access here is
bdrv_reset_dirty_bitmap(). The correct way is to align down the
snapshot-discard request.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/copy-before-write.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 5a9456d426..c0e70669a2 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
BDRVCopyBeforeWriteState *s = bs->opaque;
+ uint32_t cluster_size = block_copy_cluster_size(s->bcs);
+ int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
+ int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
+ int64_t aligned_bytes;
+
+ if (aligned_end <= aligned_offset) {
+ return 0;
+ }
+ aligned_bytes = aligned_end - aligned_offset;
WITH_QEMU_LOCK_GUARD(&s->lock) {
- bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
+ bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
+ aligned_bytes);
}
- block_copy_reset(s->bcs, offset, bytes);
+ block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
- return bdrv_co_pdiscard(s->target, offset, bytes);
+ return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
}
static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)

View File

@ -1,373 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Date: Thu, 11 Apr 2024 11:29:24 +0200
Subject: [PATCH] block/copy-before-write: create block_copy bitmap in filter
node
Currently block_copy creates copy_bitmap in source node. But that is in
bad relation with .independent_close=true of copy-before-write filter:
source node may be detached and removed before .bdrv_close() handler
called, which should call block_copy_state_free(), which in turn should
remove copy_bitmap.
That's all not ideal: it would be better if internal bitmap of
block-copy object is not attached to any node. But that is not possible
now.
The simplest solution is to just create copy_bitmap in the filter node, where
anyway two other bitmaps are created.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/block-copy.c | 3 +-
block/copy-before-write.c | 2 +-
include/block/block-copy.h | 1 +
tests/qemu-iotests/257.out | 112 ++++++++++++++++++-------------------
4 files changed, 60 insertions(+), 58 deletions(-)
diff --git a/block/block-copy.c b/block/block-copy.c
index 9ee3dd7ef5..8fca2c3698 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -351,6 +351,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
}
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
const BdrvDirtyBitmap *bitmap,
Error **errp)
{
@@ -367,7 +368,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
return NULL;
}
- copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
+ copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
errp);
if (!copy_bitmap) {
return NULL;
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index c0e70669a2..94db31512d 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -468,7 +468,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
bs->file->bs->supported_zero_flags);
- s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
+ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
if (!s->bcs) {
error_prepend(errp, "Cannot create block-copy-state: ");
return -EINVAL;
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 0700953ab8..8b41643bfa 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
typedef struct BlockCopyCallState BlockCopyCallState;
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
const BdrvDirtyBitmap *bitmap,
Error **errp);
diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
index aa76131ca9..c33dd7f3a9 100644
--- a/tests/qemu-iotests/257.out
+++ b/tests/qemu-iotests/257.out
@@ -120,16 +120,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -596,16 +596,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -865,16 +865,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -2086,16 +2086,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -2355,16 +2355,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -2831,16 +2831,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -3100,16 +3100,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -3576,16 +3576,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -3845,16 +3845,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -4321,16 +4321,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -4590,16 +4590,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,
@@ -5066,16 +5066,16 @@ write -P0x67 0x3fe0000 0x20000
"granularity": 65536,
"persistent": false,
"recording": false
- }
- ],
- "drive0": [
+ },
{
"busy": false,
"count": 0,
"granularity": 65536,
"persistent": false,
"recording": false
- },
+ }
+ ],
+ "drive0": [
{
"busy": false,
"count": 458752,

View File

@ -0,0 +1,34 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 1 Feb 2022 20:09:41 +0100
Subject: [PATCH] target/i386: the sgx_epc_get_section stub is reachable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The sgx_epc_get_section stub is reachable from cpu_x86_cpuid. It
should not assert, instead it should just return true just like
the "real" sgx_epc_get_section does when SGX is disabled.
Reported-by: Vladimír Beneš <vbenes@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20220201190941.106001-1-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry-picked from commit 219615740425d9683588207b40a365e6741691a6)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/i386/sgx-stub.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c
index 26833eb233..16b1dfd90b 100644
--- a/hw/i386/sgx-stub.c
+++ b/hw/i386/sgx-stub.c
@@ -34,5 +34,5 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size)
{
- g_assert_not_reached();
+ return true;
}

View File

@ -1,277 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Date: Thu, 11 Apr 2024 11:29:25 +0200
Subject: [PATCH] qapi: blockdev-backup: add discard-source parameter
Add a parameter that enables discard-after-copy. That is mostly useful
in "push backup with fleecing" scheme, when source is snapshot-access
format driver node, based on copy-before-write filter snapshot-access
API:
[guest] [snapshot-access] ~~ blockdev-backup ~~> [backup target]
| |
| root | file
v v
[copy-before-write]
| |
| file | target
v v
[active disk] [temp.img]
In this case discard-after-copy does two things:
- discard data in temp.img to save disk space
- avoid further copy-before-write operation in discarded area
Note that we have to declare WRITE permission on source in
copy-before-write filter, for discard to work. Still we can't take it
unconditionally, as it will break normal backup from RO source. So, we
have to add a parameter and pass it through bdrv_open flags.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/backup.c | 5 +++--
block/block-copy.c | 9 +++++++++
block/copy-before-write.c | 15 +++++++++++++--
block/copy-before-write.h | 1 +
block/replication.c | 4 ++--
blockdev.c | 2 +-
include/block/block-common.h | 2 ++
include/block/block-copy.h | 1 +
include/block/block_int-global-state.h | 2 +-
qapi/block-core.json | 4 ++++
10 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index ec29d6b810..3dd2e229d2 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -356,7 +356,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, int64_t speed,
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
BitmapSyncMode bitmap_mode,
- bool compress,
+ bool compress, bool discard_source,
const char *filter_node_name,
BackupPerf *perf,
BlockdevOnError on_source_error,
@@ -457,7 +457,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
- cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
+ cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+ &bcs, errp);
if (!cbw) {
goto error;
}
diff --git a/block/block-copy.c b/block/block-copy.c
index 8fca2c3698..7e3b378528 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
CoMutex lock;
int64_t in_flight_bytes;
BlockCopyMethod method;
+ bool discard_source;
BlockReqList reqs;
QLIST_HEAD(, BlockCopyCallState) calls;
/*
@@ -353,6 +354,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
BlockDriverState *copy_bitmap_bs,
const BdrvDirtyBitmap *bitmap,
+ bool discard_source,
Error **errp)
{
ERRP_GUARD();
@@ -418,6 +420,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
cluster_size),
};
+ s->discard_source = discard_source;
block_copy_set_copy_opts(s, false, false);
ratelimit_init(&s->rate_limit);
@@ -589,6 +592,12 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
co_put_to_shres(s->mem, t->req.bytes);
block_copy_task_end(t, ret);
+ if (s->discard_source && ret == 0) {
+ int64_t nbytes =
+ MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
+ bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
+ }
+
return ret;
}
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 94db31512d..853e01a1eb 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
BdrvChild *target;
OnCbwError on_cbw_error;
uint64_t cbw_timeout_ns;
+ bool discard_source;
/*
* @lock: protects access to @access_bitmap, @done_bitmap and
@@ -357,6 +358,8 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
+ BDRVCopyBeforeWriteState *s = bs->opaque;
+
if (!(role & BDRV_CHILD_FILTERED)) {
/*
* Target child
@@ -381,6 +384,10 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
* start
*/
*nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+ if (s->discard_source) {
+ *nperm = *nperm | BLK_PERM_WRITE;
+ }
+
*nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
}
}
@@ -468,7 +475,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
bs->file->bs->supported_zero_flags);
- s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
+ s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
+ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
+ flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
if (!s->bcs) {
error_prepend(errp, "Cannot create block-copy-state: ");
return -EINVAL;
@@ -535,12 +544,14 @@ static BlockDriver bdrv_cbw_filter = {
BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockDriverState *target,
const char *filter_node_name,
+ bool discard_source,
BlockCopyState **bcs,
Error **errp)
{
BDRVCopyBeforeWriteState *state;
BlockDriverState *top;
QDict *opts;
+ int flags = BDRV_O_RDWR | (discard_source ? BDRV_O_CBW_DISCARD_SOURCE : 0);
assert(source->total_sectors == target->total_sectors);
GLOBAL_STATE_CODE();
@@ -553,7 +564,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
qdict_put_str(opts, "file", bdrv_get_node_name(source));
qdict_put_str(opts, "target", bdrv_get_node_name(target));
- top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
+ top = bdrv_insert_node(source, opts, flags, errp);
if (!top) {
return NULL;
}
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 6e72bb25e9..01af0cd3c4 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -39,6 +39,7 @@
BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockDriverState *target,
const char *filter_node_name,
+ bool discard_source,
BlockCopyState **bcs,
Error **errp);
void bdrv_cbw_drop(BlockDriverState *bs);
diff --git a/block/replication.c b/block/replication.c
index ca6bd0a720..0415a5e8b7 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -582,8 +582,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
s->backup_job = backup_job_create(
NULL, s->secondary_disk->bs, s->hidden_disk->bs,
- 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
- &perf,
+ 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
+ NULL, &perf,
BLOCKDEV_ON_ERROR_REPORT,
BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
backup_job_completed, bs, NULL, &local_err);
diff --git a/blockdev.c b/blockdev.c
index 057601dcf0..4c33c3f5f0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2726,7 +2726,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
backup->sync, bmap, backup->bitmap_mode,
- backup->compress,
+ backup->compress, backup->discard_source,
backup->filter_node_name,
&perf,
backup->on_source_error,
diff --git a/include/block/block-common.h b/include/block/block-common.h
index a846023a09..338fe5ff7a 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -243,6 +243,8 @@ typedef enum {
read-write fails */
#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
+#define BDRV_O_CBW_DISCARD_SOURCE 0x80000 /* for copy-before-write filter */
+
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 8b41643bfa..bdc703bacd 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -27,6 +27,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
BlockDriverState *copy_bitmap_bs,
const BdrvDirtyBitmap *bitmap,
+ bool discard_source,
Error **errp);
/* Function should be called prior any actual copy request */
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index d2201e27f4..eb2d92a226 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -193,7 +193,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
MirrorSyncMode sync_mode,
BdrvDirtyBitmap *sync_bitmap,
BitmapSyncMode bitmap_mode,
- bool compress,
+ bool compress, bool discard_source,
const char *filter_node_name,
BackupPerf *perf,
BlockdevOnError on_source_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4b18e01b85..b179d65520 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1610,6 +1610,9 @@
# node specified by @drive. If this option is not given, a node
# name is autogenerated. (Since: 4.2)
#
+# @discard-source: Discard blocks on source which are already copied
+# to the target. (Since 9.0)
+#
# @x-perf: Performance options. (Since 6.0)
#
# Features:
@@ -1631,6 +1634,7 @@
'*on-target-error': 'BlockdevOnError',
'*auto-finalize': 'bool', '*auto-dismiss': 'bool',
'*filter-node-name': 'str',
+ '*discard-source': 'bool',
'*x-perf': { 'type': 'BackupPerf',
'features': [ 'unstable' ] } } }

View File

@ -0,0 +1,86 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 24 Jan 2024 11:57:48 +0100
Subject: [PATCH] ui/clipboard: mark type as not available when there is no
data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT
message with len=0. In qemu_clipboard_set_data(), the clipboard info
will be updated setting data to NULL (because g_memdup(data, size)
returns NULL when size is 0). If the client does not set the
VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then
the 'request' callback for the clipboard peer is not initialized.
Later, because data is NULL, qemu_clipboard_request() can be reached
via vdagent_chr_write() and vdagent_clipboard_recv_request() and
there, the clipboard owner's 'request' callback will be attempted to
be called, but that is a NULL pointer.
In particular, this can happen when using the KRDC (22.12.3) VNC
client.
Another scenario leading to the same issue is with two clients (say
noVNC and KRDC):
The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and
initializes its cbpeer.
The KRDC client does not, but triggers a vnc_client_cut_text() (note
it's not the _ext variant). There, a new clipboard info with it as
the 'owner' is created and qemu_clipboard_set_data() is called,
which in turn calls qemu_clipboard_update() with that info.
In qemu_clipboard_update(), the notifier for the noVNC client will be
called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the
noVNC client. The 'owner' in that clipboard info is the clipboard peer
for the KRDC client, which did not initialize the 'request' function.
That sounds correct to me, it is the owner of that clipboard info.
Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set
the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it
passes), that clipboard info is passed to qemu_clipboard_request() and
the original segfault still happens.
Fix the issue by handling updates with size 0 differently. In
particular, mark in the clipboard info that the type is not available.
While at it, switch to g_memdup2(), because g_memdup() is deprecated.
Cc: qemu-stable@nongnu.org
Fixes: CVE-2023-6683
Reported-by: Markus Frank <m.frank@proxmox.com>
Suggested-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Tested-by: Markus Frank <m.frank@proxmox.com>
(picked from https://lists.nongnu.org/archive/html/qemu-stable/2024-01/msg00228.html)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
ui/clipboard.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/ui/clipboard.c b/ui/clipboard.c
index 3d14bffaf8..b3f6fa3c9e 100644
--- a/ui/clipboard.c
+++ b/ui/clipboard.c
@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer,
}
g_free(info->types[type].data);
- info->types[type].data = g_memdup(data, size);
- info->types[type].size = size;
- info->types[type].available = true;
+ if (size) {
+ info->types[type].data = g_memdup2(data, size);
+ info->types[type].size = size;
+ info->types[type].available = true;
+ } else {
+ info->types[type].data = NULL;
+ info->types[type].size = 0;
+ info->types[type].available = false;
+ }
if (update) {
qemu_clipboard_update(info);

View File

@ -1,92 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Tue, 18 Jun 2024 14:19:58 +0200
Subject: [PATCH] hw/virtio: Fix the de-initialization of vhost-user devices
The unrealize functions of the various vhost-user devices are
calling the corresponding vhost_*_set_status() functions with a
status of 0 to shut down the device correctly.
Now these vhost_*_set_status() functions all follow this scheme:
bool should_start = virtio_device_should_start(vdev, status);
if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
return;
}
if (should_start) {
/* ... do the initialization stuff ... */
} else {
/* ... do the cleanup stuff ... */
}
The problem here is virtio_device_should_start(vdev, 0) currently
always returns "true" since it internally only looks at vdev->started
instead of looking at the "status" parameter. Thus once the device
got started once, virtio_device_should_start() always returns true
and thus the vhost_*_set_status() functions return early, without
ever doing any clean-up when being called with status == 0. This
causes e.g. problems when trying to hot-plug and hot-unplug a vhost
user device multiple times since the de-initialization step is
completely skipped during the unplug operation.
This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move
vm_running check to virtio_device_started") which replaced
should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
with
should_start = virtio_device_started(vdev, status);
which later got replaced by virtio_device_should_start(). This blocked
the possibility to set should_start to false in case the status flag
VIRTIO_CONFIG_S_DRIVER_OK was not set.
Fix it by adjusting the virtio_device_should_start() function to
only consider the status flag instead of vdev->started. Since this
function is only used in the various vhost_*_set_status() functions
for exactly the same purpose, it should be fine to fix it in this
central place there without any risk to change the behavior of other
code.
Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started")
Buglink: https://issues.redhat.com/browse/RHEL-40708
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20240618121958.88673-1-thuth@redhat.com>
Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/hw/virtio/virtio.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 7d5ffdc145..2eafad17b8 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
* @vdev - the VirtIO device
* @status - the devices status bits
*
- * This is similar to virtio_device_started() but also encapsulates a
- * check on the VM status which would prevent a device starting
- * anyway.
+ * This is similar to virtio_device_started() but ignores vdev->started
+ * and also encapsulates a check on the VM status which would prevent a
+ * device from starting anyway.
*/
static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status)
{
@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status
return false;
}
- return virtio_device_started(vdev, status);
+ return status & VIRTIO_CONFIG_S_DRIVER_OK;
}
static inline void virtio_set_started(VirtIODevice *vdev, bool started)

View File

@ -0,0 +1,65 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 2 Feb 2024 16:31:56 +0100
Subject: [PATCH] virtio-scsi: Attach event vq notifier with no_poll
As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi:
don't waste CPU polling the event virtqueue"), we only attach an io_read
notifier for the virtio-scsi event virtqueue instead, and no polling
notifiers. During operation, the event virtqueue is typically
non-empty, but none of the buffers are intended to be used immediately.
Instead, they only get used when certain events occur. Therefore, it
makes no sense to continuously poll it when non-empty, because it is
supposed to be and stay non-empty.
We do this by using virtio_queue_aio_attach_host_notifier_no_poll()
instead of virtio_queue_aio_attach_host_notifier() for the event
virtqueue.
Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement
BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use
virtio_queue_aio_attach_host_notifier() for all virtqueues, including
the event virtqueue. This can lead to it being polled again, undoing
the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552.
Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the
event virtqueue.
("virtio-scsi: implement BlockDevOps->drained_begin()")
Reported-by: Fiona Ebner <f.ebner@proxmox.com>
Fixes: 766aa2de0f29b657148e04599320d771c36fd126
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/scsi/virtio-scsi.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 45b95ea070..ad24a882fd 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -1148,6 +1148,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
static void virtio_scsi_drained_end(SCSIBus *bus)
{
VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
VirtIODevice *vdev = VIRTIO_DEVICE(s);
uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
s->parent_obj.conf.num_queues;
@@ -1165,7 +1166,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus)
for (uint32_t i = 0; i < total_queues; i++) {
VirtQueue *vq = virtio_get_queue(vdev, i);
- virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+ if (vq == vs->event_vq) {
+ virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+ } else {
+ virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+ }
}
}

View File

@ -1,43 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniyal Khan <danikhan632@gmail.com>
Date: Wed, 17 Jul 2024 16:01:47 +1000
Subject: [PATCH] target/arm: Use float_status copy in sme_fmopa_s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We made a copy above because the fp exception flags
are not propagated back to the FPST register, but
then failed to use the copy.
Cc: qemu-stable@nongnu.org
Fixes: 558e956c719 ("target/arm: Implement FMOPA, FMOPS (non-widening)")
Signed-off-by: Daniyal Khan <danikhan632@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20240717060149.204788-2-richard.henderson@linaro.org
[rth: Split from a larger patch]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 31d93fedf41c24b0badb38cd9317590d1ef74e37)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/arm/tcg/sme_helper.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index e2e0575039..5a6dd76489 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -916,7 +916,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
if (pb & 1) {
uint32_t *a = vza_row + H1_4(col);
uint32_t *m = vzm + H1_4(col);
- *a = float32_muladd(n, *m, *a, 0, vst);
+ *a = float32_muladd(n, *m, *a, 0, &fpst);
}
col += 4;
pb >>= 4;

View File

@ -0,0 +1,125 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 2 Feb 2024 16:31:57 +0100
Subject: [PATCH] virtio: Re-enable notifications after drain
During drain, we do not care about virtqueue notifications, which is why
we remove the handlers on it. When removing those handlers, whether vq
notifications are enabled or not depends on whether we were in polling
mode or not; if not, they are enabled (by default); if so, they have
been disabled by the io_poll_start callback.
Because we do not care about those notifications after removing the
handlers, this is fine. However, we have to explicitly ensure they are
enabled when re-attaching the handlers, so we will resume receiving
notifications. We do this in virtio_queue_aio_attach_host_notifier*().
If such a function is called while we are in a polling section,
attaching the notifiers will then invoke the io_poll_start callback,
re-disabling notifications.
Because we will always miss virtqueue updates in the drained section, we
also need to poll the virtqueue once after attaching the notifiers.
Buglink: https://issues.redhat.com/browse/RHEL-3934
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/virtio/virtio.c | 42 ++++++++++++++++++++++++++++++++++++++++++
include/block/aio.h | 7 ++++++-
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 969c25f4cf..02cce83111 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3526,6 +3526,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
+ /*
+ * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
+ * Re-enable them. (And if detach has not been used before, notifications
+ * being enabled is still the default state while a notifier is attached;
+ * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
+ * notifications enabled once the polling section is left.)
+ */
+ if (!virtio_queue_get_notification(vq)) {
+ virtio_queue_set_notification(vq, 1);
+ }
+
aio_set_event_notifier(ctx, &vq->host_notifier,
virtio_queue_host_notifier_read,
virtio_queue_host_notifier_aio_poll,
@@ -3533,6 +3544,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
aio_set_event_notifier_poll(ctx, &vq->host_notifier,
virtio_queue_host_notifier_aio_poll_begin,
virtio_queue_host_notifier_aio_poll_end);
+
+ /*
+ * We will have ignored notifications about new requests from the guest
+ * while no notifiers were attached, so "kick" the virt queue to process
+ * those requests now.
+ */
+ event_notifier_set(&vq->host_notifier);
}
/*
@@ -3543,14 +3561,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
*/
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
{
+ /* See virtio_queue_aio_attach_host_notifier() */
+ if (!virtio_queue_get_notification(vq)) {
+ virtio_queue_set_notification(vq, 1);
+ }
+
aio_set_event_notifier(ctx, &vq->host_notifier,
virtio_queue_host_notifier_read,
NULL, NULL);
+
+ /*
+ * See virtio_queue_aio_attach_host_notifier().
+ * Note that this may be unnecessary for the type of virtqueues this
+ * function is used for. Still, it will not hurt to have a quick look into
+ * whether we can/should process any of the virtqueue elements.
+ */
+ event_notifier_set(&vq->host_notifier);
}
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
+
+ /*
+ * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
+ * will run after io_poll_begin(), so by removing the notifier, we do not
+ * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
+ * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
+ * notifications are enabled or disabled. It does not really matter anyway;
+ * we just removed the notifier, so we do not care about notifications until
+ * we potentially re-attach it. The attach_host_notifier functions will
+ * ensure that notifications are enabled again when they are needed.
+ */
}
void virtio_queue_host_notifier_read(EventNotifier *n)
diff --git a/include/block/aio.h b/include/block/aio.h
index 32042e8905..79efadfa48 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -498,9 +498,14 @@ void aio_set_event_notifier(AioContext *ctx,
AioPollFn *io_poll,
EventNotifierHandler *io_poll_ready);
-/* Set polling begin/end callbacks for an event notifier that has already been
+/*
+ * Set polling begin/end callbacks for an event notifier that has already been
* registered with aio_set_event_notifier. Do nothing if the event notifier is
* not registered.
+ *
+ * Note that if the io_poll_end() callback (or the entire notifier) is removed
+ * during polling, it will not be called, so an io_poll_begin() is not
+ * necessarily always followed by an io_poll_end().
*/
void aio_set_event_notifier_poll(AioContext *ctx,
EventNotifier *notifier,

View File

@ -1,62 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Wed, 17 Jul 2024 16:01:48 +1000
Subject: [PATCH] target/arm: Use FPST_F16 for SME FMOPA (widening)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This operation has float16 inputs and thus must use
the FZ16 control not the FZ control.
Cc: qemu-stable@nongnu.org
Fixes: 3916841ac75 ("target/arm: Implement FMOPA, FMOPS (widening)")
Reported-by: Daniyal Khan <danikhan632@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20240717060149.204788-3-richard.henderson@linaro.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2374
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 207d30b5fdb5b45a36f26eefcf52fe2c1714dd4f)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/arm/tcg/translate-sme.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 46c7fce8b4..185a8a917b 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -304,6 +304,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
}
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
+ ARMFPStatusFlavour e_fpst,
gen_helper_gvec_5_ptr *fn)
{
int svl = streaming_vec_reg_size(s);
@@ -319,15 +320,18 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
zm = vec_full_reg_ptr(s, a->zm);
pn = pred_full_reg_ptr(s, a->pn);
pm = pred_full_reg_ptr(s, a->pm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(e_fpst);
fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
return true;
}
-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
-TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
-TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
+ MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
+ MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
+ MO_64, FPST_FPCR, gen_helper_sme_fmopa_d)
/* TODO: FEAT_EBF16 */
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)

View File

@ -1,60 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 10 Jul 2024 17:25:29 +0200
Subject: [PATCH] scsi: fix regression and honor bootindex again for legacy
drives
Commit 3089637461 ("scsi: Don't ignore most usb-storage properties")
removed the call to object_property_set_int() and thus the 'set'
method for the bootindex property was also not called anymore. Here
that method is device_set_bootindex() (as configured by
scsi_dev_instance_init() -> device_add_bootindex_property()) which as
a side effect registers the device via add_boot_device_path().
As reported by a downstream user [0], the bootindex property did not
have the desired effect anymore for legacy drives. Fix the regression
by explicitly calling the add_boot_device_path() function after
checking that the bootindex is not yet used (to avoid
add_boot_device_path() calling exit()).
[0]: https://forum.proxmox.com/threads/149772/post-679433
Cc: qemu-stable@nongnu.org
Fixes: 3089637461 ("scsi: Don't ignore most usb-storage properties")
Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Link: https://lore.kernel.org/r/20240710152529.1737407-1-f.ebner@proxmox.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 57a8a80d1a5b28797b21d30bfc60601945820e51)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/scsi/scsi-bus.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index 9e40b0c920..53eff5dd3d 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -384,6 +384,7 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
DeviceState *dev;
SCSIDevice *s;
DriveInfo *dinfo;
+ Error *local_err = NULL;
if (blk_is_sg(blk)) {
driver = "scsi-generic";
@@ -403,6 +404,14 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
s = SCSI_DEVICE(dev);
s->conf = *conf;
+ check_boot_index(conf->bootindex, &local_err);
+ if (local_err) {
+ object_unparent(OBJECT(dev));
+ error_propagate(errp, local_err);
+ return NULL;
+ }
+ add_boot_device_path(conf->bootindex, dev, NULL);
+
qdev_prop_set_uint32(dev, "scsi-id", unit);
if (object_property_find(OBJECT(dev), "removable")) {
qdev_prop_set_bit(dev, "removable", removable);

View File

@ -1,48 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Mon, 15 Jul 2024 15:14:03 +0200
Subject: [PATCH] hw/scsi/lsi53c895a: bump instruction limit in scripts
processing to fix regression
Commit 9876359990 ("hw/scsi/lsi53c895a: add timer to scripts
processing") reduced the maximum allowed instruction count by
a factor of 100 all the way down to 100.
This causes the "Check Point R81.20 Gaia" appliance [0] to fail to
boot after fully finishing the installation via the appliance's web
interface (there is already one reboot before that).
With a limit of 150, the appliance still fails to boot, while with a
limit of 200, it works. Bump to 500 to fix the regression and be on
the safe side.
Originally reported in the Proxmox community forum[1].
[0]: https://support.checkpoint.com/results/download/124397
[1]: https://forum.proxmox.com/threads/149772/post-683459
Cc: qemu-stable@nongnu.org
Fixes: 9876359990 ("hw/scsi/lsi53c895a: add timer to scripts processing")
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Acked-by: Sven Schnelle <svens@stackframe.org>
Link: https://lore.kernel.org/r/20240715131403.223239-1-f.ebner@proxmox.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a4975023fb13cf229bd59c9ceec1b8cbdc5b9a20)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/scsi/lsi53c895a.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index eb9828dd5e..f1935e5328 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -188,7 +188,7 @@ static const char *names[] = {
#define LSI_TAG_VALID (1 << 16)
/* Maximum instructions to process. */
-#define LSI_MAX_INSN 100
+#define LSI_MAX_INSN 500
typedef struct lsi_request {
SCSIRequest *req;

View File

@ -1,38 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 27 Jun 2024 20:12:44 +0200
Subject: [PATCH] block-copy: Fix missing graph lock
The graph lock needs to be held when calling bdrv_co_pdiscard(). Fix
block_copy_task_entry() to take it for the call.
WITH_GRAPH_RDLOCK_GUARD() was implemented in a weak way because of
limitations in clang's Thread Safety Analysis at the time, so that it
only asserts that the lock is held (which allows calling functions that
require the lock), but we never deal with the unlocking (so even after
the scope of the guard, the compiler assumes that the lock is still
held). This is why the compiler didn't catch this locking error.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(picked from https://lore.kernel.org/qemu-devel/20240627181245.281403-2-kwolf@redhat.com/)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/block-copy.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/block/block-copy.c b/block/block-copy.c
index 7e3b378528..cc618e4561 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -595,7 +595,9 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
if (s->discard_source && ret == 0) {
int64_t nbytes =
MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
- bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
+ WITH_GRAPH_RDLOCK_GUARD() {
+ bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
+ }
}
return ret;

View File

@ -1,93 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sergey Dyasli <sergey.dyasli@nutanix.com>
Date: Fri, 12 Jul 2024 09:26:59 +0000
Subject: [PATCH] Revert "qemu-char: do not operate on sources from finalize
callbacks"
This reverts commit 2b316774f60291f57ca9ecb6a9f0712c532cae34.
After 038b4217884c ("Revert "chardev: use a child source for qio input
source"") we've been observing the "iwp->src == NULL" assertion
triggering periodically during the initial capabilities querying by
libvirtd. One of possible backtraces:
Thread 1 (Thread 0x7f16cd4f0700 (LWP 43858)):
0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
1 0x00007f16c6c21e65 in __GI_abort () at abort.c:79
2 0x00007f16c6c21d39 in __assert_fail_base at assert.c:92
3 0x00007f16c6c46e86 in __GI___assert_fail (assertion=assertion@entry=0x562e9bcdaadd "iwp->src == NULL", file=file@entry=0x562e9bcdaac8 "../chardev/char-io.c", line=line@entry=99, function=function@entry=0x562e9bcdab10 <__PRETTY_FUNCTION__.20549> "io_watch_poll_finalize") at assert.c:101
4 0x0000562e9ba20c2c in io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:99
5 io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:88
6 0x00007f16c904aae0 in g_source_unref_internal () from /lib64/libglib-2.0.so.0
7 0x00007f16c904baf9 in g_source_destroy_internal () from /lib64/libglib-2.0.so.0
8 0x0000562e9ba20db0 in io_remove_watch_poll (source=0x562e9d6720b0) at ../chardev/char-io.c:147
9 remove_fd_in_watch (chr=chr@entry=0x562e9d5f3800) at ../chardev/char-io.c:153
10 0x0000562e9ba23ffb in update_ioc_handlers (s=0x562e9d5f3800) at ../chardev/char-socket.c:592
11 0x0000562e9ba2072f in qemu_chr_fe_set_handlers_full at ../chardev/char-fe.c:279
12 0x0000562e9ba207a9 in qemu_chr_fe_set_handlers at ../chardev/char-fe.c:304
13 0x0000562e9ba2ca75 in monitor_qmp_setup_handlers_bh (opaque=0x562e9d4c2c60) at ../monitor/qmp.c:509
14 0x0000562e9bb6222e in aio_bh_poll (ctx=ctx@entry=0x562e9d4c2f20) at ../util/async.c:216
15 0x0000562e9bb4de0a in aio_poll (ctx=0x562e9d4c2f20, blocking=blocking@entry=true) at ../util/aio-posix.c:722
16 0x0000562e9b99dfaa in iothread_run (opaque=0x562e9d4c26f0) at ../iothread.c:63
17 0x0000562e9bb505a4 in qemu_thread_start (args=0x562e9d4c7ea0) at ../util/qemu-thread-posix.c:543
18 0x00007f16c70081ca in start_thread (arg=<optimized out>) at pthread_create.c:479
19 0x00007f16c6c398d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
io_remove_watch_poll(), which makes sure that iwp->src is NULL, calls
g_source_destroy() which finds that iwp->src is not NULL in the finalize
callback. This can only happen if another thread has managed to trigger
io_watch_poll_prepare() callback in the meantime.
Move iwp->src destruction back to the finalize callback to prevent the
described race, and also remove the stale comment. The deadlock glib bug
was fixed back in 2010 by b35820285668 ("gmain: move finalization of
GSource outside of context lock").
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sergey Dyasli <sergey.dyasli@nutanix.com>
Link: https://lore.kernel.org/r/20240712092659.216206-1-sergey.dyasli@nutanix.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e0bf95443ee9326d44031373420cf9f3513ee255)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
chardev/char-io.c | 19 +++++--------------
1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/chardev/char-io.c b/chardev/char-io.c
index dab77b112e..3be17b51ca 100644
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -87,16 +87,12 @@ static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
static void io_watch_poll_finalize(GSource *source)
{
- /*
- * Due to a glib bug, removing the last reference to a source
- * inside a finalize callback causes recursive locking (and a
- * deadlock). This is not a problem inside other callbacks,
- * including dispatch callbacks, so we call io_remove_watch_poll
- * to remove this source. At this point, iwp->src must
- * be NULL, or we would leak it.
- */
IOWatchPoll *iwp = io_watch_poll_from_source(source);
- assert(iwp->src == NULL);
+ if (iwp->src) {
+ g_source_destroy(iwp->src);
+ g_source_unref(iwp->src);
+ iwp->src = NULL;
+ }
}
static GSourceFuncs io_watch_poll_funcs = {
@@ -139,11 +135,6 @@ static void io_remove_watch_poll(GSource *source)
IOWatchPoll *iwp;
iwp = io_watch_poll_from_source(source);
- if (iwp->src) {
- g_source_destroy(iwp->src);
- g_source_unref(iwp->src);
- iwp->src = NULL;
- }
g_source_destroy(&iwp->parent);
}

View File

@ -1,77 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Cindy Lu <lulu@redhat.com>
Date: Tue, 6 Aug 2024 17:37:12 +0800
Subject: [PATCH] virtio-pci: Fix the use of an uninitialized irqfd
The crash was reported on macOS and NixOS; here are the links for this bug:
https://gitlab.com/qemu-project/qemu/-/issues/2334
https://gitlab.com/qemu-project/qemu/-/issues/2321
In this bug, they are using the virtio_input device. The guest notifier was
not supported for this device. The function virtio_pci_set_guest_notifiers()
was not called, and the vector_irqfd was not initialized.
So the fix is adding the check for vector_irqfd in virtio_pci_get_notifier()
The function virtio_pci_get_notifier() can be used in various devices.
It could also be called when VIRTIO_CONFIG_S_DRIVER_OK is not set. In this situation,
the vector_irqfd being NULL is acceptable. We can allow the device to continue to boot.
If the vector_irqfd still hasn't been initialized after VIRTIO_CONFIG_S_DRIVER_OK
is set, it means that the function set_guest_notifiers was not called before the
driver started. This indicates that the device is not using the notifier.
At this point, we will let the check fail.
This fix is verified on Vyatta, macOS, NixOS, and Fedora systems.
The bt tree for this bug is:
Thread 6 "CPU 0/KVM" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7c817be006c0 (LWP 1269146)]
kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817
817 if (irqfd->users == 0) {
(gdb) thread apply all bt
...
Thread 6 (Thread 0x7c817be006c0 (LWP 1269146) "CPU 0/KVM"):
0 kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817
1 kvm_virtio_pci_vector_use_one () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:893
2 0x00005983657045e2 in memory_region_write_accessor () at ../qemu-9.0.0/system/memory.c:497
3 0x0000598365704ba6 in access_with_adjusted_size () at ../qemu-9.0.0/system/memory.c:573
4 0x0000598365705059 in memory_region_dispatch_write () at ../qemu-9.0.0/system/memory.c:1528
5 0x00005983659b8e1f in flatview_write_continue_step.isra.0 () at ../qemu-9.0.0/system/physmem.c:2713
6 0x000059836570ba7d in flatview_write_continue () at ../qemu-9.0.0/system/physmem.c:2743
7 flatview_write () at ../qemu-9.0.0/system/physmem.c:2774
8 0x000059836570bb76 in address_space_write () at ../qemu-9.0.0/system/physmem.c:2894
9 0x0000598365763afe in address_space_rw () at ../qemu-9.0.0/system/physmem.c:2904
10 kvm_cpu_exec () at ../qemu-9.0.0/accel/kvm/kvm-all.c:2917
11 0x000059836576656e in kvm_vcpu_thread_fn () at ../qemu-9.0.0/accel/kvm/kvm-accel-ops.c:50
12 0x0000598365926ca8 in qemu_thread_start () at ../qemu-9.0.0/util/qemu-thread-posix.c:541
13 0x00007c8185bcd1cf in ??? () at /usr/lib/libc.so.6
14 0x00007c8185c4e504 in clone () at /usr/lib/libc.so.6
Fixes: 2ce6cff94d ("virtio-pci: fix use of a released vector")
Cc: qemu-stable@nongnu.org
Signed-off-by: Cindy Lu <lulu@redhat.com>
Message-Id: <20240806093715.65105-1-lulu@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit a8e63ff289d137197ad7a701a587cc432872d798)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/virtio/virtio-pci.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index e04218a9fb..389bab003f 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -860,6 +860,9 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtQueue *vq;
+ if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
+ return -1;
+
if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
*n = virtio_config_get_guest_notifier(vdev);
*vector = vdev->config_vector;

View File

@ -1,35 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Akihiko Odaki <akihiko.odaki@daynix.com>
Date: Mon, 1 Jul 2024 20:58:04 +0900
Subject: [PATCH] virtio-net: Ensure queue index fits with RSS
Ensure the queue index points to a valid queue when software RSS
enabled. The new calculation matches with the behavior of Linux's TAP
device with the RSS eBPF program.
Fixes: 4474e37a5b3a ("virtio-net: implement RX RSS processing")
Reported-by: Zhibin Hu <huzhibin5@huawei.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit f1595ceb9aad36a6c1da95bcb77ab9509b38822d)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/net/virtio-net.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 3644bfd91b..f48588638d 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1949,7 +1949,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
int index = virtio_net_process_rss(nc, buf, size);
if (index >= 0) {
- NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
+ NetClientState *nc2 =
+ qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
return virtio_net_receive_rcu(nc2, buf, size, true);
}
}

View File

@ -1,338 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: thomas <east.moutain.yang@gmail.com>
Date: Fri, 12 Jul 2024 11:10:53 +0800
Subject: [PATCH] virtio-net: Fix network stall at the host side waiting for
kick
Patch 06b12970174 ("virtio-net: fix network stall under load")
added double-check to test whether the available buffer size
can satisfy the request or not, in case the guest has added
some buffers to the avail ring simultaneously after the first
check. It will be lucky if the available buffer size becomes
okay after the double-check, then the host can send the packet
to the guest. If the buffer size still can't satisfy the request,
even if the guest has added some buffers, virtio-net would
stall at the host side forever.
The patch enables notification and checks whether the guest has
added some buffers since last check of available buffers when
the available buffers are insufficient. If no buffer is added,
return false, else recheck the available buffers in the loop.
If the available buffers are sufficient, disable notification
and return true.
Changes:
1. Change the return type of virtqueue_get_avail_bytes() from void
to int, it returns an opaque that represents the shadow_avail_idx
of the virtqueue on success, else -1 on error.
2. Add a new API: virtio_queue_enable_notification_and_check(),
it takes an opaque as input arg which is returned from
virtqueue_get_avail_bytes(). It enables notification firstly,
then checks whether the guest has added some buffers since
last check of available buffers or not by virtio_queue_poll(),
return true if yes.
The patch also reverts patch "06b12970174".
The case below can reproduce the stall.
Guest 0
+--------+
| iperf |
---------------> | server |
Host | +--------+
+--------+ | ...
| iperf |----
| client |---- Guest n
+--------+ | +--------+
| | iperf |
---------------> | server |
+--------+
Boot many guests from qemu with virtio network:
qemu ... -netdev tap,id=net_x \
-device virtio-net-pci-non-transitional,\
iommu_platform=on,mac=xx:xx:xx:xx:xx:xx,netdev=net_x
Each guest acts as iperf server with commands below:
iperf3 -s -D -i 10 -p 8001
iperf3 -s -D -i 10 -p 8002
The host as iperf client:
iperf3 -c guest_IP -p 8001 -i 30 -w 256k -P 20 -t 40000
iperf3 -c guest_IP -p 8002 -i 30 -w 256k -P 20 -t 40000
After some time, the host loses connection to the guest,
the guest can send packet to the host, but can't receive
packet from the host.
It's more likely to happen if SWIOTLB is enabled in the guest,
allocating and freeing bounce buffer takes some CPU ticks,
copying from/to bounce buffer takes more CPU ticks, compared
with that there is no bounce buffer in the guest.
Once the rate of producing packets from the host approximates
the rate of receiving packets in the guest, the guest would
loop in NAPI.
receive packets ---
| |
v |
free buf virtnet_poll
| |
v |
add buf to avail ring ---
|
| need kick the host?
| NAPI continues
v
receive packets ---
| |
v |
free buf virtnet_poll
| |
v |
add buf to avail ring ---
|
v
... ...
On the other hand, the host fetches free buf from avail
ring, if the buf in the avail ring is not enough, the
host notifies the guest the event by writing the avail
idx read from avail ring to the event idx of used ring,
then the host goes to sleep, waiting for the kick signal
from the guest.
Once the guest finds the host is waiting for kick signal
(in virtqueue_kick_prepare_split()), it kicks the host.
The host may stall forever at the sequences below:
Host Guest
------------ -----------
fetch buf, send packet receive packet ---
... ... |
fetch buf, send packet add buf |
... add buf virtnet_poll
buf not enough avail idx-> add buf |
read avail idx add buf |
add buf ---
receive packet ---
write event idx ... |
wait for kick add buf virtnet_poll
... |
---
no more packet, exit NAPI
In the first loop of NAPI above, indicated in the range of
virtnet_poll above, the host is sending packets while the
guest is receiving packets and adding buffers.
step 1: The buf is not enough, for example, a big packet
needs 5 buf, but the available buf count is 3.
The host read current avail idx.
step 2: The guest adds some buf, then checks whether the
host is waiting for kick signal, not at this time.
The used ring is not empty, the guest continues
the second loop of NAPI.
step 3: The host writes the avail idx read from avail
ring to used ring as event idx via
virtio_queue_set_notification(q->rx_vq, 1).
step 4: At the end of the second loop of NAPI, recheck
whether kick is needed, as the event idx in the
used ring written by the host is beyond the
range of kick condition, the guest will not
send kick signal to the host.
Fixes: 06b12970174 ("virtio-net: fix network stall under load")
Cc: qemu-stable@nongnu.org
Signed-off-by: Wencheng Yang <east.moutain.yang@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit f937309fbdbb48c354220a3e7110c202ae4aa7fa)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/net/virtio-net.c | 28 ++++++++++-------
hw/virtio/virtio.c | 64 +++++++++++++++++++++++++++++++++++---
include/hw/virtio/virtio.h | 21 +++++++++++--
3 files changed, 94 insertions(+), 19 deletions(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index f48588638d..d4b979d343 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1680,24 +1680,28 @@ static bool virtio_net_can_receive(NetClientState *nc)
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
+ int opaque;
+ unsigned int in_bytes;
VirtIONet *n = q->n;
- if (virtio_queue_empty(q->rx_vq) ||
- (n->mergeable_rx_bufs &&
- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
- virtio_queue_set_notification(q->rx_vq, 1);
-
- /* To avoid a race condition where the guest has made some buffers
- * available after the above check but before notification was
- * enabled, check for available buffers again.
- */
- if (virtio_queue_empty(q->rx_vq) ||
- (n->mergeable_rx_bufs &&
- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
+
+ while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
+ opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
+ bufsize, 0);
+ /* Buffer is enough, disable notifiaction */
+ if (bufsize <= in_bytes) {
+ break;
+ }
+
+ if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
+ /* Guest has added some buffers, try again */
+ continue;
+ } else {
return 0;
}
}
virtio_queue_set_notification(q->rx_vq, 0);
+
return 1;
}
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fd2dfe3a6b..08fba6b2d8 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -743,6 +743,60 @@ int virtio_queue_empty(VirtQueue *vq)
}
}
+static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+ if (unlikely(!vq->vring.avail)) {
+ return false;
+ }
+
+ return (uint16_t)shadow_idx != vring_avail_idx(vq);
+}
+
+static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+ VRingPackedDesc desc;
+ VRingMemoryRegionCaches *caches;
+
+ if (unlikely(!vq->vring.desc)) {
+ return false;
+ }
+
+ caches = vring_get_region_caches(vq);
+ if (!caches) {
+ return false;
+ }
+
+ vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
+ shadow_idx, true);
+
+ return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter);
+}
+
+static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+ if (virtio_device_disabled(vq->vdev)) {
+ return false;
+ }
+
+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+ return virtio_queue_packed_poll(vq, shadow_idx);
+ } else {
+ return virtio_queue_split_poll(vq, shadow_idx);
+ }
+}
+
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
+ int opaque)
+{
+ virtio_queue_set_notification(vq, 1);
+
+ if (opaque >= 0) {
+ return virtio_queue_poll(vq, (unsigned)opaque);
+ } else {
+ return false;
+ }
+}
+
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
@@ -1330,9 +1384,9 @@ err:
goto done;
}
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
- unsigned int *out_bytes,
- unsigned max_in_bytes, unsigned max_out_bytes)
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+ unsigned int *out_bytes, unsigned max_in_bytes,
+ unsigned max_out_bytes)
{
uint16_t desc_size;
VRingMemoryRegionCaches *caches;
@@ -1365,7 +1419,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
caches);
}
- return;
+ return (int)vq->shadow_avail_idx;
err:
if (in_bytes) {
*in_bytes = 0;
@@ -1373,6 +1427,8 @@ err:
if (out_bytes) {
*out_bytes = 0;
}
+
+ return -1;
}
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 2eafad17b8..8b4da92889 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -271,9 +271,13 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
VirtQueueElement *elem);
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
unsigned int out_bytes);
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
- unsigned int *out_bytes,
- unsigned max_in_bytes, unsigned max_out_bytes);
+/**
+ * Return <0 on error or an opaque >=0 to pass to
+ * virtio_queue_enable_notification_and_check on success.
+ */
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+ unsigned int *out_bytes, unsigned max_in_bytes,
+ unsigned max_out_bytes);
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq);
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
@@ -307,6 +311,17 @@ int virtio_queue_ready(VirtQueue *vq);
int virtio_queue_empty(VirtQueue *vq);
+/**
+ * Enable notification and check whether guest has added some
+ * buffers since last call to virtqueue_get_avail_bytes.
+ *
+ * @opaque: value returned from virtqueue_get_avail_bytes
+ */
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
+ int opaque);
+
+void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t idx);
+
/* Host binding interface. */
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr);

View File

@ -1,70 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Tue, 9 Jul 2024 13:34:44 +0100
Subject: [PATCH] net: Reinstate '-net nic, model=help' output as documented in
man page
While refactoring the NIC initialization code, I broke '-net nic,model=help'
which no longer outputs a list of available NIC models.
Fixes: 2cdeca04adab ("net: report list of available models according to platform")
Cc: qemu-stable@nongnu.org
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 64f75f57f9d2c8c12ac6d9355fa5d3a2af5879ca)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
net/net.c | 25 ++++++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
diff --git a/net/net.c b/net/net.c
index a2f0c828bb..e6ca2529bb 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1150,6 +1150,21 @@ NICInfo *qemu_find_nic_info(const char *typename, bool match_default,
return NULL;
}
+static bool is_nic_model_help_option(const char *model)
+{
+ if (model && is_help_option(model)) {
+ /*
+ * Trigger the help output by instantiating the hash table which
+ * will gather tha available models as they get registered.
+ */
+ if (!nic_model_help) {
+ nic_model_help = g_hash_table_new_full(g_str_hash, g_str_equal,
+ g_free, NULL);
+ }
+ return true;
+ }
+ return false;
+}
/* "I have created a device. Please configure it if you can" */
bool qemu_configure_nic_device(DeviceState *dev, bool match_default,
@@ -1733,6 +1748,12 @@ void net_check_clients(void)
static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
{
+ const char *model = qemu_opt_get_del(opts, "model");
+
+ if (is_nic_model_help_option(model)) {
+ return 0;
+ }
+
return net_client_init(opts, false, errp);
}
@@ -1789,9 +1810,7 @@ static int net_param_nic(void *dummy, QemuOpts *opts, Error **errp)
memset(ni, 0, sizeof(*ni));
ni->model = qemu_opt_get_del(opts, "model");
- if (!nic_model_help && !g_strcmp0(ni->model, "help")) {
- nic_model_help = g_hash_table_new_full(g_str_hash, g_str_equal,
- g_free, NULL);
+ if (is_nic_model_help_option(ni->model)) {
return 0;
}

View File

@ -1,32 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Tue, 6 Aug 2024 18:21:37 +0100
Subject: [PATCH] net: Fix '-net nic,model=' for non-help arguments
Oops, don't *delete* the model option when checking for 'help'.
Fixes: 64f75f57f9d2 ("net: Reinstate '-net nic, model=help' output as documented in man page")
Reported-by: Hans <sungdgdhtryrt@gmail.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Cc: qemu-stable@nongnu.org
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit fa62cb989a9146c82f8f172715042852f5d36200)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
net/net.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/net.c b/net/net.c
index e6ca2529bb..897bb936cf 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1748,7 +1748,7 @@ void net_check_clients(void)
static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
{
- const char *model = qemu_opt_get_del(opts, "model");
+ const char *model = qemu_opt_get(opts, "model");
if (is_nic_model_help_option(model)) {
return 0;

View File

@ -1,57 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 22 Jul 2024 18:29:54 +0100
Subject: [PATCH] target/arm: Don't assert for 128-bit tile accesses when SVL
is 128
For an instruction which accesses a 128-bit element tile when
the SVL is also 128 (for example MOV z0.Q, p0/M, ZA0H.Q[w0,0]),
we will assert in get_tile_rowcol():
qemu-system-aarch64: ../../tcg/tcg-op.c:926: tcg_gen_deposit_z_i32: Assertion `len > 0' failed.
This happens because we calculate
len = ctz32(streaming_vec_reg_size(s)) - esz;
but if the SVL and the element size are the same len is 0, and
the deposit operation asserts.
In this case the ZA storage contains exactly one 128 bit
element ZA tile, and the horizontal or vertical slice is just
that tile. This means that regardless of the index value in
the Ws register, we always access that tile. (In pseudocode terms,
we calculate (index + offset) MOD 1, which is 0.)
Special case the len == 0 case to avoid hitting the assertion
in tcg_gen_deposit_z_i32().
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240722172957.1041231-2-peter.maydell@linaro.org
(cherry picked from commit 56f1c0db928aae0b83fd91c89ddb226b137e2b21)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/arm/tcg/translate-sme.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 185a8a917b..a50a419af2 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -49,7 +49,15 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
/* Prepare a power-of-two modulo via extraction of @len bits. */
len = ctz32(streaming_vec_reg_size(s)) - esz;
- if (vertical) {
+ if (!len) {
+ /*
+ * SVL is 128 and the element size is 128. There is exactly
+ * one 128x128 tile in the ZA storage, and so we calculate
+ * (Rs + imm) MOD 1, which is always 0. We need to special case
+ * this because TCG doesn't allow deposit ops with len 0.
+ */
+ tcg_gen_movi_i32(tmp, 0);
+ } else if (vertical) {
/*
* Compute the byte offset of the index within the tile:
* (index % (svl / size)) * size

View File

@ -1,59 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 22 Jul 2024 18:29:55 +0100
Subject: [PATCH] target/arm: Fix UMOPA/UMOPS of 16-bit values
The UMOPA/UMOPS instructions are supposed to multiply unsigned 8 or
16 bit elements and accumulate the products into a 64-bit element.
In the Arm ARM pseudocode, this is done with the usual
infinite-precision signed arithmetic. However our implementation
doesn't quite get it right, because in the DEF_IMOP_64() macro we do:
sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0);
where NTYPE and MTYPE are uint16_t or int16_t. In the uint16_t case,
the C usual arithmetic conversions mean the values are converted to
"int" type and the multiply is done as a 32-bit multiply. This means
that if the inputs are, for example, 0xffff and 0xffff then the
result is 0xFFFE0001 as an int, which is then promoted to uint64_t
for the accumulation into sum; this promotion incorrectly sign
extends the multiply.
Avoid the incorrect sign extension by casting to int64_t before
the multiply, so we do the multiply as 64-bit signed arithmetic,
which is a type large enough that the multiply can never
overflow into the sign bit.
(The equivalent 8-bit operations in DEF_IMOP_32() are fine, because
the 8-bit multiplies can never overflow into the sign bit of a
32-bit integer.)
Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2372
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240722172957.1041231-3-peter.maydell@linaro.org
(cherry picked from commit ea3f5a90f036734522e9af3bffd77e69e9f47355)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/arm/tcg/sme_helper.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 5a6dd76489..f9001f5213 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -1146,10 +1146,10 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
uint64_t sum = 0; \
/* Apply P to N as a mask, making the inactive elements 0. */ \
n &= expand_pred_h(p); \
- sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
- sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
- sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
- sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
+ sum += (int64_t)(NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
+ sum += (int64_t)(NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
+ sum += (int64_t)(NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
+ sum += (int64_t)(NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
return neg ? a - sum : a + sum; \
}

View File

@ -1,62 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 22 Jul 2024 18:29:56 +0100
Subject: [PATCH] target/arm: Avoid shifts by -1 in tszimm_shr() and
tszimm_shl()
The function tszimm_esz() returns a shift amount, or possibly -1 in
certain cases that correspond to unallocated encodings in the
instruction set. We catch these later in the trans_ functions
(generally with an "a->esz < 0" check), but before we do the
decodetree-generated code will also call tszimm_shr() or tszimm_shl(),
which will use the tszimm_esz() return value as a shift count without
checking that it is not negative, which is undefined behaviour.
Avoid the UB by checking the return value in tszimm_shr() and
tszimm_shl().
Cc: qemu-stable@nongnu.org
Resolves: Coverity CID 1547617, 1547694
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240722172957.1041231-4-peter.maydell@linaro.org
(cherry picked from commit 76916dfa89e8900639c1055c07a295c06628a0bc)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/arm/tcg/translate-sve.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index ada05aa530..466a19c25a 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -50,13 +50,27 @@ static int tszimm_esz(DisasContext *s, int x)
static int tszimm_shr(DisasContext *s, int x)
{
- return (16 << tszimm_esz(s, x)) - x;
+ /*
+ * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the
+ * trans function will check for esz < 0), so we can return any
+ * value we like from here in that case as long as we avoid UB.
+ */
+ int esz = tszimm_esz(s, x);
+ if (esz < 0) {
+ return esz;
+ }
+ return (16 << esz) - x;
}
/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
- return x - (8 << tszimm_esz(s, x));
+ /* As with tszimm_shr(), value will be unused if esz < 0 */
+ int esz = tszimm_esz(s, x);
+ if (esz < 0) {
+ return esz;
+ }
+ return x - (8 << esz);
}
/* The SH bit is in bit 8. Extract the low 8 and shift. */

View File

@ -1,41 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 22 Jul 2024 18:29:57 +0100
Subject: [PATCH] target/arm: Ignore SMCR_EL2.LEN and SVCR_EL2.LEN if EL2 is
not enabled
When determining the current vector length, the SMCR_EL2.LEN and
SVCR_EL2.LEN settings should only be considered if EL2 is enabled
(compare the pseudocode CurrentSVL and CurrentNSVL which call
EL2Enabled()).
We were checking against ARM_FEATURE_EL2 rather than calling
arm_is_el2_enabled(), which meant that we would look at
SMCR_EL2/SVCR_EL2 when in Secure EL1 or Secure EL0 even if Secure EL2
was not enabled.
Use the correct check in sve_vqm1_for_el_sm().
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240722172957.1041231-5-peter.maydell@linaro.org
(cherry picked from commit f573ac059ed060234fcef4299fae9e500d357c33)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/arm/helper.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index a620481d7c..42044ae14b 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -7191,7 +7191,7 @@ uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm)
if (el <= 1 && !el_is_in_host(env, el)) {
len = MIN(len, 0xf & (uint32_t)cr[1]);
}
- if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+ if (el <= 2 && arm_is_el2_enabled(env)) {
len = MIN(len, 0xf & (uint32_t)cr[2]);
}
if (arm_feature(env, ARM_FEATURE_EL3)) {

View File

@ -1,164 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 1 Aug 2024 10:15:03 +0100
Subject: [PATCH] target/arm: Handle denormals correctly for FMOPA (widening)
The FMOPA (widening) SME instruction takes pairs of half-precision
floating point values, widens them to single-precision, does a
two-way dot product and accumulates the results into a
single-precision destination. We don't quite correctly handle the
FPCR bits FZ and FZ16 which control flushing of denormal inputs and
outputs. This is because at the moment we pass a single float_status
value to the helper function, which then uses that configuration for
all the fp operations it does. However, because the inputs to this
operation are float16 and the outputs are float32 we need to use the
fp_status_f16 for the float16 input widening but the normal fp_status
for everything else. Otherwise we will apply the flushing control
FPCR.FZ16 to the 32-bit output rather than the FPCR.FZ control, and
incorrectly flush a denormal output to zero when we should not (or
vice-versa).
(In commit 207d30b5fdb5b we tried to fix the FZ handling but
didn't get it right, switching from "use FPCR.FZ for everything" to
"use FPCR.FZ16 for everything".)
Pass the CPU env to the sme_fmopa_h helper instead of an fp_status
pointer, and have the helper pass an extra fp_status into the
f16_dotadd() function so that we can use the right status for the
right parts of this operation.
Cc: qemu-stable@nongnu.org
Fixes: 207d30b5fdb5 ("target/arm: Use FPST_F16 for SME FMOPA (widening)")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2373
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 55f9f4ee018c5ccea81d8c8c586756d7711ae46f)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/arm/tcg/helper-sme.h | 2 +-
target/arm/tcg/sme_helper.c | 39 +++++++++++++++++++++++-----------
target/arm/tcg/translate-sme.c | 25 ++++++++++++++++++++--
3 files changed, 51 insertions(+), 15 deletions(-)
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
index 27eef49a11..d22bf9d21b 100644
--- a/target/arm/tcg/helper-sme.h
+++ b/target/arm/tcg/helper-sme.h
@@ -121,7 +121,7 @@ DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index f9001f5213..3906bb51c0 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -976,12 +976,23 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
}
static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
- float_status *s_std, float_status *s_odd)
+ float_status *s_f16, float_status *s_std,
+ float_status *s_odd)
{
- float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
- float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
- float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
- float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
+ /*
+ * We need three different float_status for different parts of this
+ * operation:
+ * - the input conversion of the float16 values must use the
+ * f16-specific float_status, so that the FPCR.FZ16 control is applied
+ * - operations on float32 including the final accumulation must use
+ * the normal float_status, so that FPCR.FZ is applied
+ * - we have pre-set-up copy of s_std which is set to round-to-odd,
+ * for the multiply (see below)
+ */
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16);
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_f16);
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16);
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_f16);
float64 t64;
float32 t32;
@@ -1003,20 +1014,23 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
}
void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
- void *vpm, void *vst, uint32_t desc)
+ void *vpm, CPUARMState *env, uint32_t desc)
{
intptr_t row, col, oprsz = simd_maxsz(desc);
uint32_t neg = simd_data(desc) * 0x80008000u;
uint16_t *pn = vpn, *pm = vpm;
- float_status fpst_odd, fpst_std;
+ float_status fpst_odd, fpst_std, fpst_f16;
/*
- * Make a copy of float_status because this operation does not
- * update the cumulative fp exception status. It also produces
- * default nans. Make a second copy with round-to-odd -- see above.
+ * Make copies of fp_status and fp_status_f16, because this operation
+ * does not update the cumulative fp exception status. It also
+ * produces default NaNs. We also need a second copy of fp_status with
+ * round-to-odd -- see above.
*/
- fpst_std = *(float_status *)vst;
+ fpst_f16 = env->vfp.fp_status_f16;
+ fpst_std = env->vfp.fp_status;
set_default_nan_mode(true, &fpst_std);
+ set_default_nan_mode(true, &fpst_f16);
fpst_odd = fpst_std;
set_float_rounding_mode(float_round_to_odd, &fpst_odd);
@@ -1036,7 +1050,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
uint32_t m = *(uint32_t *)(vzm + H1_4(col));
m = f16mop_adj_pair(m, pcol, 0);
- *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
+ *a = f16_dotadd(*a, n, m,
+ &fpst_f16, &fpst_std, &fpst_odd);
}
col += 4;
pcol >>= 4;
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index a50a419af2..ae42ddef7b 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -334,8 +334,29 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
return true;
}
-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
- MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
+static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz,
+ gen_helper_gvec_5_ptr *fn)
+{
+ int svl = streaming_vec_reg_size(s);
+ uint32_t desc = simd_desc(svl, svl, a->sub);
+ TCGv_ptr za, zn, zm, pn, pm;
+
+ if (!sme_smza_enabled_check(s)) {
+ return true;
+ }
+
+ za = get_tile(s, esz, a->zad);
+ zn = vec_full_reg_ptr(s, a->zn);
+ zm = vec_full_reg_ptr(s, a->zm);
+ pn = pred_full_reg_ptr(s, a->pn);
+ pm = pred_full_reg_ptr(s, a->pm);
+
+ fn(za, zn, zm, pn, pm, tcg_env, tcg_constant_i32(desc));
+ return true;
+}
+
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
+ MO_32, gen_helper_sme_fmopa_h)
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,

View File

@ -1,39 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Mathieu--Drif?=
<clement.mathieu--drif@eviden.com>
Date: Tue, 9 Jul 2024 14:26:08 +0000
Subject: [PATCH] intel_iommu: fix FRCD construction macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The constant must be unsigned, otherwise the two's complement
overrides the other fields when a PASID is present.
Fixes: 1b2b12376c8a ("intel-iommu: PASID support")
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
Message-Id: <20240709142557.317271-2-clement.mathieu--drif@eviden.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit a3c8d7e38550c3d5a46e6fa94ffadfa625a4861d)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/i386/intel_iommu_internal.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f8cf99bddf..cbc4030031 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -267,7 +267,7 @@
/* For the low 64-bit of 128-bit */
#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
-#define VTD_FRCD_PP(val) (((val) & 0x1) << 31)
+#define VTD_FRCD_PP(val) (((val) & 0x1ULL) << 31)
#define VTD_FRCD_IR_IDX(val) (((val) & 0xffffULL) << 48)
/* DMA Remapping Fault Conditions */

View File

@ -1,33 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Mon, 12 Aug 2024 12:58:42 +1000
Subject: [PATCH] target/i386: Do not apply REX to MMX operands
Cc: qemu-stable@nongnu.org
Fixes: b3e22b2318a ("target/i386: add core of new i386 decoder")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2495
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Link: https://lore.kernel.org/r/20240812025844.58956-2-richard.henderson@linaro.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 416f2b16c02c618c0f233372ebfe343f9ee667d4)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/i386/tcg/decode-new.c.inc | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 4209d59ca8..09b8d2314a 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1271,7 +1271,10 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
op->unit = X86_OP_SSE;
}
get_reg:
- op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
+ op->n = ((get_modrm(s, env) >> 3) & 7);
+ if (op->unit != X86_OP_MMX) {
+ op->n |= REX_R(s);
+ }
break;
case X86_TYPE_E: /* ALU modrm operand */

View File

@ -1,42 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Date: Fri, 9 Aug 2024 14:13:40 +0200
Subject: [PATCH] module: Prevent crash by resetting local_err in
module_load_qom_all()
Set local_err to NULL after it has been freed in error_report_err(). This
avoids triggering assert(*errp == NULL) failure in error_setv() when
local_err is reused in the loop.
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Reviewed-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Denis V. Lunev <den@openvz.org>
Link: https://lore.kernel.org/r/20240809121340.992049-2-alexander.ivanov@virtuozzo.com
[Do the same by moving the declaration instead. - Paolo]
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 940d802b24e63650e0eacad3714e2ce171cba17c)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
util/module.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/util/module.c b/util/module.c
index 32e263163c..3eb0f06df1 100644
--- a/util/module.c
+++ b/util/module.c
@@ -354,13 +354,13 @@ int module_load_qom(const char *type, Error **errp)
void module_load_qom_all(void)
{
const QemuModinfo *modinfo;
- Error *local_err = NULL;
if (module_loaded_qom_all) {
return;
}
for (modinfo = module_info; modinfo->name != NULL; modinfo++) {
+ Error *local_err = NULL;
if (!modinfo->objs) {
continue;
}

View File

@ -1,164 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 7 Aug 2024 08:50:01 -0500
Subject: [PATCH] nbd/server: Plumb in new args to nbd_client_add()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Upcoming patches to fix a CVE need to track an opaque pointer passed
in by the owner of a client object, as well as a request for a time
limit on how fast negotiation must complete. Prepare for that by
changing the signature of nbd_client_new() and adding an accessor to
get at the opaque pointer, although for now the two servers
(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though
they pass in a new default timeout value.
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240807174943.771624-11-eblake@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan]
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
blockdev-nbd.c | 6 ++++--
include/block/nbd.h | 11 ++++++++++-
nbd/server.c | 20 +++++++++++++++++---
qemu-nbd.c | 4 +++-
4 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 213012435f..267a1de903 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
nbd_update_server_watch(nbd_server);
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz,
- nbd_blockdev_client_closed);
+ /* TODO - expose handshake timeout as QMP option */
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
+ nbd_server->tlscreds, nbd_server->tlsauthz,
+ nbd_blockdev_client_closed, NULL);
}
static void nbd_update_server_watch(NBDServerData *s)
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 4e7bd6342f..1d4d65922d 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts;
extern const BlockExportDriver blk_exp_nbd;
+/*
+ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must
+ * succeed at NBD_OPT_GO before being forcefully dropped as too slow.
+ */
+#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
+
/* Handshake phase structs - this struct is passed on the wire */
typedef struct NBDOption {
@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp);
NBDExport *nbd_export_find(const char *name);
void nbd_client_new(QIOChannelSocket *sioc,
+ uint32_t handshake_max_secs,
QCryptoTLSCreds *tlscreds,
const char *tlsauthz,
- void (*close_fn)(NBDClient *, bool));
+ void (*close_fn)(NBDClient *, bool),
+ void *owner);
+void *nbd_client_owner(NBDClient *client);
void nbd_client_get(NBDClient *client);
void nbd_client_put(NBDClient *client);
diff --git a/nbd/server.c b/nbd/server.c
index 892797bb11..e50012499f 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -124,12 +124,14 @@ struct NBDMetaContexts {
struct NBDClient {
int refcount; /* atomic */
void (*close_fn)(NBDClient *client, bool negotiated);
+ void *owner;
QemuMutex lock;
NBDExport *exp;
QCryptoTLSCreds *tlscreds;
char *tlsauthz;
+ uint32_t handshake_max_secs;
QIOChannelSocket *sioc; /* The underlying data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
qemu_co_mutex_init(&client->send_lock);
+ /* TODO - utilize client->handshake_max_secs */
if (nbd_negotiate(client, &local_err)) {
if (local_err) {
error_report_err(local_err);
@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
}
/*
- * Create a new client listener using the given channel @sioc.
+ * Create a new client listener using the given channel @sioc and @owner.
* Begin servicing it in a coroutine. When the connection closes, call
- * @close_fn with an indication of whether the client completed negotiation.
+ * @close_fn with an indication of whether the client completed negotiation
+ * within @handshake_max_secs seconds (0 for unbounded).
*/
void nbd_client_new(QIOChannelSocket *sioc,
+ uint32_t handshake_max_secs,
QCryptoTLSCreds *tlscreds,
const char *tlsauthz,
- void (*close_fn)(NBDClient *, bool))
+ void (*close_fn)(NBDClient *, bool),
+ void *owner)
{
NBDClient *client;
Coroutine *co;
@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc,
object_ref(OBJECT(client->tlscreds));
}
client->tlsauthz = g_strdup(tlsauthz);
+ client->handshake_max_secs = handshake_max_secs;
client->sioc = sioc;
qio_channel_set_delay(QIO_CHANNEL(sioc), false);
object_ref(OBJECT(client->sioc));
client->ioc = QIO_CHANNEL(sioc);
object_ref(OBJECT(client->ioc));
client->close_fn = close_fn;
+ client->owner = owner;
co = qemu_coroutine_create(nbd_co_client_start, client);
qemu_coroutine_enter(co);
}
+
+void *
+nbd_client_owner(NBDClient *client)
+{
+ return client->owner;
+}
diff --git a/qemu-nbd.c b/qemu-nbd.c
index d7b3ccab21..48e2fa5858 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
nb_fds++;
nbd_update_server_watch();
- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed);
+ /* TODO - expose handshake timeout as command line option */
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
+ tlscreds, tlsauthz, nbd_client_closed, NULL);
}
static void nbd_update_server_watch(void)

View File

@ -1,172 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 6 Aug 2024 13:53:00 -0500
Subject: [PATCH] nbd/server: CVE-2024-7409: Cap default max-connections to 100
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Allowing an unlimited number of clients to any web service is a recipe
for a rudimentary denial of service attack: the client merely needs to
open lots of sockets without closing them, until qemu no longer has
any more fds available to allocate.
For qemu-nbd, we default to allowing only 1 connection unless more are
explicitly asked for (-e or --shared); this was historically picked as
a nice default (without an explicit -t, a non-persistent qemu-nbd goes
away after a client disconnects, without needing any additional
follow-up commands), and we are not going to change that interface now
(besides, someday we want to point people towards qemu-storage-daemon
instead of qemu-nbd).
But for qemu proper, and the newer qemu-storage-daemon, the QMP
nbd-server-start command has historically had a default of unlimited
number of connections, in part because unlike qemu-nbd it is
inherently persistent until nbd-server-stop. Allowing multiple client
sockets is particularly useful for clients that can take advantage of
MULTI_CONN (creating parallel sockets to increase throughput),
although known clients that do so (such as libnbd's nbdcopy) typically
use only 8 or 16 connections (the benefits of scaling diminish once
more sockets are competing for kernel attention). Picking a number
large enough for typical use cases, but not unlimited, makes it
slightly harder for a malicious client to perform a denial of service
merely by opening lots of connections without progressing through the
handshake.
This change does not eliminate CVE-2024-7409 on its own, but reduces
the chance for fd exhaustion or unlimited memory usage as an attack
surface. On the other hand, by itself, it makes it more obvious that
with a finite limit, we have the problem of an unauthenticated client
holding 100 fds opened as a way to block out a legitimate client from
being able to connect; thus, later patches will further add timeouts
to reject clients that are not making progress.
This is an INTENTIONAL change in behavior, and will break any client
of nbd-server-start that was not passing an explicit max-connections
parameter, yet expects more than 100 simultaneous connections. We are
not aware of any such client (as stated above, most clients aware of
MULTI_CONN get by just fine on 8 or 16 connections, and probably cope
with later connections failing by relying on the earlier connections;
libvirt has not yet been passing max-connections, but generally
creates NBD servers with the intent for a single client for the sake
of live storage migration; meanwhile, the KubeSAN project anticipates
a large cluster sharing multiple clients [up to 8 per node, and up to
100 nodes in a cluster], but it currently uses qemu-nbd with an
explicit --shared=0 rather than qemu-storage-daemon with
nbd-server-start).
We considered using a deprecation period (declare that omitting
max-connections is deprecated, and make it mandatory in 3 releases -
then we don't need to pick an arbitrary default); that has zero risk
of breaking any apps that accidentally depended on more than 100
connections, and where such breakage might not be noticed under unit
testing but only under the larger loads of production usage. But it
does not close the denial-of-service hole until far into the future,
and requires all apps to change to add the parameter even if 100 was
good enough. It also has a drawback that any app (like libvirt) that
is accidentally relying on an unlimited default should seriously
consider their own CVE now, at which point they are going to change to
pass explicit max-connections sooner than waiting for 3 qemu releases.
Finally, if our changed default breaks an app, that app can always
pass in an explicit max-connections with a larger value.
It is also intentional that the HMP interface to nbd-server-start is
not changed to expose max-connections (any client needing to fine-tune
things should be using QMP).
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240807174943.771624-12-eblake@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
[ericb: Expand commit message to summarize Dan's argument for why we
break corner-case back-compat behavior without a deprecation period]
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 3 ++-
blockdev-nbd.c | 8 ++++++++
include/block/nbd.h | 7 +++++++
qapi/block-export.json | 4 ++--
4 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d954bec6f1..bdf2eb50b6 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
goto exit;
}
- nbd_server_start(addr, NULL, NULL, 0, &local_err);
+ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS,
+ &local_err);
qapi_free_SocketAddress(addr);
if (local_err != NULL) {
goto exit;
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 267a1de903..24ba5382db 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
void nbd_server_start_options(NbdServerOptions *arg, Error **errp)
{
+ if (!arg->has_max_connections) {
+ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
+ }
+
nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz,
arg->max_connections, errp);
}
@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr,
{
SocketAddress *addr_flat = socket_address_flatten(addr);
+ if (!has_max_connections) {
+ max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
+ }
+
nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp);
qapi_free_SocketAddress(addr_flat);
}
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 1d4d65922d..d4f8b21aec 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd;
*/
#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
+/*
+ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at
+ * once; must be large enough to allow a MULTI_CONN-aware client like
+ * nbdcopy to create its typical number of 8-16 sockets.
+ */
+#define NBD_DEFAULT_MAX_CONNECTIONS 100
+
/* Handshake phase structs - this struct is passed on the wire */
typedef struct NBDOption {
diff --git a/qapi/block-export.json b/qapi/block-export.json
index 3919a2d5b9..f45e4fd481 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -28,7 +28,7 @@
# @max-connections: The maximum number of connections to allow at the
# same time, 0 for unlimited. Setting this to 1 also stops the
# server from advertising multiple client support (since 5.2;
-# default: 0)
+# default: 100)
#
# Since: 4.2
##
@@ -63,7 +63,7 @@
# @max-connections: The maximum number of connections to allow at the
# same time, 0 for unlimited. Setting this to 1 also stops the
# server from advertising multiple client support (since 5.2;
-# default: 0).
+# default: 100).
#
# Errors:
# - if the server is already running

View File

@ -1,123 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Thu, 8 Aug 2024 16:05:08 -0500
Subject: [PATCH] nbd/server: CVE-2024-7409: Drop non-negotiating clients
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A client that opens a socket but does not negotiate is merely hogging
qemu's resources (an open fd and a small amount of memory); and a
malicious client that can access the port where NBD is listening can
attempt a denial of service attack by intentionally opening and
abandoning lots of unfinished connections. The previous patch put a
default bound on the number of such ongoing connections, but once that
limit is hit, no more clients can connect (including legitimate ones).
The solution is to insist that clients complete handshake within a
reasonable time limit, defaulting to 10 seconds. A client that has
not successfully completed NBD_OPT_GO by then (including the case
where the client didn't know TLS credentials to even reach the point
of NBD_OPT_GO) is wasting our time and does not deserve to stay
connected. Later patches will allow fine-tuning the limit away from
the default value (including disabling it for doing integration
testing of the handshake process itself).
Note that this patch in isolation actually makes it more likely to see
qemu SEGV after nbd-server-stop, as any client socket still connected
when the server shuts down will now be closed after 10 seconds rather
than at the client's whims. That will be addressed in the next patch.
For a demo of this patch in action:
$ qemu-nbd -f raw -r -t -e 10 file &
$ nbdsh --opt-mode -c '
H = list()
for i in range(20):
print(i)
H.insert(i, nbd.NBD())
H[i].set_opt_mode(True)
H[i].connect_uri("nbd://localhost")
'
$ kill $!
where later connections get to start progressing once earlier ones are
forcefully dropped for taking too long, rather than hanging.
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240807174943.771624-13-eblake@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
[eblake: rebase to changes earlier in series, reduce scope of timer]
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
nbd/server.c | 28 +++++++++++++++++++++++++++-
nbd/trace-events | 1 +
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/nbd/server.c b/nbd/server.c
index e50012499f..39285cc971 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client)
}
}
+static void nbd_handshake_timer_cb(void *opaque)
+{
+ QIOChannel *ioc = opaque;
+
+ trace_nbd_handshake_timer_cb();
+ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+}
+
static coroutine_fn void nbd_co_client_start(void *opaque)
{
NBDClient *client = opaque;
Error *local_err = NULL;
+ QEMUTimer *handshake_timer = NULL;
qemu_co_mutex_init(&client->send_lock);
- /* TODO - utilize client->handshake_max_secs */
+ /*
+ * Create a timer to bound the time spent in negotiation. If the
+ * timer expires, it is likely nbd_negotiate will fail because the
+ * socket was shutdown.
+ */
+ if (client->handshake_max_secs > 0) {
+ handshake_timer = aio_timer_new(qemu_get_aio_context(),
+ QEMU_CLOCK_REALTIME,
+ SCALE_NS,
+ nbd_handshake_timer_cb,
+ client->sioc);
+ timer_mod(handshake_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+ client->handshake_max_secs * NANOSECONDS_PER_SECOND);
+ }
+
if (nbd_negotiate(client, &local_err)) {
if (local_err) {
error_report_err(local_err);
}
+ timer_free(handshake_timer);
client_close(client, false);
return;
}
+ timer_free(handshake_timer);
WITH_QEMU_LOCK_GUARD(&client->lock) {
nbd_client_receive_next_request(client);
}
diff --git a/nbd/trace-events b/nbd/trace-events
index 00ae3216a1..cbd0a4ab7e 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload
nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64
nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32
nbd_trip(void) "Reading request"
+nbd_handshake_timer_cb(void) "client took too long to negotiate"
# client-connection.c
nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64

View File

@ -1,161 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 7 Aug 2024 12:23:13 -0500
Subject: [PATCH] nbd/server: CVE-2024-7409: Close stray clients at server-stop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A malicious client can attempt to connect to an NBD server, and then
intentionally delay progress in the handshake, including if it does
not know the TLS secrets. Although the previous two patches reduce
this behavior by capping the default max-connections parameter and
killing slow clients, they did not eliminate the possibility of a
client waiting to close the socket until after the QMP nbd-server-stop
command is executed, at which point qemu would SEGV when trying to
dereference the NULL nbd_server global which is no longer present.
This amounts to a denial of service attack. Worse, if another NBD
server is started before the malicious client disconnects, I cannot
rule out additional adverse effects when the old client interferes
with the connection count of the new server (although the most likely
is a crash due to an assertion failure when checking
nbd_server->connections > 0).
For environments without this patch, the CVE can be mitigated by
ensuring (such as via a firewall) that only trusted clients can
connect to an NBD server. Note that using frameworks like libvirt
that ensure that TLS is used and that nbd-server-stop is not executed
while any trusted clients are still connected will only help if there
is also no possibility for an untrusted client to open a connection
but then stall on the NBD handshake.
Given the previous patches, it would be possible to guarantee that no
clients remain connected by having nbd-server-stop sleep for longer
than the default handshake deadline before finally freeing the global
nbd_server object, but that could make QMP non-responsive for a long
time. So instead, this patch fixes the problem by tracking all client
sockets opened while the server is running, and forcefully closing any
such sockets remaining without a completed handshake at the time of
nbd-server-stop, then waiting until the coroutines servicing those
sockets notice the state change. nbd-server-stop now has a second
AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the
blk_exp_close_all_type() that disconnects all clients that completed
handshakes), but forced socket shutdown is enough to progress the
coroutines and quickly tear down all clients before the server is
freed, thus finally fixing the CVE.
This patch relies heavily on the fact that nbd/server.c guarantees
that it only calls nbd_blockdev_client_closed() from the main loop
(see the assertion in nbd_client_put() and the hoops used in
nbd_client_put_nonzero() to achieve that); if we did not have that
guarantee, we would also need a mutex protecting our accesses of the
list of connections to survive re-entrancy from independent iothreads.
Although I did not actually try to test old builds, it looks like this
problem has existed since at least commit 862172f45c (v2.12.0, 2017) -
even back when that patch started using a QIONetListener to handle
listening on multiple sockets, nbd_server_free() was already unaware
that the nbd_blockdev_client_closed callback can be reached later by a
client thread that has not completed handshakes (and therefore the
client's socket never got added to the list closed in
nbd_export_close_all), despite that patch intentionally tearing down
the QIONetListener to prevent new clients.
Reported-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Fixes: CVE-2024-7409
CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240807174943.771624-14-eblake@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 24ba5382db..f73409ae49 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -21,12 +21,18 @@
#include "io/channel-socket.h"
#include "io/net-listener.h"
+typedef struct NBDConn {
+ QIOChannelSocket *cioc;
+ QLIST_ENTRY(NBDConn) next;
+} NBDConn;
+
typedef struct NBDServerData {
QIONetListener *listener;
QCryptoTLSCreds *tlscreds;
char *tlsauthz;
uint32_t max_connections;
uint32_t connections;
+ QLIST_HEAD(, NBDConn) conns;
} NBDServerData;
static NBDServerData *nbd_server;
@@ -51,6 +57,14 @@ int nbd_server_max_connections(void)
static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
{
+ NBDConn *conn = nbd_client_owner(client);
+
+ assert(qemu_in_main_thread() && nbd_server);
+
+ object_unref(OBJECT(conn->cioc));
+ QLIST_REMOVE(conn, next);
+ g_free(conn);
+
nbd_client_put(client);
assert(nbd_server->connections > 0);
nbd_server->connections--;
@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
gpointer opaque)
{
+ NBDConn *conn = g_new0(NBDConn, 1);
+
+ assert(qemu_in_main_thread() && nbd_server);
nbd_server->connections++;
+ object_ref(OBJECT(cioc));
+ conn->cioc = cioc;
+ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next);
nbd_update_server_watch(nbd_server);
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
/* TODO - expose handshake timeout as QMP option */
nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
nbd_server->tlscreds, nbd_server->tlsauthz,
- nbd_blockdev_client_closed, NULL);
+ nbd_blockdev_client_closed, conn);
}
static void nbd_update_server_watch(NBDServerData *s)
@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s)
static void nbd_server_free(NBDServerData *server)
{
+ NBDConn *conn, *tmp;
+
if (!server) {
return;
}
+ /*
+ * Forcefully close the listener socket, and any clients that have
+ * not yet disconnected on their own.
+ */
qio_net_listener_disconnect(server->listener);
object_unref(OBJECT(server->listener));
+ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
+ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
+ NULL);
+ }
+
+ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0);
+
if (server->tlscreds) {
object_unref(OBJECT(server->tlscreds));
}

View File

@ -1,47 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
Date: Tue, 20 Aug 2024 17:11:12 +0400
Subject: [PATCH] vnc: fix crash when no console attached
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Since commit e99441a3793b5 ("ui/curses: Do not use console_select()")
qemu_text_console_put_keysym() no longer checks for NULL console
argument, which leads to a later crash:
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x00005555559ee186 in qemu_text_console_handle_keysym (s=0x0, keysym=31) at ../ui/console-vc.c:332
332 } else if (s->echo && (keysym == '\r' || keysym == '\n')) {
(gdb) bt
#0 0x00005555559ee186 in qemu_text_console_handle_keysym (s=0x0, keysym=31) at ../ui/console-vc.c:332
#1 0x00005555559e18e5 in qemu_text_console_put_keysym (s=<optimized out>, keysym=<optimized out>) at ../ui/console.c:303
#2 0x00005555559f2e88 in do_key_event (vs=vs@entry=0x5555579045c0, down=down@entry=1, keycode=keycode@entry=60, sym=sym@entry=65471) at ../ui/vnc.c:2034
#3 0x00005555559f845c in ext_key_event (vs=0x5555579045c0, down=1, sym=65471, keycode=<optimized out>) at ../ui/vnc.c:2070
#4 protocol_client_msg (vs=0x5555579045c0, data=<optimized out>, len=<optimized out>) at ../ui/vnc.c:2514
#5 0x00005555559f515c in vnc_client_read (vs=0x5555579045c0) at ../ui/vnc.c:1607
Fixes: e99441a3793b5 ("ui/curses: Do not use console_select()")
Fixes: https://issues.redhat.com/browse/RHEL-50529
Cc: qemu-stable@nongnu.org
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
(picked from https://lore.kernel.org/qemu-devel/20240820131112.1267954-1-marcandre.lureau@redhat.com/)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
ui/vnc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ui/vnc.c b/ui/vnc.c
index b3fd78022b..953ea38318 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -1935,7 +1935,7 @@ static void do_key_event(VncState *vs, int down, int keycode, int sym)
}
qkbd_state_key_event(vs->vd->kbd, qcode, down);
- if (!qemu_console_is_graphic(vs->vd->dcl.con)) {
+ if (QEMU_IS_TEXT_CONSOLE(vs->vd->dcl.con)) {
QemuTextConsole *con = QEMU_TEXT_CONSOLE(vs->vd->dcl.con);
bool numlock = qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_NUMLOCK);
bool control = qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_CTRL);

View File

@ -1,89 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Thu, 22 Aug 2024 09:35:29 -0500
Subject: [PATCH] nbd/server: CVE-2024-7409: Avoid use-after-free when closing
server
Commit 3e7ef738 plugged the use-after-free of the global nbd_server
object, but overlooked a use-after-free of nbd_server->listener.
Although this race is harder to hit, notice that our shutdown path
first drops the reference count of nbd_server->listener, then triggers
actions that can result in a pending client reaching the
nbd_blockdev_client_closed() callback, which in turn calls
qio_net_listener_set_client_func on a potentially stale object.
If we know we don't want any more clients to connect, and have already
told the listener socket to shut down, then we should not be trying to
update the listener socket's associated function.
Reproducer:
> #!/usr/bin/python3
>
> import os
> from threading import Thread
>
> def start_stop():
> while 1:
> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start",
+"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'')
> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'')
>
> def nbd_list():
> while 1:
> os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock')
>
> def test():
> sst = Thread(target=start_stop)
> sst.start()
> nlt = Thread(target=nbd_list)
> nlt.start()
>
> sst.join()
> nlt.join()
>
> test()
Fixes: CVE-2024-7409
Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop")
CC: qemu-stable@nongnu.org
Reported-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240822143617.800419-2-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 3874f5f73c441c52f1c699c848d463b0eda01e4c)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
blockdev-nbd.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index f73409ae49..b36f41b7c5 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -92,10 +92,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
static void nbd_update_server_watch(NBDServerData *s)
{
- if (!s->max_connections || s->connections < s->max_connections) {
- qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL);
- } else {
- qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
+ if (s->listener) {
+ if (!s->max_connections || s->connections < s->max_connections) {
+ qio_net_listener_set_client_func(s->listener, nbd_accept, NULL,
+ NULL);
+ } else {
+ qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
+ }
}
}
@@ -113,6 +116,7 @@ static void nbd_server_free(NBDServerData *server)
*/
qio_net_listener_disconnect(server->listener);
object_unref(OBJECT(server->listener));
+ server->listener = NULL;
QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
NULL);

View File

@ -1,134 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 28 Aug 2024 11:07:43 +0200
Subject: [PATCH] softmmu/physmem: fix memory leak in dirty_memory_extend()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As reported by Peter, we might be leaking memory when removing the
highest RAMBlock (in the weird ram_addr_t space), and adding a new one.
We will fail to realize that we already allocated bitmaps for more
dirty memory blocks, and effectively discard the pointers to them.
Fix it by getting rid of last_ram_page() and by remembering the number
of dirty memory blocks that have been allocated already.
While at it, let's use "unsigned int" for the number of blocks, which
should be sufficient until we reach ~32 exabytes.
Looks like this leak was introduced as we switched from using a single
bitmap_zero_extend() to allocating multiple bitmaps:
bitmap_zero_extend() relies on g_renew() which should have taken care of
this.
Resolves: https://lkml.kernel.org/r/CAFEAcA-k7a+VObGAfCFNygQNfCKL=AfX6A4kScq=VSSK0peqPg@mail.gmail.com
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM hotplug")
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-stable@nongnu.org
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: "Philippe Mathieu-Daudé" <philmd@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
(picked from https://lore.kernel.org/qemu-devel/20240828090743.128647-1-david@redhat.com/)
[FE: backport - remove not-yet-existing variable in context of hunk touching ram_block_add()]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/exec/ramlist.h | 1 +
system/physmem.c | 35 +++++++++--------------------------
2 files changed, 10 insertions(+), 26 deletions(-)
diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
index 2ad2a81acc..d9cfe530be 100644
--- a/include/exec/ramlist.h
+++ b/include/exec/ramlist.h
@@ -50,6 +50,7 @@ typedef struct RAMList {
/* RCU-enabled, writes protected by the ramlist lock. */
QLIST_HEAD(, RAMBlock) blocks;
DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM];
+ unsigned int num_dirty_blocks;
uint32_t version;
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
} RAMList;
diff --git a/system/physmem.c b/system/physmem.c
index a4fe3d2bf8..78f7db1121 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1497,18 +1497,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
return offset;
}
-static unsigned long last_ram_page(void)
-{
- RAMBlock *block;
- ram_addr_t last = 0;
-
- RCU_READ_LOCK_GUARD();
- RAMBLOCK_FOREACH(block) {
- last = MAX(last, block->offset + block->max_length);
- }
- return last >> TARGET_PAGE_BITS;
-}
-
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
int ret;
@@ -1762,13 +1750,11 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
}
/* Called with ram_list.mutex held */
-static void dirty_memory_extend(ram_addr_t old_ram_size,
- ram_addr_t new_ram_size)
+static void dirty_memory_extend(ram_addr_t new_ram_size)
{
- ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
- DIRTY_MEMORY_BLOCK_SIZE);
- ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
- DIRTY_MEMORY_BLOCK_SIZE);
+ unsigned int old_num_blocks = ram_list.num_dirty_blocks;
+ unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size,
+ DIRTY_MEMORY_BLOCK_SIZE);
int i;
/* Only need to extend if block count increased */
@@ -1800,6 +1786,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
g_free_rcu(old_blocks, rcu);
}
}
+
+ ram_list.num_dirty_blocks = new_num_blocks;
}
static void ram_block_add(RAMBlock *new_block, Error **errp)
@@ -1808,11 +1796,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
const bool shared = qemu_ram_is_shared(new_block);
RAMBlock *block;
RAMBlock *last_block = NULL;
- ram_addr_t old_ram_size, new_ram_size;
+ ram_addr_t ram_size;
Error *err = NULL;
- old_ram_size = last_ram_page();
-
qemu_mutex_lock_ramlist();
new_block->offset = find_ram_offset(new_block->max_length);
@@ -1840,11 +1826,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
}
}
- new_ram_size = MAX(old_ram_size,
- (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
- if (new_ram_size > old_ram_size) {
- dirty_memory_extend(old_ram_size, new_ram_size);
- }
+ ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
+ dirty_memory_extend(ram_size);
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
* QLIST (which has an RCU-friendly variant) does not have insertion at
* tail, so save the last element in last_block.

View File

@ -1,104 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 7 Nov 2024 17:51:13 +0100
Subject: [PATCH] block/reqlist: allow adding overlapping requests
Allow overlapping requests by removing the assert that made it
impossible. There are only two callers:
1. block_copy_task_create()
It already asserts the very same condition before calling
reqlist_init_req().
2. cbw_snapshot_read_lock()
There is no need to have read requests be non-overlapping in
copy-before-write when used for snapshot-access. In fact, there was no
protection against two callers of cbw_snapshot_read_lock() calling
reqlist_init_req() with overlapping ranges and this could lead to an
assertion failure [1].
In particular, with the reproducer script below [0], two
cbw_co_snapshot_block_status() callers could race, with the second
calling reqlist_init_req() before the first one finishes and removes
its conflicting request.
[0]:
> #!/bin/bash -e
> dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024
> ./qemu-img create /tmp/fleecing.raw -f raw 1G
> (
> ./qemu-system-x86_64 --qmp stdio \
> --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \
> --blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \
> <<EOF
> {"execute": "qmp_capabilities"}
> {"execute": "blockdev-add", "arguments": { "driver": "copy-before-write", "file": "node0", "target": "node1", "node-name": "node3" } }
> {"execute": "blockdev-add", "arguments": { "driver": "snapshot-access", "file": "node3", "node-name": "snap0" } }
> {"execute": "nbd-server-start", "arguments": {"addr": { "type": "unix", "data": { "path": "/tmp/nbd.socket" } } } }
> {"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": "snap0", "type": "nbd", "name": "exp0"}}
> EOF
> ) &
> sleep 5
> while true; do
> ./qemu-nbd -d /dev/nbd0
> ./qemu-nbd -c /dev/nbd0 nbd:unix:/tmp/nbd.socket:exportname=exp0 -f raw -r
> nbdinfo --map 'nbd+unix:///exp0?socket=/tmp/nbd.socket'
> done
[1]:
> #5 0x000071e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101
> #6 0x0000615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23
> #7 0x00006152853e2d98 in cbw_snapshot_read_lock (...) at ../block/copy-before-write.c:237
> #8 0x00006152853e3068 in cbw_co_snapshot_block_status (...) at ../block/copy-before-write.c:304
> #9 0x00006152853f4d22 in bdrv_co_snapshot_block_status (...) at ../block/io.c:3726
> #10 0x000061528543a63e in snapshot_access_co_block_status (...) at ../block/snapshot-access.c:48
> #11 0x00006152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474
> #12 0x00006152853f2016 in bdrv_co_common_block_status_above (...) at ../block/io.c:2652
> #13 0x00006152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732
> #14 0x00006152853d9a86 in blk_co_block_status_above (...) at ../block/block-backend.c:1473
> #15 0x000061528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374
> #16 0x000061528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481
> #17 0x000061528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978
> #18 0x000061528538f906 in nbd_trip (...) at ../nbd/server.c:3121
> #19 0x00006152855a7caf in coroutine_trampoline (...) at ../util/coroutine-ucontext.c:175
Cc: qemu-stable@nongnu.org
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
block/copy-before-write.c | 3 ++-
block/reqlist.c | 2 --
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 8aba27a71d..3698b3bc60 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -65,7 +65,8 @@ typedef struct BDRVCopyBeforeWriteState {
/*
* @frozen_read_reqs: current read requests for fleecing user in bs->file
- * node. These areas must not be rewritten by guest.
+ * node. These areas must not be rewritten by guest. There can be multiple
+ * overlapping read requests.
*/
BlockReqList frozen_read_reqs;
diff --git a/block/reqlist.c b/block/reqlist.c
index 08cb57cfa4..098e807378 100644
--- a/block/reqlist.c
+++ b/block/reqlist.c
@@ -20,8 +20,6 @@
void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
int64_t bytes)
{
- assert(!reqlist_find_conflict(reqs, offset, bytes));
-
*req = (BlockReq) {
.offset = offset,
.bytes = bytes,

1271
debian/patches/pve-qemu-8.1-vitastor.patch vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -14,7 +14,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-) 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c diff --git a/block/file-posix.c b/block/file-posix.c
index 35684f7e21..43bc0bd520 100644 index 7f540b03ed..ca551baa42 100644
--- a/block/file-posix.c --- a/block/file-posix.c
+++ b/block/file-posix.c +++ b/block/file-posix.c
@@ -563,7 +563,7 @@ static QemuOptsList raw_runtime_opts = { @@ -563,7 +563,7 @@ static QemuOptsList raw_runtime_opts = {

View File

@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-) 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/net/net.h b/include/net/net.h diff --git a/include/net/net.h b/include/net/net.h
index b1f9b35fcc..096c0d52e4 100644 index 685ec58318..22edf4ee96 100644
--- a/include/net/net.h --- a/include/net/net.h
+++ b/include/net/net.h +++ b/include/net/net.h
@@ -317,8 +317,8 @@ void netdev_add(QemuOpts *opts, Error **errp); @@ -260,8 +260,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
int net_hub_id_for_client(NetClientState *nc, int *id); int net_hub_id_for_client(NetClientState *nc, int *id);
NetClientState *net_hub_port_find(int hub_id); NetClientState *net_hub_port_find(int hub_id);

View File

@ -10,10 +10,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-) 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 6b05738079..d82869900a 100644 index 0893b794e9..6d650a58b9 100644
--- a/target/i386/cpu.h --- a/target/i386/cpu.h
+++ b/target/i386/cpu.h +++ b/target/i386/cpu.h
@@ -2291,9 +2291,9 @@ uint64_t cpu_get_tsc(CPUX86State *env); @@ -2243,9 +2243,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
#define CPU_RESOLVING_TYPE TYPE_X86_CPU #define CPU_RESOLVING_TYPE TYPE_X86_CPU
#ifdef TARGET_X86_64 #ifdef TARGET_X86_64

View File

@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 6 deletions(-) 1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/ui/spice-core.c b/ui/spice-core.c diff --git a/ui/spice-core.c b/ui/spice-core.c
index 15be640286..ea20e6153c 100644 index 52a59386d7..b20c25aee0 100644
--- a/ui/spice-core.c --- a/ui/spice-core.c
+++ b/ui/spice-core.c +++ b/ui/spice-core.c
@@ -690,32 +690,35 @@ static void qemu_spice_init(void) @@ -691,32 +691,35 @@ static void qemu_spice_init(void)
if (tls_port) { if (tls_port) {
x509_dir = qemu_opt_get(opts, "x509-dir"); x509_dir = qemu_opt_get(opts, "x509-dir");

View File

@ -9,7 +9,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 11 insertions(+), 4 deletions(-) 1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/block/gluster.c b/block/gluster.c diff --git a/block/gluster.c b/block/gluster.c
index cc74af06dc..3ba9bbfa5e 100644 index ad5fadbe79..d0011085c4 100644
--- a/block/gluster.c --- a/block/gluster.c
+++ b/block/gluster.c +++ b/block/gluster.c
@@ -43,7 +43,7 @@ @@ -43,7 +43,7 @@

View File

@ -18,7 +18,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+) 1 file changed, 2 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c diff --git a/block/rbd.c b/block/rbd.c
index 84bb2fa5d7..63f60d41be 100644 index 978671411e..a4749f3b1b 100644
--- a/block/rbd.c --- a/block/rbd.c
+++ b/block/rbd.c +++ b/block/rbd.c
@@ -963,6 +963,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, @@ -963,6 +963,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,

View File

@ -16,7 +16,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 1 deletion(-) 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/block/gluster.c b/block/gluster.c diff --git a/block/gluster.c b/block/gluster.c
index 3ba9bbfa5e..34936eb855 100644 index d0011085c4..2df3d6e35d 100644
--- a/block/gluster.c --- a/block/gluster.c
+++ b/block/gluster.c +++ b/block/gluster.c
@@ -58,6 +58,7 @@ typedef struct GlusterAIOCB { @@ -58,6 +58,7 @@ typedef struct GlusterAIOCB {
@ -39,7 +39,7 @@ index 3ba9bbfa5e..34936eb855 100644
} }
aio_co_schedule(acb->aio_context, acb->coroutine); aio_co_schedule(acb->aio_context, acb->coroutine);
@@ -1023,6 +1026,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs, @@ -1021,6 +1024,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
acb.ret = 0; acb.ret = 0;
acb.coroutine = qemu_coroutine_self(); acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs); acb.aio_context = bdrv_get_aio_context(bs);
@ -47,7 +47,7 @@ index 3ba9bbfa5e..34936eb855 100644
ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb); ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
if (ret < 0) { if (ret < 0) {
@@ -1203,9 +1207,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, @@ -1201,9 +1205,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
acb.aio_context = bdrv_get_aio_context(bs); acb.aio_context = bdrv_get_aio_context(bs);
if (write) { if (write) {
@ -59,7 +59,7 @@ index 3ba9bbfa5e..34936eb855 100644
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb); gluster_finish_aiocb, &acb);
} }
@@ -1268,6 +1274,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) @@ -1266,6 +1272,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
acb.ret = 0; acb.ret = 0;
acb.coroutine = qemu_coroutine_self(); acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs); acb.aio_context = bdrv_get_aio_context(bs);
@ -67,7 +67,7 @@ index 3ba9bbfa5e..34936eb855 100644
ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb); ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) { if (ret < 0) {
@@ -1316,6 +1323,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs, @@ -1314,6 +1321,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
acb.ret = 0; acb.ret = 0;
acb.coroutine = qemu_coroutine_self(); acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs); acb.aio_context = bdrv_get_aio_context(bs);

View File

@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-) 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/qemu-img.c b/qemu-img.c diff --git a/qemu-img.c b/qemu-img.c
index 7668f86769..2575e97b43 100644 index 78433f3746..25d427edd1 100644
--- a/qemu-img.c --- a/qemu-img.c
+++ b/qemu-img.c +++ b/qemu-img.c
@@ -3075,7 +3075,8 @@ static int img_info(int argc, char **argv) @@ -3062,7 +3062,8 @@ static int img_info(int argc, char **argv)
list = collect_image_info_list(image_opts, filename, fmt, chain, list = collect_image_info_list(image_opts, filename, fmt, chain,
force_share); force_share);
if (!list) { if (!list) {

View File

@ -38,10 +38,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
2 files changed, 133 insertions(+), 73 deletions(-) 2 files changed, 133 insertions(+), 73 deletions(-)
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index c9dd70a892..048788b23d 100644 index 1b1dab5b17..d1616c045a 100644
--- a/qemu-img-cmds.hx --- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST @@ -58,9 +58,9 @@ SRST
ERST ERST
DEF("dd", img_dd, DEF("dd", img_dd,
@ -54,10 +54,10 @@ index c9dd70a892..048788b23d 100644
DEF("info", img_info, DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c diff --git a/qemu-img.c b/qemu-img.c
index 2575e97b43..8ec68b346f 100644 index 25d427edd1..220e6ec577 100644
--- a/qemu-img.c --- a/qemu-img.c
+++ b/qemu-img.c +++ b/qemu-img.c
@@ -4993,10 +4993,12 @@ static int img_bitmap(int argc, char **argv) @@ -4899,10 +4899,12 @@ static int img_bitmap(int argc, char **argv)
#define C_IF 04 #define C_IF 04
#define C_OF 010 #define C_OF 010
#define C_SKIP 020 #define C_SKIP 020
@ -70,7 +70,7 @@ index 2575e97b43..8ec68b346f 100644
}; };
struct DdIo { struct DdIo {
@@ -5072,6 +5074,19 @@ static int img_dd_skip(const char *arg, @@ -4978,6 +4980,19 @@ static int img_dd_skip(const char *arg,
return 0; return 0;
} }
@ -90,7 +90,7 @@ index 2575e97b43..8ec68b346f 100644
static int img_dd(int argc, char **argv) static int img_dd(int argc, char **argv)
{ {
int ret = 0; int ret = 0;
@@ -5112,6 +5127,7 @@ static int img_dd(int argc, char **argv) @@ -5018,6 +5033,7 @@ static int img_dd(int argc, char **argv)
{ "if", img_dd_if, C_IF }, { "if", img_dd_if, C_IF },
{ "of", img_dd_of, C_OF }, { "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP }, { "skip", img_dd_skip, C_SKIP },
@ -98,7 +98,7 @@ index 2575e97b43..8ec68b346f 100644
{ NULL, NULL, 0 } { NULL, NULL, 0 }
}; };
const struct option long_options[] = { const struct option long_options[] = {
@@ -5187,91 +5203,112 @@ static int img_dd(int argc, char **argv) @@ -5093,91 +5109,112 @@ static int img_dd(int argc, char **argv)
arg = NULL; arg = NULL;
} }
@ -275,7 +275,7 @@ index 2575e97b43..8ec68b346f 100644
} }
if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz || if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
@@ -5288,20 +5325,43 @@ static int img_dd(int argc, char **argv) @@ -5194,20 +5231,43 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz); in.buf = g_new(uint8_t, in.bsz);
for (out_pos = 0; in_pos < size; ) { for (out_pos = 0; in_pos < size; ) {

View File

@ -16,10 +16,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
1 file changed, 25 insertions(+), 3 deletions(-) 1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c diff --git a/qemu-img.c b/qemu-img.c
index 8ec68b346f..b98184bba1 100644 index 220e6ec577..58bf9b43d1 100644
--- a/qemu-img.c --- a/qemu-img.c
+++ b/qemu-img.c +++ b/qemu-img.c
@@ -4994,11 +4994,13 @@ static int img_bitmap(int argc, char **argv) @@ -4900,11 +4900,13 @@ static int img_bitmap(int argc, char **argv)
#define C_OF 010 #define C_OF 010
#define C_SKIP 020 #define C_SKIP 020
#define C_OSIZE 040 #define C_OSIZE 040
@ -33,7 +33,7 @@ index 8ec68b346f..b98184bba1 100644
}; };
struct DdIo { struct DdIo {
@@ -5087,6 +5089,19 @@ static int img_dd_osize(const char *arg, @@ -4993,6 +4995,19 @@ static int img_dd_osize(const char *arg,
return 0; return 0;
} }
@ -53,7 +53,7 @@ index 8ec68b346f..b98184bba1 100644
static int img_dd(int argc, char **argv) static int img_dd(int argc, char **argv)
{ {
int ret = 0; int ret = 0;
@@ -5101,12 +5116,14 @@ static int img_dd(int argc, char **argv) @@ -5007,12 +5022,14 @@ static int img_dd(int argc, char **argv)
int c, i; int c, i;
const char *out_fmt = "raw"; const char *out_fmt = "raw";
const char *fmt = NULL; const char *fmt = NULL;
@ -69,7 +69,7 @@ index 8ec68b346f..b98184bba1 100644
}; };
struct DdIo in = { struct DdIo in = {
.bsz = 512, /* Block size is by default 512 bytes */ .bsz = 512, /* Block size is by default 512 bytes */
@@ -5128,6 +5145,7 @@ static int img_dd(int argc, char **argv) @@ -5034,6 +5051,7 @@ static int img_dd(int argc, char **argv)
{ "of", img_dd_of, C_OF }, { "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP }, { "skip", img_dd_skip, C_SKIP },
{ "osize", img_dd_osize, C_OSIZE }, { "osize", img_dd_osize, C_OSIZE },
@ -77,7 +77,7 @@ index 8ec68b346f..b98184bba1 100644
{ NULL, NULL, 0 } { NULL, NULL, 0 }
}; };
const struct option long_options[] = { const struct option long_options[] = {
@@ -5324,9 +5342,10 @@ static int img_dd(int argc, char **argv) @@ -5230,9 +5248,10 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz); in.buf = g_new(uint8_t, in.bsz);
@ -90,7 +90,7 @@ index 8ec68b346f..b98184bba1 100644
if (blk1) { if (blk1) {
in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0); in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
if (in_ret == 0) { if (in_ret == 0) {
@@ -5335,6 +5354,9 @@ static int img_dd(int argc, char **argv) @@ -5241,6 +5260,9 @@ static int img_dd(int argc, char **argv)
} else { } else {
in_ret = read(STDIN_FILENO, in.buf, bytes); in_ret = read(STDIN_FILENO, in.buf, bytes);
if (in_ret == 0) { if (in_ret == 0) {

View File

@ -13,10 +13,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
3 files changed, 26 insertions(+), 12 deletions(-) 3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 3653adb963..d83e8fb3c0 100644 index 15aeddc6d8..5e713e231d 100644
--- a/docs/tools/qemu-img.rst --- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst
@@ -212,6 +212,10 @@ Parameters to convert subcommand: @@ -208,6 +208,10 @@ Parameters to convert subcommand:
Parameters to dd subcommand: Parameters to dd subcommand:
@ -27,7 +27,7 @@ index 3653adb963..d83e8fb3c0 100644
.. program:: qemu-img-dd .. program:: qemu-img-dd
.. option:: bs=BLOCK_SIZE .. option:: bs=BLOCK_SIZE
@@ -492,7 +496,7 @@ Command description: @@ -488,7 +492,7 @@ Command description:
it doesn't need to be specified separately in this case. it doesn't need to be specified separately in this case.
@ -36,7 +36,7 @@ index 3653adb963..d83e8fb3c0 100644
dd copies from *INPUT* file to *OUTPUT* file converting it from dd copies from *INPUT* file to *OUTPUT* file converting it from
*FMT* format to *OUTPUT_FMT* format. *FMT* format to *OUTPUT_FMT* format.
@@ -503,6 +507,11 @@ Command description: @@ -499,6 +503,11 @@ Command description:
The size syntax is similar to :manpage:`dd(1)`'s size syntax. The size syntax is similar to :manpage:`dd(1)`'s size syntax.
@ -49,10 +49,10 @@ index 3653adb963..d83e8fb3c0 100644
Give information about the disk image *FILENAME*. Use it in Give information about the disk image *FILENAME*. Use it in
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 048788b23d..0b29a67a06 100644 index d1616c045a..b5b0bb4467 100644
--- a/qemu-img-cmds.hx --- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST @@ -58,9 +58,9 @@ SRST
ERST ERST
DEF("dd", img_dd, DEF("dd", img_dd,
@ -65,10 +65,10 @@ index 048788b23d..0b29a67a06 100644
DEF("info", img_info, DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c diff --git a/qemu-img.c b/qemu-img.c
index b98184bba1..6fc8384f64 100644 index 58bf9b43d1..9d414d639b 100644
--- a/qemu-img.c --- a/qemu-img.c
+++ b/qemu-img.c +++ b/qemu-img.c
@@ -5118,7 +5118,7 @@ static int img_dd(int argc, char **argv) @@ -5024,7 +5024,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL; const char *fmt = NULL;
int64_t size = 0, readsize = 0; int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos; int64_t out_pos, in_pos;
@ -77,7 +77,7 @@ index b98184bba1..6fc8384f64 100644
struct DdInfo dd = { struct DdInfo dd = {
.flags = 0, .flags = 0,
.count = 0, .count = 0,
@@ -5156,7 +5156,7 @@ static int img_dd(int argc, char **argv) @@ -5062,7 +5062,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 } { 0, 0, 0, 0 }
}; };
@ -86,7 +86,7 @@ index b98184bba1..6fc8384f64 100644
if (c == EOF) { if (c == EOF) {
break; break;
} }
@@ -5176,6 +5176,9 @@ static int img_dd(int argc, char **argv) @@ -5082,6 +5082,9 @@ static int img_dd(int argc, char **argv)
case 'h': case 'h':
help(); help();
break; break;
@ -96,7 +96,7 @@ index b98184bba1..6fc8384f64 100644
case 'U': case 'U':
force_share = true; force_share = true;
break; break;
@@ -5306,13 +5309,15 @@ static int img_dd(int argc, char **argv) @@ -5212,13 +5215,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort); size - in.bsz * in.offset, &error_abort);
} }

View File

@ -12,10 +12,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
3 files changed, 36 insertions(+), 7 deletions(-) 3 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index d83e8fb3c0..61c6b21859 100644 index 5e713e231d..9390d5e5cf 100644
--- a/docs/tools/qemu-img.rst --- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst
@@ -496,10 +496,10 @@ Command description: @@ -492,10 +492,10 @@ Command description:
it doesn't need to be specified separately in this case. it doesn't need to be specified separately in this case.
@ -30,10 +30,10 @@ index d83e8fb3c0..61c6b21859 100644
The data is by default read and written using blocks of 512 bytes but can be The data is by default read and written using blocks of 512 bytes but can be
modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 0b29a67a06..758f397232 100644 index b5b0bb4467..36f97e1f19 100644
--- a/qemu-img-cmds.hx --- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST @@ -58,9 +58,9 @@ SRST
ERST ERST
DEF("dd", img_dd, DEF("dd", img_dd,
@ -46,10 +46,10 @@ index 0b29a67a06..758f397232 100644
DEF("info", img_info, DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c diff --git a/qemu-img.c b/qemu-img.c
index 6fc8384f64..a6c88e0860 100644 index 9d414d639b..e13a12137b 100644
--- a/qemu-img.c --- a/qemu-img.c
+++ b/qemu-img.c +++ b/qemu-img.c
@@ -5110,6 +5110,7 @@ static int img_dd(int argc, char **argv) @@ -5016,6 +5016,7 @@ static int img_dd(int argc, char **argv)
BlockDriver *drv = NULL, *proto_drv = NULL; BlockDriver *drv = NULL, *proto_drv = NULL;
BlockBackend *blk1 = NULL, *blk2 = NULL; BlockBackend *blk1 = NULL, *blk2 = NULL;
QemuOpts *opts = NULL; QemuOpts *opts = NULL;
@ -57,7 +57,7 @@ index 6fc8384f64..a6c88e0860 100644
QemuOptsList *create_opts = NULL; QemuOptsList *create_opts = NULL;
Error *local_err = NULL; Error *local_err = NULL;
bool image_opts = false; bool image_opts = false;
@@ -5119,6 +5120,7 @@ static int img_dd(int argc, char **argv) @@ -5025,6 +5026,7 @@ static int img_dd(int argc, char **argv)
int64_t size = 0, readsize = 0; int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos; int64_t out_pos, in_pos;
bool force_share = false, skip_create = false; bool force_share = false, skip_create = false;
@ -65,7 +65,7 @@ index 6fc8384f64..a6c88e0860 100644
struct DdInfo dd = { struct DdInfo dd = {
.flags = 0, .flags = 0,
.count = 0, .count = 0,
@@ -5156,7 +5158,7 @@ static int img_dd(int argc, char **argv) @@ -5062,7 +5064,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 } { 0, 0, 0, 0 }
}; };
@ -74,7 +74,7 @@ index 6fc8384f64..a6c88e0860 100644
if (c == EOF) { if (c == EOF) {
break; break;
} }
@@ -5179,6 +5181,19 @@ static int img_dd(int argc, char **argv) @@ -5085,6 +5087,19 @@ static int img_dd(int argc, char **argv)
case 'n': case 'n':
skip_create = true; skip_create = true;
break; break;
@ -94,7 +94,7 @@ index 6fc8384f64..a6c88e0860 100644
case 'U': case 'U':
force_share = true; force_share = true;
break; break;
@@ -5238,11 +5253,24 @@ static int img_dd(int argc, char **argv) @@ -5144,11 +5159,24 @@ static int img_dd(int argc, char **argv)
if (dd.flags & C_IF) { if (dd.flags & C_IF) {
blk1 = img_open(image_opts, in.filename, fmt, 0, false, false, blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
force_share); force_share);
@ -120,7 +120,7 @@ index 6fc8384f64..a6c88e0860 100644
} }
if (dd.flags & C_OSIZE) { if (dd.flags & C_OSIZE) {
@@ -5397,6 +5425,7 @@ static int img_dd(int argc, char **argv) @@ -5303,6 +5331,7 @@ static int img_dd(int argc, char **argv)
out: out:
g_free(arg); g_free(arg);
qemu_opts_del(opts); qemu_opts_del(opts);

View File

@ -18,10 +18,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
4 files changed, 82 insertions(+), 4 deletions(-) 4 files changed, 82 insertions(+), 4 deletions(-)
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index a6ff6a4875..e7f74d1c63 100644 index c3e55ef9e9..0e32e6201f 100644
--- a/hw/core/machine-hmp-cmds.c --- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c
@@ -175,7 +175,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict) @@ -169,7 +169,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return; return;
} }
@ -59,10 +59,10 @@ index a6ff6a4875..e7f74d1c63 100644
qapi_free_BalloonInfo(info); qapi_free_BalloonInfo(info);
} }
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 609e39a821..8cb6dfcac3 100644 index d004cf29d2..2660ed520b 100644
--- a/hw/virtio/virtio-balloon.c --- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c
@@ -781,8 +781,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, @@ -782,8 +782,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
static void virtio_balloon_stat(void *opaque, BalloonInfo *info) static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{ {
VirtIOBalloon *dev = opaque; VirtIOBalloon *dev = opaque;
@ -103,10 +103,10 @@ index 609e39a821..8cb6dfcac3 100644
static void virtio_balloon_to_target(void *opaque, ram_addr_t target) static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
diff --git a/qapi/machine.json b/qapi/machine.json diff --git a/qapi/machine.json b/qapi/machine.json
index e8b60641f2..2054cdc70d 100644 index a08b6576ca..5c9a4d55f4 100644
--- a/qapi/machine.json --- a/qapi/machine.json
+++ b/qapi/machine.json +++ b/qapi/machine.json
@@ -1079,9 +1079,29 @@ @@ -1063,9 +1063,29 @@
# @actual: the logical size of the VM in bytes Formula used: # @actual: the logical size of the VM in bytes Formula used:
# logical_vm_size = vm_ram_size - balloon_size # logical_vm_size = vm_ram_size - balloon_size
# #
@ -138,10 +138,10 @@ index e8b60641f2..2054cdc70d 100644
## ##
# @query-balloon: # @query-balloon:
diff --git a/qapi/pragma.json b/qapi/pragma.json diff --git a/qapi/pragma.json b/qapi/pragma.json
index 59fbe74b8c..be8fa304c5 100644 index 7f810b0e97..325e684411 100644
--- a/qapi/pragma.json --- a/qapi/pragma.json
+++ b/qapi/pragma.json +++ b/qapi/pragma.json
@@ -90,6 +90,7 @@ @@ -35,6 +35,7 @@
'member-name-exceptions': [ # visible in: 'member-name-exceptions': [ # visible in:
'ACPISlotType', # query-acpi-ospm-status 'ACPISlotType', # query-acpi-ospm-status
'AcpiTableOptions', # -acpitable 'AcpiTableOptions', # -acpitable

View File

@ -13,10 +13,10 @@ Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
2 files changed, 9 insertions(+), 1 deletion(-) 2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 4b72009cd3..314351cdff 100644 index 3860a50c3b..40821e2317 100644
--- a/hw/core/machine-qmp-cmds.c --- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c
@@ -90,6 +90,12 @@ MachineInfoList *qmp_query_machines(Error **errp) @@ -91,6 +91,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
info->numa_mem_supported = mc->numa_mem_supported; info->numa_mem_supported = mc->numa_mem_supported;
info->deprecated = !!mc->deprecation_reason; info->deprecated = !!mc->deprecation_reason;
info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi"); info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi");
@ -30,10 +30,10 @@ index 4b72009cd3..314351cdff 100644
info->default_cpu_type = g_strdup(mc->default_cpu_type); info->default_cpu_type = g_strdup(mc->default_cpu_type);
} }
diff --git a/qapi/machine.json b/qapi/machine.json diff --git a/qapi/machine.json b/qapi/machine.json
index 2054cdc70d..a024d5b05d 100644 index 5c9a4d55f4..fbb61f18e4 100644
--- a/qapi/machine.json --- a/qapi/machine.json
+++ b/qapi/machine.json +++ b/qapi/machine.json
@@ -146,6 +146,8 @@ @@ -139,6 +139,8 @@
# #
# @is-default: whether the machine is default # @is-default: whether the machine is default
# #
@ -42,7 +42,7 @@ index 2054cdc70d..a024d5b05d 100644
# @cpu-max: maximum number of CPUs supported by the machine type # @cpu-max: maximum number of CPUs supported by the machine type
# (since 1.5) # (since 1.5)
# #
@@ -170,7 +172,7 @@ @@ -163,7 +165,7 @@
## ##
{ 'struct': 'MachineInfo', { 'struct': 'MachineInfo',
'data': { 'name': 'str', '*alias': 'str', 'data': { 'name': 'str', '*alias': 'str',

View File

@ -14,10 +14,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
2 files changed, 7 insertions(+) 2 files changed, 7 insertions(+)
diff --git a/qapi/ui.json b/qapi/ui.json diff --git a/qapi/ui.json b/qapi/ui.json
index f610bce118..6ea26a9acb 100644 index 006616aa77..dfd1d3e36b 100644
--- a/qapi/ui.json --- a/qapi/ui.json
+++ b/qapi/ui.json +++ b/qapi/ui.json
@@ -314,11 +314,14 @@ @@ -317,11 +317,14 @@
# #
# @channels: a list of @SpiceChannel for each active spice channel # @channels: a list of @SpiceChannel for each active spice channel
# #
@ -33,7 +33,7 @@ index f610bce118..6ea26a9acb 100644
'if': 'CONFIG_SPICE' } 'if': 'CONFIG_SPICE' }
diff --git a/ui/spice-core.c b/ui/spice-core.c diff --git a/ui/spice-core.c b/ui/spice-core.c
index ea20e6153c..55a15fba8b 100644 index b20c25aee0..26baeb7846 100644
--- a/ui/spice-core.c --- a/ui/spice-core.c
+++ b/ui/spice-core.c +++ b/ui/spice-core.c
@@ -548,6 +548,10 @@ static SpiceInfo *qmp_query_spice_real(Error **errp) @@ -548,6 +548,10 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)

View File

@ -14,21 +14,20 @@ Additionally, allows tracking the current position from the outside
(intended to be used for progress tracking). (intended to be used for progress tracking).
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
migration/channel-savevm-async.c | 184 +++++++++++++++++++++++++++++++ migration/channel-savevm-async.c | 183 +++++++++++++++++++++++++++++++
migration/channel-savevm-async.h | 51 +++++++++ migration/channel-savevm-async.h | 51 +++++++++
migration/meson.build | 1 + migration/meson.build | 1 +
3 files changed, 236 insertions(+) 3 files changed, 235 insertions(+)
create mode 100644 migration/channel-savevm-async.c create mode 100644 migration/channel-savevm-async.c
create mode 100644 migration/channel-savevm-async.h create mode 100644 migration/channel-savevm-async.h
diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
new file mode 100644 new file mode 100644
index 0000000000..081a192f49 index 0000000000..aab081ce07
--- /dev/null --- /dev/null
+++ b/migration/channel-savevm-async.c +++ b/migration/channel-savevm-async.c
@@ -0,0 +1,184 @@ @@ -0,0 +1,183 @@
+/* +/*
+ * QIO Channel implementation to be used by savevm-async QMP calls + * QIO Channel implementation to be used by savevm-async QMP calls
+ */ + */
@ -175,9 +174,8 @@ index 0000000000..081a192f49
+ +
+static void +static void
+qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc, +qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc,
+ AioContext *read_ctx, + AioContext *ctx,
+ IOHandler *io_read, + IOHandler *io_read,
+ AioContext *write_ctx,
+ IOHandler *io_write, + IOHandler *io_write,
+ void *opaque) + void *opaque)
+{ +{
@ -271,7 +269,7 @@ index 0000000000..17ae2cb261
+ +
+#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */ +#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
diff --git a/migration/meson.build b/migration/meson.build diff --git a/migration/meson.build b/migration/meson.build
index 1eeb915ff6..95d1cf2250 100644 index 1ae28523a1..37ddcb5d60 100644
--- a/migration/meson.build --- a/migration/meson.build
+++ b/migration/meson.build +++ b/migration/meson.build
@@ -13,6 +13,7 @@ system_ss.add(files( @@ -13,6 +13,7 @@ system_ss.add(files(

View File

@ -27,9 +27,7 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[FE: further improve aborting [FE: further improve aborting
adapt to removal of QEMUFileOps adapt to removal of QEMUFileOps
improve condition for entering final stage improve condition for entering final stage
adapt to QAPI and other changes for 8.2 adapt to QAPI and other changes for 8.0]
make sure to not call vm_start() from coroutine
stop CPU throttling after finishing]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
hmp-commands-info.hx | 13 + hmp-commands-info.hx | 13 +
@ -37,17 +35,17 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
include/migration/snapshot.h | 2 + include/migration/snapshot.h | 2 +
include/monitor/hmp.h | 3 + include/monitor/hmp.h | 3 +
migration/meson.build | 1 + migration/meson.build | 1 +
migration/savevm-async.c | 545 +++++++++++++++++++++++++++++++++++ migration/savevm-async.c | 531 +++++++++++++++++++++++++++++++++++
monitor/hmp-cmds.c | 38 +++ monitor/hmp-cmds.c | 38 +++
qapi/migration.json | 34 +++ qapi/migration.json | 34 +++
qapi/misc.json | 18 ++ qapi/misc.json | 16 ++
qemu-options.hx | 12 + qemu-options.hx | 12 +
system/vl.c | 10 + softmmu/vl.c | 10 +
11 files changed, 693 insertions(+) 11 files changed, 677 insertions(+)
create mode 100644 migration/savevm-async.c create mode 100644 migration/savevm-async.c
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index ad1b1306e3..d5ab880492 100644 index f5b37eb74a..10fdd822e0 100644
--- a/hmp-commands-info.hx --- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx +++ b/hmp-commands-info.hx
@@ -525,6 +525,19 @@ SRST @@ -525,6 +525,19 @@ SRST
@ -71,10 +69,10 @@ index ad1b1306e3..d5ab880492 100644
.name = "balloon", .name = "balloon",
.args_type = "", .args_type = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx diff --git a/hmp-commands.hx b/hmp-commands.hx
index 2e2a3bcf98..7506de251c 100644 index 2cbd0f77a0..e352f86872 100644
--- a/hmp-commands.hx --- a/hmp-commands.hx
+++ b/hmp-commands.hx +++ b/hmp-commands.hx
@@ -1862,3 +1862,20 @@ SRST @@ -1865,3 +1865,20 @@ SRST
List event channels in the guest List event channels in the guest
ERST ERST
#endif #endif
@ -96,12 +94,12 @@ index 2e2a3bcf98..7506de251c 100644
+ .coroutine = true, + .coroutine = true,
+ }, + },
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index 9e4dcaaa75..2581730d74 100644 index e72083b117..c846d37806 100644
--- a/include/migration/snapshot.h --- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h +++ b/include/migration/snapshot.h
@@ -68,4 +68,6 @@ bool delete_snapshot(const char *name, @@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
*/ bool has_devices, strList *devices,
void load_snapshot_resume(RunState state); Error **errp);
+int load_snapshot_from_blockdev(const char *filename, Error **errp); +int load_snapshot_from_blockdev(const char *filename, Error **errp);
+ +
@ -128,10 +126,10 @@ index 13f9a2dedb..7a7def7530 100644
void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict); void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_chardev_add(Monitor *mon, const QDict *qdict); void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build diff --git a/migration/meson.build b/migration/meson.build
index 95d1cf2250..800f12a60d 100644 index 37ddcb5d60..07f6057acc 100644
--- a/migration/meson.build --- a/migration/meson.build
+++ b/migration/meson.build +++ b/migration/meson.build
@@ -28,6 +28,7 @@ system_ss.add(files( @@ -26,6 +26,7 @@ system_ss.add(files(
'options.c', 'options.c',
'postcopy-ram.c', 'postcopy-ram.c',
'savevm.c', 'savevm.c',
@ -141,10 +139,10 @@ index 95d1cf2250..800f12a60d 100644
'threadinfo.c', 'threadinfo.c',
diff --git a/migration/savevm-async.c b/migration/savevm-async.c diff --git a/migration/savevm-async.c b/migration/savevm-async.c
new file mode 100644 new file mode 100644
index 0000000000..1af32604c7 index 0000000000..e9fc18fb10
--- /dev/null --- /dev/null
+++ b/migration/savevm-async.c +++ b/migration/savevm-async.c
@@ -0,0 +1,545 @@ @@ -0,0 +1,531 @@
+#include "qemu/osdep.h" +#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h" +#include "migration/channel-savevm-async.h"
+#include "migration/migration.h" +#include "migration/migration.h"
@ -155,7 +153,6 @@ index 0000000000..1af32604c7
+#include "migration/global_state.h" +#include "migration/global_state.h"
+#include "migration/ram.h" +#include "migration/ram.h"
+#include "migration/qemu-file.h" +#include "migration/qemu-file.h"
+#include "sysemu/cpu-throttle.h"
+#include "sysemu/sysemu.h" +#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h" +#include "sysemu/runstate.h"
+#include "block/block.h" +#include "block/block.h"
@ -303,6 +300,7 @@ index 0000000000..1af32604c7
+static void process_savevm_finalize(void *opaque) +static void process_savevm_finalize(void *opaque)
+{ +{
+ int ret; + int ret;
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
+ MigrationState *ms = migrate_get_current(); + MigrationState *ms = migrate_get_current();
+ +
+ bool aborted = savevm_aborted(); + bool aborted = savevm_aborted();
@ -319,7 +317,9 @@ index 0000000000..1af32604c7
+ * so move it back. It can stay in the main context and live out its live + * so move it back. It can stay in the main context and live out its live
+ * there, since we're done with it after this method ends anyway. + * there, since we're done with it after this method ends anyway.
+ */ + */
+ aio_context_acquire(iohandler_ctx);
+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL); + blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+ aio_context_release(iohandler_ctx);
+ +
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret < 0) { + if (ret < 0) {
@ -344,12 +344,6 @@ index 0000000000..1af32604c7
+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED); + ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ms->to_dst_file = NULL; + ms->to_dst_file = NULL;
+ +
+ /*
+ * Same as in migration_iteration_finish(): saving RAM might've turned on CPU throttling for
+ * auto-converge, make sure to disable it.
+ */
+ cpu_throttle_stop();
+
+ qemu_savevm_state_cleanup(); + qemu_savevm_state_cleanup();
+ +
+ ret = save_snapshot_cleanup(); + ret = save_snapshot_cleanup();
@ -402,12 +396,12 @@ index 0000000000..1af32604c7
+ * lock. Similar to what is done in migration.c, call the exact variant + * lock. Similar to what is done in migration.c, call the exact variant
+ * only once pend_precopy in the estimate is below the threshold. + * only once pend_precopy in the estimate is below the threshold.
+ */ + */
+ bql_unlock(); + qemu_mutex_unlock_iothread();
+ qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy); + qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
+ if (pend_precopy <= threshold) { + if (pend_precopy <= threshold) {
+ qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy); + qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
+ } + }
+ bql_lock(); + qemu_mutex_lock_iothread();
+ pending_size = pend_precopy + pend_postcopy; + pending_size = pend_precopy + pend_postcopy;
+ +
+ /* + /*
@ -447,12 +441,12 @@ index 0000000000..1af32604c7
+ * so move there now and after every flush. + * so move there now and after every flush.
+ */ + */
+ aio_co_reschedule_self(qemu_get_aio_context()); + aio_co_reschedule_self(qemu_get_aio_context());
+ bdrv_graph_co_rdlock(); + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ bs = bdrv_first(&it);
+ bdrv_graph_co_rdunlock();
+ while (bs) {
+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */ + /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+ if (bs != blk_bs(snap_state.target)) { + if (bs == blk_bs(snap_state.target)) {
+ continue;
+ }
+
+ AioContext *bs_ctx = bdrv_get_aio_context(bs); + AioContext *bs_ctx = bdrv_get_aio_context(bs);
+ if (bs_ctx != qemu_get_aio_context()) { + if (bs_ctx != qemu_get_aio_context()) {
+ DPRINTF("savevm: async flushing drive %s\n", bs->filename); + DPRINTF("savevm: async flushing drive %s\n", bs->filename);
@ -463,10 +457,6 @@ index 0000000000..1af32604c7
+ aio_co_reschedule_self(qemu_get_aio_context()); + aio_co_reschedule_self(qemu_get_aio_context());
+ } + }
+ } + }
+ bdrv_graph_co_rdlock();
+ bs = bdrv_next(&it);
+ bdrv_graph_co_rdunlock();
+ }
+ +
+ DPRINTF("timing: async flushing took %ld ms\n", + DPRINTF("timing: async flushing took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush); + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
@ -488,7 +478,7 @@ index 0000000000..1af32604c7
+ return; + return;
+ } + }
+ +
+ if (migration_is_running()) { + if (migration_is_running(ms->state)) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE); + error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+ return; + return;
+ } + }
@ -545,10 +535,9 @@ index 0000000000..1af32604c7
+ * State is cleared in process_savevm_co, but has to be initialized + * State is cleared in process_savevm_co, but has to be initialized
+ * here (blocking main thread, from QMP) to avoid race conditions. + * here (blocking main thread, from QMP) to avoid race conditions.
+ */ + */
+ if (migrate_init(ms, errp)) { + migrate_init(ms);
+ return;
+ }
+ memset(&mig_stats, 0, sizeof(mig_stats)); + memset(&mig_stats, 0, sizeof(mig_stats));
+ memset(&compression_counters, 0, sizeof(compression_counters));
+ ms->to_dst_file = snap_state.file; + ms->to_dst_file = snap_state.file;
+ +
+ error_setg(&snap_state.blocker, "block device is in use by savevm"); + error_setg(&snap_state.blocker, "block device is in use by savevm");
@ -557,8 +546,10 @@ index 0000000000..1af32604c7
+ snap_state.state = SAVE_STATE_ACTIVE; + snap_state.state = SAVE_STATE_ACTIVE;
+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state); + snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL); + snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+ qemu_mutex_unlock_iothread();
+ qemu_savevm_state_header(snap_state.file); + qemu_savevm_state_header(snap_state.file);
+ qemu_savevm_state_setup(snap_state.file); + qemu_savevm_state_setup(snap_state.file);
+ qemu_mutex_lock_iothread();
+ +
+ /* Async processing from here on out happens in iohandler context, so let + /* Async processing from here on out happens in iohandler context, so let
+ * the target bdrv have its home there. + * the target bdrv have its home there.
@ -579,10 +570,29 @@ index 0000000000..1af32604c7
+ } + }
+} +}
+ +
+static void coroutine_fn wait_for_close_co(void *opaque) +void coroutine_fn qmp_savevm_end(Error **errp)
+{ +{
+ int64_t timeout; + int64_t timeout;
+ +
+ if (snap_state.state == SAVE_STATE_DONE) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "VM snapshot not started\n");
+ return;
+ }
+
+ if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_CANCELLED;
+ goto wait_for_close;
+ }
+
+ if (snap_state.saved_vm_running) {
+ vm_start();
+ snap_state.saved_vm_running = false;
+ }
+
+ snap_state.state = SAVE_STATE_DONE;
+
+wait_for_close:
+ if (!snap_state.target) { + if (!snap_state.target) {
+ DPRINTF("savevm-end: no target file open\n"); + DPRINTF("savevm-end: no target file open\n");
+ return; + return;
@ -610,32 +620,6 @@ index 0000000000..1af32604c7
+ DPRINTF("savevm-end: cleanup done\n"); + DPRINTF("savevm-end: cleanup done\n");
+} +}
+ +
+void qmp_savevm_end(Error **errp)
+{
+ if (snap_state.state == SAVE_STATE_DONE) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "VM snapshot not started\n");
+ return;
+ }
+
+ Coroutine *wait_for_close = qemu_coroutine_create(wait_for_close_co, NULL);
+
+ if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_CANCELLED;
+ qemu_coroutine_enter(wait_for_close);
+ return;
+ }
+
+ if (snap_state.saved_vm_running) {
+ vm_start();
+ snap_state.saved_vm_running = false;
+ }
+
+ snap_state.state = SAVE_STATE_DONE;
+
+ qemu_coroutine_enter(wait_for_close);
+}
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp) +int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{ +{
+ BlockBackend *be; + BlockBackend *be;
@ -691,7 +675,7 @@ index 0000000000..1af32604c7
+ return ret; + return ret;
+} +}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 871898ac46..ef4634e5c1 100644 index 6c559b48c8..91be698308 100644
--- a/monitor/hmp-cmds.c --- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c
@@ -22,6 +22,7 @@ @@ -22,6 +22,7 @@
@ -701,7 +685,7 @@ index 871898ac46..ef4634e5c1 100644
+#include "qapi/qapi-commands-migration.h" +#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-commands-misc.h" #include "qapi/qapi-commands-misc.h"
#include "qapi/qmp/qdict.h" #include "qapi/qmp/qdict.h"
#include "qemu/cutils.h" #include "qapi/qmp/qerror.h"
@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict) @@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
mtree_info(flatview, dispatch_tree, owner, disabled); mtree_info(flatview, dispatch_tree, owner, disabled);
@ -744,10 +728,10 @@ index 871898ac46..ef4634e5c1 100644
+ } + }
+} +}
diff --git a/qapi/migration.json b/qapi/migration.json diff --git a/qapi/migration.json b/qapi/migration.json
index 8c65b90328..ed20d066cd 100644 index 8843e74b59..aca0ca1ac1 100644
--- a/qapi/migration.json --- a/qapi/migration.json
+++ b/qapi/migration.json +++ b/qapi/migration.json
@@ -297,6 +297,40 @@ @@ -291,6 +291,40 @@
'*dirty-limit-throttle-time-per-round': 'uint64', '*dirty-limit-throttle-time-per-round': 'uint64',
'*dirty-limit-ring-full-time': 'uint64'} } '*dirty-limit-ring-full-time': 'uint64'} }
@ -789,10 +773,10 @@ index 8c65b90328..ed20d066cd 100644
# @query-migrate: # @query-migrate:
# #
diff --git a/qapi/misc.json b/qapi/misc.json diff --git a/qapi/misc.json b/qapi/misc.json
index ec30e5c570..3c68633f68 100644 index cda2effa81..94a58bb0bf 100644
--- a/qapi/misc.json --- a/qapi/misc.json
+++ b/qapi/misc.json +++ b/qapi/misc.json
@@ -454,6 +454,24 @@ @@ -456,6 +456,22 @@
## ##
{ 'command': 'query-fdsets', 'returns': ['FdsetInfo'] } { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
@ -801,8 +785,6 @@ index ec30e5c570..3c68633f68 100644
+# +#
+# Prepare for snapshot and halt VM. Save VM state to statefile. +# Prepare for snapshot and halt VM. Save VM state to statefile.
+# +#
+# @statefile: target file that state should be written to.
+#
+## +##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } } +{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+ +
@ -812,16 +794,16 @@ index ec30e5c570..3c68633f68 100644
+# Resume VM after a snapshot. +# Resume VM after a snapshot.
+# +#
+## +##
+{ 'command': 'savevm-end' } +{ 'command': 'savevm-end', 'coroutine': true }
+ +
## ##
# @CommandLineParameterType: # @CommandLineParameterType:
# #
diff --git a/qemu-options.hx b/qemu-options.hx diff --git a/qemu-options.hx b/qemu-options.hx
index 8ce85d4559..511ab9415e 100644 index 8073f5edf5..dc1ececc9c 100644
--- a/qemu-options.hx --- a/qemu-options.hx
+++ b/qemu-options.hx +++ b/qemu-options.hx
@@ -4610,6 +4610,18 @@ SRST @@ -4483,6 +4483,18 @@ SRST
Start right away with a saved state (``loadvm`` in monitor) Start right away with a saved state (``loadvm`` in monitor)
ERST ERST
@ -840,11 +822,11 @@ index 8ce85d4559..511ab9415e 100644
#ifndef _WIN32 #ifndef _WIN32
DEF("daemonize", 0, QEMU_OPTION_daemonize, \ DEF("daemonize", 0, QEMU_OPTION_daemonize, \
"-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL) "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/system/vl.c b/system/vl.c diff --git a/softmmu/vl.c b/softmmu/vl.c
index c644222982..2738ab7c91 100644 index c9e9ede237..3f2681aded 100644
--- a/system/vl.c --- a/softmmu/vl.c
+++ b/system/vl.c +++ b/softmmu/vl.c
@@ -163,6 +163,7 @@ static const char *accelerators; @@ -164,6 +164,7 @@ static const char *accelerators;
static bool have_custom_ram_size; static bool have_custom_ram_size;
static const char *ram_memdev_id; static const char *ram_memdev_id;
static QDict *machine_opts_dict; static QDict *machine_opts_dict;
@ -852,10 +834,10 @@ index c644222982..2738ab7c91 100644
static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts); static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts); static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
static int display_remote; static int display_remote;
@@ -2712,6 +2713,12 @@ void qmp_x_exit_preconfig(Error **errp) @@ -2647,6 +2648,12 @@ void qmp_x_exit_preconfig(Error **errp)
RunState state = autostart ? RUN_STATE_RUNNING : runstate_get();
if (loadvm) {
load_snapshot(loadvm, NULL, false, NULL, &error_fatal); load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
load_snapshot_resume(state);
+ } else if (loadstate) { + } else if (loadstate) {
+ Error *local_err = NULL; + Error *local_err = NULL;
+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) { + if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
@ -865,7 +847,7 @@ index c644222982..2738ab7c91 100644
} }
if (replay_mode != REPLAY_MODE_NONE) { if (replay_mode != REPLAY_MODE_NONE) {
replay_vmstate_init(); replay_vmstate_init();
@@ -3259,6 +3266,9 @@ void qemu_init(int argc, char **argv) @@ -3194,6 +3201,9 @@ void qemu_init(int argc, char **argv)
case QEMU_OPTION_loadvm: case QEMU_OPTION_loadvm:
loadvm = optarg; loadvm = optarg;
break; break;

View File

@ -13,18 +13,18 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to removal of QEMUFileOps] [FE: adapt to removal of QEMUFileOps]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
migration/qemu-file.c | 50 +++++++++++++++++++++++++++------------- migration/qemu-file.c | 49 +++++++++++++++++++++++++++-------------
migration/qemu-file.h | 2 ++ migration/qemu-file.h | 2 ++
migration/savevm-async.c | 5 ++-- migration/savevm-async.c | 5 ++--
3 files changed, 39 insertions(+), 18 deletions(-) 3 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index a10882d47f..19c1de0472 100644 index 19c33c9985..e9ffff0f0a 100644
--- a/migration/qemu-file.c --- a/migration/qemu-file.c
+++ b/migration/qemu-file.c +++ b/migration/qemu-file.c
@@ -35,8 +35,8 @@ @@ -33,8 +33,8 @@
#include "rdma.h" #include "options.h"
#include "io/channel-file.h" #include "qapi/error.h"
-#define IO_BUF_SIZE 32768 -#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64) -#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
@ -32,8 +32,8 @@ index a10882d47f..19c1de0472 100644
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256) +#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
struct QEMUFile { struct QEMUFile {
QIOChannel *ioc; const QEMUFileHooks *hooks;
@@ -44,7 +44,8 @@ struct QEMUFile { @@ -46,7 +46,8 @@ struct QEMUFile {
int buf_index; int buf_index;
int buf_size; /* 0 when writing */ int buf_size; /* 0 when writing */
@ -43,7 +43,7 @@ index a10882d47f..19c1de0472 100644
DECLARE_BITMAP(may_free, MAX_IOV_SIZE); DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE]; struct iovec iov[MAX_IOV_SIZE];
@@ -101,7 +102,9 @@ int qemu_file_shutdown(QEMUFile *f) @@ -100,7 +101,9 @@ int qemu_file_shutdown(QEMUFile *f)
return 0; return 0;
} }
@ -54,7 +54,7 @@ index a10882d47f..19c1de0472 100644
{ {
QEMUFile *f; QEMUFile *f;
@@ -110,6 +113,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable) @@ -109,6 +112,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
object_ref(ioc); object_ref(ioc);
f->ioc = ioc; f->ioc = ioc;
f->is_writable = is_writable; f->is_writable = is_writable;
@ -63,7 +63,7 @@ index a10882d47f..19c1de0472 100644
return f; return f;
} }
@@ -120,17 +125,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable) @@ -119,17 +124,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
*/ */
QEMUFile *qemu_file_get_return_path(QEMUFile *f) QEMUFile *qemu_file_get_return_path(QEMUFile *f)
{ {
@ -93,8 +93,8 @@ index a10882d47f..19c1de0472 100644
+ return qemu_file_new_impl(ioc, false, buffer_size); + return qemu_file_new_impl(ioc, false, buffer_size);
} }
/* void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
@@ -328,7 +343,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) @@ -375,7 +390,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
do { do {
len = qio_channel_read(f->ioc, len = qio_channel_read(f->ioc,
(char *)f->buf + pending, (char *)f->buf + pending,
@ -103,17 +103,16 @@ index a10882d47f..19c1de0472 100644
&local_error); &local_error);
if (len == QIO_CHANNEL_ERR_BLOCK) { if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) { if (qemu_in_coroutine()) {
@@ -368,6 +383,9 @@ int qemu_fclose(QEMUFile *f) @@ -425,6 +440,8 @@ int qemu_fclose(QEMUFile *f)
ret = ret2;
} }
g_clear_pointer(&f->ioc, object_unref); g_clear_pointer(&f->ioc, object_unref);
+
+ free(f->buf); + free(f->buf);
+ +
error_free(f->last_error_obj); /* If any error was spotted before closing, we should report it
g_free(f); * instead of the close() return value.
trace_qemu_file_fclose(); */
@@ -416,7 +434,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len) @@ -479,7 +496,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{ {
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) { if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len; f->buf_index += len;
@ -122,7 +121,7 @@ index a10882d47f..19c1de0472 100644
qemu_fflush(f); qemu_fflush(f);
} }
} }
@@ -441,7 +459,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) @@ -504,7 +521,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
} }
while (size > 0) { while (size > 0) {
@ -131,7 +130,7 @@ index a10882d47f..19c1de0472 100644
if (l > size) { if (l > size) {
l = size; l = size;
} }
@@ -587,8 +605,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si @@ -549,8 +566,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si
size_t index; size_t index;
assert(!qemu_file_is_writable(f)); assert(!qemu_file_is_writable(f));
@ -142,7 +141,7 @@ index a10882d47f..19c1de0472 100644
/* The 1st byte to read from */ /* The 1st byte to read from */
index = f->buf_index + offset; index = f->buf_index + offset;
@@ -638,7 +656,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size @@ -600,7 +617,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
size_t res; size_t res;
uint8_t *src; uint8_t *src;
@ -151,7 +150,7 @@ index a10882d47f..19c1de0472 100644
if (res == 0) { if (res == 0) {
return done; return done;
} }
@@ -672,7 +690,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size @@ -634,7 +651,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
*/ */
size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{ {
@ -160,7 +159,7 @@ index a10882d47f..19c1de0472 100644
size_t res; size_t res;
uint8_t *src = NULL; uint8_t *src = NULL;
@@ -697,7 +715,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset) @@ -659,7 +676,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset; int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f)); assert(!qemu_file_is_writable(f));
@ -169,7 +168,7 @@ index a10882d47f..19c1de0472 100644
if (index >= f->buf_size) { if (index >= f->buf_size) {
qemu_fill_buffer(f); qemu_fill_buffer(f);
@@ -811,7 +829,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len, @@ -777,7 +794,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
const uint8_t *p, size_t size) const uint8_t *p, size_t size)
{ {
@ -179,24 +178,24 @@ index a10882d47f..19c1de0472 100644
if (blen < compressBound(size)) { if (blen < compressBound(size)) {
return -1; return -1;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 32fd4a34fd..36a0cd8cc8 100644 index 47015f5201..1312b7c903 100644
--- a/migration/qemu-file.h --- a/migration/qemu-file.h
+++ b/migration/qemu-file.h +++ b/migration/qemu-file.h
@@ -30,7 +30,9 @@ @@ -63,7 +63,9 @@ typedef struct QEMUFileHooks {
#include "io/channel.h" } QEMUFileHooks;
QEMUFile *qemu_file_new_input(QIOChannel *ioc); QEMUFile *qemu_file_new_input(QIOChannel *ioc);
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size); +QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size);
QEMUFile *qemu_file_new_output(QIOChannel *ioc); QEMUFile *qemu_file_new_output(QIOChannel *ioc);
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size); +QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size);
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
int qemu_fclose(QEMUFile *f); int qemu_fclose(QEMUFile *f);
/*
diff --git a/migration/savevm-async.c b/migration/savevm-async.c diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 1af32604c7..be2035cd2e 100644 index e9fc18fb10..80624fada8 100644
--- a/migration/savevm-async.c --- a/migration/savevm-async.c
+++ b/migration/savevm-async.c +++ b/migration/savevm-async.c
@@ -386,7 +386,7 @@ void qmp_savevm_start(const char *statefile, Error **errp) @@ -378,7 +378,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target, QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
&snap_state.bs_pos)); &snap_state.bs_pos));
@ -205,7 +204,7 @@ index 1af32604c7..be2035cd2e 100644
if (!snap_state.file) { if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile); error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -510,7 +510,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp) @@ -496,7 +496,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker); blk_op_block_all(be, blocker);
/* restore the VM state */ /* restore the VM state */

View File

@ -4,23 +4,21 @@ Date: Mon, 6 Apr 2020 12:16:47 +0200
Subject: [PATCH] PVE: block: add the zeroinit block driver filter Subject: [PATCH] PVE: block: add the zeroinit block driver filter
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures [FE: adapt to changed function signatures]
adhere to block graph lock requirements
use dedicated function to open file child]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
block/meson.build | 1 + block/meson.build | 1 +
block/zeroinit.c | 207 ++++++++++++++++++++++++++++++++++++++++++++++ block/zeroinit.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 208 insertions(+) 2 files changed, 201 insertions(+)
create mode 100644 block/zeroinit.c create mode 100644 block/zeroinit.c
diff --git a/block/meson.build b/block/meson.build diff --git a/block/meson.build b/block/meson.build
index e1f03fd773..b530e117b5 100644 index 529fc172c6..1833c71ce9 100644
--- a/block/meson.build --- a/block/meson.build
+++ b/block/meson.build +++ b/block/meson.build
@@ -39,6 +39,7 @@ block_ss.add(files( @@ -40,6 +40,7 @@ block_ss.add(files(
'throttle.c',
'throttle-groups.c', 'throttle-groups.c',
'throttle.c',
'write-threshold.c', 'write-threshold.c',
+ 'zeroinit.c', + 'zeroinit.c',
), zstd, zlib, gnutls) ), zstd, zlib, gnutls)
@ -28,10 +26,10 @@ index e1f03fd773..b530e117b5 100644
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c')) system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/zeroinit.c b/block/zeroinit.c diff --git a/block/zeroinit.c b/block/zeroinit.c
new file mode 100644 new file mode 100644
index 0000000000..7998c9332d index 0000000000..1257342724
--- /dev/null --- /dev/null
+++ b/block/zeroinit.c +++ b/block/zeroinit.c
@@ -0,0 +1,207 @@ @@ -0,0 +1,200 @@
+/* +/*
+ * Filter to fake a zero-initialized block device. + * Filter to fake a zero-initialized block device.
+ * + *
@ -46,7 +44,6 @@ index 0000000000..7998c9332d
+#include "qapi/error.h" +#include "qapi/error.h"
+#include "block/block_int.h" +#include "block/block_int.h"
+#include "block/block-io.h" +#include "block/block-io.h"
+#include "block/graph-lock.h"
+#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h" +#include "qemu/cutils.h"
@ -112,9 +109,12 @@ index 0000000000..7998c9332d
+ } + }
+ +
+ /* Open the raw file */ + /* Open the raw file */
+ ret = bdrv_open_file_child(qemu_opt_get(opts, "x-next"), options, "next", + bs->file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options, "next",
+ bs, &local_err); + bs, &child_of_bds,
+ if (ret < 0) { + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+ false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err); + error_propagate(errp, local_err);
+ goto fail; + goto fail;
+ } + }
@ -125,9 +125,7 @@ index 0000000000..7998c9332d
+ ret = 0; + ret = 0;
+fail: +fail:
+ if (ret < 0) { + if (ret < 0) {
+ bdrv_graph_wrlock();
+ bdrv_unref_child(bs, bs->file); + bdrv_unref_child(bs, bs->file);
+ bdrv_graph_wrunlock();
+ } + }
+ qemu_opts_del(opts); + qemu_opts_del(opts);
+ return ret; + return ret;
@ -139,22 +137,19 @@ index 0000000000..7998c9332d
+ (void)s; + (void)s;
+} +}
+ +
+static coroutine_fn int64_t GRAPH_RDLOCK +static coroutine_fn int64_t zeroinit_co_getlength(BlockDriverState *bs)
+zeroinit_co_getlength(BlockDriverState *bs)
+{ +{
+ return bdrv_co_getlength(bs->file->bs); + return bdrv_co_getlength(bs->file->bs);
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn zeroinit_co_preadv(BlockDriverState *bs,
+zeroinit_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+{ +{
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t bytes, BdrvRequestFlags flags)
+ BdrvRequestFlags flags)
+{ +{
+ BDRVZeroinitState *s = bs->opaque; + BDRVZeroinitState *s = bs->opaque;
+ if (offset >= s->extents) + if (offset >= s->extents)
@ -162,9 +157,8 @@ index 0000000000..7998c9332d
+ return bdrv_pwrite_zeroes(bs->file, offset, bytes, flags); + return bdrv_pwrite_zeroes(bs->file, offset, bytes, flags);
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn zeroinit_co_pwritev(BlockDriverState *bs,
+zeroinit_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+{ +{
+ BDRVZeroinitState *s = bs->opaque; + BDRVZeroinitState *s = bs->opaque;
+ int64_t extents = offset + bytes; + int64_t extents = offset + bytes;
@ -173,35 +167,32 @@ index 0000000000..7998c9332d
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+} +}
+ +
+static coroutine_fn int GRAPH_RDLOCK +static coroutine_fn int zeroinit_co_flush(BlockDriverState *bs)
+zeroinit_co_flush(BlockDriverState *bs)
+{ +{
+ return bdrv_co_flush(bs->file->bs); + return bdrv_co_flush(bs->file->bs);
+} +}
+ +
+static int GRAPH_RDLOCK +static int zeroinit_has_zero_init(BlockDriverState *bs)
+zeroinit_has_zero_init(BlockDriverState *bs)
+{ +{
+ BDRVZeroinitState *s = bs->opaque; + BDRVZeroinitState *s = bs->opaque;
+ return s->has_zero_init; + return s->has_zero_init;
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn zeroinit_co_pdiscard(BlockDriverState *bs,
+zeroinit_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) + int64_t offset, int64_t bytes)
+{ +{
+ return bdrv_co_pdiscard(bs->file, offset, bytes); + return bdrv_co_pdiscard(bs->file, offset, bytes);
+} +}
+ +
+static int GRAPH_RDLOCK +static int zeroinit_co_truncate(BlockDriverState *bs, int64_t offset,
+zeroinit_co_truncate(BlockDriverState *bs, int64_t offset, _Bool exact, + _Bool exact, PreallocMode prealloc,
+ PreallocMode prealloc, BdrvRequestFlags req_flags, + BdrvRequestFlags req_flags, Error **errp)
+ Error **errp)
+{ +{
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, req_flags, errp); + return bdrv_co_truncate(bs->file, offset, exact, prealloc, req_flags, errp);
+} +}
+ +
+static coroutine_fn int GRAPH_RDLOCK +static coroutine_fn int zeroinit_co_get_info(BlockDriverState *bs,
+zeroinit_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) + BlockDriverInfo *bdi)
+{ +{
+ return bdrv_co_get_info(bs->file->bs, bdi); + return bdrv_co_get_info(bs->file->bs, bdi);
+} +}

View File

@ -10,14 +10,14 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
qemu-options.hx | 3 +++ qemu-options.hx | 3 +++
system/vl.c | 8 ++++++++ softmmu/vl.c | 8 ++++++++
2 files changed, 11 insertions(+) 2 files changed, 11 insertions(+)
diff --git a/qemu-options.hx b/qemu-options.hx diff --git a/qemu-options.hx b/qemu-options.hx
index 511ab9415e..92e301d545 100644 index dc1ececc9c..848d2dfdd1 100644
--- a/qemu-options.hx --- a/qemu-options.hx
+++ b/qemu-options.hx +++ b/qemu-options.hx
@@ -1237,6 +1237,9 @@ legacy PC, they are not recommended for modern configurations. @@ -1197,6 +1197,9 @@ legacy PC, they are not recommended for modern configurations.
ERST ERST
@ -27,11 +27,11 @@ index 511ab9415e..92e301d545 100644
DEF("fda", HAS_ARG, QEMU_OPTION_fda, DEF("fda", HAS_ARG, QEMU_OPTION_fda,
"-fda/-fdb file use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL) "-fda/-fdb file use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL) DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
diff --git a/system/vl.c b/system/vl.c diff --git a/softmmu/vl.c b/softmmu/vl.c
index 2738ab7c91..20ebf2c920 100644 index 3f2681aded..1a3b9cc4b8 100644
--- a/system/vl.c --- a/softmmu/vl.c
+++ b/system/vl.c +++ b/softmmu/vl.c
@@ -2748,6 +2748,7 @@ void qemu_init(int argc, char **argv) @@ -2683,6 +2683,7 @@ void qemu_init(int argc, char **argv)
MachineClass *machine_class; MachineClass *machine_class;
bool userconfig = true; bool userconfig = true;
FILE *vmstate_dump_file = NULL; FILE *vmstate_dump_file = NULL;
@ -39,7 +39,7 @@ index 2738ab7c91..20ebf2c920 100644
qemu_add_opts(&qemu_drive_opts); qemu_add_opts(&qemu_drive_opts);
qemu_add_drive_opts(&qemu_legacy_drive_opts); qemu_add_drive_opts(&qemu_legacy_drive_opts);
@@ -3371,6 +3372,13 @@ void qemu_init(int argc, char **argv) @@ -3306,6 +3307,13 @@ void qemu_init(int argc, char **argv)
machine_parse_property_opt(qemu_find_opts("smp-opts"), machine_parse_property_opt(qemu_find_opts("smp-opts"),
"smp", optarg); "smp", optarg);
break; break;
@ -50,6 +50,6 @@ index 2738ab7c91..20ebf2c920 100644
+ exit(1); + exit(1);
+ } + }
+ break; + break;
#ifdef CONFIG_VNC
case QEMU_OPTION_vnc: case QEMU_OPTION_vnc:
vnc_parse(optarg); vnc_parse(optarg);
break;

View File

@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+) 1 file changed, 9 insertions(+)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d8fc1e2815..789694b8b3 100644 index 4a34f03047..59b917e50c 100644
--- a/hw/intc/apic_common.c --- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c
@@ -263,6 +263,15 @@ static void apic_reset_common(DeviceState *dev) @@ -252,6 +252,15 @@ static void apic_reset_common(DeviceState *dev)
info->vapic_base_update(s); info->vapic_base_update(s);
apic_init_reset(dev); apic_init_reset(dev);

View File

@ -9,14 +9,14 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
block/file-posix.c | 59 ++++++++++++++++++++++++++++++-------------- block/file-posix.c | 59 ++++++++++++++++++++++++++++++--------------
qapi/block-core.json | 7 +++++- qapi/block-core.json | 3 ++-
2 files changed, 46 insertions(+), 20 deletions(-) 2 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c diff --git a/block/file-posix.c b/block/file-posix.c
index 43bc0bd520..60e98c87f1 100644 index ca551baa42..8b3b83e9d4 100644
--- a/block/file-posix.c --- a/block/file-posix.c
+++ b/block/file-posix.c +++ b/block/file-posix.c
@@ -2876,6 +2876,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) @@ -2873,6 +2873,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
int fd; int fd;
uint64_t perm, shared; uint64_t perm, shared;
int result = 0; int result = 0;
@ -24,7 +24,7 @@ index 43bc0bd520..60e98c87f1 100644
/* Validate options and set default values */ /* Validate options and set default values */
assert(options->driver == BLOCKDEV_DRIVER_FILE); assert(options->driver == BLOCKDEV_DRIVER_FILE);
@@ -2916,19 +2917,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) @@ -2913,19 +2914,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
perm = BLK_PERM_WRITE | BLK_PERM_RESIZE; perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
@ -59,7 +59,7 @@ index 43bc0bd520..60e98c87f1 100644
} }
/* Clear the file by truncating it to 0 */ /* Clear the file by truncating it to 0 */
@@ -2982,13 +2986,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) @@ -2979,13 +2983,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
} }
out_unlock: out_unlock:
@ -82,7 +82,7 @@ index 43bc0bd520..60e98c87f1 100644
} }
out_close: out_close:
@@ -3012,6 +3018,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename, @@ -3009,6 +3015,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
PreallocMode prealloc; PreallocMode prealloc;
char *buf = NULL; char *buf = NULL;
Error *local_err = NULL; Error *local_err = NULL;
@ -90,7 +90,7 @@ index 43bc0bd520..60e98c87f1 100644
/* Skip file: protocol prefix */ /* Skip file: protocol prefix */
strstart(filename, "file:", &filename); strstart(filename, "file:", &filename);
@@ -3034,6 +3041,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename, @@ -3031,6 +3038,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
return -EINVAL; return -EINVAL;
} }
@ -109,7 +109,7 @@ index 43bc0bd520..60e98c87f1 100644
options = (BlockdevCreateOptions) { options = (BlockdevCreateOptions) {
.driver = BLOCKDEV_DRIVER_FILE, .driver = BLOCKDEV_DRIVER_FILE,
.u.file = { .u.file = {
@@ -3045,6 +3064,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename, @@ -3042,6 +3061,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
.nocow = nocow, .nocow = nocow,
.has_extent_size_hint = has_extent_size_hint, .has_extent_size_hint = has_extent_size_hint,
.extent_size_hint = extent_size_hint, .extent_size_hint = extent_size_hint,
@ -119,21 +119,10 @@ index 43bc0bd520..60e98c87f1 100644
}; };
return raw_co_create(&options, errp); return raw_co_create(&options, errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json diff --git a/qapi/block-core.json b/qapi/block-core.json
index 905da8be72..3db587a6e4 100644 index a5cea82139..bb471c078d 100644
--- a/qapi/block-core.json --- a/qapi/block-core.json
+++ b/qapi/block-core.json +++ b/qapi/block-core.json
@@ -4956,6 +4956,10 @@ @@ -4880,7 +4880,8 @@
# @extent-size-hint: Extent size hint to add to the image file; 0 for
# not adding an extent size hint (default: 1 MB, since 5.1)
#
+# @locking: whether to enable file locking. If set to 'auto', only
+# enable when Open File Descriptor (OFD) locking API is available
+# (default: auto).
+#
# Since: 2.12
##
{ 'struct': 'BlockdevCreateOptionsFile',
@@ -4963,7 +4967,8 @@
'size': 'size', 'size': 'size',
'*preallocation': 'PreallocMode', '*preallocation': 'PreallocMode',
'*nocow': 'bool', '*nocow': 'bool',

View File

@ -26,10 +26,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-) 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c diff --git a/hw/core/machine.c b/hw/core/machine.c
index 4273de16a0..83f1fc0293 100644 index f0d35c6401..1427983543 100644
--- a/hw/core/machine.c --- a/hw/core/machine.c
+++ b/hw/core/machine.c +++ b/hw/core/machine.c
@@ -162,7 +162,8 @@ GlobalProperty hw_compat_4_0[] = { @@ -148,7 +148,8 @@ GlobalProperty hw_compat_4_0[] = {
{ "virtio-vga", "edid", "false" }, { "virtio-vga", "edid", "false" },
{ "virtio-gpu-device", "edid", "false" }, { "virtio-gpu-device", "edid", "false" },
{ "virtio-device", "use-started", "false" }, { "virtio-device", "use-started", "false" },

View File

@ -17,14 +17,14 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
hw/core/machine-qmp-cmds.c | 5 +++++ hw/core/machine-qmp-cmds.c | 5 +++++
include/hw/boards.h | 2 ++ include/hw/boards.h | 2 ++
qapi/machine.json | 4 +++- qapi/machine.json | 4 +++-
system/vl.c | 25 +++++++++++++++++++++++++ softmmu/vl.c | 25 +++++++++++++++++++++++++
4 files changed, 35 insertions(+), 1 deletion(-) 4 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 314351cdff..628a3537c5 100644 index 40821e2317..ee93ddd69a 100644
--- a/hw/core/machine-qmp-cmds.c --- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c
@@ -94,6 +94,11 @@ MachineInfoList *qmp_query_machines(Error **errp) @@ -95,6 +95,11 @@ MachineInfoList *qmp_query_machines(Error **errp)
if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) { if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
info->has_is_current = true; info->has_is_current = true;
info->is_current = true; info->is_current = true;
@ -37,10 +37,10 @@ index 314351cdff..628a3537c5 100644
if (mc->default_cpu_type) { if (mc->default_cpu_type) {
diff --git a/include/hw/boards.h b/include/hw/boards.h diff --git a/include/hw/boards.h b/include/hw/boards.h
index 8b8f6d5c00..dd6d0a1447 100644 index ed83360198..f8b88cd86a 100644
--- a/include/hw/boards.h --- a/include/hw/boards.h
+++ b/include/hw/boards.h +++ b/include/hw/boards.h
@@ -246,6 +246,8 @@ struct MachineClass { @@ -235,6 +235,8 @@ struct MachineClass {
const char *desc; const char *desc;
const char *deprecation_reason; const char *deprecation_reason;
@ -50,10 +50,10 @@ index 8b8f6d5c00..dd6d0a1447 100644
void (*reset)(MachineState *state, ShutdownCause reason); void (*reset)(MachineState *state, ShutdownCause reason);
void (*wakeup)(MachineState *state); void (*wakeup)(MachineState *state);
diff --git a/qapi/machine.json b/qapi/machine.json diff --git a/qapi/machine.json b/qapi/machine.json
index a024d5b05d..1d69bffaa0 100644 index fbb61f18e4..7da3c519ba 100644
--- a/qapi/machine.json --- a/qapi/machine.json
+++ b/qapi/machine.json +++ b/qapi/machine.json
@@ -168,6 +168,8 @@ @@ -161,6 +161,8 @@
# #
# @acpi: machine type supports ACPI (since 8.0) # @acpi: machine type supports ACPI (since 8.0)
# #
@ -62,7 +62,7 @@ index a024d5b05d..1d69bffaa0 100644
# Since: 1.2 # Since: 1.2
## ##
{ 'struct': 'MachineInfo', { 'struct': 'MachineInfo',
@@ -175,7 +177,7 @@ @@ -168,7 +170,7 @@
'*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int', '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool', 'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str', 'deprecated': 'bool', '*default-cpu-type': 'str',
@ -71,19 +71,19 @@ index a024d5b05d..1d69bffaa0 100644
## ##
# @query-machines: # @query-machines:
diff --git a/system/vl.c b/system/vl.c diff --git a/softmmu/vl.c b/softmmu/vl.c
index 20ebf2c920..4d39e32097 100644 index 1a3b9cc4b8..e9b5f62cc3 100644
--- a/system/vl.c --- a/softmmu/vl.c
+++ b/system/vl.c +++ b/softmmu/vl.c
@@ -1659,6 +1659,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv, @@ -1597,6 +1597,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
static MachineClass *select_machine(QDict *qdict, Error **errp) static MachineClass *select_machine(QDict *qdict, Error **errp)
{ {
const char *machine_type = qdict_get_try_str(qdict, "type"); const char *optarg = qdict_get_try_str(qdict, "type");
+ const char *pvever = qdict_get_try_str(qdict, "pvever"); + const char *pvever = qdict_get_try_str(qdict, "pvever");
GSList *machines = object_class_get_list(TYPE_MACHINE, false); GSList *machines = object_class_get_list(TYPE_MACHINE, false);
MachineClass *machine_class; MachineClass *machine_class;
Error *local_err = NULL; Error *local_err = NULL;
@@ -1676,6 +1677,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp) @@ -1614,6 +1615,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
} }
} }
@ -95,7 +95,7 @@ index 20ebf2c920..4d39e32097 100644
g_slist_free(machines); g_slist_free(machines);
if (local_err) { if (local_err) {
error_append_hint(&local_err, "Use -machine help to list supported machines\n"); error_append_hint(&local_err, "Use -machine help to list supported machines\n");
@@ -3313,12 +3319,31 @@ void qemu_init(int argc, char **argv) @@ -3248,12 +3254,31 @@ void qemu_init(int argc, char **argv)
case QEMU_OPTION_machine: case QEMU_OPTION_machine:
{ {
bool help; bool help;

View File

@ -25,7 +25,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+), 4 deletions(-) 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/block/backup.c b/block/backup.c diff --git a/block/backup.c b/block/backup.c
index 3dd2e229d2..eba5b11493 100644 index db3791f4d1..39410dcf8d 100644
--- a/block/backup.c --- a/block/backup.c
+++ b/block/backup.c +++ b/block/backup.c
@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job) @@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
@ -48,9 +48,9 @@ index 3dd2e229d2..eba5b11493 100644
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0; int64_t offset = 0;
int64_t count; int64_t count;
@@ -502,6 +500,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, @@ -495,6 +493,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort); &error_abort);
bdrv_graph_wrunlock();
+ backup_init_bcs_bitmap(job); + backup_init_bcs_bitmap(job);
+ +

View File

@ -15,21 +15,21 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
block/meson.build | 2 + block/meson.build | 2 +
meson.build | 5 + meson.build | 5 +
vma-reader.c | 870 ++++++++++++++++++++++++++++++++++++++++++++ vma-reader.c | 867 ++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 817 +++++++++++++++++++++++++++++++++++++++++ vma-writer.c | 818 +++++++++++++++++++++++++++++++++++++++++
vma.c | 901 ++++++++++++++++++++++++++++++++++++++++++++++ vma.c | 900 ++++++++++++++++++++++++++++++++++++++++++++++
vma.h | 150 ++++++++ vma.h | 150 ++++++++
6 files changed, 2745 insertions(+) 6 files changed, 2742 insertions(+)
create mode 100644 vma-reader.c create mode 100644 vma-reader.c
create mode 100644 vma-writer.c create mode 100644 vma-writer.c
create mode 100644 vma.c create mode 100644 vma.c
create mode 100644 vma.h create mode 100644 vma.h
diff --git a/block/meson.build b/block/meson.build diff --git a/block/meson.build b/block/meson.build
index b530e117b5..b245daa98e 100644 index 1833c71ce9..59b71ba9f3 100644
--- a/block/meson.build --- a/block/meson.build
+++ b/block/meson.build +++ b/block/meson.build
@@ -42,6 +42,8 @@ block_ss.add(files( @@ -43,6 +43,8 @@ block_ss.add(files(
'zeroinit.c', 'zeroinit.c',
), zstd, zlib, gnutls) ), zstd, zlib, gnutls)
@ -39,10 +39,10 @@ index b530e117b5..b245daa98e 100644
system_ss.add(files('block-ram-registrar.c')) system_ss.add(files('block-ram-registrar.c'))
diff --git a/meson.build b/meson.build diff --git a/meson.build b/meson.build
index 91a0aa64c6..620cc594b2 100644 index a9c4f28247..cd95530d3b 100644
--- a/meson.build --- a/meson.build
+++ b/meson.build +++ b/meson.build
@@ -1922,6 +1922,8 @@ endif @@ -1778,6 +1778,8 @@ endif
has_gettid = cc.has_function('gettid') has_gettid = cc.has_function('gettid')
@ -51,7 +51,7 @@ index 91a0aa64c6..620cc594b2 100644
# libselinux # libselinux
selinux = dependency('libselinux', selinux = dependency('libselinux',
required: get_option('selinux'), required: get_option('selinux'),
@@ -4023,6 +4025,9 @@ if have_tools @@ -3908,6 +3910,9 @@ if have_tools
dependencies: [blockdev, qemuutil, gnutls, selinux], dependencies: [blockdev, qemuutil, gnutls, selinux],
install: true) install: true)
@ -59,14 +59,14 @@ index 91a0aa64c6..620cc594b2 100644
+ dependencies: [authz, block, crypto, io, qom], install: true) + dependencies: [authz, block, crypto, io, qom], install: true)
+ +
subdir('storage-daemon') subdir('storage-daemon')
subdir('contrib/rdmacm-mux')
foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon'] subdir('contrib/elf2dmp')
diff --git a/vma-reader.c b/vma-reader.c diff --git a/vma-reader.c b/vma-reader.c
new file mode 100644 new file mode 100644
index 0000000000..d0b6721812 index 0000000000..81a891c6b1
--- /dev/null --- /dev/null
+++ b/vma-reader.c +++ b/vma-reader.c
@@ -0,0 +1,870 @@ @@ -0,0 +1,867 @@
+/* +/*
+ * VMA: Virtual Machine Archive + * VMA: Virtual Machine Archive
+ * + *
@ -88,7 +88,6 @@ index 0000000000..d0b6721812
+#include "qemu/ratelimit.h" +#include "qemu/ratelimit.h"
+#include "vma.h" +#include "vma.h"
+#include "block/block.h" +#include "block/block.h"
+#include "block/graph-lock.h"
+#include "sysemu/block-backend.h" +#include "sysemu/block-backend.h"
+ +
+static unsigned char zero_vma_block[VMA_BLOCK_SIZE]; +static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
@ -601,10 +600,8 @@ index 0000000000..d0b6721812
+ } else { + } else {
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0); + int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
+ if (res < 0) { + if (res < 0) {
+ bdrv_graph_rdlock_main_loop();
+ error_setg(errp, "blk_pwrite to %s failed (%d)", + error_setg(errp, "blk_pwrite to %s failed (%d)",
+ bdrv_get_device_name(blk_bs(target)), res); + bdrv_get_device_name(blk_bs(target)), res);
+ bdrv_graph_rdunlock_main_loop();
+ return -1; + return -1;
+ } + }
+ } + }
@ -939,10 +936,10 @@ index 0000000000..d0b6721812
+ +
diff --git a/vma-writer.c b/vma-writer.c diff --git a/vma-writer.c b/vma-writer.c
new file mode 100644 new file mode 100644
index 0000000000..a466652a5d index 0000000000..126b296647
--- /dev/null --- /dev/null
+++ b/vma-writer.c +++ b/vma-writer.c
@@ -0,0 +1,817 @@ @@ -0,0 +1,818 @@
+/* +/*
+ * VMA: Virtual Machine Archive + * VMA: Virtual Machine Archive
+ * + *
@ -1517,8 +1514,8 @@ index 0000000000..a466652a5d
+ int i; + int i;
+ +
+ g_assert(vmaw != NULL); + g_assert(vmaw != NULL);
+ g_assert(status != NULL);
+ +
+ if (status) {
+ status->status = vmaw->status; + status->status = vmaw->status;
+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg)); + g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
+ for (i = 0; i <= 255; i++) { + for (i = 0; i <= 255; i++) {
@ -1526,6 +1523,7 @@ index 0000000000..a466652a5d
+ } + }
+ +
+ uuid_unparse_lower(vmaw->uuid, status->uuid_str); + uuid_unparse_lower(vmaw->uuid, status->uuid_str);
+ }
+ +
+ status->closed = vmaw->closed; + status->closed = vmaw->closed;
+ +
@ -1762,10 +1760,10 @@ index 0000000000..a466652a5d
+} +}
diff --git a/vma.c b/vma.c diff --git a/vma.c b/vma.c
new file mode 100644 new file mode 100644
index 0000000000..bb715e9061 index 0000000000..347f6283ca
--- /dev/null --- /dev/null
+++ b/vma.c +++ b/vma.c
@@ -0,0 +1,901 @@ @@ -0,0 +1,900 @@
+/* +/*
+ * VMA: Virtual Machine Archive + * VMA: Virtual Machine Archive
+ * + *
@ -2078,17 +2076,17 @@ index 0000000000..bb715e9061
+ inbuf); + inbuf);
+ } + }
+ +
+ RestoreMap *restore_map = g_new0(RestoreMap, 1); + RestoreMap *map = g_new0(RestoreMap, 1);
+ restore_map->devname = g_strdup(devname); + map->devname = g_strdup(devname);
+ restore_map->path = g_strdup(path); + map->path = g_strdup(path);
+ restore_map->format = format; + map->format = format;
+ restore_map->throttling_bps = bps_value; + map->throttling_bps = bps_value;
+ restore_map->throttling_group = group; + map->throttling_group = group;
+ restore_map->cache = cache; + map->cache = cache;
+ restore_map->write_zero = write_zero; + map->write_zero = write_zero;
+ restore_map->skip = skip; + map->skip = skip;
+ +
+ g_hash_table_insert(devmap, restore_map->devname, restore_map); + g_hash_table_insert(devmap, map->devname, map);
+ +
+ }; + };
+ } + }
@ -2387,7 +2385,7 @@ index 0000000000..bb715e9061
+ +
+static int create_archive(int argc, char **argv) +static int create_archive(int argc, char **argv)
+{ +{
+ int c; + int i, c;
+ int verbose = 0; + int verbose = 0;
+ const char *archivename; + const char *archivename;
+ GList *backup_coroutines = NULL; + GList *backup_coroutines = NULL;
@ -2545,7 +2543,6 @@ index 0000000000..bb715e9061
+ vma_writer_get_status(vmaw, &vmastat); + vma_writer_get_status(vmaw, &vmastat);
+ +
+ if (verbose) { + if (verbose) {
+ int i;
+ for (i = 0; i < 256; i++) { + for (i = 0; i < 256; i++) {
+ VmaStreamInfo *si = &vmastat.stream_info[i]; + VmaStreamInfo *si = &vmastat.stream_info[i];
+ if (si->size) { + if (si->size) {

View File

@ -12,20 +12,20 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to coroutine changes] [FE: adapt to coroutine changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
block/backup-dump.c | 172 +++++++++++++++++++++++++++++++ block/backup-dump.c | 168 +++++++++++++++++++++++++++++++
block/backup.c | 30 ++---- block/backup.c | 30 ++----
block/meson.build | 1 + block/meson.build | 1 +
include/block/block_int-common.h | 35 +++++++ include/block/block_int-common.h | 35 +++++++
job.c | 3 +- job.c | 3 +-
5 files changed, 218 insertions(+), 23 deletions(-) 5 files changed, 214 insertions(+), 23 deletions(-)
create mode 100644 block/backup-dump.c create mode 100644 block/backup-dump.c
diff --git a/block/backup-dump.c b/block/backup-dump.c diff --git a/block/backup-dump.c b/block/backup-dump.c
new file mode 100644 new file mode 100644
index 0000000000..e46abf1070 index 0000000000..232a094426
--- /dev/null --- /dev/null
+++ b/block/backup-dump.c +++ b/block/backup-dump.c
@@ -0,0 +1,172 @@ @@ -0,0 +1,168 @@
+/* +/*
+ * BlockDriver to send backup data stream to a callback function + * BlockDriver to send backup data stream to a callback function
+ * + *
@ -37,8 +37,6 @@ index 0000000000..e46abf1070
+ */ + */
+ +
+#include "qemu/osdep.h" +#include "qemu/osdep.h"
+
+#include "qapi/qmp/qdict.h"
+#include "qom/object_interfaces.h" +#include "qom/object_interfaces.h"
+#include "block/block_int.h" +#include "block/block_int.h"
+ +
@ -171,7 +169,7 @@ index 0000000000..e46abf1070
+block_init(bdrv_backup_dump_init); +block_init(bdrv_backup_dump_init);
+ +
+ +
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create( +BlockDriverState *bdrv_backup_dump_create(
+ int dump_cb_block_size, + int dump_cb_block_size,
+ uint64_t byte_size, + uint64_t byte_size,
+ BackupDumpFunc *dump_cb, + BackupDumpFunc *dump_cb,
@ -179,11 +177,9 @@ index 0000000000..e46abf1070
+ Error **errp) + Error **errp)
+{ +{
+ BDRVBackupDumpState *state; + BDRVBackupDumpState *state;
+ BlockDriverState *bs = bdrv_new_open_driver(
+ &bdrv_backup_dump_drive, NULL, BDRV_O_RDWR, errp);
+ +
+ QDict *options = qdict_new();
+ qdict_put_str(options, "driver", "backup-dump-drive");
+
+ BlockDriverState *bs = bdrv_co_open(NULL, NULL, options, BDRV_O_RDWR, errp);
+ if (!bs) { + if (!bs) {
+ return NULL; + return NULL;
+ } + }
@ -199,7 +195,7 @@ index 0000000000..e46abf1070
+ return bs; + return bs;
+} +}
diff --git a/block/backup.c b/block/backup.c diff --git a/block/backup.c b/block/backup.c
index eba5b11493..1963e47ab9 100644 index 39410dcf8d..af87fa6aa9 100644
--- a/block/backup.c --- a/block/backup.c
+++ b/block/backup.c +++ b/block/backup.c
@@ -29,28 +29,6 @@ @@ -29,28 +29,6 @@
@ -231,7 +227,7 @@ index eba5b11493..1963e47ab9 100644
static const BlockJobDriver backup_job_driver; static const BlockJobDriver backup_job_driver;
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -462,6 +440,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, @@ -457,6 +435,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
} }
cluster_size = block_copy_cluster_size(bcs); cluster_size = block_copy_cluster_size(bcs);
@ -247,7 +243,7 @@ index eba5b11493..1963e47ab9 100644
if (perf->max_chunk && perf->max_chunk < cluster_size) { if (perf->max_chunk && perf->max_chunk < cluster_size) {
error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup " error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
diff --git a/block/meson.build b/block/meson.build diff --git a/block/meson.build b/block/meson.build
index b245daa98e..e99914eaa4 100644 index 59b71ba9f3..6fde9f7dcd 100644
--- a/block/meson.build --- a/block/meson.build
+++ b/block/meson.build +++ b/block/meson.build
@@ -4,6 +4,7 @@ block_ss.add(files( @@ -4,6 +4,7 @@ block_ss.add(files(
@ -255,11 +251,11 @@ index b245daa98e..e99914eaa4 100644
'amend.c', 'amend.c',
'backup.c', 'backup.c',
+ 'backup-dump.c', + 'backup-dump.c',
'copy-before-write.c',
'blkdebug.c', 'blkdebug.c',
'blklogwrites.c', 'blklogwrites.c',
'blkverify.c',
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 761276127e..b3e6697613 100644 index 74195c3004..0f2e1817ad 100644
--- a/include/block/block_int-common.h --- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h +++ b/include/block/block_int-common.h
@@ -26,6 +26,7 @@ @@ -26,6 +26,7 @@
@ -276,7 +272,7 @@ index 761276127e..b3e6697613 100644
+typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf); +typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
+ +
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create( +BlockDriverState *bdrv_backup_dump_create(
+ int dump_cb_block_size, + int dump_cb_block_size,
+ uint64_t byte_size, + uint64_t byte_size,
+ BackupDumpFunc *dump_cb, + BackupDumpFunc *dump_cb,
@ -312,10 +308,10 @@ index 761276127e..b3e6697613 100644
BDRV_TRACKED_READ, BDRV_TRACKED_READ,
BDRV_TRACKED_WRITE, BDRV_TRACKED_WRITE,
diff --git a/job.c b/job.c diff --git a/job.c b/job.c
index 660ce22c56..baf54c8d60 100644 index 72d57f0934..93e22d180b 100644
--- a/job.c --- a/job.c
+++ b/job.c +++ b/job.c
@@ -331,7 +331,8 @@ static bool job_started_locked(Job *job) @@ -330,7 +330,8 @@ static bool job_started_locked(Job *job)
} }
/* Called with job_mutex held. */ /* Called with job_mutex held. */

View File

@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 46 insertions(+) 2 files changed, 46 insertions(+)
diff --git a/include/qemu/job.h b/include/qemu/job.h diff --git a/include/qemu/job.h b/include/qemu/job.h
index 2b873f2576..528cd6acb9 100644 index e502787dd8..963cf2bef5 100644
--- a/include/qemu/job.h --- a/include/qemu/job.h
+++ b/include/qemu/job.h +++ b/include/qemu/job.h
@@ -362,6 +362,18 @@ void job_unlock(void); @@ -381,6 +381,18 @@ void job_unlock(void);
*/ */
JobTxn *job_txn_new(void); JobTxn *job_txn_new(void);
@ -34,10 +34,10 @@ index 2b873f2576..528cd6acb9 100644
* Release a reference that was previously acquired with job_txn_add_job or * Release a reference that was previously acquired with job_txn_add_job or
* job_txn_new. If it's the last reference to the object, it will be freed. * job_txn_new. If it's the last reference to the object, it will be freed.
diff --git a/job.c b/job.c diff --git a/job.c b/job.c
index baf54c8d60..3ac5e5cde2 100644 index 93e22d180b..2b31f1e14f 100644
--- a/job.c --- a/job.c
+++ b/job.c +++ b/job.c
@@ -94,6 +94,8 @@ struct JobTxn { @@ -93,6 +93,8 @@ struct JobTxn {
/* Reference count */ /* Reference count */
int refcnt; int refcnt;
@ -46,7 +46,7 @@ index baf54c8d60..3ac5e5cde2 100644
}; };
void job_lock(void) void job_lock(void)
@@ -119,6 +121,25 @@ JobTxn *job_txn_new(void) @@ -118,6 +120,25 @@ JobTxn *job_txn_new(void)
return txn; return txn;
} }
@ -72,7 +72,7 @@ index baf54c8d60..3ac5e5cde2 100644
/* Called with job_mutex held. */ /* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn) static void job_txn_ref_locked(JobTxn *txn)
{ {
@@ -1042,6 +1063,12 @@ static void job_completed_txn_success_locked(Job *job) @@ -1057,6 +1078,12 @@ static void job_completed_txn_success_locked(Job *job)
*/ */
QLIST_FOREACH(other_job, &txn->jobs, txn_list) { QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (!job_is_completed_locked(other_job)) { if (!job_is_completed_locked(other_job)) {
@ -85,7 +85,7 @@ index baf54c8d60..3ac5e5cde2 100644
return; return;
} }
assert(other_job->ret == 0); assert(other_job->ret == 0);
@@ -1253,6 +1280,13 @@ int job_finish_sync_locked(Job *job, @@ -1268,6 +1295,13 @@ int job_finish_sync_locked(Job *job,
return -EBUSY; return -EBUSY;
} }

View File

@ -84,30 +84,68 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
create jobs in a drained section] create jobs in a drained section]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
block/backup-dump.c | 10 +-
block/meson.build | 5 + block/meson.build | 5 +
block/monitor/block-hmp-cmds.c | 39 ++ block/monitor/block-hmp-cmds.c | 39 ++
blockdev.c | 1 + blockdev.c | 1 +
hmp-commands-info.hx | 14 + hmp-commands-info.hx | 14 +
hmp-commands.hx | 29 + hmp-commands.hx | 29 +
include/block/block_int-common.h | 2 +-
include/monitor/hmp.h | 3 + include/monitor/hmp.h | 3 +
meson.build | 1 + meson.build | 1 +
monitor/hmp-cmds.c | 72 +++ monitor/hmp-cmds.c | 72 ++
proxmox-backup-client.c | 146 +++++ proxmox-backup-client.c | 146 ++++
proxmox-backup-client.h | 60 ++ proxmox-backup-client.h | 60 ++
pve-backup.c | 1092 ++++++++++++++++++++++++++++++++ pve-backup.c | 1067 ++++++++++++++++++++++++++++++
qapi/block-core.json | 233 +++++++ qapi/block-core.json | 229 +++++++
qapi/common.json | 14 + qapi/common.json | 14 +
qapi/machine.json | 16 +- qapi/machine.json | 16 +-
14 files changed, 1711 insertions(+), 14 deletions(-) 16 files changed, 1690 insertions(+), 18 deletions(-)
create mode 100644 proxmox-backup-client.c create mode 100644 proxmox-backup-client.c
create mode 100644 proxmox-backup-client.h create mode 100644 proxmox-backup-client.h
create mode 100644 pve-backup.c create mode 100644 pve-backup.c
diff --git a/block/backup-dump.c b/block/backup-dump.c
index 232a094426..e46abf1070 100644
--- a/block/backup-dump.c
+++ b/block/backup-dump.c
@@ -9,6 +9,8 @@
*/
#include "qemu/osdep.h"
+
+#include "qapi/qmp/qdict.h"
#include "qom/object_interfaces.h"
#include "block/block_int.h"
@@ -141,7 +143,7 @@ static void bdrv_backup_dump_init(void)
block_init(bdrv_backup_dump_init);
-BlockDriverState *bdrv_backup_dump_create(
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
int dump_cb_block_size,
uint64_t byte_size,
BackupDumpFunc *dump_cb,
@@ -149,9 +151,11 @@ BlockDriverState *bdrv_backup_dump_create(
Error **errp)
{
BDRVBackupDumpState *state;
- BlockDriverState *bs = bdrv_new_open_driver(
- &bdrv_backup_dump_drive, NULL, BDRV_O_RDWR, errp);
+ QDict *options = qdict_new();
+ qdict_put_str(options, "driver", "backup-dump-drive");
+
+ BlockDriverState *bs = bdrv_co_open(NULL, NULL, options, BDRV_O_RDWR, errp);
if (!bs) {
return NULL;
}
diff --git a/block/meson.build b/block/meson.build diff --git a/block/meson.build b/block/meson.build
index e99914eaa4..6bba803f94 100644 index 6fde9f7dcd..6d468f89e5 100644
--- a/block/meson.build --- a/block/meson.build
+++ b/block/meson.build +++ b/block/meson.build
@@ -44,6 +44,11 @@ block_ss.add(files( @@ -45,6 +45,11 @@ block_ss.add(files(
), zstd, zlib, gnutls) ), zstd, zlib, gnutls)
block_ss.add(files('../vma-writer.c'), libuuid) block_ss.add(files('../vma-writer.c'), libuuid)
@ -120,10 +158,10 @@ index e99914eaa4..6bba803f94 100644
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c')) system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c')) system_ss.add(files('block-ram-registrar.c'))
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index bdf2eb50b6..439a7a14c8 100644 index ca2599de44..6efe28cef5 100644
--- a/block/monitor/block-hmp-cmds.c --- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c
@@ -1009,3 +1009,42 @@ void hmp_change_medium(Monitor *mon, const char *device, const char *target, @@ -1029,3 +1029,42 @@ void hmp_change_medium(Monitor *mon, const char *device, const char *target,
qmp_blockdev_change_medium(device, NULL, target, arg, true, force, qmp_blockdev_change_medium(device, NULL, target, arg, true, force,
!!read_only, read_only_mode, errp); !!read_only, read_only_mode, errp);
} }
@ -167,7 +205,7 @@ index bdf2eb50b6..439a7a14c8 100644
+ hmp_handle_error(mon, error); + hmp_handle_error(mon, error);
+} +}
diff --git a/blockdev.c b/blockdev.c diff --git a/blockdev.c b/blockdev.c
index ed8198f351..1054a69279 100644 index cd5f205ad1..7793143d76 100644
--- a/blockdev.c --- a/blockdev.c
+++ b/blockdev.c +++ b/blockdev.c
@@ -37,6 +37,7 @@ @@ -37,6 +37,7 @@
@ -179,7 +217,7 @@ index ed8198f351..1054a69279 100644
#include "monitor/monitor.h" #include "monitor/monitor.h"
#include "qemu/error-report.h" #include "qemu/error-report.h"
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index d5ab880492..6c97248d1b 100644 index 10fdd822e0..15937793c1 100644
--- a/hmp-commands-info.hx --- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx +++ b/hmp-commands-info.hx
@@ -471,6 +471,20 @@ SRST @@ -471,6 +471,20 @@ SRST
@ -204,7 +242,7 @@ index d5ab880492..6c97248d1b 100644
{ {
.name = "usernet", .name = "usernet",
diff --git a/hmp-commands.hx b/hmp-commands.hx diff --git a/hmp-commands.hx b/hmp-commands.hx
index 7506de251c..d5f9c28194 100644 index e352f86872..0c8b6725fb 100644
--- a/hmp-commands.hx --- a/hmp-commands.hx
+++ b/hmp-commands.hx +++ b/hmp-commands.hx
@@ -101,6 +101,35 @@ ERST @@ -101,6 +101,35 @@ ERST
@ -243,6 +281,19 @@ index 7506de251c..d5f9c28194 100644
ERST ERST
{ {
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 0f2e1817ad..0a0339eee4 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -63,7 +63,7 @@
typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
-BlockDriverState *bdrv_backup_dump_create(
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
int dump_cb_block_size,
uint64_t byte_size,
BackupDumpFunc *dump_cb,
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 7a7def7530..cba7afe70c 100644 index 7a7def7530..cba7afe70c 100644
--- a/include/monitor/hmp.h --- a/include/monitor/hmp.h
@ -265,10 +316,10 @@ index 7a7def7530..cba7afe70c 100644
void hmp_device_add(Monitor *mon, const QDict *qdict); void hmp_device_add(Monitor *mon, const QDict *qdict);
void hmp_device_del(Monitor *mon, const QDict *qdict); void hmp_device_del(Monitor *mon, const QDict *qdict);
diff --git a/meson.build b/meson.build diff --git a/meson.build b/meson.build
index 620cc594b2..d16b97cf3c 100644 index cd95530d3b..d53976d621 100644
--- a/meson.build --- a/meson.build
+++ b/meson.build +++ b/meson.build
@@ -1923,6 +1923,7 @@ endif @@ -1779,6 +1779,7 @@ endif
has_gettid = cc.has_function('gettid') has_gettid = cc.has_function('gettid')
libuuid = cc.find_library('uuid', required: true) libuuid = cc.find_library('uuid', required: true)
@ -277,7 +328,7 @@ index 620cc594b2..d16b97cf3c 100644
# libselinux # libselinux
selinux = dependency('libselinux', selinux = dependency('libselinux',
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index ef4634e5c1..6e25279f42 100644 index 91be698308..5b9c231a4c 100644
--- a/monitor/hmp-cmds.c --- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c
@@ -21,6 +21,7 @@ @@ -21,6 +21,7 @@
@ -586,10 +637,10 @@ index 0000000000..8cbf645b2c
+#endif /* PROXMOX_BACKUP_CLIENT_H */ +#endif /* PROXMOX_BACKUP_CLIENT_H */
diff --git a/pve-backup.c b/pve-backup.c diff --git a/pve-backup.c b/pve-backup.c
new file mode 100644 new file mode 100644
index 0000000000..c755bf302b index 0000000000..d84d807654
--- /dev/null --- /dev/null
+++ b/pve-backup.c +++ b/pve-backup.c
@@ -0,0 +1,1092 @@ @@ -0,0 +1,1067 @@
+#include "proxmox-backup-client.h" +#include "proxmox-backup-client.h"
+#include "vma.h" +#include "vma.h"
+ +
@ -600,7 +651,6 @@ index 0000000000..c755bf302b
+#include "block/block_int-global-state.h" +#include "block/block_int-global-state.h"
+#include "block/blockjob.h" +#include "block/blockjob.h"
+#include "block/dirty-bitmap.h" +#include "block/dirty-bitmap.h"
+#include "block/graph-lock.h"
+#include "qapi/qapi-commands-block.h" +#include "qapi/qapi-commands-block.h"
+#include "qapi/qmp/qerror.h" +#include "qapi/qmp/qerror.h"
+#include "qemu/cutils.h" +#include "qemu/cutils.h"
@ -626,6 +676,7 @@ index 0000000000..c755bf302b
+ * ---end-bad-example-- + * ---end-bad-example--
+ * + *
+ * ==> Always use CoMutext inside coroutines. + * ==> Always use CoMutext inside coroutines.
+ * ==> Never acquire/release AioContext withing coroutines (because that use QemuRecMutex)
+ * + *
+ */ + */
+ +
@ -678,6 +729,7 @@ index 0000000000..c755bf302b
+ uint64_t block_size; + uint64_t block_size;
+ uint8_t dev_id; + uint8_t dev_id;
+ int completed_ret; // INT_MAX if not completed + int completed_ret; // INT_MAX if not completed
+ char targetfile[PATH_MAX];
+ BdrvDirtyBitmap *bitmap; + BdrvDirtyBitmap *bitmap;
+ BlockDriverState *target; + BlockDriverState *target;
+ BlockJob *job; + BlockJob *job;
@ -898,12 +950,7 @@ index 0000000000..c755bf302b
+ +
+ qemu_co_mutex_lock(&backup_state.backup_mutex); + qemu_co_mutex_lock(&backup_state.backup_mutex);
+ +
+ /* + if (ret < 0) {
+ * All jobs in the transaction will be canceled when one receives an error.
+ * The first error wins, so only set it for ECANCELED if it was the last
+ * job. This allows more interesting errors from other jobs to win.
+ */
+ if (ret < 0 && (ret != -ECANCELED || !g_list_nth(backup_state.di_list, 1))) {
+ Error *local_err = NULL; + Error *local_err = NULL;
+ error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret)); + error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
+ pvebackup_propagate_error(local_err); + pvebackup_propagate_error(local_err);
@ -927,6 +974,13 @@ index 0000000000..c755bf302b
+ } + }
+ } + }
+ +
+ if (di->job) {
+ WITH_JOB_LOCK_GUARD() {
+ job_unref_locked(&di->job->job);
+ di->job = NULL;
+ }
+ }
+
+ // remove self from job list + // remove self from job list
+ backup_state.di_list = g_list_remove(backup_state.di_list, di); + backup_state.di_list = g_list_remove(backup_state.di_list, di);
+ +
@ -946,16 +1000,6 @@ index 0000000000..c755bf302b
+ di->completed_ret = ret; + di->completed_ret = ret;
+ +
+ /* + /*
+ * Needs to happen outside of coroutine, because it takes the graph write lock.
+ */
+ if (di->job) {
+ WITH_JOB_LOCK_GUARD() {
+ job_unref_locked(&di->job->job);
+ di->job = NULL;
+ }
+ }
+
+ /*
+ * Schedule stream cleanup in async coroutine. close_image and finish might + * Schedule stream cleanup in async coroutine. close_image and finish might
+ * take a while, so we can't block on them here. This way it also doesn't + * take a while, so we can't block on them here. This way it also doesn't
+ * matter if we're already running in a coroutine or not. + * matter if we're already running in a coroutine or not.
@ -1076,7 +1120,8 @@ index 0000000000..c755bf302b
+} +}
+ +
+/* +/*
+ * backup_job_create can *not* be run from a coroutine, so this can't either. + * backup_job_create can *not* be run from a coroutine (and requires an
+ * acquired AioContext), so this can't either.
+ * The caller is responsible that backup_mutex is held nonetheless. + * The caller is responsible that backup_mutex is held nonetheless.
+ */ + */
+static void create_backup_jobs_bh(void *opaque) { +static void create_backup_jobs_bh(void *opaque) {
@ -1109,6 +1154,9 @@ index 0000000000..c755bf302b
+ sync_mode = MIRROR_SYNC_MODE_BITMAP; + sync_mode = MIRROR_SYNC_MODE_BITMAP;
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS; + bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ } + }
+ AioContext *aio_context = bdrv_get_aio_context(di->bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_drained_begin(di->bs); + bdrv_drained_begin(di->bs);
+ +
+ BlockJob *job = backup_job_create( + BlockJob *job = backup_job_create(
@ -1119,6 +1167,8 @@ index 0000000000..c755bf302b
+ +
+ bdrv_drained_end(di->bs); + bdrv_drained_end(di->bs);
+ +
+ aio_context_release(aio_context);
+
+ di->job = job; + di->job = job;
+ if (job) { + if (job) {
+ WITH_JOB_LOCK_GUARD() { + WITH_JOB_LOCK_GUARD() {
@ -1169,66 +1219,6 @@ index 0000000000..c755bf302b
+ aio_co_enter(data->ctx, data->co); + aio_co_enter(data->ctx, data->co);
+} +}
+ +
+/*
+ * Returns a list of device infos, which needs to be freed by the caller. In
+ * case of an error, errp will be set, but the returned value might still be a
+ * list.
+ */
+static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+ const char *devlist,
+ Error **errp)
+{
+ gchar **devs = NULL;
+ GList *di_list = NULL;
+
+ if (devlist) {
+ devs = g_strsplit_set(devlist, ",;:", -1);
+
+ gchar **d = devs;
+ while (d && *d) {
+ BlockBackend *blk = blk_by_name(*d);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", *d);
+ goto err;
+ }
+ BlockDriverState *bs = blk_bs(blk);
+ if (!bdrv_co_is_inserted(bs)) {
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
+ goto err;
+ }
+ PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+ di->bs = bs;
+ di_list = g_list_append(di_list, di);
+ d++;
+ }
+ } else {
+ BdrvNextIterator it;
+
+ for (BlockDriverState *bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs)) {
+ continue;
+ }
+
+ PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+ di->bs = bs;
+ di_list = g_list_append(di_list, di);
+ }
+ }
+
+ if (!di_list) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+ goto err;
+ }
+
+err:
+ if (devs) {
+ g_strfreev(devs);
+ }
+
+ return di_list;
+}
+
+UuidInfo coroutine_fn *qmp_backup( +UuidInfo coroutine_fn *qmp_backup(
+ const char *backup_file, + const char *backup_file,
+ const char *password, + const char *password,
@ -1254,10 +1244,13 @@ index 0000000000..c755bf302b
+ +
+ qemu_co_mutex_lock(&backup_state.backup_mutex); + qemu_co_mutex_lock(&backup_state.backup_mutex);
+ +
+ BlockBackend *blk;
+ BlockDriverState *bs = NULL;
+ Error *local_err = NULL; + Error *local_err = NULL;
+ uuid_t uuid; + uuid_t uuid;
+ VmaWriter *vmaw = NULL; + VmaWriter *vmaw = NULL;
+ ProxmoxBackupHandle *pbs = NULL; + ProxmoxBackupHandle *pbs = NULL;
+ gchar **devs = NULL;
+ GList *di_list = NULL; + GList *di_list = NULL;
+ GList *l; + GList *l;
+ UuidInfo *uuid_info; + UuidInfo *uuid_info;
@ -1275,14 +1268,48 @@ index 0000000000..c755bf302b
+ /* Todo: try to auto-detect format based on file name */ + /* Todo: try to auto-detect format based on file name */
+ format = has_format ? format : BACKUP_FORMAT_VMA; + format = has_format ? format : BACKUP_FORMAT_VMA;
+ +
+ bdrv_graph_co_rdlock(); + if (devlist) {
+ di_list = get_device_info(devlist, &local_err); + devs = g_strsplit_set(devlist, ",;:", -1);
+ bdrv_graph_co_rdunlock(); +
+ if (local_err) { + gchar **d = devs;
+ error_propagate(errp, local_err); + while (d && *d) {
+ blk = blk_by_name(*d);
+ if (blk) {
+ bs = blk_bs(blk);
+ if (!bdrv_co_is_inserted(bs)) {
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
+ goto err;
+ }
+ PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+ di->bs = bs;
+ di_list = g_list_append(di_list, di);
+ } else {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", *d);
+ goto err;
+ }
+ d++;
+ }
+
+ } else {
+ BdrvNextIterator it;
+
+ bs = NULL;
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs)) {
+ continue;
+ }
+
+ PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+ di->bs = bs;
+ di_list = g_list_append(di_list, di);
+ }
+ }
+
+ if (!di_list) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+ goto err; + goto err;
+ } + }
+ assert(di_list);
+ +
+ size_t total = 0; + size_t total = 0;
+ +
@ -1290,11 +1317,7 @@ index 0000000000..c755bf302b
+ while (l) { + while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data; + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l); + l = g_list_next(l);
+ + if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+ bdrv_graph_co_rdlock();
+ bool blocked = bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp);
+ bdrv_graph_co_rdunlock();
+ if (blocked) {
+ goto err; + goto err;
+ } + }
+ +
@ -1378,9 +1401,7 @@ index 0000000000..c755bf302b
+ +
+ di->block_size = dump_cb_block_size; + di->block_size = dump_cb_block_size;
+ +
+ bdrv_graph_co_rdlock();
+ const char *devname = bdrv_get_device_name(di->bs); + const char *devname = bdrv_get_device_name(di->bs);
+ bdrv_graph_co_rdunlock();
+ PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED; + PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
+ size_t dirty = di->size; + size_t dirty = di->size;
+ +
@ -1456,9 +1477,7 @@ index 0000000000..c755bf302b
+ goto err_mutex; + goto err_mutex;
+ } + }
+ +
+ bdrv_graph_co_rdlock();
+ const char *devname = bdrv_get_device_name(di->bs); + const char *devname = bdrv_get_device_name(di->bs);
+ bdrv_graph_co_rdunlock();
+ di->dev_id = vma_writer_register_stream(vmaw, devname, di->size); + di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
+ if (di->dev_id <= 0) { + if (di->dev_id <= 0) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
@ -1570,11 +1589,18 @@ index 0000000000..c755bf302b
+ bdrv_co_unref(di->target); + bdrv_co_unref(di->target);
+ } + }
+ +
+ if (di->targetfile[0]) {
+ unlink(di->targetfile);
+ }
+ g_free(di); + g_free(di);
+ } + }
+ g_list_free(di_list); + g_list_free(di_list);
+ backup_state.di_list = NULL; + backup_state.di_list = NULL;
+ +
+ if (devs) {
+ g_strfreev(devs);
+ }
+
+ if (vmaw) { + if (vmaw) {
+ Error *err = NULL; + Error *err = NULL;
+ vma_writer_close(vmaw, &err); + vma_writer_close(vmaw, &err);
@ -1683,10 +1709,10 @@ index 0000000000..c755bf302b
+ return ret; + return ret;
+} +}
diff --git a/qapi/block-core.json b/qapi/block-core.json diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3db587a6e4..d05fffce1d 100644 index bb471c078d..1b8462a51b 100644
--- a/qapi/block-core.json --- a/qapi/block-core.json
+++ b/qapi/block-core.json +++ b/qapi/block-core.json
@@ -851,6 +851,239 @@ @@ -839,6 +839,235 @@
{ 'command': 'query-block', 'returns': ['BlockInfo'], { 'command': 'query-block', 'returns': ['BlockInfo'],
'allow-preconfig': true } 'allow-preconfig': true }
@ -1755,9 +1781,6 @@ index 3db587a6e4..d05fffce1d 100644
+# @config-file: a configuration file to include into +# @config-file: a configuration file to include into
+# the backup archive. +# the backup archive.
+# +#
+# @firewall-file: a firewall configuration file to include into the backup
+# archive.
+#
+# @speed: the maximum speed, in bytes per second +# @speed: the maximum speed, in bytes per second
+# +#
+# @devlist: list of block device names (separated by ',', ';' +# @devlist: list of block device names (separated by ',', ';'
@ -1825,6 +1848,8 @@ index 3db587a6e4..d05fffce1d 100644
+# +#
+# Cancel the current executing backup process. +# Cancel the current executing backup process.
+# +#
+# Returns: nothing on success
+#
+# Notes: This command succeeds even if there is no backup process running. +# Notes: This command succeeds even if there is no backup process running.
+# +#
+## +##
@ -1848,9 +1873,6 @@ index 3db587a6e4..d05fffce1d 100644
+# +#
+# @pbs-library-version: Running version of libproxmox-backup-qemu0 library. +# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+# +#
+# @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
+# supported or not.
+#
+## +##
+{ 'struct': 'ProxmoxSupportStatus', +{ 'struct': 'ProxmoxSupportStatus',
+ 'data': { 'pbs-dirty-bitmap': 'bool', + 'data': { 'pbs-dirty-bitmap': 'bool',
@ -1927,10 +1949,10 @@ index 3db587a6e4..d05fffce1d 100644
# @BlockDeviceTimedStats: # @BlockDeviceTimedStats:
# #
diff --git a/qapi/common.json b/qapi/common.json diff --git a/qapi/common.json b/qapi/common.json
index 7558ce5430..6e3d800373 100644 index 6fed9cde1a..630a2a8f9a 100644
--- a/qapi/common.json --- a/qapi/common.json
+++ b/qapi/common.json +++ b/qapi/common.json
@@ -200,3 +200,17 @@ @@ -207,3 +207,17 @@
## ##
{ 'struct': 'HumanReadableText', { 'struct': 'HumanReadableText',
'data': { 'human-readable-text': 'str' } } 'data': { 'human-readable-text': 'str' } }
@ -1949,7 +1971,7 @@ index 7558ce5430..6e3d800373 100644
+## +##
+{ 'struct': 'UuidInfo', 'data': {'UUID': 'str'} } +{ 'struct': 'UuidInfo', 'data': {'UUID': 'str'} }
diff --git a/qapi/machine.json b/qapi/machine.json diff --git a/qapi/machine.json b/qapi/machine.json
index 1d69bffaa0..731d8d2f60 100644 index 7da3c519ba..888457f810 100644
--- a/qapi/machine.json --- a/qapi/machine.json
+++ b/qapi/machine.json +++ b/qapi/machine.json
@@ -4,6 +4,8 @@ @@ -4,6 +4,8 @@
@ -1961,7 +1983,7 @@ index 1d69bffaa0..731d8d2f60 100644
## ##
# = Machines # = Machines
## ##
@@ -237,20 +239,6 @@ @@ -230,20 +232,6 @@
## ##
{ 'command': 'query-target', 'returns': 'TargetInfo' } { 'command': 'query-target', 'returns': 'TargetInfo' }

View File

@ -14,10 +14,10 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
create mode 100644 pbs-restore.c create mode 100644 pbs-restore.c
diff --git a/meson.build b/meson.build diff --git a/meson.build b/meson.build
index d16b97cf3c..6de51c34cb 100644 index d53976d621..c3330310d9 100644
--- a/meson.build --- a/meson.build
+++ b/meson.build +++ b/meson.build
@@ -4029,6 +4029,10 @@ if have_tools @@ -3914,6 +3914,10 @@ if have_tools
vma = executable('vma', files('vma.c', 'vma-reader.c') + genh, vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
dependencies: [authz, block, crypto, io, qom], install: true) dependencies: [authz, block, crypto, io, qom], install: true)
@ -26,8 +26,8 @@ index d16b97cf3c..6de51c34cb 100644
+ libproxmox_backup_qemu], install: true) + libproxmox_backup_qemu], install: true)
+ +
subdir('storage-daemon') subdir('storage-daemon')
subdir('contrib/rdmacm-mux')
foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon'] subdir('contrib/elf2dmp')
diff --git a/pbs-restore.c b/pbs-restore.c diff --git a/pbs-restore.c b/pbs-restore.c
new file mode 100644 new file mode 100644
index 0000000000..f03d9bab8d index 0000000000..f03d9bab8d

View File

@ -14,33 +14,35 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
getlength is now a coroutine function] getlength is now a coroutine function]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
block/meson.build | 2 + block/meson.build | 3 +
block/pbs.c | 313 +++++++++++++++++++++++++++++++++++++++++++ block/pbs.c | 305 +++++++++++++++++++++++++++++++++++++++++++
configure | 9 ++
meson.build | 2 +- meson.build | 2 +-
qapi/block-core.json | 29 ++++ qapi/block-core.json | 13 ++
qapi/pragma.json | 1 + qapi/pragma.json | 1 +
5 files changed, 346 insertions(+), 1 deletion(-) 6 files changed, 332 insertions(+), 1 deletion(-)
create mode 100644 block/pbs.c create mode 100644 block/pbs.c
diff --git a/block/meson.build b/block/meson.build diff --git a/block/meson.build b/block/meson.build
index 6bba803f94..1945e04eeb 100644 index 6d468f89e5..becc99ac4e 100644
--- a/block/meson.build --- a/block/meson.build
+++ b/block/meson.build +++ b/block/meson.build
@@ -49,6 +49,8 @@ block_ss.add(files( @@ -50,6 +50,9 @@ block_ss.add(files(
'../pve-backup.c', '../pve-backup.c',
), libproxmox_backup_qemu) ), libproxmox_backup_qemu)
+block_ss.add(files('pbs.c'), libproxmox_backup_qemu) +block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: files('pbs.c'))
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: libproxmox_backup_qemu)
+ +
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c')) system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c')) system_ss.add(files('block-ram-registrar.c'))
diff --git a/block/pbs.c b/block/pbs.c diff --git a/block/pbs.c b/block/pbs.c
new file mode 100644 new file mode 100644
index 0000000000..aee66c2e93 index 0000000000..a2211e0f3b
--- /dev/null --- /dev/null
+++ b/block/pbs.c +++ b/block/pbs.c
@@ -0,0 +1,313 @@ @@ -0,0 +1,305 @@
+/* +/*
+ * Proxmox Backup Server read-only block driver + * Proxmox Backup Server read-only block driver
+ */ + */
@ -68,7 +70,7 @@ index 0000000000..aee66c2e93
+ +
+typedef struct { +typedef struct {
+ ProxmoxRestoreHandle *conn; + ProxmoxRestoreHandle *conn;
+ uint8_t aid; + char aid;
+ int64_t length; + int64_t length;
+ +
+ char *repository; + char *repository;
@ -201,18 +203,12 @@ index 0000000000..aee66c2e93
+ } + }
+ +
+ /* acquire handle and length */ + /* acquire handle and length */
+ ret = proxmox_restore_open_image(s->conn, s->archive, &pbs_error); + s->aid = proxmox_restore_open_image(s->conn, s->archive, &pbs_error);
+ if (ret < 0) { + if (s->aid < 0) {
+ if (pbs_error && errp) error_setg(errp, "PBS open_image failed: %s", pbs_error); + if (pbs_error && errp) error_setg(errp, "PBS open_image failed: %s", pbs_error);
+ if (pbs_error) proxmox_backup_free_error(pbs_error); + if (pbs_error) proxmox_backup_free_error(pbs_error);
+ return -ENODEV; + return -ENODEV;
+ } + }
+ if (ret > UINT8_MAX) {
+ error_setg(errp, "PBS open_image returned an ID larger than %u", UINT8_MAX);
+ return -ENODEV;
+ }
+ s->aid = ret;
+
+ s->length = proxmox_restore_get_image_length(s->conn, s->aid, &pbs_error); + s->length = proxmox_restore_get_image_length(s->conn, s->aid, &pbs_error);
+ if (s->length < 0) { + if (s->length < 0) {
+ if (pbs_error && errp) error_setg(errp, "PBS get_image_length failed: %s", pbs_error); + if (pbs_error && errp) error_setg(errp, "PBS get_image_length failed: %s", pbs_error);
@ -238,8 +234,7 @@ index 0000000000..aee66c2e93
+ proxmox_restore_disconnect(s->conn); + proxmox_restore_disconnect(s->conn);
+} +}
+ +
+static coroutine_fn int64_t GRAPH_RDLOCK +static coroutine_fn int64_t pbs_co_getlength(BlockDriverState *bs)
+pbs_co_getlength(BlockDriverState *bs)
+{ +{
+ BDRVPBSState *s = bs->opaque; + BDRVPBSState *s = bs->opaque;
+ return s->length; + return s->length;
@ -256,8 +251,8 @@ index 0000000000..aee66c2e93
+ aio_co_schedule(rcb->ctx, rcb->co); + aio_co_schedule(rcb->ctx, rcb->co);
+} +}
+ +
+static coroutine_fn int GRAPH_RDLOCK +static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
+pbs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags) + QEMUIOVector *qiov, BdrvRequestFlags flags)
+{ +{
+ BDRVPBSState *s = bs->opaque; + BDRVPBSState *s = bs->opaque;
@ -303,8 +298,8 @@ index 0000000000..aee66c2e93
+ return 0; + return 0;
+} +}
+ +
+static coroutine_fn int GRAPH_RDLOCK +static coroutine_fn int pbs_co_pwritev(BlockDriverState *bs,
+pbs_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags) + QEMUIOVector *qiov, BdrvRequestFlags flags)
+{ +{
+ fprintf(stderr, "pbs-bdrv: cannot write to backup file, make sure " + fprintf(stderr, "pbs-bdrv: cannot write to backup file, make sure "
@ -312,8 +307,7 @@ index 0000000000..aee66c2e93
+ return -EPERM; + return -EPERM;
+} +}
+ +
+static void GRAPH_RDLOCK +static void pbs_refresh_filename(BlockDriverState *bs)
+pbs_refresh_filename(BlockDriverState *bs)
+{ +{
+ BDRVPBSState *s = bs->opaque; + BDRVPBSState *s = bs->opaque;
+ if (s->namespace) { + if (s->namespace) {
@ -354,11 +348,52 @@ index 0000000000..aee66c2e93
+} +}
+ +
+block_init(bdrv_pbs_init); +block_init(bdrv_pbs_init);
diff --git a/configure b/configure
index 133f4e3235..f5a830c1f3 100755
--- a/configure
+++ b/configure
@@ -256,6 +256,7 @@ qemu_suffix="qemu"
softmmu="yes"
linux_user=""
bsd_user=""
+pbs_bdrv="yes"
plugins="$default_feature"
ninja=""
python=
@@ -809,6 +810,10 @@ for opt do
;;
--enable-download) download="enabled"; git_submodules_action=update;
;;
+ --disable-pbs-bdrv) pbs_bdrv="no"
+ ;;
+ --enable-pbs-bdrv) pbs_bdrv="yes"
+ ;;
--enable-plugins) if test "$mingw32" = "yes"; then
error_exit "TCG plugins not currently supported on Windows platforms"
else
@@ -959,6 +964,7 @@ cat << EOF
bsd-user all BSD usermode emulation targets
pie Position Independent Executables
debug-tcg TCG debugging (default is disabled)
+ pbs-bdrv Proxmox backup server read-only block driver support
NOTE: The object files are built at the place where configure is launched
EOF
@@ -1744,6 +1750,9 @@ if test "$solaris" = "yes" ; then
fi
echo "SRC_PATH=$source_path" >> $config_host_mak
echo "TARGET_DIRS=$target_list" >> $config_host_mak
+if test "$pbs_bdrv" = "yes" ; then
+ echo "CONFIG_PBS_BDRV=y" >> $config_host_mak
+fi
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
diff --git a/meson.build b/meson.build diff --git a/meson.build b/meson.build
index 6de51c34cb..3bc039f60f 100644 index c3330310d9..cbfc9a43fb 100644
--- a/meson.build --- a/meson.build
+++ b/meson.build +++ b/meson.build
@@ -4477,7 +4477,7 @@ summary_info += {'bzip2 support': libbzip2} @@ -4319,7 +4319,7 @@ summary_info += {'bzip2 support': libbzip2}
summary_info += {'lzfse support': liblzfse} summary_info += {'lzfse support': liblzfse}
summary_info += {'zstd support': zstd} summary_info += {'zstd support': zstd}
summary_info += {'NUMA host support': numa} summary_info += {'NUMA host support': numa}
@ -368,10 +403,10 @@ index 6de51c34cb..3bc039f60f 100644
summary_info += {'libdaxctl support': libdaxctl} summary_info += {'libdaxctl support': libdaxctl}
summary_info += {'libudev': libudev} summary_info += {'libudev': libudev}
diff --git a/qapi/block-core.json b/qapi/block-core.json diff --git a/qapi/block-core.json b/qapi/block-core.json
index d05fffce1d..e7cf3d94f3 100644 index 1b8462a51b..d67a6d448a 100644
--- a/qapi/block-core.json --- a/qapi/block-core.json
+++ b/qapi/block-core.json +++ b/qapi/block-core.json
@@ -3457,6 +3457,7 @@ @@ -3396,6 +3396,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' }, { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
@ -379,7 +414,7 @@ index d05fffce1d..e7cf3d94f3 100644
'ssh', 'throttle', 'vdi', 'vhdx', 'ssh', 'throttle', 'vdi', 'vhdx',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' }, { 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' }, { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
@@ -3543,6 +3544,33 @@ @@ -3482,6 +3483,17 @@
{ 'struct': 'BlockdevOptionsNull', { 'struct': 'BlockdevOptionsNull',
'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } } 'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
@ -388,22 +423,6 @@ index d05fffce1d..e7cf3d94f3 100644
+# +#
+# Driver specific block device options for the PBS backend. +# Driver specific block device options for the PBS backend.
+# +#
+# @repository: Proxmox Backup Server repository.
+#
+# @snapshot: backup snapshots ID.
+#
+# @archive: archive name.
+#
+# @keyfile: keyfile to use for encryption.
+#
+# @password: password to use for connection.
+#
+# @fingerprint: backup server fingerprint.
+#
+# @key_password: password to unlock key.
+#
+# @namespace: namespace where backup snapshot lives.
+#
+## +##
+{ 'struct': 'BlockdevOptionsPbs', +{ 'struct': 'BlockdevOptionsPbs',
+ 'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str', + 'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
@ -413,7 +432,7 @@ index d05fffce1d..e7cf3d94f3 100644
## ##
# @BlockdevOptionsNVMe: # @BlockdevOptionsNVMe:
# #
@@ -4977,6 +5005,7 @@ @@ -4890,6 +4902,7 @@
'nfs': 'BlockdevOptionsNfs', 'nfs': 'BlockdevOptionsNfs',
'null-aio': 'BlockdevOptionsNull', 'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull', 'null-co': 'BlockdevOptionsNull',
@ -422,10 +441,10 @@ index d05fffce1d..e7cf3d94f3 100644
'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring', 'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
'if': 'CONFIG_BLKIO' }, 'if': 'CONFIG_BLKIO' },
diff --git a/qapi/pragma.json b/qapi/pragma.json diff --git a/qapi/pragma.json b/qapi/pragma.json
index be8fa304c5..7ff46bd128 100644 index 325e684411..b6079f6a0e 100644
--- a/qapi/pragma.json --- a/qapi/pragma.json
+++ b/qapi/pragma.json +++ b/qapi/pragma.json
@@ -100,6 +100,7 @@ @@ -45,6 +45,7 @@
'BlockInfo', # query-block 'BlockInfo', # query-block
'BlockdevAioOptions', # blockdev-add, -blockdev 'BlockdevAioOptions', # blockdev-add, -blockdev
'BlockdevDriver', # blockdev-add, query-blockstats, ... 'BlockdevDriver', # blockdev-add, query-blockstats, ...

View File

@ -9,15 +9,15 @@ fitting.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
meson.build | 3 ++- meson.build | 2 ++
os-posix.c | 7 +++++-- os-posix.c | 7 +++++--
2 files changed, 7 insertions(+), 3 deletions(-) 2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/meson.build b/meson.build diff --git a/meson.build b/meson.build
index 3bc039f60f..067e8956a7 100644 index cbfc9a43fb..8206270272 100644
--- a/meson.build --- a/meson.build
+++ b/meson.build +++ b/meson.build
@@ -1923,6 +1923,7 @@ endif @@ -1779,6 +1779,7 @@ endif
has_gettid = cc.has_function('gettid') has_gettid = cc.has_function('gettid')
libuuid = cc.find_library('uuid', required: true) libuuid = cc.find_library('uuid', required: true)
@ -25,29 +25,28 @@ index 3bc039f60f..067e8956a7 100644
libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true) libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true)
# libselinux # libselinux
@@ -3530,7 +3531,7 @@ if have_block @@ -3406,6 +3407,7 @@ if have_block
if host_os == 'windows' # os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
system_ss.add(files('os-win32.c')) # os-win32.c does not
else blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
- blockdev_ss.add(files('os-posix.c')) + blockdev_ss.add(when: 'CONFIG_POSIX', if_true: libsystemd)
+ blockdev_ss.add(files('os-posix.c'), libsystemd) system_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
endif
endif endif
diff --git a/os-posix.c b/os-posix.c diff --git a/os-posix.c b/os-posix.c
index a4284e2c07..197a2120fd 100644 index cfcb96533c..fb2ad87009 100644
--- a/os-posix.c --- a/os-posix.c
+++ b/os-posix.c +++ b/os-posix.c
@@ -29,6 +29,8 @@ @@ -28,6 +28,8 @@
#include <pwd.h> #include <pwd.h>
#include <grp.h> #include <grp.h>
#include <libgen.h> #include <libgen.h>
+#include <systemd/sd-journal.h> +#include <systemd/sd-journal.h>
+#include <syslog.h> +#include <syslog.h>
#include "qemu/error-report.h" /* Needed early for CONFIG_BSD etc. */
#include "qemu/log.h" #include "net/slirp.h"
@@ -302,9 +304,10 @@ void os_setup_post(void) @@ -310,9 +312,10 @@ void os_setup_post(void)
dup2(fd, 0); dup2(fd, 0);
dup2(fd, 1); dup2(fd, 1);

View File

@ -26,10 +26,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
create mode 100644 migration/pbs-state.c create mode 100644 migration/pbs-state.c
diff --git a/include/migration/misc.h b/include/migration/misc.h diff --git a/include/migration/misc.h b/include/migration/misc.h
index c9e200f4eb..12c99ebc69 100644 index 7dcc0b5c2c..4c940b2475 100644
--- a/include/migration/misc.h --- a/include/migration/misc.h
+++ b/include/migration/misc.h +++ b/include/migration/misc.h
@@ -117,4 +117,7 @@ bool migration_in_bg_snapshot(void); @@ -77,4 +77,7 @@ bool migration_in_bg_snapshot(void);
/* migration/block-dirty-bitmap.c */ /* migration/block-dirty-bitmap.c */
void dirty_bitmap_mig_init(void); void dirty_bitmap_mig_init(void);
@ -38,7 +38,7 @@ index c9e200f4eb..12c99ebc69 100644
+ +
#endif #endif
diff --git a/migration/meson.build b/migration/meson.build diff --git a/migration/meson.build b/migration/meson.build
index 800f12a60d..35a4306183 100644 index 07f6057acc..343994d891 100644
--- a/migration/meson.build --- a/migration/meson.build
+++ b/migration/meson.build +++ b/migration/meson.build
@@ -7,7 +7,9 @@ migration_files = files( @@ -7,7 +7,9 @@ migration_files = files(
@ -52,17 +52,17 @@ index 800f12a60d..35a4306183 100644
system_ss.add(files( system_ss.add(files(
'block-dirty-bitmap.c', 'block-dirty-bitmap.c',
diff --git a/migration/migration.c b/migration/migration.c diff --git a/migration/migration.c b/migration/migration.c
index 86bf76e925..b8d7e471a4 100644 index 7a4c8beb5d..0a955a2a18 100644
--- a/migration/migration.c --- a/migration/migration.c
+++ b/migration/migration.c +++ b/migration/migration.c
@@ -239,6 +239,7 @@ void migration_object_init(void) @@ -162,6 +162,7 @@ void migration_object_init(void)
blk_mig_init(); blk_mig_init();
ram_mig_init(); ram_mig_init();
dirty_bitmap_mig_init(); dirty_bitmap_mig_init();
+ pbs_state_mig_init(); + pbs_state_mig_init();
} }
typedef struct { void migration_cancel(const Error *error)
diff --git a/migration/pbs-state.c b/migration/pbs-state.c diff --git a/migration/pbs-state.c b/migration/pbs-state.c
new file mode 100644 new file mode 100644
index 0000000000..887e998b9e index 0000000000..887e998b9e
@ -174,10 +174,10 @@ index 0000000000..887e998b9e
+ NULL); + NULL);
+} +}
diff --git a/pve-backup.c b/pve-backup.c diff --git a/pve-backup.c b/pve-backup.c
index c755bf302b..5ebb6a3947 100644 index d84d807654..9c8b88d075 100644
--- a/pve-backup.c --- a/pve-backup.c
+++ b/pve-backup.c +++ b/pve-backup.c
@@ -1085,6 +1085,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp) @@ -1060,6 +1060,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version()); ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true; ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true; ret->pbs_dirty_bitmap_savevm = true;
@ -186,10 +186,10 @@ index c755bf302b..5ebb6a3947 100644
ret->pbs_masterkey = true; ret->pbs_masterkey = true;
ret->backup_max_workers = true; ret->backup_max_workers = true;
diff --git a/qapi/block-core.json b/qapi/block-core.json diff --git a/qapi/block-core.json b/qapi/block-core.json
index e7cf3d94f3..282e2e8a8c 100644 index d67a6d448a..09de550c95 100644
--- a/qapi/block-core.json --- a/qapi/block-core.json
+++ b/qapi/block-core.json +++ b/qapi/block-core.json
@@ -1004,6 +1004,11 @@ @@ -991,6 +991,11 @@
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can # @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async. # safely be set for savevm-async.
# #
@ -201,7 +201,7 @@ index e7cf3d94f3..282e2e8a8c 100644
# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile' # @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
# parameter. # parameter.
# #
@@ -1017,6 +1022,7 @@ @@ -1001,6 +1006,7 @@
'data': { 'pbs-dirty-bitmap': 'bool', 'data': { 'pbs-dirty-bitmap': 'bool',
'query-bitmap-info': 'bool', 'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool', 'pbs-dirty-bitmap-savevm': 'bool',

View File

@ -19,7 +19,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 2708abf3d7..fb17c01308 100644 index e1ae3b7316..285dd1d148 100644
--- a/migration/block-dirty-bitmap.c --- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c
@@ -540,7 +540,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, @@ -540,7 +540,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,

View File

@ -21,10 +21,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 30 insertions(+) 1 file changed, 30 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c diff --git a/block/iscsi.c b/block/iscsi.c
index 2ff14b7472..46f275fbf7 100644 index 34f97ab646..398782963d 100644
--- a/block/iscsi.c --- a/block/iscsi.c
+++ b/block/iscsi.c +++ b/block/iscsi.c
@@ -1392,12 +1392,42 @@ static char *get_initiator_name(QemuOpts *opts) @@ -1391,12 +1391,42 @@ static char *get_initiator_name(QemuOpts *opts)
const char *name; const char *name;
char *iscsi_name; char *iscsi_name;
UuidInfo *uuid_info; UuidInfo *uuid_info;

View File

@ -11,7 +11,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/stream.c b/block/stream.c diff --git a/block/stream.c b/block/stream.c
index 7031eef12b..d2da83ae7c 100644 index e522bbdec5..afed72db55 100644
--- a/block/stream.c --- a/block/stream.c
+++ b/block/stream.c +++ b/block/stream.c
@@ -27,7 +27,7 @@ enum { @@ -27,7 +27,7 @@ enum {

View File

@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 2 Mar 2021 16:11:54 +0100
Subject: [PATCH] block/io: accept NULL qiov in bdrv_pad_request
Some operations, e.g. block-stream, perform reads while discarding the
results (only copy-on-read matters). In this case they will pass NULL as
the target QEMUIOVector, which will however trip bdrv_pad_request, since
it wants to extend its passed vector.
Simply check for NULL and do nothing, there's no reason to pad the
target if it will be discarded anyway.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/io.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/block/io.c b/block/io.c
index 83d1b1dfdc..24a3c84c93 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1710,6 +1710,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
int sliced_niov;
size_t sliced_head, sliced_tail;
+ if (!qiov) {
+ return 0;
+ }
+
/* Should have been checked by the caller already */
ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
if (ret < 0) {

View File

@ -19,33 +19,27 @@ well.
This only worked if the target supports backing images, so up until now This only worked if the target supports backing images, so up until now
only for qcow2, with alloc-track any driver for the target can be used. only for qcow2, with alloc-track any driver for the target can be used.
Replacing the node cannot be done in the If 'auto-remove' is set, alloc-track will automatically detach itself
track_co_change_backing_file() callback, because replacing a node once the backing image is removed. It will be replaced by 'file'.
cannot happen in a coroutine and requires the block graph lock
exclusively. Could either become a special option for the stream job,
or maybe the upcoming blockdev-replace QMP command can be used in the
future.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures [FE: adapt to changed function signatures
make error return value consistent with QEMU make error return value consistent with QEMU
avoid premature break during read avoid premature break during read]
adhere to block graph lock requirements]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
--- ---
block/alloc-track.c | 366 ++++++++++++++++++++++++++++++++++++++++++++ block/alloc-track.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
block/meson.build | 1 + block/meson.build | 1 +
block/stream.c | 34 ++++ 2 files changed, 353 insertions(+)
3 files changed, 401 insertions(+)
create mode 100644 block/alloc-track.c create mode 100644 block/alloc-track.c
diff --git a/block/alloc-track.c b/block/alloc-track.c diff --git a/block/alloc-track.c b/block/alloc-track.c
new file mode 100644 new file mode 100644
index 0000000000..b9f8ea9137 index 0000000000..b75d7c6460
--- /dev/null --- /dev/null
+++ b/block/alloc-track.c +++ b/block/alloc-track.c
@@ -0,0 +1,366 @@ @@ -0,0 +1,352 @@
+/* +/*
+ * Node to allow backing images to be applied to any node. Assumes a blank + * Node to allow backing images to be applied to any node. Assumes a blank
+ * image to begin with, only new writes are tracked as allocated, thus this + * image to begin with, only new writes are tracked as allocated, thus this
@ -62,11 +56,9 @@ index 0000000000..b9f8ea9137
+#include "qapi/error.h" +#include "qapi/error.h"
+#include "block/block_int.h" +#include "block/block_int.h"
+#include "block/dirty-bitmap.h" +#include "block/dirty-bitmap.h"
+#include "block/graph-lock.h"
+#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h" +#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h" +#include "qemu/option.h"
+#include "qemu/module.h" +#include "qemu/module.h"
+#include "sysemu/block-backend.h" +#include "sysemu/block-backend.h"
@ -75,12 +67,12 @@ index 0000000000..b9f8ea9137
+ +
+typedef enum DropState { +typedef enum DropState {
+ DropNone, + DropNone,
+ DropRequested,
+ DropInProgress, + DropInProgress,
+} DropState; +} DropState;
+ +
+typedef struct { +typedef struct {
+ BdrvDirtyBitmap *bitmap; + BdrvDirtyBitmap *bitmap;
+ uint64_t granularity;
+ DropState drop_state; + DropState drop_state;
+ bool auto_remove; + bool auto_remove;
+} BDRVAllocTrackState; +} BDRVAllocTrackState;
@ -99,29 +91,26 @@ index 0000000000..b9f8ea9137
+ }, + },
+}; +};
+ +
+static void GRAPH_RDLOCK +static void track_refresh_limits(BlockDriverState *bs, Error **errp)
+track_refresh_limits(BlockDriverState *bs, Error **errp)
+{ +{
+ BDRVAllocTrackState *s = bs->opaque; + BlockDriverInfo bdi;
+ +
+ if (!bs->file) { + if (!bs->file) {
+ return; + return;
+ } + }
+ +
+ /* + /* always use alignment from underlying write device so RMW cycle for
+ * Always use alignment from underlying write device so RMW cycle for + * bdrv_pwritev reads data from our backing via track_co_preadv (no partial
+ * bdrv_pwritev reads data from our backing via track_co_preadv. Also use at + * cluster allocation in 'file') */
+ * least the bitmap granularity. + bdrv_get_info(bs->file->bs, &bdi);
+ */
+ bs->bl.request_alignment = MAX(bs->file->bs->bl.request_alignment, + bs->bl.request_alignment = MAX(bs->file->bs->bl.request_alignment,
+ s->granularity); + MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+} +}
+ +
+static int track_open(BlockDriverState *bs, QDict *options, int flags, +static int track_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp) + Error **errp)
+{ +{
+ BDRVAllocTrackState *s = bs->opaque; + BDRVAllocTrackState *s = bs->opaque;
+ BdrvChild *file = NULL;
+ QemuOpts *opts; + QemuOpts *opts;
+ Error *local_err = NULL; + Error *local_err = NULL;
+ int ret = 0; + int ret = 0;
@ -137,45 +126,18 @@ index 0000000000..b9f8ea9137
+ s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false); + s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
+ +
+ /* open the target (write) node, backing will be attached by block layer */ + /* open the target (write) node, backing will be attached by block layer */
+ file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+ BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false, + BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
+ &local_err); + &local_err);
+ bdrv_graph_wrlock();
+ bs->file = file;
+ bdrv_graph_wrunlock();
+ if (local_err) { + if (local_err) {
+ ret = -EINVAL; + ret = -EINVAL;
+ error_propagate(errp, local_err); + error_propagate(errp, local_err);
+ goto fail; + goto fail;
+ } + }
+ +
+ bdrv_graph_rdlock_main_loop();
+ BlockDriverInfo bdi = {0};
+ ret = bdrv_get_info(bs->file->bs, &bdi);
+ if (ret < 0) {
+ /*
+ * Not a hard failure. Worst that can happen is partial cluster
+ * allocation in the write target. However, the driver here returns its
+ * allocation status based on the dirty bitmap, so any other data that
+ * maps to such a cluster will still be copied later by a stream job (or
+ * during writes to that cluster).
+ */
+ warn_report("alloc-track: unable to query cluster size for write target: %s",
+ strerror(ret));
+ }
+ ret = 0;
+ /*
+ * Always consider alignment from underlying write device so RMW cycle for
+ * bdrv_pwritev reads data from our backing via track_co_preadv. Also try to
+ * avoid partial cluster allocation in the write target by considering the
+ * cluster size.
+ */
+ s->granularity = MAX(bs->file->bs->bl.request_alignment,
+ MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+ track_refresh_limits(bs, errp); + track_refresh_limits(bs, errp);
+ s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, s->granularity, NULL, + uint64_t gran = bs->bl.request_alignment;
+ &local_err); + s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, gran, NULL, &local_err);
+ bdrv_graph_rdunlock_main_loop();
+ if (local_err) { + if (local_err) {
+ ret = -EIO; + ret = -EIO;
+ error_propagate(errp, local_err); + error_propagate(errp, local_err);
@ -186,9 +148,7 @@ index 0000000000..b9f8ea9137
+ +
+fail: +fail:
+ if (ret < 0) { + if (ret < 0) {
+ bdrv_graph_wrlock();
+ bdrv_unref_child(bs, bs->file); + bdrv_unref_child(bs, bs->file);
+ bdrv_graph_wrunlock();
+ if (s->bitmap) { + if (s->bitmap) {
+ bdrv_release_dirty_bitmap(s->bitmap); + bdrv_release_dirty_bitmap(s->bitmap);
+ } + }
@ -205,15 +165,13 @@ index 0000000000..b9f8ea9137
+ } + }
+} +}
+ +
+static coroutine_fn int64_t GRAPH_RDLOCK +static coroutine_fn int64_t track_co_getlength(BlockDriverState *bs)
+track_co_getlength(BlockDriverState *bs)
+{ +{
+ return bdrv_co_getlength(bs->file->bs); + return bdrv_co_getlength(bs->file->bs);
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn track_co_preadv(BlockDriverState *bs,
+track_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+{ +{
+ BDRVAllocTrackState *s = bs->opaque; + BDRVAllocTrackState *s = bs->opaque;
+ QEMUIOVector local_qiov; + QEMUIOVector local_qiov;
@ -271,34 +229,31 @@ index 0000000000..b9f8ea9137
+ return ret; + return ret;
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn track_co_pwritev(BlockDriverState *bs,
+track_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+{ +{
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn track_co_pwrite_zeroes(BlockDriverState *bs,
+track_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+ BdrvRequestFlags flags)
+{ +{
+ return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn track_co_pdiscard(BlockDriverState *bs,
+track_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) + int64_t offset, int64_t bytes)
+{ +{
+ return bdrv_co_pdiscard(bs->file, offset, bytes); + return bdrv_co_pdiscard(bs->file, offset, bytes);
+} +}
+ +
+static coroutine_fn int GRAPH_RDLOCK +static coroutine_fn int track_co_flush(BlockDriverState *bs)
+track_co_flush(BlockDriverState *bs)
+{ +{
+ return bdrv_co_flush(bs->file->bs); + return bdrv_co_flush(bs->file->bs);
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn track_co_block_status(BlockDriverState *bs,
+track_co_block_status(BlockDriverState *bs, bool want_zero, + bool want_zero,
+ int64_t offset, + int64_t offset,
+ int64_t bytes, + int64_t bytes,
+ int64_t *pnum, + int64_t *pnum,
@ -329,9 +284,9 @@ index 0000000000..b9f8ea9137
+ return 0; + return 0;
+} +}
+ +
+static void GRAPH_RDLOCK +static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
+track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role, + BdrvChildRole role, BlockReopenQueue *reopen_queue,
+ BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, + uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared) + uint64_t *nperm, uint64_t *nshared)
+{ +{
+ BDRVAllocTrackState *s = bs->opaque; + BDRVAllocTrackState *s = bs->opaque;
@ -355,28 +310,53 @@ index 0000000000..b9f8ea9137
+ } + }
+} +}
+ +
+static int coroutine_fn GRAPH_RDLOCK +static void track_drop(void *opaque)
+track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+ const char *backing_fmt)
+{ +{
+ /* + BlockDriverState *bs = (BlockDriverState*)opaque;
+ * Note that the actual backing file graph change is already done in the + BlockDriverState *file = bs->file->bs;
+ * stream job itself with bdrv_set_backing_hd_drained(), so no need to
+ * actually do anything here. But still needs to be implemented, to make
+ * our caller (i.e. bdrv_co_change_backing_file() do the right thing).
+ *
+ * FIXME
+ * We'd like to auto-remove ourselves from the block graph, but it cannot
+ * be done from a coroutine. Currently done in the stream job, where it
+ * kinda fits better, but in the long-term, a special parameter would be
+ * nice (or done via qemu-server via upcoming blockdev-replace QMP command).
+ */
+ if (backing_file == NULL) {
+ BDRVAllocTrackState *s = bs->opaque; + BDRVAllocTrackState *s = bs->opaque;
+
+ assert(file);
+
+ /* we rely on the fact that we're not used anywhere else, so let's wait
+ * until we're only used once - in the drive connected to the guest (and one
+ * ref is held by bdrv_ref in track_change_backing_file) */
+ if (bs->refcnt > 2) {
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
+ return;
+ }
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_drained_begin(bs); + bdrv_drained_begin(bs);
+
+ /* now that we're drained, we can safely set 'DropInProgress' */
+ s->drop_state = DropInProgress; + s->drop_state = DropInProgress;
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort); + bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+
+ bdrv_replace_node(bs, file, &error_abort);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
+ bdrv_drained_end(bs); + bdrv_drained_end(bs);
+ bdrv_unref(bs);
+ aio_context_release(aio_context);
+}
+
+static int track_change_backing_file(BlockDriverState *bs,
+ const char *backing_file,
+ const char *backing_fmt)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ if (s->auto_remove && s->drop_state == DropNone &&
+ backing_file == NULL && backing_fmt == NULL)
+ {
+ /* backing file has been disconnected, there's no longer any use for
+ * this node, so let's remove ourselves from the block graph - we need
+ * to schedule this for later however, since when this function is
+ * called, the blockjob modifying us is probably not done yet and has a
+ * blocker on 'bs' */
+ s->drop_state = DropRequested;
+ bdrv_ref(bs);
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
+ } + }
+ +
+ return 0; + return 0;
@ -403,7 +383,7 @@ index 0000000000..b9f8ea9137
+ .supports_backing = true, + .supports_backing = true,
+ +
+ .bdrv_co_block_status = track_co_block_status, + .bdrv_co_block_status = track_co_block_status,
+ .bdrv_co_change_backing_file = track_co_change_backing_file, + .bdrv_change_backing_file = track_change_backing_file,
+}; +};
+ +
+static void bdrv_alloc_track_init(void) +static void bdrv_alloc_track_init(void)
@ -413,7 +393,7 @@ index 0000000000..b9f8ea9137
+ +
+block_init(bdrv_alloc_track_init); +block_init(bdrv_alloc_track_init);
diff --git a/block/meson.build b/block/meson.build diff --git a/block/meson.build b/block/meson.build
index 1945e04eeb..2873f3a25a 100644 index becc99ac4e..0a69836593 100644
--- a/block/meson.build --- a/block/meson.build
+++ b/block/meson.build +++ b/block/meson.build
@@ -2,6 +2,7 @@ block_ss.add(genh) @@ -2,6 +2,7 @@ block_ss.add(genh)
@ -424,48 +404,3 @@ index 1945e04eeb..2873f3a25a 100644
'amend.c', 'amend.c',
'backup.c', 'backup.c',
'backup-dump.c', 'backup-dump.c',
diff --git a/block/stream.c b/block/stream.c
index d2da83ae7c..f941cba14e 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -120,6 +120,40 @@ static int stream_prepare(Job *job)
ret = -EPERM;
goto out;
}
+
+ /*
+ * This cannot be done in the co_change_backing_file callback, because
+ * bdrv_replace_node() cannot be done in a coroutine. The latter also
+ * requires the graph lock exclusively. Only required for the
+ * alloc-track driver.
+ *
+ * The long-term plan is to either have an explicit parameter for the
+ * stream job or use the upcoming blockdev-replace QMP command.
+ */
+ if (base_id == NULL && strcmp(unfiltered_bs->drv->format_name, "alloc-track") == 0) {
+ BlockDriverState *file_bs;
+
+ bdrv_graph_rdlock_main_loop();
+ file_bs = unfiltered_bs->file->bs;
+ bdrv_graph_rdunlock_main_loop();
+
+ bdrv_ref(unfiltered_bs); // unrefed by bdrv_replace_node()
+ bdrv_drained_begin(file_bs);
+ bdrv_graph_wrlock();
+
+ bdrv_replace_node(unfiltered_bs, file_bs, &local_err);
+
+ bdrv_graph_wrunlock();
+ bdrv_drained_end(file_bs);
+ bdrv_unref(unfiltered_bs);
+
+ if (local_err) {
+ error_prepend(&local_err, "failed to replace alloc-track node: ");
+ error_report_err(local_err);
+ ret = -EPERM;
+ goto out;
+ }
+ }
}
out:

View File

@ -7,16 +7,15 @@ This reverts commit fc176116cdea816ceb8dd969080b2b95f58edbc0 in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b. preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com> Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
block/rbd.c | 42 ++---------------------------------------- block/rbd.c | 42 ++----------------------------------------
1 file changed, 2 insertions(+), 40 deletions(-) 1 file changed, 2 insertions(+), 40 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c diff --git a/block/rbd.c b/block/rbd.c
index 63f60d41be..367db42dce 100644 index a4749f3b1b..53e0396b51 100644
--- a/block/rbd.c --- a/block/rbd.c
+++ b/block/rbd.c +++ b/block/rbd.c
@@ -1515,7 +1515,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, @@ -1511,7 +1511,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r; int status, r;
RBDDiffIterateReq req = { .offs = offset }; RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags; uint64_t features, flags;
@ -24,7 +23,7 @@ index 63f60d41be..367db42dce 100644
assert(offset + bytes <= s->image_size); assert(offset + bytes <= s->image_size);
@@ -1543,43 +1542,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, @@ -1539,43 +1538,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status; return status;
} }
@ -69,7 +68,7 @@ index 63f60d41be..367db42dce 100644
qemu_rbd_diff_iterate_cb, &req); qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status; return status;
@@ -1598,8 +1561,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, @@ -1594,8 +1557,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
} }

View File

@ -8,16 +8,15 @@ This reverts commit 9e302f64bb407a9bb097b626da97228c2654cfee in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b. preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com> Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
block/rbd.c | 10 +++++----- block/rbd.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-) 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c diff --git a/block/rbd.c b/block/rbd.c
index 367db42dce..347b121626 100644 index 53e0396b51..0913a0af39 100644
--- a/block/rbd.c --- a/block/rbd.c
+++ b/block/rbd.c +++ b/block/rbd.c
@@ -1474,11 +1474,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, @@ -1470,11 +1470,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque; RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs); assert(req->offs + req->bytes <= offs);

View File

@ -18,13 +18,12 @@ Upstream bug report:
https://gitlab.com/qemu-project/qemu/-/issues/1026 https://gitlab.com/qemu-project/qemu/-/issues/1026
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com> Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
block/rbd.c | 112 ---------------------------------------------------- block/rbd.c | 112 ----------------------------------------------------
1 file changed, 112 deletions(-) 1 file changed, 112 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c diff --git a/block/rbd.c b/block/rbd.c
index 347b121626..e61b359b97 100644 index 0913a0af39..1dab254517 100644
--- a/block/rbd.c --- a/block/rbd.c
+++ b/block/rbd.c +++ b/block/rbd.c
@@ -108,12 +108,6 @@ typedef struct RBDTask { @@ -108,12 +108,6 @@ typedef struct RBDTask {
@ -40,7 +39,7 @@ index 347b121626..e61b359b97 100644
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
BlockdevOptionsRbd *opts, bool cache, BlockdevOptionsRbd *opts, bool cache,
const char *keypairs, const char *secretid, const char *keypairs, const char *secretid,
@@ -1460,111 +1454,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, @@ -1456,111 +1450,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
return spec_info; return spec_info;
} }
@ -152,7 +151,7 @@ index 347b121626..e61b359b97 100644
static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs) static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs)
{ {
BDRVRBDState *s = bs->opaque; BDRVRBDState *s = bs->opaque;
@@ -1800,7 +1689,6 @@ static BlockDriver bdrv_rbd = { @@ -1796,7 +1685,6 @@ static BlockDriver bdrv_rbd = {
#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
.bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes, .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
#endif #endif

View File

@ -1,43 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Tue, 26 Mar 2024 14:57:51 +0100
Subject: [PATCH] alloc-track: error out when auto-remove is not set
Since replacing the node now happens in the stream job, where the
option cannot be read from (it's internal to the driver), it will
always be treated as on.
qemu-server will always set it, make sure to have other users notice
the change (should they even exist). The option can be fully dropped
in the future while adding a version guard in qemu-server.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index b9f8ea9137..f3ed2935c4 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -34,7 +34,6 @@ typedef struct {
BdrvDirtyBitmap *bitmap;
uint64_t granularity;
DropState drop_state;
- bool auto_remove;
} BDRVAllocTrackState;
static QemuOptsList runtime_opts = {
@@ -86,7 +85,11 @@ static int track_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
+ if (!qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false)) {
+ error_setg(errp, "alloc-track: requires auto-remove option to be set to on");
+ ret = -EINVAL;
+ goto fail;
+ }
/* open the target (write) node, backing will be attached by block layer */
file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,

View File

@ -1,84 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 27 Mar 2024 11:15:39 +0100
Subject: [PATCH] alloc-track: avoid seemingly superfluous child permission
update
Doesn't seem necessary nowadays (maybe after commit "alloc-track: fix
deadlock during drop" where the dropping is not rescheduled and delayed
anymore or some upstream change). Should there really be some issue,
instead of having a drop state, this could also be just based off the
fact whether there is still a backing child.
Dumping the cumulative (shared) permissions for the BDS with a debug
print yields the same values after this patch and with QEMU 8.1,
namely 3 and 5.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 26 --------------------------
1 file changed, 26 deletions(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index f3ed2935c4..29138dcc49 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -25,15 +25,9 @@
#define TRACK_OPT_AUTO_REMOVE "auto-remove"
-typedef enum DropState {
- DropNone,
- DropInProgress,
-} DropState;
-
typedef struct {
BdrvDirtyBitmap *bitmap;
uint64_t granularity;
- DropState drop_state;
} BDRVAllocTrackState;
static QemuOptsList runtime_opts = {
@@ -137,8 +131,6 @@ static int track_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- s->drop_state = DropNone;
-
fail:
if (ret < 0) {
bdrv_graph_wrlock();
@@ -289,18 +281,8 @@ track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
- BDRVAllocTrackState *s = bs->opaque;
-
*nshared = BLK_PERM_ALL;
- /* in case we're currently dropping ourselves, claim to not use any
- * permissions at all - which is fine, since from this point on we will
- * never issue a read or write anymore */
- if (s->drop_state == DropInProgress) {
- *nperm = 0;
- return;
- }
-
if (role & BDRV_CHILD_DATA) {
*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
} else {
@@ -326,14 +308,6 @@ track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
* kinda fits better, but in the long-term, a special parameter would be
* nice (or done via qemu-server via upcoming blockdev-replace QMP command).
*/
- if (backing_file == NULL) {
- BDRVAllocTrackState *s = bs->opaque;
- bdrv_drained_begin(bs);
- s->drop_state = DropInProgress;
- bdrv_child_refresh_perms(bs, bs->file, &error_abort);
- bdrv_drained_end(bs);
- }
-
return 0;
}

View File

@ -0,0 +1,153 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 6 Apr 2023 14:59:31 +0200
Subject: [PATCH] alloc-track: fix deadlock during drop
by replacing the block node directly after changing the backing file
instead of rescheduling it.
With changes in QEMU 8.0, calling bdrv_get_info (and bdrv_unref)
during drop can lead to a deadlock when using iothread (only triggered
with multiple disks, except during debugging where it also triggered
with one disk sometimes):
1. job_unref_locked acquires the AioContext and calls job->driver->free
2. track_drop gets scheduled
3. bdrv_graph_wrlock is called and polls which leads to track_drop being
called
4. track_drop acquires the AioContext recursively
5. bdrv_get_info is a wrapped coroutine (since 8.0) and thus polls for
bdrv_co_get_info. This releases the AioContext, but only once! The
documentation for the AIO_WAIT_WHILE macro states that the
AioContext lock needs to be acquired exactly once, but there does
not seem to be a way for track_drop to know if it acquired the lock
recursively or not (without adding further hacks).
6. Because the AioContext is still held by the main thread once, it can't
be acquired before entering bdrv_co_get_info in co_schedule_bh_cb
which happens in the iothread
When doing the operation in change_backing_file, the AioContext has
already been acquired by the caller, so the issue with the recursive
lock goes away.
The comment explaining why delaying the replace is necessary is
> we need to schedule this for later however, since when this function
> is called, the blockjob modifying us is probably not done yet and
> has a blocker on 'bs'
However, there is no check for blockers in bdrv_replace_node. It would
need to be done by us, the caller, with check_to_replace_node.
Furthermore, the mirror job also does its call to bdrv_replace_node
while there is an active blocker (inserted by mirror itself) and they
use a specialized version to check for blockers instead of
check_to_replace_node there. Alloc-track could also do something
similar to check for other blockers, but it should be fine to rely on
Proxmox VE that no other operation with the blockdev is going on.
Mirror also drains the target before replacing the node, but the
target can have other users. In case of alloc-track the file child
should not be accessible by anybody else and so there can't be an
in-flight operation for the file child when alloc-track is drained.
The rescheduling based on refcounting is a hack and it doesn't seem to
be necessary anymore. It's not clear what the original issue from the
comment was. Testing with older builds with track_drop done directly
without rescheduling also didn't lead to any noticeable issue for me.
One issue it might have been is the one fixed by b1e1af394d
("block/stream: Drain subtree around graph change"), where
block-stream had a use-after-free if the base node changed at an
inconvenient time (which alloc-track's auto-drop does).
It's also not possible to just not auto-replace the alloc-track. Not
replacing it at all leads to other operations like block resize
hanging, and there is no good way to replace it manually via QMP
(there is x-blockdev-change, but it is experimental and doesn't
implement the required operation yet). Also, it's just cleaner in
general to not leave unnecessary block nodes lying around.
Suggested-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 54 ++++++++++++++-------------------------------
1 file changed, 16 insertions(+), 38 deletions(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index b75d7c6460..76da140a68 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -25,7 +25,6 @@
typedef enum DropState {
DropNone,
- DropRequested,
DropInProgress,
} DropState;
@@ -268,37 +267,6 @@ static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
}
}
-static void track_drop(void *opaque)
-{
- BlockDriverState *bs = (BlockDriverState*)opaque;
- BlockDriverState *file = bs->file->bs;
- BDRVAllocTrackState *s = bs->opaque;
-
- assert(file);
-
- /* we rely on the fact that we're not used anywhere else, so let's wait
- * until we're only used once - in the drive connected to the guest (and one
- * ref is held by bdrv_ref in track_change_backing_file) */
- if (bs->refcnt > 2) {
- aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
- return;
- }
- AioContext *aio_context = bdrv_get_aio_context(bs);
- aio_context_acquire(aio_context);
-
- bdrv_drained_begin(bs);
-
- /* now that we're drained, we can safely set 'DropInProgress' */
- s->drop_state = DropInProgress;
- bdrv_child_refresh_perms(bs, bs->file, &error_abort);
-
- bdrv_replace_node(bs, file, &error_abort);
- bdrv_set_backing_hd(bs, NULL, &error_abort);
- bdrv_drained_end(bs);
- bdrv_unref(bs);
- aio_context_release(aio_context);
-}
-
static int track_change_backing_file(BlockDriverState *bs,
const char *backing_file,
const char *backing_fmt)
@@ -308,13 +276,23 @@ static int track_change_backing_file(BlockDriverState *bs,
backing_file == NULL && backing_fmt == NULL)
{
/* backing file has been disconnected, there's no longer any use for
- * this node, so let's remove ourselves from the block graph - we need
- * to schedule this for later however, since when this function is
- * called, the blockjob modifying us is probably not done yet and has a
- * blocker on 'bs' */
- s->drop_state = DropRequested;
+ * this node, so let's remove ourselves from the block graph */
+ BlockDriverState *file = bs->file->bs;
+
+ /* Just to be sure, because bdrv_replace_node unrefs it */
bdrv_ref(bs);
- aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
+ bdrv_drained_begin(bs);
+
+ /* now that we're drained, we can safely set 'DropInProgress' */
+ s->drop_state = DropInProgress;
+
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+
+ bdrv_replace_node(bs, file, &error_abort);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
+
+ bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
return 0;

View File

@ -1,133 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 11 Apr 2024 11:29:26 +0200
Subject: [PATCH] copy-before-write: allow specifying minimum cluster size
Useful to make discard-source work in the context of backup fleecing
when the fleecing image has a larger granularity than the backup
target.
Copy-before-write operations will use at least this granularity and in
particular, discard requests to the source node will too. If the
granularity is too small, they will just be aligned down in
cbw_co_pdiscard_snapshot() and thus effectively ignored.
The QAPI uses uint32 so the value will be non-negative, but still fit
into a uint64_t.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/block-copy.c | 17 +++++++++++++----
block/copy-before-write.c | 3 ++-
include/block/block-copy.h | 1 +
qapi/block-core.json | 8 +++++++-
4 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/block/block-copy.c b/block/block-copy.c
index cc618e4561..12d662e9d4 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
}
static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ int64_t min_cluster_size,
Error **errp)
{
int ret;
@@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
"used. If the actual block size of the target exceeds "
"this default, the backup may be unusable",
BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
- return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+ return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
} else if (ret < 0 && !target_does_cow) {
error_setg_errno(errp, -ret,
"Couldn't determine the cluster size of the target image, "
@@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
return ret;
} else if (ret < 0 && target_does_cow) {
/* Not fatal; just trudge on ahead. */
- return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+ return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
}
- return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+ return MAX(min_cluster_size,
+ MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
}
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
BlockDriverState *copy_bitmap_bs,
const BdrvDirtyBitmap *bitmap,
bool discard_source,
+ int64_t min_cluster_size,
Error **errp)
{
ERRP_GUARD();
@@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
GLOBAL_STATE_CODE();
- cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
+ if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
+ error_setg(errp, "min-cluster-size needs to be a power of 2");
+ return NULL;
+ }
+
+ cluster_size = block_copy_calculate_cluster_size(target->bs,
+ min_cluster_size, errp);
if (cluster_size < 0) {
return NULL;
}
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 28f6a096cd..ef4e666303 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -478,7 +478,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
- flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+ flags & BDRV_O_CBW_DISCARD_SOURCE,
+ opts->min_cluster_size, errp);
if (!s->bcs) {
error_prepend(errp, "Cannot create block-copy-state: ");
return -EINVAL;
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index bdc703bacd..77857c6c68 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
BlockDriverState *copy_bitmap_bs,
const BdrvDirtyBitmap *bitmap,
bool discard_source,
+ int64_t min_cluster_size,
Error **errp);
/* Function should be called prior any actual copy request */
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 282e2e8a8c..9caf04cbe9 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4926,12 +4926,18 @@
# @on-cbw-error parameter will decide how this failure is handled.
# Default 0. (Since 7.1)
#
+# @min-cluster-size: Minimum size of blocks used by copy-before-write
+# operations. Has to be a power of 2. No effect if smaller than
+# the maximum of the target's cluster size and 64 KiB. Default 0.
+# (Since 8.1)
+#
# Since: 6.2
##
{ 'struct': 'BlockdevOptionsCbw',
'base': 'BlockdevOptionsGenericFormat',
'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
- '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
+ '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
+ '*min-cluster-size': 'uint32' } }
##
# @BlockdevOptions:

View File

@ -0,0 +1,190 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 5 May 2023 13:39:53 +0200
Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks
In spirit, this is a partial revert of commit 9b09503752 ("migration:
run setup callbacks out of big lock"), but only for the snapshot case.
For snapshots, the bdrv_writev_vmstate() function is used during setup
(in QIOChannelBlock backing the QEMUFile), but not holding the BQL
while calling it could lead to an assertion failure. To understand
how, first note the following:
1. Generated coroutine wrappers for block layer functions spawn the
coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it.
2. If the host OS switches threads at an inconvenient time, it can
happen that a bottom half scheduled for the main thread's AioContext
is executed as part of a vCPU thread's aio_poll().
An example leading to the assertion failure is as follows:
main thread:
1. A snapshot-save QMP command gets issued.
2. snapshot_save_job_bh() is scheduled.
vCPU thread:
3. aio_poll() for the main thread's AioContext is called (e.g. when
the guest writes to a pflash device, as part of blk_pwrite which is a
generated coroutine wrapper).
4. snapshot_save_job_bh() is executed as part of aio_poll().
5. qemu_savevm_state() is called.
6. qemu_mutex_unlock_iothread() is called. Now
qemu_get_current_aio_context() returns 0x0.
7. bdrv_writev_vmstate() is executed during the usual savevm setup.
But this function is a generated coroutine wrapper, so it uses
AIO_WAIT_WHILE. There, the assertion
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
will fail.
To fix it, ensure that the BQL is held during setup. To avoid changing
the behavior for migration too, introduce conditionals for the setup
callbacks that need the BQL and only take the lock if it's not already
held.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/migration/register.h | 2 +-
migration/block-dirty-bitmap.c | 15 ++++++++++++---
migration/block.c | 15 ++++++++++++---
migration/ram.c | 16 +++++++++++++---
migration/savevm.c | 2 --
5 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/include/migration/register.h b/include/migration/register.h
index 90914f32f5..c728fd9120 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
* by other locks.
*/
int (*save_live_iterate)(QEMUFile *f, void *opaque);
+ int (*save_setup)(QEMUFile *f, void *opaque);
/* This runs outside the iothread lock! */
- int (*save_setup)(QEMUFile *f, void *opaque);
/* Note for save_live_pending:
* must_precopy:
* - must be migrated in precopy or in stopped state
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 285dd1d148..f7ee5a74d9 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1219,10 +1219,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
{
DBMSaveState *s = &((DBMState *)opaque)->save;
SaveBitmapState *dbms = NULL;
+ bool release_lock = false;
- qemu_mutex_lock_iothread();
+ /* For snapshots, the BQL is held during setup. */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
if (init_dirty_bitmap_migration(s) < 0) {
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return -1;
}
@@ -1230,7 +1237,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
send_bitmap_start(f, s, dbms);
}
qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return 0;
}
diff --git a/migration/block.c b/migration/block.c
index 86c2256a2b..8423e0c9f9 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -725,21 +725,30 @@ static void block_migration_cleanup(void *opaque)
static int block_save_setup(QEMUFile *f, void *opaque)
{
int ret;
+ bool release_lock = false;
trace_migration_block_save("setup", block_mig_state.submitted,
block_mig_state.transferred);
- qemu_mutex_lock_iothread();
+ /* For snapshots, the BQL is held during setup. */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
ret = init_blk_migration(f);
if (ret < 0) {
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return ret;
}
/* start track dirty blocks */
ret = set_dirty_tracking();
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
if (ret) {
return ret;
diff --git a/migration/ram.c b/migration/ram.c
index 6e1514f69f..6a1aec7031 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2896,8 +2896,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
static void ram_init_bitmaps(RAMState *rs)
{
- /* For memory_global_dirty_log_start below. */
- qemu_mutex_lock_iothread();
+ bool release_lock = false;
+
+ /*
+ * For memory_global_dirty_log_start below.
+ * For snapshots, the BQL is held during setup.
+ */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
qemu_mutex_lock_ramlist();
WITH_RCU_READ_LOCK_GUARD() {
@@ -2909,7 +2917,9 @@ static void ram_init_bitmaps(RAMState *rs)
}
}
qemu_mutex_unlock_ramlist();
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
/*
* After an eventual first bitmap sync, fixup the initial bitmap
diff --git a/migration/savevm.c b/migration/savevm.c
index d60c4f487a..3c015722f7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1625,10 +1625,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
reset_vfio_bytes_transferred();
ms->to_dst_file = f;
- qemu_mutex_unlock_iothread();
qemu_savevm_state_header(f);
qemu_savevm_state_setup(f);
- qemu_mutex_lock_iothread();
while (qemu_file_get_error(f) == 0) {
if (qemu_savevm_state_iterate(f, false) > 0) {

View File

@ -1,106 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 11 Apr 2024 11:29:27 +0200
Subject: [PATCH] backup: add minimum cluster size to performance options
Useful to make discard-source work in the context of backup fleecing
when the fleecing image has a larger granularity than the backup
target.
Backup/block-copy will use at least this granularity for copy operations
and in particular, discard requests to the backup source will too. If
the granularity is too small, they will just be aligned down in
cbw_co_pdiscard_snapshot() and thus effectively ignored.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/backup.c | 2 +-
block/copy-before-write.c | 2 ++
block/copy-before-write.h | 1 +
blockdev.c | 3 +++
qapi/block-core.json | 9 +++++++--
5 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index 1963e47ab9..fe69723ada 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -434,7 +434,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
}
cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
- &bcs, errp);
+ perf->min_cluster_size, &bcs, errp);
if (!cbw) {
goto error;
}
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index ef4e666303..adb27649a8 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -547,6 +547,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockDriverState *target,
const char *filter_node_name,
bool discard_source,
+ int64_t min_cluster_size,
BlockCopyState **bcs,
Error **errp)
{
@@ -565,6 +566,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
}
qdict_put_str(opts, "file", bdrv_get_node_name(source));
qdict_put_str(opts, "target", bdrv_get_node_name(target));
+ qdict_put_int(opts, "min-cluster-size", min_cluster_size);
top = bdrv_insert_node(source, opts, flags, errp);
if (!top) {
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 01af0cd3c4..dc6cafe7fa 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockDriverState *target,
const char *filter_node_name,
bool discard_source,
+ int64_t min_cluster_size,
BlockCopyState **bcs,
Error **errp);
void bdrv_cbw_drop(BlockDriverState *bs);
diff --git a/blockdev.c b/blockdev.c
index 1054a69279..cbe224387b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2654,6 +2654,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
if (backup->x_perf->has_max_chunk) {
perf.max_chunk = backup->x_perf->max_chunk;
}
+ if (backup->x_perf->has_min_cluster_size) {
+ perf.min_cluster_size = backup->x_perf->min_cluster_size;
+ }
}
if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 9caf04cbe9..df934647ed 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1790,11 +1790,16 @@
# it should not be less than job cluster size which is calculated
# as maximum of target image cluster size and 64k. Default 0.
#
+# @min-cluster-size: Minimum size of blocks used by copy-before-write
+# and background copy operations. Has to be a power of 2. No
+# effect if smaller than the maximum of the target's cluster size
+# and 64 KiB. Default 0. (Since 8.1)
+#
# Since: 6.0
##
{ 'struct': 'BackupPerf',
- 'data': { '*use-copy-range': 'bool',
- '*max-workers': 'int', '*max-chunk': 'int64' } }
+ 'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
+ '*max-chunk': 'int64', '*min-cluster-size': 'uint32' } }
##
# @BackupCommon:

Some files were not shown because too many files have changed in this diff Show More