various PVE backup code refactoring/improvements

Mostly preparation for our external backup plugin work, but fine to already commit now. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
PVE backup: improve error handling for fleecing
2024-11-12 16:48:28 +01:00 · 2024-11-12 16:48:07 +01:00 · 2024-11-12 16:42:52 +01:00 · 2024-11-12 16:42:30 +01:00 · 2024-11-10 11:23:14 +01:00 · 2024-11-10 11:20:39 +01:00
108 changed files with 5501 additions and 1723 deletions
--- a/6
+++ b/6
@ -24,6 +24,7 @@ endif

 PC_BIOS_FW_PURGE_LIST_IN = \
 	hppa-firmware.img \
+	hppa-firmware64.img \
 	openbios-ppc \
 	openbios-sparc32 \
 	openbios-sparc64 \
@ -31,7 +32,8 @@ PC_BIOS_FW_PURGE_LIST_IN = \
 	s390-ccw.img \
 	s390-netboot.img \
 	u-boot.e500 \
-	.*\.dtb \
+	.*[a-zA-Z0-9]\.dtb \
+	.*[a-zA-Z0-9]\.dts \
 	qemu_vga.ndrv \
 	slof.bin \
 	opensbi-riscv.*-generic-fw_dynamic.bin \
@ -56,7 +58,7 @@ $(BUILDDIR): submodule
 deb kvm: $(DEBS)
 $(DEB_DBG): $(DEB)
 $(DEB): $(BUILDDIR)
-	cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j
+	cd $(BUILDDIR); dpkg-buildpackage -b -us -uc
 	lintian $(DEBS)

 sbuild: $(DSC)
--- a/debian/changelog
+++ b/debian/changelog
@ -1,3 +1,141 @@
+pve-qemu-kvm (9.0.2-4) bookworm; urgency=medium
+
+  * async snapshot: ensure any dynamic vCPU-throttling applied for
+    auto-converge always gets disabled again after finishing the snapshot.
+
+ -- Proxmox Support Team <support@proxmox.com>  Sun, 10 Nov 2024 11:23:09 +0100
+
+pve-qemu-kvm (9.0.2-3) bookworm; urgency=medium
+
+  * pick up fix for VirtIO PCI regressions
+
+  * pick up stable fixes for 9.0, including fixes for VirtIO-net, ARM and
+    x86(_64) emulation, CVEs to harden NBD server against malicious clients,
+    as well as a few others (VNC, physmem, Intel IOMMU, ...).
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 06 Sep 2024 16:21:42 +0200
+
+pve-qemu-kvm (9.0.2-2) bookworm; urgency=medium
+
+  * actually update submodule to QEMU 9.0.2. The previous release was still
+    based on 9.0.0 by mistake.
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 07 Aug 2024 10:16:01 +0200
+
+pve-qemu-kvm (9.0.2-1) bookworm; urgency=medium
+
+  * update submodule and patches to QEMU 9.0.2. While our version had most
+    stable fixes included already, there are new fixes for VirtIO and VGA
+    display screen blanking (#4786)
+
+  * backport fix for a regression with the LSI-53c895a controller and one for
+    the boot order getting ignored for USB storage
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 29 Jul 2024 18:59:40 +0200
+
+pve-qemu-kvm (9.0.0-6) bookworm; urgency=medium
+
+  * fix a regression in the zeroinit block driver that prevented importing and
+    cloning disks to RBD storages which are not using the krbd setting
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 08 Jul 2024 16:11:15 +0200
+
+pve-qemu-kvm (9.0.0-5) bookworm; urgency=medium
+
+  * backport fix for CVE-2024-4467 to prevent malicious qcow2 image files from
+    already causing bad effects if being queried via 'qemu-img info'. For
+    Proxmox VE, this is an additional safeguard, as currently it directly
+    creates and manages the qcow2 images used by VMs and does not allow
+    unprivileged users to import them
+
+  * fix #4726: code cleanup: avoid superfluous check in vma backup code
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 03 Jul 2024 13:13:35 +0200
+
+pve-qemu-kvm (9.0.0-4) bookworm; urgency=medium
+
+  * fix crash after saving a snapshot without including VM state when a VirtIO
+    block device with iothread is configured.
+
+  * fix edge case in error handling when opening a block device from PBS fails
+
+  * minor code cleanup in backup code
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 01 Jul 2024 11:26:11 +0200
+
+pve-qemu-kvm (9.0.0-3) bookworm; urgency=medium
+
+  * fix crash when doing resize after hotplugging a disk using io_uring
+
+  * fix some minor issues in software CPU emulation (i.e. non-KVM) for ARM and
+    x86(_64)
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 29 May 2024 15:55:44 +0200
+
+pve-qemu-kvm (9.0.0-2) bookworm; urgency=medium
+
+  * fix #5409: backup: fix copy-before-write timeout
+
+  * backup: improve error when copy-before-write fails for fleecing
+
+  * fix forwards and backwards migration with VirtIO-GPU display
+
+  * fix a regression in pflash device introduced in 8.2
+
+  * revert a commit for VirtIO PCI devices that turned out to cause more
+    potential security issues than what it fixed
+
+  * move compatibility flags for a new VirtIO-net feature to the correct
+    machine type. The feature was introduced in QEMU 8.2, but the
+    compatibility flags got added to machine version 8.0 instead of 8.1. This
+    breaks backwards migration with machine version 8.1 from an 8.2/9.0 binary
+    to an 8.1 binary, in cases where the guest kernel enables the feature
+    (e.g. Ubuntu 23.10).
+    While that breaks migration with machine version 8.1 from an unpatched to
+    a patched binary, Proxmox VE only ever had 8.2 on the test repository and
+    9.0 not yet in any public repository.
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 17 May 2024 17:04:52 +0200
+
+pve-qemu-kvm (9.0.0-1) bookworm; urgency=medium
+
+  * update submodule and patches to QEMU 9.0.0
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 29 Apr 2024 10:51:37 +0200
+
+pve-qemu-kvm (8.2.2-1) bookworm; urgency=medium
+
+  * update submodule and patches to QEMU 8.2.2
+
+ -- Proxmox Support Team <support@proxmox.com>  Sat, 27 Apr 2024 12:44:30 +0200
+
+pve-qemu-kvm (8.1.5-5) bookworm; urgency=medium
+
+  * implement support for backup fleecing
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 11 Apr 2024 17:46:48 +0200
+
+pve-qemu-kvm (8.1.5-4) bookworm; urgency=medium
+
+  * fix live-import for certain kinds of VMDK images that rely on padding
+
+  * backup: avoid bubbling up first error if it's an ECANCELED one, as those
+    are often a result of canceling the job due to running into an actual
+    issue.
+
+  * backup: factor out & clean up gathering device info into helper
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 12 Mar 2024 14:08:40 +0100
+
+pve-qemu-kvm (8.1.5-3) bookworm; urgency=medium
+
+  * backport fix for potential deadlock during QMP stop command if the VM has
+    disks attached through VirtIO-Block and IO-Thread enabled
+
+  * fix #4507: add patch to automatically increase NOFILE soft limit
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 21 Feb 2024 20:11:23 +0100
+
 pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium

  * work around for a situation where guest IO might get stuck, if the VM is
--- a/debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
+++ b/debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
@ -27,18 +27,18 @@ Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
 Signed-off-by: John Snow <jsnow@redhat.com>
 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
-[FE: rebased for 8.1.1]
+[FE: rebased for 8.2.2]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- block/mirror.c                         | 98 +++++++++++++++++++++-----
+ block/mirror.c                         | 99 ++++++++++++++++++++------
 blockdev.c                             | 38 +++++++++-
 include/block/block_int-global-state.h |  4 +-
 qapi/block-core.json                   | 25 ++++++-
 tests/unit/test-block-iothread.c       |  4 +-
- 5 files changed, 142 insertions(+), 27 deletions(-)
+ 5 files changed, 142 insertions(+), 28 deletions(-)

 diff --git a/block/mirror.c b/block/mirror.c
-index d3cacd1708..1ff42c8af1 100644
+index 1bdce3b657..0c5c72df2e 100644
 --- a/block/mirror.c
 +++ b/block/mirror.c
@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob {
@ -50,7 +50,7 @@ index d3cacd1708..1ff42c8af1 100644
     BlockMirrorBackingMode backing_mode;
     /* Whether the target image requires explicit zero-initialization */
     bool zero_target;
-@@ -65,6 +65,8 @@ typedef struct MirrorBlockJob {
+@@ -73,6 +73,8 @@ typedef struct MirrorBlockJob {
     size_t buf_size;
     int64_t bdev_length;
     unsigned long *cow_bitmap;
@ -59,9 +59,9 @@ index d3cacd1708..1ff42c8af1 100644
     BdrvDirtyBitmap *dirty_bitmap;
     BdrvDirtyBitmapIter *dbi;
     uint8_t *buf;
-@@ -705,7 +707,8 @@ static int mirror_exit_common(Job *job)
-     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
+@@ -722,7 +724,8 @@ static int mirror_exit_common(Job *job)
                              &error_abort);
+ 
     if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
 -        BlockDriverState *backing = s->is_none_mode ? src : s->base;
 +        BlockDriverState *backing;
@ -69,7 +69,7 @@ index d3cacd1708..1ff42c8af1 100644
         BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
 
         if (bdrv_cow_bs(unfiltered_target) != backing) {
-@@ -809,6 +812,16 @@ static void mirror_abort(Job *job)
+@@ -819,6 +822,16 @@ static void mirror_abort(Job *job)
     assert(ret == 0);
 }
 
@ -86,7 +86,7 @@ index d3cacd1708..1ff42c8af1 100644
 static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
 {
     int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-@@ -997,7 +1010,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
+@@ -1015,7 +1028,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
     mirror_free_init(s);
 
     s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@ -96,7 +96,7 @@ index d3cacd1708..1ff42c8af1 100644
         ret = mirror_dirty_init(s);
         if (ret < 0 || job_is_cancelled(&s->common.job)) {
             goto immediate_exit;
-@@ -1251,6 +1265,7 @@ static const BlockJobDriver mirror_job_driver = {
+@@ -1304,6 +1318,7 @@ static const BlockJobDriver mirror_job_driver = {
         .run                    = mirror_run,
         .prepare                = mirror_prepare,
         .abort                  = mirror_abort,
@ -104,7 +104,7 @@ index d3cacd1708..1ff42c8af1 100644
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
         .cancel                 = mirror_cancel,
-@@ -1267,6 +1282,7 @@ static const BlockJobDriver commit_active_job_driver = {
+@@ -1322,6 +1337,7 @@ static const BlockJobDriver commit_active_job_driver = {
         .run                    = mirror_run,
         .prepare                = mirror_prepare,
         .abort                  = mirror_abort,
@ -112,7 +112,7 @@ index d3cacd1708..1ff42c8af1 100644
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
         .cancel                 = commit_active_cancel,
-@@ -1658,7 +1674,10 @@ static BlockJob *mirror_start_job(
+@@ -1714,7 +1730,10 @@ static BlockJob *mirror_start_job(
                              BlockCompletionFunc *cb,
                              void *opaque,
                              const BlockJobDriver *driver,
@ -124,9 +124,9 @@ index d3cacd1708..1ff42c8af1 100644
                              bool auto_complete, const char *filter_node_name,
                              bool is_mirror, MirrorCopyMode copy_mode,
                              Error **errp)
-@@ -1670,10 +1689,39 @@ static BlockJob *mirror_start_job(
-     uint64_t target_perms, target_shared_perms;
-     int ret;
+@@ -1728,10 +1747,39 @@ static BlockJob *mirror_start_job(
+ 
+     GLOBAL_STATE_CODE();
 
 -    if (granularity == 0) {
 -        granularity = bdrv_get_default_bitmap_granularity(target);
@ -166,7 +166,7 @@ index d3cacd1708..1ff42c8af1 100644
     assert(is_power_of_2(granularity));
 
     if (buf_size < 0) {
-@@ -1804,7 +1852,9 @@ static BlockJob *mirror_start_job(
+@@ -1871,7 +1919,9 @@ static BlockJob *mirror_start_job(
     s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
@ -176,10 +176,10 @@ index d3cacd1708..1ff42c8af1 100644
 +    s->bitmap_mode = bitmap_mode;
     s->backing_mode = backing_mode;
     s->zero_target = zero_target;
-     s->copy_mode = copy_mode;
-@@ -1825,6 +1875,18 @@ static BlockJob *mirror_start_job(
+     qatomic_set(&s->copy_mode, copy_mode);
+@@ -1897,6 +1947,18 @@ static BlockJob *mirror_start_job(
+      */
     bdrv_disable_dirty_bitmap(s->dirty_bitmap);
-     }
 
 +    if (s->sync_bitmap) {
 +        bdrv_dirty_bitmap_set_busy(s->sync_bitmap, true);
@ -193,10 +193,10 @@ index d3cacd1708..1ff42c8af1 100644
 +        }
 +    }
 +
+     bdrv_graph_wrlock();
     ret = block_job_add_bdrv(&s->common, "source", bs, 0,
                              BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
-                              BLK_PERM_CONSISTENT_READ,
-@@ -1902,6 +1964,9 @@ fail:
+@@ -1979,6 +2041,9 @@ fail:
         if (s->dirty_bitmap) {
             bdrv_release_dirty_bitmap(s->dirty_bitmap);
         }
@ -206,7 +206,7 @@ index d3cacd1708..1ff42c8af1 100644
         job_early_fail(&s->common.job);
     }
 
-@@ -1919,31 +1984,25 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
+@@ -2001,35 +2066,28 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, const char *replaces,
                   int creation_flags, int64_t speed,
                   uint32_t granularity, int64_t buf_size,
@ -231,8 +231,12 @@ index d3cacd1708..1ff42c8af1 100644
 -                   MirrorSyncMode_str(mode));
 -        return;
 -    }
+-
+     bdrv_graph_rdlock_main_loop();
 -    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
+     bdrv_graph_rdunlock_main_loop();
+ 
     mirror_start_job(job_id, bs, creation_flags, target, replaces,
                      speed, granularity, buf_size, backing_mode, zero_target,
                      on_source_error, on_target_error, unmap, NULL, NULL,
@ -243,7 +247,7 @@ index d3cacd1708..1ff42c8af1 100644
 }
 
 BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
-@@ -1970,7 +2029,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
+@@ -2056,7 +2114,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
                      job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                      MIRROR_LEAVE_BACKING_CHAIN, false,
                      on_error, on_error, true, cb, opaque,
@ -254,10 +258,10 @@ index d3cacd1708..1ff42c8af1 100644
                      errp);
     if (!job) {
 diff --git a/blockdev.c b/blockdev.c
-index c28462a633..a402fa4bf7 100644
+index 4c33c3f5f0..f3e508a6a7 100644
 --- a/blockdev.c
 +++ b/blockdev.c
-@@ -2849,6 +2849,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2776,6 +2776,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                    BlockDriverState *target,
                                    const char *replaces,
                                    enum MirrorSyncMode sync,
@ -267,7 +271,7 @@ index c28462a633..a402fa4bf7 100644
                                    BlockMirrorBackingMode backing_mode,
                                    bool zero_target,
                                    bool has_speed, int64_t speed,
-@@ -2867,6 +2870,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2794,6 +2797,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
 {
     BlockDriverState *unfiltered_bs;
     int job_flags = JOB_DEFAULT;
@ -275,7 +279,7 @@ index c28462a633..a402fa4bf7 100644
 
     GLOBAL_STATE_CODE();
     GRAPH_RDLOCK_GUARD_MAINLOOP();
-@@ -2921,6 +2925,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2848,6 +2852,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         sync = MIRROR_SYNC_MODE_FULL;
     }
 
@ -305,7 +309,7 @@ index c28462a633..a402fa4bf7 100644
     if (!replaces) {
         /* We want to mirror from @bs, but keep implicit filters on top */
         unfiltered_bs = bdrv_skip_implicit_filters(bs);
-@@ -2966,8 +2993,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2889,8 +2916,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
      * and will allow to check whether the node still exist at mirror completion
      */
     mirror_start(job_id, bs, target,
@ -316,7 +320,7 @@ index c28462a633..a402fa4bf7 100644
                  on_source_error, on_target_error, unmap, filter_node_name,
                  copy_mode, errp);
 }
-@@ -3115,6 +3142,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
+@@ -3034,6 +3061,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
 
     blockdev_mirror_common(arg->job_id, bs, target_bs,
                            arg->replaces, arg->sync,
@ -325,7 +329,7 @@ index c28462a633..a402fa4bf7 100644
                            backing_mode, zero_target,
                            arg->has_speed, arg->speed,
                            arg->has_granularity, arg->granularity,
-@@ -3136,6 +3165,8 @@ void qmp_blockdev_mirror(const char *job_id,
+@@ -3053,6 +3082,8 @@ void qmp_blockdev_mirror(const char *job_id,
                          const char *device, const char *target,
                          const char *replaces,
                          MirrorSyncMode sync,
@ -334,7 +338,7 @@ index c28462a633..a402fa4bf7 100644
                          bool has_speed, int64_t speed,
                          bool has_granularity, uint32_t granularity,
                          bool has_buf_size, int64_t buf_size,
-@@ -3184,7 +3215,8 @@ void qmp_blockdev_mirror(const char *job_id,
+@@ -3093,7 +3124,8 @@ void qmp_blockdev_mirror(const char *job_id,
     }
 
     blockdev_mirror_common(job_id, bs, target_bs,
@ -345,10 +349,10 @@ index c28462a633..a402fa4bf7 100644
                            has_granularity, granularity,
                            has_buf_size, buf_size,
 diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
-index da5fb31089..32f0f9858a 100644
+index eb2d92a226..f0c642b194 100644
 --- a/include/block/block_int-global-state.h
 +++ b/include/block/block_int-global-state.h
-@@ -152,7 +152,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
+@@ -158,7 +158,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, const char *replaces,
                   int creation_flags, int64_t speed,
                   uint32_t granularity, int64_t buf_size,
@ -360,10 +364,10 @@ index da5fb31089..32f0f9858a 100644
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index bca1a0c372..a5cea82139 100644
+index b179d65520..905da8be72 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -2145,6 +2145,15 @@
+@@ -2174,6 +2174,15 @@
 #     destination (all the disk, only the sectors allocated in the
 #     topmost image, or only new I/O).
 #
@ -379,7 +383,7 @@ index bca1a0c372..a5cea82139 100644
 # @granularity: granularity of the dirty bitmap, default is 64K if the
 #     image format doesn't have clusters, 4K if the clusters are
 #     smaller than that, else the cluster size.  Must be a power of 2
-@@ -2187,7 +2196,9 @@
+@@ -2216,7 +2225,9 @@
 { 'struct': 'DriveMirror',
   'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
             '*format': 'str', '*node-name': 'str', '*replaces': 'str',
@ -390,7 +394,7 @@ index bca1a0c372..a5cea82139 100644
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError',
-@@ -2471,6 +2482,15 @@
+@@ -2496,6 +2507,15 @@
 #     destination (all the disk, only the sectors allocated in the
 #     topmost image, or only new I/O).
 #
@ -406,7 +410,7 @@ index bca1a0c372..a5cea82139 100644
 # @granularity: granularity of the dirty bitmap, default is 64K if the
 #     image format doesn't have clusters, 4K if the clusters are
 #     smaller than that, else the cluster size.  Must be a power of 2
-@@ -2521,7 +2541,8 @@
+@@ -2544,7 +2564,8 @@
 { 'command': 'blockdev-mirror',
   'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
             '*replaces': 'str',
@ -417,10 +421,10 @@ index bca1a0c372..a5cea82139 100644
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError',
 diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
-index d727a5fee8..8a34aa2328 100644
+index 3766d5de6b..afa44cbd34 100644
 --- a/tests/unit/test-block-iothread.c
 +++ b/tests/unit/test-block-iothread.c
-@@ -757,8 +757,8 @@ static void test_propagate_mirror(void)
+@@ -755,8 +755,8 @@ static void test_propagate_mirror(void)
 
     /* Start a mirror job */
     mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,
@ -430,4 +434,4 @@ index d727a5fee8..8a34aa2328 100644
 +                 false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
                  false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
                  &error_abort);
-     WITH_JOB_LOCK_GUARD() {
+ 
--- a/debian/patches/bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
+++ b/debian/patches/bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
@ -24,10 +24,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 18 insertions(+), 6 deletions(-)

 diff --git a/block/mirror.c b/block/mirror.c
-index 1ff42c8af1..11b8a8e959 100644
+index 0c5c72df2e..37fee3fa25 100644
 --- a/block/mirror.c
 +++ b/block/mirror.c
-@@ -682,8 +682,6 @@ static int mirror_exit_common(Job *job)
+@@ -693,8 +693,6 @@ static int mirror_exit_common(Job *job)
         bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
     }
 
@ -36,9 +36,9 @@ index 1ff42c8af1..11b8a8e959 100644
     /* Make sure that the source BDS doesn't go away during bdrv_replace_node,
      * before we can call bdrv_drained_end */
     bdrv_ref(src);
-@@ -788,6 +786,18 @@ static int mirror_exit_common(Job *job)
-     block_job_remove_all_bdrv(bjob);
-     bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
+@@ -800,6 +798,18 @@ static int mirror_exit_common(Job *job)
+     bdrv_drained_end(target_bs);
+     bdrv_unref(target_bs);
 
 +    if (s->sync_bitmap) {
 +        if (s->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS ||
@ -55,7 +55,7 @@ index 1ff42c8af1..11b8a8e959 100644
     bs_opaque->job = NULL;
 
     bdrv_drained_end(src);
-@@ -1699,10 +1709,6 @@ static BlockJob *mirror_start_job(
+@@ -1757,10 +1767,6 @@ static BlockJob *mirror_start_job(
                        " sync mode",
                        MirrorSyncMode_str(sync_mode));
             return NULL;
@ -66,7 +66,7 @@ index 1ff42c8af1..11b8a8e959 100644
         }
     } else if (bitmap) {
         error_setg(errp,
-@@ -1719,6 +1725,12 @@ static BlockJob *mirror_start_job(
+@@ -1777,6 +1783,12 @@ static BlockJob *mirror_start_job(
             return NULL;
         }
         granularity = bdrv_dirty_bitmap_granularity(bitmap);
--- a/debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
+++ b/debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 3 insertions(+)

 diff --git a/blockdev.c b/blockdev.c
-index a402fa4bf7..01b0ab0549 100644
+index f3e508a6a7..37b8437f3e 100644
 --- a/blockdev.c
 +++ b/blockdev.c
-@@ -2946,6 +2946,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2873,6 +2873,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
             return;
         }
--- a/debian/patches/bitmap-mirror/0004-mirror-switch-to-bdrv_dirty_bitmap_merge_internal.patch
+++ b/debian/patches/bitmap-mirror/0004-mirror-switch-to-bdrv_dirty_bitmap_merge_internal.patch
@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 4 insertions(+), 7 deletions(-)

 diff --git a/block/mirror.c b/block/mirror.c
-index 11b8a8e959..00f2665ca4 100644
+index 37fee3fa25..6b3cce1007 100644
 --- a/block/mirror.c
 +++ b/block/mirror.c
-@@ -792,8 +792,8 @@ static int mirror_exit_common(Job *job)
+@@ -804,8 +804,8 @@ static int mirror_exit_common(Job *job)
              job->ret == 0 && ret == 0)) {
             /* Success; synchronize copy back to sync. */
             bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
@ -30,7 +30,7 @@ index 11b8a8e959..00f2665ca4 100644
         }
     }
     bdrv_release_dirty_bitmap(s->dirty_bitmap);
-@@ -1892,11 +1892,8 @@ static BlockJob *mirror_start_job(
+@@ -1964,11 +1964,8 @@ static BlockJob *mirror_start_job(
     }
 
     if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
@ -43,4 +43,4 @@ index 11b8a8e959..00f2665ca4 100644
 +                                         NULL, true);
     }
 
-     ret = block_job_add_bdrv(&s->common, "source", bs, 0,
+     bdrv_graph_wrlock();
--- a/debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
+++ b/debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
@ -12,7 +12,7 @@ uniform w.r.t. backup block jobs.

 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
-[FE: rebase for 8.0]
+[FE: rebase for 8.2.2]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 block/mirror.c             | 28 +++------------
@ -21,12 +21,12 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 3 files changed, 70 insertions(+), 59 deletions(-)

 diff --git a/block/mirror.c b/block/mirror.c
-index 00f2665ca4..60cf574de5 100644
+index 6b3cce1007..2f1223852b 100644
 --- a/block/mirror.c
 +++ b/block/mirror.c
-@@ -1699,31 +1699,13 @@ static BlockJob *mirror_start_job(
-     uint64_t target_perms, target_shared_perms;
-     int ret;
+@@ -1757,31 +1757,13 @@ static BlockJob *mirror_start_job(
+ 
+     GLOBAL_STATE_CODE();
 
 -    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
 -        error_setg(errp, "Sync mode '%s' not supported",
@ -62,10 +62,10 @@ index 00f2665ca4..60cf574de5 100644
 
         if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
 diff --git a/blockdev.c b/blockdev.c
-index 01b0ab0549..cd5f205ad1 100644
+index 37b8437f3e..ed8198f351 100644
 --- a/blockdev.c
 +++ b/blockdev.c
-@@ -2925,7 +2925,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2852,7 +2852,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         sync = MIRROR_SYNC_MODE_FULL;
     }
 
--- a/debian/patches/extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
+++ b/debian/patches/extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
@ -78,7 +78,7 @@ index 252de85681..8db28f9272 100644
 
 /**
 diff --git a/monitor/monitor.c b/monitor/monitor.c
-index dc352f9e9d..56e1307014 100644
+index 01ede1babd..5681bca346 100644
 --- a/monitor/monitor.c
 +++ b/monitor/monitor.c
@@ -117,6 +117,21 @@ bool monitor_cur_is_qmp(void)
--- a/debian/patches/extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
+++ b/debian/patches/extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
@ -22,7 +22,7 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 1 file changed, 2 insertions(+), 12 deletions(-)

 diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
-index 32c70c9e99..984b6a3145 100644
+index 2d0c607177..97e51733af 100644
 --- a/hw/scsi/megasas.c
 +++ b/hw/scsi/megasas.c
@@ -1781,7 +1781,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
--- a/debian/patches/extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
+++ b/debian/patches/extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
@ -55,10 +55,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 1 file changed, 6 insertions(+), 6 deletions(-)

 diff --git a/hw/ide/core.c b/hw/ide/core.c
-index c3508acbb1..289347af58 100644
+index e8cb2dac92..3b21acf651 100644
 --- a/hw/ide/core.c
 +++ b/hw/ide/core.c
-@@ -444,7 +444,7 @@ static void ide_trim_bh_cb(void *opaque)
+@@ -456,7 +456,7 @@ static void ide_trim_bh_cb(void *opaque)
     iocb->bh = NULL;
     qemu_aio_unref(iocb);
 
@ -67,7 +67,7 @@ index c3508acbb1..289347af58 100644
     blk_dec_in_flight(blk);
 }
 
-@@ -504,6 +504,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
+@@ -516,6 +516,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
 done:
     iocb->aiocb = NULL;
     if (iocb->bh) {
@ -76,7 +76,7 @@ index c3508acbb1..289347af58 100644
         replay_bh_schedule_event(iocb->bh);
     }
 }
-@@ -516,9 +518,6 @@ BlockAIOCB *ide_issue_trim(
+@@ -528,9 +530,6 @@ BlockAIOCB *ide_issue_trim(
     IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
     TrimAIOCB *iocb;
 
@ -86,7 +86,7 @@ index c3508acbb1..289347af58 100644
     iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
     iocb->s = s;
     iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
-@@ -742,8 +741,9 @@ void ide_cancel_dma_sync(IDEState *s)
+@@ -754,8 +753,9 @@ void ide_cancel_dma_sync(IDEState *s)
      */
     if (s->bus->dma->aiocb) {
         trace_ide_cancel_dma_sync_remaining();
--- a/debian/patches/extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
+++ b/debian/patches/extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
@ -24,10 +24,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 1 file changed, 2 insertions(+), 6 deletions(-)

 diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
-index bb12b0ad43..de14d3c3da 100644
+index 53f804ac16..9b1b9f0412 100644
 --- a/hw/i386/acpi-build.c
 +++ b/hw/i386/acpi-build.c
-@@ -362,13 +362,9 @@ Aml *aml_pci_device_dsm(void)
+@@ -347,13 +347,9 @@ Aml *aml_pci_device_dsm(void)
     {
         Aml *params = aml_local(0);
         Aml *pkg = aml_package(2);
--- a/debian/patches/extra/0004-migration-block-dirty-bitmap-fix-loading-bitmap-when.patch
+++ b/debian/patches/extra/0004-migration-block-dirty-bitmap-fix-loading-bitmap-when.patch
@ -1,48 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Fri, 28 Jul 2023 10:47:48 +0200
-Subject: [PATCH] migration/block-dirty-bitmap: fix loading bitmap when there
- is an iothread
-
-The bdrv_create_dirty_bitmap() function (which is also called by
-bdrv_dirty_bitmap_create_successor()) uses bdrv_getlength(bs). This is
-a wrapper around a coroutine, and thus uses bdrv_poll_co(). Polling
-tries to release the AioContext which will trigger an assert() if it
-hasn't been acquired before.
-
-The issue does not happen for migration, because there we are in a
-coroutine already, so the wrapper will just call bdrv_co_getlength()
-directly without polling.
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
- migration/block-dirty-bitmap.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
-index 032fc5f405..e1ae3b7316 100644
--- a/migration/block-dirty-bitmap.c
-+++ b/migration/block-dirty-bitmap.c
-@@ -805,8 +805,11 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
-                      "destination", bdrv_dirty_bitmap_name(s->bitmap));
-         return -EINVAL;
-     } else {
-+        AioContext *ctx = bdrv_get_aio_context(s->bs);
-+        aio_context_acquire(ctx);
-         s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
-                                              s->bitmap_name, &local_err);
-+        aio_context_release(ctx);
-         if (!s->bitmap) {
-             error_report_err(local_err);
-             return -EINVAL;
-@@ -833,7 +836,10 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
- 
-     bdrv_disable_dirty_bitmap(s->bitmap);
-     if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) {
-+        AioContext *ctx = bdrv_get_aio_context(s->bs);
-+        aio_context_acquire(ctx);
-         bdrv_dirty_bitmap_create_successor(s->bitmap, &local_err);
-+        aio_context_release(ctx);
-         if (local_err) {
-             error_report_err(local_err);
-             return -EINVAL;
--- a/debian/patches/extra/0005-Revert-Revert-graph-lock-Disable-locking-for-now.patch
+++ b/debian/patches/extra/0005-Revert-Revert-graph-lock-Disable-locking-for-now.patch
@ -1,140 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 28 Sep 2023 10:07:03 +0200
-Subject: [PATCH] Revert "Revert "graph-lock: Disable locking for now""
-
-This reverts commit 3cce22defb4b0e47cf135444e30cc673cff5ebad.
-
-There are still some issues with graph locking, e.g. deadlocks during
-backup canceling [0]. Because the AioContext locks still exist, it
-should be safe to disable locking again.
-
-From the original 80fc5d2600 ("graph-lock: Disable locking for now"):
-
-> We don't currently rely on graph locking yet. It is supposed to replace
-> the AioContext lock eventually to enable multiqueue support, but as long
-> as we still have the AioContext lock, it is sufficient without the graph
-> lock. Once the AioContext lock goes away, the deadlock doesn't exist any
-> more either and this commit can be reverted. (Of course, it can also be
-> reverted while the AioContext lock still exists if the callers have been
-> fixed.)
-
-[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
- block/graph-lock.c | 24 ++++++++++++++++++++++++
- 1 file changed, 24 insertions(+)
-
-diff --git a/block/graph-lock.c b/block/graph-lock.c
-index 5e66f01ae8..5c2873262a 100644
--- a/block/graph-lock.c
-+++ b/block/graph-lock.c
-@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
- /* Protects the list of aiocontext and orphaned_reader_count */
- static QemuMutex aio_context_list_lock;
- 
-+#if 0
- /* Written and read with atomic operations. */
- static int has_writer;
-+#endif
- 
- /*
-  * A reader coroutine could move from an AioContext to another.
-@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
-     g_free(ctx->bdrv_graph);
- }
- 
-+#if 0
- static uint32_t reader_count(void)
- {
-     BdrvGraphRWlock *brdv_graph;
-@@ -105,12 +108,19 @@ static uint32_t reader_count(void)
-     assert((int32_t)rd >= 0);
-     return rd;
- }
-+#endif
- 
- void bdrv_graph_wrlock(BlockDriverState *bs)
- {
-+#if 0
-     AioContext *ctx = NULL;
- 
-     GLOBAL_STATE_CODE();
-+    /*
-+     * TODO Some callers hold an AioContext lock when this is called, which
-+     * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
-+     * AioContext locks are gone).
-+     */
-     assert(!qatomic_read(&has_writer));
- 
-     /*
-@@ -158,11 +168,13 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
-     if (ctx) {
-         aio_context_acquire(bdrv_get_aio_context(bs));
-     }
-+#endif
- }
- 
- void bdrv_graph_wrunlock(void)
- {
-     GLOBAL_STATE_CODE();
-+#if 0
-     QEMU_LOCK_GUARD(&aio_context_list_lock);
-     assert(qatomic_read(&has_writer));
- 
-@@ -174,10 +186,13 @@ void bdrv_graph_wrunlock(void)
- 
-     /* Wake up all coroutine that are waiting to read the graph */
-     qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
-+#endif
- }
- 
- void coroutine_fn bdrv_graph_co_rdlock(void)
- {
-+    /* TODO Reenable when wrlock is reenabled */
-+#if 0
-     BdrvGraphRWlock *bdrv_graph;
-     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
- 
-@@ -237,10 +252,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
-             qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
-         }
-     }
-+#endif
- }
- 
- void coroutine_fn bdrv_graph_co_rdunlock(void)
- {
-+#if 0
-     BdrvGraphRWlock *bdrv_graph;
-     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
- 
-@@ -258,6 +275,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
-     if (qatomic_read(&has_writer)) {
-         aio_wait_kick();
-     }
-+#endif
- }
- 
- void bdrv_graph_rdlock_main_loop(void)
-@@ -275,13 +293,19 @@ void bdrv_graph_rdunlock_main_loop(void)
- void assert_bdrv_graph_readable(void)
- {
-     /* reader_count() is slow due to aio_context_list_lock lock contention */
-+    /* TODO Reenable when wrlock is reenabled */
-+#if 0
- #ifdef CONFIG_DEBUG_GRAPH_LOCK
-     assert(qemu_in_main_thread() || reader_count());
- #endif
-+#endif
- }
- 
- void assert_bdrv_graph_writable(void)
- {
-     assert(qemu_in_main_thread());
-+    /* TODO Reenable when wrlock is reenabled */
-+#if 0
-     assert(qatomic_read(&has_writer));
-+#endif
- }
--- a/debian/patches/extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
+++ b/debian/patches/extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
@ -0,0 +1,35 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 29 Apr 2024 15:41:11 +0200
+Subject: [PATCH] block/copy-before-write: use uint64_t for timeout in
+ nanoseconds
+
+rather than the uint32_t for which the maximum is slightly more than 4
+seconds and larger values would overflow. The QAPI interface allows
+specifying the number of seconds, so only values 0 to 4 are safe right
+now, other values lead to a much lower timeout than a user expects.
+
+The block_copy() call where this is used already takes a uint64_t for
+the timeout, so no change required there.
+
+Fixes: 6db7fd1ca9 ("block/copy-before-write: implement cbw-timeout option")
+Reported-by: Friedrich Weber <f.weber@proxmox.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Tested-by: Friedrich Weber <f.weber@proxmox.com>
+---
+ block/copy-before-write.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 8aba27a71d..026fa9840f 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -43,7 +43,7 @@ typedef struct BDRVCopyBeforeWriteState {
+     BlockCopyState *bcs;
+     BdrvChild *target;
+     OnCbwError on_cbw_error;
+-    uint32_t cbw_timeout_ns;
+    uint64_t cbw_timeout_ns;
+ 
+     /*
+      * @lock: protects access to @access_bitmap, @done_bitmap and
--- a/debian/patches/extra/0006-block-copy-before-write-fix-permission.patch
+++ b/debian/patches/extra/0006-block-copy-before-write-fix-permission.patch
@ -0,0 +1,55 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:22 +0200
+Subject: [PATCH] block/copy-before-write: fix permission
+
+In case the source node does not have any parents, the condition still
+works as required: the backup job does create the parent by
+
+  block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child
+
+Still, in this case checking the @perm variable doesn't work, as the backup
+job creates the root blk with empty permissions (as it relies on the CBW
+filter to require correct permissions and doesn't want to create extra
+conflicts).
+
+So, we should not check @perm.
+
+The hack may be dropped entirely once transactional insertion of the
+filter (where we don't try to recalculate permissions in the intermediate
+state, when the filter does conflict with the original parent of the
+source node) is merged (old big series
+"[PATCH v5 00/45] Transactional block-graph modifying API"[1] and its
+current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2])
+
+[1] https://patchew.org/QEMU/20220330212902.590099-1-vsementsov@openvz.org/
+[2] https://patchew.org/QEMU/20231017184444.932733-1-vsementsov@yandex-team.ru/
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 026fa9840f..5a9456d426 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -364,9 +364,13 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                            perm, shared, nperm, nshared);
+ 
+         if (!QLIST_EMPTY(&bs->parents)) {
+-            if (perm & BLK_PERM_WRITE) {
+-                *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+-            }
+            /*
+             * Note, that source child may be shared with backup job. Backup job
+             * does create own blk parent on copy-before-write node, so this
+             * works even if source node does not have any parents before backup
+             * start
+             */
+            *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+         }
+     }
--- a/debian/patches/extra/0006-migration-states-workaround-snapshot-performance-reg.patch
+++ b/debian/patches/extra/0006-migration-states-workaround-snapshot-performance-reg.patch
@ -1,57 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 28 Sep 2023 11:19:14 +0200
-Subject: [PATCH] migration states: workaround snapshot performance regression
-
-Commit 813cd616 ("migration: Use migration_transferred_bytes() to
-calculate rate_limit") introduced a prohibitive performance regression
-when taking a snapshot [0]. The reason turns out to be the flushing
-done by migration_transferred_bytes()
-
-Just use a _noflush version of the relevant function as a workaround
-until upstream fixes the issue. This is inspired by a not-applied
-upstream series [1], but doing the very minimum to avoid the
-regression.
-
-[0]: https://gitlab.com/qemu-project/qemu/-/issues/1821
-[1]: https://lists.nongnu.org/archive/html/qemu-devel/2023-05/msg07708.html
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
- migration/migration-stats.c | 16 +++++++++++++++-
- 1 file changed, 15 insertions(+), 1 deletion(-)
-
-diff --git a/migration/migration-stats.c b/migration/migration-stats.c
-index 095d6d75bb..8073c8ebaa 100644
--- a/migration/migration-stats.c
-+++ b/migration/migration-stats.c
-@@ -18,6 +18,20 @@
- 
- MigrationAtomicStats mig_stats;
- 
-+/*
-+ * Same as migration_transferred_bytes below, but using the _noflush
-+ * variant of qemu_file_transferred() to avoid a performance
-+ * regression in migration_rate_exceeded().
-+ */
-+static uint64_t migration_transferred_bytes_noflush(QEMUFile *f)
-+{
-+    uint64_t multifd = stat64_get(&mig_stats.multifd_bytes);
-+    uint64_t qemu_file = qemu_file_transferred_noflush(f);
-+
-+    trace_migration_transferred_bytes(qemu_file, multifd);
-+    return qemu_file + multifd;
-+}
-+
- bool migration_rate_exceeded(QEMUFile *f)
- {
-     if (qemu_file_get_error(f)) {
-@@ -25,7 +39,7 @@ bool migration_rate_exceeded(QEMUFile *f)
-     }
- 
-     uint64_t rate_limit_start = stat64_get(&mig_stats.rate_limit_start);
-    uint64_t rate_limit_current = migration_transferred_bytes(f);
-+    uint64_t rate_limit_current = migration_transferred_bytes_noflush(f);
-     uint64_t rate_limit_used = rate_limit_current - rate_limit_start;
-     uint64_t rate_limit_max = stat64_get(&mig_stats.rate_limit_max);
- 
--- a/debian/patches/extra/0007-block-copy-before-write-support-unligned-snapshot-di.patch
+++ b/debian/patches/extra/0007-block-copy-before-write-support-unligned-snapshot-di.patch
@ -0,0 +1,48 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:23 +0200
+Subject: [PATCH] block/copy-before-write: support unligned snapshot-discard
+
+The first thing that crashes on unaligned access here is
+bdrv_reset_dirty_bitmap(). The correct way is to align down the
+snapshot-discard request.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 5a9456d426..c0e70669a2 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
+ cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
+ {
+     BDRVCopyBeforeWriteState *s = bs->opaque;
+    uint32_t cluster_size = block_copy_cluster_size(s->bcs);
+    int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
+    int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
+    int64_t aligned_bytes;
+
+    if (aligned_end <= aligned_offset) {
+        return 0;
+    }
+    aligned_bytes = aligned_end - aligned_offset;
+ 
+     WITH_QEMU_LOCK_GUARD(&s->lock) {
+-        bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
+        bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
+                                aligned_bytes);
+     }
+ 
+-    block_copy_reset(s->bcs, offset, bytes);
+    block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
+ 
+-    return bdrv_co_pdiscard(s->target, offset, bytes);
+    return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
+ }
+ 
+ static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)
--- a/debian/patches/extra/0008-block-copy-before-write-create-block_copy-bitmap-in-.patch
+++ b/debian/patches/extra/0008-block-copy-before-write-create-block_copy-bitmap-in-.patch
@ -0,0 +1,373 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:24 +0200
+Subject: [PATCH] block/copy-before-write: create block_copy bitmap in filter
+ node
+
+Currently block_copy creates copy_bitmap in the source node. But that
+interacts badly with .independent_close=true of the copy-before-write
+filter: the source node may be detached and removed before the
+.bdrv_close() handler is called, which should call
+block_copy_state_free(), which in turn should remove copy_bitmap.
+
+That's all not ideal: it would be better if internal bitmap of
+block-copy object is not attached to any node. But that is not possible
+now.
+
+The simplest solution is to just create copy_bitmap in the filter node,
+where the two other bitmaps are created anyway.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c         |   3 +-
+ block/copy-before-write.c  |   2 +-
+ include/block/block-copy.h |   1 +
+ tests/qemu-iotests/257.out | 112 ++++++++++++++++++-------------------
+ 4 files changed, 60 insertions(+), 58 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 9ee3dd7ef5..8fca2c3698 100644
+--- a/block/block-copy.c
+++ b/block/block-copy.c
+@@ -351,6 +351,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ }
+ 
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      Error **errp)
+ {
+@@ -367,7 +368,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+         return NULL;
+     }
+ 
+-    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
+    copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
+                                            errp);
+     if (!copy_bitmap) {
+         return NULL;
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index c0e70669a2..94db31512d 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -468,7 +468,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+              bs->file->bs->supported_zero_flags);
+ 
+-    s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
+    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 0700953ab8..8b41643bfa 100644
+--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
+@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
+ typedef struct BlockCopyCallState BlockCopyCallState;
+ 
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      Error **errp);
+ 
+diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
+index aa76131ca9..c33dd7f3a9 100644
+--- a/tests/qemu-iotests/257.out
+++ b/tests/qemu-iotests/257.out
+@@ -120,16 +120,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -596,16 +596,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -865,16 +865,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2086,16 +2086,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2355,16 +2355,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2831,16 +2831,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3100,16 +3100,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3576,16 +3576,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3845,16 +3845,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -4321,16 +4321,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -4590,16 +4590,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -5066,16 +5066,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
--- a/debian/patches/extra/0008-target-i386-the-sgx_epc_get_section-stub-is-reachabl.patch
+++ b/debian/patches/extra/0008-target-i386-the-sgx_epc_get_section-stub-is-reachabl.patch
@ -1,34 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Paolo Bonzini <pbonzini@redhat.com>
-Date: Tue, 1 Feb 2022 20:09:41 +0100
-Subject: [PATCH] target/i386: the sgx_epc_get_section stub is reachable
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The sgx_epc_get_section stub is reachable from cpu_x86_cpuid.  It
-should not assert, instead it should just return true just like
-the "real" sgx_epc_get_section does when SGX is disabled.
-
-Reported-by: Vladimír Beneš <vbenes@redhat.com>
-Cc: qemu-stable@nongnu.org
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Message-ID: <20220201190941.106001-1-pbonzini@redhat.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-(cherry-picked from commit 219615740425d9683588207b40a365e6741691a6)
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
- hw/i386/sgx-stub.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c
-index 26833eb233..16b1dfd90b 100644
--- a/hw/i386/sgx-stub.c
-+++ b/hw/i386/sgx-stub.c
-@@ -34,5 +34,5 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
- 
- bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size)
- {
-    g_assert_not_reached();
-+    return true;
- }
--- a/debian/patches/extra/0009-qapi-blockdev-backup-add-discard-source-parameter.patch
+++ b/debian/patches/extra/0009-qapi-blockdev-backup-add-discard-source-parameter.patch
@ -0,0 +1,277 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:25 +0200
+Subject: [PATCH] qapi: blockdev-backup: add discard-source parameter
+
+Add a parameter that enables discard-after-copy. That is mostly useful
+in the "push backup with fleecing" scheme, where the source is a
+snapshot-access format driver node, based on the copy-before-write
+filter's snapshot-access API:
+
+[guest]      [snapshot-access] ~~ blockdev-backup ~~> [backup target]
+   |            |
+   | root       | file
+   v            v
+[copy-before-write]
+   |             |
+   | file        | target
+   v             v
+[active disk]   [temp.img]
+
+In this case discard-after-copy does two things:
+
+ - discard data in temp.img to save disk space
+ - avoid further copy-before-write operation in discarded area
+
+Note that we have to declare WRITE permission on the source in the
+copy-before-write filter for discard to work. Still, we can't take it
+unconditionally, as it would break normal backup from a RO source. So, we
+have to add a parameter and pass it through bdrv_open flags.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c                         |  5 +++--
+ block/block-copy.c                     |  9 +++++++++
+ block/copy-before-write.c              | 15 +++++++++++++--
+ block/copy-before-write.h              |  1 +
+ block/replication.c                    |  4 ++--
+ blockdev.c                             |  2 +-
+ include/block/block-common.h           |  2 ++
+ include/block/block-copy.h             |  1 +
+ include/block/block_int-global-state.h |  2 +-
+ qapi/block-core.json                   |  4 ++++
+ 10 files changed, 37 insertions(+), 8 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index ec29d6b810..3dd2e229d2 100644
+--- a/block/backup.c
+++ b/block/backup.c
+@@ -356,7 +356,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+                   BlockDriverState *target, int64_t speed,
+                   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
+                   BitmapSyncMode bitmap_mode,
+-                  bool compress,
+                  bool compress, bool discard_source,
+                   const char *filter_node_name,
+                   BackupPerf *perf,
+                   BlockdevOnError on_source_error,
+@@ -457,7 +457,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+         goto error;
+     }
+ 
+-    cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
+    cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+                          &bcs, errp);
+     if (!cbw) {
+         goto error;
+     }
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 8fca2c3698..7e3b378528 100644
+--- a/block/block-copy.c
+++ b/block/block-copy.c
+@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
+     CoMutex lock;
+     int64_t in_flight_bytes;
+     BlockCopyMethod method;
+    bool discard_source;
+     BlockReqList reqs;
+     QLIST_HEAD(, BlockCopyCallState) calls;
+     /*
+@@ -353,6 +354,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                     bool discard_source,
+                                      Error **errp)
+ {
+     ERRP_GUARD();
+@@ -418,6 +420,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     cluster_size),
+     };
+ 
+    s->discard_source = discard_source;
+     block_copy_set_copy_opts(s, false, false);
+ 
+     ratelimit_init(&s->rate_limit);
+@@ -589,6 +592,12 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
+     co_put_to_shres(s->mem, t->req.bytes);
+     block_copy_task_end(t, ret);
+ 
+    if (s->discard_source && ret == 0) {
+        int64_t nbytes =
+            MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
+        bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
+    }
+
+     return ret;
+ }
+ 
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 94db31512d..853e01a1eb 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
+     BdrvChild *target;
+     OnCbwError on_cbw_error;
+     uint64_t cbw_timeout_ns;
+    bool discard_source;
+ 
+     /*
+      * @lock: protects access to @access_bitmap, @done_bitmap and
+@@ -357,6 +358,8 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                uint64_t perm, uint64_t shared,
+                uint64_t *nperm, uint64_t *nshared)
+ {
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+
+     if (!(role & BDRV_CHILD_FILTERED)) {
+         /*
+          * Target child
+@@ -381,6 +384,10 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+              * start
+              */
+             *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+            if (s->discard_source) {
+                *nperm = *nperm | BLK_PERM_WRITE;
+            }
+
+             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+         }
+     }
+@@ -468,7 +475,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+              bs->file->bs->supported_zero_flags);
+ 
+-    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
+    s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
+    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
+                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+@@ -535,12 +544,14 @@ static BlockDriver bdrv_cbw_filter = {
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                  bool discard_source,
+                                   BlockCopyState **bcs,
+                                   Error **errp)
+ {
+     BDRVCopyBeforeWriteState *state;
+     BlockDriverState *top;
+     QDict *opts;
+    int flags = BDRV_O_RDWR | (discard_source ? BDRV_O_CBW_DISCARD_SOURCE : 0);
+ 
+     assert(source->total_sectors == target->total_sectors);
+     GLOBAL_STATE_CODE();
+@@ -553,7 +564,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+     qdict_put_str(opts, "file", bdrv_get_node_name(source));
+     qdict_put_str(opts, "target", bdrv_get_node_name(target));
+ 
+-    top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
+    top = bdrv_insert_node(source, opts, flags, errp);
+     if (!top) {
+         return NULL;
+     }
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 6e72bb25e9..01af0cd3c4 100644
+--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
+@@ -39,6 +39,7 @@
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                  bool discard_source,
+                                   BlockCopyState **bcs,
+                                   Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/block/replication.c b/block/replication.c
+index ca6bd0a720..0415a5e8b7 100644
+--- a/block/replication.c
+++ b/block/replication.c
+@@ -582,8 +582,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
+ 
+         s->backup_job = backup_job_create(
+                                 NULL, s->secondary_disk->bs, s->hidden_disk->bs,
+-                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
+-                                &perf,
+                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
+                                NULL, &perf,
+                                 BLOCKDEV_ON_ERROR_REPORT,
+                                 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
+                                 backup_job_completed, bs, NULL, &local_err);
+diff --git a/blockdev.c b/blockdev.c
+index 057601dcf0..4c33c3f5f0 100644
+--- a/blockdev.c
+++ b/blockdev.c
+@@ -2726,7 +2726,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+ 
+     job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
+                             backup->sync, bmap, backup->bitmap_mode,
+-                            backup->compress,
+                            backup->compress, backup->discard_source,
+                             backup->filter_node_name,
+                             &perf,
+                             backup->on_source_error,
+diff --git a/include/block/block-common.h b/include/block/block-common.h
+index a846023a09..338fe5ff7a 100644
+--- a/include/block/block-common.h
+++ b/include/block/block-common.h
+@@ -243,6 +243,8 @@ typedef enum {
+                                       read-write fails */
+ #define BDRV_O_IO_URING    0x40000 /* use io_uring instead of the thread pool */
+ 
+#define BDRV_O_CBW_DISCARD_SOURCE 0x80000 /* for copy-before-write filter */
+
+ #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+ 
+ 
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 8b41643bfa..bdc703bacd 100644
+--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
+@@ -27,6 +27,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                     bool discard_source,
+                                      Error **errp);
+ 
+ /* Function should be called prior any actual copy request */
+diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
+index d2201e27f4..eb2d92a226 100644
+--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
+@@ -193,7 +193,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+                             MirrorSyncMode sync_mode,
+                             BdrvDirtyBitmap *sync_bitmap,
+                             BitmapSyncMode bitmap_mode,
+-                            bool compress,
+                            bool compress, bool discard_source,
+                             const char *filter_node_name,
+                             BackupPerf *perf,
+                             BlockdevOnError on_source_error,
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 4b18e01b85..b179d65520 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -1610,6 +1610,9 @@
+ #     node specified by @drive.  If this option is not given, a node
+ #     name is autogenerated.  (Since: 4.2)
+ #
+# @discard-source: Discard blocks on source which are already copied
+#     to the target.  (Since 9.0)
+#
+ # @x-perf: Performance options.  (Since 6.0)
+ #
+ # Features:
+@@ -1631,6 +1634,7 @@
+             '*on-target-error': 'BlockdevOnError',
+             '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
+             '*filter-node-name': 'str',
+            '*discard-source': 'bool',
+             '*x-perf': { 'type': 'BackupPerf',
+                          'features': [ 'unstable' ] } } }
+ 
--- a/debian/patches/extra/0009-ui-clipboard-mark-type-as-not-available-when-there-i.patch
+++ b/debian/patches/extra/0009-ui-clipboard-mark-type-as-not-available-when-there-i.patch
@ -1,86 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Wed, 24 Jan 2024 11:57:48 +0100
-Subject: [PATCH] ui/clipboard: mark type as not available when there is no
- data
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT
-message with len=0. In qemu_clipboard_set_data(), the clipboard info
-will be updated setting data to NULL (because g_memdup(data, size)
-returns NULL when size is 0). If the client does not set the
-VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then
-the 'request' callback for the clipboard peer is not initialized.
-Later, because data is NULL, qemu_clipboard_request() can be reached
-via vdagent_chr_write() and vdagent_clipboard_recv_request() and
-there, the clipboard owner's 'request' callback will be attempted to
-be called, but that is a NULL pointer.
-
-In particular, this can happen when using the KRDC (22.12.3) VNC
-client.
-
-Another scenario leading to the same issue is with two clients (say
-noVNC and KRDC):
-
-The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and
-initializes its cbpeer.
-
-The KRDC client does not, but triggers a vnc_client_cut_text() (note
-it's not the _ext variant)). There, a new clipboard info with it as
-the 'owner' is created and via qemu_clipboard_set_data() is called,
-which in turn calls qemu_clipboard_update() with that info.
-
-In qemu_clipboard_update(), the notifier for the noVNC client will be
-called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the
-noVNC client. The 'owner' in that clipboard info is the clipboard peer
-for the KRDC client, which did not initialize the 'request' function.
-That sounds correct to me, it is the owner of that clipboard info.
-
-Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set
-the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it
-passes), that clipboard info is passed to qemu_clipboard_request() and
-the original segfault still happens.
-
-Fix the issue by handling updates with size 0 differently. In
-particular, mark in the clipboard info that the type is not available.
-
-While at it, switch to g_memdup2(), because g_memdup() is deprecated.
-
-Cc: qemu-stable@nongnu.org
-Fixes: CVE-2023-6683
-Reported-by: Markus Frank <m.frank@proxmox.com>
-Suggested-by: Marc-André Lureau <marcandre.lureau@redhat.com>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
-Tested-by: Markus Frank <m.frank@proxmox.com>
-(picked from https://lists.nongnu.org/archive/html/qemu-stable/2024-01/msg00228.html)
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
- ui/clipboard.c | 12 +++++++++---
- 1 file changed, 9 insertions(+), 3 deletions(-)
-
-diff --git a/ui/clipboard.c b/ui/clipboard.c
-index 3d14bffaf8..b3f6fa3c9e 100644
--- a/ui/clipboard.c
-+++ b/ui/clipboard.c
-@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer,
-     }
- 
-     g_free(info->types[type].data);
-    info->types[type].data = g_memdup(data, size);
-    info->types[type].size = size;
-    info->types[type].available = true;
-+    if (size) {
-+        info->types[type].data = g_memdup2(data, size);
-+        info->types[type].size = size;
-+        info->types[type].available = true;
-+    } else {
-+        info->types[type].data = NULL;
-+        info->types[type].size = 0;
-+        info->types[type].available = false;
-+    }
- 
-     if (update) {
-         qemu_clipboard_update(info);
--- a/debian/patches/extra/0010-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
+++ b/debian/patches/extra/0010-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
@ -0,0 +1,92 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Tue, 18 Jun 2024 14:19:58 +0200
+Subject: [PATCH] hw/virtio: Fix the de-initialization of vhost-user devices
+
+The unrealize functions of the various vhost-user devices are
+calling the corresponding vhost_*_set_status() functions with a
+status of 0 to shut down the device correctly.
+
+Now these vhost_*_set_status() functions all follow this scheme:
+
+    bool should_start = virtio_device_should_start(vdev, status);
+
+    if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
+        return;
+    }
+
+    if (should_start) {
+        /* ... do the initialization stuff ... */
+    } else {
+        /* ... do the cleanup stuff ... */
+    }
+
+The problem here is virtio_device_should_start(vdev, 0) currently
+always returns "true" since it internally only looks at vdev->started
+instead of looking at the "status" parameter. Thus once the device
+got started once, virtio_device_should_start() always returns true
+and thus the vhost_*_set_status() functions return early, without
+ever doing any clean-up when being called with status == 0. This
+causes e.g. problems when trying to hot-plug and hot-unplug a vhost
+user device multiple times since the de-initialization step is
+completely skipped during the unplug operation.
+
+This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move
+vm_running check to virtio_device_started") which replaced
+
+ should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
+
+with
+
+ should_start = virtio_device_started(vdev, status);
+
+which later got replaced by virtio_device_should_start(). This blocked
+the possibility to set should_start to false in case the status flag
+VIRTIO_CONFIG_S_DRIVER_OK was not set.
+
+Fix it by adjusting the virtio_device_should_start() function to
+only consider the status flag instead of vdev->started. Since this
+function is only used in the various vhost_*_set_status() functions
+for exactly the same purpose, it should be fine to fix it in this
+central place there without any risk to change the behavior of other
+code.
+
+Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started")
+Buglink: https://issues.redhat.com/browse/RHEL-40708
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+Message-Id: <20240618121958.88673-1-thuth@redhat.com>
+Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ include/hw/virtio/virtio.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
+index 7d5ffdc145..2eafad17b8 100644
+--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
+@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
+  * @vdev - the VirtIO device
+  * @status - the devices status bits
+  *
+- * This is similar to virtio_device_started() but also encapsulates a
+- * check on the VM status which would prevent a device starting
+- * anyway.
+ * This is similar to virtio_device_started() but ignores vdev->started
+ * and also encapsulates a check on the VM status which would prevent a
+ * device from starting anyway.
+  */
+ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status)
+ {
+@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status
+         return false;
+     }
+ 
+-    return virtio_device_started(vdev, status);
+    return status & VIRTIO_CONFIG_S_DRIVER_OK;
+ }
+ 
+ static inline void virtio_set_started(VirtIODevice *vdev, bool started)
--- a/debian/patches/extra/0010-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch
+++ b/debian/patches/extra/0010-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch
@ -1,65 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Hanna Czenczek <hreitz@redhat.com>
-Date: Fri, 2 Feb 2024 16:31:56 +0100
-Subject: [PATCH] virtio-scsi: Attach event vq notifier with no_poll
-
-As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi:
-don't waste CPU polling the event virtqueue"), we only attach an io_read
-notifier for the virtio-scsi event virtqueue instead, and no polling
-notifiers.  During operation, the event virtqueue is typically
-non-empty, but none of the buffers are intended to be used immediately.
-Instead, they only get used when certain events occur.  Therefore, it
-makes no sense to continuously poll it when non-empty, because it is
-supposed to be and stay non-empty.
-
-We do this by using virtio_queue_aio_attach_host_notifier_no_poll()
-instead of virtio_queue_aio_attach_host_notifier() for the event
-virtqueue.
-
-Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement
-BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use
-virtio_queue_aio_attach_host_notifier() for all virtqueues, including
-the event virtqueue.  This can lead to it being polled again, undoing
-the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552.
-
-Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the
-event virtqueue.
-
-       ("virtio-scsi: implement BlockDevOps->drained_begin()")
-
-Reported-by: Fiona Ebner <f.ebner@proxmox.com>
-Fixes: 766aa2de0f29b657148e04599320d771c36fd126
-Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
-Tested-by: Fiona Ebner <f.ebner@proxmox.com>
-Reviewed-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- hw/scsi/virtio-scsi.c | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
-index 45b95ea070..ad24a882fd 100644
--- a/hw/scsi/virtio-scsi.c
-+++ b/hw/scsi/virtio-scsi.c
-@@ -1148,6 +1148,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
- static void virtio_scsi_drained_end(SCSIBus *bus)
- {
-     VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
-+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
-     VirtIODevice *vdev = VIRTIO_DEVICE(s);
-     uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
-                             s->parent_obj.conf.num_queues;
-@@ -1165,7 +1166,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus)
- 
-     for (uint32_t i = 0; i < total_queues; i++) {
-         VirtQueue *vq = virtio_get_queue(vdev, i);
-        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
-+        if (vq == vs->event_vq) {
-+            virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
-+        } else {
-+            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
-+        }
-     }
- }
- 
--- a/debian/patches/extra/0011-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
+++ b/debian/patches/extra/0011-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
@ -0,0 +1,43 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Daniyal Khan <danikhan632@gmail.com>
+Date: Wed, 17 Jul 2024 16:01:47 +1000
+Subject: [PATCH] target/arm: Use float_status copy in sme_fmopa_s
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We made a copy above because the fp exception flags
+are not propagated back to the FPST register, but
+then failed to use the copy.
+
+Cc: qemu-stable@nongnu.org
+Fixes: 558e956c719 ("target/arm: Implement FMOPA, FMOPS (non-widening)")
+Signed-off-by: Daniyal Khan <danikhan632@gmail.com>
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Message-id: 20240717060149.204788-2-richard.henderson@linaro.org
+[rth: Split from a larger patch]
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+(cherry picked from commit 31d93fedf41c24b0badb38cd9317590d1ef74e37)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/sme_helper.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
+index e2e0575039..5a6dd76489 100644
+--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
+@@ -916,7 +916,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
+                         if (pb & 1) {
+                             uint32_t *a = vza_row + H1_4(col);
+                             uint32_t *m = vzm + H1_4(col);
+-                            *a = float32_muladd(n, *m, *a, 0, vst);
+                            *a = float32_muladd(n, *m, *a, 0, &fpst);
+                         }
+                         col += 4;
+                         pb >>= 4;
--- a/debian/patches/extra/0011-virtio-Re-enable-notifications-after-drain.patch
+++ b/debian/patches/extra/0011-virtio-Re-enable-notifications-after-drain.patch
@ -1,125 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Hanna Czenczek <hreitz@redhat.com>
-Date: Fri, 2 Feb 2024 16:31:57 +0100
-Subject: [PATCH] virtio: Re-enable notifications after drain
-
-During drain, we do not care about virtqueue notifications, which is why
-we remove the handlers on it.  When removing those handlers, whether vq
-notifications are enabled or not depends on whether we were in polling
-mode or not; if not, they are enabled (by default); if so, they have
-been disabled by the io_poll_start callback.
-
-Because we do not care about those notifications after removing the
-handlers, this is fine.  However, we have to explicitly ensure they are
-enabled when re-attaching the handlers, so we will resume receiving
-notifications.  We do this in virtio_queue_aio_attach_host_notifier*().
-If such a function is called while we are in a polling section,
-attaching the notifiers will then invoke the io_poll_start callback,
-re-disabling notifications.
-
-Because we will always miss virtqueue updates in the drained section, we
-also need to poll the virtqueue once after attaching the notifiers.
-
-Buglink: https://issues.redhat.com/browse/RHEL-3934
-Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- hw/virtio/virtio.c  | 42 ++++++++++++++++++++++++++++++++++++++++++
- include/block/aio.h |  7 ++++++-
- 2 files changed, 48 insertions(+), 1 deletion(-)
-
-diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
-index 969c25f4cf..02cce83111 100644
--- a/hw/virtio/virtio.c
-+++ b/hw/virtio/virtio.c
-@@ -3526,6 +3526,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
- 
- void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
- {
-+    /*
-+     * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
-+     * Re-enable them.  (And if detach has not been used before, notifications
-+     * being enabled is still the default state while a notifier is attached;
-+     * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
-+     * notifications enabled once the polling section is left.)
-+     */
-+    if (!virtio_queue_get_notification(vq)) {
-+        virtio_queue_set_notification(vq, 1);
-+    }
-+
-     aio_set_event_notifier(ctx, &vq->host_notifier,
-                            virtio_queue_host_notifier_read,
-                            virtio_queue_host_notifier_aio_poll,
-@@ -3533,6 +3544,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
-     aio_set_event_notifier_poll(ctx, &vq->host_notifier,
-                                 virtio_queue_host_notifier_aio_poll_begin,
-                                 virtio_queue_host_notifier_aio_poll_end);
-+
-+    /*
-+     * We will have ignored notifications about new requests from the guest
-+     * while no notifiers were attached, so "kick" the virt queue to process
-+     * those requests now.
-+     */
-+    event_notifier_set(&vq->host_notifier);
- }
- 
- /*
-@@ -3543,14 +3561,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
-  */
- void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
- {
-+    /* See virtio_queue_aio_attach_host_notifier() */
-+    if (!virtio_queue_get_notification(vq)) {
-+        virtio_queue_set_notification(vq, 1);
-+    }
-+
-     aio_set_event_notifier(ctx, &vq->host_notifier,
-                            virtio_queue_host_notifier_read,
-                            NULL, NULL);
-+
-+    /*
-+     * See virtio_queue_aio_attach_host_notifier().
-+     * Note that this may be unnecessary for the type of virtqueues this
-+     * function is used for.  Still, it will not hurt to have a quick look into
-+     * whether we can/should process any of the virtqueue elements.
-+     */
-+    event_notifier_set(&vq->host_notifier);
- }
- 
- void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
- {
-     aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
-+
-+    /*
-+     * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
-+     * will run after io_poll_begin(), so by removing the notifier, we do not
-+     * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
-+     * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
-+     * notifications are enabled or disabled.  It does not really matter anyway;
-+     * we just removed the notifier, so we do not care about notifications until
-+     * we potentially re-attach it.  The attach_host_notifier functions will
-+     * ensure that notifications are enabled again when they are needed.
-+     */
- }
- 
- void virtio_queue_host_notifier_read(EventNotifier *n)
-diff --git a/include/block/aio.h b/include/block/aio.h
-index 32042e8905..79efadfa48 100644
--- a/include/block/aio.h
-+++ b/include/block/aio.h
-@@ -498,9 +498,14 @@ void aio_set_event_notifier(AioContext *ctx,
-                             AioPollFn *io_poll,
-                             EventNotifierHandler *io_poll_ready);
- 
-/* Set polling begin/end callbacks for an event notifier that has already been
-+/*
-+ * Set polling begin/end callbacks for an event notifier that has already been
-  * registered with aio_set_event_notifier.  Do nothing if the event notifier is
-  * not registered.
-+ *
-+ * Note that if the io_poll_end() callback (or the entire notifier) is removed
-+ * during polling, it will not be called, so an io_poll_begin() is not
-+ * necessarily always followed by an io_poll_end().
-  */
- void aio_set_event_notifier_poll(AioContext *ctx,
-                                  EventNotifier *notifier,
--- a/debian/patches/extra/0012-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
+++ b/debian/patches/extra/0012-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
@ -0,0 +1,62 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Richard Henderson <richard.henderson@linaro.org>
+Date: Wed, 17 Jul 2024 16:01:48 +1000
+Subject: [PATCH] target/arm: Use FPST_F16 for SME FMOPA (widening)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This operation has float16 inputs and thus must use
+the FZ16 control not the FZ control.
+
+Cc: qemu-stable@nongnu.org
+Fixes: 3916841ac75 ("target/arm: Implement FMOPA, FMOPS (widening)")
+Reported-by: Daniyal Khan <danikhan632@gmail.com>
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Message-id: 20240717060149.204788-3-richard.henderson@linaro.org
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2374
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+(cherry picked from commit 207d30b5fdb5b45a36f26eefcf52fe2c1714dd4f)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/translate-sme.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
+index 46c7fce8b4..185a8a917b 100644
+--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
+@@ -304,6 +304,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
+ }
+ 
+ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
+                            ARMFPStatusFlavour e_fpst,
+                             gen_helper_gvec_5_ptr *fn)
+ {
+     int svl = streaming_vec_reg_size(s);
+@@ -319,15 +320,18 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
+     zm = vec_full_reg_ptr(s, a->zm);
+     pn = pred_full_reg_ptr(s, a->pn);
+     pm = pred_full_reg_ptr(s, a->pm);
+-    fpst = fpstatus_ptr(FPST_FPCR);
+    fpst = fpstatus_ptr(e_fpst);
+ 
+     fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
+     return true;
+ }
+ 
+-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
+-TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
+-TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
+           MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
+           MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
+           MO_64, FPST_FPCR, gen_helper_sme_fmopa_d)
+ 
+ /* TODO: FEAT_EBF16 */
+ TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
--- a/debian/patches/extra/0013-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
+++ b/debian/patches/extra/0013-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
@ -0,0 +1,60 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Wed, 10 Jul 2024 17:25:29 +0200
+Subject: [PATCH] scsi: fix regression and honor bootindex again for legacy
+ drives
+
+Commit 3089637461 ("scsi: Don't ignore most usb-storage properties")
+removed the call to object_property_set_int() and thus the 'set'
+method for the bootindex property was also not called anymore. Here
+that method is device_set_bootindex() (as configured by
+scsi_dev_instance_init() -> device_add_bootindex_property()) which as
+a side effect registers the device via add_boot_device_path().
+
+As reported by a downstream user [0], the bootindex property did not
+have the desired effect anymore for legacy drives. Fix the regression
+by explicitly calling the add_boot_device_path() function after
+checking that the bootindex is not yet used (to avoid
+add_boot_device_path() calling exit()).
+
+[0]: https://forum.proxmox.com/threads/149772/post-679433
+
+Cc: qemu-stable@nongnu.org
+Fixes: 3089637461 ("scsi: Don't ignore most usb-storage properties")
+Suggested-by: Kevin Wolf <kwolf@redhat.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Link: https://lore.kernel.org/r/20240710152529.1737407-1-f.ebner@proxmox.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 57a8a80d1a5b28797b21d30bfc60601945820e51)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/scsi/scsi-bus.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
+index 9e40b0c920..53eff5dd3d 100644
+--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
+@@ -384,6 +384,7 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
+     DeviceState *dev;
+     SCSIDevice *s;
+     DriveInfo *dinfo;
+    Error *local_err = NULL;
+ 
+     if (blk_is_sg(blk)) {
+         driver = "scsi-generic";
+@@ -403,6 +404,14 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
+     s = SCSI_DEVICE(dev);
+     s->conf = *conf;
+ 
+    check_boot_index(conf->bootindex, &local_err);
+    if (local_err) {
+        object_unparent(OBJECT(dev));
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+    add_boot_device_path(conf->bootindex, dev, NULL);
+
+     qdev_prop_set_uint32(dev, "scsi-id", unit);
+     if (object_property_find(OBJECT(dev), "removable")) {
+         qdev_prop_set_bit(dev, "removable", removable);
--- a/debian/patches/extra/0014-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
+++ b/debian/patches/extra/0014-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
@ -0,0 +1,48 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 15 Jul 2024 15:14:03 +0200
+Subject: [PATCH] hw/scsi/lsi53c895a: bump instruction limit in scripts
+ processing to fix regression
+
+Commit 9876359990 ("hw/scsi/lsi53c895a: add timer to scripts
+processing") reduced the maximum allowed instruction count by
+a factor of 100 all the way down to 100.
+
+This causes the "Check Point R81.20 Gaia" appliance [0] to fail to
+boot after fully finishing the installation via the appliance's web
+interface (there is already one reboot before that).
+
+With a limit of 150, the appliance still fails to boot, while with a
+limit of 200, it works. Bump to 500 to fix the regression and be on
+the safe side.
+
+Originally reported in the Proxmox community forum[1].
+
+[0]: https://support.checkpoint.com/results/download/124397
+[1]: https://forum.proxmox.com/threads/149772/post-683459
+
+Cc: qemu-stable@nongnu.org
+Fixes: 9876359990 ("hw/scsi/lsi53c895a: add timer to scripts processing")
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Acked-by: Sven Schnelle <svens@stackframe.org>
+Link: https://lore.kernel.org/r/20240715131403.223239-1-f.ebner@proxmox.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit a4975023fb13cf229bd59c9ceec1b8cbdc5b9a20)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/scsi/lsi53c895a.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
+index eb9828dd5e..f1935e5328 100644
+--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
+@@ -188,7 +188,7 @@ static const char *names[] = {
+ #define LSI_TAG_VALID     (1 << 16)
+ 
+ /* Maximum instructions to process. */
+-#define LSI_MAX_INSN    100
+#define LSI_MAX_INSN    500
+ 
+ typedef struct lsi_request {
+     SCSIRequest *req;
--- a/debian/patches/extra/0015-block-copy-Fix-missing-graph-lock.patch
+++ b/debian/patches/extra/0015-block-copy-Fix-missing-graph-lock.patch
@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Thu, 27 Jun 2024 20:12:44 +0200
+Subject: [PATCH] block-copy: Fix missing graph lock
+
+The graph lock needs to be held when calling bdrv_co_pdiscard(). Fix
+block_copy_task_entry() to take it for the call.
+
+WITH_GRAPH_RDLOCK_GUARD() was implemented in a weak way because of
+limitations in clang's Thread Safety Analysis at the time, so that it
+only asserts that the lock is held (which allows calling functions that
+require the lock), but we never deal with the unlocking (so even after
+the scope of the guard, the compiler assumes that the lock is still
+held). This is why the compiler didn't catch this locking error.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+(picked from https://lore.kernel.org/qemu-devel/20240627181245.281403-2-kwolf@redhat.com/)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/block-copy.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 7e3b378528..cc618e4561 100644
+--- a/block/block-copy.c
+++ b/block/block-copy.c
+@@ -595,7 +595,9 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
+     if (s->discard_source && ret == 0) {
+         int64_t nbytes =
+             MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
+-        bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
+        WITH_GRAPH_RDLOCK_GUARD() {
+            bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
+        }
+     }
+ 
+     return ret;
--- a/debian/patches/extra/0016-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
+++ b/debian/patches/extra/0016-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
@ -0,0 +1,93 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli@nutanix.com>
+Date: Fri, 12 Jul 2024 09:26:59 +0000
+Subject: [PATCH] Revert "qemu-char: do not operate on sources from finalize
+ callbacks"
+
+This reverts commit 2b316774f60291f57ca9ecb6a9f0712c532cae34.
+
+After 038b4217884c ("Revert "chardev: use a child source for qio input
+source"") we've been observing the "iwp->src == NULL" assertion
+triggering periodically during the initial capabilities querying by
+libvirtd. One of possible backtraces:
+
+Thread 1 (Thread 0x7f16cd4f0700 (LWP 43858)):
+0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
+1  0x00007f16c6c21e65 in __GI_abort () at abort.c:79
+2  0x00007f16c6c21d39 in __assert_fail_base  at assert.c:92
+3  0x00007f16c6c46e86 in __GI___assert_fail (assertion=assertion@entry=0x562e9bcdaadd "iwp->src == NULL", file=file@entry=0x562e9bcdaac8 "../chardev/char-io.c", line=line@entry=99, function=function@entry=0x562e9bcdab10 <__PRETTY_FUNCTION__.20549> "io_watch_poll_finalize") at assert.c:101
+4  0x0000562e9ba20c2c in io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:99
+5  io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:88
+6  0x00007f16c904aae0 in g_source_unref_internal () from /lib64/libglib-2.0.so.0
+7  0x00007f16c904baf9 in g_source_destroy_internal () from /lib64/libglib-2.0.so.0
+8  0x0000562e9ba20db0 in io_remove_watch_poll (source=0x562e9d6720b0) at ../chardev/char-io.c:147
+9  remove_fd_in_watch (chr=chr@entry=0x562e9d5f3800) at ../chardev/char-io.c:153
+10 0x0000562e9ba23ffb in update_ioc_handlers (s=0x562e9d5f3800) at ../chardev/char-socket.c:592
+11 0x0000562e9ba2072f in qemu_chr_fe_set_handlers_full at ../chardev/char-fe.c:279
+12 0x0000562e9ba207a9 in qemu_chr_fe_set_handlers at ../chardev/char-fe.c:304
+13 0x0000562e9ba2ca75 in monitor_qmp_setup_handlers_bh (opaque=0x562e9d4c2c60) at ../monitor/qmp.c:509
+14 0x0000562e9bb6222e in aio_bh_poll (ctx=ctx@entry=0x562e9d4c2f20) at ../util/async.c:216
+15 0x0000562e9bb4de0a in aio_poll (ctx=0x562e9d4c2f20, blocking=blocking@entry=true) at ../util/aio-posix.c:722
+16 0x0000562e9b99dfaa in iothread_run (opaque=0x562e9d4c26f0) at ../iothread.c:63
+17 0x0000562e9bb505a4 in qemu_thread_start (args=0x562e9d4c7ea0) at ../util/qemu-thread-posix.c:543
+18 0x00007f16c70081ca in start_thread (arg=<optimized out>) at pthread_create.c:479
+19 0x00007f16c6c398d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
+
+io_remove_watch_poll(), which makes sure that iwp->src is NULL, calls
+g_source_destroy() which finds that iwp->src is not NULL in the finalize
+callback. This can only happen if another thread has managed to trigger
+io_watch_poll_prepare() callback in the meantime.
+
+Move iwp->src destruction back to the finalize callback to prevent the
+described race, and also remove the stale comment. The deadlock glib bug
+was fixed back in 2010 by b35820285668 ("gmain: move finalization of
+GSource outside of context lock").
+
+Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sergey Dyasli <sergey.dyasli@nutanix.com>
+Link: https://lore.kernel.org/r/20240712092659.216206-1-sergey.dyasli@nutanix.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit e0bf95443ee9326d44031373420cf9f3513ee255)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ chardev/char-io.c | 19 +++++--------------
+ 1 file changed, 5 insertions(+), 14 deletions(-)
+
+diff --git a/chardev/char-io.c b/chardev/char-io.c
+index dab77b112e..3be17b51ca 100644
+--- a/chardev/char-io.c
+++ b/chardev/char-io.c
+@@ -87,16 +87,12 @@ static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
+ 
+ static void io_watch_poll_finalize(GSource *source)
+ {
+-    /*
+-     * Due to a glib bug, removing the last reference to a source
+-     * inside a finalize callback causes recursive locking (and a
+-     * deadlock).  This is not a problem inside other callbacks,
+-     * including dispatch callbacks, so we call io_remove_watch_poll
+-     * to remove this source.  At this point, iwp->src must
+-     * be NULL, or we would leak it.
+-     */
+     IOWatchPoll *iwp = io_watch_poll_from_source(source);
+-    assert(iwp->src == NULL);
+    if (iwp->src) {
+        g_source_destroy(iwp->src);
+        g_source_unref(iwp->src);
+        iwp->src = NULL;
+    }
+ }
+ 
+ static GSourceFuncs io_watch_poll_funcs = {
+@@ -139,11 +135,6 @@ static void io_remove_watch_poll(GSource *source)
+     IOWatchPoll *iwp;
+ 
+     iwp = io_watch_poll_from_source(source);
+-    if (iwp->src) {
+-        g_source_destroy(iwp->src);
+-        g_source_unref(iwp->src);
+-        iwp->src = NULL;
+-    }
+     g_source_destroy(&iwp->parent);
+ }
+ 
--- a/debian/patches/extra/0017-virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch
+++ b/debian/patches/extra/0017-virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch
@ -0,0 +1,77 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Tue, 6 Aug 2024 17:37:12 +0800
+Subject: [PATCH] virtio-pci: Fix the use of an uninitialized irqfd
+
+The crash was reported in MAC OS and NixOS, here is the link for this bug
+https://gitlab.com/qemu-project/qemu/-/issues/2334
+https://gitlab.com/qemu-project/qemu/-/issues/2321
+
+In this bug, they are using the virtio_input device. The guest notifier was
+not supported for this device. The function virtio_pci_set_guest_notifiers()
+was not called, and the vector_irqfd was not initialized.
+
+So the fix is adding the check for vector_irqfd in virtio_pci_get_notifier()
+
+The function virtio_pci_get_notifier() can be used in various devices.
+It could also be called when VIRTIO_CONFIG_S_DRIVER_OK is not set. In this situation,
+the vector_irqfd being NULL is acceptable. We can allow the device to continue to boot.
+
+If the vector_irqfd still hasn't been initialized after VIRTIO_CONFIG_S_DRIVER_OK
+is set, it means that the function set_guest_notifiers was not called before the
+driver started. This indicates that the device is not using the notifier.
+At this point, we will let the check fail.
+
+This fix is verified on Vyatta, macOS, NixOS, and Fedora systems.
+
+The bt tree for this bug is:
+Thread 6 "CPU 0/KVM" received signal SIGSEGV, Segmentation fault.
+[Switching to Thread 0x7c817be006c0 (LWP 1269146)]
+kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817
+817         if (irqfd->users == 0) {
+(gdb) thread apply all bt
+...
+Thread 6 (Thread 0x7c817be006c0 (LWP 1269146) "CPU 0/KVM"):
+0  kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817
+1  kvm_virtio_pci_vector_use_one () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:893
+2  0x00005983657045e2 in memory_region_write_accessor () at ../qemu-9.0.0/system/memory.c:497
+3  0x0000598365704ba6 in access_with_adjusted_size () at ../qemu-9.0.0/system/memory.c:573
+4  0x0000598365705059 in memory_region_dispatch_write () at ../qemu-9.0.0/system/memory.c:1528
+5  0x00005983659b8e1f in flatview_write_continue_step.isra.0 () at ../qemu-9.0.0/system/physmem.c:2713
+6  0x000059836570ba7d in flatview_write_continue () at ../qemu-9.0.0/system/physmem.c:2743
+7  flatview_write () at ../qemu-9.0.0/system/physmem.c:2774
+8  0x000059836570bb76 in address_space_write () at ../qemu-9.0.0/system/physmem.c:2894
+9  0x0000598365763afe in address_space_rw () at ../qemu-9.0.0/system/physmem.c:2904
+10 kvm_cpu_exec () at ../qemu-9.0.0/accel/kvm/kvm-all.c:2917
+11 0x000059836576656e in kvm_vcpu_thread_fn () at ../qemu-9.0.0/accel/kvm/kvm-accel-ops.c:50
+12 0x0000598365926ca8 in qemu_thread_start () at ../qemu-9.0.0/util/qemu-thread-posix.c:541
+13 0x00007c8185bcd1cf in ??? () at /usr/lib/libc.so.6
+14 0x00007c8185c4e504 in clone () at /usr/lib/libc.so.6
+
+Fixes: 2ce6cff94d ("virtio-pci: fix use of a released vector")
+Cc: qemu-stable@nongnu.org
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20240806093715.65105-1-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit a8e63ff289d137197ad7a701a587cc432872d798)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/virtio/virtio-pci.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
+index e04218a9fb..389bab003f 100644
+--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
+@@ -860,6 +860,9 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+     VirtQueue *vq;
+ 
+    if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
+        return -1;
+
+     if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
+         *n = virtio_config_get_guest_notifier(vdev);
+         *vector = vdev->config_vector;
--- a/debian/patches/extra/0018-virtio-net-Ensure-queue-index-fits-with-RSS.patch
+++ b/debian/patches/extra/0018-virtio-net-Ensure-queue-index-fits-with-RSS.patch
@ -0,0 +1,35 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Akihiko Odaki <akihiko.odaki@daynix.com>
+Date: Mon, 1 Jul 2024 20:58:04 +0900
+Subject: [PATCH] virtio-net: Ensure queue index fits with RSS
+
+Ensure the queue index points to a valid queue when software RSS
+is enabled. The new calculation matches the behavior of Linux's TAP
+device with the RSS eBPF program.
+
+Fixes: 4474e37a5b3a ("virtio-net: implement RX RSS processing")
+Reported-by: Zhibin Hu <huzhibin5@huawei.com>
+Cc: qemu-stable@nongnu.org
+Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+(cherry picked from commit f1595ceb9aad36a6c1da95bcb77ab9509b38822d)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/net/virtio-net.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index 3644bfd91b..f48588638d 100644
+--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
+@@ -1949,7 +1949,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
+     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
+         int index = virtio_net_process_rss(nc, buf, size);
+         if (index >= 0) {
+-            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
+            NetClientState *nc2 =
+                qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
+             return virtio_net_receive_rcu(nc2, buf, size, true);
+         }
+     }
--- a/debian/patches/extra/0019-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch
+++ b/debian/patches/extra/0019-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch
@ -0,0 +1,338 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: thomas <east.moutain.yang@gmail.com>
+Date: Fri, 12 Jul 2024 11:10:53 +0800
+Subject: [PATCH] virtio-net: Fix network stall at the host side waiting for
+ kick
+
+Patch 06b12970174 ("virtio-net: fix network stall under load")
+added double-check to test whether the available buffer size
+can satisfy the request or not, in case the guest has added
+some buffers to the avail ring simultaneously after the first
+check. It will be lucky if the available buffer size becomes
+okay after the double-check, then the host can send the packet
+to the guest. If the buffer size still can't satisfy the request,
+even if the guest has added some buffers, virtio-net would
+stall at the host side forever.
+
+The patch enables notification and checks whether the guest has
+added some buffers since last check of available buffers when
+the available buffers are insufficient. If no buffer is added,
+return false, else recheck the available buffers in the loop.
+If the available buffers are sufficient, disable notification
+and return true.
+
+Changes:
+1. Change the return type of virtqueue_get_avail_bytes() from void
+   to int, it returns an opaque that represents the shadow_avail_idx
+   of the virtqueue on success, else -1 on error.
+2. Add a new API: virtio_queue_enable_notification_and_check(),
+   it takes an opaque as input arg which is returned from
+   virtqueue_get_avail_bytes(). It enables notification firstly,
+   then checks whether the guest has added some buffers since
+   last check of available buffers or not by virtio_queue_poll(),
+   return true if yes.
+
+The patch also reverts patch "06b12970174".
+
+The case below can reproduce the stall.
+
+                                       Guest 0
+                                     +--------+
+                                     | iperf  |
+                    ---------------> | server |
+         Host       |                +--------+
+       +--------+   |                    ...
+       | iperf  |----
+       | client |----                  Guest n
+       +--------+   |                +--------+
+                    |                | iperf  |
+                    ---------------> | server |
+                                     +--------+
+
+Boot many guests from qemu with virtio network:
+ qemu ... -netdev tap,id=net_x \
+    -device virtio-net-pci-non-transitional,\
+    iommu_platform=on,mac=xx:xx:xx:xx:xx:xx,netdev=net_x
+
+Each guest acts as iperf server with commands below:
+ iperf3 -s -D -i 10 -p 8001
+ iperf3 -s -D -i 10 -p 8002
+
+The host as iperf client:
+ iperf3 -c guest_IP -p 8001 -i 30 -w 256k -P 20 -t 40000
+ iperf3 -c guest_IP -p 8002 -i 30 -w 256k -P 20 -t 40000
+
+After some time, the host loses connection to the guest,
+the guest can send packet to the host, but can't receive
+packet from the host.
+
+It's more likely to happen if SWIOTLB is enabled in the guest,
+allocating and freeing bounce buffer takes some CPU ticks,
+copying from/to bounce buffer takes more CPU ticks, compared
+with that there is no bounce buffer in the guest.
+Once the rate of producing packets from the host approximates
+the rate of receiving packets in the guest, the guest would
+loop in NAPI.
+
+         receive packets    ---
+               |             |
+               v             |
+           free buf      virtnet_poll
+               |             |
+               v             |
+     add buf to avail ring  ---
+               |
+               |  need kick the host?
+               |  NAPI continues
+               v
+         receive packets    ---
+               |             |
+               v             |
+           free buf      virtnet_poll
+               |             |
+               v             |
+     add buf to avail ring  ---
+               |
+               v
+              ...           ...
+
+On the other hand, the host fetches free buf from avail
+ring, if the buf in the avail ring is not enough, the
+host notifies the guest the event by writing the avail
+idx read from avail ring to the event idx of used ring,
+then the host goes to sleep, waiting for the kick signal
+from the guest.
+
+Once the guest finds the host is waiting for kick signal
+(in virtqueue_kick_prepare_split()), it kicks the host.
+
+The host may stall forever at the sequences below:
+
+         Host                        Guest
+     ------------                 -----------
+ fetch buf, send packet           receive packet ---
+         ...                          ...         |
+ fetch buf, send packet             add buf       |
+         ...                        add buf   virtnet_poll
+    buf not enough      avail idx-> add buf       |
+    read avail idx                  add buf       |
+                                    add buf      ---
+                                  receive packet ---
+    write event idx                   ...         |
+    wait for kick                   add buf   virtnet_poll
+                                      ...         |
+                                                 ---
+                                 no more packet, exit NAPI
+
+In the first loop of NAPI above, indicated in the range of
+virtnet_poll above, the host is sending packets while the
+guest is receiving packets and adding buffers.
+ step 1: The buf is not enough, for example, a big packet
+         needs 5 buf, but the available buf count is 3.
+         The host read current avail idx.
+ step 2: The guest adds some buf, then checks whether the
+         host is waiting for kick signal, not at this time.
+         The used ring is not empty, the guest continues
+         the second loop of NAPI.
+ step 3: The host writes the avail idx read from avail
+         ring to used ring as event idx via
+         virtio_queue_set_notification(q->rx_vq, 1).
+ step 4: At the end of the second loop of NAPI, recheck
+         whether kick is needed, as the event idx in the
+         used ring written by the host is beyond the
+         range of kick condition, the guest will not
+         send kick signal to the host.
+
+Fixes: 06b12970174 ("virtio-net: fix network stall under load")
+Cc: qemu-stable@nongnu.org
+Signed-off-by: Wencheng Yang <east.moutain.yang@gmail.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+(cherry picked from commit f937309fbdbb48c354220a3e7110c202ae4aa7fa)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/net/virtio-net.c        | 28 ++++++++++-------
+ hw/virtio/virtio.c         | 64 +++++++++++++++++++++++++++++++++++---
+ include/hw/virtio/virtio.h | 21 +++++++++++--
+ 3 files changed, 94 insertions(+), 19 deletions(-)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index f48588638d..d4b979d343 100644
+--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
+@@ -1680,24 +1680,28 @@ static bool virtio_net_can_receive(NetClientState *nc)
+ 
+ static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
+ {
+    int opaque;
+    unsigned int in_bytes;
+     VirtIONet *n = q->n;
+-    if (virtio_queue_empty(q->rx_vq) ||
+-        (n->mergeable_rx_bufs &&
+-         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
+-        virtio_queue_set_notification(q->rx_vq, 1);
+-
+-        /* To avoid a race condition where the guest has made some buffers
+-         * available after the above check but before notification was
+-         * enabled, check for available buffers again.
+-         */
+-        if (virtio_queue_empty(q->rx_vq) ||
+-            (n->mergeable_rx_bufs &&
+-             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
+
+    while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
+        opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
+                                           bufsize, 0);
+        /* Buffer is enough, disable notifiaction */
+        if (bufsize <= in_bytes) {
+            break;
+        }
+
+        if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
+            /* Guest has added some buffers, try again */
+            continue;
+        } else {
+             return 0;
+         }
+     }
+ 
+     virtio_queue_set_notification(q->rx_vq, 0);
+
+     return 1;
+ }
+ 
+diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
+index fd2dfe3a6b..08fba6b2d8 100644
+--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
+@@ -743,6 +743,60 @@ int virtio_queue_empty(VirtQueue *vq)
+     }
+ }
+ 
+static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+    if (unlikely(!vq->vring.avail)) {
+        return false;
+    }
+
+    return (uint16_t)shadow_idx != vring_avail_idx(vq);
+}
+
+static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+    VRingPackedDesc desc;
+    VRingMemoryRegionCaches *caches;
+
+    if (unlikely(!vq->vring.desc)) {
+        return false;
+    }
+
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        return false;
+    }
+
+    vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
+                           shadow_idx, true);
+
+    return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter);
+}
+
+static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+    if (virtio_device_disabled(vq->vdev)) {
+        return false;
+    }
+
+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        return virtio_queue_packed_poll(vq, shadow_idx);
+    } else {
+        return virtio_queue_split_poll(vq, shadow_idx);
+    }
+}
+
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
+                                                int opaque)
+{
+    virtio_queue_set_notification(vq, 1);
+
+    if (opaque >= 0) {
+        return virtio_queue_poll(vq, (unsigned)opaque);
+    } else {
+        return false;
+    }
+}
+
+ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
+                                unsigned int len)
+ {
+@@ -1330,9 +1384,9 @@ err:
+     goto done;
+ }
+ 
+-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+-                               unsigned int *out_bytes,
+-                               unsigned max_in_bytes, unsigned max_out_bytes)
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                              unsigned int *out_bytes, unsigned max_in_bytes,
+                              unsigned max_out_bytes)
+ {
+     uint16_t desc_size;
+     VRingMemoryRegionCaches *caches;
+@@ -1365,7 +1419,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                                         caches);
+     }
+ 
+-    return;
+    return (int)vq->shadow_avail_idx;
+ err:
+     if (in_bytes) {
+         *in_bytes = 0;
+@@ -1373,6 +1427,8 @@ err:
+     if (out_bytes) {
+         *out_bytes = 0;
+     }
+
+    return -1;
+ }
+ 
+ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
+index 2eafad17b8..8b4da92889 100644
+--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
+@@ -271,9 +271,13 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
+                                 VirtQueueElement *elem);
+ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+                           unsigned int out_bytes);
+-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+-                               unsigned int *out_bytes,
+-                               unsigned max_in_bytes, unsigned max_out_bytes);
+/**
+ * Return <0 on error or an opaque >=0 to pass to
+ * virtio_queue_enable_notification_and_check on success.
+ */
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                              unsigned int *out_bytes, unsigned max_in_bytes,
+                              unsigned max_out_bytes);
+ 
+ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq);
+ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
+@@ -307,6 +311,17 @@ int virtio_queue_ready(VirtQueue *vq);
+ 
+ int virtio_queue_empty(VirtQueue *vq);
+ 
+/**
+ * Enable notification and check whether guest has added some
+ * buffers since last call to virtqueue_get_avail_bytes.
+ *
+ * @opaque: value returned from virtqueue_get_avail_bytes
+ */
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
+                                                int opaque);
+
+void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t idx);
+
+ /* Host binding interface.  */
+ 
+ uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr);
--- a/debian/patches/extra/0020-net-Reinstate-net-nic-model-help-output-as-documente.patch
+++ b/debian/patches/extra/0020-net-Reinstate-net-nic-model-help-output-as-documente.patch
@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Tue, 9 Jul 2024 13:34:44 +0100
+Subject: [PATCH] net: Reinstate '-net nic, model=help' output as documented in
+ man page
+
+While refactoring the NIC initialization code, I broke '-net nic,model=help'
+which no longer outputs a list of available NIC models.
+
+Fixes: 2cdeca04adab ("net: report list of available models according to platform")
+Cc: qemu-stable@nongnu.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+(cherry picked from commit 64f75f57f9d2c8c12ac6d9355fa5d3a2af5879ca)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ net/net.c | 25 ++++++++++++++++++++++---
+ 1 file changed, 22 insertions(+), 3 deletions(-)
+
+diff --git a/net/net.c b/net/net.c
+index a2f0c828bb..e6ca2529bb 100644
+--- a/net/net.c
+++ b/net/net.c
+@@ -1150,6 +1150,21 @@ NICInfo *qemu_find_nic_info(const char *typename, bool match_default,
+     return NULL;
+ }
+ 
+static bool is_nic_model_help_option(const char *model)
+{
+    if (model && is_help_option(model)) {
+        /*
+         * Trigger the help output by instantiating the hash table which
+         * will gather tha available models as they get registered.
+         */
+        if (!nic_model_help) {
+            nic_model_help = g_hash_table_new_full(g_str_hash, g_str_equal,
+                                                   g_free, NULL);
+        }
+        return true;
+    }
+    return false;
+}
+ 
+ /* "I have created a device. Please configure it if you can" */
+ bool qemu_configure_nic_device(DeviceState *dev, bool match_default,
+@@ -1733,6 +1748,12 @@ void net_check_clients(void)
+ 
+ static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
+ {
+    const char *model = qemu_opt_get_del(opts, "model");
+
+    if (is_nic_model_help_option(model)) {
+        return 0;
+    }
+
+     return net_client_init(opts, false, errp);
+ }
+ 
+@@ -1789,9 +1810,7 @@ static int net_param_nic(void *dummy, QemuOpts *opts, Error **errp)
+     memset(ni, 0, sizeof(*ni));
+     ni->model = qemu_opt_get_del(opts, "model");
+ 
+-    if (!nic_model_help && !g_strcmp0(ni->model, "help")) {
+-        nic_model_help = g_hash_table_new_full(g_str_hash, g_str_equal,
+-                                               g_free, NULL);
+    if (is_nic_model_help_option(ni->model)) {
+         return 0;
+     }
+ 
--- a/debian/patches/extra/0021-net-Fix-net-nic-model-for-non-help-arguments.patch
+++ b/debian/patches/extra/0021-net-Fix-net-nic-model-for-non-help-arguments.patch
@ -0,0 +1,32 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Tue, 6 Aug 2024 18:21:37 +0100
+Subject: [PATCH] net: Fix '-net nic,model=' for non-help arguments
+
+Oops, don't *delete* the model option when checking for 'help'.
+
+Fixes: 64f75f57f9d2 ("net: Reinstate '-net nic, model=help' output as documented in man page")
+Reported-by: Hans <sungdgdhtryrt@gmail.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: qemu-stable@nongnu.org
+Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+(cherry picked from commit fa62cb989a9146c82f8f172715042852f5d36200)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ net/net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/net.c b/net/net.c
+index e6ca2529bb..897bb936cf 100644
+--- a/net/net.c
+++ b/net/net.c
+@@ -1748,7 +1748,7 @@ void net_check_clients(void)
+ 
+ static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
+ {
+-    const char *model = qemu_opt_get_del(opts, "model");
+    const char *model = qemu_opt_get(opts, "model");
+ 
+     if (is_nic_model_help_option(model)) {
+         return 0;
--- a/debian/patches/extra/0022-target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch
+++ b/debian/patches/extra/0022-target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch
@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Maydell <peter.maydell@linaro.org>
+Date: Mon, 22 Jul 2024 18:29:54 +0100
+Subject: [PATCH] target/arm: Don't assert for 128-bit tile accesses when SVL
+ is 128
+
+For an instruction which accesses a 128-bit element tile when
+the SVL is also 128 (for example MOV z0.Q, p0/M, ZA0H.Q[w0,0]),
+we will assert in get_tile_rowcol():
+
+qemu-system-aarch64: ../../tcg/tcg-op.c:926: tcg_gen_deposit_z_i32: Assertion `len > 0' failed.
+
+This happens because we calculate
+    len = ctz32(streaming_vec_reg_size(s)) - esz;
+but if the SVL and the element size are the same len is 0, and
+the deposit operation asserts.
+
+In this case the ZA storage contains exactly one 128 bit
+element ZA tile, and the horizontal or vertical slice is just
+that tile. This means that regardless of the index value in
+the Ws register, we always access that tile. (In pseudocode terms,
+we calculate (index + offset) MOD 1, which is 0.)
+
+Special case the len == 0 case to avoid hitting the assertion
+in tcg_gen_deposit_z_i32().
+
+Cc: qemu-stable@nongnu.org
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20240722172957.1041231-2-peter.maydell@linaro.org
+(cherry picked from commit 56f1c0db928aae0b83fd91c89ddb226b137e2b21)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/translate-sme.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
+index 185a8a917b..a50a419af2 100644
+--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
+@@ -49,7 +49,15 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
+     /* Prepare a power-of-two modulo via extraction of @len bits. */
+     len = ctz32(streaming_vec_reg_size(s)) - esz;
+ 
+-    if (vertical) {
+    if (!len) {
+        /*
+         * SVL is 128 and the element size is 128. There is exactly
+         * one 128x128 tile in the ZA storage, and so we calculate
+         * (Rs + imm) MOD 1, which is always 0. We need to special case
+         * this because TCG doesn't allow deposit ops with len 0.
+         */
+        tcg_gen_movi_i32(tmp, 0);
+    } else if (vertical) {
+         /*
+          * Compute the byte offset of the index within the tile:
+          *     (index % (svl / size)) * size
--- a/debian/patches/extra/0023-target-arm-Fix-UMOPA-UMOPS-of-16-bit-values.patch
+++ b/debian/patches/extra/0023-target-arm-Fix-UMOPA-UMOPS-of-16-bit-values.patch
@ -0,0 +1,59 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Maydell <peter.maydell@linaro.org>
+Date: Mon, 22 Jul 2024 18:29:55 +0100
+Subject: [PATCH] target/arm: Fix UMOPA/UMOPS of 16-bit values
+
+The UMOPA/UMOPS instructions are supposed to multiply unsigned 8 or
+16 bit elements and accumulate the products into a 64-bit element.
+In the Arm ARM pseudocode, this is done with the usual
+infinite-precision signed arithmetic.  However our implementation
+doesn't quite get it right, because in the DEF_IMOP_64() macro we do:
+  sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0);
+
+where NTYPE and MTYPE are uint16_t or int16_t.  In the uint16_t case,
+the C usual arithmetic conversions mean the values are converted to
+"int" type and the multiply is done as a 32-bit multiply.  This means
+that if the inputs are, for example, 0xffff and 0xffff then the
+result is 0xFFFE0001 as an int, which is then promoted to uint64_t
+for the accumulation into sum; this promotion incorrectly sign
+extends the multiply.
+
+Avoid the incorrect sign extension by casting to int64_t before
+the multiply, so we do the multiply as 64-bit signed arithmetic,
+which is a type large enough that the multiply can never
+overflow into the sign bit.
+
+(The equivalent 8-bit operations in DEF_IMOP_32() are fine, because
+the 8-bit multiplies can never overflow into the sign bit of a
+32-bit integer.)
+
+Cc: qemu-stable@nongnu.org
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2372
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20240722172957.1041231-3-peter.maydell@linaro.org
+(cherry picked from commit ea3f5a90f036734522e9af3bffd77e69e9f47355)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/sme_helper.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
+index 5a6dd76489..f9001f5213 100644
+--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
+@@ -1146,10 +1146,10 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
+     uint64_t sum = 0;                                                       \
+     /* Apply P to N as a mask, making the inactive elements 0. */           \
+     n &= expand_pred_h(p);                                                  \
+-    sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0);                               \
+-    sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16);                             \
+-    sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32);                             \
+-    sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48);                             \
+    sum += (int64_t)(NTYPE)(n >> 0) * (MTYPE)(m >> 0);                      \
+    sum += (int64_t)(NTYPE)(n >> 16) * (MTYPE)(m >> 16);                    \
+    sum += (int64_t)(NTYPE)(n >> 32) * (MTYPE)(m >> 32);                    \
+    sum += (int64_t)(NTYPE)(n >> 48) * (MTYPE)(m >> 48);                    \
+     return neg ? a - sum : a + sum;                                         \
+ }
+ 
--- a/debian/patches/extra/0024-target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch
+++ b/debian/patches/extra/0024-target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch
@ -0,0 +1,62 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Maydell <peter.maydell@linaro.org>
+Date: Mon, 22 Jul 2024 18:29:56 +0100
+Subject: [PATCH] target/arm: Avoid shifts by -1 in tszimm_shr() and
+ tszimm_shl()
+
+The function tszimm_esz() returns a shift amount, or possibly -1 in
+certain cases that correspond to unallocated encodings in the
+instruction set.  We catch these later in the trans_ functions
+(generally with an "a->esz < 0" check), but before we do the
+decodetree-generated code will also call tszimm_shr() or tszimm_shl(),
+which will use the tszimm_esz() return value as a shift count without
+checking that it is not negative, which is undefined behaviour.
+
+Avoid the UB by checking the return value in tszimm_shr() and
+tszimm_shl().
+
+Cc: qemu-stable@nongnu.org
+Resolves: Coverity CID 1547617, 1547694
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20240722172957.1041231-4-peter.maydell@linaro.org
+(cherry picked from commit 76916dfa89e8900639c1055c07a295c06628a0bc)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/translate-sve.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
+index ada05aa530..466a19c25a 100644
+--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
+@@ -50,13 +50,27 @@ static int tszimm_esz(DisasContext *s, int x)
+ 
+ static int tszimm_shr(DisasContext *s, int x)
+ {
+-    return (16 << tszimm_esz(s, x)) - x;
+    /*
+     * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the
+     * trans function will check for esz < 0), so we can return any
+     * value we like from here in that case as long as we avoid UB.
+     */
+    int esz = tszimm_esz(s, x);
+    if (esz < 0) {
+        return esz;
+    }
+    return (16 << esz) - x;
+ }
+ 
+ /* See e.g. LSL (immediate, predicated).  */
+ static int tszimm_shl(DisasContext *s, int x)
+ {
+-    return x - (8 << tszimm_esz(s, x));
+    /* As with tszimm_shr(), value will be unused if esz < 0 */
+    int esz = tszimm_esz(s, x);
+    if (esz < 0) {
+        return esz;
+    }
+    return x - (8 << esz);
+ }
+ 
+ /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
--- a/debian/patches/extra/0025-target-arm-Ignore-SMCR_EL2.LEN-and-SVCR_EL2.LEN-if-E.patch
+++ b/debian/patches/extra/0025-target-arm-Ignore-SMCR_EL2.LEN-and-SVCR_EL2.LEN-if-E.patch
@ -0,0 +1,41 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Maydell <peter.maydell@linaro.org>
+Date: Mon, 22 Jul 2024 18:29:57 +0100
+Subject: [PATCH] target/arm: Ignore SMCR_EL2.LEN and SVCR_EL2.LEN if EL2 is
+ not enabled
+
+When determining the current vector length, the SMCR_EL2.LEN and
+SVCR_EL2.LEN settings should only be considered if EL2 is enabled
+(compare the pseudocode CurrentSVL and CurrentNSVL which call
+EL2Enabled()).
+
+We were checking against ARM_FEATURE_EL2 rather than calling
+arm_is_el2_enabled(), which meant that we would look at
+SMCR_EL2/SVCR_EL2 when in Secure EL1 or Secure EL0 even if Secure EL2
+was not enabled.
+
+Use the correct check in sve_vqm1_for_el_sm().
+
+Cc: qemu-stable@nongnu.org
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20240722172957.1041231-5-peter.maydell@linaro.org
+(cherry picked from commit f573ac059ed060234fcef4299fae9e500d357c33)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/helper.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/target/arm/helper.c b/target/arm/helper.c
+index a620481d7c..42044ae14b 100644
+--- a/target/arm/helper.c
+++ b/target/arm/helper.c
+@@ -7191,7 +7191,7 @@ uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm)
+     if (el <= 1 && !el_is_in_host(env, el)) {
+         len = MIN(len, 0xf & (uint32_t)cr[1]);
+     }
+-    if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+    if (el <= 2 && arm_is_el2_enabled(env)) {
+         len = MIN(len, 0xf & (uint32_t)cr[2]);
+     }
+     if (arm_feature(env, ARM_FEATURE_EL3)) {
--- a/debian/patches/extra/0026-target-arm-Handle-denormals-correctly-for-FMOPA-wide.patch
+++ b/debian/patches/extra/0026-target-arm-Handle-denormals-correctly-for-FMOPA-wide.patch
@ -0,0 +1,164 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Maydell <peter.maydell@linaro.org>
+Date: Thu, 1 Aug 2024 10:15:03 +0100
+Subject: [PATCH] target/arm: Handle denormals correctly for FMOPA (widening)
+
+The FMOPA (widening) SME instruction takes pairs of half-precision
+floating point values, widens them to single-precision, does a
+two-way dot product and accumulates the results into a
+single-precision destination.  We don't quite correctly handle the
+FPCR bits FZ and FZ16 which control flushing of denormal inputs and
+outputs.  This is because at the moment we pass a single float_status
+value to the helper function, which then uses that configuration for
+all the fp operations it does.  However, because the inputs to this
+operation are float16 and the outputs are float32 we need to use the
+fp_status_f16 for the float16 input widening but the normal fp_status
+for everything else.  Otherwise we will apply the flushing control
+FPCR.FZ16 to the 32-bit output rather than the FPCR.FZ control, and
+incorrectly flush a denormal output to zero when we should not (or
+vice-versa).
+
+(In commit 207d30b5fdb5b we tried to fix the FZ handling but
+didn't get it right, switching from "use FPCR.FZ for everything" to
+"use FPCR.FZ16 for everything".)
+
+Pass the CPU env to the sme_fmopa_h helper instead of an fp_status
+pointer, and have the helper pass an extra fp_status into the
+f16_dotadd() function so that we can use the right status for the
+right parts of this operation.
+
+Cc: qemu-stable@nongnu.org
+Fixes: 207d30b5fdb5 ("target/arm: Use FPST_F16 for SME FMOPA (widening)")
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2373
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+(cherry picked from commit 55f9f4ee018c5ccea81d8c8c586756d7711ae46f)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/helper-sme.h    |  2 +-
+ target/arm/tcg/sme_helper.c    | 39 +++++++++++++++++++++++-----------
+ target/arm/tcg/translate-sme.c | 25 ++++++++++++++++++++--
+ 3 files changed, 51 insertions(+), 15 deletions(-)
+
+diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
+index 27eef49a11..d22bf9d21b 100644
+--- a/target/arm/tcg/helper-sme.h
+++ b/target/arm/tcg/helper-sme.h
+@@ -121,7 +121,7 @@ DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+ DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+ 
+ DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
+-                   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+                   void, ptr, ptr, ptr, ptr, ptr, env, i32)
+ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
+                    void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
+diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
+index f9001f5213..3906bb51c0 100644
+--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
+@@ -976,12 +976,23 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
+ }
+ 
+ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
+-                          float_status *s_std, float_status *s_odd)
+                          float_status *s_f16, float_status *s_std,
+                          float_status *s_odd)
+ {
+-    float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
+-    float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
+-    float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
+-    float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
+    /*
+     * We need three different float_status for different parts of this
+     * operation:
+     *  - the input conversion of the float16 values must use the
+     *    f16-specific float_status, so that the FPCR.FZ16 control is applied
+     *  - operations on float32 including the final accumulation must use
+     *    the normal float_status, so that FPCR.FZ is applied
+     *  - we have pre-set-up copy of s_std which is set to round-to-odd,
+     *    for the multiply (see below)
+     */
+    float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16);
+    float64 e1c = float16_to_float64(e1 >> 16, true, s_f16);
+    float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16);
+    float64 e2c = float16_to_float64(e2 >> 16, true, s_f16);
+     float64 t64;
+     float32 t32;
+ 
+@@ -1003,20 +1014,23 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
+ }
+ 
+ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
+-                         void *vpm, void *vst, uint32_t desc)
+                         void *vpm, CPUARMState *env, uint32_t desc)
+ {
+     intptr_t row, col, oprsz = simd_maxsz(desc);
+     uint32_t neg = simd_data(desc) * 0x80008000u;
+     uint16_t *pn = vpn, *pm = vpm;
+-    float_status fpst_odd, fpst_std;
+    float_status fpst_odd, fpst_std, fpst_f16;
+ 
+     /*
+-     * Make a copy of float_status because this operation does not
+-     * update the cumulative fp exception status.  It also produces
+-     * default nans.  Make a second copy with round-to-odd -- see above.
+     * Make copies of fp_status and fp_status_f16, because this operation
+     * does not update the cumulative fp exception status.  It also
+     * produces default NaNs. We also need a second copy of fp_status with
+     * round-to-odd -- see above.
+      */
+-    fpst_std = *(float_status *)vst;
+    fpst_f16 = env->vfp.fp_status_f16;
+    fpst_std = env->vfp.fp_status;
+     set_default_nan_mode(true, &fpst_std);
+    set_default_nan_mode(true, &fpst_f16);
+     fpst_odd = fpst_std;
+     set_float_rounding_mode(float_round_to_odd, &fpst_odd);
+ 
+@@ -1036,7 +1050,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
+                         uint32_t m = *(uint32_t *)(vzm + H1_4(col));
+ 
+                         m = f16mop_adj_pair(m, pcol, 0);
+-                        *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
+                        *a = f16_dotadd(*a, n, m,
+                                        &fpst_f16, &fpst_std, &fpst_odd);
+                     }
+                     col += 4;
+                     pcol >>= 4;
+diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
+index a50a419af2..ae42ddef7b 100644
+--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
+@@ -334,8 +334,29 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
+     return true;
+ }
+ 
+-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
+-           MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
+static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz,
+                           gen_helper_gvec_5_ptr *fn)
+{
+    int svl = streaming_vec_reg_size(s);
+    uint32_t desc = simd_desc(svl, svl, a->sub);
+    TCGv_ptr za, zn, zm, pn, pm;
+
+    if (!sme_smza_enabled_check(s)) {
+        return true;
+    }
+
+    za = get_tile(s, esz, a->zad);
+    zn = vec_full_reg_ptr(s, a->zn);
+    zm = vec_full_reg_ptr(s, a->zm);
+    pn = pred_full_reg_ptr(s, a->pn);
+    pm = pred_full_reg_ptr(s, a->pm);
+
+    fn(za, zn, zm, pn, pm, tcg_env, tcg_constant_i32(desc));
+    return true;
+}
+
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
+           MO_32, gen_helper_sme_fmopa_h)
+ TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
+            MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
+ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
--- a/debian/patches/extra/0027-intel_iommu-fix-FRCD-construction-macro.patch
+++ b/debian/patches/extra/0027-intel_iommu-fix-FRCD-construction-macro.patch
@ -0,0 +1,39 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Cl=C3=A9ment=20Mathieu--Drif?=
+ <clement.mathieu--drif@eviden.com>
+Date: Tue, 9 Jul 2024 14:26:08 +0000
+Subject: [PATCH] intel_iommu: fix FRCD construction macro
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The constant must be unsigned, otherwise the two's complement
+overrides the other fields when a PASID is present.
+
+Fixes: 1b2b12376c8a ("intel-iommu: PASID support")
+Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
+Reviewed-by: Yi Liu <yi.l.liu@intel.com>
+Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
+Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
+Message-Id: <20240709142557.317271-2-clement.mathieu--drif@eviden.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit a3c8d7e38550c3d5a46e6fa94ffadfa625a4861d)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/i386/intel_iommu_internal.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
+index f8cf99bddf..cbc4030031 100644
+--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
+@@ -267,7 +267,7 @@
+ /* For the low 64-bit of 128-bit */
+ #define VTD_FRCD_FI(val)        ((val) & ~0xfffULL)
+ #define VTD_FRCD_PV(val)        (((val) & 0xffffULL) << 40)
+-#define VTD_FRCD_PP(val)        (((val) & 0x1) << 31)
+#define VTD_FRCD_PP(val)        (((val) & 0x1ULL) << 31)
+ #define VTD_FRCD_IR_IDX(val)    (((val) & 0xffffULL) << 48)
+ 
+ /* DMA Remapping Fault Conditions */
--- a/debian/patches/extra/0028-target-i386-Do-not-apply-REX-to-MMX-operands.patch
+++ b/debian/patches/extra/0028-target-i386-Do-not-apply-REX-to-MMX-operands.patch
@ -0,0 +1,33 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Richard Henderson <richard.henderson@linaro.org>
+Date: Mon, 12 Aug 2024 12:58:42 +1000
+Subject: [PATCH] target/i386: Do not apply REX to MMX operands
+
+Cc: qemu-stable@nongnu.org
+Fixes: b3e22b2318a ("target/i386: add core of new i386 decoder")
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2495
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Link: https://lore.kernel.org/r/20240812025844.58956-2-richard.henderson@linaro.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 416f2b16c02c618c0f233372ebfe343f9ee667d4)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/i386/tcg/decode-new.c.inc | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
+index 4209d59ca8..09b8d2314a 100644
+--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
+@@ -1271,7 +1271,10 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+             op->unit = X86_OP_SSE;
+         }
+     get_reg:
+-        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
+        op->n = ((get_modrm(s, env) >> 3) & 7);
+        if (op->unit != X86_OP_MMX) {
+            op->n |= REX_R(s);
+        }
+         break;
+ 
+     case X86_TYPE_E:  /* ALU modrm operand */
--- a/debian/patches/extra/0029-module-Prevent-crash-by-resetting-local_err-in-modul.patch
+++ b/debian/patches/extra/0029-module-Prevent-crash-by-resetting-local_err-in-modul.patch
@ -0,0 +1,42 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
+Date: Fri, 9 Aug 2024 14:13:40 +0200
+Subject: [PATCH] module: Prevent crash by resetting local_err in
+ module_load_qom_all()
+
+Set local_err to NULL after it has been freed in error_report_err(). This
+avoids triggering assert(*errp == NULL) failure in error_setv() when
+local_err is reused in the loop.
+
+Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
+Reviewed-by: Claudio Fontana <cfontana@suse.de>
+Reviewed-by: Denis V. Lunev <den@openvz.org>
+Link: https://lore.kernel.org/r/20240809121340.992049-2-alexander.ivanov@virtuozzo.com
+[Do the same by moving the declaration instead. - Paolo]
+Cc: qemu-stable@nongnu.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 940d802b24e63650e0eacad3714e2ce171cba17c)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ util/module.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/util/module.c b/util/module.c
+index 32e263163c..3eb0f06df1 100644
+--- a/util/module.c
+++ b/util/module.c
+@@ -354,13 +354,13 @@ int module_load_qom(const char *type, Error **errp)
+ void module_load_qom_all(void)
+ {
+     const QemuModinfo *modinfo;
+-    Error *local_err = NULL;
+ 
+     if (module_loaded_qom_all) {
+         return;
+     }
+ 
+     for (modinfo = module_info; modinfo->name != NULL; modinfo++) {
+        Error *local_err = NULL;
+         if (!modinfo->objs) {
+             continue;
+         }
--- a/debian/patches/extra/0030-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch
+++ b/debian/patches/extra/0030-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch
@ -0,0 +1,164 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Eric Blake <eblake@redhat.com>
+Date: Wed, 7 Aug 2024 08:50:01 -0500
+Subject: [PATCH] nbd/server: Plumb in new args to nbd_client_add()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Upcoming patches to fix a CVE need to track an opaque pointer passed
+in by the owner of a client object, as well as a request for a time
+limit on how fast negotiation must complete.  Prepare for that by
+changing the signature of nbd_client_new() and adding an accessor to
+get at the opaque pointer, although for now the two servers
+(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though
+they pass in a new default timeout value.
+
+Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Eric Blake <eblake@redhat.com>
+Message-ID: <20240807174943.771624-11-eblake@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan]
+Signed-off-by: Eric Blake <eblake@redhat.com>
+(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ blockdev-nbd.c      |  6 ++++--
+ include/block/nbd.h | 11 ++++++++++-
+ nbd/server.c        | 20 +++++++++++++++++---
+ qemu-nbd.c          |  4 +++-
+ 4 files changed, 34 insertions(+), 7 deletions(-)
+
+diff --git a/blockdev-nbd.c b/blockdev-nbd.c
+index 213012435f..267a1de903 100644
+--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
+@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
+     nbd_update_server_watch(nbd_server);
+ 
+     qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
+-    nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz,
+-                   nbd_blockdev_client_closed);
+    /* TODO - expose handshake timeout as QMP option */
+    nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
+                   nbd_server->tlscreds, nbd_server->tlsauthz,
+                   nbd_blockdev_client_closed, NULL);
+ }
+ 
+ static void nbd_update_server_watch(NBDServerData *s)
+diff --git a/include/block/nbd.h b/include/block/nbd.h
+index 4e7bd6342f..1d4d65922d 100644
+--- a/include/block/nbd.h
+++ b/include/block/nbd.h
+@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts;
+ 
+ extern const BlockExportDriver blk_exp_nbd;
+ 
+/*
+ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must
+ * succeed at NBD_OPT_GO before being forcefully dropped as too slow.
+ */
+#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
+
+ /* Handshake phase structs - this struct is passed on the wire */
+ 
+ typedef struct NBDOption {
+@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp);
+ NBDExport *nbd_export_find(const char *name);
+ 
+ void nbd_client_new(QIOChannelSocket *sioc,
+                    uint32_t handshake_max_secs,
+                     QCryptoTLSCreds *tlscreds,
+                     const char *tlsauthz,
+-                    void (*close_fn)(NBDClient *, bool));
+                    void (*close_fn)(NBDClient *, bool),
+                    void *owner);
+void *nbd_client_owner(NBDClient *client);
+ void nbd_client_get(NBDClient *client);
+ void nbd_client_put(NBDClient *client);
+ 
+diff --git a/nbd/server.c b/nbd/server.c
+index 892797bb11..e50012499f 100644
+--- a/nbd/server.c
+++ b/nbd/server.c
+@@ -124,12 +124,14 @@ struct NBDMetaContexts {
+ struct NBDClient {
+     int refcount; /* atomic */
+     void (*close_fn)(NBDClient *client, bool negotiated);
+    void *owner;
+ 
+     QemuMutex lock;
+ 
+     NBDExport *exp;
+     QCryptoTLSCreds *tlscreds;
+     char *tlsauthz;
+    uint32_t handshake_max_secs;
+     QIOChannelSocket *sioc; /* The underlying data channel */
+     QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
+ 
+@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
+ 
+     qemu_co_mutex_init(&client->send_lock);
+ 
+    /* TODO - utilize client->handshake_max_secs */
+     if (nbd_negotiate(client, &local_err)) {
+         if (local_err) {
+             error_report_err(local_err);
+@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
+ }
+ 
+ /*
+- * Create a new client listener using the given channel @sioc.
+ * Create a new client listener using the given channel @sioc and @owner.
+  * Begin servicing it in a coroutine.  When the connection closes, call
+- * @close_fn with an indication of whether the client completed negotiation.
+ * @close_fn with an indication of whether the client completed negotiation
+ * within @handshake_max_secs seconds (0 for unbounded).
+  */
+ void nbd_client_new(QIOChannelSocket *sioc,
+                    uint32_t handshake_max_secs,
+                     QCryptoTLSCreds *tlscreds,
+                     const char *tlsauthz,
+-                    void (*close_fn)(NBDClient *, bool))
+                    void (*close_fn)(NBDClient *, bool),
+                    void *owner)
+ {
+     NBDClient *client;
+     Coroutine *co;
+@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc,
+         object_ref(OBJECT(client->tlscreds));
+     }
+     client->tlsauthz = g_strdup(tlsauthz);
+    client->handshake_max_secs = handshake_max_secs;
+     client->sioc = sioc;
+     qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+     object_ref(OBJECT(client->sioc));
+     client->ioc = QIO_CHANNEL(sioc);
+     object_ref(OBJECT(client->ioc));
+     client->close_fn = close_fn;
+    client->owner = owner;
+ 
+     co = qemu_coroutine_create(nbd_co_client_start, client);
+     qemu_coroutine_enter(co);
+ }
+
+void *
+nbd_client_owner(NBDClient *client)
+{
+    return client->owner;
+}
+diff --git a/qemu-nbd.c b/qemu-nbd.c
+index d7b3ccab21..48e2fa5858 100644
+--- a/qemu-nbd.c
+++ b/qemu-nbd.c
+@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
+ 
+     nb_fds++;
+     nbd_update_server_watch();
+-    nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed);
+    /* TODO - expose handshake timeout as command line option */
+    nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
+                   tlscreds, tlsauthz, nbd_client_closed, NULL);
+ }
+ 
+ static void nbd_update_server_watch(void)
--- a/debian/patches/extra/0031-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch
+++ b/debian/patches/extra/0031-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch
@ -0,0 +1,172 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Eric Blake <eblake@redhat.com>
+Date: Tue, 6 Aug 2024 13:53:00 -0500
+Subject: [PATCH] nbd/server: CVE-2024-7409: Cap default max-connections to 100
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Allowing an unlimited number of clients to any web service is a recipe
+for a rudimentary denial of service attack: the client merely needs to
+open lots of sockets without closing them, until qemu no longer has
+any more fds available to allocate.
+
+For qemu-nbd, we default to allowing only 1 connection unless more are
+explicitly asked for (-e or --shared); this was historically picked as
+a nice default (without an explicit -t, a non-persistent qemu-nbd goes
+away after a client disconnects, without needing any additional
+follow-up commands), and we are not going to change that interface now
+(besides, someday we want to point people towards qemu-storage-daemon
+instead of qemu-nbd).
+
+But for qemu proper, and the newer qemu-storage-daemon, the QMP
+nbd-server-start command has historically had a default of unlimited
+number of connections, in part because unlike qemu-nbd it is
+inherently persistent until nbd-server-stop.  Allowing multiple client
+sockets is particularly useful for clients that can take advantage of
+MULTI_CONN (creating parallel sockets to increase throughput),
+although known clients that do so (such as libnbd's nbdcopy) typically
+use only 8 or 16 connections (the benefits of scaling diminish once
+more sockets are competing for kernel attention).  Picking a number
+large enough for typical use cases, but not unlimited, makes it
+slightly harder for a malicious client to perform a denial of service
+merely by opening lots of connections without progressing through the
+handshake.
+
+This change does not eliminate CVE-2024-7409 on its own, but reduces
+the chance for fd exhaustion or unlimited memory usage as an attack
+surface.  On the other hand, by itself, it makes it more obvious that
+with a finite limit, we have the problem of an unauthenticated client
+holding 100 fds opened as a way to block out a legitimate client from
+being able to connect; thus, later patches will further add timeouts
+to reject clients that are not making progress.
+
+This is an INTENTIONAL change in behavior, and will break any client
+of nbd-server-start that was not passing an explicit max-connections
+parameter, yet expects more than 100 simultaneous connections.  We are
+not aware of any such client (as stated above, most clients aware of
+MULTI_CONN get by just fine on 8 or 16 connections, and probably cope
+with later connections failing by relying on the earlier connections;
+libvirt has not yet been passing max-connections, but generally
+creates NBD servers with the intent for a single client for the sake
+of live storage migration; meanwhile, the KubeSAN project anticipates
+a large cluster sharing multiple clients [up to 8 per node, and up to
+100 nodes in a cluster], but it currently uses qemu-nbd with an
+explicit --shared=0 rather than qemu-storage-daemon with
+nbd-server-start).
+
+We considered using a deprecation period (declare that omitting
+max-connections is deprecated, and make it mandatory in 3 releases -
+then we don't need to pick an arbitrary default); that has zero risk
+of breaking any apps that accidentally depended on more than 100
+connections, and where such breakage might not be noticed under unit
+testing but only under the larger loads of production usage.  But it
+does not close the denial-of-service hole until far into the future,
+and requires all apps to change to add the parameter even if 100 was
+good enough.  It also has a drawback that any app (like libvirt) that
+is accidentally relying on an unlimited default should seriously
+consider their own CVE now, at which point they are going to change to
+pass explicit max-connections sooner than waiting for 3 qemu releases.
+Finally, if our changed default breaks an app, that app can always
+pass in an explicit max-connections with a larger value.
+
+It is also intentional that the HMP interface to nbd-server-start is
+not changed to expose max-connections (any client needing to fine-tune
+things should be using QMP).
+
+Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
+Signed-off-by: Eric Blake <eblake@redhat.com>
+Message-ID: <20240807174943.771624-12-eblake@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+[ericb: Expand commit message to summarize Dan's argument for why we
+break corner-case back-compat behavior without a deprecation period]
+Signed-off-by: Eric Blake <eblake@redhat.com>
+(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/monitor/block-hmp-cmds.c | 3 ++-
+ blockdev-nbd.c                 | 8 ++++++++
+ include/block/nbd.h            | 7 +++++++
+ qapi/block-export.json         | 4 ++--
+ 4 files changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
+index d954bec6f1..bdf2eb50b6 100644
+--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
+@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
+         goto exit;
+     }
+ 
+-    nbd_server_start(addr, NULL, NULL, 0, &local_err);
+    nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS,
+                     &local_err);
+     qapi_free_SocketAddress(addr);
+     if (local_err != NULL) {
+         goto exit;
+diff --git a/blockdev-nbd.c b/blockdev-nbd.c
+index 267a1de903..24ba5382db 100644
+--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
+@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
+ 
+ void nbd_server_start_options(NbdServerOptions *arg, Error **errp)
+ {
+    if (!arg->has_max_connections) {
+        arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
+    }
+
+     nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz,
+                      arg->max_connections, errp);
+ }
+@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr,
+ {
+     SocketAddress *addr_flat = socket_address_flatten(addr);
+ 
+    if (!has_max_connections) {
+        max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
+    }
+
+     nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp);
+     qapi_free_SocketAddress(addr_flat);
+ }
+diff --git a/include/block/nbd.h b/include/block/nbd.h
+index 1d4d65922d..d4f8b21aec 100644
+--- a/include/block/nbd.h
+++ b/include/block/nbd.h
+@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd;
+  */
+ #define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
+ 
+/*
+ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at
+ * once; must be large enough to allow a MULTI_CONN-aware client like
+ * nbdcopy to create its typical number of 8-16 sockets.
+ */
+#define NBD_DEFAULT_MAX_CONNECTIONS 100
+
+ /* Handshake phase structs - this struct is passed on the wire */
+ 
+ typedef struct NBDOption {
+diff --git a/qapi/block-export.json b/qapi/block-export.json
+index 3919a2d5b9..f45e4fd481 100644
+--- a/qapi/block-export.json
+++ b/qapi/block-export.json
+@@ -28,7 +28,7 @@
+ # @max-connections: The maximum number of connections to allow at the
+ #     same time, 0 for unlimited.  Setting this to 1 also stops the
+ #     server from advertising multiple client support (since 5.2;
+-#     default: 0)
+#     default: 100)
+ #
+ # Since: 4.2
+ ##
+@@ -63,7 +63,7 @@
+ # @max-connections: The maximum number of connections to allow at the
+ #     same time, 0 for unlimited.  Setting this to 1 also stops the
+ #     server from advertising multiple client support (since 5.2;
+-#     default: 0).
+#     default: 100).
+ #
+ # Errors:
+ #     - if the server is already running
--- a/debian/patches/extra/0032-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch
+++ b/debian/patches/extra/0032-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch
@ -0,0 +1,123 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Eric Blake <eblake@redhat.com>
+Date: Thu, 8 Aug 2024 16:05:08 -0500
+Subject: [PATCH] nbd/server: CVE-2024-7409: Drop non-negotiating clients
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+A client that opens a socket but does not negotiate is merely hogging
+qemu's resources (an open fd and a small amount of memory); and a
+malicious client that can access the port where NBD is listening can
+attempt a denial of service attack by intentionally opening and
+abandoning lots of unfinished connections.  The previous patch put a
+default bound on the number of such ongoing connections, but once that
+limit is hit, no more clients can connect (including legitimate ones).
+The solution is to insist that clients complete handshake within a
+reasonable time limit, defaulting to 10 seconds.  A client that has
+not successfully completed NBD_OPT_GO by then (including the case
+where the client didn't know TLS credentials to even reach the point
+of NBD_OPT_GO) is wasting our time and does not deserve to stay
+connected.  Later patches will allow fine-tuning the limit away from
+the default value (including disabling it for doing integration
+testing of the handshake process itself).
+
+Note that this patch in isolation actually makes it more likely to see
+qemu SEGV after nbd-server-stop, as any client socket still connected
+when the server shuts down will now be closed after 10 seconds rather
+than at the client's whims.  That will be addressed in the next patch.
+
+For a demo of this patch in action:
+$ qemu-nbd -f raw -r -t -e 10 file &
+$ nbdsh --opt-mode -c '
+H = list()
+for i in range(20):
+  print(i)
+  H.insert(i, nbd.NBD())
+  H[i].set_opt_mode(True)
+  H[i].connect_uri("nbd://localhost")
+'
+$ kill $!
+
+where later connections get to start progressing once earlier ones are
+forcefully dropped for taking too long, rather than hanging.
+
+Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
+Signed-off-by: Eric Blake <eblake@redhat.com>
+Message-ID: <20240807174943.771624-13-eblake@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+[eblake: rebase to changes earlier in series, reduce scope of timer]
+Signed-off-by: Eric Blake <eblake@redhat.com>
+(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ nbd/server.c     | 28 +++++++++++++++++++++++++++-
+ nbd/trace-events |  1 +
+ 2 files changed, 28 insertions(+), 1 deletion(-)
+
+diff --git a/nbd/server.c b/nbd/server.c
+index e50012499f..39285cc971 100644
+--- a/nbd/server.c
+++ b/nbd/server.c
+@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client)
+     }
+ }
+ 
+static void nbd_handshake_timer_cb(void *opaque)
+{
+    QIOChannel *ioc = opaque;
+
+    trace_nbd_handshake_timer_cb();
+    qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+}
+
+ static coroutine_fn void nbd_co_client_start(void *opaque)
+ {
+     NBDClient *client = opaque;
+     Error *local_err = NULL;
+    QEMUTimer *handshake_timer = NULL;
+ 
+     qemu_co_mutex_init(&client->send_lock);
+ 
+-    /* TODO - utilize client->handshake_max_secs */
+    /*
+     * Create a timer to bound the time spent in negotiation. If the
+     * timer expires, it is likely nbd_negotiate will fail because the
+     * socket was shutdown.
+     */
+    if (client->handshake_max_secs > 0) {
+        handshake_timer = aio_timer_new(qemu_get_aio_context(),
+                                        QEMU_CLOCK_REALTIME,
+                                        SCALE_NS,
+                                        nbd_handshake_timer_cb,
+                                        client->sioc);
+        timer_mod(handshake_timer,
+                  qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+                  client->handshake_max_secs * NANOSECONDS_PER_SECOND);
+    }
+
+     if (nbd_negotiate(client, &local_err)) {
+         if (local_err) {
+             error_report_err(local_err);
+         }
+        timer_free(handshake_timer);
+         client_close(client, false);
+         return;
+     }
+ 
+    timer_free(handshake_timer);
+     WITH_QEMU_LOCK_GUARD(&client->lock) {
+         nbd_client_receive_next_request(client);
+     }
+diff --git a/nbd/trace-events b/nbd/trace-events
+index 00ae3216a1..cbd0a4ab7e 100644
+--- a/nbd/trace-events
+++ b/nbd/trace-events
+@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload
+ nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64
+ nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32
+ nbd_trip(void) "Reading request"
+nbd_handshake_timer_cb(void) "client took too long to negotiate"
+ 
+ # client-connection.c
+ nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64
--- a/debian/patches/extra/0033-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch
+++ b/debian/patches/extra/0033-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch
@ -0,0 +1,161 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Eric Blake <eblake@redhat.com>
+Date: Wed, 7 Aug 2024 12:23:13 -0500
+Subject: [PATCH] nbd/server: CVE-2024-7409: Close stray clients at server-stop
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+A malicious client can attempt to connect to an NBD server, and then
+intentionally delay progress in the handshake, including if it does
+not know the TLS secrets.  Although the previous two patches reduce
+this behavior by capping the default max-connections parameter and
+killing slow clients, they did not eliminate the possibility of a
+client waiting to close the socket until after the QMP nbd-server-stop
+command is executed, at which point qemu would SEGV when trying to
+dereference the NULL nbd_server global which is no longer present.
+This amounts to a denial of service attack.  Worse, if another NBD
+server is started before the malicious client disconnects, I cannot
+rule out additional adverse effects when the old client interferes
+with the connection count of the new server (although the most likely
+outcome is a crash due to an assertion failure when checking
+nbd_server->connections > 0).
+
+For environments without this patch, the CVE can be mitigated by
+ensuring (such as via a firewall) that only trusted clients can
+connect to an NBD server.  Note that using frameworks like libvirt
+that ensure that TLS is used and that nbd-server-stop is not executed
+while any trusted clients are still connected will only help if there
+is also no possibility for an untrusted client to open a connection
+but then stall on the NBD handshake.
+
+Given the previous patches, it would be possible to guarantee that no
+clients remain connected by having nbd-server-stop sleep for longer
+than the default handshake deadline before finally freeing the global
+nbd_server object, but that could make QMP non-responsive for a long
+time.  So instead, this patch fixes the problem by tracking all client
+sockets opened while the server is running, and forcefully closing any
+such sockets remaining without a completed handshake at the time of
+nbd-server-stop, then waiting until the coroutines servicing those
+sockets notice the state change.  nbd-server-stop now has a second
+AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the
+blk_exp_close_all_type() that disconnects all clients that completed
+handshakes), but forced socket shutdown is enough to progress the
+coroutines and quickly tear down all clients before the server is
+freed, thus finally fixing the CVE.
+
+This patch relies heavily on the fact that nbd/server.c guarantees
+that it only calls nbd_blockdev_client_closed() from the main loop
+(see the assertion in nbd_client_put() and the hoops used in
+nbd_client_put_nonzero() to achieve that); if we did not have that
+guarantee, we would also need a mutex protecting our accesses of the
+list of connections to survive re-entrancy from independent iothreads.
+
+Although I did not actually try to test old builds, it looks like this
+problem has existed since at least commit 862172f45c (v2.12.0, 2017) -
+even back when that patch started using a QIONetListener to handle
+listening on multiple sockets, nbd_server_free() was already unaware
+that the nbd_blockdev_client_closed callback can be reached later by a
+client thread that has not completed handshakes (and therefore the
+client's socket never got added to the list closed in
+nbd_export_close_all), despite that patch intentionally tearing down
+the QIONetListener to prevent new clients.
+
+Reported-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
+Fixes: CVE-2024-7409
+CC: qemu-stable@nongnu.org
+Signed-off-by: Eric Blake <eblake@redhat.com>
+Message-ID: <20240807174943.771624-14-eblake@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++-
+ 1 file changed, 34 insertions(+), 1 deletion(-)
+
+diff --git a/blockdev-nbd.c b/blockdev-nbd.c
+index 24ba5382db..f73409ae49 100644
+--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
+@@ -21,12 +21,18 @@
+ #include "io/channel-socket.h"
+ #include "io/net-listener.h"
+ 
+typedef struct NBDConn {
+    QIOChannelSocket *cioc;
+    QLIST_ENTRY(NBDConn) next;
+} NBDConn;
+
+ typedef struct NBDServerData {
+     QIONetListener *listener;
+     QCryptoTLSCreds *tlscreds;
+     char *tlsauthz;
+     uint32_t max_connections;
+     uint32_t connections;
+    QLIST_HEAD(, NBDConn) conns;
+ } NBDServerData;
+ 
+ static NBDServerData *nbd_server;
+@@ -51,6 +57,14 @@ int nbd_server_max_connections(void)
+ 
+ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
+ {
+    NBDConn *conn = nbd_client_owner(client);
+
+    assert(qemu_in_main_thread() && nbd_server);
+
+    object_unref(OBJECT(conn->cioc));
+    QLIST_REMOVE(conn, next);
+    g_free(conn);
+
+     nbd_client_put(client);
+     assert(nbd_server->connections > 0);
+     nbd_server->connections--;
+@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
+ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
+                        gpointer opaque)
+ {
+    NBDConn *conn = g_new0(NBDConn, 1);
+
+    assert(qemu_in_main_thread() && nbd_server);
+     nbd_server->connections++;
+    object_ref(OBJECT(cioc));
+    conn->cioc = cioc;
+    QLIST_INSERT_HEAD(&nbd_server->conns, conn, next);
+     nbd_update_server_watch(nbd_server);
+ 
+     qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
+     /* TODO - expose handshake timeout as QMP option */
+     nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
+                    nbd_server->tlscreds, nbd_server->tlsauthz,
+-                   nbd_blockdev_client_closed, NULL);
+                   nbd_blockdev_client_closed, conn);
+ }
+ 
+ static void nbd_update_server_watch(NBDServerData *s)
+@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s)
+ 
+ static void nbd_server_free(NBDServerData *server)
+ {
+    NBDConn *conn, *tmp;
+
+     if (!server) {
+         return;
+     }
+ 
+    /*
+     * Forcefully close the listener socket, and any clients that have
+     * not yet disconnected on their own.
+     */
+     qio_net_listener_disconnect(server->listener);
+     object_unref(OBJECT(server->listener));
+    QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
+        qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
+                             NULL);
+    }
+
+    AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0);
+
+     if (server->tlscreds) {
+         object_unref(OBJECT(server->tlscreds));
+     }
--- a/debian/patches/extra/0034-vnc-fix-crash-when-no-console-attached.patch
+++ b/debian/patches/extra/0034-vnc-fix-crash-when-no-console-attached.patch
@ -0,0 +1,47 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
+Date: Tue, 20 Aug 2024 17:11:12 +0400
+Subject: [PATCH] vnc: fix crash when no console attached
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Since commit e99441a3793b5 ("ui/curses: Do not use console_select()")
+qemu_text_console_put_keysym() no longer checks for NULL console
+argument, which leads to a later crash:
+
+Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
+0x00005555559ee186 in qemu_text_console_handle_keysym (s=0x0, keysym=31) at ../ui/console-vc.c:332
+332	        } else if (s->echo && (keysym == '\r' || keysym == '\n')) {
+(gdb) bt
+ #0  0x00005555559ee186 in qemu_text_console_handle_keysym (s=0x0, keysym=31) at ../ui/console-vc.c:332
+ #1  0x00005555559e18e5 in qemu_text_console_put_keysym (s=<optimized out>, keysym=<optimized out>) at ../ui/console.c:303
+ #2  0x00005555559f2e88 in do_key_event (vs=vs@entry=0x5555579045c0, down=down@entry=1, keycode=keycode@entry=60, sym=sym@entry=65471) at ../ui/vnc.c:2034
+ #3  0x00005555559f845c in ext_key_event (vs=0x5555579045c0, down=1, sym=65471, keycode=<optimized out>) at ../ui/vnc.c:2070
+ #4  protocol_client_msg (vs=0x5555579045c0, data=<optimized out>, len=<optimized out>) at ../ui/vnc.c:2514
+ #5  0x00005555559f515c in vnc_client_read (vs=0x5555579045c0) at ../ui/vnc.c:1607
+
+Fixes: e99441a3793b5 ("ui/curses: Do not use console_select()")
+Fixes: https://issues.redhat.com/browse/RHEL-50529
+Cc: qemu-stable@nongnu.org
+Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
+Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
+(picked from https://lore.kernel.org/qemu-devel/20240820131112.1267954-1-marcandre.lureau@redhat.com/)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ ui/vnc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/ui/vnc.c b/ui/vnc.c
+index b3fd78022b..953ea38318 100644
+--- a/ui/vnc.c
+++ b/ui/vnc.c
+@@ -1935,7 +1935,7 @@ static void do_key_event(VncState *vs, int down, int keycode, int sym)
+     }
+ 
+     qkbd_state_key_event(vs->vd->kbd, qcode, down);
+-    if (!qemu_console_is_graphic(vs->vd->dcl.con)) {
+    if (QEMU_IS_TEXT_CONSOLE(vs->vd->dcl.con)) {
+         QemuTextConsole *con = QEMU_TEXT_CONSOLE(vs->vd->dcl.con);
+         bool numlock = qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_NUMLOCK);
+         bool control = qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_CTRL);
--- a/debian/patches/extra/0035-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch
+++ b/debian/patches/extra/0035-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch
@ -0,0 +1,89 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Eric Blake <eblake@redhat.com>
+Date: Thu, 22 Aug 2024 09:35:29 -0500
+Subject: [PATCH] nbd/server: CVE-2024-7409: Avoid use-after-free when closing
+ server
+
+Commit 3e7ef738 plugged the use-after-free of the global nbd_server
+object, but overlooked a use-after-free of nbd_server->listener.
+Although this race is harder to hit, notice that our shutdown path
+first drops the reference count of nbd_server->listener, then triggers
+actions that can result in a pending client reaching the
+nbd_blockdev_client_closed() callback, which in turn calls
+qio_net_listener_set_client_func on a potentially stale object.
+
+If we know we don't want any more clients to connect, and have already
+told the listener socket to shut down, then we should not be trying to
+update the listener socket's associated function.
+
+Reproducer:
+
+> #!/usr/bin/python3
+>
+> import os
+> from threading import Thread
+>
+> def start_stop():
+>     while 1:
+>         os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start",
+"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'')
+>         os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'')
+>
+> def nbd_list():
+>     while 1:
+>         os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock')
+>
+> def test():
+>     sst = Thread(target=start_stop)
+>     sst.start()
+>     nlt = Thread(target=nbd_list)
+>     nlt.start()
+>
+>     sst.join()
+>     nlt.join()
+>
+> test()
+
+Fixes: CVE-2024-7409
+Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop")
+CC: qemu-stable@nongnu.org
+Reported-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
+Signed-off-by: Eric Blake <eblake@redhat.com>
+Message-ID: <20240822143617.800419-2-eblake@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+(cherry picked from commit 3874f5f73c441c52f1c699c848d463b0eda01e4c)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ blockdev-nbd.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/blockdev-nbd.c b/blockdev-nbd.c
+index f73409ae49..b36f41b7c5 100644
+--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
+@@ -92,10 +92,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
+ 
+ static void nbd_update_server_watch(NBDServerData *s)
+ {
+-    if (!s->max_connections || s->connections < s->max_connections) {
+-        qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL);
+-    } else {
+-        qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
+    if (s->listener) {
+        if (!s->max_connections || s->connections < s->max_connections) {
+            qio_net_listener_set_client_func(s->listener, nbd_accept, NULL,
+                                             NULL);
+        } else {
+            qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
+        }
+     }
+ }
+ 
+@@ -113,6 +116,7 @@ static void nbd_server_free(NBDServerData *server)
+      */
+     qio_net_listener_disconnect(server->listener);
+     object_unref(OBJECT(server->listener));
+    server->listener = NULL;
+     QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
+         qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
+                              NULL);
--- a/debian/patches/extra/0036-softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch
+++ b/debian/patches/extra/0036-softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch
@ -0,0 +1,134 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Wed, 28 Aug 2024 11:07:43 +0200
+Subject: [PATCH] softmmu/physmem: fix memory leak in dirty_memory_extend()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+As reported by Peter, we might be leaking memory when removing the
+highest RAMBlock (in the weird ram_addr_t space), and adding a new one.
+
+We will fail to realize that we already allocated bitmaps for more
+dirty memory blocks, and effectively discard the pointers to them.
+
+Fix it by getting rid of last_ram_page() and by remembering the number
+of dirty memory blocks that have been allocated already.
+
+While at it, let's use "unsigned int" for the number of blocks, which
+should be sufficient until we reach ~32 exabytes.
+
+Looks like this leak was introduced as we switched from using a single
+bitmap_zero_extend() to allocating multiple bitmaps:
+bitmap_zero_extend() relies on g_renew() which should have taken care of
+this.
+
+Resolves: https://lkml.kernel.org/r/CAFEAcA-k7a+VObGAfCFNygQNfCKL=AfX6A4kScq=VSSK0peqPg@mail.gmail.com
+Reported-by: Peter Maydell <peter.maydell@linaro.org>
+Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM hotplug")
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Tested-by: Peter Maydell <peter.maydell@linaro.org>
+Cc: qemu-stable@nongnu.org
+Cc: Stefan Hajnoczi <stefanha@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: "Philippe Mathieu-Daudé" <philmd@linaro.org>
+Signed-off-by: David Hildenbrand <david@redhat.com>
+(picked from https://lore.kernel.org/qemu-devel/20240828090743.128647-1-david@redhat.com/)
+[FE: backport - remove not-yet-existing variable in context of hunk touching ram_block_add()]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ include/exec/ramlist.h |  1 +
+ system/physmem.c       | 35 +++++++++--------------------------
+ 2 files changed, 10 insertions(+), 26 deletions(-)
+
+diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
+index 2ad2a81acc..d9cfe530be 100644
+--- a/include/exec/ramlist.h
+++ b/include/exec/ramlist.h
+@@ -50,6 +50,7 @@ typedef struct RAMList {
+     /* RCU-enabled, writes protected by the ramlist lock. */
+     QLIST_HEAD(, RAMBlock) blocks;
+     DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM];
+    unsigned int num_dirty_blocks;
+     uint32_t version;
+     QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
+ } RAMList;
+diff --git a/system/physmem.c b/system/physmem.c
+index a4fe3d2bf8..78f7db1121 100644
+--- a/system/physmem.c
+++ b/system/physmem.c
+@@ -1497,18 +1497,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
+     return offset;
+ }
+ 
+-static unsigned long last_ram_page(void)
+-{
+-    RAMBlock *block;
+-    ram_addr_t last = 0;
+-
+-    RCU_READ_LOCK_GUARD();
+-    RAMBLOCK_FOREACH(block) {
+-        last = MAX(last, block->offset + block->max_length);
+-    }
+-    return last >> TARGET_PAGE_BITS;
+-}
+-
+ static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
+ {
+     int ret;
+@@ -1762,13 +1750,11 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
+ }
+ 
+ /* Called with ram_list.mutex held */
+-static void dirty_memory_extend(ram_addr_t old_ram_size,
+-                                ram_addr_t new_ram_size)
+static void dirty_memory_extend(ram_addr_t new_ram_size)
+ {
+-    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
+-                                             DIRTY_MEMORY_BLOCK_SIZE);
+-    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
+-                                             DIRTY_MEMORY_BLOCK_SIZE);
+    unsigned int old_num_blocks = ram_list.num_dirty_blocks;
+    unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size,
+                                               DIRTY_MEMORY_BLOCK_SIZE);
+     int i;
+ 
+     /* Only need to extend if block count increased */
+@@ -1800,6 +1786,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
+             g_free_rcu(old_blocks, rcu);
+         }
+     }
+
+    ram_list.num_dirty_blocks = new_num_blocks;
+ }
+ 
+ static void ram_block_add(RAMBlock *new_block, Error **errp)
+@@ -1808,11 +1796,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
+     const bool shared = qemu_ram_is_shared(new_block);
+     RAMBlock *block;
+     RAMBlock *last_block = NULL;
+-    ram_addr_t old_ram_size, new_ram_size;
+    ram_addr_t ram_size;
+     Error *err = NULL;
+ 
+-    old_ram_size = last_ram_page();
+-
+     qemu_mutex_lock_ramlist();
+     new_block->offset = find_ram_offset(new_block->max_length);
+ 
+@@ -1840,11 +1826,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
+         }
+     }
+ 
+-    new_ram_size = MAX(old_ram_size,
+-              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
+-    if (new_ram_size > old_ram_size) {
+-        dirty_memory_extend(old_ram_size, new_ram_size);
+-    }
+    ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
+    dirty_memory_extend(ram_size);
+     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
+      * QLIST (which has an RCU-friendly variant) does not have insertion at
+      * tail, so save the last element in last_block.
--- a/debian/patches/extra/0037-block-reqlist-allow-adding-overlapping-requests.patch
+++ b/debian/patches/extra/0037-block-reqlist-allow-adding-overlapping-requests.patch
@ -0,0 +1,104 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 7 Nov 2024 17:51:13 +0100
+Subject: [PATCH] block/reqlist: allow adding overlapping requests
+
+Allow overlapping requests by removing the assert that made them
+impossible. There are only two callers:
+
+1. block_copy_task_create()
+
+It already asserts the very same condition before calling
+reqlist_init_req().
+
+2. cbw_snapshot_read_lock()
+
+There is no need to have read requests be non-overlapping in
+copy-before-write when used for snapshot-access. In fact, there was no
+protection against two callers of cbw_snapshot_read_lock() calling
+reqlist_init_req() with overlapping ranges and this could lead to an
+assertion failure [1].
+
+In particular, with the reproducer script below [0], two
+cbw_co_snapshot_block_status() callers could race, with the second
+calling reqlist_init_req() before the first one finishes and removes
+its conflicting request.
+
+[0]:
+
+> #!/bin/bash -e
+> dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024
+> ./qemu-img create /tmp/fleecing.raw -f raw 1G
+> (
+> ./qemu-system-x86_64 --qmp stdio \
+> --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \
+> --blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \
+> <<EOF
+> {"execute": "qmp_capabilities"}
+> {"execute": "blockdev-add", "arguments": { "driver": "copy-before-write", "file": "node0", "target": "node1", "node-name": "node3" } }
+> {"execute": "blockdev-add", "arguments": { "driver": "snapshot-access", "file": "node3", "node-name": "snap0" } }
+> {"execute": "nbd-server-start", "arguments": {"addr": { "type": "unix", "data": { "path": "/tmp/nbd.socket" } } } }
+> {"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": "snap0", "type": "nbd", "name": "exp0"}}
+> EOF
+> ) &
+> sleep 5
+> while true; do
+> ./qemu-nbd -d /dev/nbd0
+> ./qemu-nbd -c /dev/nbd0 nbd:unix:/tmp/nbd.socket:exportname=exp0 -f raw -r
+> nbdinfo --map 'nbd+unix:///exp0?socket=/tmp/nbd.socket'
+> done
+
+[1]:
+
+> #5  0x000071e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101
+> #6  0x0000615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23
+> #7  0x00006152853e2d98 in cbw_snapshot_read_lock (...) at ../block/copy-before-write.c:237
+> #8  0x00006152853e3068 in cbw_co_snapshot_block_status (...) at ../block/copy-before-write.c:304
+> #9  0x00006152853f4d22 in bdrv_co_snapshot_block_status (...) at ../block/io.c:3726
+> #10 0x000061528543a63e in snapshot_access_co_block_status (...) at ../block/snapshot-access.c:48
+> #11 0x00006152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474
+> #12 0x00006152853f2016 in bdrv_co_common_block_status_above (...) at ../block/io.c:2652
+> #13 0x00006152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732
+> #14 0x00006152853d9a86 in blk_co_block_status_above (...) at ../block/block-backend.c:1473
+> #15 0x000061528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374
+> #16 0x000061528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481
+> #17 0x000061528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978
+> #18 0x000061528538f906 in nbd_trip (...) at ../nbd/server.c:3121
+> #19 0x00006152855a7caf in coroutine_trampoline (...) at ../util/coroutine-ucontext.c:175
+
+Cc: qemu-stable@nongnu.org
+Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+---
+ block/copy-before-write.c | 3 ++-
+ block/reqlist.c           | 2 --
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 8aba27a71d..3698b3bc60 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -65,7 +65,8 @@ typedef struct BDRVCopyBeforeWriteState {
+ 
+     /*
+      * @frozen_read_reqs: current read requests for fleecing user in bs->file
+-     * node. These areas must not be rewritten by guest.
+     * node. These areas must not be rewritten by guest. There can be multiple
+     * overlapping read requests.
+      */
+     BlockReqList frozen_read_reqs;
+ 
+diff --git a/block/reqlist.c b/block/reqlist.c
+index 08cb57cfa4..098e807378 100644
+--- a/block/reqlist.c
+++ b/block/reqlist.c
+@@ -20,8 +20,6 @@
+ void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
+                       int64_t bytes)
+ {
+-    assert(!reqlist_find_conflict(reqs, offset, bytes));
+-
+     *req = (BlockReq) {
+         .offset = offset,
+         .bytes = bytes,
--- a/debian/patches/pve/0001-PVE-Config-block-file-change-locking-default-to-off.patch
+++ b/debian/patches/pve/0001-PVE-Config-block-file-change-locking-default-to-off.patch
@ -14,7 +14,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/block/file-posix.c b/block/file-posix.c
-index 7f540b03ed..ca551baa42 100644
+index 35684f7e21..43bc0bd520 100644
 --- a/block/file-posix.c
 +++ b/block/file-posix.c
@@ -563,7 +563,7 @@ static QemuOptsList raw_runtime_opts = {
--- a/debian/patches/pve/0002-PVE-Config-Adjust-network-script-path-to-etc-kvm.patch
+++ b/debian/patches/pve/0002-PVE-Config-Adjust-network-script-path-to-etc-kvm.patch
@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/include/net/net.h b/include/net/net.h
-index 685ec58318..22edf4ee96 100644
+index b1f9b35fcc..096c0d52e4 100644
 --- a/include/net/net.h
 +++ b/include/net/net.h
-@@ -260,8 +260,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
+@@ -317,8 +317,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
 int net_hub_id_for_client(NetClientState *nc, int *id);
 NetClientState *net_hub_port_find(int hub_id);
 
--- a/debian/patches/pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
+++ b/debian/patches/pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
@ -10,10 +10,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/target/i386/cpu.h b/target/i386/cpu.h
-index 0893b794e9..6d650a58b9 100644
+index 6b05738079..d82869900a 100644
 --- a/target/i386/cpu.h
 +++ b/target/i386/cpu.h
-@@ -2243,9 +2243,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
+@@ -2291,9 +2291,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
 #define CPU_RESOLVING_TYPE TYPE_X86_CPU
 
 #ifdef TARGET_X86_64
--- a/debian/patches/pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
+++ b/debian/patches/pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 9 insertions(+), 6 deletions(-)

 diff --git a/ui/spice-core.c b/ui/spice-core.c
-index 52a59386d7..b20c25aee0 100644
+index 15be640286..ea20e6153c 100644
 --- a/ui/spice-core.c
 +++ b/ui/spice-core.c
-@@ -691,32 +691,35 @@ static void qemu_spice_init(void)
+@@ -690,32 +690,35 @@ static void qemu_spice_init(void)
 
     if (tls_port) {
         x509_dir = qemu_opt_get(opts, "x509-dir");
--- a/debian/patches/pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
+++ b/debian/patches/pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
@ -9,7 +9,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 11 insertions(+), 4 deletions(-)

 diff --git a/block/gluster.c b/block/gluster.c
-index ad5fadbe79..d0011085c4 100644
+index cc74af06dc..3ba9bbfa5e 100644
 --- a/block/gluster.c
 +++ b/block/gluster.c
@@ -43,7 +43,7 @@
--- a/debian/patches/pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
+++ b/debian/patches/pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
@ -18,7 +18,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+)

 diff --git a/block/rbd.c b/block/rbd.c
-index 978671411e..a4749f3b1b 100644
+index 84bb2fa5d7..63f60d41be 100644
 --- a/block/rbd.c
 +++ b/block/rbd.c
@@ -963,6 +963,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
--- a/debian/patches/pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
+++ b/debian/patches/pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
@ -16,7 +16,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 9 insertions(+), 1 deletion(-)

 diff --git a/block/gluster.c b/block/gluster.c
-index d0011085c4..2df3d6e35d 100644
+index 3ba9bbfa5e..34936eb855 100644
 --- a/block/gluster.c
 +++ b/block/gluster.c
@@ -58,6 +58,7 @@ typedef struct GlusterAIOCB {
@ -39,7 +39,7 @@ index d0011085c4..2df3d6e35d 100644
     }
 
     aio_co_schedule(acb->aio_context, acb->coroutine);
-@@ -1021,6 +1024,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
+@@ -1023,6 +1026,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);
@ -47,7 +47,7 @@ index d0011085c4..2df3d6e35d 100644
 
     ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
     if (ret < 0) {
-@@ -1201,9 +1205,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
+@@ -1203,9 +1207,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
     acb.aio_context = bdrv_get_aio_context(bs);
 
     if (write) {
@ -59,7 +59,7 @@ index d0011085c4..2df3d6e35d 100644
         ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
                                 gluster_finish_aiocb, &acb);
     }
-@@ -1266,6 +1272,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
+@@ -1268,6 +1274,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);
@ -67,7 +67,7 @@ index d0011085c4..2df3d6e35d 100644
 
     ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
     if (ret < 0) {
-@@ -1314,6 +1321,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
+@@ -1316,6 +1323,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);
--- a/debian/patches/pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
+++ b/debian/patches/pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/qemu-img.c b/qemu-img.c
-index 78433f3746..25d427edd1 100644
+index 7668f86769..2575e97b43 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
-@@ -3062,7 +3062,8 @@ static int img_info(int argc, char **argv)
+@@ -3075,7 +3075,8 @@ static int img_info(int argc, char **argv)
     list = collect_image_info_list(image_opts, filename, fmt, chain,
                                    force_share);
     if (!list) {
--- a/debian/patches/pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
+++ b/debian/patches/pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
@ -38,10 +38,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 2 files changed, 133 insertions(+), 73 deletions(-)

 diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
-index 1b1dab5b17..d1616c045a 100644
+index c9dd70a892..048788b23d 100644
 --- a/qemu-img-cmds.hx
 +++ b/qemu-img-cmds.hx
-@@ -58,9 +58,9 @@ SRST
+@@ -60,9 +60,9 @@ SRST
 ERST
 
 DEF("dd", img_dd,
@ -54,10 +54,10 @@ index 1b1dab5b17..d1616c045a 100644
 
 DEF("info", img_info,
 diff --git a/qemu-img.c b/qemu-img.c
-index 25d427edd1..220e6ec577 100644
+index 2575e97b43..8ec68b346f 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
-@@ -4899,10 +4899,12 @@ static int img_bitmap(int argc, char **argv)
+@@ -4993,10 +4993,12 @@ static int img_bitmap(int argc, char **argv)
 #define C_IF      04
 #define C_OF      010
 #define C_SKIP    020
@ -70,7 +70,7 @@ index 25d427edd1..220e6ec577 100644
 };
 
 struct DdIo {
-@@ -4978,6 +4980,19 @@ static int img_dd_skip(const char *arg,
+@@ -5072,6 +5074,19 @@ static int img_dd_skip(const char *arg,
     return 0;
 }
 
@ -90,7 +90,7 @@ index 25d427edd1..220e6ec577 100644
 static int img_dd(int argc, char **argv)
 {
     int ret = 0;
-@@ -5018,6 +5033,7 @@ static int img_dd(int argc, char **argv)
+@@ -5112,6 +5127,7 @@ static int img_dd(int argc, char **argv)
         { "if", img_dd_if, C_IF },
         { "of", img_dd_of, C_OF },
         { "skip", img_dd_skip, C_SKIP },
@ -98,7 +98,7 @@ index 25d427edd1..220e6ec577 100644
         { NULL, NULL, 0 }
     };
     const struct option long_options[] = {
-@@ -5093,91 +5109,112 @@ static int img_dd(int argc, char **argv)
+@@ -5187,91 +5203,112 @@ static int img_dd(int argc, char **argv)
         arg = NULL;
     }
 
@ -275,7 +275,7 @@ index 25d427edd1..220e6ec577 100644
     }
 
     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
-@@ -5194,20 +5231,43 @@ static int img_dd(int argc, char **argv)
+@@ -5288,20 +5325,43 @@ static int img_dd(int argc, char **argv)
     in.buf = g_new(uint8_t, in.bsz);
 
     for (out_pos = 0; in_pos < size; ) {
--- a/debian/patches/pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
+++ b/debian/patches/pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
@ -16,10 +16,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 1 file changed, 25 insertions(+), 3 deletions(-)

 diff --git a/qemu-img.c b/qemu-img.c
-index 220e6ec577..58bf9b43d1 100644
+index 8ec68b346f..b98184bba1 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
-@@ -4900,11 +4900,13 @@ static int img_bitmap(int argc, char **argv)
+@@ -4994,11 +4994,13 @@ static int img_bitmap(int argc, char **argv)
 #define C_OF      010
 #define C_SKIP    020
 #define C_OSIZE   040
@ -33,7 +33,7 @@ index 220e6ec577..58bf9b43d1 100644
 };
 
 struct DdIo {
-@@ -4993,6 +4995,19 @@ static int img_dd_osize(const char *arg,
+@@ -5087,6 +5089,19 @@ static int img_dd_osize(const char *arg,
     return 0;
 }
 
@ -53,7 +53,7 @@ index 220e6ec577..58bf9b43d1 100644
 static int img_dd(int argc, char **argv)
 {
     int ret = 0;
-@@ -5007,12 +5022,14 @@ static int img_dd(int argc, char **argv)
+@@ -5101,12 +5116,14 @@ static int img_dd(int argc, char **argv)
     int c, i;
     const char *out_fmt = "raw";
     const char *fmt = NULL;
@ -69,7 +69,7 @@ index 220e6ec577..58bf9b43d1 100644
     };
     struct DdIo in = {
         .bsz = 512, /* Block size is by default 512 bytes */
-@@ -5034,6 +5051,7 @@ static int img_dd(int argc, char **argv)
+@@ -5128,6 +5145,7 @@ static int img_dd(int argc, char **argv)
         { "of", img_dd_of, C_OF },
         { "skip", img_dd_skip, C_SKIP },
         { "osize", img_dd_osize, C_OSIZE },
@ -77,7 +77,7 @@ index 220e6ec577..58bf9b43d1 100644
         { NULL, NULL, 0 }
     };
     const struct option long_options[] = {
-@@ -5230,9 +5248,10 @@ static int img_dd(int argc, char **argv)
+@@ -5324,9 +5342,10 @@ static int img_dd(int argc, char **argv)
 
     in.buf = g_new(uint8_t, in.bsz);
 
@ -90,7 +90,7 @@ index 220e6ec577..58bf9b43d1 100644
         if (blk1) {
             in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
             if (in_ret == 0) {
-@@ -5241,6 +5260,9 @@ static int img_dd(int argc, char **argv)
+@@ -5335,6 +5354,9 @@ static int img_dd(int argc, char **argv)
         } else {
             in_ret = read(STDIN_FILENO, in.buf, bytes);
             if (in_ret == 0) {
--- a/debian/patches/pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
+++ b/debian/patches/pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
@ -13,10 +13,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 3 files changed, 26 insertions(+), 12 deletions(-)

 diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
-index 15aeddc6d8..5e713e231d 100644
+index 3653adb963..d83e8fb3c0 100644
 --- a/docs/tools/qemu-img.rst
 +++ b/docs/tools/qemu-img.rst
-@@ -208,6 +208,10 @@ Parameters to convert subcommand:
+@@ -212,6 +212,10 @@ Parameters to convert subcommand:
 
 Parameters to dd subcommand:
 
@ -27,7 +27,7 @@ index 15aeddc6d8..5e713e231d 100644
 .. program:: qemu-img-dd
 
 .. option:: bs=BLOCK_SIZE
-@@ -488,7 +492,7 @@ Command description:
+@@ -492,7 +496,7 @@ Command description:
   it doesn't need to be specified separately in this case.
 
 
@ -36,7 +36,7 @@ index 15aeddc6d8..5e713e231d 100644
 
   dd copies from *INPUT* file to *OUTPUT* file converting it from
   *FMT* format to *OUTPUT_FMT* format.
-@@ -499,6 +503,11 @@ Command description:
+@@ -503,6 +507,11 @@ Command description:
 
   The size syntax is similar to :manpage:`dd(1)`'s size syntax.
 
@ -49,10 +49,10 @@ index 15aeddc6d8..5e713e231d 100644
 
   Give information about the disk image *FILENAME*. Use it in
 diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
-index d1616c045a..b5b0bb4467 100644
+index 048788b23d..0b29a67a06 100644
 --- a/qemu-img-cmds.hx
 +++ b/qemu-img-cmds.hx
-@@ -58,9 +58,9 @@ SRST
+@@ -60,9 +60,9 @@ SRST
 ERST
 
 DEF("dd", img_dd,
@ -65,10 +65,10 @@ index d1616c045a..b5b0bb4467 100644
 
 DEF("info", img_info,
 diff --git a/qemu-img.c b/qemu-img.c
-index 58bf9b43d1..9d414d639b 100644
+index b98184bba1..6fc8384f64 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
-@@ -5024,7 +5024,7 @@ static int img_dd(int argc, char **argv)
+@@ -5118,7 +5118,7 @@ static int img_dd(int argc, char **argv)
     const char *fmt = NULL;
     int64_t size = 0, readsize = 0;
     int64_t out_pos, in_pos;
@ -77,7 +77,7 @@ index 58bf9b43d1..9d414d639b 100644
     struct DdInfo dd = {
         .flags = 0,
         .count = 0,
-@@ -5062,7 +5062,7 @@ static int img_dd(int argc, char **argv)
+@@ -5156,7 +5156,7 @@ static int img_dd(int argc, char **argv)
         { 0, 0, 0, 0 }
     };
 
@ -86,7 +86,7 @@ index 58bf9b43d1..9d414d639b 100644
         if (c == EOF) {
             break;
         }
-@@ -5082,6 +5082,9 @@ static int img_dd(int argc, char **argv)
+@@ -5176,6 +5176,9 @@ static int img_dd(int argc, char **argv)
         case 'h':
             help();
             break;
@ -96,7 +96,7 @@ index 58bf9b43d1..9d414d639b 100644
         case 'U':
             force_share = true;
             break;
-@@ -5212,13 +5215,15 @@ static int img_dd(int argc, char **argv)
+@@ -5306,13 +5309,15 @@ static int img_dd(int argc, char **argv)
                                 size - in.bsz * in.offset, &error_abort);
         }
 
--- a/debian/patches/pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
+++ b/debian/patches/pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
@ -12,10 +12,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 3 files changed, 36 insertions(+), 7 deletions(-)

 diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
-index 5e713e231d..9390d5e5cf 100644
+index d83e8fb3c0..61c6b21859 100644
 --- a/docs/tools/qemu-img.rst
 +++ b/docs/tools/qemu-img.rst
-@@ -492,10 +492,10 @@ Command description:
+@@ -496,10 +496,10 @@ Command description:
   it doesn't need to be specified separately in this case.
 
 
@ -30,10 +30,10 @@ index 5e713e231d..9390d5e5cf 100644
   The data is by default read and written using blocks of 512 bytes but can be
   modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified
 diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
-index b5b0bb4467..36f97e1f19 100644
+index 0b29a67a06..758f397232 100644
 --- a/qemu-img-cmds.hx
 +++ b/qemu-img-cmds.hx
-@@ -58,9 +58,9 @@ SRST
+@@ -60,9 +60,9 @@ SRST
 ERST
 
 DEF("dd", img_dd,
@ -46,10 +46,10 @@ index b5b0bb4467..36f97e1f19 100644
 
 DEF("info", img_info,
 diff --git a/qemu-img.c b/qemu-img.c
-index 9d414d639b..e13a12137b 100644
+index 6fc8384f64..a6c88e0860 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
-@@ -5016,6 +5016,7 @@ static int img_dd(int argc, char **argv)
+@@ -5110,6 +5110,7 @@ static int img_dd(int argc, char **argv)
     BlockDriver *drv = NULL, *proto_drv = NULL;
     BlockBackend *blk1 = NULL, *blk2 = NULL;
     QemuOpts *opts = NULL;
@ -57,7 +57,7 @@ index 9d414d639b..e13a12137b 100644
     QemuOptsList *create_opts = NULL;
     Error *local_err = NULL;
     bool image_opts = false;
-@@ -5025,6 +5026,7 @@ static int img_dd(int argc, char **argv)
+@@ -5119,6 +5120,7 @@ static int img_dd(int argc, char **argv)
     int64_t size = 0, readsize = 0;
     int64_t out_pos, in_pos;
     bool force_share = false, skip_create = false;
@ -65,7 +65,7 @@ index 9d414d639b..e13a12137b 100644
     struct DdInfo dd = {
         .flags = 0,
         .count = 0,
-@@ -5062,7 +5064,7 @@ static int img_dd(int argc, char **argv)
+@@ -5156,7 +5158,7 @@ static int img_dd(int argc, char **argv)
         { 0, 0, 0, 0 }
     };
 
@ -74,7 +74,7 @@ index 9d414d639b..e13a12137b 100644
         if (c == EOF) {
             break;
         }
-@@ -5085,6 +5087,19 @@ static int img_dd(int argc, char **argv)
+@@ -5179,6 +5181,19 @@ static int img_dd(int argc, char **argv)
         case 'n':
             skip_create = true;
             break;
@ -94,7 +94,7 @@ index 9d414d639b..e13a12137b 100644
         case 'U':
             force_share = true;
             break;
-@@ -5144,11 +5159,24 @@ static int img_dd(int argc, char **argv)
+@@ -5238,11 +5253,24 @@ static int img_dd(int argc, char **argv)
     if (dd.flags & C_IF) {
         blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
                         force_share);
@ -120,7 +120,7 @@ index 9d414d639b..e13a12137b 100644
     }
 
     if (dd.flags & C_OSIZE) {
-@@ -5303,6 +5331,7 @@ static int img_dd(int argc, char **argv)
+@@ -5397,6 +5425,7 @@ static int img_dd(int argc, char **argv)
 out:
     g_free(arg);
     qemu_opts_del(opts);
--- a/debian/patches/pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
+++ b/debian/patches/pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
@ -18,10 +18,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 4 files changed, 82 insertions(+), 4 deletions(-)

 diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
-index c3e55ef9e9..0e32e6201f 100644
+index a6ff6a4875..e7f74d1c63 100644
 --- a/hw/core/machine-hmp-cmds.c
 +++ b/hw/core/machine-hmp-cmds.c
-@@ -169,7 +169,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
+@@ -175,7 +175,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
         return;
     }
 
@ -59,10 +59,10 @@ index c3e55ef9e9..0e32e6201f 100644
     qapi_free_BalloonInfo(info);
 }
 diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
-index d004cf29d2..2660ed520b 100644
+index 609e39a821..8cb6dfcac3 100644
 --- a/hw/virtio/virtio-balloon.c
 +++ b/hw/virtio/virtio-balloon.c
-@@ -782,8 +782,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
+@@ -781,8 +781,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
 static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
 {
     VirtIOBalloon *dev = opaque;
@ -103,10 +103,10 @@ index d004cf29d2..2660ed520b 100644
 
 static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
 diff --git a/qapi/machine.json b/qapi/machine.json
-index a08b6576ca..5c9a4d55f4 100644
+index e8b60641f2..2054cdc70d 100644
 --- a/qapi/machine.json
 +++ b/qapi/machine.json
-@@ -1063,9 +1063,29 @@
+@@ -1079,9 +1079,29 @@
 # @actual: the logical size of the VM in bytes Formula used:
 #     logical_vm_size = vm_ram_size - balloon_size
 #
@ -138,10 +138,10 @@ index a08b6576ca..5c9a4d55f4 100644
 ##
 # @query-balloon:
 diff --git a/qapi/pragma.json b/qapi/pragma.json
-index 7f810b0e97..325e684411 100644
+index 59fbe74b8c..be8fa304c5 100644
 --- a/qapi/pragma.json
 +++ b/qapi/pragma.json
-@@ -35,6 +35,7 @@
+@@ -90,6 +90,7 @@
     'member-name-exceptions': [     # visible in:
         'ACPISlotType',             # query-acpi-ospm-status
         'AcpiTableOptions',         # -acpitable
--- a/debian/patches/pve/0014-PVE-qapi-modify-query-machines.patch
+++ b/debian/patches/pve/0014-PVE-qapi-modify-query-machines.patch
@ -13,10 +13,10 @@ Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
 2 files changed, 9 insertions(+), 1 deletion(-)

 diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
-index 3860a50c3b..40821e2317 100644
+index 4b72009cd3..314351cdff 100644
 --- a/hw/core/machine-qmp-cmds.c
 +++ b/hw/core/machine-qmp-cmds.c
-@@ -91,6 +91,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
+@@ -90,6 +90,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
         info->numa_mem_supported = mc->numa_mem_supported;
         info->deprecated = !!mc->deprecation_reason;
         info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi");
@ -30,10 +30,10 @@ index 3860a50c3b..40821e2317 100644
             info->default_cpu_type = g_strdup(mc->default_cpu_type);
         }
 diff --git a/qapi/machine.json b/qapi/machine.json
-index 5c9a4d55f4..fbb61f18e4 100644
+index 2054cdc70d..a024d5b05d 100644
 --- a/qapi/machine.json
 +++ b/qapi/machine.json
-@@ -139,6 +139,8 @@
+@@ -146,6 +146,8 @@
 #
 # @is-default: whether the machine is default
 #
@ -42,7 +42,7 @@ index 5c9a4d55f4..fbb61f18e4 100644
 # @cpu-max: maximum number of CPUs supported by the machine type
 #     (since 1.5)
 #
-@@ -163,7 +165,7 @@
+@@ -170,7 +172,7 @@
 ##
 { 'struct': 'MachineInfo',
   'data': { 'name': 'str', '*alias': 'str',
--- a/debian/patches/pve/0015-PVE-qapi-modify-spice-query.patch
+++ b/debian/patches/pve/0015-PVE-qapi-modify-spice-query.patch
@ -14,10 +14,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 2 files changed, 7 insertions(+)

 diff --git a/qapi/ui.json b/qapi/ui.json
-index 006616aa77..dfd1d3e36b 100644
+index f610bce118..6ea26a9acb 100644
 --- a/qapi/ui.json
 +++ b/qapi/ui.json
-@@ -317,11 +317,14 @@
+@@ -314,11 +314,14 @@
 #
 # @channels: a list of @SpiceChannel for each active spice channel
 #
@ -33,7 +33,7 @@ index 006616aa77..dfd1d3e36b 100644
   'if': 'CONFIG_SPICE' }
 
 diff --git a/ui/spice-core.c b/ui/spice-core.c
-index b20c25aee0..26baeb7846 100644
+index ea20e6153c..55a15fba8b 100644
 --- a/ui/spice-core.c
 +++ b/ui/spice-core.c
@@ -548,6 +548,10 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
--- a/debian/patches/pve/0016-PVE-add-IOChannel-implementation-for-savevm-async.patch
+++ b/debian/patches/pve/0016-PVE-add-IOChannel-implementation-for-savevm-async.patch
@ -14,20 +14,21 @@ Additionally, allows tracking the current position from the outside
 (intended to be used for progress tracking).

 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
- migration/channel-savevm-async.c | 183 +++++++++++++++++++++++++++++++
+ migration/channel-savevm-async.c | 184 +++++++++++++++++++++++++++++++
 migration/channel-savevm-async.h |  51 +++++++++
 migration/meson.build            |   1 +
- 3 files changed, 235 insertions(+)
+ 3 files changed, 236 insertions(+)
 create mode 100644 migration/channel-savevm-async.c
 create mode 100644 migration/channel-savevm-async.h

 diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
 new file mode 100644
-index 0000000000..aab081ce07
+index 0000000000..081a192f49
 --- /dev/null
 +++ b/migration/channel-savevm-async.c
-@@ -0,0 +1,183 @@
+@@ -0,0 +1,184 @@
 +/*
 + * QIO Channel implementation to be used by savevm-async QMP calls
 + */
@ -174,8 +175,9 @@ index 0000000000..aab081ce07
 +
 +static void
 +qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc,
-+                                            AioContext *ctx,
+                                            AioContext *read_ctx,
 +                                            IOHandler *io_read,
+                                            AioContext *write_ctx,
 +                                            IOHandler *io_write,
 +                                            void *opaque)
 +{
@ -269,7 +271,7 @@ index 0000000000..17ae2cb261
 +
 +#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
 diff --git a/migration/meson.build b/migration/meson.build
-index 1ae28523a1..37ddcb5d60 100644
+index 1eeb915ff6..95d1cf2250 100644
 --- a/migration/meson.build
 +++ b/migration/meson.build
@@ -13,6 +13,7 @@ system_ss.add(files(
--- a/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
+++ b/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
@ -27,7 +27,9 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
 [FE: further improve aborting
     adapt to removal of QEMUFileOps
     improve condition for entering final stage
-     adapt to QAPI and other changes for 8.0]
+     adapt to QAPI and other changes for 8.2
+     make sure to not call vm_start() from coroutine
+     stop CPU throttling after finishing]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 hmp-commands-info.hx         |  13 +
@ -35,17 +37,17 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 include/migration/snapshot.h |   2 +
 include/monitor/hmp.h        |   3 +
 migration/meson.build        |   1 +
- migration/savevm-async.c     | 531 +++++++++++++++++++++++++++++++++++
+ migration/savevm-async.c     | 545 +++++++++++++++++++++++++++++++++++
 monitor/hmp-cmds.c           |  38 +++
 qapi/migration.json          |  34 +++
- qapi/misc.json               |  16 ++
+ qapi/misc.json               |  18 ++
 qemu-options.hx              |  12 +
- softmmu/vl.c                 |  10 +
- 11 files changed, 677 insertions(+)
+ system/vl.c                  |  10 +
+ 11 files changed, 693 insertions(+)
 create mode 100644 migration/savevm-async.c

 diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
-index f5b37eb74a..10fdd822e0 100644
+index ad1b1306e3..d5ab880492 100644
 --- a/hmp-commands-info.hx
 +++ b/hmp-commands-info.hx
@@ -525,6 +525,19 @@ SRST
@ -69,10 +71,10 @@ index f5b37eb74a..10fdd822e0 100644
         .name       = "balloon",
         .args_type  = "",
 diff --git a/hmp-commands.hx b/hmp-commands.hx
-index 2cbd0f77a0..e352f86872 100644
+index 2e2a3bcf98..7506de251c 100644
 --- a/hmp-commands.hx
 +++ b/hmp-commands.hx
-@@ -1865,3 +1865,20 @@ SRST
+@@ -1862,3 +1862,20 @@ SRST
   List event channels in the guest
 ERST
 #endif
@ -94,12 +96,12 @@ index 2cbd0f77a0..e352f86872 100644
 +        .coroutine  = true,
 +    },
 diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
-index e72083b117..c846d37806 100644
+index 9e4dcaaa75..2581730d74 100644
 --- a/include/migration/snapshot.h
 +++ b/include/migration/snapshot.h
-@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
-                     bool has_devices, strList *devices,
-                     Error **errp);
+@@ -68,4 +68,6 @@ bool delete_snapshot(const char *name,
+  */
+ void load_snapshot_resume(RunState state);
 
 +int load_snapshot_from_blockdev(const char *filename, Error **errp);
 +
@ -126,10 +128,10 @@ index 13f9a2dedb..7a7def7530 100644
 void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
 diff --git a/migration/meson.build b/migration/meson.build
-index 37ddcb5d60..07f6057acc 100644
+index 95d1cf2250..800f12a60d 100644
 --- a/migration/meson.build
 +++ b/migration/meson.build
-@@ -26,6 +26,7 @@ system_ss.add(files(
+@@ -28,6 +28,7 @@ system_ss.add(files(
   'options.c',
   'postcopy-ram.c',
   'savevm.c',
@ -139,10 +141,10 @@ index 37ddcb5d60..07f6057acc 100644
   'threadinfo.c',
 diff --git a/migration/savevm-async.c b/migration/savevm-async.c
 new file mode 100644
-index 0000000000..e9fc18fb10
+index 0000000000..1af32604c7
 --- /dev/null
 +++ b/migration/savevm-async.c
-@@ -0,0 +1,531 @@
+@@ -0,0 +1,545 @@
 +#include "qemu/osdep.h"
 +#include "migration/channel-savevm-async.h"
 +#include "migration/migration.h"
@ -153,6 +155,7 @@ index 0000000000..e9fc18fb10
 +#include "migration/global_state.h"
 +#include "migration/ram.h"
 +#include "migration/qemu-file.h"
+#include "sysemu/cpu-throttle.h"
 +#include "sysemu/sysemu.h"
 +#include "sysemu/runstate.h"
 +#include "block/block.h"
@ -300,7 +303,6 @@ index 0000000000..e9fc18fb10
 +static void process_savevm_finalize(void *opaque)
 +{
 +    int ret;
-+    AioContext *iohandler_ctx = iohandler_get_aio_context();
 +    MigrationState *ms = migrate_get_current();
 +
 +    bool aborted = savevm_aborted();
@ -317,9 +319,7 @@ index 0000000000..e9fc18fb10
 +     * so move it back. It can stay in the main context and live out its life
 +     * there, since we're done with it after this method ends anyway.
 +     */
-+    aio_context_acquire(iohandler_ctx);
 +    blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
-+    aio_context_release(iohandler_ctx);
 +
 +    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
 +    if (ret < 0) {
@ -344,6 +344,12 @@ index 0000000000..e9fc18fb10
 +        ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
 +    ms->to_dst_file = NULL;
 +
+    /*
+     * Same as in migration_iteration_finish(): saving RAM might've turned on CPU throttling for
+     * auto-converge, make sure to disable it.
+     */
+    cpu_throttle_stop();
+
 +    qemu_savevm_state_cleanup();
 +
 +    ret = save_snapshot_cleanup();
@ -396,12 +402,12 @@ index 0000000000..e9fc18fb10
 +         * lock. Similar to what is done in migration.c, call the exact variant
 +         * only once pend_precopy in the estimate is below the threshold.
 +         */
-+        qemu_mutex_unlock_iothread();
+        bql_unlock();
 +        qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
 +        if (pend_precopy <= threshold) {
 +            qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
 +        }
-+        qemu_mutex_lock_iothread();
+        bql_lock();
 +        pending_size = pend_precopy + pend_postcopy;
 +
 +        /*
@ -441,12 +447,12 @@ index 0000000000..e9fc18fb10
 +     * so move there now and after every flush.
 +     */
 +    aio_co_reschedule_self(qemu_get_aio_context());
-+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    bdrv_graph_co_rdlock();
+    bs = bdrv_first(&it);
+    bdrv_graph_co_rdunlock();
+    while (bs) {
 +        /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
-+        if (bs == blk_bs(snap_state.target)) {
-+            continue;
-+        }
-+
+        if (bs != blk_bs(snap_state.target)) {
 +            AioContext *bs_ctx = bdrv_get_aio_context(bs);
 +            if (bs_ctx != qemu_get_aio_context()) {
 +                DPRINTF("savevm: async flushing drive %s\n", bs->filename);
@ -457,6 +463,10 @@ index 0000000000..e9fc18fb10
 +                aio_co_reschedule_self(qemu_get_aio_context());
 +            }
 +        }
+        bdrv_graph_co_rdlock();
+        bs = bdrv_next(&it);
+        bdrv_graph_co_rdunlock();
+    }
 +
 +    DPRINTF("timing: async flushing took %ld ms\n",
 +        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
@ -478,7 +488,7 @@ index 0000000000..e9fc18fb10
 +        return;
 +    }
 +
-+    if (migration_is_running(ms->state)) {
+    if (migration_is_running()) {
 +        error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
 +        return;
 +    }
@ -535,9 +545,10 @@ index 0000000000..e9fc18fb10
 +     * State is cleared in process_savevm_co, but has to be initialized
 +     * here (blocking main thread, from QMP) to avoid race conditions.
 +     */
-+    migrate_init(ms);
+    if (migrate_init(ms, errp)) {
+        return;
+    }
 +    memset(&mig_stats, 0, sizeof(mig_stats));
-+    memset(&compression_counters, 0, sizeof(compression_counters));
 +    ms->to_dst_file = snap_state.file;
 +
 +    error_setg(&snap_state.blocker, "block device is in use by savevm");
@ -546,10 +557,8 @@ index 0000000000..e9fc18fb10
 +    snap_state.state = SAVE_STATE_ACTIVE;
 +    snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
 +    snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
-+    qemu_mutex_unlock_iothread();
 +    qemu_savevm_state_header(snap_state.file);
 +    qemu_savevm_state_setup(snap_state.file);
-+    qemu_mutex_lock_iothread();
 +
 +    /* Async processing from here on out happens in iohandler context, so let
 +     * the target bdrv have its home there.
@ -570,29 +579,10 @@ index 0000000000..e9fc18fb10
 +    }
 +}
 +
-+void coroutine_fn qmp_savevm_end(Error **errp)
+static void coroutine_fn wait_for_close_co(void *opaque)
 +{
 +    int64_t timeout;
 +
-+    if (snap_state.state == SAVE_STATE_DONE) {
-+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
-+                  "VM snapshot not started\n");
-+        return;
-+    }
-+
-+    if (snap_state.state == SAVE_STATE_ACTIVE) {
-+        snap_state.state = SAVE_STATE_CANCELLED;
-+        goto wait_for_close;
-+    }
-+
-+    if (snap_state.saved_vm_running) {
-+        vm_start();
-+        snap_state.saved_vm_running = false;
-+    }
-+
-+    snap_state.state = SAVE_STATE_DONE;
-+
-+wait_for_close:
 +    if (!snap_state.target) {
 +        DPRINTF("savevm-end: no target file open\n");
 +        return;
@ -620,6 +610,32 @@ index 0000000000..e9fc18fb10
 +    DPRINTF("savevm-end: cleanup done\n");
 +}
 +
+void qmp_savevm_end(Error **errp)
+{
+    if (snap_state.state == SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot not started\n");
+        return;
+    }
+
+    Coroutine *wait_for_close = qemu_coroutine_create(wait_for_close_co, NULL);
+
+    if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_CANCELLED;
+        qemu_coroutine_enter(wait_for_close);
+        return;
+    }
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    snap_state.state = SAVE_STATE_DONE;
+
+    qemu_coroutine_enter(wait_for_close);
+}
+
 +int load_snapshot_from_blockdev(const char *filename, Error **errp)
 +{
 +    BlockBackend *be;
@ -675,7 +691,7 @@ index 0000000000..e9fc18fb10
 +    return ret;
 +}
 diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
-index 6c559b48c8..91be698308 100644
+index 871898ac46..ef4634e5c1 100644
 --- a/monitor/hmp-cmds.c
 +++ b/monitor/hmp-cmds.c
@@ -22,6 +22,7 @@
@ -685,7 +701,7 @@ index 6c559b48c8..91be698308 100644
 +#include "qapi/qapi-commands-migration.h"
 #include "qapi/qapi-commands-misc.h"
 #include "qapi/qmp/qdict.h"
- #include "qapi/qmp/qerror.h"
+ #include "qemu/cutils.h"
@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
 
     mtree_info(flatview, dispatch_tree, owner, disabled);
@ -728,10 +744,10 @@ index 6c559b48c8..91be698308 100644
 +    }
 +}
 diff --git a/qapi/migration.json b/qapi/migration.json
-index 8843e74b59..aca0ca1ac1 100644
+index 8c65b90328..ed20d066cd 100644
 --- a/qapi/migration.json
 +++ b/qapi/migration.json
-@@ -291,6 +291,40 @@
+@@ -297,6 +297,40 @@
            '*dirty-limit-throttle-time-per-round': 'uint64',
            '*dirty-limit-ring-full-time': 'uint64'} }
 
@ -773,10 +789,10 @@ index 8843e74b59..aca0ca1ac1 100644
 # @query-migrate:
 #
 diff --git a/qapi/misc.json b/qapi/misc.json
-index cda2effa81..94a58bb0bf 100644
+index ec30e5c570..3c68633f68 100644
 --- a/qapi/misc.json
 +++ b/qapi/misc.json
-@@ -456,6 +456,22 @@
+@@ -454,6 +454,24 @@
 ##
 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
 
@ -785,6 +801,8 @@ index cda2effa81..94a58bb0bf 100644
 +#
 +# Prepare for snapshot and halt VM. Save VM state to statefile.
 +#
+# @statefile: target file that state should be written to.
+#
 +##
 +{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
 +
@ -794,16 +812,16 @@ index cda2effa81..94a58bb0bf 100644
 +# Resume VM after a snapshot.
 +#
 +##
-+{ 'command': 'savevm-end', 'coroutine': true }
+{ 'command': 'savevm-end' }
 +
 ##
 # @CommandLineParameterType:
 #
 diff --git a/qemu-options.hx b/qemu-options.hx
-index 8073f5edf5..dc1ececc9c 100644
+index 8ce85d4559..511ab9415e 100644
 --- a/qemu-options.hx
 +++ b/qemu-options.hx
-@@ -4483,6 +4483,18 @@ SRST
+@@ -4610,6 +4610,18 @@ SRST
     Start right away with a saved state (``loadvm`` in monitor)
 ERST
 
@ -822,11 +840,11 @@ index 8073f5edf5..dc1ececc9c 100644
 #ifndef _WIN32
 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
     "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
-diff --git a/softmmu/vl.c b/softmmu/vl.c
-index c9e9ede237..3f2681aded 100644
--- a/softmmu/vl.c
-+++ b/softmmu/vl.c
-@@ -164,6 +164,7 @@ static const char *accelerators;
+diff --git a/system/vl.c b/system/vl.c
+index c644222982..2738ab7c91 100644
+--- a/system/vl.c
+++ b/system/vl.c
+@@ -163,6 +163,7 @@ static const char *accelerators;
 static bool have_custom_ram_size;
 static const char *ram_memdev_id;
 static QDict *machine_opts_dict;
@ -834,10 +852,10 @@ index c9e9ede237..3f2681aded 100644
 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
 static int display_remote;
-@@ -2647,6 +2648,12 @@ void qmp_x_exit_preconfig(Error **errp)
- 
-     if (loadvm) {
+@@ -2712,6 +2713,12 @@ void qmp_x_exit_preconfig(Error **errp)
+         RunState state = autostart ? RUN_STATE_RUNNING : runstate_get();
         load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
+         load_snapshot_resume(state);
 +    } else if (loadstate) {
 +        Error *local_err = NULL;
 +        if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
@ -847,7 +865,7 @@ index c9e9ede237..3f2681aded 100644
     }
     if (replay_mode != REPLAY_MODE_NONE) {
         replay_vmstate_init();
-@@ -3194,6 +3201,9 @@ void qemu_init(int argc, char **argv)
+@@ -3259,6 +3266,9 @@ void qemu_init(int argc, char **argv)
             case QEMU_OPTION_loadvm:
                 loadvm = optarg;
                 break;
--- a/debian/patches/pve/0018-PVE-add-optional-buffer-size-to-QEMUFile.patch
+++ b/debian/patches/pve/0018-PVE-add-optional-buffer-size-to-QEMUFile.patch
@ -13,18 +13,18 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 [FE: adapt to removal of QEMUFileOps]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- migration/qemu-file.c    | 49 +++++++++++++++++++++++++++-------------
+ migration/qemu-file.c    | 50 +++++++++++++++++++++++++++-------------
 migration/qemu-file.h    |  2 ++
 migration/savevm-async.c |  5 ++--
- 3 files changed, 38 insertions(+), 18 deletions(-)
+ 3 files changed, 39 insertions(+), 18 deletions(-)

 diff --git a/migration/qemu-file.c b/migration/qemu-file.c
-index 19c33c9985..e9ffff0f0a 100644
+index a10882d47f..19c1de0472 100644
 --- a/migration/qemu-file.c
 +++ b/migration/qemu-file.c
-@@ -33,8 +33,8 @@
- #include "options.h"
- #include "qapi/error.h"
+@@ -35,8 +35,8 @@
+ #include "rdma.h"
+ #include "io/channel-file.h"
 
 -#define IO_BUF_SIZE 32768
 -#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
@ -32,8 +32,8 @@ index 19c33c9985..e9ffff0f0a 100644
 +#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
 
 struct QEMUFile {
-     const QEMUFileHooks *hooks;
-@@ -46,7 +46,8 @@ struct QEMUFile {
+     QIOChannel *ioc;
+@@ -44,7 +44,8 @@ struct QEMUFile {
 
     int buf_index;
     int buf_size; /* 0 when writing */
@ -43,7 +43,7 @@ index 19c33c9985..e9ffff0f0a 100644
 
     DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
     struct iovec iov[MAX_IOV_SIZE];
-@@ -100,7 +101,9 @@ int qemu_file_shutdown(QEMUFile *f)
+@@ -101,7 +102,9 @@ int qemu_file_shutdown(QEMUFile *f)
     return 0;
 }
 
@ -54,7 +54,7 @@ index 19c33c9985..e9ffff0f0a 100644
 {
     QEMUFile *f;
 
-@@ -109,6 +112,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+@@ -110,6 +113,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
     object_ref(ioc);
     f->ioc = ioc;
     f->is_writable = is_writable;
@ -63,7 +63,7 @@ index 19c33c9985..e9ffff0f0a 100644
 
     return f;
 }
-@@ -119,17 +124,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+@@ -120,17 +125,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
  */
 QEMUFile *qemu_file_get_return_path(QEMUFile *f)
 {
@ -93,8 +93,8 @@ index 19c33c9985..e9ffff0f0a 100644
 +    return qemu_file_new_impl(ioc, false, buffer_size);
 }
 
- void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
-@@ -375,7 +390,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
+ /*
+@@ -328,7 +343,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
     do {
         len = qio_channel_read(f->ioc,
                                (char *)f->buf + pending,
@ -103,16 +103,17 @@ index 19c33c9985..e9ffff0f0a 100644
                                &local_error);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
-@@ -425,6 +440,8 @@ int qemu_fclose(QEMUFile *f)
+@@ -368,6 +383,9 @@ int qemu_fclose(QEMUFile *f)
+         ret = ret2;
     }
     g_clear_pointer(&f->ioc, object_unref);
- 
+
 +    free(f->buf);
 +
-     /* If any error was spotted before closing, we should report it
-      * instead of the close() return value.
-      */
-@@ -479,7 +496,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
+     error_free(f->last_error_obj);
+     g_free(f);
+     trace_qemu_file_fclose();
+@@ -416,7 +434,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
 {
     if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
         f->buf_index += len;
@ -121,7 +122,7 @@ index 19c33c9985..e9ffff0f0a 100644
             qemu_fflush(f);
         }
     }
-@@ -504,7 +521,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
+@@ -441,7 +459,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
     }
 
     while (size > 0) {
@ -130,7 +131,7 @@ index 19c33c9985..e9ffff0f0a 100644
         if (l > size) {
             l = size;
         }
-@@ -549,8 +566,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si
+@@ -587,8 +605,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si
     size_t index;
 
     assert(!qemu_file_is_writable(f));
@ -141,7 +142,7 @@ index 19c33c9985..e9ffff0f0a 100644
 
     /* The 1st byte to read from */
     index = f->buf_index + offset;
-@@ -600,7 +617,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
+@@ -638,7 +656,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
         size_t res;
         uint8_t *src;
 
@ -150,7 +151,7 @@ index 19c33c9985..e9ffff0f0a 100644
         if (res == 0) {
             return done;
         }
-@@ -634,7 +651,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
+@@ -672,7 +690,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
  */
 size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
 {
@ -159,7 +160,7 @@ index 19c33c9985..e9ffff0f0a 100644
         size_t res;
         uint8_t *src = NULL;
 
-@@ -659,7 +676,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
+@@ -697,7 +715,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
     int index = f->buf_index + offset;
 
     assert(!qemu_file_is_writable(f));
@ -168,7 +169,7 @@ index 19c33c9985..e9ffff0f0a 100644
 
     if (index >= f->buf_size) {
         qemu_fill_buffer(f);
-@@ -777,7 +794,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
+@@ -811,7 +829,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
 ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
                                   const uint8_t *p, size_t size)
 {
@ -178,24 +179,24 @@ index 19c33c9985..e9ffff0f0a 100644
     if (blen < compressBound(size)) {
         return -1;
 diff --git a/migration/qemu-file.h b/migration/qemu-file.h
-index 47015f5201..1312b7c903 100644
+index 32fd4a34fd..36a0cd8cc8 100644
 --- a/migration/qemu-file.h
 +++ b/migration/qemu-file.h
-@@ -63,7 +63,9 @@ typedef struct QEMUFileHooks {
- } QEMUFileHooks;
+@@ -30,7 +30,9 @@
+ #include "io/channel.h"
 
 QEMUFile *qemu_file_new_input(QIOChannel *ioc);
 +QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size);
 QEMUFile *qemu_file_new_output(QIOChannel *ioc);
 +QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size);
- void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
 int qemu_fclose(QEMUFile *f);
 
+ /*
 diff --git a/migration/savevm-async.c b/migration/savevm-async.c
-index e9fc18fb10..80624fada8 100644
+index 1af32604c7..be2035cd2e 100644
 --- a/migration/savevm-async.c
 +++ b/migration/savevm-async.c
-@@ -378,7 +378,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
+@@ -386,7 +386,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
 
     QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
                                                                &snap_state.bs_pos));
@ -204,7 +205,7 @@ index e9fc18fb10..80624fada8 100644
 
     if (!snap_state.file) {
         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
-@@ -496,7 +496,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
+@@ -510,7 +510,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
     blk_op_block_all(be, blocker);
 
     /* restore the VM state */
--- a/debian/patches/pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
+++ b/debian/patches/pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
@ -4,21 +4,23 @@ Date: Mon, 6 Apr 2020 12:16:47 +0200
 Subject: [PATCH] PVE: block: add the zeroinit block driver filter

 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
-[FE: adapt to changed function signatures]
+[FE: adapt to changed function signatures
+     adhere to block graph lock requirements
+     use dedicated function to open file child]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 block/meson.build |   1 +
- block/zeroinit.c  | 200 ++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 201 insertions(+)
+ block/zeroinit.c  | 207 ++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 208 insertions(+)
 create mode 100644 block/zeroinit.c

 diff --git a/block/meson.build b/block/meson.build
-index 529fc172c6..1833c71ce9 100644
+index e1f03fd773..b530e117b5 100644
 --- a/block/meson.build
 +++ b/block/meson.build
-@@ -40,6 +40,7 @@ block_ss.add(files(
-   'throttle-groups.c',
+@@ -39,6 +39,7 @@ block_ss.add(files(
   'throttle.c',
+   'throttle-groups.c',
   'write-threshold.c',
 +  'zeroinit.c',
 ), zstd, zlib, gnutls)
@ -26,10 +28,10 @@ index 529fc172c6..1833c71ce9 100644
 system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
 diff --git a/block/zeroinit.c b/block/zeroinit.c
 new file mode 100644
-index 0000000000..1257342724
+index 0000000000..7998c9332d
 --- /dev/null
 +++ b/block/zeroinit.c
-@@ -0,0 +1,200 @@
+@@ -0,0 +1,207 @@
 +/*
 + * Filter to fake a zero-initialized block device.
 + *
@ -44,6 +46,7 @@ index 0000000000..1257342724
 +#include "qapi/error.h"
 +#include "block/block_int.h"
 +#include "block/block-io.h"
+#include "block/graph-lock.h"
 +#include "qapi/qmp/qdict.h"
 +#include "qapi/qmp/qstring.h"
 +#include "qemu/cutils.h"
@ -109,12 +112,9 @@ index 0000000000..1257342724
 +    }
 +
 +    /* Open the raw file */
-+    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options, "next",
-+                               bs, &child_of_bds,
-+                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-+                               false, &local_err);
-+    if (local_err) {
-+        ret = -EINVAL;
+    ret = bdrv_open_file_child(qemu_opt_get(opts, "x-next"), options, "next",
+                               bs, &local_err);
+    if (ret < 0) {
 +        error_propagate(errp, local_err);
 +        goto fail;
 +    }
@ -125,7 +125,9 @@ index 0000000000..1257342724
 +    ret = 0;
 +fail:
 +    if (ret < 0) {
+        bdrv_graph_wrlock();
 +        bdrv_unref_child(bs, bs->file);
+        bdrv_graph_wrunlock();
 +    }
 +    qemu_opts_del(opts);
 +    return ret;
@ -137,19 +139,22 @@ index 0000000000..1257342724
 +    (void)s;
 +}
 +
-+static coroutine_fn int64_t zeroinit_co_getlength(BlockDriverState *bs)
+static coroutine_fn int64_t GRAPH_RDLOCK
+zeroinit_co_getlength(BlockDriverState *bs)
 +{
 +    return bdrv_co_getlength(bs->file->bs);
 +}
 +
-+static int coroutine_fn zeroinit_co_preadv(BlockDriverState *bs,
-+    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, BdrvRequestFlags flags)
 +{
 +    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 +}
 +
-+static int coroutine_fn zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-+                                                 int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 +{
 +    BDRVZeroinitState *s = bs->opaque;
 +    if (offset >= s->extents)
@ -157,8 +162,9 @@ index 0000000000..1257342724
 +    return bdrv_pwrite_zeroes(bs->file, offset, bytes, flags);
 +}
 +
-+static int coroutine_fn zeroinit_co_pwritev(BlockDriverState *bs,
-+    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 +{
 +    BDRVZeroinitState *s = bs->opaque;
 +    int64_t extents = offset + bytes;
@ -167,32 +173,35 @@ index 0000000000..1257342724
 +    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 +}
 +
-+static coroutine_fn int zeroinit_co_flush(BlockDriverState *bs)
+static coroutine_fn int GRAPH_RDLOCK
+zeroinit_co_flush(BlockDriverState *bs)
 +{
 +    return bdrv_co_flush(bs->file->bs);
 +}
 +
-+static int zeroinit_has_zero_init(BlockDriverState *bs)
+static int GRAPH_RDLOCK
+zeroinit_has_zero_init(BlockDriverState *bs)
 +{
 +    BDRVZeroinitState *s = bs->opaque;
 +    return s->has_zero_init;
 +}
 +
-+static int coroutine_fn zeroinit_co_pdiscard(BlockDriverState *bs,
-+                                             int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 +{
 +    return bdrv_co_pdiscard(bs->file, offset, bytes);
 +}
 +
-+static int zeroinit_co_truncate(BlockDriverState *bs, int64_t offset,
-+                                _Bool exact, PreallocMode prealloc,
-+                                BdrvRequestFlags req_flags, Error **errp)
+static int GRAPH_RDLOCK
+zeroinit_co_truncate(BlockDriverState *bs, int64_t offset, _Bool exact,
+                     PreallocMode prealloc, BdrvRequestFlags req_flags,
+                     Error **errp)
 +{
 +    return bdrv_co_truncate(bs->file, offset, exact, prealloc, req_flags, errp);
 +}
 +
-+static coroutine_fn int zeroinit_co_get_info(BlockDriverState *bs,
-+                                             BlockDriverInfo *bdi)
+static coroutine_fn int GRAPH_RDLOCK
+zeroinit_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 +{
 +    return bdrv_co_get_info(bs->file->bs, bdi);
 +}
--- a/debian/patches/pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
+++ b/debian/patches/pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
@ -10,14 +10,14 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 qemu-options.hx | 3 +++
- softmmu/vl.c    | 8 ++++++++
+ system/vl.c     | 8 ++++++++
 2 files changed, 11 insertions(+)

 diff --git a/qemu-options.hx b/qemu-options.hx
-index dc1ececc9c..848d2dfdd1 100644
+index 511ab9415e..92e301d545 100644
 --- a/qemu-options.hx
 +++ b/qemu-options.hx
-@@ -1197,6 +1197,9 @@ legacy PC, they are not recommended for modern configurations.
+@@ -1237,6 +1237,9 @@ legacy PC, they are not recommended for modern configurations.
 
 ERST
 
@ -27,11 +27,11 @@ index dc1ececc9c..848d2dfdd1 100644
 DEF("fda", HAS_ARG, QEMU_OPTION_fda,
     "-fda/-fdb file  use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
 DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
-diff --git a/softmmu/vl.c b/softmmu/vl.c
-index 3f2681aded..1a3b9cc4b8 100644
--- a/softmmu/vl.c
-+++ b/softmmu/vl.c
-@@ -2683,6 +2683,7 @@ void qemu_init(int argc, char **argv)
+diff --git a/system/vl.c b/system/vl.c
+index 2738ab7c91..20ebf2c920 100644
+--- a/system/vl.c
+++ b/system/vl.c
+@@ -2748,6 +2748,7 @@ void qemu_init(int argc, char **argv)
     MachineClass *machine_class;
     bool userconfig = true;
     FILE *vmstate_dump_file = NULL;
@ -39,7 +39,7 @@ index 3f2681aded..1a3b9cc4b8 100644
 
     qemu_add_opts(&qemu_drive_opts);
     qemu_add_drive_opts(&qemu_legacy_drive_opts);
-@@ -3306,6 +3307,13 @@ void qemu_init(int argc, char **argv)
+@@ -3371,6 +3372,13 @@ void qemu_init(int argc, char **argv)
                 machine_parse_property_opt(qemu_find_opts("smp-opts"),
                                            "smp", optarg);
                 break;
@ -50,6 +50,6 @@ index 3f2681aded..1a3b9cc4b8 100644
 +                    exit(1);
 +                }
 +                break;
+ #ifdef CONFIG_VNC
             case QEMU_OPTION_vnc:
                 vnc_parse(optarg);
-                 break;
--- a/debian/patches/pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
+++ b/debian/patches/pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 9 insertions(+)

 diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
-index 4a34f03047..59b917e50c 100644
+index d8fc1e2815..789694b8b3 100644
 --- a/hw/intc/apic_common.c
 +++ b/hw/intc/apic_common.c
-@@ -252,6 +252,15 @@ static void apic_reset_common(DeviceState *dev)
+@@ -263,6 +263,15 @@ static void apic_reset_common(DeviceState *dev)
     info->vapic_base_update(s);
 
     apic_init_reset(dev);
--- a/debian/patches/pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
+++ b/debian/patches/pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
@ -9,14 +9,14 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 block/file-posix.c   | 59 ++++++++++++++++++++++++++++++--------------
- qapi/block-core.json |  3 ++-
- 2 files changed, 42 insertions(+), 20 deletions(-)
+ qapi/block-core.json |  7 +++++-
+ 2 files changed, 46 insertions(+), 20 deletions(-)

 diff --git a/block/file-posix.c b/block/file-posix.c
-index ca551baa42..8b3b83e9d4 100644
+index 43bc0bd520..60e98c87f1 100644
 --- a/block/file-posix.c
 +++ b/block/file-posix.c
-@@ -2873,6 +2873,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
+@@ -2876,6 +2876,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
     int fd;
     uint64_t perm, shared;
     int result = 0;
@ -24,7 +24,7 @@ index ca551baa42..8b3b83e9d4 100644
 
     /* Validate options and set default values */
     assert(options->driver == BLOCKDEV_DRIVER_FILE);
-@@ -2913,19 +2914,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
+@@ -2916,19 +2917,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
     perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
     shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
 
@ -59,7 +59,7 @@ index ca551baa42..8b3b83e9d4 100644
     }
 
     /* Clear the file by truncating it to 0 */
-@@ -2979,13 +2983,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
+@@ -2982,13 +2986,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
     }
 
 out_unlock:
@ -82,7 +82,7 @@ index ca551baa42..8b3b83e9d4 100644
     }
 
 out_close:
-@@ -3009,6 +3015,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
+@@ -3012,6 +3018,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
     PreallocMode prealloc;
     char *buf = NULL;
     Error *local_err = NULL;
@ -90,7 +90,7 @@ index ca551baa42..8b3b83e9d4 100644
 
     /* Skip file: protocol prefix */
     strstart(filename, "file:", &filename);
-@@ -3031,6 +3038,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
+@@ -3034,6 +3041,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
         return -EINVAL;
     }
 
@ -109,7 +109,7 @@ index ca551baa42..8b3b83e9d4 100644
     options = (BlockdevCreateOptions) {
         .driver     = BLOCKDEV_DRIVER_FILE,
         .u.file     = {
-@@ -3042,6 +3061,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
+@@ -3045,6 +3064,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
             .nocow              = nocow,
             .has_extent_size_hint = has_extent_size_hint,
             .extent_size_hint   = extent_size_hint,
@ -119,10 +119,21 @@ index ca551baa42..8b3b83e9d4 100644
     };
     return raw_co_create(&options, errp);
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index a5cea82139..bb471c078d 100644
+index 905da8be72..3db587a6e4 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -4880,7 +4880,8 @@
+@@ -4956,6 +4956,10 @@
+ # @extent-size-hint: Extent size hint to add to the image file; 0 for
+ #     not adding an extent size hint (default: 1 MB, since 5.1)
+ #
+# @locking: whether to enable file locking.  If set to 'auto', only
+#     enable when Open File Descriptor (OFD) locking API is available
+#     (default: auto).
+#
+ # Since: 2.12
+ ##
+ { 'struct': 'BlockdevCreateOptionsFile',
+@@ -4963,7 +4967,8 @@
             'size':                 'size',
             '*preallocation':       'PreallocMode',
             '*nocow':               'bool',
--- a/debian/patches/pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
+++ b/debian/patches/pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
@ -26,10 +26,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/hw/core/machine.c b/hw/core/machine.c
-index f0d35c6401..1427983543 100644
+index 4273de16a0..83f1fc0293 100644
 --- a/hw/core/machine.c
 +++ b/hw/core/machine.c
-@@ -148,7 +148,8 @@ GlobalProperty hw_compat_4_0[] = {
+@@ -162,7 +162,8 @@ GlobalProperty hw_compat_4_0[] = {
     { "virtio-vga",     "edid", "false" },
     { "virtio-gpu-device", "edid", "false" },
     { "virtio-device", "use-started", "false" },
--- a/debian/patches/pve/0025-PVE-Allow-version-code-in-machine-type.patch
+++ b/debian/patches/pve/0025-PVE-Allow-version-code-in-machine-type.patch
@ -17,14 +17,14 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 hw/core/machine-qmp-cmds.c |  5 +++++
 include/hw/boards.h        |  2 ++
 qapi/machine.json          |  4 +++-
- softmmu/vl.c               | 25 +++++++++++++++++++++++++
+ system/vl.c                | 25 +++++++++++++++++++++++++
 4 files changed, 35 insertions(+), 1 deletion(-)

 diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
-index 40821e2317..ee93ddd69a 100644
+index 314351cdff..628a3537c5 100644
 --- a/hw/core/machine-qmp-cmds.c
 +++ b/hw/core/machine-qmp-cmds.c
-@@ -95,6 +95,11 @@ MachineInfoList *qmp_query_machines(Error **errp)
+@@ -94,6 +94,11 @@ MachineInfoList *qmp_query_machines(Error **errp)
         if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
             info->has_is_current = true;
             info->is_current = true;
@ -37,10 +37,10 @@ index 40821e2317..ee93ddd69a 100644
 
         if (mc->default_cpu_type) {
 diff --git a/include/hw/boards.h b/include/hw/boards.h
-index ed83360198..f8b88cd86a 100644
+index 8b8f6d5c00..dd6d0a1447 100644
 --- a/include/hw/boards.h
 +++ b/include/hw/boards.h
-@@ -235,6 +235,8 @@ struct MachineClass {
+@@ -246,6 +246,8 @@ struct MachineClass {
     const char *desc;
     const char *deprecation_reason;
 
@ -50,10 +50,10 @@ index ed83360198..f8b88cd86a 100644
     void (*reset)(MachineState *state, ShutdownCause reason);
     void (*wakeup)(MachineState *state);
 diff --git a/qapi/machine.json b/qapi/machine.json
-index fbb61f18e4..7da3c519ba 100644
+index a024d5b05d..1d69bffaa0 100644
 --- a/qapi/machine.json
 +++ b/qapi/machine.json
-@@ -161,6 +161,8 @@
+@@ -168,6 +168,8 @@
 #
 # @acpi: machine type supports ACPI (since 8.0)
 #
@ -62,7 +62,7 @@ index fbb61f18e4..7da3c519ba 100644
 # Since: 1.2
 ##
 { 'struct': 'MachineInfo',
-@@ -168,7 +170,7 @@
+@@ -175,7 +177,7 @@
             '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
             'hotpluggable-cpus': 'bool',  'numa-mem-supported': 'bool',
             'deprecated': 'bool', '*default-cpu-type': 'str',
@ -71,19 +71,19 @@ index fbb61f18e4..7da3c519ba 100644
 
 ##
 # @query-machines:
-diff --git a/softmmu/vl.c b/softmmu/vl.c
-index 1a3b9cc4b8..e9b5f62cc3 100644
--- a/softmmu/vl.c
-+++ b/softmmu/vl.c
-@@ -1597,6 +1597,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
+diff --git a/system/vl.c b/system/vl.c
+index 20ebf2c920..4d39e32097 100644
+--- a/system/vl.c
+++ b/system/vl.c
+@@ -1659,6 +1659,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
 static MachineClass *select_machine(QDict *qdict, Error **errp)
 {
-     const char *optarg = qdict_get_try_str(qdict, "type");
+     const char *machine_type = qdict_get_try_str(qdict, "type");
 +    const char *pvever = qdict_get_try_str(qdict, "pvever");
     GSList *machines = object_class_get_list(TYPE_MACHINE, false);
     MachineClass *machine_class;
     Error *local_err = NULL;
-@@ -1614,6 +1615,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
+@@ -1676,6 +1677,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
         }
     }
 
@ -95,7 +95,7 @@ index 1a3b9cc4b8..e9b5f62cc3 100644
     g_slist_free(machines);
     if (local_err) {
         error_append_hint(&local_err, "Use -machine help to list supported machines\n");
-@@ -3248,12 +3254,31 @@ void qemu_init(int argc, char **argv)
+@@ -3313,12 +3319,31 @@ void qemu_init(int argc, char **argv)
             case QEMU_OPTION_machine:
                 {
                     bool help;
--- a/debian/patches/pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
+++ b/debian/patches/pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
@ -25,7 +25,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 4 insertions(+), 4 deletions(-)

 diff --git a/block/backup.c b/block/backup.c
-index db3791f4d1..39410dcf8d 100644
+index 3dd2e229d2..eba5b11493 100644
 --- a/block/backup.c
 +++ b/block/backup.c
@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
@ -48,9 +48,9 @@ index db3791f4d1..39410dcf8d 100644
     if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
         int64_t offset = 0;
         int64_t count;
-@@ -495,6 +493,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
+@@ -502,6 +500,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                        &error_abort);
+     bdrv_graph_wrunlock();
 
 +    backup_init_bcs_bitmap(job);
 +
--- a/debian/patches/pve/0027-PVE-Backup-add-vma-backup-format-code.patch
+++ b/debian/patches/pve/0027-PVE-Backup-add-vma-backup-format-code.patch
@ -15,21 +15,21 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 block/meson.build |   2 +
 meson.build       |   5 +
- vma-reader.c      | 867 ++++++++++++++++++++++++++++++++++++++++++++
- vma-writer.c      | 818 +++++++++++++++++++++++++++++++++++++++++
- vma.c             | 900 ++++++++++++++++++++++++++++++++++++++++++++++
+ vma-reader.c      | 870 ++++++++++++++++++++++++++++++++++++++++++++
+ vma-writer.c      | 817 +++++++++++++++++++++++++++++++++++++++++
+ vma.c             | 901 ++++++++++++++++++++++++++++++++++++++++++++++
 vma.h             | 150 ++++++++
- 6 files changed, 2742 insertions(+)
+ 6 files changed, 2745 insertions(+)
 create mode 100644 vma-reader.c
 create mode 100644 vma-writer.c
 create mode 100644 vma.c
 create mode 100644 vma.h

 diff --git a/block/meson.build b/block/meson.build
-index 1833c71ce9..59b71ba9f3 100644
+index b530e117b5..b245daa98e 100644
 --- a/block/meson.build
 +++ b/block/meson.build
-@@ -43,6 +43,8 @@ block_ss.add(files(
+@@ -42,6 +42,8 @@ block_ss.add(files(
   'zeroinit.c',
 ), zstd, zlib, gnutls)
 
@ -39,10 +39,10 @@ index 1833c71ce9..59b71ba9f3 100644
 system_ss.add(files('block-ram-registrar.c'))
 
 diff --git a/meson.build b/meson.build
-index a9c4f28247..cd95530d3b 100644
+index 91a0aa64c6..620cc594b2 100644
 --- a/meson.build
 +++ b/meson.build
-@@ -1778,6 +1778,8 @@ endif
+@@ -1922,6 +1922,8 @@ endif
 
 has_gettid = cc.has_function('gettid')
 
@ -51,7 +51,7 @@ index a9c4f28247..cd95530d3b 100644
 # libselinux
 selinux = dependency('libselinux',
                      required: get_option('selinux'),
-@@ -3908,6 +3910,9 @@ if have_tools
+@@ -4023,6 +4025,9 @@ if have_tools
                dependencies: [blockdev, qemuutil, gnutls, selinux],
                install: true)
 
@ -59,14 +59,14 @@ index a9c4f28247..cd95530d3b 100644
 +                   dependencies: [authz, block, crypto, io, qom], install: true)
 +
   subdir('storage-daemon')
-   subdir('contrib/rdmacm-mux')
-   subdir('contrib/elf2dmp')
+ 
+   foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
 diff --git a/vma-reader.c b/vma-reader.c
 new file mode 100644
-index 0000000000..81a891c6b1
+index 0000000000..d0b6721812
 --- /dev/null
 +++ b/vma-reader.c
-@@ -0,0 +1,867 @@
+@@ -0,0 +1,870 @@
 +/*
 + * VMA: Virtual Machine Archive
 + *
@ -88,6 +88,7 @@ index 0000000000..81a891c6b1
 +#include "qemu/ratelimit.h"
 +#include "vma.h"
 +#include "block/block.h"
+#include "block/graph-lock.h"
 +#include "sysemu/block-backend.h"
 +
 +static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
@ -600,8 +601,10 @@ index 0000000000..81a891c6b1
 +    } else {
 +        int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
 +        if (res < 0) {
+            bdrv_graph_rdlock_main_loop();
 +            error_setg(errp, "blk_pwrite to %s failed (%d)",
 +                       bdrv_get_device_name(blk_bs(target)), res);
+            bdrv_graph_rdunlock_main_loop();
 +            return -1;
 +        }
 +    }
@ -936,10 +939,10 @@ index 0000000000..81a891c6b1
 +
 diff --git a/vma-writer.c b/vma-writer.c
 new file mode 100644
-index 0000000000..126b296647
+index 0000000000..a466652a5d
 --- /dev/null
 +++ b/vma-writer.c
-@@ -0,0 +1,818 @@
+@@ -0,0 +1,817 @@
 +/*
 + * VMA: Virtual Machine Archive
 + *
@ -1514,8 +1517,8 @@ index 0000000000..126b296647
 +    int i;
 +
 +    g_assert(vmaw != NULL);
+    g_assert(status != NULL);
 +
-+    if (status) {
 +    status->status = vmaw->status;
 +    g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
 +    for (i = 0; i <= 255; i++) {
@ -1523,7 +1526,6 @@ index 0000000000..126b296647
 +    }
 +
 +    uuid_unparse_lower(vmaw->uuid, status->uuid_str);
-+    }
 +
 +    status->closed = vmaw->closed;
 +
@ -1760,10 +1762,10 @@ index 0000000000..126b296647
 +}
 diff --git a/vma.c b/vma.c
 new file mode 100644
-index 0000000000..347f6283ca
+index 0000000000..bb715e9061
 --- /dev/null
 +++ b/vma.c
-@@ -0,0 +1,900 @@
+@@ -0,0 +1,901 @@
 +/*
 + * VMA: Virtual Machine Archive
 + *
@ -2076,17 +2078,17 @@ index 0000000000..347f6283ca
 +                        inbuf);
 +            }
 +
-+            RestoreMap *map = g_new0(RestoreMap, 1);
-+            map->devname = g_strdup(devname);
-+            map->path = g_strdup(path);
-+            map->format = format;
-+            map->throttling_bps = bps_value;
-+            map->throttling_group = group;
-+            map->cache = cache;
-+            map->write_zero = write_zero;
-+            map->skip = skip;
+            RestoreMap *restore_map = g_new0(RestoreMap, 1);
+            restore_map->devname = g_strdup(devname);
+            restore_map->path = g_strdup(path);
+            restore_map->format = format;
+            restore_map->throttling_bps = bps_value;
+            restore_map->throttling_group = group;
+            restore_map->cache = cache;
+            restore_map->write_zero = write_zero;
+            restore_map->skip = skip;
 +
-+            g_hash_table_insert(devmap, map->devname, map);
+            g_hash_table_insert(devmap, restore_map->devname, restore_map);
 +
 +        };
 +    }
@ -2385,7 +2387,7 @@ index 0000000000..347f6283ca
 +
 +static int create_archive(int argc, char **argv)
 +{
-+    int i, c;
+    int c;
 +    int verbose = 0;
 +    const char *archivename;
 +    GList *backup_coroutines = NULL;
@ -2543,6 +2545,7 @@ index 0000000000..347f6283ca
 +    vma_writer_get_status(vmaw, &vmastat);
 +
 +    if (verbose) {
+        int i;
 +        for (i = 0; i < 256; i++) {
 +            VmaStreamInfo *si = &vmastat.stream_info[i];
 +            if (si->size) {
--- a/debian/patches/pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
+++ b/debian/patches/pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
@ -12,20 +12,20 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 [FE: adapt to coroutine changes]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- block/backup-dump.c              | 168 +++++++++++++++++++++++++++++++
+ block/backup-dump.c              | 172 +++++++++++++++++++++++++++++++
 block/backup.c                   |  30 ++----
 block/meson.build                |   1 +
 include/block/block_int-common.h |  35 +++++++
 job.c                            |   3 +-
- 5 files changed, 214 insertions(+), 23 deletions(-)
+ 5 files changed, 218 insertions(+), 23 deletions(-)
 create mode 100644 block/backup-dump.c

 diff --git a/block/backup-dump.c b/block/backup-dump.c
 new file mode 100644
-index 0000000000..232a094426
+index 0000000000..e46abf1070
 --- /dev/null
 +++ b/block/backup-dump.c
-@@ -0,0 +1,168 @@
+@@ -0,0 +1,172 @@
 +/*
 + * BlockDriver to send backup data stream to a callback function
 + *
@ -37,6 +37,8 @@ index 0000000000..232a094426
 + */
 +
 +#include "qemu/osdep.h"
+
+#include "qapi/qmp/qdict.h"
 +#include "qom/object_interfaces.h"
 +#include "block/block_int.h"
 +
@ -169,7 +171,7 @@ index 0000000000..232a094426
 +block_init(bdrv_backup_dump_init);
 +
 +
-+BlockDriverState *bdrv_backup_dump_create(
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
 +    int dump_cb_block_size,
 +    uint64_t byte_size,
 +    BackupDumpFunc *dump_cb,
@ -177,9 +179,11 @@ index 0000000000..232a094426
 +    Error **errp)
 +{
 +    BDRVBackupDumpState *state;
-+    BlockDriverState *bs = bdrv_new_open_driver(
-+        &bdrv_backup_dump_drive, NULL, BDRV_O_RDWR, errp);
 +
+    QDict *options = qdict_new();
+    qdict_put_str(options, "driver", "backup-dump-drive");
+
+    BlockDriverState *bs = bdrv_co_open(NULL, NULL, options, BDRV_O_RDWR, errp);
 +    if (!bs) {
 +        return NULL;
 +    }
@ -195,7 +199,7 @@ index 0000000000..232a094426
 +    return bs;
 +}
 diff --git a/block/backup.c b/block/backup.c
-index 39410dcf8d..af87fa6aa9 100644
+index eba5b11493..1963e47ab9 100644
 --- a/block/backup.c
 +++ b/block/backup.c
@@ -29,28 +29,6 @@
@ -227,7 +231,7 @@ index 39410dcf8d..af87fa6aa9 100644
 static const BlockJobDriver backup_job_driver;
 
 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
-@@ -457,6 +435,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+@@ -462,6 +440,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     }
 
     cluster_size = block_copy_cluster_size(bcs);
@ -243,7 +247,7 @@ index 39410dcf8d..af87fa6aa9 100644
     if (perf->max_chunk && perf->max_chunk < cluster_size) {
         error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
 diff --git a/block/meson.build b/block/meson.build
-index 59b71ba9f3..6fde9f7dcd 100644
+index b245daa98e..e99914eaa4 100644
 --- a/block/meson.build
 +++ b/block/meson.build
@@ -4,6 +4,7 @@ block_ss.add(files(
@ -251,11 +255,11 @@ index 59b71ba9f3..6fde9f7dcd 100644
   'amend.c',
   'backup.c',
 +  'backup-dump.c',
-   'copy-before-write.c',
   'blkdebug.c',
   'blklogwrites.c',
+   'blkverify.c',
 diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
-index 74195c3004..0f2e1817ad 100644
+index 761276127e..b3e6697613 100644
 --- a/include/block/block_int-common.h
 +++ b/include/block/block_int-common.h
@@ -26,6 +26,7 @@
@ -272,7 +276,7 @@ index 74195c3004..0f2e1817ad 100644
 
 +typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
 +
-+BlockDriverState *bdrv_backup_dump_create(
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
 +    int dump_cb_block_size,
 +    uint64_t byte_size,
 +    BackupDumpFunc *dump_cb,
@ -308,10 +312,10 @@ index 74195c3004..0f2e1817ad 100644
     BDRV_TRACKED_READ,
     BDRV_TRACKED_WRITE,
 diff --git a/job.c b/job.c
-index 72d57f0934..93e22d180b 100644
+index 660ce22c56..baf54c8d60 100644
 --- a/job.c
 +++ b/job.c
-@@ -330,7 +330,8 @@ static bool job_started_locked(Job *job)
+@@ -331,7 +331,8 @@ static bool job_started_locked(Job *job)
 }
 
 /* Called with job_mutex held. */
--- a/debian/patches/pve/0029-PVE-Add-sequential-job-transaction-support.patch
+++ b/debian/patches/pve/0029-PVE-Add-sequential-job-transaction-support.patch
@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 2 files changed, 46 insertions(+)

 diff --git a/include/qemu/job.h b/include/qemu/job.h
-index e502787dd8..963cf2bef5 100644
+index 2b873f2576..528cd6acb9 100644
 --- a/include/qemu/job.h
 +++ b/include/qemu/job.h
-@@ -381,6 +381,18 @@ void job_unlock(void);
+@@ -362,6 +362,18 @@ void job_unlock(void);
  */
 JobTxn *job_txn_new(void);
 
@ -34,10 +34,10 @@ index e502787dd8..963cf2bef5 100644
  * Release a reference that was previously acquired with job_txn_add_job or
  * job_txn_new. If it's the last reference to the object, it will be freed.
 diff --git a/job.c b/job.c
-index 93e22d180b..2b31f1e14f 100644
+index baf54c8d60..3ac5e5cde2 100644
 --- a/job.c
 +++ b/job.c
-@@ -93,6 +93,8 @@ struct JobTxn {
+@@ -94,6 +94,8 @@ struct JobTxn {
 
     /* Reference count */
     int refcnt;
@ -46,7 +46,7 @@ index 93e22d180b..2b31f1e14f 100644
 };
 
 void job_lock(void)
-@@ -118,6 +120,25 @@ JobTxn *job_txn_new(void)
+@@ -119,6 +121,25 @@ JobTxn *job_txn_new(void)
     return txn;
 }
 
@ -72,7 +72,7 @@ index 93e22d180b..2b31f1e14f 100644
 /* Called with job_mutex held. */
 static void job_txn_ref_locked(JobTxn *txn)
 {
-@@ -1057,6 +1078,12 @@ static void job_completed_txn_success_locked(Job *job)
+@@ -1042,6 +1063,12 @@ static void job_completed_txn_success_locked(Job *job)
      */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
         if (!job_is_completed_locked(other_job)) {
@ -85,7 +85,7 @@ index 93e22d180b..2b31f1e14f 100644
             return;
         }
         assert(other_job->ret == 0);
-@@ -1268,6 +1295,13 @@ int job_finish_sync_locked(Job *job,
+@@ -1253,6 +1280,13 @@ int job_finish_sync_locked(Job *job,
         return -EBUSY;
     }
 
--- a/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
+++ b/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
@ -84,68 +84,30 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
     create jobs in a drained section]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- block/backup-dump.c              |   10 +-
 block/meson.build              |    5 +
 block/monitor/block-hmp-cmds.c |   39 ++
 blockdev.c                     |    1 +
 hmp-commands-info.hx           |   14 +
 hmp-commands.hx                |   29 +
- include/block/block_int-common.h |    2 +-
 include/monitor/hmp.h          |    3 +
 meson.build                    |    1 +
- monitor/hmp-cmds.c               |   72 ++
- proxmox-backup-client.c          |  146 ++++
+ monitor/hmp-cmds.c             |   72 +++
+ proxmox-backup-client.c        |  146 +++++
 proxmox-backup-client.h        |   60 ++
- pve-backup.c                     | 1067 ++++++++++++++++++++++++++++++
- qapi/block-core.json             |  229 +++++++
+ pve-backup.c                   | 1092 ++++++++++++++++++++++++++++++++
+ qapi/block-core.json           |  233 +++++++
 qapi/common.json               |   14 +
 qapi/machine.json              |   16 +-
- 16 files changed, 1690 insertions(+), 18 deletions(-)
+ 14 files changed, 1711 insertions(+), 14 deletions(-)
 create mode 100644 proxmox-backup-client.c
 create mode 100644 proxmox-backup-client.h
 create mode 100644 pve-backup.c

-diff --git a/block/backup-dump.c b/block/backup-dump.c
-index 232a094426..e46abf1070 100644
--- a/block/backup-dump.c
-+++ b/block/backup-dump.c
-@@ -9,6 +9,8 @@
-  */
- 
- #include "qemu/osdep.h"
-+
-+#include "qapi/qmp/qdict.h"
- #include "qom/object_interfaces.h"
- #include "block/block_int.h"
- 
-@@ -141,7 +143,7 @@ static void bdrv_backup_dump_init(void)
- block_init(bdrv_backup_dump_init);
- 
- 
-BlockDriverState *bdrv_backup_dump_create(
-+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
-     int dump_cb_block_size,
-     uint64_t byte_size,
-     BackupDumpFunc *dump_cb,
-@@ -149,9 +151,11 @@ BlockDriverState *bdrv_backup_dump_create(
-     Error **errp)
- {
-     BDRVBackupDumpState *state;
-    BlockDriverState *bs = bdrv_new_open_driver(
-        &bdrv_backup_dump_drive, NULL, BDRV_O_RDWR, errp);
- 
-+    QDict *options = qdict_new();
-+    qdict_put_str(options, "driver", "backup-dump-drive");
-+
-+    BlockDriverState *bs = bdrv_co_open(NULL, NULL, options, BDRV_O_RDWR, errp);
-     if (!bs) {
-         return NULL;
-     }
 diff --git a/block/meson.build b/block/meson.build
-index 6fde9f7dcd..6d468f89e5 100644
+index e99914eaa4..6bba803f94 100644
 --- a/block/meson.build
 +++ b/block/meson.build
-@@ -45,6 +45,11 @@ block_ss.add(files(
+@@ -44,6 +44,11 @@ block_ss.add(files(
 ), zstd, zlib, gnutls)
 
 block_ss.add(files('../vma-writer.c'), libuuid)
@ -158,10 +120,10 @@ index 6fde9f7dcd..6d468f89e5 100644
 system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
 system_ss.add(files('block-ram-registrar.c'))
 diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
-index ca2599de44..6efe28cef5 100644
+index bdf2eb50b6..439a7a14c8 100644
 --- a/block/monitor/block-hmp-cmds.c
 +++ b/block/monitor/block-hmp-cmds.c
-@@ -1029,3 +1029,42 @@ void hmp_change_medium(Monitor *mon, const char *device, const char *target,
+@@ -1009,3 +1009,42 @@ void hmp_change_medium(Monitor *mon, const char *device, const char *target,
     qmp_blockdev_change_medium(device, NULL, target, arg, true, force,
                                !!read_only, read_only_mode, errp);
 }
@ -205,7 +167,7 @@ index ca2599de44..6efe28cef5 100644
 +    hmp_handle_error(mon, error);
 +}
 diff --git a/blockdev.c b/blockdev.c
-index cd5f205ad1..7793143d76 100644
+index ed8198f351..1054a69279 100644
 --- a/blockdev.c
 +++ b/blockdev.c
@@ -37,6 +37,7 @@
@ -217,7 +179,7 @@ index cd5f205ad1..7793143d76 100644
 #include "monitor/monitor.h"
 #include "qemu/error-report.h"
 diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
-index 10fdd822e0..15937793c1 100644
+index d5ab880492..6c97248d1b 100644
 --- a/hmp-commands-info.hx
 +++ b/hmp-commands-info.hx
@@ -471,6 +471,20 @@ SRST
@ -242,7 +204,7 @@ index 10fdd822e0..15937793c1 100644
     {
         .name       = "usernet",
 diff --git a/hmp-commands.hx b/hmp-commands.hx
-index e352f86872..0c8b6725fb 100644
+index 7506de251c..d5f9c28194 100644
 --- a/hmp-commands.hx
 +++ b/hmp-commands.hx
@@ -101,6 +101,35 @@ ERST
@ -281,19 +243,6 @@ index e352f86872..0c8b6725fb 100644
 ERST
 
     {
-diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
-index 0f2e1817ad..0a0339eee4 100644
--- a/include/block/block_int-common.h
-+++ b/include/block/block_int-common.h
-@@ -63,7 +63,7 @@
- 
- typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
- 
-BlockDriverState *bdrv_backup_dump_create(
-+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
-     int dump_cb_block_size,
-     uint64_t byte_size,
-     BackupDumpFunc *dump_cb,
 diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
 index 7a7def7530..cba7afe70c 100644
 --- a/include/monitor/hmp.h
@ -316,10 +265,10 @@ index 7a7def7530..cba7afe70c 100644
 void hmp_device_add(Monitor *mon, const QDict *qdict);
 void hmp_device_del(Monitor *mon, const QDict *qdict);
 diff --git a/meson.build b/meson.build
-index cd95530d3b..d53976d621 100644
+index 620cc594b2..d16b97cf3c 100644
 --- a/meson.build
 +++ b/meson.build
-@@ -1779,6 +1779,7 @@ endif
+@@ -1923,6 +1923,7 @@ endif
 has_gettid = cc.has_function('gettid')
 
 libuuid = cc.find_library('uuid', required: true)
@ -328,7 +277,7 @@ index cd95530d3b..d53976d621 100644
 # libselinux
 selinux = dependency('libselinux',
 diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
-index 91be698308..5b9c231a4c 100644
+index ef4634e5c1..6e25279f42 100644
 --- a/monitor/hmp-cmds.c
 +++ b/monitor/hmp-cmds.c
@@ -21,6 +21,7 @@
@ -637,10 +586,10 @@ index 0000000000..8cbf645b2c
 +#endif /* PROXMOX_BACKUP_CLIENT_H */
 diff --git a/pve-backup.c b/pve-backup.c
 new file mode 100644
-index 0000000000..d84d807654
+index 0000000000..c755bf302b
 --- /dev/null
 +++ b/pve-backup.c
-@@ -0,0 +1,1067 @@
+@@ -0,0 +1,1092 @@
 +#include "proxmox-backup-client.h"
 +#include "vma.h"
 +
@ -651,6 +600,7 @@ index 0000000000..d84d807654
 +#include "block/block_int-global-state.h"
 +#include "block/blockjob.h"
 +#include "block/dirty-bitmap.h"
+#include "block/graph-lock.h"
 +#include "qapi/qapi-commands-block.h"
 +#include "qapi/qmp/qerror.h"
 +#include "qemu/cutils.h"
@ -676,7 +626,6 @@ index 0000000000..d84d807654
 + * ---end-bad-example--
 + *
 + * ==> Always use CoMutext inside coroutines.
-+ * ==> Never acquire/release AioContext withing coroutines (because that use QemuRecMutex)
 + *
 + */
 +
@ -729,7 +678,6 @@ index 0000000000..d84d807654
 +    uint64_t block_size;
 +    uint8_t dev_id;
 +    int completed_ret; // INT_MAX if not completed
-+    char targetfile[PATH_MAX];
 +    BdrvDirtyBitmap *bitmap;
 +    BlockDriverState *target;
 +    BlockJob *job;
@ -950,7 +898,12 @@ index 0000000000..d84d807654
 +
 +    qemu_co_mutex_lock(&backup_state.backup_mutex);
 +
-+    if (ret < 0) {
+    /*
+     * All jobs in the transaction will be canceled when one receives an error.
+     * The first error wins, so only set it for ECANCELED if it was the last
+     * job. This allows more interesting errors from other jobs to win.
+     */
+    if (ret < 0 && (ret != -ECANCELED || !g_list_nth(backup_state.di_list, 1))) {
 +        Error *local_err = NULL;
 +        error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
 +        pvebackup_propagate_error(local_err);
@ -974,13 +927,6 @@ index 0000000000..d84d807654
 +        }
 +    }
 +
-+    if (di->job) {
-+        WITH_JOB_LOCK_GUARD() {
-+            job_unref_locked(&di->job->job);
-+            di->job = NULL;
-+        }
-+    }
-+
 +    // remove self from job list
 +    backup_state.di_list = g_list_remove(backup_state.di_list, di);
 +
@ -1000,6 +946,16 @@ index 0000000000..d84d807654
 +    di->completed_ret = ret;
 +
 +    /*
+     * Needs to happen outside of coroutine, because it takes the graph write lock.
+     */
+    if (di->job) {
+        WITH_JOB_LOCK_GUARD() {
+            job_unref_locked(&di->job->job);
+            di->job = NULL;
+        }
+    }
+
+    /*
 +     * Schedule stream cleanup in async coroutine. close_image and finish might
 +     * take a while, so we can't block on them here. This way it also doesn't
 +     * matter if we're already running in a coroutine or not.
@ -1120,8 +1076,7 @@ index 0000000000..d84d807654
 +}
 +
 +/*
-+ * backup_job_create can *not* be run from a coroutine (and requires an
-+ * acquired AioContext), so this can't either.
+ * backup_job_create can *not* be run from a coroutine, so this can't either.
 + * The caller is responsible that backup_mutex is held nonetheless.
 + */
 +static void create_backup_jobs_bh(void *opaque) {
@ -1154,9 +1109,6 @@ index 0000000000..d84d807654
 +            sync_mode = MIRROR_SYNC_MODE_BITMAP;
 +            bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
 +        }
-+        AioContext *aio_context = bdrv_get_aio_context(di->bs);
-+        aio_context_acquire(aio_context);
-+
 +        bdrv_drained_begin(di->bs);
 +
 +        BlockJob *job = backup_job_create(
@ -1167,8 +1119,6 @@ index 0000000000..d84d807654
 +
 +        bdrv_drained_end(di->bs);
 +
-+        aio_context_release(aio_context);
-+
 +        di->job = job;
 +        if (job) {
 +            WITH_JOB_LOCK_GUARD() {
@ -1219,6 +1169,66 @@ index 0000000000..d84d807654
 +    aio_co_enter(data->ctx, data->co);
 +}
 +
+/*
+ * Returns a list of device infos for @devlist (all inserted, non-read-only
+ * devices if NULL), which the caller must free. In case of an error, errp will
+ * be set, but the returned value might still be a (partial) list.
+ */
+static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+    const char *devlist,
+    Error **errp)
+{
+    gchar **devs = NULL;
+    GList *di_list = NULL;
+
+    if (devlist) {
+        devs = g_strsplit_set(devlist, ",;:", -1);
+
+        gchar **d = devs;
+        while (d && *d) {
+            BlockBackend *blk = blk_by_name(*d);
+            if (!blk) {
+                error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                          "Device '%s' not found", *d);
+                goto err;
+            }
+            BlockDriverState *bs = blk_bs(blk);
+            if (!bdrv_co_is_inserted(bs)) {
+                error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
+                goto err;
+            }
+            PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+            di->bs = bs;
+            di_list = g_list_append(di_list, di);
+            d++;
+        }
+    } else {
+        BdrvNextIterator it;
+
+        for (BlockDriverState *bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+            if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs)) {
+                continue;
+            }
+
+            PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+            di->bs = bs;
+            di_list = g_list_append(di_list, di);
+        }
+    }
+
+    if (!di_list) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+        goto err;
+    }
+
+err:
+    if (devs) {
+        g_strfreev(devs);
+    }
+
+    return di_list;
+}
+
 +UuidInfo coroutine_fn *qmp_backup(
 +    const char *backup_file,
 +    const char *password,
@ -1244,13 +1254,10 @@ index 0000000000..d84d807654
 +
 +    qemu_co_mutex_lock(&backup_state.backup_mutex);
 +
-+    BlockBackend *blk;
-+    BlockDriverState *bs = NULL;
 +    Error *local_err = NULL;
 +    uuid_t uuid;
 +    VmaWriter *vmaw = NULL;
 +    ProxmoxBackupHandle *pbs = NULL;
-+    gchar **devs = NULL;
 +    GList *di_list = NULL;
 +    GList *l;
 +    UuidInfo *uuid_info;
@ -1268,48 +1275,14 @@ index 0000000000..d84d807654
 +    /* Todo: try to auto-detect format based on file name */
 +    format = has_format ? format : BACKUP_FORMAT_VMA;
 +
-+    if (devlist) {
-+        devs = g_strsplit_set(devlist, ",;:", -1);
-+
-+        gchar **d = devs;
-+        while (d && *d) {
-+            blk = blk_by_name(*d);
-+            if (blk) {
-+                bs = blk_bs(blk);
-+                if (!bdrv_co_is_inserted(bs)) {
-+                    error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
-+                    goto err;
-+                }
-+                PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
-+                di->bs = bs;
-+                di_list = g_list_append(di_list, di);
-+            } else {
-+                error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
-+                          "Device '%s' not found", *d);
-+                goto err;
-+            }
-+            d++;
-+        }
-+
-+    } else {
-+        BdrvNextIterator it;
-+
-+        bs = NULL;
-+        for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-+            if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs)) {
-+                continue;
-+            }
-+
-+            PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
-+            di->bs = bs;
-+            di_list = g_list_append(di_list, di);
-+        }
-+    }
-+
-+    if (!di_list) {
-+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+    bdrv_graph_co_rdlock();
+    di_list = get_device_info(devlist, &local_err);
+    bdrv_graph_co_rdunlock();
+    if (local_err) {
+        error_propagate(errp, local_err);
 +        goto err;
 +    }
+    assert(di_list);
 +
 +    size_t total = 0;
 +
@ -1317,7 +1290,11 @@ index 0000000000..d84d807654
 +    while (l) {
 +        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
 +        l = g_list_next(l);
-+        if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+
+        bdrv_graph_co_rdlock();
+        bool blocked = bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp);
+        bdrv_graph_co_rdunlock();
+        if (blocked) {
 +            goto err;
 +        }
 +
@ -1401,7 +1378,9 @@ index 0000000000..d84d807654
 +
 +            di->block_size = dump_cb_block_size;
 +
+            bdrv_graph_co_rdlock();
 +            const char *devname = bdrv_get_device_name(di->bs);
+            bdrv_graph_co_rdunlock();
 +            PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
 +            size_t dirty = di->size;
 +
@ -1477,7 +1456,9 @@ index 0000000000..d84d807654
 +                goto err_mutex;
 +            }
 +
+            bdrv_graph_co_rdlock();
 +            const char *devname = bdrv_get_device_name(di->bs);
+            bdrv_graph_co_rdunlock();
 +            di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
 +            if (di->dev_id <= 0) {
 +                error_set(errp, ERROR_CLASS_GENERIC_ERROR,
@ -1589,18 +1570,11 @@ index 0000000000..d84d807654
 +            bdrv_co_unref(di->target);
 +        }
 +
-+        if (di->targetfile[0]) {
-+            unlink(di->targetfile);
-+        }
 +        g_free(di);
 +    }
 +    g_list_free(di_list);
 +    backup_state.di_list = NULL;
 +
-+    if (devs) {
-+        g_strfreev(devs);
-+    }
-+
 +    if (vmaw) {
 +        Error *err = NULL;
 +        vma_writer_close(vmaw, &err);
@ -1709,10 +1683,10 @@ index 0000000000..d84d807654
 +    return ret;
 +}
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index bb471c078d..1b8462a51b 100644
+index 3db587a6e4..d05fffce1d 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -839,6 +839,235 @@
+@@ -851,6 +851,239 @@
 { 'command': 'query-block', 'returns': ['BlockInfo'],
   'allow-preconfig': true }
 
@ -1781,6 +1755,9 @@ index bb471c078d..1b8462a51b 100644
 +# @config-file: a configuration file to include into
 +#               the backup archive.
 +#
+# @firewall-file: a firewall configuration file to include into the backup
+#     archive.
+#
 +# @speed: the maximum speed, in bytes per second
 +#
 +# @devlist: list of block device names (separated by ',', ';'
@ -1848,8 +1825,6 @@ index bb471c078d..1b8462a51b 100644
 +#
 +# Cancel the current executing backup process.
 +#
-+# Returns: nothing on success
-+#
 +# Notes: This command succeeds even if there is no backup process running.
 +#
 +##
@ -1873,6 +1848,9 @@ index bb471c078d..1b8462a51b 100644
 +#
 +# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
 +#
+# @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
+#     supported or not.
+#
 +##
 +{ 'struct': 'ProxmoxSupportStatus',
 +  'data': { 'pbs-dirty-bitmap': 'bool',
@ -1949,10 +1927,10 @@ index bb471c078d..1b8462a51b 100644
 # @BlockDeviceTimedStats:
 #
 diff --git a/qapi/common.json b/qapi/common.json
-index 6fed9cde1a..630a2a8f9a 100644
+index 7558ce5430..6e3d800373 100644
 --- a/qapi/common.json
 +++ b/qapi/common.json
-@@ -207,3 +207,17 @@
+@@ -200,3 +200,17 @@
 ##
 { 'struct': 'HumanReadableText',
   'data': { 'human-readable-text': 'str' } }
@ -1971,7 +1949,7 @@ index 6fed9cde1a..630a2a8f9a 100644
 +##
 +{ 'struct': 'UuidInfo', 'data': {'UUID': 'str'} }
 diff --git a/qapi/machine.json b/qapi/machine.json
-index 7da3c519ba..888457f810 100644
+index 1d69bffaa0..731d8d2f60 100644
 --- a/qapi/machine.json
 +++ b/qapi/machine.json
@@ -4,6 +4,8 @@
@ -1983,7 +1961,7 @@ index 7da3c519ba..888457f810 100644
 ##
 # = Machines
 ##
-@@ -230,20 +232,6 @@
+@@ -237,20 +239,6 @@
 ##
 { 'command': 'query-target', 'returns': 'TargetInfo' }
 
--- a/debian/patches/pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
+++ b/debian/patches/pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
@ -14,10 +14,10 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
 create mode 100644 pbs-restore.c

 diff --git a/meson.build b/meson.build
-index d53976d621..c3330310d9 100644
+index d16b97cf3c..6de51c34cb 100644
 --- a/meson.build
 +++ b/meson.build
-@@ -3914,6 +3914,10 @@ if have_tools
+@@ -4029,6 +4029,10 @@ if have_tools
   vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
                    dependencies: [authz, block, crypto, io, qom], install: true)
 
@ -26,8 +26,8 @@ index d53976d621..c3330310d9 100644
 +                    libproxmox_backup_qemu], install: true)
 +
   subdir('storage-daemon')
-   subdir('contrib/rdmacm-mux')
-   subdir('contrib/elf2dmp')
+ 
+   foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
 diff --git a/pbs-restore.c b/pbs-restore.c
 new file mode 100644
 index 0000000000..f03d9bab8d
--- a/debian/patches/pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
+++ b/debian/patches/pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
@ -14,35 +14,33 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
     getlength is now a coroutine function]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- block/meson.build    |   3 +
- block/pbs.c          | 305 +++++++++++++++++++++++++++++++++++++++++++
- configure            |   9 ++
+ block/meson.build    |   2 +
+ block/pbs.c          | 313 +++++++++++++++++++++++++++++++++++++++++++
 meson.build          |   2 +-
- qapi/block-core.json |  13 ++
+ qapi/block-core.json |  29 ++++
 qapi/pragma.json     |   1 +
- 6 files changed, 332 insertions(+), 1 deletion(-)
+ 5 files changed, 346 insertions(+), 1 deletion(-)
 create mode 100644 block/pbs.c

 diff --git a/block/meson.build b/block/meson.build
-index 6d468f89e5..becc99ac4e 100644
+index 6bba803f94..1945e04eeb 100644
 --- a/block/meson.build
 +++ b/block/meson.build
-@@ -50,6 +50,9 @@ block_ss.add(files(
+@@ -49,6 +49,8 @@ block_ss.add(files(
   '../pve-backup.c',
 ), libproxmox_backup_qemu)
 
-+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: files('pbs.c'))
-+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: libproxmox_backup_qemu)
+block_ss.add(files('pbs.c'), libproxmox_backup_qemu)
 +
 
 system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
 system_ss.add(files('block-ram-registrar.c'))
 diff --git a/block/pbs.c b/block/pbs.c
 new file mode 100644
-index 0000000000..a2211e0f3b
+index 0000000000..aee66c2e93
 --- /dev/null
 +++ b/block/pbs.c
-@@ -0,0 +1,305 @@
+@@ -0,0 +1,313 @@
 +/*
 + * Proxmox Backup Server read-only block driver
 + */
@ -70,7 +68,7 @@ index 0000000000..a2211e0f3b
 +
 +typedef struct {
 +    ProxmoxRestoreHandle *conn;
-+    char aid;
+    uint8_t aid;
 +    int64_t length;
 +
 +    char *repository;
@ -203,12 +201,18 @@ index 0000000000..a2211e0f3b
 +    }
 +
 +    /* acquire handle and length */
-+    s->aid = proxmox_restore_open_image(s->conn, s->archive, &pbs_error);
-+    if (s->aid < 0) {
+    ret = proxmox_restore_open_image(s->conn, s->archive, &pbs_error);
+    if (ret < 0) {
 +        if (pbs_error && errp) error_setg(errp, "PBS open_image failed: %s", pbs_error);
 +        if (pbs_error) proxmox_backup_free_error(pbs_error);
 +        return -ENODEV;
 +    }
+    if (ret > UINT8_MAX) {
+        error_setg(errp, "PBS open_image returned an ID larger than %u", UINT8_MAX);
+        return -ENODEV;
+    }
+    s->aid = ret;
+
 +    s->length = proxmox_restore_get_image_length(s->conn, s->aid, &pbs_error);
 +    if (s->length < 0) {
 +        if (pbs_error && errp) error_setg(errp, "PBS get_image_length failed: %s", pbs_error);
@ -234,7 +238,8 @@ index 0000000000..a2211e0f3b
 +    proxmox_restore_disconnect(s->conn);
 +}
 +
-+static coroutine_fn int64_t pbs_co_getlength(BlockDriverState *bs)
+static coroutine_fn int64_t GRAPH_RDLOCK
+pbs_co_getlength(BlockDriverState *bs)
 +{
 +    BDRVPBSState *s = bs->opaque;
 +    return s->length;
@ -251,8 +256,8 @@ index 0000000000..a2211e0f3b
 +    aio_co_schedule(rcb->ctx, rcb->co);
 +}
 +
-+static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
-+                                      int64_t offset, int64_t bytes,
+static coroutine_fn int GRAPH_RDLOCK
+pbs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 +              QEMUIOVector *qiov, BdrvRequestFlags flags)
 +{
 +    BDRVPBSState *s = bs->opaque;
@ -298,8 +303,8 @@ index 0000000000..a2211e0f3b
 +    return 0;
 +}
 +
-+static coroutine_fn int pbs_co_pwritev(BlockDriverState *bs,
-+                                       int64_t offset, int64_t bytes,
+static coroutine_fn int GRAPH_RDLOCK
+pbs_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
 +               QEMUIOVector *qiov, BdrvRequestFlags flags)
 +{
 +    fprintf(stderr, "pbs-bdrv: cannot write to backup file, make sure "
@ -307,7 +312,8 @@ index 0000000000..a2211e0f3b
 +    return -EPERM;
 +}
 +
-+static void pbs_refresh_filename(BlockDriverState *bs)
+static void GRAPH_RDLOCK
+pbs_refresh_filename(BlockDriverState *bs)
 +{
 +    BDRVPBSState *s = bs->opaque;
 +    if (s->namespace) {
@ -348,52 +354,11 @@ index 0000000000..a2211e0f3b
 +}
 +
 +block_init(bdrv_pbs_init);
-diff --git a/configure b/configure
-index 133f4e3235..f5a830c1f3 100755
--- a/configure
-+++ b/configure
-@@ -256,6 +256,7 @@ qemu_suffix="qemu"
- softmmu="yes"
- linux_user=""
- bsd_user=""
-+pbs_bdrv="yes"
- plugins="$default_feature"
- ninja=""
- python=
-@@ -809,6 +810,10 @@ for opt do
-   ;;
-   --enable-download) download="enabled"; git_submodules_action=update;
-   ;;
-+  --disable-pbs-bdrv) pbs_bdrv="no"
-+  ;;
-+  --enable-pbs-bdrv) pbs_bdrv="yes"
-+  ;;
-   --enable-plugins) if test "$mingw32" = "yes"; then
-                         error_exit "TCG plugins not currently supported on Windows platforms"
-                     else
-@@ -959,6 +964,7 @@ cat << EOF
-   bsd-user        all BSD usermode emulation targets
-   pie             Position Independent Executables
-   debug-tcg       TCG debugging (default is disabled)
-+  pbs-bdrv        Proxmox backup server read-only block driver support
- 
- NOTE: The object files are built at the place where configure is launched
- EOF
-@@ -1744,6 +1750,9 @@ if test "$solaris" = "yes" ; then
- fi
- echo "SRC_PATH=$source_path" >> $config_host_mak
- echo "TARGET_DIRS=$target_list" >> $config_host_mak
-+if test "$pbs_bdrv" = "yes" ; then
-+  echo "CONFIG_PBS_BDRV=y" >> $config_host_mak
-+fi
- 
- # XXX: suppress that
- if [ "$bsd" = "yes" ] ; then
 diff --git a/meson.build b/meson.build
-index c3330310d9..cbfc9a43fb 100644
+index 6de51c34cb..3bc039f60f 100644
 --- a/meson.build
 +++ b/meson.build
-@@ -4319,7 +4319,7 @@ summary_info += {'bzip2 support':     libbzip2}
+@@ -4477,7 +4477,7 @@ summary_info += {'bzip2 support':     libbzip2}
 summary_info += {'lzfse support':     liblzfse}
 summary_info += {'zstd support':      zstd}
 summary_info += {'NUMA host support': numa}
@ -403,10 +368,10 @@ index c3330310d9..cbfc9a43fb 100644
 summary_info += {'libdaxctl support': libdaxctl}
 summary_info += {'libudev':           libudev}
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 1b8462a51b..d67a6d448a 100644
+index d05fffce1d..e7cf3d94f3 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -3396,6 +3396,7 @@
+@@ -3457,6 +3457,7 @@
             'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
             'raw', 'rbd',
             { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
@ -414,7 +379,7 @@ index 1b8462a51b..d67a6d448a 100644
             'ssh', 'throttle', 'vdi', 'vhdx',
             { 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
             { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
-@@ -3482,6 +3483,17 @@
+@@ -3543,6 +3544,33 @@
 { 'struct': 'BlockdevOptionsNull',
   'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
 
@ -423,6 +388,22 @@ index 1b8462a51b..d67a6d448a 100644
 +#
 +# Driver specific block device options for the PBS backend.
 +#
+# @repository: Proxmox Backup Server repository.
+#
+# @snapshot: backup snapshot ID.
+#
+# @archive: archive name.
+#
+# @keyfile: keyfile to use for encryption.
+#
+# @password: password to use for connection.
+#
+# @fingerprint: backup server fingerprint.
+#
+# @key_password: password to unlock key.
+#
+# @namespace: namespace where backup snapshot lives.
+#
 +##
 +{ 'struct': 'BlockdevOptionsPbs',
 +  'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
@ -432,7 +413,7 @@ index 1b8462a51b..d67a6d448a 100644
 ##
 # @BlockdevOptionsNVMe:
 #
-@@ -4890,6 +4902,7 @@
+@@ -4977,6 +5005,7 @@
       'nfs':        'BlockdevOptionsNfs',
       'null-aio':   'BlockdevOptionsNull',
       'null-co':    'BlockdevOptionsNull',
@ -441,10 +422,10 @@ index 1b8462a51b..d67a6d448a 100644
       'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
                          'if': 'CONFIG_BLKIO' },
 diff --git a/qapi/pragma.json b/qapi/pragma.json
-index 325e684411..b6079f6a0e 100644
+index be8fa304c5..7ff46bd128 100644
 --- a/qapi/pragma.json
 +++ b/qapi/pragma.json
-@@ -45,6 +45,7 @@
+@@ -100,6 +100,7 @@
         'BlockInfo',                # query-block
         'BlockdevAioOptions',       # blockdev-add, -blockdev
         'BlockdevDriver',           # blockdev-add, query-blockstats, ...
--- a/debian/patches/pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
+++ b/debian/patches/pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
@ -9,15 +9,15 @@ fitting.
 Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
- meson.build | 2 ++
+ meson.build | 3 ++-
 os-posix.c  | 7 +++++--
- 2 files changed, 7 insertions(+), 2 deletions(-)
+ 2 files changed, 7 insertions(+), 3 deletions(-)

 diff --git a/meson.build b/meson.build
-index cbfc9a43fb..8206270272 100644
+index 3bc039f60f..067e8956a7 100644
 --- a/meson.build
 +++ b/meson.build
-@@ -1779,6 +1779,7 @@ endif
+@@ -1923,6 +1923,7 @@ endif
 has_gettid = cc.has_function('gettid')
 
 libuuid = cc.find_library('uuid', required: true)
@ -25,28 +25,29 @@ index cbfc9a43fb..8206270272 100644
 libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true)
 
 # libselinux
-@@ -3406,6 +3407,7 @@ if have_block
-   # os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
-   # os-win32.c does not
-   blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
-+  blockdev_ss.add(when: 'CONFIG_POSIX', if_true: libsystemd)
-   system_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
+@@ -3530,7 +3531,7 @@ if have_block
+   if host_os == 'windows'
+     system_ss.add(files('os-win32.c'))
+   else
+-    blockdev_ss.add(files('os-posix.c'))
+    blockdev_ss.add(files('os-posix.c'), libsystemd)
+   endif
 endif
 
 diff --git a/os-posix.c b/os-posix.c
-index cfcb96533c..fb2ad87009 100644
+index a4284e2c07..197a2120fd 100644
 --- a/os-posix.c
 +++ b/os-posix.c
-@@ -28,6 +28,8 @@
+@@ -29,6 +29,8 @@
 #include <pwd.h>
 #include <grp.h>
 #include <libgen.h>
 +#include <systemd/sd-journal.h>
 +#include <syslog.h>
 
- /* Needed early for CONFIG_BSD etc. */
- #include "net/slirp.h"
-@@ -310,9 +312,10 @@ void os_setup_post(void)
+ #include "qemu/error-report.h"
+ #include "qemu/log.h"
+@@ -302,9 +304,10 @@ void os_setup_post(void)
 
         dup2(fd, 0);
         dup2(fd, 1);
--- a/debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
+++ b/debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
@ -26,10 +26,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 create mode 100644 migration/pbs-state.c

 diff --git a/include/migration/misc.h b/include/migration/misc.h
-index 7dcc0b5c2c..4c940b2475 100644
+index c9e200f4eb..12c99ebc69 100644
 --- a/include/migration/misc.h
 +++ b/include/migration/misc.h
-@@ -77,4 +77,7 @@ bool migration_in_bg_snapshot(void);
+@@ -117,4 +117,7 @@ bool migration_in_bg_snapshot(void);
 /* migration/block-dirty-bitmap.c */
 void dirty_bitmap_mig_init(void);
 
@ -38,7 +38,7 @@ index 7dcc0b5c2c..4c940b2475 100644
 +
 #endif
 diff --git a/migration/meson.build b/migration/meson.build
-index 07f6057acc..343994d891 100644
+index 800f12a60d..35a4306183 100644
 --- a/migration/meson.build
 +++ b/migration/meson.build
@@ -7,7 +7,9 @@ migration_files = files(
@ -52,17 +52,17 @@ index 07f6057acc..343994d891 100644
 system_ss.add(files(
   'block-dirty-bitmap.c',
 diff --git a/migration/migration.c b/migration/migration.c
-index 7a4c8beb5d..0a955a2a18 100644
+index 86bf76e925..b8d7e471a4 100644
 --- a/migration/migration.c
 +++ b/migration/migration.c
-@@ -162,6 +162,7 @@ void migration_object_init(void)
+@@ -239,6 +239,7 @@ void migration_object_init(void)
     blk_mig_init();
     ram_mig_init();
     dirty_bitmap_mig_init();
 +    pbs_state_mig_init();
 }
 
- void migration_cancel(const Error *error)
+ typedef struct {
 diff --git a/migration/pbs-state.c b/migration/pbs-state.c
 new file mode 100644
 index 0000000000..887e998b9e
@ -174,10 +174,10 @@ index 0000000000..887e998b9e
 +                         NULL);
 +}
 diff --git a/pve-backup.c b/pve-backup.c
-index d84d807654..9c8b88d075 100644
+index c755bf302b..5ebb6a3947 100644
 --- a/pve-backup.c
 +++ b/pve-backup.c
-@@ -1060,6 +1060,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+@@ -1085,6 +1085,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
     ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
     ret->pbs_dirty_bitmap = true;
     ret->pbs_dirty_bitmap_savevm = true;
@ -186,10 +186,10 @@ index d84d807654..9c8b88d075 100644
     ret->pbs_masterkey = true;
     ret->backup_max_workers = true;
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index d67a6d448a..09de550c95 100644
+index e7cf3d94f3..282e2e8a8c 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -991,6 +991,11 @@
+@@ -1004,6 +1004,11 @@
 # @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
 #                           safely be set for savevm-async.
 #
@ -201,7 +201,7 @@ index d67a6d448a..09de550c95 100644
 # @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
 #                 parameter.
 #
-@@ -1001,6 +1006,7 @@
+@@ -1017,6 +1022,7 @@
   'data': { 'pbs-dirty-bitmap': 'bool',
             'query-bitmap-info': 'bool',
             'pbs-dirty-bitmap-savevm': 'bool',
--- a/debian/patches/pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
+++ b/debian/patches/pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
@ -19,7 +19,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
-index e1ae3b7316..285dd1d148 100644
+index 2708abf3d7..fb17c01308 100644
 --- a/migration/block-dirty-bitmap.c
 +++ b/migration/block-dirty-bitmap.c
@@ -540,7 +540,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
--- a/debian/patches/pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
+++ b/debian/patches/pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
@ -21,10 +21,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 30 insertions(+)

 diff --git a/block/iscsi.c b/block/iscsi.c
-index 34f97ab646..398782963d 100644
+index 2ff14b7472..46f275fbf7 100644
 --- a/block/iscsi.c
 +++ b/block/iscsi.c
-@@ -1391,12 +1391,42 @@ static char *get_initiator_name(QemuOpts *opts)
+@@ -1392,12 +1392,42 @@ static char *get_initiator_name(QemuOpts *opts)
     const char *name;
     char *iscsi_name;
     UuidInfo *uuid_info;
--- a/debian/patches/pve/0037-PVE-block-stream-increase-chunk-size.patch
+++ b/debian/patches/pve/0037-PVE-block-stream-increase-chunk-size.patch
@ -11,7 +11,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/block/stream.c b/block/stream.c
-index e522bbdec5..afed72db55 100644
+index 7031eef12b..d2da83ae7c 100644
 --- a/block/stream.c
 +++ b/block/stream.c
@@ -27,7 +27,7 @@ enum {
--- a/debian/patches/pve/0038-block-add-alloc-track-driver.patch
+++ b/debian/patches/pve/0038-block-add-alloc-track-driver.patch
@ -19,27 +19,33 @@ well.
 This only worked if the target supports backing images, so up until now
 only for qcow2, with alloc-track any driver for the target can be used.

-If 'auto-remove' is set, alloc-track will automatically detach itself
-once the backing image is removed. It will be replaced by 'file'.
+Replacing the node cannot be done in the
+track_co_change_backing_file() callback, because replacing a node
+cannot happen in a coroutine and requires the block graph lock
+exclusively. Could either become a special option for the stream job,
+or maybe the upcoming blockdev-replace QMP command can be used in the
+future.

 Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 [FE: adapt to changed function signatures
     make error return value consistent with QEMU
-     avoid premature break during read]
+     avoid premature break during read
+     adhere to block graph lock requirements]
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- block/alloc-track.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
+ block/alloc-track.c | 366 ++++++++++++++++++++++++++++++++++++++++++++
 block/meson.build   |   1 +
- 2 files changed, 353 insertions(+)
+ block/stream.c      |  34 ++++
+ 3 files changed, 401 insertions(+)
 create mode 100644 block/alloc-track.c

 diff --git a/block/alloc-track.c b/block/alloc-track.c
 new file mode 100644
-index 0000000000..b75d7c6460
+index 0000000000..b9f8ea9137
 --- /dev/null
 +++ b/block/alloc-track.c
-@@ -0,0 +1,352 @@
+@@ -0,0 +1,366 @@
 +/*
 + * Node to allow backing images to be applied to any node. Assumes a blank
 + * image to begin with, only new writes are tracked as allocated, thus this
@ -56,9 +62,11 @@ index 0000000000..b75d7c6460
 +#include "qapi/error.h"
 +#include "block/block_int.h"
 +#include "block/dirty-bitmap.h"
+#include "block/graph-lock.h"
 +#include "qapi/qmp/qdict.h"
 +#include "qapi/qmp/qstring.h"
 +#include "qemu/cutils.h"
+#include "qemu/error-report.h"
 +#include "qemu/option.h"
 +#include "qemu/module.h"
 +#include "sysemu/block-backend.h"
@ -67,12 +75,12 @@ index 0000000000..b75d7c6460
 +
 +typedef enum DropState {
 +    DropNone,
-+    DropRequested,
 +    DropInProgress,
 +} DropState;
 +
 +typedef struct {
 +    BdrvDirtyBitmap *bitmap;
+    uint64_t granularity;
 +    DropState drop_state;
 +    bool auto_remove;
 +} BDRVAllocTrackState;
@ -91,26 +99,29 @@ index 0000000000..b75d7c6460
 +    },
 +};
 +
-+static void track_refresh_limits(BlockDriverState *bs, Error **errp)
+static void GRAPH_RDLOCK
+track_refresh_limits(BlockDriverState *bs, Error **errp)
 +{
-+    BlockDriverInfo bdi;
+    BDRVAllocTrackState *s = bs->opaque;
 +
 +    if (!bs->file) {
 +        return;
 +    }
 +
-+    /* always use alignment from underlying write device so RMW cycle for
-+     * bdrv_pwritev reads data from our backing via track_co_preadv (no partial
-+     * cluster allocation in 'file') */
-+    bdrv_get_info(bs->file->bs, &bdi);
+    /*
+     * Always use alignment from underlying write device so RMW cycle for
+     * bdrv_pwritev reads data from our backing via track_co_preadv. Also use at
+     * least the bitmap granularity.
+     */
 +    bs->bl.request_alignment = MAX(bs->file->bs->bl.request_alignment,
-+                                   MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+                                   s->granularity);
 +}
 +
 +static int track_open(BlockDriverState *bs, QDict *options, int flags,
 +                      Error **errp)
 +{
 +    BDRVAllocTrackState *s = bs->opaque;
+    BdrvChild *file = NULL;
 +    QemuOpts *opts;
 +    Error *local_err = NULL;
 +    int ret = 0;
@ -126,18 +137,45 @@ index 0000000000..b75d7c6460
 +    s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
 +
 +    /* open the target (write) node, backing will be attached by block layer */
-+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+    file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
 +                           BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
 +                           &local_err);
+    bdrv_graph_wrlock();
+    bs->file = file;
+    bdrv_graph_wrunlock();
 +    if (local_err) {
 +        ret = -EINVAL;
 +        error_propagate(errp, local_err);
 +        goto fail;
 +    }
 +
+    bdrv_graph_rdlock_main_loop();
+    BlockDriverInfo bdi = {0};
+    ret = bdrv_get_info(bs->file->bs, &bdi);
+    if (ret < 0) {
+        /*
+         * Not a hard failure. Worst that can happen is partial cluster
+         * allocation in the write target. However, the driver here returns its
+         * allocation status based on the dirty bitmap, so any other data that
+         * maps to such a cluster will still be copied later by a stream job (or
+         * during writes to that cluster).
+         */
+        warn_report("alloc-track: unable to query cluster size for write target: %s",
+                    strerror(ret));
+    }
+    ret = 0;
+    /*
+     * Always consider alignment from underlying write device so RMW cycle for
+     * bdrv_pwritev reads data from our backing via track_co_preadv. Also try to
+     * avoid partial cluster allocation in the write target by considering the
+     * cluster size.
+     */
+    s->granularity = MAX(bs->file->bs->bl.request_alignment,
+                         MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
 +    track_refresh_limits(bs, errp);
-+    uint64_t gran = bs->bl.request_alignment;
-+    s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, gran, NULL, &local_err);
+    s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, s->granularity, NULL,
+                                         &local_err);
+    bdrv_graph_rdunlock_main_loop();
 +    if (local_err) {
 +        ret = -EIO;
 +        error_propagate(errp, local_err);
@ -148,7 +186,9 @@ index 0000000000..b75d7c6460
 +
 +fail:
 +    if (ret < 0) {
+        bdrv_graph_wrlock();
 +        bdrv_unref_child(bs, bs->file);
+        bdrv_graph_wrunlock();
 +        if (s->bitmap) {
 +            bdrv_release_dirty_bitmap(s->bitmap);
 +        }
@ -165,13 +205,15 @@ index 0000000000..b75d7c6460
 +    }
 +}
 +
-+static coroutine_fn int64_t track_co_getlength(BlockDriverState *bs)
+static coroutine_fn int64_t GRAPH_RDLOCK
+track_co_getlength(BlockDriverState *bs)
 +{
 +    return bdrv_co_getlength(bs->file->bs);
 +}
 +
-+static int coroutine_fn track_co_preadv(BlockDriverState *bs,
-+    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+track_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
 +{
 +    BDRVAllocTrackState *s = bs->opaque;
 +    QEMUIOVector local_qiov;
@ -229,31 +271,34 @@ index 0000000000..b75d7c6460
 +    return ret;
 +}
 +
-+static int coroutine_fn track_co_pwritev(BlockDriverState *bs,
-+    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                 QEMUIOVector *qiov, BdrvRequestFlags flags)
 +{
 +    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 +}
 +
-+static int coroutine_fn track_co_pwrite_zeroes(BlockDriverState *bs,
-+    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       BdrvRequestFlags flags)
 +{
 +    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 +}
 +
-+static int coroutine_fn track_co_pdiscard(BlockDriverState *bs,
-+    int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 +{
 +    return bdrv_co_pdiscard(bs->file, offset, bytes);
 +}
 +
-+static coroutine_fn int track_co_flush(BlockDriverState *bs)
+static coroutine_fn int GRAPH_RDLOCK
+track_co_flush(BlockDriverState *bs)
 +{
 +    return bdrv_co_flush(bs->file->bs);
 +}
 +
-+static int coroutine_fn track_co_block_status(BlockDriverState *bs,
-+                                              bool want_zero,
+static int coroutine_fn GRAPH_RDLOCK
+track_co_block_status(BlockDriverState *bs, bool want_zero,
 +                                            int64_t offset,
 +                                            int64_t bytes,
 +                                            int64_t *pnum,
@ -284,9 +329,9 @@ index 0000000000..b75d7c6460
 +    return 0;
 +}
 +
-+static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
-+                             BdrvChildRole role, BlockReopenQueue *reopen_queue,
-+                             uint64_t perm, uint64_t shared,
+static void GRAPH_RDLOCK
+track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                 BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared,
 +                 uint64_t *nperm, uint64_t *nshared)
 +{
 +    BDRVAllocTrackState *s = bs->opaque;
@ -310,53 +355,28 @@ index 0000000000..b75d7c6460
 +    }
 +}
 +
-+static void track_drop(void *opaque)
-+{
-+    BlockDriverState *bs = (BlockDriverState*)opaque;
-+    BlockDriverState *file = bs->file->bs;
-+    BDRVAllocTrackState *s = bs->opaque;
-+
-+    assert(file);
-+
-+    /* we rely on the fact that we're not used anywhere else, so let's wait
-+     * until we're only used once - in the drive connected to the guest (and one
-+     * ref is held by bdrv_ref in track_change_backing_file) */
-+    if (bs->refcnt > 2) {
-+        aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
-+        return;
-+    }
-+    AioContext *aio_context = bdrv_get_aio_context(bs);
-+    aio_context_acquire(aio_context);
-+
-+    bdrv_drained_begin(bs);
-+
-+    /* now that we're drained, we can safely set 'DropInProgress' */
-+    s->drop_state = DropInProgress;
-+    bdrv_child_refresh_perms(bs, bs->file, &error_abort);
-+
-+    bdrv_replace_node(bs, file, &error_abort);
-+    bdrv_set_backing_hd(bs, NULL, &error_abort);
-+    bdrv_drained_end(bs);
-+    bdrv_unref(bs);
-+    aio_context_release(aio_context);
-+}
-+
-+static int track_change_backing_file(BlockDriverState *bs,
-+                                     const char *backing_file,
+static int coroutine_fn GRAPH_RDLOCK
+track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
 +                             const char *backing_fmt)
 +{
+    /*
+     * Note that the actual backing file graph change is already done in the
+     * stream job itself with bdrv_set_backing_hd_drained(), so no need to
+     * actually do anything here. But still needs to be implemented, to make
+     * our caller (i.e. bdrv_co_change_backing_file()) do the right thing.
+     *
+     * FIXME
+     * We'd like to auto-remove ourselves from the block graph, but it cannot
+     * be done from a coroutine. Currently done in the stream job, where it
+     * kinda fits better, but in the long-term, a special parameter would be
+     * nice (or done via qemu-server via upcoming blockdev-replace QMP command).
+     */
+    if (backing_file == NULL) {
 +        BDRVAllocTrackState *s = bs->opaque;
-+    if (s->auto_remove && s->drop_state == DropNone &&
-+        backing_file == NULL && backing_fmt == NULL)
-+    {
-+        /* backing file has been disconnected, there's no longer any use for
-+         * this node, so let's remove ourselves from the block graph - we need
-+         * to schedule this for later however, since when this function is
-+         * called, the blockjob modifying us is probably not done yet and has a
-+         * blocker on 'bs' */
-+        s->drop_state = DropRequested;
-+        bdrv_ref(bs);
-+        aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
+        bdrv_drained_begin(bs);
+        s->drop_state = DropInProgress;
+        bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+        bdrv_drained_end(bs);
 +    }
 +
 +    return 0;
@ -383,7 +403,7 @@ index 0000000000..b75d7c6460
 +    .supports_backing                 = true,
 +
 +    .bdrv_co_block_status             = track_co_block_status,
-+    .bdrv_change_backing_file         = track_change_backing_file,
+    .bdrv_co_change_backing_file      = track_co_change_backing_file,
 +};
 +
 +static void bdrv_alloc_track_init(void)
@ -393,7 +413,7 @@ index 0000000000..b75d7c6460
 +
 +block_init(bdrv_alloc_track_init);
 diff --git a/block/meson.build b/block/meson.build
-index becc99ac4e..0a69836593 100644
+index 1945e04eeb..2873f3a25a 100644
 --- a/block/meson.build
 +++ b/block/meson.build
@@ -2,6 +2,7 @@ block_ss.add(genh)
@ -404,3 +424,48 @@ index becc99ac4e..0a69836593 100644
   'amend.c',
   'backup.c',
   'backup-dump.c',
+diff --git a/block/stream.c b/block/stream.c
+index d2da83ae7c..f941cba14e 100644
+--- a/block/stream.c
+++ b/block/stream.c
+@@ -120,6 +120,40 @@ static int stream_prepare(Job *job)
+             ret = -EPERM;
+             goto out;
+         }
+
+        /*
+         * This cannot be done in the co_change_backing_file callback, because
+         * bdrv_replace_node() cannot be done in a coroutine. The latter also
+         * requires the graph lock exclusively. Only required for the
+         * alloc-track driver.
+         *
+         * The long-term plan is to either have an explicit parameter for the
+         * stream job or use the upcoming blockdev-replace QMP command.
+         */
+        if (base_id == NULL && strcmp(unfiltered_bs->drv->format_name, "alloc-track") == 0) {
+            BlockDriverState *file_bs;
+
+            bdrv_graph_rdlock_main_loop();
+            file_bs = unfiltered_bs->file->bs;
+            bdrv_graph_rdunlock_main_loop();
+
+            bdrv_ref(unfiltered_bs); // unrefed by bdrv_replace_node()
+            bdrv_drained_begin(file_bs);
+            bdrv_graph_wrlock();
+
+            bdrv_replace_node(unfiltered_bs, file_bs, &local_err);
+
+            bdrv_graph_wrunlock();
+            bdrv_drained_end(file_bs);
+            bdrv_unref(unfiltered_bs);
+
+            if (local_err) {
+                error_prepend(&local_err, "failed to replace alloc-track node: ");
+                error_report_err(local_err);
+                ret = -EPERM;
+                goto out;
+            }
+        }
+     }
+ 
+ out:
--- a/debian/patches/pve/0038-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
+++ b/debian/patches/pve/0038-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
@ -1,33 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Stefan Reiter <s.reiter@proxmox.com>
-Date: Tue, 2 Mar 2021 16:11:54 +0100
-Subject: [PATCH] block/io: accept NULL qiov in bdrv_pad_request
-
-Some operations, e.g. block-stream, perform reads while discarding the
-results (only copy-on-read matters). In this case they will pass NULL as
-the target QEMUIOVector, which will however trip bdrv_pad_request, since
-it wants to extend its passed vector.
-
-Simply check for NULL and do nothing, there's no reason to pad the
-target if it will be discarded anyway.
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- block/io.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/block/io.c b/block/io.c
-index 83d1b1dfdc..24a3c84c93 100644
--- a/block/io.c
-+++ b/block/io.c
-@@ -1710,6 +1710,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
-     int sliced_niov;
-     size_t sliced_head, sliced_tail;
- 
-+    if (!qiov) {
-+        return 0;
-+    }
-+
-     /* Should have been checked by the caller already */
-     ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
-     if (ret < 0) {
--- a/debian/patches/pve/0039-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
+++ b/debian/patches/pve/0039-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
@ -7,15 +7,16 @@ This reverts commit fc176116cdea816ceb8dd969080b2b95f58edbc0 in
 preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.

 Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 block/rbd.c | 42 ++----------------------------------------
 1 file changed, 2 insertions(+), 40 deletions(-)

 diff --git a/block/rbd.c b/block/rbd.c
-index a4749f3b1b..53e0396b51 100644
+index 63f60d41be..367db42dce 100644
 --- a/block/rbd.c
 +++ b/block/rbd.c
-@@ -1511,7 +1511,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+@@ -1515,7 +1515,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
     int status, r;
     RBDDiffIterateReq req = { .offs = offset };
     uint64_t features, flags;
@ -23,7 +24,7 @@ index a4749f3b1b..53e0396b51 100644
 
     assert(offset + bytes <= s->image_size);
 
-@@ -1539,43 +1538,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+@@ -1543,43 +1542,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
         return status;
     }
 
@ -68,7 +69,7 @@ index a4749f3b1b..53e0396b51 100644
                           qemu_rbd_diff_iterate_cb, &req);
     if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
         return status;
-@@ -1594,8 +1557,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+@@ -1598,8 +1561,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
         status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
     }
 
--- a/debian/patches/pve/0040-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
+++ b/debian/patches/pve/0040-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
@ -8,15 +8,16 @@ This reverts commit 9e302f64bb407a9bb097b626da97228c2654cfee in
 preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.

 Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 block/rbd.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

 diff --git a/block/rbd.c b/block/rbd.c
-index 53e0396b51..0913a0af39 100644
+index 367db42dce..347b121626 100644
 --- a/block/rbd.c
 +++ b/block/rbd.c
-@@ -1470,11 +1470,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
+@@ -1474,11 +1474,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
     RBDDiffIterateReq *req = opaque;
 
     assert(req->offs + req->bytes <= offs);
--- a/debian/patches/pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
+++ b/debian/patches/pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
@ -18,12 +18,13 @@ Upstream bug report:
 https://gitlab.com/qemu-project/qemu/-/issues/1026

 Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 block/rbd.c | 112 ----------------------------------------------------
 1 file changed, 112 deletions(-)

 diff --git a/block/rbd.c b/block/rbd.c
-index 0913a0af39..1dab254517 100644
+index 347b121626..e61b359b97 100644
 --- a/block/rbd.c
 +++ b/block/rbd.c
@@ -108,12 +108,6 @@ typedef struct RBDTask {
@ -39,7 +40,7 @@ index 0913a0af39..1dab254517 100644
 static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
                             BlockdevOptionsRbd *opts, bool cache,
                             const char *keypairs, const char *secretid,
-@@ -1456,111 +1450,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
+@@ -1460,111 +1454,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
     return spec_info;
 }
 
@ -151,7 +152,7 @@ index 0913a0af39..1dab254517 100644
 static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs)
 {
     BDRVRBDState *s = bs->opaque;
-@@ -1796,7 +1685,6 @@ static BlockDriver bdrv_rbd = {
+@@ -1800,7 +1689,6 @@ static BlockDriver bdrv_rbd = {
 #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
     .bdrv_co_pwrite_zeroes  = qemu_rbd_co_pwrite_zeroes,
 #endif
--- a/debian/patches/pve/0042-alloc-track-error-out-when-auto-remove-is-not-set.patch
+++ b/debian/patches/pve/0042-alloc-track-error-out-when-auto-remove-is-not-set.patch
@ -0,0 +1,43 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Tue, 26 Mar 2024 14:57:51 +0100
+Subject: [PATCH] alloc-track: error out when auto-remove is not set
+
+Since replacing the node now happens in the stream job, where the
+option cannot be read from (it's internal to the driver), it will
+always be treated as on.
+
+qemu-server will always set it; erroring out makes sure other users
+notice the change (should they even exist). The option can be fully
+dropped in the future while adding a version guard in qemu-server.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/alloc-track.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/block/alloc-track.c b/block/alloc-track.c
+index b9f8ea9137..f3ed2935c4 100644
+--- a/block/alloc-track.c
+++ b/block/alloc-track.c
+@@ -34,7 +34,6 @@ typedef struct {
+     BdrvDirtyBitmap *bitmap;
+     uint64_t granularity;
+     DropState drop_state;
+-    bool auto_remove;
+ } BDRVAllocTrackState;
+ 
+ static QemuOptsList runtime_opts = {
+@@ -86,7 +85,11 @@ static int track_open(BlockDriverState *bs, QDict *options, int flags,
+         goto fail;
+     }
+ 
+-    s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
+    if (!qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false)) {
+        error_setg(errp, "alloc-track: requires auto-remove option to be set to on");
+        ret = -EINVAL;
+        goto fail;
+    }
+ 
+     /* open the target (write) node, backing will be attached by block layer */
+     file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
--- a/debian/patches/pve/0043-alloc-track-avoid-seemingly-superfluous-child-permis.patch
+++ b/debian/patches/pve/0043-alloc-track-avoid-seemingly-superfluous-child-permis.patch
@ -0,0 +1,84 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Wed, 27 Mar 2024 11:15:39 +0100
+Subject: [PATCH] alloc-track: avoid seemingly superfluous child permission
+ update
+
+Doesn't seem necessary nowadays (maybe after commit "alloc-track: fix
+deadlock during drop" where the dropping is not rescheduled and delayed
+anymore or some upstream change). Should there really be some issue,
+instead of having a drop state, this could also just be based on
+whether there is still a backing child.
+
+Dumping the cumulative (shared) permissions for the BDS with a debug
+print yields the same values after this patch and with QEMU 8.1,
+namely 3 and 5.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/alloc-track.c | 26 --------------------------
+ 1 file changed, 26 deletions(-)
+
+diff --git a/block/alloc-track.c b/block/alloc-track.c
+index f3ed2935c4..29138dcc49 100644
+--- a/block/alloc-track.c
+++ b/block/alloc-track.c
+@@ -25,15 +25,9 @@
+ 
+ #define TRACK_OPT_AUTO_REMOVE "auto-remove"
+ 
+-typedef enum DropState {
+-    DropNone,
+-    DropInProgress,
+-} DropState;
+-
+ typedef struct {
+     BdrvDirtyBitmap *bitmap;
+     uint64_t granularity;
+-    DropState drop_state;
+ } BDRVAllocTrackState;
+ 
+ static QemuOptsList runtime_opts = {
+@@ -137,8 +131,6 @@ static int track_open(BlockDriverState *bs, QDict *options, int flags,
+         goto fail;
+     }
+ 
+-    s->drop_state = DropNone;
+-
+ fail:
+     if (ret < 0) {
+         bdrv_graph_wrlock();
+@@ -289,18 +281,8 @@ track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                  BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared,
+                  uint64_t *nperm, uint64_t *nshared)
+ {
+-    BDRVAllocTrackState *s = bs->opaque;
+-
+     *nshared = BLK_PERM_ALL;
+ 
+-    /* in case we're currently dropping ourselves, claim to not use any
+-     * permissions at all - which is fine, since from this point on we will
+-     * never issue a read or write anymore */
+-    if (s->drop_state == DropInProgress) {
+-        *nperm = 0;
+-        return;
+-    }
+-
+     if (role & BDRV_CHILD_DATA) {
+         *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+     } else {
+@@ -326,14 +308,6 @@ track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+      * kinda fits better, but in the long-term, a special parameter would be
+      * nice (or done via qemu-server via upcoming blockdev-replace QMP command).
+      */
+-    if (backing_file == NULL) {
+-        BDRVAllocTrackState *s = bs->opaque;
+-        bdrv_drained_begin(bs);
+-        s->drop_state = DropInProgress;
+-        bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+-        bdrv_drained_end(bs);
+-    }
+-
+     return 0;
+ }
+ 
--- a/debian/patches/pve/0043-alloc-track-fix-deadlock-during-drop.patch
+++ b/debian/patches/pve/0043-alloc-track-fix-deadlock-during-drop.patch
@ -1,153 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 6 Apr 2023 14:59:31 +0200
-Subject: [PATCH] alloc-track: fix deadlock during drop
-
-by replacing the block node directly after changing the backing file
-instead of rescheduling it.
-
-With changes in QEMU 8.0, calling bdrv_get_info (and bdrv_unref)
-during drop can lead to a deadlock when using iothread (only triggered
-with multiple disks, except during debugging where it also triggered
-with one disk sometimes):
-1. job_unref_locked acquires the AioContext and calls job->driver->free
-2. track_drop gets scheduled
-3. bdrv_graph_wrlock is called and polls which leads to track_drop being
-   called
-4. track_drop acquires the AioContext recursively
-5. bdrv_get_info is a wrapped coroutine (since 8.0) and thus polls for
-   bdrv_co_get_info. This releases the AioContext, but only once! The
-   documentation for the AIO_WAIT_WHILE macro states that the
-   AioContext lock needs to be acquired exactly once, but there does
-   not seem to be a way for track_drop to know if it acquired the lock
-   recursively or not (without adding further hacks).
-6. Because the AioContext is still held by the main thread once, it can't
-   be acquired before entering bdrv_co_get_info in co_schedule_bh_cb
-   which happens in the iothread
-
-When doing the operation in change_backing_file, the AioContext has
-already been acquired by the caller, so the issue with the recursive
-lock goes away.
-
-The comment explaining why delaying the replace is necessary is
-> we need to schedule this for later however, since when this function
-> is called, the blockjob modifying us is probably not done yet and
-> has a blocker on 'bs'
-
-However, there is no check for blockers in bdrv_replace_node. It would
-need to be done by us, the caller, with check_to_replace_node.
-Furthermore, the mirror job also does its call to bdrv_replace_node
-while there is an active blocker (inserted by mirror itself) and they
-use a specialized version to check for blockers instead of
-check_to_replace_node there. Alloc-track could also do something
-similar to check for other blockers, but it should be fine to rely on
-Proxmox VE that no other operation with the blockdev is going on.
-
-Mirror also drains the target before replacing the node, but the
-target can have other users. In case of alloc-track the file child
-should not be accessible by anybody else and so there can't be an
-in-flight operation for the file child when alloc-track is drained.
-
-The rescheduling based on refcounting is a hack and it doesn't seem to
-be necessary anymore. It's not clear what the original issue from the
-comment was. Testing with older builds with track_drop done directly
-without rescheduling also didn't lead to any noticable issue for me.
-
-One issue it might have been is the one fixed by b1e1af394d
-("block/stream: Drain subtree around graph change"), where
-block-stream had a use-after-free if the base node changed at an
-inconvenient time (which alloc-track's auto-drop does).
-
-It's also not possible to just not auto-replace the alloc-track. Not
-replacing it at all leads to other operations like block resize
-hanging, and there is no good way to replace it manually via QMP
-(there is x-blockdev-change, but it is experimental and doesn't
-implement the required operation yet). Also, it's just cleaner in
-general to not leave unnecessary block nodes lying around.
-
-Suggested-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
- block/alloc-track.c | 54 ++++++++++++++-------------------------------
- 1 file changed, 16 insertions(+), 38 deletions(-)
-
-diff --git a/block/alloc-track.c b/block/alloc-track.c
-index b75d7c6460..76da140a68 100644
--- a/block/alloc-track.c
-+++ b/block/alloc-track.c
-@@ -25,7 +25,6 @@
- 
- typedef enum DropState {
-     DropNone,
-    DropRequested,
-     DropInProgress,
- } DropState;
- 
-@@ -268,37 +267,6 @@ static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
-     }
- }
- 
-static void track_drop(void *opaque)
-{
-    BlockDriverState *bs = (BlockDriverState*)opaque;
-    BlockDriverState *file = bs->file->bs;
-    BDRVAllocTrackState *s = bs->opaque;
-
-    assert(file);
-
-    /* we rely on the fact that we're not used anywhere else, so let's wait
-     * until we're only used once - in the drive connected to the guest (and one
-     * ref is held by bdrv_ref in track_change_backing_file) */
-    if (bs->refcnt > 2) {
-        aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
-        return;
-    }
-    AioContext *aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
-    bdrv_drained_begin(bs);
-
-    /* now that we're drained, we can safely set 'DropInProgress' */
-    s->drop_state = DropInProgress;
-    bdrv_child_refresh_perms(bs, bs->file, &error_abort);
-
-    bdrv_replace_node(bs, file, &error_abort);
-    bdrv_set_backing_hd(bs, NULL, &error_abort);
-    bdrv_drained_end(bs);
-    bdrv_unref(bs);
-    aio_context_release(aio_context);
-}
-
- static int track_change_backing_file(BlockDriverState *bs,
-                                      const char *backing_file,
-                                      const char *backing_fmt)
-@@ -308,13 +276,23 @@ static int track_change_backing_file(BlockDriverState *bs,
-         backing_file == NULL && backing_fmt == NULL)
-     {
-         /* backing file has been disconnected, there's no longer any use for
-         * this node, so let's remove ourselves from the block graph - we need
-         * to schedule this for later however, since when this function is
-         * called, the blockjob modifying us is probably not done yet and has a
-         * blocker on 'bs' */
-        s->drop_state = DropRequested;
-+         * this node, so let's remove ourselves from the block graph */
-+        BlockDriverState *file = bs->file->bs;
-+
-+        /* Just to be sure, because bdrv_replace_node unrefs it */
-         bdrv_ref(bs);
-        aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
-+        bdrv_drained_begin(bs);
-+
-+        /* now that we're drained, we can safely set 'DropInProgress' */
-+        s->drop_state = DropInProgress;
-+
-+        bdrv_child_refresh_perms(bs, bs->file, &error_abort);
-+
-+        bdrv_replace_node(bs, file, &error_abort);
-+        bdrv_set_backing_hd(bs, NULL, &error_abort);
-+
-+        bdrv_drained_end(bs);
-+        bdrv_unref(bs);
-     }
- 
-     return 0;
--- a/debian/patches/pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch
+++ b/debian/patches/pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch
@ -0,0 +1,133 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:26 +0200
+Subject: [PATCH] copy-before-write: allow specifying minimum cluster size
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Copy-before-write operations will use at least this granularity and in
+particular, discard requests to the source node will too. If the
+granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+The QAPI uses uint32 so the value will be non-negative, but still fit
+into a uint64_t.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c         | 17 +++++++++++++----
+ block/copy-before-write.c  |  3 ++-
+ include/block/block-copy.h |  1 +
+ qapi/block-core.json       |  8 +++++++-
+ 4 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index cc618e4561..12d662e9d4 100644
+--- a/block/block-copy.c
+++ b/block/block-copy.c
+@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+ }
+ 
+ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+                                                 int64_t min_cluster_size,
+                                                  Error **errp)
+ {
+     int ret;
+@@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+                     "used. If the actual block size of the target exceeds "
+                     "this default, the backup may be unusable",
+                     BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+     } else if (ret < 0 && !target_does_cow) {
+         error_setg_errno(errp, -ret,
+             "Couldn't determine the cluster size of the target image, "
+@@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+         return ret;
+     } else if (ret < 0 && target_does_cow) {
+         /* Not fatal; just trudge on ahead. */
+-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+     }
+ 
+-    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+    return MAX(min_cluster_size,
+               MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
+ }
+ 
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      bool discard_source,
+                                     int64_t min_cluster_size,
+                                      Error **errp)
+ {
+     ERRP_GUARD();
+@@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ 
+     GLOBAL_STATE_CODE();
+ 
+-    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
+    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
+        error_setg(errp, "min-cluster-size needs to be a power of 2");
+        return NULL;
+    }
+
+    cluster_size = block_copy_calculate_cluster_size(target->bs,
+                                                     min_cluster_size, errp);
+     if (cluster_size < 0) {
+         return NULL;
+     }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 28f6a096cd..ef4e666303 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -478,7 +478,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+ 
+     s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
+     s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
+-                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+                                  flags & BDRV_O_CBW_DISCARD_SOURCE,
+                                  opts->min_cluster_size, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index bdc703bacd..77857c6c68 100644
+--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
+@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      bool discard_source,
+                                     int64_t min_cluster_size,
+                                      Error **errp);
+ 
+ /* Function should be called prior any actual copy request */
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 282e2e8a8c..9caf04cbe9 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -4926,12 +4926,18 @@
+ #     @on-cbw-error parameter will decide how this failure is handled.
+ #     Default 0.  (Since 7.1)
+ #
+# @min-cluster-size: Minimum size of blocks used by copy-before-write
+#     operations.  Has to be a power of 2.  No effect if smaller than
+#     the maximum of the target's cluster size and 64 KiB.  Default 0.
+#     (Since 8.1)
+#
+ # Since: 6.2
+ ##
+ { 'struct': 'BlockdevOptionsCbw',
+   'base': 'BlockdevOptionsGenericFormat',
+   'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
+-            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
+            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
+            '*min-cluster-size': 'uint32' } }
+ 
+ ##
+ # @BlockdevOptions:
--- a/debian/patches/pve/0044-migration-for-snapshots-hold-the-BQL-during-setup-ca.patch
+++ b/debian/patches/pve/0044-migration-for-snapshots-hold-the-BQL-during-setup-ca.patch
@ -1,190 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Fri, 5 May 2023 13:39:53 +0200
-Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks
-
-In spirit, this is a partial revert of commit 9b09503752 ("migration:
-run setup callbacks out of big lock"), but only for the snapshot case.
-
-For snapshots, the bdrv_writev_vmstate() function is used during setup
-(in QIOChannelBlock backing the QEMUFile), but not holding the BQL
-while calling it could lead to an assertion failure. To understand
-how, first note the following:
-
-1. Generated coroutine wrappers for block layer functions spawn the
-coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it.
-2. If the host OS switches threads at an inconvenient time, it can
-happen that a bottom half scheduled for the main thread's AioContext
-is executed as part of a vCPU thread's aio_poll().
-
-An example leading to the assertion failure is as follows:
-
-main thread:
-1. A snapshot-save QMP command gets issued.
-2. snapshot_save_job_bh() is scheduled.
-
-vCPU thread:
-3. aio_poll() for the main thread's AioContext is called (e.g. when
-the guest writes to a pflash device, as part of blk_pwrite which is a
-generated coroutine wrapper).
-4. snapshot_save_job_bh() is executed as part of aio_poll().
-3. qemu_savevm_state() is called.
-4. qemu_mutex_unlock_iothread() is called. Now
-qemu_get_current_aio_context() returns 0x0.
-5. bdrv_writev_vmstate() is executed during the usual savevm setup.
-But this function is a generated coroutine wrapper, so it uses
-AIO_WAIT_WHILE. There, the assertion
-assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-will fail.
-
-To fix it, ensure that the BQL is held during setup. To avoid changing
-the behavior for migration too, introduce conditionals for the setup
-callbacks that need the BQL and only take the lock if it's not already
-held.
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
- include/migration/register.h   |  2 +-
- migration/block-dirty-bitmap.c | 15 ++++++++++++---
- migration/block.c              | 15 ++++++++++++---
- migration/ram.c                | 16 +++++++++++++---
- migration/savevm.c             |  2 --
- 5 files changed, 38 insertions(+), 12 deletions(-)
-
-diff --git a/include/migration/register.h b/include/migration/register.h
-index 90914f32f5..c728fd9120 100644
--- a/include/migration/register.h
-+++ b/include/migration/register.h
-@@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
-      * by other locks.
-      */
-     int (*save_live_iterate)(QEMUFile *f, void *opaque);
-+    int (*save_setup)(QEMUFile *f, void *opaque);
- 
-     /* This runs outside the iothread lock!  */
-    int (*save_setup)(QEMUFile *f, void *opaque);
-     /* Note for save_live_pending:
-      * must_precopy:
-      * - must be migrated in precopy or in stopped state
-diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
-index 285dd1d148..f7ee5a74d9 100644
--- a/migration/block-dirty-bitmap.c
-+++ b/migration/block-dirty-bitmap.c
-@@ -1219,10 +1219,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
- {
-     DBMSaveState *s = &((DBMState *)opaque)->save;
-     SaveBitmapState *dbms = NULL;
-+    bool release_lock = false;
- 
-    qemu_mutex_lock_iothread();
-+    /* For snapshots, the BQL is held during setup. */
-+    if (!qemu_mutex_iothread_locked()) {
-+        qemu_mutex_lock_iothread();
-+        release_lock = true;
-+    }
-     if (init_dirty_bitmap_migration(s) < 0) {
-        qemu_mutex_unlock_iothread();
-+        if (release_lock) {
-+            qemu_mutex_unlock_iothread();
-+        }
-         return -1;
-     }
- 
-@@ -1230,7 +1237,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
-         send_bitmap_start(f, s, dbms);
-     }
-     qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
-    qemu_mutex_unlock_iothread();
-+    if (release_lock) {
-+        qemu_mutex_unlock_iothread();
-+    }
-     return 0;
- }
- 
-diff --git a/migration/block.c b/migration/block.c
-index 86c2256a2b..8423e0c9f9 100644
--- a/migration/block.c
-+++ b/migration/block.c
-@@ -725,21 +725,30 @@ static void block_migration_cleanup(void *opaque)
- static int block_save_setup(QEMUFile *f, void *opaque)
- {
-     int ret;
-+    bool release_lock = false;
- 
-     trace_migration_block_save("setup", block_mig_state.submitted,
-                                block_mig_state.transferred);
- 
-    qemu_mutex_lock_iothread();
-+    /* For snapshots, the BQL is held during setup. */
-+    if (!qemu_mutex_iothread_locked()) {
-+        qemu_mutex_lock_iothread();
-+        release_lock = true;
-+    }
-     ret = init_blk_migration(f);
-     if (ret < 0) {
-        qemu_mutex_unlock_iothread();
-+        if (release_lock) {
-+            qemu_mutex_unlock_iothread();
-+        }
-         return ret;
-     }
- 
-     /* start track dirty blocks */
-     ret = set_dirty_tracking();
- 
-    qemu_mutex_unlock_iothread();
-+    if (release_lock) {
-+        qemu_mutex_unlock_iothread();
-+    }
- 
-     if (ret) {
-         return ret;
-diff --git a/migration/ram.c b/migration/ram.c
-index 6e1514f69f..6a1aec7031 100644
--- a/migration/ram.c
-+++ b/migration/ram.c
-@@ -2896,8 +2896,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
- 
- static void ram_init_bitmaps(RAMState *rs)
- {
-    /* For memory_global_dirty_log_start below.  */
-    qemu_mutex_lock_iothread();
-+    bool release_lock = false;
-+
-+    /*
-+     * For memory_global_dirty_log_start below.
-+     * For snapshots, the BQL is held during setup.
-+     */
-+    if (!qemu_mutex_iothread_locked()) {
-+        qemu_mutex_lock_iothread();
-+        release_lock = true;
-+    }
-     qemu_mutex_lock_ramlist();
- 
-     WITH_RCU_READ_LOCK_GUARD() {
-@@ -2909,7 +2917,9 @@ static void ram_init_bitmaps(RAMState *rs)
-         }
-     }
-     qemu_mutex_unlock_ramlist();
-    qemu_mutex_unlock_iothread();
-+    if (release_lock) {
-+        qemu_mutex_unlock_iothread();
-+    }
- 
-     /*
-      * After an eventual first bitmap sync, fixup the initial bitmap
-diff --git a/migration/savevm.c b/migration/savevm.c
-index d60c4f487a..3c015722f7 100644
--- a/migration/savevm.c
-+++ b/migration/savevm.c
-@@ -1625,10 +1625,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
-     reset_vfio_bytes_transferred();
-     ms->to_dst_file = f;
- 
-    qemu_mutex_unlock_iothread();
-     qemu_savevm_state_header(f);
-     qemu_savevm_state_setup(f);
-    qemu_mutex_lock_iothread();
- 
-     while (qemu_file_get_error(f) == 0) {
-         if (qemu_savevm_state_iterate(f, false) > 0) {
--- a/debian/patches/pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
+++ b/debian/patches/pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
@ -0,0 +1,106 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:27 +0200
+Subject: [PATCH] backup: add minimum cluster size to performance options
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Backup/block-copy will use at least this granularity for copy operations
+and in particular, discard requests to the backup source will too. If
+the granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c            | 2 +-
+ block/copy-before-write.c | 2 ++
+ block/copy-before-write.h | 1 +
+ blockdev.c                | 3 +++
+ qapi/block-core.json      | 9 +++++++--
+ 5 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index 1963e47ab9..fe69723ada 100644
+--- a/block/backup.c
+++ b/block/backup.c
+@@ -434,7 +434,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+     }
+ 
+     cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+-                          &bcs, errp);
+                          perf->min_cluster_size, &bcs, errp);
+     if (!cbw) {
+         goto error;
+     }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index ef4e666303..adb27649a8 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -547,6 +547,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                   bool discard_source,
+                                  int64_t min_cluster_size,
+                                   BlockCopyState **bcs,
+                                   Error **errp)
+ {
+@@ -565,6 +566,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+     }
+     qdict_put_str(opts, "file", bdrv_get_node_name(source));
+     qdict_put_str(opts, "target", bdrv_get_node_name(target));
+    qdict_put_int(opts, "min-cluster-size", min_cluster_size);
+ 
+     top = bdrv_insert_node(source, opts, flags, errp);
+     if (!top) {
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 01af0cd3c4..dc6cafe7fa 100644
+--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
+@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                   bool discard_source,
+                                  int64_t min_cluster_size,
+                                   BlockCopyState **bcs,
+                                   Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/blockdev.c b/blockdev.c
+index 1054a69279..cbe224387b 100644
+--- a/blockdev.c
+++ b/blockdev.c
+@@ -2654,6 +2654,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+         if (backup->x_perf->has_max_chunk) {
+             perf.max_chunk = backup->x_perf->max_chunk;
+         }
+        if (backup->x_perf->has_min_cluster_size) {
+            perf.min_cluster_size = backup->x_perf->min_cluster_size;
+        }
+     }
+ 
+     if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 9caf04cbe9..df934647ed 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -1790,11 +1790,16 @@
+ #     it should not be less than job cluster size which is calculated
+ #     as maximum of target image cluster size and 64k.  Default 0.
+ #
+# @min-cluster-size: Minimum size of blocks used by copy-before-write
+#     and background copy operations.  Has to be a power of 2.  No
+#     effect if smaller than the maximum of the target's cluster size
+#     and 64 KiB.  Default 0. (Since 8.1)
+#
+ # Since: 6.0
+ ##
+ { 'struct': 'BackupPerf',
+-  'data': { '*use-copy-range': 'bool',
+-            '*max-workers': 'int', '*max-chunk': 'int64' } }
+  'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
+            '*max-chunk': 'int64', '*min-cluster-size': 'uint32' } }
+ 
+ ##
+ # @BackupCommon:
--- a/debian/patches/pve/0045-savevm-async-don-t-hold-BQL-during-setup.patch
+++ b/debian/patches/pve/0045-savevm-async-don-t-hold-BQL-during-setup.patch
@ -1,29 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Fri, 5 May 2023 15:30:16 +0200
-Subject: [PATCH] savevm-async: don't hold BQL during setup
-
-See commit "migration: for snapshots, hold the BQL during setup
-callbacks" for why. This is separate, because a version of that one
-will hopefully land upstream.
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
- migration/savevm-async.c | 2 --
- 1 file changed, 2 deletions(-)
-
-diff --git a/migration/savevm-async.c b/migration/savevm-async.c
-index 80624fada8..b1d85a4b41 100644
--- a/migration/savevm-async.c
-+++ b/migration/savevm-async.c
-@@ -401,10 +401,8 @@ void qmp_savevm_start(const char *statefile, Error **errp)
-     snap_state.state = SAVE_STATE_ACTIVE;
-     snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
-     snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
-    qemu_mutex_unlock_iothread();
-     qemu_savevm_state_header(snap_state.file);
-     qemu_savevm_state_setup(snap_state.file);
-    qemu_mutex_lock_iothread();
- 
-     /* Async processing from here on out happens in iohandler context, so let
-      * the target bdrv have its home there.
--- a/debian/patches/pve/0046-PVE-backup-add-fleecing-option.patch
+++ b/debian/patches/pve/0046-PVE-backup-add-fleecing-option.patch
@ -0,0 +1,337 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:28 +0200
+Subject: [PATCH] PVE backup: add fleecing option
+
+When a fleecing option is given, it is expected that each device has
+a corresponding "-fleecing" block device already attached, except for
+EFI disk and TPM state, where fleecing is never used.
+
+The following graph was adapted from [0] which also contains more
+details about fleecing.
+
+[guest]
+   |
+   | root
+   v                 file
+[copy-before-write]<------[snapshot-access]
+   |           |
+   | file      | target
+   v           v
+[source] [fleecing]
+
+For fleecing, a copy-before-write filter is inserted on top of the
+source node, as well as a snapshot-access node pointing to the filter
+node which allows reading the consistent state of the image at the
+time it was inserted. New guest writes are passed through the
+copy-before-write filter which will first copy over old data to the
+fleecing image in case that old data is still needed by the
+snapshot-access node.
+
+The backup process will sequentially read from the snapshot access,
+which has a bitmap and knows whether to read from the original image
+or the fleecing image to get the "snapshot" state, i.e. data from the
+source image at the time when the copy-before-write filter was
+inserted. After reading, the copied sections are discarded from the
+fleecing image to reduce space usage.
+
+All of this can be restricted by an initial dirty bitmap to parts of
+the source image that are required for an incremental backup.
+
+For discard to work, it is necessary that the fleecing image does not
+have a larger cluster size than the backup job granularity. Querying
+that size does not always work (e.g. for RBD with krbd, the cluster
+size is not reported), so a minimum of 4 MiB is used. A job with PBS
+target already has at least this granularity, so it's just relevant
+for other targets. I.e. edge cases where this minimum is not enough
+should be very rare in practice. If ever necessary in the future, a
+passed-in value for the backup QMP command can still be added to
+override it.
+
+Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
+are set when installing the copy-before-write filter and
+snapshot-access. When an error or timeout occurs, the problematic (and
+each further) snapshot operation will fail and thus cancel the backup
+instead of breaking the guest write.
+
+Note that job_id cannot be inferred from the snapshot-access bs because
+it has no parent, so just pass the one from the original bs.
+
+[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/monitor/block-hmp-cmds.c |   1 +
+ pve-backup.c                   | 135 ++++++++++++++++++++++++++++++++-
+ qapi/block-core.json           |  10 ++-
+ 3 files changed, 142 insertions(+), 4 deletions(-)
+
+diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
+index 439a7a14c8..d0e7771dcc 100644
+--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
+@@ -1044,6 +1044,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
+         NULL, NULL,
+         devlist, qdict_haskey(qdict, "speed"), speed,
+         false, 0, // BackupPerf max-workers
+        false, false, // fleecing
+         &error);
+ 
+     hmp_handle_error(mon, error);
+diff --git a/pve-backup.c b/pve-backup.c
+index 5ebb6a3947..a747d12d3d 100644
+--- a/pve-backup.c
+++ b/pve-backup.c
+@@ -7,9 +7,11 @@
+ #include "sysemu/blockdev.h"
+ #include "block/block_int-global-state.h"
+ #include "block/blockjob.h"
+#include "block/copy-before-write.h"
+ #include "block/dirty-bitmap.h"
+ #include "block/graph-lock.h"
+ #include "qapi/qapi-commands-block.h"
+#include "qapi/qmp/qdict.h"
+ #include "qapi/qmp/qerror.h"
+ #include "qemu/cutils.h"
+ 
+@@ -80,8 +82,15 @@ static void pvebackup_init(void)
+ // initialize PVEBackupState at startup
+ opts_init(pvebackup_init);
+ 
+typedef struct PVEBackupFleecingInfo {
+    BlockDriverState *bs;
+    BlockDriverState *cbw;
+    BlockDriverState *snapshot_access;
+} PVEBackupFleecingInfo;
+
+ typedef struct PVEBackupDevInfo {
+     BlockDriverState *bs;
+    PVEBackupFleecingInfo fleecing;
+     size_t size;
+     uint64_t block_size;
+     uint8_t dev_id;
+@@ -353,6 +362,22 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+     PVEBackupDevInfo *di = opaque;
+     di->completed_ret = ret;
+ 
+    /*
+     * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
+     * won't be done as a coroutine anyways:
+     * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
+     *   just spawn a BH calling bdrv_unref().
+     * - For cbw, draining would need to spawn a BH.
+     */
+    if (di->fleecing.snapshot_access) {
+        bdrv_unref(di->fleecing.snapshot_access);
+        di->fleecing.snapshot_access = NULL;
+    }
+    if (di->fleecing.cbw) {
+        bdrv_cbw_drop(di->fleecing.cbw);
+        di->fleecing.cbw = NULL;
+    }
+
+     /*
+      * Needs to happen outside of coroutine, because it takes the graph write lock.
+      */
+@@ -519,9 +544,77 @@ static void create_backup_jobs_bh(void *opaque) {
+         }
+         bdrv_drained_begin(di->bs);
+ 
+        BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
+
+        BlockDriverState *source_bs = di->bs;
+        bool discard_source = false;
+        bdrv_graph_co_rdlock();
+        const char *job_id = bdrv_get_device_name(di->bs);
+        bdrv_graph_co_rdunlock();
+        if (di->fleecing.bs) {
+            QDict *cbw_opts = qdict_new();
+            qdict_put_str(cbw_opts, "driver", "copy-before-write");
+            qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
+            qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
+
+            if (di->bitmap) {
+                /*
+                 * Only guest writes to parts relevant for the backup need to be intercepted with
+                 * old data being copied to the fleecing image.
+                 */
+                qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
+                qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
+            }
+            /*
+             * Fleecing storage is supposed to be fast and it's better to break backup than guest
+             * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
+             * abort a bit before that.
+             */
+            qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
+            qdict_put_int(cbw_opts, "cbw-timeout", 45);
+
+            di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
+
+            if (!di->fleecing.cbw) {
+                error_setg(errp, "appending cbw node for fleecing failed: %s",
+                           local_err ? error_get_pretty(local_err) : "unknown error");
+                break;
+            }
+
+            QDict *snapshot_access_opts = qdict_new();
+            qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
+            qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
+
+            di->fleecing.snapshot_access =
+                bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
+            if (!di->fleecing.snapshot_access) {
+                error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+                           local_err ? error_get_pretty(local_err) : "unknown error");
+                break;
+            }
+            source_bs = di->fleecing.snapshot_access;
+            discard_source = true;
+
+            /*
+             * bdrv_get_info() just returns 0 (= doesn't matter) for RBD when using krbd. But discard
+             * on the fleecing image won't work if the backup job's granularity is less than the RBD
+             * object size (default 4 MiB), so it does matter. Always use at least 4 MiB. With a PBS
+             * target, the backup job granularity would already be at least this much.
+             */
+            perf.min_cluster_size = 4 * 1024 * 1024;
+            /*
+             * For discard to work, cluster size for the backup job must be at least the same as for
+             * the fleecing image.
+             */
+            BlockDriverInfo bdi;
+            if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
+                perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
+            }
+        }
+
+         BlockJob *job = backup_job_create(
+-            NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+-            bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
+            job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+            bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
+             BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
+             &local_err);
+ 
+@@ -577,6 +670,14 @@ static void create_backup_jobs_bh(void *opaque) {
+     aio_co_enter(data->ctx, data->co);
+ }
+ 
+/*
+ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
+ */
+static bool device_uses_fleecing(const char *device_id)
+{
+    return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
+}
+
+ /*
+  * Returns a list of device infos, which needs to be freed by the caller. In
+  * case of an error, errp will be set, but the returned value might still be a
+@@ -584,6 +685,7 @@ static void create_backup_jobs_bh(void *opaque) {
+  */
+ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+     const char *devlist,
+    bool fleecing,
+     Error **errp)
+ {
+     gchar **devs = NULL;
+@@ -607,6 +709,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+             }
+             PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+             di->bs = bs;
+
+            if (fleecing && device_uses_fleecing(*d)) {
+                g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
+                BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
+                if (!fleecing_blk) {
+                    error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                              "Device '%s' not found", fleecing_devid);
+                    goto err;
+                }
+                BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
+                if (!bdrv_co_is_inserted(fleecing_bs)) {
+                    error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, fleecing_devid);
+                    goto err;
+                }
+                /*
+                 * Fleecing image needs to be the same size to act as a cbw target.
+                 */
+                if (bs->total_sectors != fleecing_bs->total_sectors) {
+                    error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
+                               fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
+                    goto err;
+                }
+                di->fleecing.bs = fleecing_bs;
+            }
+
+             di_list = g_list_append(di_list, di);
+             d++;
+         }
+@@ -656,6 +783,7 @@ UuidInfo coroutine_fn *qmp_backup(
+     const char *devlist,
+     bool has_speed, int64_t speed,
+     bool has_max_workers, int64_t max_workers,
+    bool has_fleecing, bool fleecing,
+     Error **errp)
+ {
+     assert(qemu_in_coroutine());
+@@ -684,7 +812,7 @@ UuidInfo coroutine_fn *qmp_backup(
+     format = has_format ? format : BACKUP_FORMAT_VMA;
+ 
+     bdrv_graph_co_rdlock();
+-    di_list = get_device_info(devlist, &local_err);
+    di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
+     bdrv_graph_co_rdunlock();
+     if (local_err) {
+         error_propagate(errp, local_err);
+@@ -1089,5 +1217,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+     ret->query_bitmap_info = true;
+     ret->pbs_masterkey = true;
+     ret->backup_max_workers = true;
+    ret->backup_fleecing = true;
+     return ret;
+ }
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index df934647ed..ff441d4258 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -948,6 +948,10 @@
+ #
+ # @max-workers: see @BackupPerf for details. Default 16.
+ #
+# @fleecing: perform a backup with fleecing. For each device in @devlist, a
+#            corresponding '-fleecing' device with the same size already needs to
+#            be present.
+#
+ # Returns: the uuid of the backup job
+ #
+ ##
+@@ -968,7 +972,8 @@
+                                     '*firewall-file': 'str',
+                                     '*devlist': 'str',
+                                     '*speed': 'int',
+-                                    '*max-workers': 'int' },
+                                    '*max-workers': 'int',
+                                    '*fleecing': 'bool' },
+   'returns': 'UuidInfo', 'coroutine': true }
+ 
+ ##
+@@ -1014,6 +1019,8 @@
+ #
+ # @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+ #
+# @backup-fleecing: Whether backup fleecing is supported or not.
+#
+ # @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
+ #     supported or not.
+ #
+@@ -1025,6 +1032,7 @@
+             'pbs-dirty-bitmap-migration': 'bool',
+             'pbs-masterkey': 'bool',
+             'pbs-library-version': 'str',
+            'backup-fleecing': 'bool',
+             'backup-max-workers': 'bool' } }
+ 
+ ##
--- a/Show More
+++ b/Show More