Add Vitastor support

bump version to 9.0.0-2
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2024-05-20 19:53:28 +03:00 · 2024-05-17 17:05:10 +02:00 · 2024-05-17 15:56:12 +02:00 · 2024-04-29 17:25:20 +02:00 · 2024-04-29 17:25:20 +02:00 · 2024-04-29 15:29:52 +02:00
118 changed files with 11202 additions and 10426 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,7 @@
+/*.build
+/*.buildinfo
+/*.changes
+/*.deb
+/*.dsc
+/*.tar*
+/pve-qemu-kvm-*.*/
--- a/85
+++ b/85
@@ -1,57 +1,90 @@
-include /usr/share/dpkg/pkg-info.mk
-include /usr/share/dpkg/architecture.mk
+include /usr/share/dpkg/default.mk

 PACKAGE = pve-qemu-kvm

 SRCDIR := qemu
-BUILDDIR ?= ${PACKAGE}-${DEB_VERSION_UPSTREAM}
+BUILDDIR ?= $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
+ORIG_SRC_TAR=$(PACKAGE)_$(DEB_VERSION_UPSTREAM).orig.tar.gz

 GITVERSION := $(shell git rev-parse HEAD)

-DEB = ${PACKAGE}_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
-DEB_DBG = ${PACKAGE}-dbg_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
+DSC=$(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION).dsc
+DEB = $(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
+DEB_DBG = $(PACKAGE)-dbgsym_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
 DEBS = $(DEB) $(DEB_DBG)

 all: $(DEBS)

 .PHONY: submodule
 submodule:
-	test -f "${SRCDIR}/configure" || git submodule update --init --recursive
+ifeq ($(shell test -f "$(SRCDIR)/configure" && echo 1 || echo 0), 0)
+	git submodule update --init --recursive
+	cd $(SRCDIR) && meson subprojects download # '&&': never run meson outside $(SRCDIR) if cd fails
+endif

-$(BUILDDIR): keycodemapdb | submodule
-	rm -rf $(BUILDDIR)
-	cp -a $(SRCDIR) $(BUILDDIR)
-	cp -a debian $(BUILDDIR)/debian
-	rm -rf $(BUILDDIR)/ui/keycodemapdb
-	cp -a keycodemapdb $(BUILDDIR)/ui/
-	echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $(BUILDDIR)/debian/SOURCE
+PC_BIOS_FW_PURGE_LIST_IN = \
+	hppa-firmware.img \
+	hppa-firmware64.img \
+	openbios-ppc \
+	openbios-sparc32 \
+	openbios-sparc64 \
+	palcode-clipper \
+	s390-ccw.img \
+	s390-netboot.img \
+	u-boot.e500 \
+	.*[a-zA-Z0-9]\.dtb \
+	.*[a-zA-Z0-9]\.dts \
+	qemu_vga.ndrv \
+	slof.bin \
+	opensbi-riscv.*-generic-fw_dynamic.bin \
+
+BLOB_PURGE_SED_CMDS = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "/$(FILE)/d")
+BLOB_PURGE_FILTER = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "$(FILE)")
+
+$(BUILDDIR): submodule
+	# check if qemu/ was used for a build
+	# if so, please run 'make distclean' in the submodule and try again
+	test ! -f $(SRCDIR)/build/config.status
+	rm -rf $@.tmp $@
+	cp -a $(SRCDIR) $@.tmp
+	cp -a debian $@.tmp/debian
+	rm -rf $@.tmp/roms/edk2 # packaged separately
+	find $@.tmp/pc-bios -type f | grep $(BLOB_PURGE_FILTER) | xargs rm -f
+	sed -i $(BLOB_PURGE_SED_CMDS) $@.tmp/pc-bios/meson.build
+	echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $@.tmp/debian/SOURCE
+	mv $@.tmp $@

 .PHONY: deb kvm
 deb kvm: $(DEBS)
 $(DEB_DBG): $(DEB)
 $(DEB): $(BUILDDIR)
-	cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j
+	cd $(BUILDDIR) && dpkg-buildpackage -b -us -uc -j32 # '&&': abort instead of building the top-level tree if cd fails
 	lintian $(DEBS)

-.PHONY: update
-update:
-	cd $(SRCDIR) && git submodule deinit ui/keycodemapdb || true
-	rm -rf $(SRCDIR)/ui/keycodemapdb
-	mkdir $(SRCDIR)/ui/keycodemapdb
-	cd $(SRCDIR) && git submodule update --init ui/keycodemapdb
-	rm -rf keycodemapdb
-	mkdir keycodemapdb
-	cp -R $(SRCDIR)/ui/keycodemapdb/* keycodemapdb/
-	git add keycodemapdb
+sbuild: $(DSC)
+	sbuild $(DSC)
+
+$(ORIG_SRC_TAR): $(BUILDDIR)
+	tar czf $(ORIG_SRC_TAR) --exclude="$(BUILDDIR)/debian" $(BUILDDIR)
+
+.PHONY: dsc sbuild # both are commands, not files; a stray file named 'sbuild' must not disable the target
+dsc:
+	rm -rf *.dsc $(BUILDDIR)
+	$(MAKE) $(DSC)
+	lintian $(DSC)
+
+$(DSC): $(ORIG_SRC_TAR) $(BUILDDIR)
+	cd $(BUILDDIR) && dpkg-buildpackage -S -us -uc -d # '&&': abort instead of packaging the top-level tree if cd fails

 .PHONY: upload
+upload: UPLOAD_DIST ?= $(DEB_DISTRIBUTION)
 upload: $(DEBS)
-	tar cf - ${DEBS} | ssh repoman@repo.proxmox.com upload --product pve --dist buster
+	tar cf - $(DEBS) | ssh repoman@repo.proxmox.com upload --product pve --dist $(UPLOAD_DIST)

 .PHONY: distclean clean
 distclean: clean
 clean:
-	rm -rf $(BUILDDIR) $(PACKAGE)*.deb *.buildinfo *.changes
+	rm -rf $(PACKAGE)-[0-9]*/ $(PACKAGE)*.tar* *.deb *.dsc *.build *.buildinfo *.changes

 .PHONY: dinstall
 dinstall: $(DEBS)
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,7 +1,720 @@
+pve-qemu-kvm (9.0.0-2+vitastor1) bookworm; urgency=medium
+
+  * Add Vitastor support
+
+ -- Vitaliy Filippov <vitalif@yourcmc.ru>  Mon, 20 May 2024 19:53:28 +0300
+
+pve-qemu-kvm (9.0.0-2) bookworm; urgency=medium
+
+  * fix #5409: backup: fix copy-before-write timeout
+
+  * backup: improve error when copy-before-write fails for fleecing
+
+  * fix forwards and backwards migration with VirtIO-GPU display
+
+  * fix a regression in pflash device introduced in 8.2
+
+  * revert a commit for VirtIO PCI devices that turned out to cause more
+    potential security issues than what it fixed
+
+  * move compatibility flags for a new VirtIO-net feature to the correct
+    machine type. The feature was introduced in QEMU 8.2, but the
+    compatibility flags got added to machine version 8.0 instead of 8.1. This
+    breaks backwards migration with machine version 8.1 from a 8.2/9.0 binary
+    to an 8.1 binary, in cases where the guest kernel enables the feature
+    (e.g. Ubuntu 23.10).
+    While that breaks migration with machine version 8.1 from an unpatched to
+    a patched binary, Proxmox VE only ever had 8.2 on the test repository and
+    9.0 not yet in any public repository.
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 17 May 2024 17:04:52 +0200
+
+pve-qemu-kvm (9.0.0-1) bookworm; urgency=medium
+
+  * update submodule and patches to QEMU 9.0.0
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 29 Apr 2024 10:51:37 +0200
+
+pve-qemu-kvm (8.2.2-1) bookworm; urgency=medium
+
+  * update submodule and patches to QEMU 8.2.2
+
+ -- Proxmox Support Team <support@proxmox.com>  Sat, 27 Apr 2024 12:44:30 +0200
+
+pve-qemu-kvm (8.1.5-5) bookworm; urgency=medium
+
+  * implement support for backup fleecing
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 11 Apr 2024 17:46:48 +0200
+
+pve-qemu-kvm (8.1.5-4) bookworm; urgency=medium
+
+  * fix live-import for certain kinds of VMDK images that rely on padding
+
+  * backup: avoid bubbling up first error if it's an ECANCELED one, as those
+    are often a result of canceling the job due to running into an actual
+    issue.
+
+  * backup: factor out & clean up gathering device info into helper
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 12 Mar 2024 14:08:40 +0100
+
+pve-qemu-kvm (8.1.5-3) bookworm; urgency=medium
+
+  * backport fix for potential deadlock during QMP stop command if the VM has
+    disks attached through VirtIO-Block and IO-Thread enabled
+
+  * fix #4507: add patch to automatically increase NOFILE soft limit
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 21 Feb 2024 20:11:23 +0100
+
+pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium
+
+  * work around for a situation where guest IO might get stuck, if the VM is
+    configured with iothread and VirtIO block/SCSI
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 02 Feb 2024 19:41:27 +0100
+
+pve-qemu-kvm (8.1.5-1) bookworm; urgency=medium
+
+  * update to 8.1.5 stable release, including more relevant fixes like:
+    - virtio-net: correctly copy vnet header when flushing TX
+    - hw/pflash: implement update buffer for block writes
+    - Fixes to i386 emulation and ARM emulation.
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 02 Feb 2024 19:08:13 +0100
+
+pve-qemu-kvm (8.1.2-6) bookworm; urgency=medium
+
+  * revert attempted fix to avoid rare issue with stuck guest IO when using
+    iothread, because it caused a much more common issue with iothreads
+    consuming too much CPU
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 15 Dec 2023 14:22:06 +0100
+
+pve-qemu-kvm (8.1.2-5) bookworm; urgency=medium
+
+  * backport workaround for stuck guest IO with iothread and VirtIO block/SCSI
+    in some rare edge cases
+
+  * backport fix for potential deadlock when issuing the "resize" QMP command
+    for a disk that is using iothread
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 11 Dec 2023 16:58:27 +0100
+
+pve-qemu-kvm (8.1.2-4) bookworm; urgency=medium
+
+  * fix vnc clipboard in the host to guest direction
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 22 Nov 2023 14:28:21 +0100
+
+pve-qemu-kvm (8.1.2-3) bookworm; urgency=medium
+
+  * fix #5054: backport fix for software reset with SATA, avoiding breakage
+    with, e.g., some FreeBSD VMs
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 20 Nov 2023 10:24:50 +0100
+
+pve-qemu-kvm (8.1.2-2) bookworm; urgency=medium
+
+  * revert "x86: acpi: workaround Windows not handling name references in
+    Package properly" as that seems to have broken networking (and possibly
+    other things) on some localized variants of Windows (e.g., the German
+    versions).
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 17 Nov 2023 11:55:23 +0100
+
+pve-qemu-kvm (8.1.2-1) bookworm; urgency=medium
+
+  * update submodule and patches to QEMU 8.1.2
+
+  * use QEMU's keycode-map-db again instead of our static copy from QEMU 6.0
+
+  * disable graph locking, newly introduced in the 8.1 release, as it has
+    still various deadlock issues, e.g., during canceling backup jobs.
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 24 Oct 2023 13:42:45 +0200
+
+pve-qemu-kvm (8.0.2-7) bookworm; urgency=medium
+
+  * fix #2874: SATA: avoid unsolicited write to sector 0 during reset
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 04 Oct 2023 08:33:35 +0200
+
+pve-qemu-kvm (8.0.2-6) bookworm; urgency=medium
+
+  * fix #1534: vma: add extract-filter for disk images allowing users to pass
+    a comma separated list of the disks they want to extract from an archive.
+
+  * backup: create jobs in a drained section to avoid subtle bugs where
+    something interferes with the block-copy-state bitmap on initialization
+
+  * backup: drop experimental, and since a while also fully broken, directory
+    backup format (BACKUP_FORMAT_DIR). This format was never exposed via the
+    Proxmox VE API, but only available via QMP, as it's broken since QEMU 8 and
+    we got zero reports about that, it's safe to assume that there are no
+    public users, so just remove it completely.
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 06 Sep 2023 17:03:59 +0200
+
+pve-qemu-kvm (8.0.2-5) bookworm; urgency=medium
+
+  * improve memory footprint after backup by not keeping as much memory
+    resident.
+
+  * fix file descriptor leak for vhost (used by default by vNICs).
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 16 Aug 2023 11:52:24 +0200
+
+pve-qemu-kvm (8.0.2-4) bookworm; urgency=medium
+
+  * fix resume for snapshot and hibernate in combination with iothread and
+    dirty bitmap
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 28 Jul 2023 12:58:22 +0200
+
+pve-qemu-kvm (8.0.2-3) bookworm; urgency=medium
+
+  * fix regression in QEMU 8.0 for drive mirror with bitmap
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 15 Jun 2023 13:57:46 +0200
+
+pve-qemu-kvm (8.0.2-2) bookworm; urgency=medium
+
+  * drop custom get_link_status QMP command, was never really used.
+
+  * drop custom & deprecated drive snapshot QMP commands, we use a better
+    alternative since a while.
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 09 Jun 2023 07:57:56 +0200
+
+pve-qemu-kvm (8.0.2-1) bookworm; urgency=medium
+
+  * update to QEMU stable release 8.0.2
+
+  * update patches for avoiding issues with DMA reentrancy to current,
+    slightly optimized version.
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 06 Jun 2023 16:34:50 +0200
+
+pve-qemu-kvm (8.0.0-1) bookworm; urgency=medium
+
+  * update to QEMU stable release 8.0.0
+
+  * re-build for Proxmox VE 8 / Debian 12 Bookworm
+
+  * adapt to the local virtiofsd C variant being dropped, it has been
+    rewritten in Rust and is now hosted in a separate source repository.
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 22 May 2023 13:45:49 +0200
+
+pve-qemu-kvm (7.2.0-8) bullseye; urgency=medium
+
+  * backport fix for ACPI CPU hotplug issue with TCG
+
+  * cherry-pick TCG-related stable fixes for 7.2 for users that turned off KVM
+    HW acceleration
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 17 Mar 2023 15:47:08 +0100
+
+pve-qemu-kvm (7.2.0-7) bullseye; urgency=medium
+
+  * improve fix for potential deadlock with trim for IDE/SATA and draining
+
+  * backport stable fixes:
+    - hw/nvme: fix missing endian conversions for doorbell buffers
+    - hw/smbios: fix field corruption in type 4 table
+    - virtio-rng-pci: fix transitional migration compat for vectors
+    - hw/timer/hpet: Fix expiration time overflow
+    - vhost/vdpa: stop all svq on device deletion
+    - vhost: avoid a potential use of an uninitialized variable in the call to
+      vhost_svq_poll
+    - chardev/char-socket: set s->listener = NULL in char_socket_finalize to
+      fix a potential crash after live-migration
+    - intel-iommu: fail MAP notifier without caching mode
+    - intel-iommu: fail DEVIOTLB_UNMAP without dt mode
+
+  * fix a regression for when the LSI SCSI controller is used
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 13 Mar 2023 17:42:49 +0100
+
+pve-qemu-kvm (7.2.0-6) bullseye; urgency=medium
+
+  * fix 7.2 regression for Linux boot failures with megasas SCSI
+
+  * fix 7.0 regression for a potential deadlock with trim for IDE/SATA and
+    draining
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 08 Mar 2023 14:32:17 +0100
+
+pve-qemu-kvm (7.2.0-5) bullseye; urgency=medium
+
+  * fix #4476: savevm-async: avoid looping without progress
+
+  * savevm-async: decrease the boundary for free space for (memory) state left
+    on target from 30 MiB to 100 MiB, improving the heuristic for when to
+    enter the final "pause and sync" stage.
+
+  * QMP backup: use correct error number when getting blockdrive length fails
+
+  * backport fix for some DMA reentrancy issues, better protecting against
+    malicious guests
+
+  * backport fix for iSCSI double free issue leading to crashes
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 21 Feb 2023 13:49:43 +0100
+
+pve-qemu-kvm (7.2.0-4) bullseye; urgency=medium
+
+  * backport fix for a 7.2 regression when using VirtIO disk with
+    detect-zeroes=unmap
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 27 Jan 2023 09:37:49 +0100
+
+pve-qemu-kvm (7.2.0-3) bullseye; urgency=medium
+
+  * add fix for live-migration with virtio-rng devices, which regressed in
+    QEMU 7.2.0.
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 12 Jan 2023 13:13:14 +0100
+
+pve-qemu-kvm (7.2.0-2) bullseye; urgency=medium
+
+  * enable slirp again for now, as in qemu-server, user networking is
+    supported (via CLI/API) when no bridge is set on a virtual NIC
+
+  * cherry-pick stable fixes for 7.2. Two for virtio-mem and one for vIOMMU.
+    Both features are not yet exposed in PVE's qemu-server, but there's work
+    going on to change that.
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 10 Jan 2023 15:47:48 +0100
+
+pve-qemu-kvm (7.2.0-1) bullseye; urgency=medium
+
+  * update to QEMU stable release 7.2.0
+
+  * drop 'slirp' networking
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 16 Dec 2022 13:18:21 +0100
+
+pve-qemu-kvm (7.1.0-4) bullseye; urgency=medium
+
+  * cherry-pick "block/block-backend: blk_set_enable_write_cache is IO_CODE"
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 22 Nov 2022 09:21:06 +0100
+
+pve-qemu-kvm (7.1.0-3) bullseye; urgency=medium
+
+  * init: daemonize: defuse PID file resolve error to a warning at max, fixing
+    some usecases that regressed with 7.1, like tracking start up in our
+    file-restore VM.
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 28 Oct 2022 10:27:49 +0200
+
+pve-qemu-kvm (7.1.0-2) bullseye; urgency=medium
+
+  * fix an issue with error handling in async backup code
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 18 Oct 2022 15:33:44 +0200
+
+pve-qemu-kvm (7.1.0-1) bullseye; urgency=medium
+
+  * update to QEMU stable release 7.1.0
+
+  * add fix for io_uring_register_ring_fd from upstream
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 14 Oct 2022 14:54:09 +0200
+
+pve-qemu-kvm (7.0.0-4) bullseye; urgency=medium
+
+  * add revision to version output
+
+  * PVE Backup: allow passing max-workers performance setting
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 10 Oct 2022 11:55:37 +0200
+
+pve-qemu-kvm (7.0.0-3) bullseye; urgency=medium
+
+  * savevm-async: avoid segfault when aborting snapshot creation task
+
+  * savevm-async: set SAVE_STATE_DONE when closing state file was successful
+    allowing one to start a new snapshot task after aborting one.
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 30 Aug 2022 12:54:03 +0200
+
+pve-qemu-kvm (7.0.0-2) bullseye; urgency=medium
+
+  * backport "io_uring: fix short read slow path"
+
+  * backport "e1000: set RX descriptor status in a separate operation"
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 20 Jul 2022 09:17:07 +0200
+
+pve-qemu-kvm (7.0.0-1) bullseye; urgency=medium
+
+  * update to QEMU stable release 7.0.0
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 30 Jun 2022 11:07:37 +0200
+
+pve-qemu-kvm (6.2.0-11) bullseye; urgency=medium
+
+  * add 'namespace' to BlockdevOptionsPbs for live-restore support
+
+  * vma: create: support 64KiB-unaligned input images to improve backing
+    up some VM templates
+
+  * block: alloc-track: avoid unlikely, but possible premature break
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 22 Jun 2022 15:54:54 +0200
+
+pve-qemu-kvm (6.2.0-10) bullseye; urgency=medium
+
+  * fix #4101: fix backup cancellation bug with iothreads
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 9 Jun 2022 16:35:51 +0200
+
+pve-qemu-kvm (6.2.0-9) bullseye; urgency=medium
+
+  * fix possible race conditions during cancellation of a PBS backup
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 08 Jun 2022 14:03:22 +0200
+
+pve-qemu-kvm (6.2.0-8) bullseye; urgency=medium
+
+  * revert "block/rbd: implement bdrv_co_block_status" to work around
+    performance regression when backing up large RBD disk
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 19 May 2022 09:24:45 +0200
+
+pve-qemu-kvm (6.2.0-7) bullseye; urgency=medium
+
+  * Proxmox Backup Server namespace support
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 12 May 2022 16:05:56 +0200
+
+pve-qemu-kvm (6.2.0-6) bullseye; urgency=medium
+
+  * block/gluster: correctly set max_pdiscard which is int64_t to avoid
+    triggering assertion
+
+  * ui/vnc.c: Fixed a deadlock bug
+
+  * display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) and
+    integer overflow in cursor_alloc (CVE-2021-4206)
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 11 May 2022 10:42:53 +0200
+
+pve-qemu-kvm (6.2.0-5) bullseye; urgency=medium
+
+  * vma: allow partial restore by skipping some disk
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 25 Apr 2022 10:13:46 +0200
+
+pve-qemu-kvm (6.2.0-4) bullseye; urgency=medium
+
+  * d/control: add libgbm to build dependencies
+
+  * d/control: add suggest dependency-hint for libgl1
+
+  * various stable backports:
+    + virtio-net: fix map leaking on error during receive
+    + memory: Fix incorrect calls of log_global_start/stop
+    + acpi: fix OEM ID/OEM Table ID padding
+    + vhost-vsock: detach the virqueue element in case of error
+    + vhost-user: remove VirtQ notifier restore
+    + vhost-user: fix VirtQ notifier cleanup
+    + virtio: fix the condition for iommu_platform not supported
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 22 Apr 2022 11:52:30 +0200
+
+pve-qemu-kvm (6.2.0-3) bullseye; urgency=medium
+
+  *  cherry-pick fix for some manually added ACPI table SLIC entries via the
+     custom args flag.
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 15 Apr 2022 09:09:37 +0200
+
+pve-qemu-kvm (6.2.0-2) bullseye; urgency=medium
+
+  * compile in virgl support
+
+  * enable zstd support
+
+  * drop sdl dependency (it was disabled at compile time already)
+
+  * recommend 'numactl'
+
+  * fix an issue with multi-disk backups where chunks would be written
+    multiple times
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 03 Mar 2022 12:03:44 +0100
+
+pve-qemu-kvm (6.2.0-1) bullseye; urgency=medium
+
+  * update to QEMU stable release 6.2.0
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 17 Feb 2022 06:23:14 +0100
+
+pve-qemu-kvm (6.1.1-2) bullseye; urgency=medium
+
+  * vma: create: register all streams before entering coroutines to avoid that
+    an early stream starts to write already before all got registered.
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 14 Feb 2022 15:53:15 +0100
+
+pve-qemu-kvm (6.1.1-1) bullseye; urgency=medium
+
+  * update to 6.1.1 stable release
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 13 Jan 2022 10:57:43 +0100
+
+pve-qemu-kvm (6.1.0-3) bullseye; urgency=medium
+
+  * fix #3738: cherry-pick "block: introduce max_hw_iov for use in scsi-
+    generic"
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 01 Dec 2021 15:35:43 +0100
+
+pve-qemu-kvm (6.1.0-2) bullseye; urgency=medium
+
+  * avoid a possible segmentation fault during block (disk) mirror
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 16 Nov 2021 09:38:10 +0100
+
+pve-qemu-kvm (6.1.0-1) bullseye; urgency=medium
+
+  * update to QEMU stable release 6.1.0
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 11 Oct 2021 15:15:19 +0200
+
+pve-qemu-kvm (6.0.0-4) bullseye; urgency=medium
+
+  * drop the ancient workaround that force disabled SMM due to observing VM
+    hangs on old kernel versions.
+
+  * monitor/qmp: fix race with clients disconnecting early resulting in other
+    clients receiving a message with the (now wrong) ID of the former
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 06 Sep 2021 07:30:00 +0200
+
+pve-qemu-kvm (6.0.0-3) bullseye; urgency=medium
+
+  * io_uring: resubmit when result is -EAGAIN
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 3 Aug 2021 15:01:31 +0200
+
+pve-qemu-kvm (6.0.0-2) bullseye; urgency=medium
+
+  * enable io-uring support in QEMU builds
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 23 Jun 2021 11:03:54 +0200
+
+pve-qemu-kvm (6.0.0-1) bullseye; urgency=medium
+
+  * update to QEMU stable release 6.0.0
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 28 May 2021 11:30:50 +0200
+
+pve-qemu-kvm (5.2.0-11) bullseye; urgency=medium
+
+  * re-build for Proxmox VE 7 / Debian Bullseye
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 13 May 2021 14:03:00 +0200
+
+pve-qemu-kvm (5.2.0-6) pve; urgency=medium
+
+  * improve the alloc-track and Proxmox Backup Server special block driver when
+    used with IO-threads for the new live-restore feature
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 15 Apr 2021 16:29:48 +0200
+
+pve-qemu-kvm (5.2.0-5) pve; urgency=medium
+
+  * cherry-pick fixes for a possible deadlock when resizing a disk while using
+    IO-threads
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 30 Mar 2021 18:18:18 +0200
+
+pve-qemu-kvm (5.2.0-4) pve; urgency=medium
+
+  * monitor/qmp: fix race on CHR_EVENT_CLOSED without OOB
+
+  * improve saving and loading dirty bitmaps in live-snapshots
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 23 Mar 2021 15:41:26 +0100
+
+pve-qemu-kvm (5.2.0-3) pve; urgency=medium
+
+  * backport "i386/acpi: restore device paths for pre-5.1 vms" patch
+
+  * ship list of 'i440fx' and 'q35' machine versions this QEMU version supports
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 05 Mar 2021 16:23:06 +0100
+
+pve-qemu-kvm (5.2.0-2) pve; urgency=medium
+
+  * Proxmox Backup restore: ensure all caches are flushed for userspace
+    attached storages (for example, Ceph when librbd and not KRBD is used)
+
+  * ship VirtIOFSd daemon
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 24 Feb 2021 18:25:14 +0100
+
+pve-qemu-kvm (5.2.0-1) pve; urgency=medium
+
+  * update to QEMU stable release 5.2.0
+
+  * fix #3084: fall back to open-iscsi initiatorname
+
+  * fix snapshot abort and improve performance in some edge cases
+
+  * add basis for Proxmox Backup Server master key support
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 12 Feb 2021 12:09:59 +0100
+
+pve-qemu-kvm (5.1.0-8) pve; urgency=medium
+
+  * disable jemalloc, as it does not play nice with our library written in
+    rust
+
+  * fix #3225: properly cancel jobs in 'created' state when cancelling or
+    failing backup jobs
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 07 Jan 2021 10:27:33 +0100
+
+pve-qemu-kvm (5.1.0-7) pve; urgency=medium
+
+  * allow to query the loaded Proxmox Backup library version
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 25 Nov 2020 14:09:16 +0100
+
+pve-qemu-kvm (5.1.0-6) pve; urgency=medium
+
+  * migration/block-dirty-bitmap: avoid telling QEMU that the bitmap migration
+    is active longer than required
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 05 Nov 2020 18:59:40 +0100
+
+pve-qemu-kvm (5.1.0-5) pve; urgency=medium
+
+  * migration/block-dirty-bitmap: migrate other bitmaps even if one fails
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 04 Nov 2020 18:36:32 +0100
+
+pve-qemu-kvm (5.1.0-4) pve; urgency=medium
+
+  * several fixes for backup abort edgecases and error reporting
+
+  * allow to migrate dirty-bitmap and Proxmox Backup Server state
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 29 Oct 2020 18:09:16 +0100
+
+pve-qemu-kvm (5.1.0-3) pve; urgency=medium
+
+  * backup: make more use of coroutines and do not block on finishing
+
+  * backup: use transactions to synchronize the disk job states
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 29 Sep 2020 09:22:56 +0200
+
+pve-qemu-kvm (5.1.0-2) pve; urgency=medium
+
+  * cherry-pick fix to harden checks for USB devices (CVE-2020-14364)
+
+  * work around #3002: revert "qemu-img convert: Don't pre-zero images" as it
+    correlates with an issue when using LVM as a target storage for disk move
+    operations.
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 14 Sep 2020 20:01:24 +0200
+
+pve-qemu-kvm (5.1.0-1) pve; urgency=medium
+
+  * update to QEMU 5.1.0
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 20 Aug 2020 13:42:10 +0200
+
+pve-qemu-kvm (5.0.0-13) pve; urgency=medium
+
+  * improve zero block handling for PBS backups
+
+  * allow querying a more detailed dirty-bitmap state per VM disk
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 19 Aug 2020 18:19:26 +0200
+
+pve-qemu-kvm (5.0.0-12) pve; urgency=medium
+
+  * patch for possible DOS in qemu network packet processing
+
+  * fix PBS write callback with big blocks
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 11 Aug 2020 11:29:13 +0200
+
+pve-qemu-kvm (5.0.0-11) pve; urgency=medium
+
+  * improve dirty-bitmap Proxmox Backup Server backup with multiple drives
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 14 Jul 2020 12:44:25 +0200
+
+pve-qemu-kvm (5.0.0-10) pve; urgency=medium
+
+  * fix compression and encryption related backup parameters not being passed
+    on from the HMP command properly
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 10 Jul 2020 13:32:11 +0200
+
+pve-qemu-kvm (5.0.0-9) pve; urgency=medium
+
+  * adapt to new compress and encrypt params of the proxmox-backup library
+
+  * add block driver for Proxmox Backup Server backed images
+
+  * add 'query-proxmox-support' QMP command
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 09 Jul 2020 13:18:45 +0200
+
+pve-qemu-kvm (5.0.0-8) pve; urgency=medium
+
+  * backup: rename parameter for dirty-bitmap PBS backup to "use-dirty-bitmap"
+
+  * backup: improve checking if a previous backup is available when trying to
+    reuse it
+
+ -- Proxmox Support Team <support@proxmox.com>  Mon, 06 Jul 2020 22:58:45 +0200
+
+pve-qemu-kvm (5.0.0-6) pve; urgency=medium
+
+  * backup: improve query-backup information and remove all dirty-bitmaps on
+    failed drive-job
+
+ -- Proxmox Support Team <support@proxmox.com>  Fri, 03 Jul 2020 17:00:30 +0200
+
+pve-qemu-kvm (5.0.0-5) pve; urgency=medium
+
+  * fix backup for not 64k-aligned storages
+
+  * fix #2794: Include legacy-igd passthrough fix
+
+  * add initial support for incremental backup for running VMs and
+    Proxmox Backup Server as a target
+
+ -- Proxmox Support Team <support@proxmox.com>  Tue, 30 Jun 2020 11:12:55 +0200
+
+pve-qemu-kvm (5.0.0-4) pve; urgency=medium
+
+  * install missing restore helper binary
+
+ -- Proxmox Support Team <support@proxmox.com>  Sat, 30 May 2020 15:25:38 +0200
+
+pve-qemu-kvm (5.0.0-3) pve; urgency=medium
+
+  * ensure that a data-flush for all drives uses the correct AioContext. Fixes
+    a potential VM hang happening on some storage types if IOThreads are used.
+
+ -- Proxmox Support Team <support@proxmox.com>  Wed, 27 May 2020 14:41:31 +0200
+
 pve-qemu-kvm (5.0.0-2) pve; urgency=medium

-  * fix saving a VM-state (snapshot, suspend-to-disk) in combination with
-    IOThreads
+  * fix saving a VM-state (snapshot, suspend-to-disk) with QEMU 5.0

  * try to use bigger chunks for saving a VM-state to improve performance on
    storage backends like Ceph RBD
--- a/debian/compat
+++ b/debian/compat
@@ -1 +0,0 @@
-10
--- a/debian/control
+++ b/debian/control
@@ -2,36 +2,43 @@ Source: pve-qemu-kvm
 Section: admin
 Priority: optional
 Maintainer: Proxmox Support Team <support@proxmox.com>
-Build-Depends: autotools-dev,
+Build-Depends: debhelper-compat (= 13),
               check,
-               debhelper (>= 9),
               libacl1-dev,
               libaio-dev,
               libattr1-dev,
               libcap-ng-dev,
               libcurl4-gnutls-dev,
+               libepoxy-dev,
               libfdt-dev,
+               libgbm-dev,
               libglusterfs-dev (>= 5.2-2),
               libgnutls28-dev,
               libiscsi-dev (>= 1.12.0),
-               libjemalloc-dev,
               libjpeg-dev,
+               libjson-perl,
               libnuma-dev,
               libpci-dev,
               libpixman-1-dev,
-               libproxmox-backup-qemu0-dev,
+               libproxmox-backup-qemu0-dev (>= 1.3.0),
               librbd-dev (>= 0.48),
               libsdl1.2-dev,
               libseccomp-dev,
+               libslirp-dev,
               libspice-protocol-dev (>= 0.12.14~),
               libspice-server-dev (>= 0.14.0~),
-               libusb-1.0-0-dev (>= 1.0.17-1),
+               libsystemd-dev,
+               liburing-dev,
+               libusb-1.0-0-dev (>= 1.0.17),
               libusbredirparser-dev (>= 0.6-2),
+               libvirglrenderer-dev,
+               libzstd-dev,
+               meson,
               python3-minimal,
               python3-sphinx,
+               python3-sphinx-rtd-theme,
+               python3-venv,
               quilt,
-               texi2html,
-               texinfo,
               uuid-dev,
               xfslibs-dev,
 Standards-Version: 3.7.2
@@ -40,7 +47,6 @@ Package: pve-qemu-kvm
 Architecture: any
 Depends: ceph-common (>= 0.48),
         iproute2,
-         libaio1,
         libgfapi0 | glusterfs-common (>= 5.6),
         libgfchangelog0 | glusterfs-common (>= 5.6),
         libgfdb0 | glusterfs-common (>= 5.6),
@@ -49,17 +55,16 @@ Depends: ceph-common (>= 0.48),
         libglusterfs-dev | glusterfs-common (>= 5.6),
         libglusterfs0 | glusterfs-common (>= 5.6),
         libiscsi4 (>= 1.12.0) | libiscsi7,
-         libjemalloc2,
         libjpeg62-turbo,
-         libsdl1.2debian,
         libspice-server1 (>= 0.14.0~),
         libusb-1.0-0 (>= 1.0.17-1),
         libusbredirparser1 (>= 0.6-2),
+         vitastor-client (>= 0.9.4),
         libuuid1,
-         numactl,
-         python,
         ${misc:Depends},
         ${shlibs:Depends},
+Recommends: numactl,
+Suggests: libgl1,
 Conflicts: kvm,
           pve-kvm,
           pve-qemu-kvm-2.6.18,
@@ -67,22 +72,17 @@ Conflicts: kvm,
           qemu-kvm,
           qemu-system-arm,
           qemu-system-common,
+           qemu-system-data,
           qemu-system-x86,
           qemu-utils,
-Provides: qemu-system-arm, qemu-system-x86, qemu-utils
+Provides: qemu-system-arm, qemu-system-x86, qemu-utils,
 Replaces: pve-kvm,
          pve-qemu-kvm-2.6.18,
          qemu-system-arm,
          qemu-system-x86,
          qemu-utils,
+Breaks: qemu-server (<= 8.0.6)
 Description: Full virtualization on x86 hardware
 Using KVM, one can run multiple virtual PCs, each running unmodified Linux or
 Windows images. Each virtual machine has private virtualized hardware: a
 network card, disk, graphics adapter, etc.
-
-Package: pve-qemu-kvm-dbg
-Architecture: any
-Section: debug
-Depends: pve-qemu-kvm (= ${binary:Version})
-Description: pve qemu debugging symbols
- This package contains the debugging symbols for pve-qemu-kvm.
--- a/debian/copyright
+++ b/debian/copyright
@@ -25,7 +25,7 @@ License:

    In particular, the QEMU virtual CPU core library (libqemu.a) is
    released under the GNU Lesser General Public License version 2 or later.
-    On Debian systems, the complete text of the GNU Lesser General Public 
+    On Debian systems, the complete text of the GNU Lesser General Public
    License can be found in the file /usr/share/common-licenses/LGPL.

    Some hardware device emulation sources and other QEMU functionality are
--- a/debian/parse-machines.pl
+++ b/debian/parse-machines.pl
@@ -0,0 +1,28 @@
+#!/usr/bin/perl
+
+# Filter a QEMU machine listing read from STDIN (presumably the output of
+# "qemu -machine help" - confirm with caller) to pc-i440fx/pc-q35 JSON entries.
+
+use warnings;
+use strict;
+
+use JSON;
+
+my $machines = [];
+
+while (my $line = <STDIN>) {
+    next if $line =~ /^\s*Supported machines are:/; # header line, not a machine
+
+    # awk-style split: skips leading whitespace, yields nothing for blank lines
+    my ($id) = split(' ', $line);
+    next if !defined($id); # blank line; avoids an 'uninitialized value' warning
+    next if $id !~ m/^pc-(i440fx|q35)-(.+)$/;
+
+    push @$machines, { 'id' => $id, 'type' => $1, 'version' => $2 };
+}
+
+die "no QEMU machine types detected from STDIN input\n" if !scalar(@$machines);
+
+# canonical => 1 emits hash keys in sorted order, keeping the output stable
+print to_json($machines, { utf8 => 1, canonical => 1 })
+    or die "failed to encode detected machines as JSON - $!\n";
--- a/debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
+++ b/debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
@@ -26,19 +26,22 @@ Suggested-by: Ma Haocong <mahaocong@didichuxing.com>
 Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
 Signed-off-by: John Snow <jsnow@redhat.com>
 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: rebased for 8.2.2]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- block/mirror.c              | 98 ++++++++++++++++++++++++++++++-------
- blockdev.c                  | 39 +++++++++++++--
- include/block/block_int.h   |  4 +-
- qapi/block-core.json        | 29 +++++++++--
- tests/test-block-iothread.c |  4 +-
- 5 files changed, 145 insertions(+), 29 deletions(-)
+ block/mirror.c                         | 99 ++++++++++++++++++++------
+ blockdev.c                             | 38 +++++++++-
+ include/block/block_int-global-state.h |  4 +-
+ qapi/block-core.json                   | 25 ++++++-
+ tests/unit/test-block-iothread.c       |  4 +-
+ 5 files changed, 142 insertions(+), 28 deletions(-)

 diff --git a/block/mirror.c b/block/mirror.c
-index c26fd9260d..3c9cd42c50 100644
+index 1bdce3b657..0c5c72df2e 100644
 --- a/block/mirror.c
 +++ b/block/mirror.c
-@@ -49,7 +49,7 @@ typedef struct MirrorBlockJob {
+@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob {
     BlockDriverState *to_replace;
     /* Used to block operations on the drive-mirror-replace target */
     Error *replace_blocker;
@@ -47,7 +50,7 @@ index c26fd9260d..3c9cd42c50 100644
     BlockMirrorBackingMode backing_mode;
     /* Whether the target image requires explicit zero-initialization */
     bool zero_target;
-@@ -64,6 +64,8 @@ typedef struct MirrorBlockJob {
+@@ -73,6 +73,8 @@ typedef struct MirrorBlockJob {
     size_t buf_size;
     int64_t bdev_length;
     unsigned long *cow_bitmap;
@@ -56,17 +59,17 @@ index c26fd9260d..3c9cd42c50 100644
     BdrvDirtyBitmap *dirty_bitmap;
     BdrvDirtyBitmapIter *dbi;
     uint8_t *buf;
-@@ -676,7 +678,8 @@ static int mirror_exit_common(Job *job)
-     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
+@@ -722,7 +724,8 @@ static int mirror_exit_common(Job *job)
                              &error_abort);
+ 
     if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
 -        BlockDriverState *backing = s->is_none_mode ? src : s->base;
 +        BlockDriverState *backing;
 +        backing = s->sync_mode == MIRROR_SYNC_MODE_NONE ? src : s->base;
-         if (backing_bs(target_bs) != backing) {
-             bdrv_set_backing_hd(target_bs, backing, &local_err);
-             if (local_err) {
-@@ -771,6 +774,16 @@ static void mirror_abort(Job *job)
+         BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
+ 
+         if (bdrv_cow_bs(unfiltered_target) != backing) {
+@@ -819,6 +822,16 @@ static void mirror_abort(Job *job)
     assert(ret == 0);
 }
 
@@ -83,7 +86,7 @@ index c26fd9260d..3c9cd42c50 100644
 static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
 {
     int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-@@ -949,7 +962,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
+@@ -1015,7 +1028,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
     mirror_free_init(s);
 
     s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -93,23 +96,23 @@ index c26fd9260d..3c9cd42c50 100644
         ret = mirror_dirty_init(s);
         if (ret < 0 || job_is_cancelled(&s->common.job)) {
             goto immediate_exit;
-@@ -1181,6 +1195,7 @@ static const BlockJobDriver mirror_job_driver = {
+@@ -1304,6 +1318,7 @@ static const BlockJobDriver mirror_job_driver = {
         .run                    = mirror_run,
         .prepare                = mirror_prepare,
         .abort                  = mirror_abort,
 +        .clean                  = mirror_clean,
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
-     },
-@@ -1196,6 +1211,7 @@ static const BlockJobDriver commit_active_job_driver = {
+         .cancel                 = mirror_cancel,
+@@ -1322,6 +1337,7 @@ static const BlockJobDriver commit_active_job_driver = {
         .run                    = mirror_run,
         .prepare                = mirror_prepare,
         .abort                  = mirror_abort,
 +        .clean                  = mirror_clean,
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
-     },
-@@ -1542,7 +1558,10 @@ static BlockJob *mirror_start_job(
+         .cancel                 = commit_active_cancel,
+@@ -1714,7 +1730,10 @@ static BlockJob *mirror_start_job(
                              BlockCompletionFunc *cb,
                              void *opaque,
                              const BlockJobDriver *driver,
@@ -121,9 +124,9 @@ index c26fd9260d..3c9cd42c50 100644
                              bool auto_complete, const char *filter_node_name,
                              bool is_mirror, MirrorCopyMode copy_mode,
                              Error **errp)
-@@ -1555,10 +1574,39 @@ static BlockJob *mirror_start_job(
-     Error *local_err = NULL;
-     int ret;
+@@ -1728,10 +1747,39 @@ static BlockJob *mirror_start_job(
+ 
+     GLOBAL_STATE_CODE();
 
 -    if (granularity == 0) {
 -        granularity = bdrv_get_default_bitmap_granularity(target);
@@ -163,7 +166,7 @@ index c26fd9260d..3c9cd42c50 100644
     assert(is_power_of_2(granularity));
 
     if (buf_size < 0) {
-@@ -1662,7 +1710,9 @@ static BlockJob *mirror_start_job(
+@@ -1871,7 +1919,9 @@ static BlockJob *mirror_start_job(
     s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
@@ -173,10 +176,10 @@ index c26fd9260d..3c9cd42c50 100644
 +    s->bitmap_mode = bitmap_mode;
     s->backing_mode = backing_mode;
     s->zero_target = zero_target;
-     s->copy_mode = copy_mode;
-@@ -1682,6 +1732,18 @@ static BlockJob *mirror_start_job(
-         bdrv_disable_dirty_bitmap(s->dirty_bitmap);
-     }
+     qatomic_set(&s->copy_mode, copy_mode);
+@@ -1897,6 +1947,18 @@ static BlockJob *mirror_start_job(
+      */
+     bdrv_disable_dirty_bitmap(s->dirty_bitmap);
 
 +    if (s->sync_bitmap) {
 +        bdrv_dirty_bitmap_set_busy(s->sync_bitmap, true);
@@ -190,10 +193,10 @@ index c26fd9260d..3c9cd42c50 100644
 +        }
 +    }
 +
+     bdrv_graph_wrlock();
     ret = block_job_add_bdrv(&s->common, "source", bs, 0,
                              BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
-                              BLK_PERM_CONSISTENT_READ,
-@@ -1735,6 +1797,9 @@ fail:
+@@ -1979,6 +2041,9 @@ fail:
         if (s->dirty_bitmap) {
             bdrv_release_dirty_bitmap(s->dirty_bitmap);
         }
@@ -203,7 +206,7 @@ index c26fd9260d..3c9cd42c50 100644
         job_early_fail(&s->common.job);
     }
 
-@@ -1752,29 +1817,23 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
+@@ -2001,35 +2066,28 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, const char *replaces,
                   int creation_flags, int64_t speed,
                   uint32_t granularity, int64_t buf_size,
@@ -220,14 +223,20 @@ index c26fd9260d..3c9cd42c50 100644
 -    bool is_none_mode;
     BlockDriverState *base;
 
+     GLOBAL_STATE_CODE();
+ 
 -    if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
 -        (mode == MIRROR_SYNC_MODE_BITMAP)) {
 -        error_setg(errp, "Sync mode '%s' not supported",
 -                   MirrorSyncMode_str(mode));
 -        return;
 -    }
+-
+     bdrv_graph_rdlock_main_loop();
 -    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
-     base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
+     base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
+     bdrv_graph_rdunlock_main_loop();
+ 
     mirror_start_job(job_id, bs, creation_flags, target, replaces,
                      speed, granularity, buf_size, backing_mode, zero_target,
                      on_source_error, on_target_error, unmap, NULL, NULL,
@@ -238,7 +247,7 @@ index c26fd9260d..3c9cd42c50 100644
 }
 
 BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
-@@ -1800,7 +1859,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
+@@ -2056,7 +2114,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
                      job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                      MIRROR_LEAVE_BACKING_CHAIN, false,
                      on_error, on_error, true, cb, opaque,
@@ -246,36 +255,35 @@ index c26fd9260d..3c9cd42c50 100644
 +                     &commit_active_job_driver, MIRROR_SYNC_MODE_FULL,
 +                     NULL, 0, base, auto_complete,
                      filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
-                      &local_err);
-     if (local_err) {
+                      errp);
+     if (!job) {
 diff --git a/blockdev.c b/blockdev.c
-index f391c3b3c7..bbeff9c439 100644
+index 057601dcf0..8682814a7a 100644
 --- a/blockdev.c
 +++ b/blockdev.c
-@@ -3159,6 +3159,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2776,6 +2776,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                    BlockDriverState *target,
-                                    bool has_replaces, const char *replaces,
+                                    const char *replaces,
                                    enum MirrorSyncMode sync,
-+                                   bool has_bitmap,
 +                                   const char *bitmap_name,
 +                                   bool has_bitmap_mode,
 +                                   BitmapSyncMode bitmap_mode,
                                    BlockMirrorBackingMode backing_mode,
                                    bool zero_target,
                                    bool has_speed, int64_t speed,
-@@ -3177,6 +3181,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
-                                    Error **errp)
+@@ -2794,6 +2797,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
 {
+     BlockDriverState *unfiltered_bs;
     int job_flags = JOB_DEFAULT;
 +    BdrvDirtyBitmap *bitmap = NULL;
 
-     if (!has_speed) {
-         speed = 0;
-@@ -3231,6 +3236,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+     GLOBAL_STATE_CODE();
+     GRAPH_RDLOCK_GUARD_MAINLOOP();
+@@ -2848,6 +2852,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         sync = MIRROR_SYNC_MODE_FULL;
     }
 
-+    if (has_bitmap) {
++    if (bitmap_name) {
 +        if (granularity) {
 +            error_setg(errp, "Granularity and bitmap cannot both be set");
 +            return;
@@ -298,53 +306,53 @@ index f391c3b3c7..bbeff9c439 100644
 +        }
 +    }
 +
-     if (has_replaces) {
-         BlockDriverState *to_replace_bs;
-         AioContext *replace_aio_context;
-@@ -3268,8 +3296,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+     if (!replaces) {
+         /* We want to mirror from @bs, but keep implicit filters on top */
+         unfiltered_bs = bdrv_skip_implicit_filters(bs);
+@@ -2889,8 +2916,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
      * and will allow to check whether the node still exist at mirror completion
      */
     mirror_start(job_id, bs, target,
--                 has_replaces ? replaces : NULL, job_flags,
+-                 replaces, job_flags,
 -                 speed, granularity, buf_size, sync, backing_mode, zero_target,
-+                 has_replaces ? replaces : NULL, job_flags, speed, granularity,
-+                 buf_size, sync, bitmap, bitmap_mode, backing_mode, zero_target,
++                 replaces, job_flags, speed, granularity, buf_size, sync,
++                 bitmap, bitmap_mode, backing_mode, zero_target,
                  on_source_error, on_target_error, unmap, filter_node_name,
                  copy_mode, errp);
 }
-@@ -3410,6 +3438,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
+@@ -3034,6 +3061,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
 
-     blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
-                            arg->has_replaces, arg->replaces, arg->sync,
-+                           arg->has_bitmap, arg->bitmap,
+     blockdev_mirror_common(arg->job_id, bs, target_bs,
+                            arg->replaces, arg->sync,
++                           arg->bitmap,
 +                           arg->has_bitmap_mode, arg->bitmap_mode,
                            backing_mode, zero_target,
                            arg->has_speed, arg->speed,
                            arg->has_granularity, arg->granularity,
-@@ -3432,6 +3462,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
+@@ -3053,6 +3082,8 @@ void qmp_blockdev_mirror(const char *job_id,
                          const char *device, const char *target,
-                          bool has_replaces, const char *replaces,
+                          const char *replaces,
                          MirrorSyncMode sync,
-+                         bool has_bitmap, const char *bitmap,
++                         const char *bitmap,
 +                         bool has_bitmap_mode, BitmapSyncMode bitmap_mode,
                          bool has_speed, int64_t speed,
                          bool has_granularity, uint32_t granularity,
                          bool has_buf_size, int64_t buf_size,
-@@ -3482,7 +3514,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
+@@ -3093,7 +3124,8 @@ void qmp_blockdev_mirror(const char *job_id,
     }
 
-     blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
--                           has_replaces, replaces, sync, backing_mode,
-+                           has_replaces, replaces, sync, has_bitmap,
+     blockdev_mirror_common(job_id, bs, target_bs,
+-                           replaces, sync, backing_mode,
++                           replaces, sync,
 +                           bitmap, has_bitmap_mode, bitmap_mode, backing_mode,
                            zero_target, has_speed, speed,
                            has_granularity, granularity,
                            has_buf_size, buf_size,
-diff --git a/include/block/block_int.h b/include/block/block_int.h
-index 6d234f1de9..180a5e00fd 100644
---- a/include/block/block_int.h
-+++ b/include/block/block_int.h
-@@ -1210,7 +1210,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
+diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
+index d2201e27f4..cc1387ae02 100644
+--- a/include/block/block_int-global-state.h
++++ b/include/block/block_int-global-state.h
+@@ -158,7 +158,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, const char *replaces,
                   int creation_flags, int64_t speed,
                   uint32_t granularity, int64_t buf_size,
@@ -356,31 +364,26 @@ index 6d234f1de9..180a5e00fd 100644
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 97d1f64636..8bdbccb397 100644
+index 746d1694c2..45ab548dfe 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -2054,10 +2054,19 @@
- #        (all the disk, only the sectors allocated in the topmost image, or
- #        only new I/O).
+@@ -2174,6 +2174,15 @@
+ #     destination (all the disk, only the sectors allocated in the
+ #     topmost image, or only new I/O).
 #
-+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
-+#          be present for bitmap mode and absent otherwise. The bitmap's
-+#          granularity is used instead of @granularity (since 4.1).
++# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
++#     argument must be present for bitmap mode and absent otherwise.
++#     The bitmap's granularity is used instead of @granularity (Since
++#     4.1).
 +#
-+# @bitmap-mode: Specifies the type of data the bitmap should contain after
-+#               the operation concludes. Must be present if sync is "bitmap".
-+#               Must NOT be present otherwise. (Since 4.1)
++# @bitmap-mode: Specifies the type of data the bitmap should contain
++#     after the operation concludes. Must be present if sync is
++#     "bitmap". Must NOT be present otherwise. (Since 4.1)
 +#
- # @granularity: granularity of the dirty bitmap, default is 64K
- #               if the image format doesn't have clusters, 4K if the clusters
- #               are smaller than that, else the cluster size.  Must be a
--#               power of 2 between 512 and 64M (since 1.4).
-+#               power of 2 between 512 and 64M. Must not be specified if
-+#               @bitmap is present (since 1.4).
- #
- # @buf-size: maximum amount of data in flight from source to
- #            target (since 1.4).
-@@ -2095,7 +2104,9 @@
+ # @granularity: granularity of the dirty bitmap, default is 64K if the
+ #     image format doesn't have clusters, 4K if the clusters are
+ #     smaller than that, else the cluster size.  Must be a power of 2
+@@ -2216,7 +2225,9 @@
 { 'struct': 'DriveMirror',
   'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
             '*format': 'str', '*node-name': 'str', '*replaces': 'str',
@@ -391,28 +394,23 @@ index 97d1f64636..8bdbccb397 100644
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError',
-@@ -2362,10 +2373,19 @@
- #        (all the disk, only the sectors allocated in the topmost image, or
- #        only new I/O).
+@@ -2496,6 +2507,15 @@
+ #     destination (all the disk, only the sectors allocated in the
+ #     topmost image, or only new I/O).
 #
-+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
-+#          be present for bitmap mode and absent otherwise. The bitmap's
-+#          granularity is used instead of @granularity (since 4.1).
++# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
++#     argument must be present for bitmap mode and absent otherwise.
++#     The bitmap's granularity is used instead of @granularity (since
++#     4.1).
 +#
-+# @bitmap-mode: Specifies the type of data the bitmap should contain after
-+#               the operation concludes. Must be present if sync is "bitmap".
-+#               Must NOT be present otherwise. (Since 4.1)
++# @bitmap-mode: Specifies the type of data the bitmap should contain
++#     after the operation concludes. Must be present if sync is
++#     "bitmap". Must NOT be present otherwise. (Since 4.1)
 +#
- # @granularity: granularity of the dirty bitmap, default is 64K
- #               if the image format doesn't have clusters, 4K if the clusters
- #               are smaller than that, else the cluster size.  Must be a
--#               power of 2 between 512 and 64M
-+#               power of 2 between 512 and 64M . Must not be specified if
-+#               @bitmap is present.
- #
- # @buf-size: maximum amount of data in flight from source to
- #            target
-@@ -2414,7 +2434,8 @@
+ # @granularity: granularity of the dirty bitmap, default is 64K if the
+ #     image format doesn't have clusters, 4K if the clusters are
+ #     smaller than that, else the cluster size.  Must be a power of 2
+@@ -2544,7 +2564,8 @@
 { 'command': 'blockdev-mirror',
   'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
             '*replaces': 'str',
@@ -422,11 +420,11 @@ index 97d1f64636..8bdbccb397 100644
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError',
-diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
-index 0c861809f0..da87a67a57 100644
---- a/tests/test-block-iothread.c
-+++ b/tests/test-block-iothread.c
-@@ -611,8 +611,8 @@ static void test_propagate_mirror(void)
+diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
+index 3766d5de6b..afa44cbd34 100644
+--- a/tests/unit/test-block-iothread.c
++++ b/tests/unit/test-block-iothread.c
+@@ -755,8 +755,8 @@ static void test_propagate_mirror(void)
 
     /* Start a mirror job */
     mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,
@@ -436,4 +434,4 @@ index 0c861809f0..da87a67a57 100644
 +                 false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
                  false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
                  &error_abort);
-     job = job_get("job0");
+ 
--- a/debian/patches/bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
+++ b/debian/patches/bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
@@ -18,15 +18,16 @@ incremental backup modes; we can use this bitmap to later refresh a
 successfully created mirror.

 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 block/mirror.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

 diff --git a/block/mirror.c b/block/mirror.c
-index 3c9cd42c50..08ac9827f2 100644
+index 0c5c72df2e..37fee3fa25 100644
 --- a/block/mirror.c
 +++ b/block/mirror.c
-@@ -653,8 +653,6 @@ static int mirror_exit_common(Job *job)
+@@ -693,8 +693,6 @@ static int mirror_exit_common(Job *job)
         bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
     }
 
@@ -35,9 +36,9 @@ index 3c9cd42c50..08ac9827f2 100644
     /* Make sure that the source BDS doesn't go away during bdrv_replace_node,
      * before we can call bdrv_drained_end */
     bdrv_ref(src);
-@@ -752,6 +750,18 @@ static int mirror_exit_common(Job *job)
-     blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
-     blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
+@@ -800,6 +798,18 @@ static int mirror_exit_common(Job *job)
+     bdrv_drained_end(target_bs);
+     bdrv_unref(target_bs);
 
 +    if (s->sync_bitmap) {
 +        if (s->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS ||
@@ -54,7 +55,7 @@ index 3c9cd42c50..08ac9827f2 100644
     bs_opaque->job = NULL;
 
     bdrv_drained_end(src);
-@@ -1584,10 +1594,6 @@ static BlockJob *mirror_start_job(
+@@ -1757,10 +1767,6 @@ static BlockJob *mirror_start_job(
                        " sync mode",
                        MirrorSyncMode_str(sync_mode));
             return NULL;
@@ -65,7 +66,7 @@ index 3c9cd42c50..08ac9827f2 100644
         }
     } else if (bitmap) {
         error_setg(errp,
-@@ -1604,6 +1610,12 @@ static BlockJob *mirror_start_job(
+@@ -1777,6 +1783,12 @@ static BlockJob *mirror_start_job(
             return NULL;
         }
         granularity = bdrv_dirty_bitmap_granularity(bitmap);
--- a/debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
+++ b/debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
@@ -10,15 +10,16 @@ as one without the other does not make much sense with the current set
 of modes.

 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 blockdev.c | 3 +++
 1 file changed, 3 insertions(+)

 diff --git a/blockdev.c b/blockdev.c
-index bbeff9c439..fa3c2f5548 100644
+index 8682814a7a..5b75a085ee 100644
 --- a/blockdev.c
 +++ b/blockdev.c
-@@ -3257,6 +3257,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2873,6 +2873,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
             return;
         }
@@ -27,4 +28,4 @@ index bbeff9c439..fa3c2f5548 100644
 +        return;
     }
 
-     if (has_replaces) {
+     if (!replaces) {
--- a/debian/patches/bitmap-mirror/0004-mirror-switch-to-bdrv_dirty_bitmap_merge_internal.patch
+++ b/debian/patches/bitmap-mirror/0004-mirror-switch-to-bdrv_dirty_bitmap_merge_internal.patch
@@ -10,15 +10,16 @@ since sync_bitmap is busy at the point of merging, and we checked access
 beforehand.

 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
- block/mirror.c | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
+ block/mirror.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)

 diff --git a/block/mirror.c b/block/mirror.c
-index 08ac9827f2..c56b0f87e3 100644
+index 37fee3fa25..6b3cce1007 100644
 --- a/block/mirror.c
 +++ b/block/mirror.c
-@@ -756,8 +756,8 @@ static int mirror_exit_common(Job *job)
+@@ -804,8 +804,8 @@ static int mirror_exit_common(Job *job)
              job->ret == 0 && ret == 0)) {
             /* Success; synchronize copy back to sync. */
             bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
@@ -29,14 +30,17 @@ index 08ac9827f2..c56b0f87e3 100644
         }
     }
     bdrv_release_dirty_bitmap(s->dirty_bitmap);
-@@ -1749,8 +1749,8 @@ static BlockJob *mirror_start_job(
+@@ -1964,11 +1964,8 @@ static BlockJob *mirror_start_job(
     }
 
     if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
 -        bdrv_merge_dirty_bitmap(s->dirty_bitmap, s->sync_bitmap,
 -                                NULL, &local_err);
+-        if (local_err) {
+-            goto fail;
+-        }
 +        bdrv_dirty_bitmap_merge_internal(s->dirty_bitmap, s->sync_bitmap,
 +                                         NULL, true);
-         if (local_err) {
-             goto fail;
-         }
+     }
+ 
+     bdrv_graph_wrlock();
--- a/debian/patches/bitmap-mirror/0005-iotests-add-test-for-bitmap-mirror.patch
+++ b/debian/patches/bitmap-mirror/0005-iotests-add-test-for-bitmap-mirror.patch
@@ -20,11 +20,11 @@ intentionally keeping copyright and ownership of original test case to
 honor provenance.

 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 tests/qemu-iotests/384     |  547 +++++++
 tests/qemu-iotests/384.out | 2846 ++++++++++++++++++++++++++++++++++++
- tests/qemu-iotests/group   |    1 +
- 3 files changed, 3394 insertions(+)
+ 2 files changed, 3393 insertions(+)
 create mode 100755 tests/qemu-iotests/384
 create mode 100644 tests/qemu-iotests/384.out

@@ -3433,15 +3433,3 @@ index 0000000000..9b7408b6d6
 +{"execute": "blockdev-mirror", "arguments": {"bitmap": "bitmap0", "device": "drive0", "filter-node-name": "mirror-top", "job-id": "api_job", "sync": "none", "target": "mirror_target"}}
 +{"error": {"class": "GenericError", "desc": "bitmap-mode must be specified if a bitmap is provided"}}
 +
-diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
-index 435dccd5af..939efd9c70 100644
---- a/tests/qemu-iotests/group
-+++ b/tests/qemu-iotests/group
-@@ -270,6 +270,7 @@
- 253 rw quick
- 254 rw backing quick
- 255 rw quick
-+384 rw
- 256 rw auto quick
- 257 rw
- 258 rw quick
--- a/debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
+++ b/debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
@@ -11,6 +11,9 @@ mode was never available for drive-mirror, it makes the interface more
 uniform w.r.t. backup block jobs.

 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: rebase for 8.2.2]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 block/mirror.c             | 28 +++------------
 blockdev.c                 | 29 +++++++++++++++
@@ -18,12 +21,12 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
 3 files changed, 70 insertions(+), 59 deletions(-)

 diff --git a/block/mirror.c b/block/mirror.c
-index c56b0f87e3..dbba6fc80e 100644
+index 6b3cce1007..2f1223852b 100644
 --- a/block/mirror.c
 +++ b/block/mirror.c
-@@ -1584,31 +1584,13 @@ static BlockJob *mirror_start_job(
-     Error *local_err = NULL;
-     int ret;
+@@ -1757,31 +1757,13 @@ static BlockJob *mirror_start_job(
+ 
+     GLOBAL_STATE_CODE();
 
 -    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
 -        error_setg(errp, "Sync mode '%s' not supported",
@@ -59,17 +62,17 @@ index c56b0f87e3..dbba6fc80e 100644
 
         if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
 diff --git a/blockdev.c b/blockdev.c
-index fa3c2f5548..206de2b6c2 100644
+index 5b75a085ee..d27d8c38ec 100644
 --- a/blockdev.c
 +++ b/blockdev.c
-@@ -3236,7 +3236,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2852,7 +2852,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         sync = MIRROR_SYNC_MODE_FULL;
     }
 
 +    if ((sync == MIRROR_SYNC_MODE_BITMAP) ||
 +        (sync == MIRROR_SYNC_MODE_INCREMENTAL)) {
 +        /* done before desugaring 'incremental' to print the right message */
-+        if (!has_bitmap) {
++        if (!bitmap_name) {
 +            error_setg(errp, "Must provide a valid bitmap name for "
 +                       "'%s' sync mode", MirrorSyncMode_str(sync));
 +            return;
@@ -90,7 +93,7 @@ index fa3c2f5548..206de2b6c2 100644
 +        bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
 +    }
 +
-     if (has_bitmap) {
+     if (bitmap_name) {
 +        if (sync != MIRROR_SYNC_MODE_BITMAP) {
 +            error_setg(errp, "Sync mode '%s' not supported with bitmap.",
 +                       MirrorSyncMode_str(sync));
--- a/debian/patches/extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
+++ b/debian/patches/extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
@@ -0,0 +1,206 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stefan Reiter <s.reiter@proxmox.com>
+Date: Mon, 23 Aug 2021 11:28:32 +0200
+Subject: [PATCH] monitor/qmp: fix race with clients disconnecting early
+
+The following sequence can produce a race condition that results in
+responses meant for different clients being sent to the wrong one:
+
+(QMP, no OOB)
+1) client A connects
+2) client A sends 'qmp_capabilities'
+3) 'qmp_dispatch' runs in coroutine, schedules out to
+   'do_qmp_dispatch_bh' and yields
+4) client A disconnects (i.e. aborts, crashes, etc...)
+5) client B connects
+6) 'do_qmp_dispatch_bh' runs 'qmp_capabilities' and wakes calling coroutine
+7) capabilities are now set and 'mon->commands' is set to '&qmp_commands'
+8) 'qmp_dispatch' returns to 'monitor_qmp_dispatch'
+9) success message is sent to client B *without it ever having sent
+   'qmp_capabilities' itself*
+9a) even if client B ignores it, it will now presumably send its own
+   greeting, which will error because caps are already set
+
+The fix proposed here uses an atomic, sequential connection number
+stored in the MonitorQMP struct, which is incremented every time a new
+client connects. Since it is not changed on CHR_EVENT_CLOSED, the
+behaviour of allowing a client to disconnect only one side of the
+connection is retained.
+
+The connection_nr needs to be exposed outside of the monitor subsystem,
+since qmp_dispatch lives in qapi code. It needs to be checked twice,
+once for actually running the command in the BH (fixes 7), and once for
+sending back a response (fixes 9).
+
+This satisfies my local reproducer - using multiple clients constantly
+looping to open a connection, send the greeting, then exiting no longer
+crashes other, normally behaving clients with unrelated responses.
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ include/monitor/monitor.h  |  1 +
+ monitor/monitor-internal.h |  7 +++++++
+ monitor/monitor.c          | 15 +++++++++++++++
+ monitor/qmp.c              | 15 ++++++++++++++-
+ qapi/qmp-dispatch.c        | 21 +++++++++++++++++----
+ stubs/monitor-core.c       |  5 +++++
+ 6 files changed, 59 insertions(+), 5 deletions(-)
+
+diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
+index 965f5d5450..e04bd059b6 100644
+--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
+@@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts;
+ Monitor *monitor_cur(void);
+ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon);
+ bool monitor_cur_is_qmp(void);
+int monitor_get_connection_nr(const Monitor *mon);
+ 
+ void monitor_init_globals(void);
+ void monitor_init_globals_core(void);
+diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
+index 252de85681..8db28f9272 100644
+--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
+@@ -151,6 +151,13 @@ typedef struct {
+     QemuMutex qmp_queue_lock;
+     /* Input queue that holds all the parsed QMP requests */
+     GQueue *qmp_requests;
+
+    /*
+     * A sequential number that gets incremented on every new CHR_EVENT_OPENED.
+     * Used to avoid leftover responses in BHs from being sent to the wrong
+     * client. Access with atomics.
+     */
+    int connection_nr;
+ } MonitorQMP;
+ 
+ /**
+diff --git a/monitor/monitor.c b/monitor/monitor.c
+index 01ede1babd..5681bca346 100644
+--- a/monitor/monitor.c
+++ b/monitor/monitor.c
+@@ -117,6 +117,21 @@ bool monitor_cur_is_qmp(void)
+     return cur_mon && monitor_is_qmp(cur_mon);
+ }
+ 
+/**
+ * If @mon is a QMP monitor, return the connection_nr, otherwise -1.
+ */
+int monitor_get_connection_nr(const Monitor *mon)
+{
+    MonitorQMP *qmp_mon;
+
+    if (!monitor_is_qmp(mon)) {
+        return -1;
+    }
+
+    qmp_mon = container_of(mon, MonitorQMP, common);
+    return qatomic_read(&qmp_mon->connection_nr);
+}
+
+ /**
+  * Is @mon is using readline?
+  * Note: not all HMP monitors use readline, e.g., gdbserver has a
+diff --git a/monitor/qmp.c b/monitor/qmp.c
+index a239945e8d..589c9524f8 100644
+--- a/monitor/qmp.c
+++ b/monitor/qmp.c
+@@ -165,6 +165,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
+     QDict *rsp;
+     QDict *error;
+ 
+    int conn_nr_before = qatomic_read(&mon->connection_nr);
+
+     rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
+                        &mon->common);
+ 
+@@ -180,7 +182,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
+         }
+     }
+ 
+-    monitor_qmp_respond(mon, rsp);
+    /*
+     * qmp_dispatch might have yielded and waited for a BH, in which case there
+     * is a chance a new client connected in the meantime - if this happened,
+     * the command will not have been executed, but we also need to ensure that
+     * we don't send back a corresponding response on a line that no longer
+     * belongs to this request.
+     */
+    if (conn_nr_before == qatomic_read(&mon->connection_nr)) {
+        monitor_qmp_respond(mon, rsp);
+    }
+
+     qobject_unref(rsp);
+ }
+ 
+@@ -461,6 +473,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
+ 
+     switch (event) {
+     case CHR_EVENT_OPENED:
+        qatomic_inc_fetch(&mon->connection_nr);
+         mon->commands = &qmp_cap_negotiation_commands;
+         monitor_qmp_caps_reset(mon);
+         data = qmp_greeting(mon);
+diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
+index f3488afeef..2624eb3470 100644
+--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
+@@ -117,16 +117,28 @@ typedef struct QmpDispatchBH {
+     QObject **ret;
+     Error **errp;
+     Coroutine *co;
+    int conn_nr;
+ } QmpDispatchBH;
+ 
+ static void do_qmp_dispatch_bh(void *opaque)
+ {
+     QmpDispatchBH *data = opaque;
+ 
+-    assert(monitor_cur() == NULL);
+-    monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
+-    data->cmd->fn(data->args, data->ret, data->errp);
+-    monitor_set_cur(qemu_coroutine_self(), NULL);
+    /*
+     * A QMP monitor tracks its client with a connection number; if this
+     * changes during the scheduling delay of this BH, we must not execute the
+     * command. Otherwise a badly placed 'qmp_capabilities' might affect the
+     * connection state of a client it was never meant for.
+     */
+    if (data->conn_nr == monitor_get_connection_nr(data->cur_mon)) {
+        assert(monitor_cur() == NULL);
+        monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
+        data->cmd->fn(data->args, data->ret, data->errp);
+        monitor_set_cur(qemu_coroutine_self(), NULL);
+    } else {
+        error_setg(data->errp, "active monitor connection changed");
+    }
+
+     aio_co_wake(data->co);
+ }
+ 
+@@ -250,6 +262,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
+             .ret        = &ret,
+             .errp       = &err,
+             .co         = qemu_coroutine_self(),
+            .conn_nr    = monitor_get_connection_nr(cur_mon),
+         };
+         aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh,
+                                 &data);
+diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
+index afa477aae6..d3ff124bf3 100644
+--- a/stubs/monitor-core.c
+++ b/stubs/monitor-core.c
+@@ -12,6 +12,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
+     return NULL;
+ }
+ 
+int monitor_get_connection_nr(const Monitor *mon)
+{
+    return -1;
+}
+
+ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
+ {
+ }
--- a/debian/patches/extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
+++ b/debian/patches/extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
@@ -0,0 +1,69 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Guenter Roeck <linux@roeck-us.net>
+Date: Tue, 28 Feb 2023 09:11:29 -0800
+Subject: [PATCH] scsi: megasas: Internal cdbs have 16-byte length
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Host drivers do not necessarily set cdb_len in megasas io commands.
+With commits 6d1511cea0 ("scsi: Reject commands if the CDB length
+exceeds buf_len") and fe9d8927e2 ("scsi: Add buf_len parameter to
+scsi_req_new()"), this results in failures to boot Linux from affected
+SCSI drives because cdb_len is set to 0 by the host driver.
+Set the cdb length to its actual size to solve the problem.
+
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+(picked-up from https://lists.nongnu.org/archive/html/qemu-devel/2023-02/msg08653.html)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/scsi/megasas.c | 14 ++------------
+ 1 file changed, 2 insertions(+), 12 deletions(-)
+
+diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
+index 2d0c607177..97e51733af 100644
+--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
+@@ -1781,7 +1781,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
+     uint8_t cdb[16];
+     int len;
+     struct SCSIDevice *sdev = NULL;
+-    int target_id, lun_id, cdb_len;
+    int target_id, lun_id;
+ 
+     lba_count = le32_to_cpu(cmd->frame->io.header.data_len);
+     lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo);
+@@ -1790,7 +1790,6 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
+ 
+     target_id = cmd->frame->header.target_id;
+     lun_id = cmd->frame->header.lun_id;
+-    cdb_len = cmd->frame->header.cdb_len;
+ 
+     if (target_id < MFI_MAX_LD && lun_id == 0) {
+         sdev = scsi_device_find(&s->bus, 0, target_id, lun_id);
+@@ -1805,15 +1804,6 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
+         return MFI_STAT_DEVICE_NOT_FOUND;
+     }
+ 
+-    if (cdb_len > 16) {
+-        trace_megasas_scsi_invalid_cdb_len(
+-            mfi_frame_desc(frame_cmd), 1, target_id, lun_id, cdb_len);
+-        megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
+-        cmd->frame->header.scsi_status = CHECK_CONDITION;
+-        s->event_count++;
+-        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+-    }
+-
+     cmd->iov_size = lba_count * sdev->blocksize;
+     if (megasas_map_sgl(s, cmd, &cmd->frame->io.sgl)) {
+         megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE));
+@@ -1824,7 +1814,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
+ 
+     megasas_encode_lba(cdb, lba_start, lba_count, is_write);
+     cmd->req = scsi_req_new(sdev, cmd->index,
+-                            lun_id, cdb, cdb_len, cmd);
+                            lun_id, cdb, sizeof(cdb), cmd);
+     if (!cmd->req) {
+         trace_megasas_scsi_req_alloc_failed(
+             mfi_frame_desc(frame_cmd), target_id, lun_id);
--- a/debian/patches/extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
+++ b/debian/patches/extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
@@ -0,0 +1,100 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Tue, 7 Mar 2023 15:03:02 +0100
+Subject: [PATCH] ide: avoid potential deadlock when draining during trim
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The deadlock can happen as follows:
+1. ide_issue_trim is called, and increments the in_flight counter.
+2. ide_issue_trim_cb calls blk_aio_pdiscard.
+3. Somebody else starts draining (e.g. backup to insert the cbw node).
+4. ide_issue_trim_cb is called as the completion callback for
+   blk_aio_pdiscard.
+5. ide_issue_trim_cb issues yet another blk_aio_pdiscard request.
+6. The request is added to the wait queue via blk_wait_while_drained,
+   because draining has been started.
+7. Nobody ever decrements the in_flight counter and draining can't
+   finish. This would be done by ide_trim_bh_cb, which is called after
+   ide_issue_trim_cb has issued its last request, but
+   ide_issue_trim_cb is not called anymore, because it's the
+   completion callback of blk_aio_pdiscard, which waits on draining.
+
+Quoting Hanna Czenczek:
+> The point of 7e5cdb345f was that we need any in-flight count to
+> accompany a set s->bus->dma->aiocb. While blk_aio_pdiscard() is
+> happening, we don’t necessarily need another count. But we do need
+> it while there is no blk_aio_pdiscard().
+> ide_issue_trim_cb() returns in two cases (and, recursively through
+> its callers, leaves s->bus->dma->aiocb set):
+> 1. After calling blk_aio_pdiscard(), which will keep an in-flight
+>    count,
+> 2. After calling replay_bh_schedule_event() (i.e.
+>    qemu_bh_schedule()), which does not keep an in-flight count.
+
+Thus, even after moving the blk_inc_in_flight to above the
+replay_bh_schedule_event call, the invariant "ide_issue_trim_cb
+returns with an accompanying in-flight count" is still satisfied.
+
+However, the issue 7e5cdb345f fixed for canceling resurfaces, because
+ide_cancel_dma_sync assumes that it just needs to drain once. But now
+the in_flight count is not consistently > 0 during the trim operation.
+So, change it to drain until !s->bus->dma->aiocb, which means that the
+operation finished (s->bus->dma->aiocb is cleared by ide_set_inactive
+via the ide_dma_cb when the end of the transfer is reached).
+
+Discussion here:
+https://lists.nongnu.org/archive/html/qemu-devel/2023-03/msg02506.html
+
+Fixes: 7e5cdb345f ("ide: Increment BB in-flight counter for TRIM BH")
+Suggested-by: Hanna Czenczek <hreitz@redhat.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/ide/core.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/hw/ide/core.c b/hw/ide/core.c
+index e8cb2dac92..3b21acf651 100644
+--- a/hw/ide/core.c
+++ b/hw/ide/core.c
+@@ -456,7 +456,7 @@ static void ide_trim_bh_cb(void *opaque)
+     iocb->bh = NULL;
+     qemu_aio_unref(iocb);
+ 
+-    /* Paired with an increment in ide_issue_trim() */
+    /* Paired with an increment in ide_issue_trim_cb() */
+     blk_dec_in_flight(blk);
+ }
+ 
+@@ -516,6 +516,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
+ done:
+     iocb->aiocb = NULL;
+     if (iocb->bh) {
+        /* Paired with a decrement in ide_trim_bh_cb() */
+        blk_inc_in_flight(s->blk);
+         replay_bh_schedule_event(iocb->bh);
+     }
+ }
+@@ -528,9 +530,6 @@ BlockAIOCB *ide_issue_trim(
+     IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
+     TrimAIOCB *iocb;
+ 
+-    /* Paired with a decrement in ide_trim_bh_cb() */
+-    blk_inc_in_flight(s->blk);
+-
+     iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
+     iocb->s = s;
+     iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
+@@ -754,8 +753,9 @@ void ide_cancel_dma_sync(IDEState *s)
+      */
+     if (s->bus->dma->aiocb) {
+         trace_ide_cancel_dma_sync_remaining();
+-        blk_drain(s->blk);
+-        assert(s->bus->dma->aiocb == NULL);
+        while (s->bus->dma->aiocb) {
+            blk_drain(s->blk);
+        }
+     }
+ }
+ 
--- a/debian/patches/extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
+++ b/debian/patches/extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
@@ -0,0 +1,45 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Fri, 17 Nov 2023 11:18:06 +0100
+Subject: [PATCH] Revert "x86: acpi: workaround Windows not handling name
+ references in Package properly"
+
+This reverts commit 44d975ef340e2f21f236f9520c53e1b30d2213a4.
+
+As reported in the community forum [0] and reproduced locally, this
+breaks VirtIO network adapters in (at least) the German ISO of Windows
+Server 2022. The fix itself was for
+
+> Issue is not fatal but as result acpi-index/"PCI Label ID" property
+> is either not shown in device details page or shows incorrect value.
+
+so revert and tolerate that as a stop-gap, rather than have the
+devices not working at all.
+
+[0]: https://forum.proxmox.com/threads/92094/post-605684
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/i386/acpi-build.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
+index 53f804ac16..9b1b9f0412 100644
+--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
+@@ -347,13 +347,9 @@ Aml *aml_pci_device_dsm(void)
+     {
+         Aml *params = aml_local(0);
+         Aml *pkg = aml_package(2);
+-        aml_append(pkg, aml_int(0));
+-        aml_append(pkg, aml_int(0));
+        aml_append(pkg, aml_name("BSEL"));
+        aml_append(pkg, aml_name("ASUN"));
+         aml_append(method, aml_store(pkg, params));
+-        aml_append(method,
+-            aml_store(aml_name("BSEL"), aml_index(params, aml_int(0))));
+-        aml_append(method,
+-            aml_store(aml_name("ASUN"), aml_index(params, aml_int(1))));
+         aml_append(method,
+             aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1),
+                                  aml_arg(2), aml_arg(3), params))
--- a/debian/patches/extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
+++ b/debian/patches/extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
@@ -0,0 +1,35 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 29 Apr 2024 15:41:11 +0200
+Subject: [PATCH] block/copy-before-write: use uint64_t for timeout in
+ nanoseconds
+
+rather than the uint32_t for which the maximum is slightly more than 4
+seconds and larger values would overflow. The QAPI interface allows
+specifying the number of seconds, so only values 0 to 4 are safe right
+now, other values lead to a much lower timeout than a user expects.
+
+The block_copy() call where this is used already takes a uint64_t for
+the timeout, so no change required there.
+
+Fixes: 6db7fd1ca9 ("block/copy-before-write: implement cbw-timeout option")
+Reported-by: Friedrich Weber <f.weber@proxmox.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Tested-by: Friedrich Weber <f.weber@proxmox.com>
+---
+ block/copy-before-write.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 8aba27a71d..026fa9840f 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -43,7 +43,7 @@ typedef struct BDRVCopyBeforeWriteState {
+     BlockCopyState *bcs;
+     BdrvChild *target;
+     OnCbwError on_cbw_error;
+-    uint32_t cbw_timeout_ns;
+    uint64_t cbw_timeout_ns;
+ 
+     /*
+      * @lock: protects access to @access_bitmap, @done_bitmap and
--- a/debian/patches/extra/0006-virtio-gpu-fix-v2-migration.patch
+++ b/debian/patches/extra/0006-virtio-gpu-fix-v2-migration.patch
@@ -0,0 +1,98 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
+Date: Thu, 16 May 2024 12:40:22 +0400
+Subject: [PATCH] virtio-gpu: fix v2 migration
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke
+forward/backward version migration. Versioning of nested VMSD structures
+is not straightforward, as the wire format doesn't have nested
+structures versions. Introduce x-scanout-vmstate-version and a field
+test to save/load appropriately according to the machine version.
+
+Fixes: dfcf74fa ("virtio-gpu: fix scanout migration post-load")
+Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
+Signed-off-by: Peter Xu <peterx@redhat.com>
+---
+ hw/core/machine.c              |  1 +
+ hw/display/virtio-gpu.c        | 24 ++++++++++++++++--------
+ include/hw/virtio/virtio-gpu.h |  1 +
+ 3 files changed, 18 insertions(+), 8 deletions(-)
+
+diff --git a/hw/core/machine.c b/hw/core/machine.c
+index 37ede0e7d4..d33a37a6f6 100644
+--- a/hw/core/machine.c
+++ b/hw/core/machine.c
+@@ -37,6 +37,7 @@ GlobalProperty hw_compat_8_2[] = {
+     { "migration", "zero-page-detection", "legacy"},
+     { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" },
+     { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
+    { "virtio-gpu-device", "x-scanout-vmstate-version", "1" },
+ };
+ const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
+ 
+diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
+index ae831b6b3e..85323daf99 100644
+--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
+@@ -1166,10 +1166,17 @@ static void virtio_gpu_cursor_bh(void *opaque)
+     virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq);
+ }
+ 
+static bool scanout_vmstate_after_v2(void *opaque, int version)
+{
+    struct VirtIOGPUBase *base = container_of(opaque, VirtIOGPUBase, scanout);
+    struct VirtIOGPU *gpu = container_of(base, VirtIOGPU, parent_obj);
+
+    return gpu->scanout_vmstate_version >= 2;
+}
+
+ static const VMStateDescription vmstate_virtio_gpu_scanout = {
+     .name = "virtio-gpu-one-scanout",
+-    .version_id = 2,
+-    .minimum_version_id = 1,
+    .version_id = 1,
+     .fields = (const VMStateField[]) {
+         VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout),
+         VMSTATE_UINT32(width, struct virtio_gpu_scanout),
+@@ -1181,12 +1188,12 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = {
+         VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout),
+         VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout),
+         VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout),
+-        VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2),
+-        VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2),
+-        VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2),
+-        VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2),
+-        VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2),
+-        VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2),
+        VMSTATE_UINT32_TEST(fb.format, struct virtio_gpu_scanout, scanout_vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.bytes_pp, struct virtio_gpu_scanout, scanout_vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.width, struct virtio_gpu_scanout, scanout_vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.height, struct virtio_gpu_scanout, scanout_vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.stride, struct virtio_gpu_scanout, scanout_vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.offset, struct virtio_gpu_scanout, scanout_vmstate_after_v2),
+         VMSTATE_END_OF_LIST()
+     },
+ };
+@@ -1659,6 +1666,7 @@ static Property virtio_gpu_properties[] = {
+     DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags,
+                     VIRTIO_GPU_FLAG_BLOB_ENABLED, false),
+     DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0),
+    DEFINE_PROP_UINT8("x-scanout-vmstate-version", VirtIOGPU, scanout_vmstate_version, 2),
+     DEFINE_PROP_END_OF_LIST(),
+ };
+ 
+diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
+index ed44cdad6b..842315d51d 100644
+--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
+@@ -177,6 +177,7 @@ typedef struct VGPUDMABuf {
+ struct VirtIOGPU {
+     VirtIOGPUBase parent_obj;
+ 
+    uint8_t scanout_vmstate_version;
+     uint64_t conf_max_hostmem;
+ 
+     VirtQueue *ctrl_vq;
--- a/debian/patches/extra/0007-hw-pflash-fix-block-write-start.patch
+++ b/debian/patches/extra/0007-hw-pflash-fix-block-write-start.patch
@@ -0,0 +1,59 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Gerd Hoffmann <kraxel@redhat.com>
+Date: Thu, 16 May 2024 10:46:34 +0200
+Subject: [PATCH] hw/pflash: fix block write start
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Move the pflash_blk_write_start() call.  We need the offset of the
+first data write, not the offset for the setup (number-of-bytes)
+write.  Without this fix u-boot can do block writes to the first
+flash block only.
+
+While being at it drop a leftover FIXME.
+
+Cc: qemu-stable@nongnu.org
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2343
+Fixes: fcc79f2e0955 ("hw/pflash: implement update buffer for block writes")
+Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+(picked up from https://lists.nongnu.org/archive/html/qemu-stable/2024-05/msg00091.html)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/block/pflash_cfi01.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
+index 1bda8424b9..c8f1cf5a87 100644
+--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
+@@ -518,10 +518,6 @@ static void pflash_write(PFlashCFI01 *pfl, hwaddr offset,
+             break;
+         case 0xe8: /* Write to buffer */
+             trace_pflash_write(pfl->name, "write to buffer");
+-            /* FIXME should save @offset, @width for case 1+ */
+-            qemu_log_mask(LOG_UNIMP,
+-                          "%s: Write to buffer emulation is flawed\n",
+-                          __func__);
+             pfl->status |= 0x80; /* Ready! */
+             break;
+         case 0xf0: /* Probe for AMD flash */
+@@ -574,7 +570,6 @@ static void pflash_write(PFlashCFI01 *pfl, hwaddr offset,
+             }
+             pfl->counter = value;
+             pfl->wcycle++;
+-            pflash_blk_write_start(pfl, offset);
+             break;
+         case 0x60:
+             if (cmd == 0xd0) {
+@@ -605,6 +600,9 @@ static void pflash_write(PFlashCFI01 *pfl, hwaddr offset,
+         switch (pfl->cmd) {
+         case 0xe8: /* Block write */
+             /* FIXME check @offset, @width */
+            if (pfl->blk_offset == -1 && pfl->counter) {
+                pflash_blk_write_start(pfl, offset);
+            }
+             if (!pfl->ro && (pfl->blk_offset != -1)) {
+                 pflash_data_write(pfl, offset, value, width, be);
+             } else {
--- a/debian/patches/extra/0008-target-i386-fix-operand-size-for-DATA16-REX.W-POPCNT.patch
+++ b/debian/patches/extra/0008-target-i386-fix-operand-size-for-DATA16-REX.W-POPCNT.patch
@@ -0,0 +1,51 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 9 May 2024 12:38:10 +0200
+Subject: [PATCH] target/i386: fix operand size for DATA16 REX.W POPCNT
+
+According to the manual, 32-bit vs 64-bit is governed by REX.W
+and REX ignores the 0x66 prefix.  This can be confirmed with this
+program:
+
+    #include <stdio.h>
+    int main()
+    {
+       int x = 0x12340000;
+       int y;
+       asm("popcntl %1, %0" : "=r" (y) : "r" (x)); printf("%x\n", y);
+       asm("mov $-1, %0; .byte 0x66; popcntl %1, %0" : "+r" (y) : "r" (x)); printf("%x\n", y);
+       asm("mov $-1, %0; .byte 0x66; popcntq %q1, %q0" : "+r" (y) : "r" (x)); printf("%x\n", y);
+    }
+
+which prints 5/ffff0000/5 on real hardware and 5/ffff0000/ffff0000
+on QEMU.
+
+Cc: qemu-stable@nongnu.org
+Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 41c685dc59bb611096f3bb6a663cfa82e4cba97b)
+[FE: keep mo_64_32 helper which still has other users in 9.0.0]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/i386/tcg/translate.c | 7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
+index 76a42c679c..b60f3bd642 100644
+--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
+@@ -6799,12 +6799,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
+         modrm = x86_ldub_code(env, s);
+         reg = ((modrm >> 3) & 7) | REX_R(s);
+ 
+-        if (s->prefix & PREFIX_DATA) {
+-            ot = MO_16;
+-        } else {
+-            ot = mo_64_32(dflag);
+-        }
+-
+        ot = dflag;
+         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+         gen_extu(ot, s->T0);
+         tcg_gen_mov_tl(cpu_cc_src, s->T0);
--- a/debian/patches/extra/0009-target-i386-rdpkru-wrpkru-are-no-prefix-instructions.patch
+++ b/debian/patches/extra/0009-target-i386-rdpkru-wrpkru-are-no-prefix-instructions.patch
@@ -0,0 +1,40 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 9 May 2024 15:55:47 +0200
+Subject: [PATCH] target/i386: rdpkru/wrpkru are no-prefix instructions
+
+Reject 0x66/0xf3/0xf2 in front of them.
+
+Cc: qemu-stable@nongnu.org
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 40a3ec7b5ffde500789d016660a171057d6b467c)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/i386/tcg/translate.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
+index b60f3bd642..3e949fe964 100644
+--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
+@@ -6083,7 +6083,8 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
+             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+             break;
+         case 0xee: /* rdpkru */
+-            if (prefixes & PREFIX_LOCK) {
+            if (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                             | PREFIX_REPZ | PREFIX_REPNZ)) {
+                 goto illegal_op;
+             }
+             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+@@ -6091,7 +6092,8 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
+             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
+             break;
+         case 0xef: /* wrpkru */
+-            if (prefixes & PREFIX_LOCK) {
+            if (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                             | PREFIX_REPZ | PREFIX_REPNZ)) {
+                 goto illegal_op;
+             }
+             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
--- a/debian/patches/extra/0010-target-i386-fix-feature-dependency-for-WAITPKG.patch
+++ b/debian/patches/extra/0010-target-i386-fix-feature-dependency-for-WAITPKG.patch
@@ -0,0 +1,33 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Wed, 8 May 2024 11:10:54 +0200
+Subject: [PATCH] target/i386: fix feature dependency for WAITPKG
+
+The VMX feature bit depends on general availability of WAITPKG,
+not the other way round.
+
+Fixes: 33cc88261c3 ("target/i386: add support for VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE", 2023-08-28)
+Cc: qemu-stable@nongnu.org
+Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit fe01af5d47d4cf7fdf90c54d43f784e5068c8d72)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/i386/cpu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index 33760a2ee1..e693f8ca9a 100644
+--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
+@@ -1550,8 +1550,8 @@ static FeatureDep feature_dependencies[] = {
+         .to = { FEAT_SVM,                   ~0ull },
+     },
+     {
+-        .from = { FEAT_VMX_SECONDARY_CTLS,  VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
+-        .to = { FEAT_7_0_ECX,               CPUID_7_0_ECX_WAITPKG },
+        .from = { FEAT_7_0_ECX,             CPUID_7_0_ECX_WAITPKG },
+        .to = { FEAT_VMX_SECONDARY_CTLS,    VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
+     },
+ };
+ 
--- a/debian/patches/extra/0011-Revert-virtio-pci-fix-use-of-a-released-vector.patch
+++ b/debian/patches/extra/0011-Revert-virtio-pci-fix-use-of-a-released-vector.patch
@@ -0,0 +1,87 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 16 May 2024 12:59:52 +0200
+Subject: [PATCH] Revert "virtio-pci: fix use of a released vector"
+
+This reverts commit 2ce6cff94df2650c460f809e5ad263f1d22507c0.
+
+The fix causes some issues:
+https://gitlab.com/qemu-project/qemu/-/issues/2321
+https://gitlab.com/qemu-project/qemu/-/issues/2334
+
+The CVE fixed by commit 2ce6cff94d ("virtio-pci: fix use of a released
+vector") is CVE-2024-4693 [0] and allows a malicious guest that
+controls the boot process in the guest to crash its QEMU process.
+
+The issues sound worse than the CVE, so revert until there is a proper
+fix.
+
+[0]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-4693
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/virtio/virtio-pci.c | 37 ++-----------------------------------
+ 1 file changed, 2 insertions(+), 35 deletions(-)
+
+diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
+index cb159fd078..cb6940fc0e 100644
+--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
+@@ -1424,38 +1424,6 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
+     return offset;
+ }
+ 
+-static void virtio_pci_set_vector(VirtIODevice *vdev,
+-                                  VirtIOPCIProxy *proxy,
+-                                  int queue_no, uint16_t old_vector,
+-                                  uint16_t new_vector)
+-{
+-    bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+-        msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled();
+-
+-    if (new_vector == old_vector) {
+-        return;
+-    }
+-
+-    /*
+-     * If the device uses irqfd and the vector changes after DRIVER_OK is
+-     * set, we need to release the old vector and set up the new one.
+-     * Otherwise just need to set the new vector on the device.
+-     */
+-    if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) {
+-        kvm_virtio_pci_vector_release_one(proxy, queue_no);
+-    }
+-    /* Set the new vector on the device. */
+-    if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
+-        vdev->config_vector = new_vector;
+-    } else {
+-        virtio_queue_set_vector(vdev, queue_no, new_vector);
+-    }
+-    /* If the new vector changed need to set it up. */
+-    if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) {
+-        kvm_virtio_pci_vector_use_one(proxy, queue_no);
+-    }
+-}
+-
+ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
+                            uint8_t bar, uint64_t offset, uint64_t length,
+                            uint8_t id)
+@@ -1602,8 +1570,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
+         } else {
+             val = VIRTIO_NO_VECTOR;
+         }
+-        virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX,
+-                              vdev->config_vector, val);
+        vdev->config_vector = val;
+         break;
+     case VIRTIO_PCI_COMMON_STATUS:
+         if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
+@@ -1643,7 +1610,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
+         } else {
+             val = VIRTIO_NO_VECTOR;
+         }
+-        virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val);
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+         break;
+     case VIRTIO_PCI_COMMON_Q_ENABLE:
+         if (val == 1) {
--- a/debian/patches/extra/0012-hw-core-machine-move-compatibility-flags-for-VirtIO-.patch
+++ b/debian/patches/extra/0012-hw-core-machine-move-compatibility-flags-for-VirtIO-.patch
@@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 16 May 2024 15:21:07 +0200
+Subject: [PATCH] hw/core/machine: move compatibility flags for VirtIO-net USO
+ to machine 8.1
+
+Migration from an 8.2 or 9.0 binary to an 8.1 binary with machine
+version 8.1 can fail with:
+
+> kvm: Features 0x1c0010130afffa7 unsupported. Allowed features: 0x10179bfffe7
+> kvm: Failed to load virtio-net:virtio
+> kvm: error while loading state for instance 0x0 of device '0000:00:12.0/virtio-net'
+> kvm: load of migration failed: Operation not permitted
+
+The series
+
+53da8b5a99 virtio-net: Add support for USO features
+9da1684954 virtio-net: Add USO flags to vhost support.
+f03e0cf63b tap: Add check for USO features
+2ab0ec3121 tap: Add USO support to tap device.
+
+only landed in QEMU 8.2, so the compatibility flags should be part of
+machine version 8.1.
+
+Moving the flags unfortunately breaks forward migration with machine
+version 8.1 from a binary without this patch to a binary with this
+patch when the feature is enabled by the guest.
+
+Fixes: 53da8b5a99 ("virtio-net: Add support for USO features")
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/core/machine.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/hw/core/machine.c b/hw/core/machine.c
+index d33a37a6f6..4273de16a0 100644
+--- a/hw/core/machine.c
+++ b/hw/core/machine.c
+@@ -46,15 +46,15 @@ GlobalProperty hw_compat_8_1[] = {
+     { "ramfb", "x-migrate", "off" },
+     { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" },
+     { "igb", "x-pcie-flr-init", "off" },
+    { TYPE_VIRTIO_NET, "host_uso", "off"},
+    { TYPE_VIRTIO_NET, "guest_uso4", "off"},
+    { TYPE_VIRTIO_NET, "guest_uso6", "off"},
+ };
+ const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
+ 
+ GlobalProperty hw_compat_8_0[] = {
+     { "migration", "multifd-flush-after-each-section", "on"},
+     { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
+-    { TYPE_VIRTIO_NET, "host_uso", "off"},
+-    { TYPE_VIRTIO_NET, "guest_uso4", "off"},
+-    { TYPE_VIRTIO_NET, "guest_uso6", "off"},
+ };
+ const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
+ 
--- a/debian/patches/pve-qemu-9.0-vitastor.patch
+++ b/debian/patches/pve-qemu-9.0-vitastor.patch
--- a/debian/patches/pve/0001-PVE-Config-block-file-change-locking-default-to-off.patch
+++ b/debian/patches/pve/0001-PVE-Config-block-file-change-locking-default-to-off.patch
@@ -14,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/block/file-posix.c b/block/file-posix.c
-index 7e19bbff5f..b527e82a82 100644
+index 35684f7e21..43bc0bd520 100644
 --- a/block/file-posix.c
 +++ b/block/file-posix.c
-@@ -450,7 +450,7 @@ static QemuOptsList raw_runtime_opts = {
+@@ -563,7 +563,7 @@ static QemuOptsList raw_runtime_opts = {
         {
             .name = "locking",
             .type = QEMU_OPT_STRING,
@@ -26,7 +26,7 @@ index 7e19bbff5f..b527e82a82 100644
         },
         {
             .name = "pr-manager",
-@@ -550,7 +550,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
+@@ -663,7 +663,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
         s->use_lock = false;
         break;
     case ON_OFF_AUTO_AUTO:
--- a/debian/patches/pve/0002-PVE-Config-Adjust-network-script-path-to-etc-kvm.patch
+++ b/debian/patches/pve/0002-PVE-Config-Adjust-network-script-path-to-etc-kvm.patch
@@ -5,22 +5,21 @@ Subject: [PATCH] PVE: [Config] Adjust network script path to /etc/kvm/

 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
- include/net/net.h | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
+ include/net/net.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/include/net/net.h b/include/net/net.h
-index 39085d9444..487e3ea1b4 100644
+index b1f9b35fcc..096c0d52e4 100644
 --- a/include/net/net.h
 +++ b/include/net/net.h
-@@ -208,8 +208,9 @@ void netdev_add(QemuOpts *opts, Error **errp);
+@@ -317,8 +317,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
 int net_hub_id_for_client(NetClientState *nc, int *id);
 NetClientState *net_hub_port_find(int hub_id);
 
-#define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
-#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
-+#define DEFAULT_NETWORK_SCRIPT "/etc/kvm/kvm-ifup"
-+#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/kvm/kvm-ifdown"
-+
+-#define DEFAULT_NETWORK_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifup"
+-#define DEFAULT_NETWORK_DOWN_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifdown"
+#define DEFAULT_NETWORK_SCRIPT CONFIG_SYSCONFDIR "/kvm/kvm-ifup"
+#define DEFAULT_NETWORK_DOWN_SCRIPT CONFIG_SYSCONFDIR "/kvm/kvm-ifdown"
 #define DEFAULT_BRIDGE_HELPER CONFIG_QEMU_HELPERDIR "/qemu-bridge-helper"
 #define DEFAULT_BRIDGE_INTERFACE "br0"
 
--- a/debian/patches/pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
+++ b/debian/patches/pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
@@ -10,10 +10,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/target/i386/cpu.h b/target/i386/cpu.h
-index e818fc712a..dd9bf7b3da 100644
+index 6b05738079..d82869900a 100644
 --- a/target/i386/cpu.h
 +++ b/target/i386/cpu.h
-@@ -1954,9 +1954,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
+@@ -2291,9 +2291,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
 #define CPU_RESOLVING_TYPE TYPE_X86_CPU
 
 #ifdef TARGET_X86_64
@@ -24,4 +24,4 @@ index e818fc712a..dd9bf7b3da 100644
 +#define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("kvm32")
 #endif
 
- #define cpu_signal_handler cpu_x86_signal_handler
+ #define cpu_list x86_cpu_list
--- a/debian/patches/pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
+++ b/debian/patches/pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 9 insertions(+), 6 deletions(-)

 diff --git a/ui/spice-core.c b/ui/spice-core.c
-index ecc2ec2c55..ca04965ead 100644
+index 15be640286..ea20e6153c 100644
 --- a/ui/spice-core.c
 +++ b/ui/spice-core.c
-@@ -668,32 +668,35 @@ void qemu_spice_init(void)
+@@ -690,32 +690,35 @@ static void qemu_spice_init(void)
 
     if (tls_port) {
         x509_dir = qemu_opt_get(opts, "x509-dir");
--- a/debian/patches/pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
+++ b/debian/patches/pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 11 insertions(+), 4 deletions(-)

 diff --git a/block/gluster.c b/block/gluster.c
-index 0aa1f2cda4..dcd1ef7ebc 100644
+index cc74af06dc..3ba9bbfa5e 100644
 --- a/block/gluster.c
 +++ b/block/gluster.c
-@@ -42,7 +42,7 @@
+@@ -43,7 +43,7 @@
 #define GLUSTER_DEBUG_DEFAULT       4
 #define GLUSTER_DEBUG_MAX           9
 #define GLUSTER_OPT_LOGFILE         "logfile"
@@ -21,15 +21,15 @@ index 0aa1f2cda4..dcd1ef7ebc 100644
 /*
  * Several versions of GlusterFS (3.12? -> 6.0.1) fail when the transfer size
  * is greater or equal to 1024 MiB, so we are limiting the transfer size to 512
-@@ -424,6 +424,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
+@@ -425,6 +425,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
     int old_errno;
     SocketAddressList *server;
-     unsigned long long port;
+     uint64_t port;
 +    const char *logfile;
 
     glfs = glfs_find_preopened(gconf->volume);
     if (glfs) {
-@@ -466,9 +467,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
+@@ -467,9 +468,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
         }
     }
 
--- a/debian/patches/pve/0005-PVE-Config-smm_available-false.patch
+++ b/debian/patches/pve/0005-PVE-Config-smm_available-false.patch
@@ -1,24 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Alexandre Derumier <aderumier@odiso.com>
-Date: Mon, 6 Apr 2020 12:16:34 +0200
-Subject: [PATCH] PVE: [Config] smm_available = false
-
-Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- hw/i386/x86.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/hw/i386/x86.c b/hw/i386/x86.c
-index b82770024c..bd05b3c79a 100644
--- a/hw/i386/x86.c
-+++ b/hw/i386/x86.c
-@@ -896,7 +896,7 @@ bool x86_machine_is_smm_enabled(X86MachineState *x86ms)
-     if (tcg_enabled() || qtest_enabled()) {
-         smm_available = true;
-     } else if (kvm_enabled()) {
-        smm_available = kvm_has_smm();
-+        smm_available = false;
-     }
- 
-     if (smm_available) {
--- a/debian/patches/pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
+++ b/debian/patches/pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+)

 diff --git a/block/rbd.c b/block/rbd.c
-index e637639a07..5717e7258c 100644
+index 84bb2fa5d7..63f60d41be 100644
 --- a/block/rbd.c
 +++ b/block/rbd.c
-@@ -651,6 +651,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
+@@ -963,6 +963,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
         rados_conf_set(*cluster, "rbd_cache", "false");
     }
 
--- a/debian/patches/pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
+++ b/debian/patches/pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 9 insertions(+), 1 deletion(-)

 diff --git a/block/gluster.c b/block/gluster.c
-index dcd1ef7ebc..ac79b4bdb4 100644
+index 3ba9bbfa5e..34936eb855 100644
 --- a/block/gluster.c
 +++ b/block/gluster.c
-@@ -57,6 +57,7 @@ typedef struct GlusterAIOCB {
+@@ -58,6 +58,7 @@ typedef struct GlusterAIOCB {
     int ret;
     Coroutine *coroutine;
     AioContext *aio_context;
@@ -27,7 +27,7 @@ index dcd1ef7ebc..ac79b4bdb4 100644
 } GlusterAIOCB;
 
 typedef struct BDRVGlusterState {
-@@ -763,8 +764,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
+@@ -753,8 +754,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
         acb->ret = 0; /* Success */
     } else if (ret < 0) {
         acb->ret = -errno; /* Read/Write failed */
@@ -39,15 +39,15 @@ index dcd1ef7ebc..ac79b4bdb4 100644
     }
 
     aio_co_schedule(acb->aio_context, acb->coroutine);
-@@ -1035,6 +1038,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
+@@ -1023,6 +1026,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);
 +    acb.is_write = true;
 
-     ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
+     ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
     if (ret < 0) {
-@@ -1216,9 +1220,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
+@@ -1203,9 +1207,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
     acb.aio_context = bdrv_get_aio_context(bs);
 
     if (write) {
@@ -59,7 +59,7 @@ index dcd1ef7ebc..ac79b4bdb4 100644
         ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
                                 gluster_finish_aiocb, &acb);
     }
-@@ -1281,6 +1287,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
+@@ -1268,6 +1274,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);
@@ -67,11 +67,11 @@ index dcd1ef7ebc..ac79b4bdb4 100644
 
     ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
     if (ret < 0) {
-@@ -1327,6 +1334,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
+@@ -1316,6 +1323,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);
 +    acb.is_write = true;
 
-     ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
+     ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
     if (ret < 0) {
--- a/debian/patches/pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
+++ b/debian/patches/pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/qemu-img.c b/qemu-img.c
-index 821cbf610e..667c540a89 100644
+index 7668f86769..2575e97b43 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
-@@ -2821,7 +2821,8 @@ static int img_info(int argc, char **argv)
+@@ -3075,7 +3075,8 @@ static int img_info(int argc, char **argv)
     list = collect_image_info_list(image_opts, filename, fmt, chain,
                                    force_share);
     if (!list) {
--- a/debian/patches/pve/0008-PVE-Up-qmp-add-get_link_status.patch
+++ b/debian/patches/pve/0008-PVE-Up-qmp-add-get_link_status.patch
@@ -1,88 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Mon, 6 Apr 2020 12:16:37 +0200
-Subject: [PATCH] PVE: [Up] qmp: add get_link_status
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- net/net.c        | 27 +++++++++++++++++++++++++++
- qapi/net.json    | 15 +++++++++++++++
- qapi/pragma.json |  1 +
- 3 files changed, 43 insertions(+)
-
-diff --git a/net/net.c b/net/net.c
-index 38778e831d..dabfb482f0 100644
--- a/net/net.c
-+++ b/net/net.c
-@@ -1331,6 +1331,33 @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
-     }
- }
- 
-+int64_t qmp_get_link_status(const char *name, Error **errp)
-+{
-+    NetClientState *ncs[MAX_QUEUE_NUM];
-+    NetClientState *nc;
-+    int queues;
-+    bool ret;
-+
-+    queues = qemu_find_net_clients_except(name, ncs,
-+                                          NET_CLIENT_DRIVER__MAX,
-+                                          MAX_QUEUE_NUM);
-+
-+    if (queues == 0) {
-+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
-+                  "Device '%s' not found", name);
-+        return (int64_t) -1;
-+    }
-+
-+    nc = ncs[0];
-+    ret = ncs[0]->link_down;
-+
-+    if (nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
-+      ret = ncs[0]->peer->link_down;
-+    }
-+
-+    return (int64_t) ret ? 0 : 1;
-+}
-+
- void colo_notify_filters_event(int event, Error **errp)
- {
-     NetClientState *nc;
-diff --git a/qapi/net.json b/qapi/net.json
-index cebb1b52e3..f6854483b1 100644
--- a/qapi/net.json
-+++ b/qapi/net.json
-@@ -34,6 +34,21 @@
- ##
- { 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
- 
-+##
-+# @get_link_status:
-+#
-+# Get the current link state of the nics or nic.
-+#
-+# @name: name of the nic you get the state of
-+#
-+# Return: If link is up 1
-+#         If link is down 0
-+#         If an error occure an empty string.
-+#
-+# Notes: this is an Proxmox VE extension and not offical part of Qemu.
-+##
-+{ 'command': 'get_link_status', 'data': {'name': 'str'} , 'returns': 'int' }
-+
- ##
- # @netdev_add:
- #
-diff --git a/qapi/pragma.json b/qapi/pragma.json
-index cffae27666..5a3e3de95f 100644
--- a/qapi/pragma.json
-+++ b/qapi/pragma.json
-@@ -5,6 +5,7 @@
- { 'pragma': {
-     # Commands allowed to return a non-dictionary:
-     'returns-whitelist': [
-+	'get_link_status',
-         'human-monitor-command',
-         'qom-get',
-         'query-migrate-cache-size',
--- a/debian/patches/pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
+++ b/debian/patches/pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
@@ -31,16 +31,17 @@ override the output file's size.

 Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 qemu-img-cmds.hx |   4 +-
- qemu-img.c       | 192 +++++++++++++++++++++++++++++------------------
- 2 files changed, 122 insertions(+), 74 deletions(-)
+ qemu-img.c       | 202 ++++++++++++++++++++++++++++++-----------------
+ 2 files changed, 133 insertions(+), 73 deletions(-)

 diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
-index c9c54de1df..0f98033658 100644
+index c9dd70a892..048788b23d 100644
 --- a/qemu-img-cmds.hx
 +++ b/qemu-img-cmds.hx
-@@ -51,9 +51,9 @@ SRST
+@@ -60,9 +60,9 @@ SRST
 ERST
 
 DEF("dd", img_dd,
@@ -53,10 +54,10 @@ index c9c54de1df..0f98033658 100644
 
 DEF("info", img_info,
 diff --git a/qemu-img.c b/qemu-img.c
-index 667c540a89..6b7d1fcb51 100644
+index 2575e97b43..8ec68b346f 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
-@@ -4444,10 +4444,12 @@ out:
+@@ -4993,10 +4993,12 @@ static int img_bitmap(int argc, char **argv)
 #define C_IF      04
 #define C_OF      010
 #define C_SKIP    020
@@ -69,7 +70,7 @@ index 667c540a89..6b7d1fcb51 100644
 };
 
 struct DdIo {
-@@ -4526,6 +4528,20 @@ static int img_dd_skip(const char *arg,
+@@ -5072,6 +5074,19 @@ static int img_dd_skip(const char *arg,
     return 0;
 }
 
@@ -77,10 +78,9 @@ index 667c540a89..6b7d1fcb51 100644
 +                        struct DdIo *in, struct DdIo *out,
 +                        struct DdInfo *dd)
 +{
-+    dd->osize = cvtnum(arg);
+    dd->osize = cvtnum("size", arg);
 +
 +    if (dd->osize < 0) {
-+        error_report("invalid number: '%s'", arg);
 +        return 1;
 +    }
 +
@@ -90,7 +90,7 @@ index 667c540a89..6b7d1fcb51 100644
 static int img_dd(int argc, char **argv)
 {
     int ret = 0;
-@@ -4566,6 +4582,7 @@ static int img_dd(int argc, char **argv)
+@@ -5112,6 +5127,7 @@ static int img_dd(int argc, char **argv)
         { "if", img_dd_if, C_IF },
         { "of", img_dd_of, C_OF },
         { "skip", img_dd_skip, C_SKIP },
@@ -98,7 +98,7 @@ index 667c540a89..6b7d1fcb51 100644
         { NULL, NULL, 0 }
     };
     const struct option long_options[] = {
-@@ -4644,8 +4661,13 @@ static int img_dd(int argc, char **argv)
+@@ -5187,91 +5203,112 @@ static int img_dd(int argc, char **argv)
         arg = NULL;
     }
 
@@ -106,53 +106,30 @@ index 667c540a89..6b7d1fcb51 100644
 -        error_report("Must specify both input and output files");
 +    if (!(dd.flags & C_IF) && (!fmt || strcmp(fmt, "raw") != 0)) {
 +        error_report("Input format must be raw when readin from stdin");
-+        ret = -1;
-+        goto out;
-+    }
+         ret = -1;
+         goto out;
+     }
+-
+-    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
+-                    force_share);
+-
+-    if (!blk1) {
 +    if (!(dd.flags & C_OF) && strcmp(out_fmt, "raw") != 0) {
 +        error_report("Output format must be raw when writing to stdout");
         ret = -1;
         goto out;
     }
-@@ -4657,85 +4679,101 @@ static int img_dd(int argc, char **argv)
-         goto out;
-     }
- 
-    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
-                    force_share);
-+    if (dd.flags & C_IF) {
-+        blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
-+                        force_share);
- 
-    if (!blk1) {
-        ret = -1;
-        goto out;
-+        if (!blk1) {
-+            ret = -1;
-+            goto out;
-+        }
-     }
 
 -    drv = bdrv_find_format(out_fmt);
 -    if (!drv) {
 -        error_report("Unknown file format");
-+    if (dd.flags & C_OSIZE) {
-+        size = dd.osize;
-+    } else if (dd.flags & C_IF) {
-+        size = blk_getlength(blk1);
-+        if (size < 0) {
-+            error_report("Failed to get size for '%s'", in.filename);
-+            ret = -1;
-+            goto out;
-+        }
-+    } else if (dd.flags & C_COUNT) {
-+        size = dd.count * in.bsz;
-+    } else {
-+        error_report("Output size must be known when reading from stdin");
-         ret = -1;
-         goto out;
-     }
+-        ret = -1;
+-        goto out;
+-    }
 -    proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
+    if (dd.flags & C_IF) {
+        blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
+                        force_share);
 
 -    if (!proto_drv) {
 -        error_report_err(local_err);
@@ -170,14 +147,50 @@ index 667c540a89..6b7d1fcb51 100644
 -                     proto_drv->format_name);
 -        ret = -1;
 -        goto out;
-+    if (!(dd.flags & C_OSIZE) && dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
-+        dd.count * in.bsz < size) {
-+        size = dd.count * in.bsz;
+        if (!blk1) {
+            ret = -1;
+            goto out;
+        }
     }
 -    create_opts = qemu_opts_append(create_opts, drv->create_opts);
 -    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
- 
+-
 -    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
+ 
+-    size = blk_getlength(blk1);
+-    if (size < 0) {
+-        error_report("Failed to get size for '%s'", in.filename);
+    if (dd.flags & C_OSIZE) {
+        size = dd.osize;
+    } else if (dd.flags & C_IF) {
+        size = blk_getlength(blk1);
+        if (size < 0) {
+            error_report("Failed to get size for '%s'", in.filename);
+            ret = -1;
+            goto out;
+        }
+    } else if (dd.flags & C_COUNT) {
+        size = dd.count * in.bsz;
+    } else {
+        error_report("Output size must be known when reading from stdin");
+         ret = -1;
+         goto out;
+     }
+ 
+-    if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
+    if (!(dd.flags & C_OSIZE) && dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
+         dd.count * in.bsz < size) {
+         size = dd.count * in.bsz;
+     }
+ 
+-    /* Overflow means the specified offset is beyond input image's size */
+-    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
+-                              size < in.bsz * in.offset)) {
+-        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
+-    } else {
+-        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
+-                            size - in.bsz * in.offset, &error_abort);
+-    }
 +    if (dd.flags & C_OF) {
 +        drv = bdrv_find_format(out_fmt);
 +        if (!drv) {
@@ -187,9 +200,11 @@ index 667c540a89..6b7d1fcb51 100644
 +        }
 +        proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
 
-    size = blk_getlength(blk1);
-    if (size < 0) {
-        error_report("Failed to get size for '%s'", in.filename);
+-    ret = bdrv_create(drv, out.filename, opts, &local_err);
+-    if (ret < 0) {
+-        error_reportf_err(local_err,
+-                          "%s: error while creating output image: ",
+-                          out.filename);
 -        ret = -1;
 -        goto out;
 -    }
@@ -213,20 +228,18 @@ index 667c540a89..6b7d1fcb51 100644
 +        create_opts = qemu_opts_append(create_opts, drv->create_opts);
 +        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
 
-    if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
-        dd.count * in.bsz < size) {
-        size = dd.count * in.bsz;
-    }
+-    /* TODO, we can't honour --image-opts for the target,
+-     * since it needs to be given in a format compatible
+-     * with the bdrv_create() call above which does not
+-     * support image-opts style.
+-     */
+-    blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
+-                         false, false, false);
 +        opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
 
-    /* Overflow means the specified offset is beyond input image's size */
-    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
-                              size < in.bsz * in.offset)) {
-        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
-    } else {
-        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
-                            size - in.bsz * in.offset, &error_abort);
-    }
+-    if (!blk2) {
+-        ret = -1;
+-        goto out;
 +        /* Overflow means the specified offset is beyond input image's size */
 +        if (dd.flags & C_OSIZE) {
 +            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
@@ -237,15 +250,7 @@ index 667c540a89..6b7d1fcb51 100644
 +            qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
 +                                size - in.bsz * in.offset, &error_abort);
 +        }
- 
-    ret = bdrv_create(drv, out.filename, opts, &local_err);
-    if (ret < 0) {
-        error_reportf_err(local_err,
-                          "%s: error while creating output image: ",
-                          out.filename);
-        ret = -1;
-        goto out;
-    }
+
 +        ret = bdrv_create(drv, out.filename, opts, &local_err);
 +        if (ret < 0) {
 +            error_reportf_err(local_err,
@@ -254,14 +259,7 @@ index 667c540a89..6b7d1fcb51 100644
 +            ret = -1;
 +            goto out;
 +        }
- 
-    /* TODO, we can't honour --image-opts for the target,
-     * since it needs to be given in a format compatible
-     * with the bdrv_create() call above which does not
-     * support image-opts style.
-     */
-    blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
-                         false, false, false);
+
 +        /* TODO, we can't honour --image-opts for the target,
 +         * since it needs to be given in a format compatible
 +         * with the bdrv_create() call above which does not
@@ -269,10 +267,7 @@ index 667c540a89..6b7d1fcb51 100644
 +         */
 +        blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
 +                             false, false, false);
- 
-    if (!blk2) {
-        ret = -1;
-        goto out;
+
 +        if (!blk2) {
 +            ret = -1;
 +            goto out;
@@ -280,41 +275,54 @@ index 667c540a89..6b7d1fcb51 100644
     }
 
     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
-@@ -4753,11 +4791,17 @@ static int img_dd(int argc, char **argv)
+@@ -5288,20 +5325,43 @@ static int img_dd(int argc, char **argv)
+     in.buf = g_new(uint8_t, in.bsz);
 
-     for (out_pos = 0; in_pos < size; block_count++) {
-         int in_ret, out_ret;
+     for (out_pos = 0; in_pos < size; ) {
+        int in_ret, out_ret;
+         int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
 -
-        if (in_pos + in.bsz > size) {
-            in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
-+        size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+-        ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
+-        if (ret < 0) {
 +        if (blk1) {
-+            in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
-         } else {
-            in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
-+            in_ret = read(STDIN_FILENO, in.buf, in_bsz);
+            in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
+            if (in_ret == 0) {
+                in_ret = bytes;
+            }
+        } else {
+            in_ret = read(STDIN_FILENO, in.buf, bytes);
 +            if (in_ret == 0) {
 +                /* early EOF is considered an error */
 +                error_report("Input ended unexpectedly");
 +                ret = -1;
 +                goto out;
 +            }
-         }
-         if (in_ret < 0) {
+        }
+        if (in_ret < 0) {
             error_report("error while reading from input image file: %s",
-@@ -4767,9 +4811,13 @@ static int img_dd(int argc, char **argv)
+-                         strerror(-ret));
+                         strerror(-in_ret));
+            ret = -1;
+             goto out;
         }
-         in_pos += in_ret;
+         in_pos += bytes;
 
-        out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+-        ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
+-        if (ret < 0) {
 +        if (blk2) {
-+            out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+            out_ret = blk_pwrite(blk2, out_pos, in_ret, in.buf, 0);
+            if (out_ret == 0) {
+                out_ret = in_ret;
+            }
 +        } else {
 +            out_ret = write(STDOUT_FILENO, in.buf, in_ret);
 +        }
- 
-        if (out_ret < 0) {
+
 +        if (out_ret != in_ret) {
             error_report("error while writing to output image file: %s",
-                          strerror(-out_ret));
-             ret = -1;
+-                         strerror(-ret));
+                         strerror(-out_ret));
+            ret = -1;
+             goto out;
+         }
+         out_pos += bytes;
--- a/debian/patches/pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
+++ b/debian/patches/pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
@@ -10,15 +10,16 @@ an expected end of input.

 Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- qemu-img.c | 29 ++++++++++++++++++++++++++---
- 1 file changed, 26 insertions(+), 3 deletions(-)
+ qemu-img.c | 28 +++++++++++++++++++++++++---
+ 1 file changed, 25 insertions(+), 3 deletions(-)

 diff --git a/qemu-img.c b/qemu-img.c
-index 6b7d1fcb51..17393b2f53 100644
+index 8ec68b346f..b98184bba1 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
-@@ -4445,11 +4445,13 @@ out:
+@@ -4994,11 +4994,13 @@ static int img_bitmap(int argc, char **argv)
 #define C_OF      010
 #define C_SKIP    020
 #define C_OSIZE   040
@@ -32,7 +33,7 @@ index 6b7d1fcb51..17393b2f53 100644
 };
 
 struct DdIo {
-@@ -4542,6 +4544,20 @@ static int img_dd_osize(const char *arg,
+@@ -5087,6 +5089,19 @@ static int img_dd_osize(const char *arg,
     return 0;
 }
 
@@ -40,10 +41,9 @@ index 6b7d1fcb51..17393b2f53 100644
 +                        struct DdIo *in, struct DdIo *out,
 +                        struct DdInfo *dd)
 +{
-+    dd->isize = cvtnum(arg);
+    dd->isize = cvtnum("size", arg);
 +
 +    if (dd->isize < 0) {
-+        error_report("invalid number: '%s'", arg);
 +        return 1;
 +    }
 +
@@ -53,13 +53,13 @@ index 6b7d1fcb51..17393b2f53 100644
 static int img_dd(int argc, char **argv)
 {
     int ret = 0;
-@@ -4556,12 +4572,14 @@ static int img_dd(int argc, char **argv)
+@@ -5101,12 +5116,14 @@ static int img_dd(int argc, char **argv)
     int c, i;
     const char *out_fmt = "raw";
     const char *fmt = NULL;
 -    int64_t size = 0;
 +    int64_t size = 0, readsize = 0;
-     int64_t block_count = 0, out_pos, in_pos;
+     int64_t out_pos, in_pos;
     bool force_share = false;
     struct DdInfo dd = {
         .flags = 0,
@@ -69,7 +69,7 @@ index 6b7d1fcb51..17393b2f53 100644
     };
     struct DdIo in = {
         .bsz = 512, /* Block size is by default 512 bytes */
-@@ -4583,6 +4601,7 @@ static int img_dd(int argc, char **argv)
+@@ -5128,6 +5145,7 @@ static int img_dd(int argc, char **argv)
         { "of", img_dd_of, C_OF },
         { "skip", img_dd_skip, C_SKIP },
         { "osize", img_dd_osize, C_OSIZE },
@@ -77,20 +77,22 @@ index 6b7d1fcb51..17393b2f53 100644
         { NULL, NULL, 0 }
     };
     const struct option long_options[] = {
-@@ -4789,14 +4808,18 @@ static int img_dd(int argc, char **argv)
+@@ -5324,9 +5342,10 @@ static int img_dd(int argc, char **argv)
 
     in.buf = g_new(uint8_t, in.bsz);
 
-    for (out_pos = 0; in_pos < size; block_count++) {
+-    for (out_pos = 0; in_pos < size; ) {
 +    readsize = (dd.isize > 0) ? dd.isize : size;
-+    for (out_pos = 0; in_pos < readsize; block_count++) {
+    for (out_pos = 0; in_pos < readsize; ) {
         int in_ret, out_ret;
-        size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
-+        size_t in_bsz = in_pos + in.bsz > readsize ? readsize - in_pos : in.bsz;
+-        int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
+        int bytes = (in_pos + in.bsz > readsize) ? readsize - in_pos : in.bsz;
         if (blk1) {
-             in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
+             in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
+             if (in_ret == 0) {
+@@ -5335,6 +5354,9 @@ static int img_dd(int argc, char **argv)
         } else {
-             in_ret = read(STDIN_FILENO, in.buf, in_bsz);
+             in_ret = read(STDIN_FILENO, in.buf, bytes);
             if (in_ret == 0) {
 +                if (dd.isize == 0) {
 +                    goto out;
--- a/debian/patches/pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
+++ b/debian/patches/pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
@@ -0,0 +1,121 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Alexandre Derumier <aderumier@odiso.com>
+Date: Mon, 6 Apr 2020 12:16:42 +0200
+Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: fix getopt-string + add documentation]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ docs/tools/qemu-img.rst | 11 ++++++++++-
+ qemu-img-cmds.hx        |  4 ++--
+ qemu-img.c              | 23 ++++++++++++++---------
+ 3 files changed, 26 insertions(+), 12 deletions(-)
+
+diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
+index 3653adb963..d83e8fb3c0 100644
+--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
+@@ -212,6 +212,10 @@ Parameters to convert subcommand:
+ 
+ Parameters to dd subcommand:
+ 
+.. option:: -n
+
+  Skip the creation of the target volume
+
+ .. program:: qemu-img-dd
+ 
+ .. option:: bs=BLOCK_SIZE
+@@ -492,7 +496,7 @@ Command description:
+   it doesn't need to be specified separately in this case.
+ 
+ 
+-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+ 
+   dd copies from *INPUT* file to *OUTPUT* file converting it from
+   *FMT* format to *OUTPUT_FMT* format.
+@@ -503,6 +507,11 @@ Command description:
+ 
+   The size syntax is similar to :manpage:`dd(1)`'s size syntax.
+ 
+  If the ``-n`` option is specified, the target volume creation will be
+  skipped. This is useful for formats such as ``rbd`` if the target
+  volume has already been created with site specific options that cannot
+  be supplied through ``qemu-img``.
+
+ .. option:: info [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [--backing-chain] [-U] FILENAME
+ 
+   Give information about the disk image *FILENAME*. Use it in
+diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
+index 048788b23d..0b29a67a06 100644
+--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
+@@ -60,9 +60,9 @@ SRST
+ ERST
+ 
+ DEF("dd", img_dd,
+-    "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+    "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ SRST
+-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+ ERST
+ 
+ DEF("info", img_info,
+diff --git a/qemu-img.c b/qemu-img.c
+index b98184bba1..6fc8384f64 100644
+--- a/qemu-img.c
+++ b/qemu-img.c
+@@ -5118,7 +5118,7 @@ static int img_dd(int argc, char **argv)
+     const char *fmt = NULL;
+     int64_t size = 0, readsize = 0;
+     int64_t out_pos, in_pos;
+-    bool force_share = false;
+    bool force_share = false, skip_create = false;
+     struct DdInfo dd = {
+         .flags = 0,
+         .count = 0,
+@@ -5156,7 +5156,7 @@ static int img_dd(int argc, char **argv)
+         { 0, 0, 0, 0 }
+     };
+ 
+-    while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+    while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
+         if (c == EOF) {
+             break;
+         }
+@@ -5176,6 +5176,9 @@ static int img_dd(int argc, char **argv)
+         case 'h':
+             help();
+             break;
+        case 'n':
+            skip_create = true;
+            break;
+         case 'U':
+             force_share = true;
+             break;
+@@ -5306,13 +5309,15 @@ static int img_dd(int argc, char **argv)
+                                 size - in.bsz * in.offset, &error_abort);
+         }
+ 
+-        ret = bdrv_create(drv, out.filename, opts, &local_err);
+-        if (ret < 0) {
+-            error_reportf_err(local_err,
+-                              "%s: error while creating output image: ",
+-                              out.filename);
+-            ret = -1;
+-            goto out;
+        if (!skip_create) {
+            ret = bdrv_create(drv, out.filename, opts, &local_err);
+            if (ret < 0) {
+                error_reportf_err(local_err,
+                                  "%s: error while creating output image: ",
+                                  out.filename);
+                ret = -1;
+                goto out;
+            }
+         }
+ 
+         /* TODO, we can't honour --image-opts for the target,
--- a/debian/patches/pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
+++ b/debian/patches/pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
@@ -0,0 +1,130 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fabian Ebner <f.ebner@proxmox.com>
+Date: Mon, 7 Feb 2022 14:21:01 +0100
+Subject: [PATCH] qemu-img dd: add -l option for loading a snapshot
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ docs/tools/qemu-img.rst |  6 +++---
+ qemu-img-cmds.hx        |  4 ++--
+ qemu-img.c              | 33 +++++++++++++++++++++++++++++++--
+ 3 files changed, 36 insertions(+), 7 deletions(-)
+
+diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
+index d83e8fb3c0..61c6b21859 100644
+--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
+@@ -496,10 +496,10 @@ Command description:
+   it doesn't need to be specified separately in this case.
+ 
+ 
+-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+ 
+-  dd copies from *INPUT* file to *OUTPUT* file converting it from
+-  *FMT* format to *OUTPUT_FMT* format.
+  dd copies from *INPUT* file or snapshot *SNAPSHOT_PARAM* to *OUTPUT* file
+  converting it from *FMT* format to *OUTPUT_FMT* format.
+ 
+   The data is by default read and written using blocks of 512 bytes but can be
+   modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified
+diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
+index 0b29a67a06..758f397232 100644
+--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
+@@ -60,9 +60,9 @@ SRST
+ ERST
+ 
+ DEF("dd", img_dd,
+-    "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+    "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [-l snapshot_param] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ SRST
+-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+ ERST
+ 
+ DEF("info", img_info,
+diff --git a/qemu-img.c b/qemu-img.c
+index 6fc8384f64..a6c88e0860 100644
+--- a/qemu-img.c
+++ b/qemu-img.c
+@@ -5110,6 +5110,7 @@ static int img_dd(int argc, char **argv)
+     BlockDriver *drv = NULL, *proto_drv = NULL;
+     BlockBackend *blk1 = NULL, *blk2 = NULL;
+     QemuOpts *opts = NULL;
+    QemuOpts *sn_opts = NULL;
+     QemuOptsList *create_opts = NULL;
+     Error *local_err = NULL;
+     bool image_opts = false;
+@@ -5119,6 +5120,7 @@ static int img_dd(int argc, char **argv)
+     int64_t size = 0, readsize = 0;
+     int64_t out_pos, in_pos;
+     bool force_share = false, skip_create = false;
+    const char *snapshot_name = NULL;
+     struct DdInfo dd = {
+         .flags = 0,
+         .count = 0,
+@@ -5156,7 +5158,7 @@ static int img_dd(int argc, char **argv)
+         { 0, 0, 0, 0 }
+     };
+ 
+-    while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
+    while ((c = getopt_long(argc, argv, ":hf:O:l:Un", long_options, NULL))) {
+         if (c == EOF) {
+             break;
+         }
+@@ -5179,6 +5181,19 @@ static int img_dd(int argc, char **argv)
+         case 'n':
+             skip_create = true;
+             break;
+        case 'l':
+            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
+                                                  optarg, false);
+                if (!sn_opts) {
+                    error_report("Failed in parsing snapshot param '%s'",
+                                 optarg);
+                    goto out;
+                }
+            } else {
+                snapshot_name = optarg;
+            }
+            break;
+         case 'U':
+             force_share = true;
+             break;
+@@ -5238,11 +5253,24 @@ static int img_dd(int argc, char **argv)
+     if (dd.flags & C_IF) {
+         blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
+                         force_share);
+-
+         if (!blk1) {
+             ret = -1;
+             goto out;
+         }
+        if (sn_opts) {
+            bdrv_snapshot_load_tmp(blk_bs(blk1),
+                                   qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+                                   qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+                                   &local_err);
+        } else if (snapshot_name != NULL) {
+            bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(blk1), snapshot_name,
+                                                 &local_err);
+        }
+        if (local_err) {
+            error_reportf_err(local_err, "Failed to load snapshot: ");
+            ret = -1;
+            goto out;
+        }
+     }
+ 
+     if (dd.flags & C_OSIZE) {
+@@ -5397,6 +5425,7 @@ static int img_dd(int argc, char **argv)
+ out:
+     g_free(arg);
+     qemu_opts_del(opts);
+    qemu_opts_del(sn_opts);
+     qemu_opts_free(create_opts);
+     blk_unref(blk1);
+     blk_unref(blk2);
--- a/debian/patches/pve/0013-PVE-Up-qemu-img-dd-add-n-skip_create.patch
+++ b/debian/patches/pve/0013-PVE-Up-qemu-img-dd-add-n-skip_create.patch
@@ -1,65 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Alexandre Derumier <aderumier@odiso.com>
-Date: Mon, 6 Apr 2020 12:16:42 +0200
-Subject: [PATCH] PVE: [Up] qemu-img dd : add -n skip_create
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- qemu-img.c | 23 ++++++++++++++---------
- 1 file changed, 14 insertions(+), 9 deletions(-)
-
-diff --git a/qemu-img.c b/qemu-img.c
-index 17393b2f53..574bb3c73d 100644
--- a/qemu-img.c
-+++ b/qemu-img.c
-@@ -4574,7 +4574,7 @@ static int img_dd(int argc, char **argv)
-     const char *fmt = NULL;
-     int64_t size = 0, readsize = 0;
-     int64_t block_count = 0, out_pos, in_pos;
-    bool force_share = false;
-+    bool force_share = false, skip_create = false;
-     struct DdInfo dd = {
-         .flags = 0,
-         .count = 0,
-@@ -4612,7 +4612,7 @@ static int img_dd(int argc, char **argv)
-         { 0, 0, 0, 0 }
-     };
- 
-    while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
-+    while ((c = getopt_long(argc, argv, ":hf:O:U:n", long_options, NULL))) {
-         if (c == EOF) {
-             break;
-         }
-@@ -4632,6 +4632,9 @@ static int img_dd(int argc, char **argv)
-         case 'h':
-             help();
-             break;
-+        case 'n':
-+            skip_create = true;
-+            break;
-         case 'U':
-             force_share = true;
-             break;
-@@ -4772,13 +4775,15 @@ static int img_dd(int argc, char **argv)
-                                 size - in.bsz * in.offset, &error_abort);
-         }
- 
-        ret = bdrv_create(drv, out.filename, opts, &local_err);
-        if (ret < 0) {
-            error_reportf_err(local_err,
-                              "%s: error while creating output image: ",
-                              out.filename);
-            ret = -1;
-            goto out;
-+        if (!skip_create) {
-+            ret = bdrv_create(drv, out.filename, opts, &local_err);
-+            if (ret < 0) {
-+                error_reportf_err(local_err,
-+                                  "%s: error while creating output image: ",
-+                                  out.filename);
-+                ret = -1;
-+                goto out;
-+            }
-         }
- 
-         /* TODO, we can't honour --image-opts for the target,
--- a/debian/patches/pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
+++ b/debian/patches/pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
@@ -7,17 +7,62 @@ Actually provide memory information via the query-balloon
 command.

 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: add BalloonInfo to member name exceptions list
+     rebase for 8.0 - moved to hw/core/machine-hmp-cmds.c]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
+ hw/core/machine-hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
 hw/virtio/virtio-balloon.c | 33 +++++++++++++++++++++++++++++++--
- monitor/hmp-cmds.c         | 30 +++++++++++++++++++++++++++++-
- qapi/misc.json             | 22 +++++++++++++++++++++-
- 3 files changed, 81 insertions(+), 4 deletions(-)
+ qapi/machine.json          | 22 +++++++++++++++++++++-
+ qapi/pragma.json           |  1 +
+ 4 files changed, 82 insertions(+), 4 deletions(-)

+diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
+index a6ff6a4875..e7f74d1c63 100644
+--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
+@@ -175,7 +175,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
+         return;
+     }
+ 
+-    monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+    monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+    monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+    if (info->has_total_mem) {
+        monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+    }
+    if (info->has_free_mem) {
+        monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+    }
+
+    if (info->has_mem_swapped_in) {
+        monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+    }
+    if (info->has_mem_swapped_out) {
+        monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+    }
+    if (info->has_major_page_faults) {
+        monitor_printf(mon, " major_page_faults=%" PRId64,
+                       info->major_page_faults);
+    }
+    if (info->has_minor_page_faults) {
+        monitor_printf(mon, " minor_page_faults=%" PRId64,
+                       info->minor_page_faults);
+    }
+    if (info->has_last_update) {
+        monitor_printf(mon, " last_update=%" PRId64,
+                       info->last_update);
+    }
+
+    monitor_printf(mon, "\n");
+ 
+     qapi_free_BalloonInfo(info);
+ }
 diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
-index a4729f7fc9..97c1c16ccf 100644
+index 609e39a821..8cb6dfcac3 100644
 --- a/hw/virtio/virtio-balloon.c
 +++ b/hw/virtio/virtio-balloon.c
-@@ -713,8 +713,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
+@@ -781,8 +781,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
 static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
 {
     VirtIOBalloon *dev = opaque;
@@ -57,54 +102,13 @@ index a4729f7fc9..97c1c16ccf 100644
 }
 
 static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
-diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
-index 9b94e67879..0c6f6ff331 100644
--- a/monitor/hmp-cmds.c
-+++ b/monitor/hmp-cmds.c
-@@ -653,7 +653,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
-         return;
-     }
- 
-    monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
-+    monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
-+    monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
-+    if (info->has_total_mem) {
-+        monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
-+    }
-+    if (info->has_free_mem) {
-+        monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
-+    }
-+
-+    if (info->has_mem_swapped_in) {
-+        monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
-+    }
-+    if (info->has_mem_swapped_out) {
-+        monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
-+    }
-+    if (info->has_major_page_faults) {
-+        monitor_printf(mon, " major_page_faults=%" PRId64,
-+                       info->major_page_faults);
-+    }
-+    if (info->has_minor_page_faults) {
-+        monitor_printf(mon, " minor_page_faults=%" PRId64,
-+                       info->minor_page_faults);
-+    }
-+    if (info->has_last_update) {
-+        monitor_printf(mon, " last_update=%" PRId64,
-+                       info->last_update);
-+    }
-+
-+    monitor_printf(mon, "\n");
- 
-     qapi_free_BalloonInfo(info);
- }
-diff --git a/qapi/misc.json b/qapi/misc.json
-index 99b90ac80b..e2a6678eae 100644
--- a/qapi/misc.json
-+++ b/qapi/misc.json
-@@ -225,10 +225,30 @@
- #
- # @actual: the number of bytes the balloon currently contains
+diff --git a/qapi/machine.json b/qapi/machine.json
+index e8b60641f2..2054cdc70d 100644
+--- a/qapi/machine.json
+++ b/qapi/machine.json
+@@ -1079,9 +1079,29 @@
+ # @actual: the logical size of the VM in bytes Formula used:
+ #     logical_vm_size = vm_ram_size - balloon_size
 #
 +# @last_update: time when stats got updated from guest
 +#
@@ -122,8 +126,7 @@ index 99b90ac80b..e2a6678eae 100644
 +#
 +# @max_mem: amount of memory (in bytes) assigned to the guest
 +#
- # Since: 0.14.0
- #
+ # Since: 0.14
 ##
 -{ 'struct': 'BalloonInfo', 'data': {'actual': 'int' } }
 +{ 'struct': 'BalloonInfo',
@@ -134,3 +137,15 @@ index 99b90ac80b..e2a6678eae 100644
 
 ##
 # @query-balloon:
+diff --git a/qapi/pragma.json b/qapi/pragma.json
+index 59fbe74b8c..be8fa304c5 100644
+--- a/qapi/pragma.json
+++ b/qapi/pragma.json
+@@ -90,6 +90,7 @@
+     'member-name-exceptions': [     # visible in:
+         'ACPISlotType',             # query-acpi-ospm-status
+         'AcpiTableOptions',         # -acpitable
+        'BalloonInfo',              # query-balloon
+         'BlkdebugEvent',            # blockdev-add, -blockdev
+         'BlkdebugSetStateOptions',  # blockdev-add, -blockdev
+         'BlockDeviceInfo',          # query-block
--- a/debian/patches/pve/0014-PVE-qapi-modify-query-machines.patch
+++ b/debian/patches/pve/0014-PVE-qapi-modify-query-machines.patch
@@ -13,13 +13,13 @@ Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
 2 files changed, 9 insertions(+), 1 deletion(-)

 diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
-index eed5aeb2f7..1953633e82 100644
+index 4b72009cd3..314351cdff 100644
 --- a/hw/core/machine-qmp-cmds.c
 +++ b/hw/core/machine-qmp-cmds.c
-@@ -230,6 +230,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
-         info->hotpluggable_cpus = mc->has_hotpluggable_cpus;
+@@ -90,6 +90,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
         info->numa_mem_supported = mc->numa_mem_supported;
         info->deprecated = !!mc->deprecation_reason;
+         info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi");
 +
 +        if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
 +            info->has_is_current = true;
@@ -28,26 +28,26 @@ index eed5aeb2f7..1953633e82 100644
 +
         if (mc->default_cpu_type) {
             info->default_cpu_type = g_strdup(mc->default_cpu_type);
-             info->has_default_cpu_type = true;
+         }
 diff --git a/qapi/machine.json b/qapi/machine.json
-index ff7b5032e3..f6cf28f9fd 100644
+index 2054cdc70d..a024d5b05d 100644
 --- a/qapi/machine.json
 +++ b/qapi/machine.json
-@@ -340,6 +340,8 @@
+@@ -146,6 +146,8 @@
 #
 # @is-default: whether the machine is default
 #
 +# @is-current: whether this machine is currently used
 +#
 # @cpu-max: maximum number of CPUs supported by the machine type
- #           (since 1.5.0)
+ #     (since 1.5)
 #
-@@ -359,7 +361,7 @@
+@@ -170,7 +172,7 @@
 ##
 { 'struct': 'MachineInfo',
   'data': { 'name': 'str', '*alias': 'str',
 -            '*is-default': 'bool', 'cpu-max': 'int',
 +            '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
             'hotpluggable-cpus': 'bool',  'numa-mem-supported': 'bool',
-             'deprecated': 'bool', '*default-cpu-type': 'str' } }
- 
+             'deprecated': 'bool', '*default-cpu-type': 'str',
+             '*default-ram-id': 'str', 'acpi': 'bool' } }
--- a/debian/patches/pve/0015-PVE-qapi-modify-spice-query.patch
+++ b/debian/patches/pve/0015-PVE-qapi-modify-spice-query.patch
@@ -6,40 +6,41 @@ Subject: [PATCH] PVE: qapi: modify spice query
 Provide the last ticket in the SpiceInfo struct optionally.

 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: adapt to QAPI change]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 qapi/ui.json    | 3 +++
- ui/spice-core.c | 5 +++++
- 2 files changed, 8 insertions(+)
+ ui/spice-core.c | 4 ++++
+ 2 files changed, 7 insertions(+)

 diff --git a/qapi/ui.json b/qapi/ui.json
-index e16e98a060..feda6ef090 100644
+index f610bce118..6ea26a9acb 100644
 --- a/qapi/ui.json
 +++ b/qapi/ui.json
-@@ -213,11 +213,14 @@
+@@ -314,11 +314,14 @@
 #
 # @channels: a list of @SpiceChannel for each active spice channel
 #
 +# @ticket: The last ticket set with set_password
 +#
- # Since: 0.14.0
+ # Since: 0.14
 ##
 { 'struct': 'SpiceInfo',
   'data': {'enabled': 'bool', 'migrated': 'bool', '*host': 'str', '*port': 'int',
            '*tls-port': 'int', '*auth': 'str', '*compiled-version': 'str',
 +           '*ticket': 'str',
            'mouse-mode': 'SpiceQueryMouseMode', '*channels': ['SpiceChannel']},
-   'if': 'defined(CONFIG_SPICE)' }
+   'if': 'CONFIG_SPICE' }
 
 diff --git a/ui/spice-core.c b/ui/spice-core.c
-index ca04965ead..243466c13d 100644
+index ea20e6153c..55a15fba8b 100644
 --- a/ui/spice-core.c
 +++ b/ui/spice-core.c
-@@ -539,6 +539,11 @@ SpiceInfo *qmp_query_spice(Error **errp)
+@@ -548,6 +548,10 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
     micro = SPICE_SERVER_VERSION & 0xff;
     info->compiled_version = g_strdup_printf("%d.%d.%d", major, minor, micro);
 
 +    if (auth_passwd) {
-+        info->has_ticket = true;
 +        info->ticket =  g_strdup(auth_passwd);
 +    }
 +
--- a/debian/patches/pve/0016-PVE-add-IOChannel-implementation-for-savevm-async.patch
+++ b/debian/patches/pve/0016-PVE-add-IOChannel-implementation-for-savevm-async.patch
@@ -0,0 +1,284 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 13 Oct 2022 11:33:50 +0200
+Subject: [PATCH] PVE: add IOChannel implementation for savevm-async
+
+based on migration/channel-block.c and the implementation that was
+present in migration/savevm-async.c before QEMU 7.1.
+
+Passes along read/write requests to the given BlockBackend, while
+ensuring that a read request going beyond the end results in a
+graceful short read.
+
+Additionally, allows tracking the current position from the outside
+(intended to be used for progress tracking).
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ migration/channel-savevm-async.c | 184 +++++++++++++++++++++++++++++++
+ migration/channel-savevm-async.h |  51 +++++++++
+ migration/meson.build            |   1 +
+ 3 files changed, 236 insertions(+)
+ create mode 100644 migration/channel-savevm-async.c
+ create mode 100644 migration/channel-savevm-async.h
+
+diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
+new file mode 100644
+index 0000000000..081a192f49
+--- /dev/null
+++ b/migration/channel-savevm-async.c
+@@ -0,0 +1,184 @@
+/*
+ * QIO Channel implementation to be used by savevm-async QMP calls
+ */
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "qapi/error.h"
+#include "sysemu/block-backend.h"
+#include "trace.h"
+
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos)
+{
+    QIOChannelSavevmAsync *ioc;
+
+    ioc = QIO_CHANNEL_SAVEVM_ASYNC(object_new(TYPE_QIO_CHANNEL_SAVEVM_ASYNC));
+
+    bdrv_ref(blk_bs(be));
+    ioc->be = be;
+    ioc->bs_pos = bs_pos;
+
+    return ioc;
+}
+
+
+static void
+qio_channel_savevm_async_finalize(Object *obj)
+{
+    QIOChannelSavevmAsync *ioc = QIO_CHANNEL_SAVEVM_ASYNC(obj);
+
+    if (ioc->be) {
+        bdrv_unref(blk_bs(ioc->be));
+        ioc->be = NULL;
+    }
+    ioc->bs_pos = NULL;
+}
+
+
+static ssize_t
+qio_channel_savevm_async_readv(QIOChannel *ioc,
+                               const struct iovec *iov,
+                               size_t niov,
+                               int **fds,
+                               size_t *nfds,
+                               int flags,
+                               Error **errp)
+{
+    QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+    BlockBackend *be = saioc->be;
+    int64_t maxlen = blk_getlength(be);
+    QEMUIOVector qiov;
+    size_t size;
+    int ret;
+
+    qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+    if (*saioc->bs_pos >= maxlen) {
+        error_setg(errp, "cannot read beyond maxlen");
+        return -1;
+    }
+
+    if (maxlen - *saioc->bs_pos < qiov.size) {
+        size = maxlen - *saioc->bs_pos;
+    } else {
+        size = qiov.size;
+    }
+
+    // returns 0 on success
+    ret = blk_preadv(be, *saioc->bs_pos, size, &qiov, 0);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "blk_preadv failed");
+        return -1;
+    }
+
+    *saioc->bs_pos += size;
+    return size;
+}
+
+
+static ssize_t
+qio_channel_savevm_async_writev(QIOChannel *ioc,
+                                const struct iovec *iov,
+                                size_t niov,
+                                int *fds,
+                                size_t nfds,
+                                int flags,
+                                Error **errp)
+{
+    QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+    BlockBackend *be = saioc->be;
+    QEMUIOVector qiov;
+    int ret;
+
+    qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+    if (qemu_in_coroutine()) {
+        ret = blk_co_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
+        aio_wait_kick();
+    } else {
+        ret = blk_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
+    }
+
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "blk(_co)_pwritev failed");
+        return -1;
+    }
+
+    *saioc->bs_pos += qiov.size;
+    return qiov.size;
+}
+
+
+static int
+qio_channel_savevm_async_set_blocking(QIOChannel *ioc,
+                                      bool enabled,
+                                      Error **errp)
+{
+    if (!enabled) {
+        error_setg(errp, "Non-blocking mode not supported for savevm-async");
+        return -1;
+    }
+    return 0;
+}
+
+
+static int
+qio_channel_savevm_async_close(QIOChannel *ioc,
+                               Error **errp)
+{
+    QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+    int rv = bdrv_flush(blk_bs(saioc->be));
+
+    if (rv < 0) {
+        error_setg_errno(errp, -rv, "Unable to flush VMState");
+        return -1;
+    }
+
+    bdrv_unref(blk_bs(saioc->be));
+    saioc->be = NULL;
+    saioc->bs_pos = NULL;
+
+    return 0;
+}
+
+
+static void
+qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc,
+                                            AioContext *read_ctx,
+                                            IOHandler *io_read,
+                                            AioContext *write_ctx,
+                                            IOHandler *io_write,
+                                            void *opaque)
+{
+    // if channel-block starts doing something, check if this needs adaptation
+}
+
+
+static void
+qio_channel_savevm_async_class_init(ObjectClass *klass,
+                             void *class_data G_GNUC_UNUSED)
+{
+    QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+    ioc_klass->io_writev = qio_channel_savevm_async_writev;
+    ioc_klass->io_readv = qio_channel_savevm_async_readv;
+    ioc_klass->io_set_blocking = qio_channel_savevm_async_set_blocking;
+    ioc_klass->io_close = qio_channel_savevm_async_close;
+    ioc_klass->io_set_aio_fd_handler = qio_channel_savevm_async_set_aio_fd_handler;
+}
+
+static const TypeInfo qio_channel_savevm_async_info = {
+    .parent = TYPE_QIO_CHANNEL,
+    .name = TYPE_QIO_CHANNEL_SAVEVM_ASYNC,
+    .instance_size = sizeof(QIOChannelSavevmAsync),
+    .instance_finalize = qio_channel_savevm_async_finalize,
+    .class_init = qio_channel_savevm_async_class_init,
+};
+
+static void
+qio_channel_savevm_async_register_types(void)
+{
+    type_register_static(&qio_channel_savevm_async_info);
+}
+
+type_init(qio_channel_savevm_async_register_types);
+diff --git a/migration/channel-savevm-async.h b/migration/channel-savevm-async.h
+new file mode 100644
+index 0000000000..17ae2cb261
+--- /dev/null
+++ b/migration/channel-savevm-async.h
+@@ -0,0 +1,51 @@
+/*
+ * QEMU I/O channels driver for savevm-async.c
+ *
+ * Copyright (c) 2022 Proxmox Server Solutions
+ *
+ * Authors:
+ *  Fiona Ebner (f.ebner@proxmox.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QIO_CHANNEL_SAVEVM_ASYNC_H
+#define QIO_CHANNEL_SAVEVM_ASYNC_H
+
+#include "io/channel.h"
+#include "qom/object.h"
+
+#define TYPE_QIO_CHANNEL_SAVEVM_ASYNC "qio-channel-savevm-async"
+OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelSavevmAsync, QIO_CHANNEL_SAVEVM_ASYNC)
+
+
+/**
+ * QIOChannelSavevmAsync:
+ *
+ * The QIOChannelSavevmAsync object provides a channel implementation that is
+ * able to perform I/O on any BlockBackend whose BlockDriverState directly
+ * contains a VMState (as opposed to indirectly, like qcow2). It allows
+ * tracking the current position from the outside.
+ */
+struct QIOChannelSavevmAsync {
+    QIOChannel parent;
+    BlockBackend *be;
+    size_t *bs_pos;
+};
+
+
+/**
+ * qio_channel_savevm_async_new:
+ * @be: the block backend
+ * @bs_pos: used to keep track of the IOChannel's current position
+ *
+ * Create a new IO channel object that can perform I/O on a BlockBackend object
+ * whose BlockDriverState directly contains a VMState.
+ *
+ * Returns: the new channel object
+ */
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos);
+
+#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
+diff --git a/migration/meson.build b/migration/meson.build
+index 1eeb915ff6..95d1cf2250 100644
+--- a/migration/meson.build
+++ b/migration/meson.build
+@@ -13,6 +13,7 @@ system_ss.add(files(
+   'block-dirty-bitmap.c',
+   'channel.c',
+   'channel-block.c',
+  'channel-savevm-async.c',
+   'dirtyrate.c',
+   'exec.c',
+   'fd.c',
--- a/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
+++ b/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
@@ -0,0 +1,861 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dietmar Maurer <dietmar@proxmox.com>
+Date: Mon, 6 Apr 2020 12:16:46 +0200
+Subject: [PATCH] PVE: add savevm-async for background state snapshots
+
+Put qemu_savevm_state_{header,setup} into the main loop and the rest
+of the iteration into a coroutine. The former need to lock the
+iothread (and we can't unlock it in the coroutine), and the latter
+can't deal with being in a separate thread, so a coroutine it must
+be.
+
+Truncate output file at 1024 boundary.
+
+Do not block the VM and save the state on aborting a snapshot, as the
+snapshot will be invalid anyway.
+
+Also, when aborting, wait for the target file to be closed, otherwise a
+client might run into race-conditions when trying to remove the file
+still opened by QEMU.
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+[SR: improve aborting
+     register yank before migration_incoming_state_destroy]
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+[FE: further improve aborting
+     adapt to removal of QEMUFileOps
+     improve condition for entering final stage
+     adapt to QAPI and other changes for 8.2]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hmp-commands-info.hx         |  13 +
+ hmp-commands.hx              |  17 ++
+ include/migration/snapshot.h |   2 +
+ include/monitor/hmp.h        |   3 +
+ migration/meson.build        |   1 +
+ migration/savevm-async.c     | 531 +++++++++++++++++++++++++++++++++++
+ monitor/hmp-cmds.c           |  38 +++
+ qapi/migration.json          |  34 +++
+ qapi/misc.json               |  18 ++
+ qemu-options.hx              |  12 +
+ system/vl.c                  |  10 +
+ 11 files changed, 679 insertions(+)
+ create mode 100644 migration/savevm-async.c
+
+diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
+index ad1b1306e3..d5ab880492 100644
+--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
+@@ -525,6 +525,19 @@ SRST
+     Show current migration parameters.
+ ERST
+ 
+    {
+        .name       = "savevm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show savevm status",
+        .cmd = hmp_info_savevm,
+    },
+
+SRST
+  ``info savevm``
+    Show savevm status.
+ERST
+
+     {
+         .name       = "balloon",
+         .args_type  = "",
+diff --git a/hmp-commands.hx b/hmp-commands.hx
+index 2e2a3bcf98..7506de251c 100644
+--- a/hmp-commands.hx
+++ b/hmp-commands.hx
+@@ -1862,3 +1862,20 @@ SRST
+   List event channels in the guest
+ ERST
+ #endif
+
+    {
+        .name       = "savevm-start",
+        .args_type  = "statefile:s?",
+        .params     = "[statefile]",
+        .help       = "Prepare for snapshot and halt VM. Save VM state to statefile.",
+        .cmd = hmp_savevm_start,
+    },
+
+    {
+        .name       = "savevm-end",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Resume VM after snaphot.",
+        .cmd        = hmp_savevm_end,
+        .coroutine  = true,
+    },
+diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
+index 9e4dcaaa75..2581730d74 100644
+--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
+@@ -68,4 +68,6 @@ bool delete_snapshot(const char *name,
+  */
+ void load_snapshot_resume(RunState state);
+ 
+int load_snapshot_from_blockdev(const char *filename, Error **errp);
+
+ #endif
+diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
+index 13f9a2dedb..7a7def7530 100644
+--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
+@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
+ void hmp_info_uuid(Monitor *mon, const QDict *qdict);
+ void hmp_info_chardev(Monitor *mon, const QDict *qdict);
+ void hmp_info_mice(Monitor *mon, const QDict *qdict);
+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
+ void hmp_info_migrate(Monitor *mon, const QDict *qdict);
+ void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
+ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
+@@ -94,6 +95,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
+ void hmp_mouse_move(Monitor *mon, const QDict *qdict);
+ void hmp_mouse_button(Monitor *mon, const QDict *qdict);
+ void hmp_mouse_set(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict);
+ void hmp_sendkey(Monitor *mon, const QDict *qdict);
+ void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
+ void hmp_chardev_add(Monitor *mon, const QDict *qdict);
+diff --git a/migration/meson.build b/migration/meson.build
+index 95d1cf2250..800f12a60d 100644
+--- a/migration/meson.build
+++ b/migration/meson.build
+@@ -28,6 +28,7 @@ system_ss.add(files(
+   'options.c',
+   'postcopy-ram.c',
+   'savevm.c',
+  'savevm-async.c',
+   'socket.c',
+   'tls.c',
+   'threadinfo.c',
+diff --git a/migration/savevm-async.c b/migration/savevm-async.c
+new file mode 100644
+index 0000000000..779e4e2a78
+--- /dev/null
+++ b/migration/savevm-async.c
+@@ -0,0 +1,531 @@
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "migration/migration.h"
+#include "migration/migration-stats.h"
+#include "migration/options.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
+#include "migration/ram.h"
+#include "migration/qemu-file.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "qemu/yank.h"
+
+/*
+ * Uncomment to enable debug output. With DEBUG_SAVEVM_STATE defined, DPRINTF()
+ * prints to stdout with a "savevm-async: " prefix and the timing variables
+ * guarded by the same macro further down are compiled in; otherwise DPRINTF()
+ * expands to an empty statement.
+ */
+/* #define DEBUG_SAVEVM_STATE */
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+    do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+/* Values stored in snap_state.state. */
+enum {
+    /*
+     * No snapshot in progress. This is value 0, i.e. the state of the
+     * zero-initialized snap_state; qmp_savevm_start() refuses to run in any
+     * other state and qmp_savevm_end() resets to it.
+     */
+    SAVE_STATE_DONE,
+    /* Set by save_snapshot_error(). */
+    SAVE_STATE_ERROR,
+    /* Set by qmp_savevm_start() once saving to a statefile has been set up. */
+    SAVE_STATE_ACTIVE,
+    /*
+     * Set by process_savevm_finalize() on success, or directly by
+     * qmp_savevm_start() when no statefile was given.
+     */
+    SAVE_STATE_COMPLETED,
+    /* Set by qmp_savevm_end() when called while the state is still ACTIVE. */
+    SAVE_STATE_CANCELLED
+};
+
+
+/* Global bookkeeping for the single asynchronous snapshot that may run. */
+static struct SnapshotState {
+    /* block backend opened on the statefile; NULL while no file is open */
+    BlockBackend *target;
+    /* position handed to the savevm-async channel; reported as 'bytes' */
+    size_t bs_pos;
+    /* one of the SAVE_STATE_* values */
+    int state;
+    /* first error recorded by save_snapshot_error(), if any */
+    Error *error;
+    /* op blocker installed on target while the snapshot is being written */
+    Error *blocker;
+    /* whether the VM was running at savevm-start and has to be restarted */
+    int saved_vm_running;
+    /* QEMUFile wrapping the channel that writes to target */
+    QEMUFile *file;
+    /*
+     * Start timestamp in ms (QEMU_CLOCK_REALTIME) while the snapshot runs;
+     * save_snapshot_cleanup() turns it into the elapsed duration.
+     */
+    int64_t total_time;
+    /* bottom half that runs process_savevm_finalize() */
+    QEMUBH *finalize_bh;
+    /* coroutine that runs process_savevm_co() */
+    Coroutine *co;
+    /* woken by save_snapshot_cleanup(); qmp_savevm_end() sleeps on it */
+    QemuCoSleep target_close_wait;
+} snap_state;
+
+static bool savevm_aborted(void)
+{
+    return snap_state.state == SAVE_STATE_CANCELLED ||
+        snap_state.state == SAVE_STATE_ERROR;
+}
+
+/*
+ * QMP handler for 'query-savevm'.
+ *
+ * Returns a freshly allocated SaveVMInfo. While the state is SAVE_STATE_DONE
+ * all optional members stay unset; otherwise 'bytes' is always filled in and
+ * 'status'/'total-time' (plus 'error' for a failed snapshot that recorded
+ * one) are set for the failed, active and completed states.
+ */
+SaveVMInfo *qmp_query_savevm(Error **errp)
+{
+    const struct SnapshotState *snap = &snap_state;
+    SaveVMInfo *result = g_malloc0(sizeof(*result));
+
+    if (snap->state == SAVE_STATE_DONE) {
+        return result;
+    }
+
+    result->has_bytes = true;
+    result->bytes = snap->bs_pos;
+
+    if (snap->state == SAVE_STATE_ERROR) {
+        result->status = g_strdup("failed");
+        result->has_total_time = true;
+        result->total_time = snap->total_time;
+        if (snap->error) {
+            result->error = g_strdup(error_get_pretty(snap->error));
+        }
+    } else if (snap->state == SAVE_STATE_ACTIVE) {
+        /* still running: total_time holds the start timestamp */
+        int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+        result->status = g_strdup("active");
+        result->has_total_time = true;
+        result->total_time = now - snap->total_time;
+    } else if (snap->state == SAVE_STATE_COMPLETED) {
+        result->status = g_strdup("completed");
+        result->has_total_time = true;
+        result->total_time = snap->total_time;
+    }
+
+    return result;
+}
+
+/*
+ * Release the resources of the current snapshot attempt.
+ *
+ * Converts snap_state.total_time from a start timestamp into the elapsed
+ * duration, closes the QEMUFile and, if a target is open, truncates it (only
+ * when the snapshot was not aborted), removes the op blocker, drops the
+ * target reference and wakes a qmp_savevm_end() sleeping on
+ * target_close_wait.
+ *
+ * Returns: the result of qemu_fclose(), or 0 if no file was open.
+ */
+static int save_snapshot_cleanup(void)
+{
+    int ret = 0;
+
+    DPRINTF("save_snapshot_cleanup\n");
+
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+        snap_state.total_time;
+
+    if (snap_state.file) {
+        ret = qemu_fclose(snap_state.file);
+        snap_state.file = NULL;
+    }
+
+    if (snap_state.target) {
+        if (!savevm_aborted()) {
+            /*
+             * Try to truncate, but ignore errors (will fail on block devices).
+             * note1: bdrv_read() needs whole blocks, so we need to round up
+             * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+             */
+            size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+            blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+        }
+        blk_op_unblock_all(snap_state.target, snap_state.blocker);
+        error_free(snap_state.blocker);
+        snap_state.blocker = NULL;
+        blk_unref(snap_state.target);
+        snap_state.target = NULL;
+
+        /* target is closed now, let a waiting qmp_savevm_end() continue */
+        qemu_co_sleep_wake(&snap_state.target_close_wait);
+    }
+
+    return ret;
+}
+
+/*
+ * Switch the snapshot into SAVE_STATE_ERROR.
+ *
+ * The printf-style message is stored in snap_state.error only if no error has
+ * been recorded yet, so the first failure is the one that gets reported.
+ */
+static void G_GNUC_PRINTF(1, 2) save_snapshot_error(const char *fmt, ...)
+{
+    g_autofree char *text = NULL;
+    va_list args;
+
+    va_start(args, fmt);
+    text = g_strdup_vprintf(fmt, args);
+    va_end(args);
+
+    snap_state.state = SAVE_STATE_ERROR;
+
+    DPRINTF("save_snapshot_error: %s\n", text);
+
+    if (snap_state.error == NULL) {
+        error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", text);
+    }
+}
+
+/*
+ * Bottom half scheduled by process_savevm_co() to finish the snapshot.
+ *
+ * Moves the target back to the main AioContext, stops the VM, writes the
+ * remaining state (unless the snapshot was aborted), resets the migration
+ * state, releases all resources via save_snapshot_cleanup() and finally
+ * restarts the VM if it was running when the snapshot was started.
+ *
+ * @opaque: unused
+ */
+static void process_savevm_finalize(void *opaque)
+{
+    int ret;
+    MigrationState *ms = migrate_get_current();
+
+    /* sampled once up front; errors recorded below do not change it */
+    bool aborted = savevm_aborted();
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    qemu_bh_delete(snap_state.finalize_bh);
+    snap_state.finalize_bh = NULL;
+    snap_state.co = NULL;
+
+    /* We need to own the target bdrv's context for the following functions,
+     * so move it back. It can stay in the main context and live out its life
+     * there, since we're done with it after this method ends anyway.
+     */
+    blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+
+    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+    if (ret < 0) {
+        save_snapshot_error("vm_stop_force_state error %d", ret);
+    }
+
+    if (!aborted) {
+        /* skip state saving if we aborted, snapshot will be invalid anyway */
+        (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+        ret = qemu_file_get_error(snap_state.file);
+        if (ret < 0) {
+            save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
+        }
+    }
+
+    DPRINTF("state saving complete\n");
+    DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+    /* clear migration state */
+    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+        ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+    ms->to_dst_file = NULL;
+
+    qemu_savevm_state_cleanup();
+
+    ret = save_snapshot_cleanup();
+    if (ret < 0) {
+        save_snapshot_error("save_snapshot_cleanup error %d", ret);
+    } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_COMPLETED;
+    } else if (aborted) {
+        /*
+         * If there was an error, there's no need to set a new one here.
+         * If the snapshot was canceled, leave setting the state to
+         * qmp_savevm_end(), which is waked by save_snapshot_cleanup().
+         */
+    } else {
+        save_snapshot_error("process_savevm_cleanup: invalid state: %d",
+                            snap_state.state);
+    }
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+}
+
+/*
+ * Coroutine doing the iterative part of the snapshot.
+ *
+ * Created by qmp_savevm_start() and scheduled into the iohandler AioContext.
+ * While the state is SAVE_STATE_ACTIVE it keeps calling
+ * qemu_savevm_state_iterate() until the remaining precopy data drops to the
+ * threshold or the target is about to run out of space. Afterwards it flushes
+ * every drive that lives outside the main AioContext and schedules
+ * process_savevm_finalize() through snap_state.finalize_bh.
+ *
+ * @opaque: unused
+ */
+static void coroutine_fn process_savevm_co(void *opaque)
+{
+    int ret;
+    int64_t maxlen;
+    BdrvNextIterator it;
+    BlockDriverState *bs = NULL;
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    ret = qemu_file_get_error(snap_state.file);
+    if (ret < 0) {
+        /*
+         * NOTE(review): this path returns without scheduling finalize_bh, so
+         * process_savevm_finalize()/save_snapshot_cleanup() are not triggered
+         * from here - confirm that this is intended.
+         */
+        save_snapshot_error("qemu_savevm_state_setup failed");
+        return;
+    }
+
+    while (snap_state.state == SAVE_STATE_ACTIVE) {
+        uint64_t pending_size, pend_precopy, pend_postcopy;
+        uint64_t threshold = 400 * 1000;
+
+        /*
+         * pending_{estimate,exact} are expected to be called without iothread
+         * lock. Similar to what is done in migration.c, call the exact variant
+         * only once pend_precopy in the estimate is below the threshold.
+         */
+        bql_unlock();
+        qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
+        if (pend_precopy <= threshold) {
+            qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
+        }
+        bql_lock();
+        pending_size = pend_precopy + pend_postcopy;
+
+        /*
+         * A guest reaching this cutoff is dirtying lots of RAM. It should be
+         * large enough so that the guest can't dirty this much between the
+         * check and the guest actually being stopped, but it should be small
+         * enough to avoid long downtimes for non-hibernation snapshots.
+         *
+         * NOTE(review): the blk_getlength() result is not checked before the
+         * subtraction and maxlen is compared against unsigned operands below -
+         * confirm the behaviour for a failing call or a target < 100 MiB.
+         */
+        maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
+
+        /* Note that there is no progress for pend_postcopy when iterating */
+        if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
+            ret = qemu_savevm_state_iterate(snap_state.file, false);
+            if (ret < 0) {
+                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+                break;
+            }
+            DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+        } else {
+            /* little enough left (or target nearly full): go to final stage */
+            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+            global_state_store();
+
+            DPRINTF("savevm iterate complete\n");
+            break;
+        }
+    }
+
+    DPRINTF("timing: process_savevm_co took %ld ms\n",
+        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+    /* If a drive runs in an IOThread we can flush it async, and only
+     * need to sync-flush whatever IO happens between now and
+     * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+     * so move there now and after every flush.
+     */
+    aio_co_reschedule_self(qemu_get_aio_context());
+    bdrv_graph_co_rdlock();
+    bs = bdrv_first(&it);
+    bdrv_graph_co_rdunlock();
+    while (bs) {
+        /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+        if (bs != blk_bs(snap_state.target)) {
+            AioContext *bs_ctx = bdrv_get_aio_context(bs);
+            if (bs_ctx != qemu_get_aio_context()) {
+                DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+                /* flush from the drive's own context, then come back */
+                aio_co_reschedule_self(bs_ctx);
+                bdrv_graph_co_rdlock();
+                bdrv_flush(bs);
+                bdrv_graph_co_rdunlock();
+                aio_co_reschedule_self(qemu_get_aio_context());
+            }
+        }
+        bdrv_graph_co_rdlock();
+        bs = bdrv_next(&it);
+        bdrv_graph_co_rdunlock();
+    }
+
+    DPRINTF("timing: async flushing took %ld ms\n",
+        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+
+    /* the rest (stopping the VM, final state, cleanup) happens in the BH */
+    qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+/*
+ * QMP handler for 'savevm-start'.
+ *
+ * Without @statefile the VM is merely stopped (RUN_STATE_SAVE_VM) and the
+ * snapshot is marked as completed right away. With @statefile the file is
+ * opened as a raw block device, the migration state is initialized, the
+ * header and setup sections are written synchronously and the iterative part
+ * is handed to process_savevm_co() in the iohandler AioContext.
+ *
+ * @statefile: target the VM state is written to, or NULL
+ * @errp: set if the snapshot could not be started
+ */
+void qmp_savevm_start(const char *statefile, Error **errp)
+{
+    Error *local_err = NULL;
+    MigrationState *ms = migrate_get_current();
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+
+    int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+
+    if (snap_state.state != SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot already started\n");
+        return;
+    }
+
+    if (migration_is_running()) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+        return;
+    }
+
+    if (migrate_block()) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "Block migration and snapshots are incompatible");
+        return;
+    }
+
+    /* initialize snapshot info */
+    snap_state.saved_vm_running = runstate_is_running();
+    snap_state.bs_pos = 0;
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    snap_state.blocker = NULL;
+    snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
+
+    if (snap_state.error) {
+        error_free(snap_state.error);
+        snap_state.error = NULL;
+    }
+
+    if (!statefile) {
+        vm_stop(RUN_STATE_SAVE_VM);
+        snap_state.state = SAVE_STATE_COMPLETED;
+        return;
+    }
+
+    if (qemu_savevm_state_blocked(errp)) {
+        return;
+    }
+
+    /* Open the image */
+    QDict *options = qdict_new();
+    qdict_put_str(options, "driver", "raw");
+    snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
+    if (!snap_state.target) {
+        /* the detailed reason is not forwarded, but it must not be leaked */
+        error_free(local_err);
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
+                                                               &snap_state.bs_pos));
+    snap_state.file = qemu_file_new_output(ioc);
+
+    if (!snap_state.file) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    /*
+     * qemu_savevm_* paths use migration code and expect a migration state.
+     * State is cleared in process_savevm_finalize, but has to be initialized
+     * here (blocking main thread, from QMP) to avoid race conditions.
+     */
+    if (migrate_init(ms, errp)) {
+        /*
+         * The state stays SAVE_STATE_DONE here, so nobody else would ever
+         * close what was opened above. Release it in the same order as
+         * save_snapshot_cleanup() does.
+         */
+        qemu_fclose(snap_state.file);
+        snap_state.file = NULL;
+        blk_unref(snap_state.target);
+        snap_state.target = NULL;
+        return;
+    }
+    memset(&mig_stats, 0, sizeof(mig_stats));
+    ms->to_dst_file = snap_state.file;
+
+    error_setg(&snap_state.blocker, "block device is in use by savevm");
+    blk_op_block_all(snap_state.target, snap_state.blocker);
+
+    snap_state.state = SAVE_STATE_ACTIVE;
+    snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+    snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+    qemu_savevm_state_header(snap_state.file);
+    qemu_savevm_state_setup(snap_state.file);
+
+    /* Async processing from here on out happens in iohandler context, so let
+     * the target bdrv have its home there.
+     */
+    blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+    if (local_err) {
+        /* keep going as before, but report the error instead of leaking it */
+        warn_report_err(local_err);
+    }
+
+    aio_co_schedule(iohandler_ctx, snap_state.co);
+
+    return;
+
+restart:
+
+    /* record the failure; it is cleared again by the next savevm-start */
+    save_snapshot_error("setup failed");
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+}
+
+/*
+ * QMP handler for 'savevm-end' (runs in coroutine context).
+ *
+ * If the snapshot is still active it is marked as cancelled; otherwise the VM
+ * is restarted if it was running before and the state is reset to
+ * SAVE_STATE_DONE. In both cases, if a target file is still open, the call
+ * waits up to 30 seconds for save_snapshot_cleanup() to close it.
+ *
+ * @errp: set if no snapshot had been started
+ */
+void coroutine_fn qmp_savevm_end(Error **errp)
+{
+    int64_t timeout;
+
+    if (snap_state.state == SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot not started\n");
+        return;
+    }
+
+    if (snap_state.state == SAVE_STATE_ACTIVE) {
+        /* process_savevm_co() stops iterating once the state is not ACTIVE */
+        snap_state.state = SAVE_STATE_CANCELLED;
+        goto wait_for_close;
+    }
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    snap_state.state = SAVE_STATE_DONE;
+
+wait_for_close:
+    if (!snap_state.target) {
+        DPRINTF("savevm-end: no target file open\n");
+        return;
+    }
+
+    /* wait until cleanup is done before returning, this ensures that after this
+     * call exits the statefile will be closed and can be removed immediately */
+    DPRINTF("savevm-end: waiting for cleanup\n");
+    /* 30 seconds, expressed in nanoseconds */
+    timeout = 30L * 1000 * 1000 * 1000;
+    qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
+                              QEMU_CLOCK_REALTIME, timeout);
+    if (snap_state.target) {
+        save_snapshot_error("timeout waiting for target file close in "
+                            "qmp_savevm_end");
+        /* we cannot assume the snapshot finished in this case, so leave the
+         * state alone - caller has to figure something out */
+        return;
+    }
+
+    /* File closed and no other error, so ensure next snapshot can be started. */
+    if (snap_state.state != SAVE_STATE_ERROR) {
+        snap_state.state = SAVE_STATE_DONE;
+    }
+
+    DPRINTF("savevm-end: cleanup done\n");
+}
+
+/*
+ * Load a VM state that was written by savevm-start from @filename.
+ *
+ * The file is opened as a block backend and read through a savevm-async
+ * channel. The machine is reset before the state is loaded. Disk state is
+ * not touched.
+ *
+ * @filename: block device/file holding the VM state
+ * @errp: set on failure
+ *
+ * Returns: 0 on success, a negative value on failure.
+ */
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+    BlockBackend *be;
+    Error *local_err = NULL;
+    Error *blocker = NULL;
+
+    QEMUFile *f;
+    size_t bs_pos = 0;
+    int ret = -EINVAL;
+
+    be = blk_new_open(filename, NULL, NULL, 0, &local_err);
+
+    if (!be) {
+        /* the detailed reason is not forwarded, but it must not be leaked */
+        error_free(local_err);
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    error_setg(&blocker, "block device is in use by load state");
+    blk_op_block_all(be, blocker);
+
+    /* restore the VM state */
+    f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+    if (!f) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+    ret = qemu_loadvm_state(f);
+
+    /* dirty bitmap migration has a special case we need to trigger manually */
+    dirty_bitmap_mig_before_vm_start();
+
+    qemu_fclose(f);
+
+    /* state_destroy assumes a real migration which would have added a yank */
+    yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
+    migration_incoming_state_destroy();
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Error while loading VM state");
+        goto the_end;
+    }
+
+    ret = 0;
+
+ the_end:
+    /* be is NULL only when opening failed, before the blocker was created */
+    if (be) {
+        blk_op_unblock_all(be, blocker);
+        error_free(blocker);
+        blk_unref(be);
+    }
+    return ret;
+}
+diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
+index 871898ac46..ef4634e5c1 100644
+--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
+@@ -22,6 +22,7 @@
+ #include "monitor/monitor-internal.h"
+ #include "qapi/error.h"
+ #include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-migration.h"
+ #include "qapi/qapi-commands-misc.h"
+ #include "qapi/qmp/qdict.h"
+ #include "qemu/cutils.h"
+@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
+ 
+     mtree_info(flatview, dispatch_tree, owner, disabled);
+ }
+
+/* HMP 'savevm-start': forward the optional statefile to qmp_savevm_start(). */
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qmp_savevm_start(qdict_get_try_str(qdict, "statefile"), &err);
+    hmp_handle_error(mon, err);
+}
+
+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+
+    qmp_savevm_end(&errp);
+    hmp_handle_error(mon, errp);
+}
+
+/*
+ * HMP 'info savevm': print status, total time, saved bytes and error message
+ * of the savevm process, as far as qmp_query_savevm() reports them.
+ */
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+    SaveVMInfo *info;
+    info = qmp_query_savevm(NULL);
+
+    if (info->status) {
+        monitor_printf(mon, "savevm status: %s\n", info->status);
+        monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+                       info->total_time);
+    } else {
+        monitor_printf(mon, "savevm status: not running\n");
+    }
+    if (info->has_bytes) {
+        monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+    }
+    if (info->error) {
+        monitor_printf(mon, "Error: %s\n", info->error);
+    }
+
+    /* the result (including its strings) is owned by the caller */
+    qapi_free_SaveVMInfo(info);
+}
+diff --git a/qapi/migration.json b/qapi/migration.json
+index 8c65b90328..ed20d066cd 100644
+--- a/qapi/migration.json
+++ b/qapi/migration.json
+@@ -297,6 +297,40 @@
+            '*dirty-limit-throttle-time-per-round': 'uint64',
+            '*dirty-limit-ring-full-time': 'uint64'} }
+ 
+##
+# @SaveVMInfo:
+#
+# Information about the current savevm process.
+#
+# @status: string describing the current savevm status.
+#          This can be 'active', 'completed', 'failed'.
+#          If this field is not returned, no savevm process
+#          has been initiated
+#
+# @error: string containing the error message if status is failed.
+#
+# @total-time: total amount of milliseconds since savevm started.
+#              If savevm has ended, it returns the total save time
+#
+# @bytes: total amount of data transferred
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+  'data': {'*status': 'str', '*error': 'str',
+           '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
+ ##
+ # @query-migrate:
+ #
+diff --git a/qapi/misc.json b/qapi/misc.json
+index ec30e5c570..7147199a12 100644
+--- a/qapi/misc.json
+++ b/qapi/misc.json
+@@ -454,6 +454,24 @@
+ ##
+ { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
+ 
+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+# @statefile: target file that state should be written to.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end', 'coroutine': true }
+
+ ##
+ # @CommandLineParameterType:
+ #
+diff --git a/qemu-options.hx b/qemu-options.hx
+index 8ce85d4559..511ab9415e 100644
+--- a/qemu-options.hx
+++ b/qemu-options.hx
+@@ -4610,6 +4610,18 @@ SRST
+     Start right away with a saved state (``loadvm`` in monitor)
+ ERST
+ 
+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+    "-loadstate file\n" \
+    "                start right away with a saved state\n",
+    QEMU_ARCH_ALL)
+SRST
+``-loadstate file``
+  Start right away with a saved state. This option does not roll back
+  disk state like ``loadvm``, so the user must make sure that the disks
+  have the correct state. *file* can be any valid device URL. See the
+  section "Device URL Syntax" for more information.
+ERST
+
+ #ifndef _WIN32
+ DEF("daemonize", 0, QEMU_OPTION_daemonize, \
+     "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
+diff --git a/system/vl.c b/system/vl.c
+index c644222982..2738ab7c91 100644
+--- a/system/vl.c
+++ b/system/vl.c
+@@ -163,6 +163,7 @@ static const char *accelerators;
+ static bool have_custom_ram_size;
+ static const char *ram_memdev_id;
+ static QDict *machine_opts_dict;
+static const char *loadstate;
+ static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
+ static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
+ static int display_remote;
+@@ -2712,6 +2713,12 @@ void qmp_x_exit_preconfig(Error **errp)
+         RunState state = autostart ? RUN_STATE_RUNNING : runstate_get();
+         load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
+         load_snapshot_resume(state);
+    } else if (loadstate) {
+        Error *local_err = NULL;
+        if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+            error_report_err(local_err);
+            autostart = 0;
+        }
+     }
+     if (replay_mode != REPLAY_MODE_NONE) {
+         replay_vmstate_init();
+@@ -3259,6 +3266,9 @@ void qemu_init(int argc, char **argv)
+             case QEMU_OPTION_loadvm:
+                 loadvm = optarg;
+                 break;
+            case QEMU_OPTION_loadstate:
+                loadstate = optarg;
+                break;
+             case QEMU_OPTION_full_screen:
+                 dpy.has_full_screen = true;
+                 dpy.full_screen = true;
--- a/debian/patches/pve/0017-PVE-internal-snapshot-async.patch
+++ b/debian/patches/pve/0017-PVE-internal-snapshot-async.patch
@@ -1,818 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dietmar Maurer <dietmar@proxmox.com>
-Date: Mon, 6 Apr 2020 12:16:46 +0200
-Subject: [PATCH] PVE: internal snapshot async
-
-Truncate at 1024 boundary (Fabian Ebner will send a patch for stable)
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
-Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
---
- Makefile.objs                |   1 +
- hmp-commands-info.hx         |  13 +
- hmp-commands.hx              |  32 +++
- include/migration/snapshot.h |   1 +
- include/monitor/hmp.h        |   5 +
- monitor/hmp-cmds.c           |  57 +++++
- qapi/migration.json          |  34 +++
- qapi/misc.json               |  32 +++
- qemu-options.hx              |  12 +
- savevm-async.c               | 464 +++++++++++++++++++++++++++++++++++
- softmmu/vl.c                 |  10 +
- 11 files changed, 661 insertions(+)
- create mode 100644 savevm-async.c
-
-diff --git a/Makefile.objs b/Makefile.objs
-index a7c967633a..d0b4dde836 100644
--- a/Makefile.objs
-+++ b/Makefile.objs
-@@ -47,6 +47,7 @@ common-obj-y += bootdevice.o iothread.o
- common-obj-y += dump/
- common-obj-y += job-qmp.o
- common-obj-y += monitor/
-+common-obj-y += savevm-async.o
- common-obj-y += net/
- common-obj-y += qdev-monitor.o
- common-obj-$(CONFIG_WIN32) += os-win32.o
-diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
-index ca5198438d..89fea71972 100644
--- a/hmp-commands-info.hx
-+++ b/hmp-commands-info.hx
-@@ -579,6 +579,19 @@ SRST
-     Show current migration xbzrle cache size.
- ERST
- 
-+    {
-+        .name       = "savevm",
-+        .args_type  = "",
-+        .params     = "",
-+        .help       = "show savevm status",
-+        .cmd = hmp_info_savevm,
-+    },
-+
-+SRST
-+  ``info savevm``
-+    Show savevm status.
-+ERST
-+
-     {
-         .name       = "balloon",
-         .args_type  = "",
-diff --git a/hmp-commands.hx b/hmp-commands.hx
-index 7f0f3974ad..81fe305d07 100644
--- a/hmp-commands.hx
-+++ b/hmp-commands.hx
-@@ -1814,3 +1814,35 @@ ERST
-         .flags      = "p",
-     },
- 
-+
-+    {
-+        .name       = "savevm-start",
-+        .args_type  = "statefile:s?",
-+        .params     = "[statefile]",
-+        .help       = "Prepare for snapshot and halt VM. Save VM state to statefile.",
-+        .cmd = hmp_savevm_start,
-+    },
-+
-+    {
-+        .name       = "snapshot-drive",
-+        .args_type  = "device:s,name:s",
-+        .params     = "device name",
-+        .help       = "Create internal snapshot.",
-+        .cmd = hmp_snapshot_drive,
-+    },
-+
-+    {
-+        .name       = "delete-drive-snapshot",
-+        .args_type  = "device:s,name:s",
-+        .params     = "device name",
-+        .help       = "Delete internal snapshot.",
-+        .cmd = hmp_delete_drive_snapshot,
-+    },
-+
-+    {
-+        .name       = "savevm-end",
-+        .args_type  = "",
-+        .params     = "",
-+        .help       = "Resume VM after snaphot.",
-+        .cmd = hmp_savevm_end,
-+    },
-diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
-index c85b6ec75b..4411b7121d 100644
--- a/include/migration/snapshot.h
-+++ b/include/migration/snapshot.h
-@@ -17,5 +17,6 @@
- 
- int save_snapshot(const char *name, Error **errp);
- int load_snapshot(const char *name, Error **errp);
-+int load_snapshot_from_blockdev(const char *filename, Error **errp);
- 
- #endif
-diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
-index e33ca5a911..601827d43f 100644
--- a/include/monitor/hmp.h
-+++ b/include/monitor/hmp.h
-@@ -25,6 +25,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
- void hmp_info_uuid(Monitor *mon, const QDict *qdict);
- void hmp_info_chardev(Monitor *mon, const QDict *qdict);
- void hmp_info_mice(Monitor *mon, const QDict *qdict);
-+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
- void hmp_info_migrate(Monitor *mon, const QDict *qdict);
- void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
- void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
-@@ -83,6 +84,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
- void hmp_netdev_del(Monitor *mon, const QDict *qdict);
- void hmp_getfd(Monitor *mon, const QDict *qdict);
- void hmp_closefd(Monitor *mon, const QDict *qdict);
-+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
-+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
-+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
-+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
- void hmp_sendkey(Monitor *mon, const QDict *qdict);
- void hmp_screendump(Monitor *mon, const QDict *qdict);
- void hmp_chardev_add(Monitor *mon, const QDict *qdict);
-diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
-index 0c6f6ff331..39c7474cea 100644
--- a/monitor/hmp-cmds.c
-+++ b/monitor/hmp-cmds.c
-@@ -1876,6 +1876,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
-     hmp_handle_error(mon, err);
- }
- 
-+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
-+{
-+    Error *errp = NULL;
-+    const char *statefile = qdict_get_try_str(qdict, "statefile");
-+
-+    qmp_savevm_start(statefile != NULL, statefile, &errp);
-+    hmp_handle_error(mon, errp);
-+}
-+
-+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
-+{
-+    Error *errp = NULL;
-+    const char *name = qdict_get_str(qdict, "name");
-+    const char *device = qdict_get_str(qdict, "device");
-+
-+    qmp_snapshot_drive(device, name, &errp);
-+    hmp_handle_error(mon, errp);
-+}
-+
-+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
-+{
-+    Error *errp = NULL;
-+    const char *name = qdict_get_str(qdict, "name");
-+    const char *device = qdict_get_str(qdict, "device");
-+
-+    qmp_delete_drive_snapshot(device, name, &errp);
-+    hmp_handle_error(mon, errp);
-+}
-+
-+void hmp_savevm_end(Monitor *mon, const QDict *qdict)
-+{
-+    Error *errp = NULL;
-+
-+    qmp_savevm_end(&errp);
-+    hmp_handle_error(mon, errp);
-+}
-+
-+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
-+{
-+    SaveVMInfo *info;
-+    info = qmp_query_savevm(NULL);
-+
-+    if (info->has_status) {
-+        monitor_printf(mon, "savevm status: %s\n", info->status);
-+        monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
-+                       info->total_time);
-+    } else {
-+        monitor_printf(mon, "savevm status: not running\n");
-+    }
-+    if (info->has_bytes) {
-+        monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
-+    }
-+    if (info->has_error) {
-+        monitor_printf(mon, "Error: %s\n", info->error);
-+    }
-+}
-+
- void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
- {
-     IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
-diff --git a/qapi/migration.json b/qapi/migration.json
-index eca2981d0a..081663d67a 100644
--- a/qapi/migration.json
-+++ b/qapi/migration.json
-@@ -222,6 +222,40 @@
-            '*compression': 'CompressionStats',
-            '*socket-address': ['SocketAddress'] } }
- 
-+##
-+# @SaveVMInfo:
-+#
-+# Information about current migration process.
-+#
-+# @status: string describing the current savevm status.
-+#          This can be 'active', 'completed', 'failed'.
-+#          If this field is not returned, no savevm process
-+#          has been initiated
-+#
-+# @error: string containing error message is status is failed.
-+#
-+# @total-time: total amount of milliseconds since savevm started.
-+#        If savevm has ended, it returns the total save time
-+#
-+# @bytes: total amount of data transfered
-+#
-+# Since: 1.3
-+##
-+{ 'struct': 'SaveVMInfo',
-+  'data': {'*status': 'str', '*error': 'str',
-+           '*total-time': 'int', '*bytes': 'int'} }
-+
-+##
-+# @query-savevm:
-+#
-+# Returns information about current savevm process.
-+#
-+# Returns: @SaveVMInfo
-+#
-+# Since: 1.3
-+##
-+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
-+
- ##
- # @query-migrate:
- #
-diff --git a/qapi/misc.json b/qapi/misc.json
-index e2a6678eae..0868de22b7 100644
--- a/qapi/misc.json
-+++ b/qapi/misc.json
-@@ -1165,6 +1165,38 @@
- ##
- { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
- 
-+##
-+# @savevm-start:
-+#
-+# Prepare for snapshot and halt VM. Save VM state to statefile.
-+#
-+##
-+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
-+
-+##
-+# @snapshot-drive:
-+#
-+# Create an internal drive snapshot.
-+#
-+##
-+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
-+
-+##
-+# @delete-drive-snapshot:
-+#
-+# Delete a drive snapshot.
-+#
-+##
-+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
-+
-+##
-+# @savevm-end:
-+#
-+# Resume VM after a snapshot.
-+#
-+##
-+{ 'command': 'savevm-end' }
-+
- ##
- # @AcpiTableOptions:
- #
-diff --git a/qemu-options.hx b/qemu-options.hx
-index 292d4e7c0c..55eef64ddf 100644
--- a/qemu-options.hx
-+++ b/qemu-options.hx
-@@ -3832,6 +3832,18 @@ SRST
-     Start right away with a saved state (``loadvm`` in monitor)
- ERST
- 
-+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
-+    "-loadstate file\n" \
-+    "                start right away with a saved state\n",
-+    QEMU_ARCH_ALL)
-+SRST
-+``-loadstate file``
-+  Start right away with a saved state. This option does not rollback
-+  disk state like @code{loadvm}, so user must make sure that disk
-+  have correct state. @var{file} can be any valid device URL. See the section
-+  for "Device URL Syntax" for more information.
-+ERST
-+
- #ifndef _WIN32
- DEF("daemonize", 0, QEMU_OPTION_daemonize, \
-     "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
-diff --git a/savevm-async.c b/savevm-async.c
-new file mode 100644
-index 0000000000..54ceeae26c
--- /dev/null
-+++ b/savevm-async.c
-@@ -0,0 +1,464 @@
-+#include "qemu/osdep.h"
-+#include "migration/migration.h"
-+#include "migration/savevm.h"
-+#include "migration/snapshot.h"
-+#include "migration/global_state.h"
-+#include "migration/ram.h"
-+#include "migration/qemu-file.h"
-+#include "sysemu/sysemu.h"
-+#include "sysemu/runstate.h"
-+#include "block/block.h"
-+#include "sysemu/block-backend.h"
-+#include "qapi/error.h"
-+#include "qapi/qmp/qerror.h"
-+#include "qapi/qmp/qdict.h"
-+#include "qapi/qapi-commands-migration.h"
-+#include "qapi/qapi-commands-misc.h"
-+#include "qapi/qapi-commands-block.h"
-+#include "qemu/cutils.h"
-+#include "qemu/main-loop.h"
-+#include "qemu/rcu.h"
-+
-+/* #define DEBUG_SAVEVM_STATE */
-+
-+/* used while emulated sync operation in progress */
-+#define NOT_DONE -EINPROGRESS
-+
-+#ifdef DEBUG_SAVEVM_STATE
-+#define DPRINTF(fmt, ...) \
-+    do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
-+#else
-+#define DPRINTF(fmt, ...) \
-+    do { } while (0)
-+#endif
-+
-+enum {
-+    SAVE_STATE_DONE,
-+    SAVE_STATE_ERROR,
-+    SAVE_STATE_ACTIVE,
-+    SAVE_STATE_COMPLETED,
-+    SAVE_STATE_CANCELLED
-+};
-+
-+
-+static struct SnapshotState {
-+    BlockBackend *target;
-+    size_t bs_pos;
-+    int state;
-+    Error *error;
-+    Error *blocker;
-+    int saved_vm_running;
-+    QEMUFile *file;
-+    int64_t total_time;
-+    QEMUBH *cleanup_bh;
-+    QemuThread thread;
-+} snap_state;
-+
-+SaveVMInfo *qmp_query_savevm(Error **errp)
-+{
-+    SaveVMInfo *info = g_malloc0(sizeof(*info));
-+    struct SnapshotState *s = &snap_state;
-+
-+    if (s->state != SAVE_STATE_DONE) {
-+        info->has_bytes = true;
-+        info->bytes = s->bs_pos;
-+        switch (s->state) {
-+        case SAVE_STATE_ERROR:
-+            info->has_status = true;
-+            info->status = g_strdup("failed");
-+            info->has_total_time = true;
-+            info->total_time = s->total_time;
-+            if (s->error) {
-+                info->has_error = true;
-+                info->error = g_strdup(error_get_pretty(s->error));
-+            }
-+            break;
-+        case SAVE_STATE_ACTIVE:
-+            info->has_status = true;
-+            info->status = g_strdup("active");
-+            info->has_total_time = true;
-+            info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
-+                - s->total_time;
-+            break;
-+        case SAVE_STATE_COMPLETED:
-+            info->has_status = true;
-+            info->status = g_strdup("completed");
-+            info->has_total_time = true;
-+            info->total_time = s->total_time;
-+            break;
-+        }
-+    }
-+
-+    return info;
-+}
-+
-+static int save_snapshot_cleanup(void)
-+{
-+    int ret = 0;
-+
-+    DPRINTF("save_snapshot_cleanup\n");
-+
-+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
-+        snap_state.total_time;
-+
-+    if (snap_state.file) {
-+        ret = qemu_fclose(snap_state.file);
-+    }
-+
-+    if (snap_state.target) {
-+        /* try to truncate, but ignore errors (will fail on block devices).
-+         * note1: bdrv_read() need whole blocks, so we need to round up
-+         * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
-+         */
-+        size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
-+        blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, NULL);
-+        blk_op_unblock_all(snap_state.target, snap_state.blocker);
-+        error_free(snap_state.blocker);
-+        snap_state.blocker = NULL;
-+        blk_unref(snap_state.target);
-+        snap_state.target = NULL;
-+    }
-+
-+    return ret;
-+}
-+
-+static void save_snapshot_error(const char *fmt, ...)
-+{
-+    va_list ap;
-+    char *msg;
-+
-+    va_start(ap, fmt);
-+    msg = g_strdup_vprintf(fmt, ap);
-+    va_end(ap);
-+
-+    DPRINTF("save_snapshot_error: %s\n", msg);
-+
-+    if (!snap_state.error) {
-+        error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
-+    }
-+
-+    g_free (msg);
-+
-+    snap_state.state = SAVE_STATE_ERROR;
-+}
-+
-+static int block_state_close(void *opaque, Error **errp)
-+{
-+    snap_state.file = NULL;
-+    return blk_flush(snap_state.target);
-+}
-+
-+typedef struct BlkRwCo {
-+    int64_t offset;
-+    QEMUIOVector *qiov;
-+    ssize_t ret;
-+} BlkRwCo;
-+
-+static void coroutine_fn block_state_write_entry(void *opaque) {
-+    BlkRwCo *rwco = opaque;
-+    rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
-+                               rwco->qiov, 0);
-+}
-+
-+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
-+                                         int iovcnt, int64_t pos, Error **errp)
-+{
-+    QEMUIOVector qiov;
-+    BlkRwCo rwco;
-+
-+    assert(pos == snap_state.bs_pos);
-+    rwco = (BlkRwCo) {
-+        .offset = pos,
-+        .qiov = &qiov,
-+        .ret = NOT_DONE,
-+    };
-+
-+    qemu_iovec_init_external(&qiov, iov, iovcnt);
-+
-+    if (qemu_in_coroutine()) {
-+        block_state_write_entry(&rwco);
-+    } else {
-+        Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
-+        bdrv_coroutine_enter(blk_bs(snap_state.target), co);
-+        BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
-+    }
-+    if (rwco.ret < 0) {
-+        return rwco.ret;
-+    }
-+
-+    snap_state.bs_pos += qiov.size;
-+    return qiov.size;
-+}
-+
-+static const QEMUFileOps block_file_ops = {
-+    .writev_buffer =  block_state_writev_buffer,
-+    .close =          block_state_close,
-+};
-+
-+static void process_savevm_cleanup(void *opaque)
-+{
-+    int ret;
-+    qemu_bh_delete(snap_state.cleanup_bh);
-+    snap_state.cleanup_bh = NULL;
-+    qemu_mutex_unlock_iothread();
-+    qemu_thread_join(&snap_state.thread);
-+    qemu_mutex_lock_iothread();
-+    ret = save_snapshot_cleanup();
-+    if (ret < 0) {
-+        save_snapshot_error("save_snapshot_cleanup error %d", ret);
-+    } else if (snap_state.state == SAVE_STATE_ACTIVE) {
-+        snap_state.state = SAVE_STATE_COMPLETED;
-+    } else {
-+        save_snapshot_error("process_savevm_cleanup: invalid state: %d",
-+                            snap_state.state);
-+    }
-+    if (snap_state.saved_vm_running) {
-+        vm_start();
-+        snap_state.saved_vm_running = false;
-+    }
-+}
-+
-+static void *process_savevm_thread(void *opaque)
-+{
-+    int ret;
-+    int64_t maxlen;
-+
-+    rcu_register_thread();
-+
-+    qemu_savevm_state_header(snap_state.file);
-+    qemu_savevm_state_setup(snap_state.file);
-+    ret = qemu_file_get_error(snap_state.file);
-+
-+    if (ret < 0) {
-+        save_snapshot_error("qemu_savevm_state_setup failed");
-+        rcu_unregister_thread();
-+        return NULL;
-+    }
-+
-+    while (snap_state.state == SAVE_STATE_ACTIVE) {
-+        uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
-+
-+        qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
-+        pending_size = pend_precopy + pend_compatible + pend_postcopy;
-+
-+        maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
-+
-+        if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
-+            qemu_mutex_lock_iothread();
-+            ret = qemu_savevm_state_iterate(snap_state.file, false);
-+            if (ret < 0) {
-+                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
-+                break;
-+            }
-+            qemu_mutex_unlock_iothread();
-+            DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
-+        } else {
-+            qemu_mutex_lock_iothread();
-+            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
-+            ret = global_state_store();
-+            if (ret) {
-+                save_snapshot_error("global_state_store error %d", ret);
-+                break;
-+            }
-+            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
-+            if (ret < 0) {
-+                save_snapshot_error("vm_stop_force_state error %d", ret);
-+                break;
-+            }
-+            DPRINTF("savevm inerate finished\n");
-+            /* upstream made the return value here inconsistent
-+             * (-1 instead of 'ret' in one case and 0 after flush which can
-+             * still set a file error...)
-+             */
-+            (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
-+            ret = qemu_file_get_error(snap_state.file);
-+            if (ret < 0) {
-+                    save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
-+                    break;
-+            }
-+            qemu_savevm_state_cleanup();
-+            DPRINTF("save complete\n");
-+            break;
-+        }
-+    }
-+
-+    qemu_bh_schedule(snap_state.cleanup_bh);
-+    qemu_mutex_unlock_iothread();
-+
-+    rcu_unregister_thread();
-+    return NULL;
-+}
-+
-+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
-+{
-+    Error *local_err = NULL;
-+
-+    int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
-+
-+    if (snap_state.state != SAVE_STATE_DONE) {
-+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
-+                  "VM snapshot already started\n");
-+        return;
-+    }
-+
-+    /* initialize snapshot info */
-+    snap_state.saved_vm_running = runstate_is_running();
-+    snap_state.bs_pos = 0;
-+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-+    snap_state.blocker = NULL;
-+
-+    if (snap_state.error) {
-+        error_free(snap_state.error);
-+        snap_state.error = NULL;
-+    }
-+
-+    if (!has_statefile) {
-+        vm_stop(RUN_STATE_SAVE_VM);
-+        snap_state.state = SAVE_STATE_COMPLETED;
-+        return;
-+    }
-+
-+    if (qemu_savevm_state_blocked(errp)) {
-+        return;
-+    }
-+
-+    /* Open the image */
-+    QDict *options = NULL;
-+    options = qdict_new();
-+    qdict_put_str(options, "driver", "raw");
-+    snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
-+    if (!snap_state.target) {
-+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
-+        goto restart;
-+    }
-+
-+    snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
-+
-+    if (!snap_state.file) {
-+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
-+        goto restart;
-+    }
-+
-+
-+    error_setg(&snap_state.blocker, "block device is in use by savevm");
-+    blk_op_block_all(snap_state.target, snap_state.blocker);
-+
-+    snap_state.state = SAVE_STATE_ACTIVE;
-+    snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
-+    qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
-+                       NULL, QEMU_THREAD_JOINABLE);
-+
-+    return;
-+
-+restart:
-+
-+    save_snapshot_error("setup failed");
-+
-+    if (snap_state.saved_vm_running) {
-+        vm_start();
-+    }
-+}
-+
-+void qmp_savevm_end(Error **errp)
-+{
-+    if (snap_state.state == SAVE_STATE_DONE) {
-+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
-+                  "VM snapshot not started\n");
-+        return;
-+    }
-+
-+    if (snap_state.state == SAVE_STATE_ACTIVE) {
-+        snap_state.state = SAVE_STATE_CANCELLED;
-+        return;
-+    }
-+
-+    if (snap_state.saved_vm_running) {
-+        vm_start();
-+    }
-+
-+    snap_state.state = SAVE_STATE_DONE;
-+}
-+
-+// FIXME: Deprecated
-+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
-+{
-+    // Compatibility to older qemu-server.
-+    qmp_blockdev_snapshot_internal_sync(device, name, errp);
-+}
-+
-+// FIXME: Deprecated
-+void qmp_delete_drive_snapshot(const char *device, const char *name,
-+                               Error **errp)
-+{
-+    // Compatibility to older qemu-server.
-+    (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
-+                                                     true, name, errp);
-+}
-+
-+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
-+                                    size_t size, Error **errp)
-+{
-+    BlockBackend *be = opaque;
-+    int64_t maxlen = blk_getlength(be);
-+    if (pos > maxlen) {
-+        return -EIO;
-+    }
-+    if ((pos + size) > maxlen) {
-+        size = maxlen - pos - 1;
-+    }
-+    if (size == 0) {
-+        return 0;
-+    }
-+    return blk_pread(be, pos, buf, size);
-+}
-+
-+static const QEMUFileOps loadstate_file_ops = {
-+    .get_buffer = loadstate_get_buffer,
-+};
-+
-+int load_snapshot_from_blockdev(const char *filename, Error **errp)
-+{
-+    BlockBackend *be;
-+    Error *local_err = NULL;
-+    Error *blocker = NULL;
-+
-+    QEMUFile *f;
-+    int ret = -EINVAL;
-+
-+    be = blk_new_open(filename, NULL, NULL, 0, &local_err);
-+
-+    if (!be) {
-+        error_setg(errp, "Could not open VM state file");
-+        goto the_end;
-+    }
-+
-+    error_setg(&blocker, "block device is in use by load state");
-+    blk_op_block_all(be, blocker);
-+
-+    /* restore the VM state */
-+    f = qemu_fopen_ops(be, &loadstate_file_ops);
-+    if (!f) {
-+        error_setg(errp, "Could not open VM state file");
-+        goto the_end;
-+    }
-+
-+    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
-+    ret = qemu_loadvm_state(f);
-+
-+    qemu_fclose(f);
-+    migration_incoming_state_destroy();
-+    if (ret < 0) {
-+        error_setg_errno(errp, -ret, "Error while loading VM state");
-+        goto the_end;
-+    }
-+
-+    ret = 0;
-+
-+ the_end:
-+    if (be) {
-+        blk_op_unblock_all(be, blocker);
-+        error_free(blocker);
-+        blk_unref(be);
-+    }
-+    return ret;
-+}
-diff --git a/softmmu/vl.c b/softmmu/vl.c
-index 32c0047889..4b45eb0c37 100644
--- a/softmmu/vl.c
-+++ b/softmmu/vl.c
-@@ -2827,6 +2827,7 @@ void qemu_init(int argc, char **argv, char **envp)
-     int optind;
-     const char *optarg;
-     const char *loadvm = NULL;
-+    const char *loadstate = NULL;
-     MachineClass *machine_class;
-     const char *cpu_option;
-     const char *vga_model = NULL;
-@@ -3391,6 +3392,9 @@ void qemu_init(int argc, char **argv, char **envp)
-             case QEMU_OPTION_loadvm:
-                 loadvm = optarg;
-                 break;
-+            case QEMU_OPTION_loadstate:
-+                loadstate = optarg;
-+                break;
-             case QEMU_OPTION_full_screen:
-                 dpy.has_full_screen = true;
-                 dpy.full_screen = true;
-@@ -4447,6 +4451,12 @@ void qemu_init(int argc, char **argv, char **envp)
-             autostart = 0;
-             exit(1);
-         }
-+    } else if (loadstate) {
-+        Error *local_err = NULL;
-+        if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
-+            error_report_err(local_err);
-+            autostart = 0;
-+        }
-     }
-     if (replay_mode != REPLAY_MODE_NONE) {
-         replay_vmstate_init();
--- a/debian/patches/pve/0018-PVE-add-optional-buffer-size-to-QEMUFile.patch
+++ b/debian/patches/pve/0018-PVE-add-optional-buffer-size-to-QEMUFile.patch
@@ -0,0 +1,217 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Mon, 4 May 2020 11:05:08 +0200
+Subject: [PATCH] PVE: add optional buffer size to QEMUFile
+
+So we can use a 4M buffer for savevm-async which should
+increase performance storing the state onto ceph.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+[increase max IOV count in QEMUFile to actually write more data]
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: adapt to removal of QEMUFileOps]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ migration/qemu-file.c    | 50 +++++++++++++++++++++++++++-------------
+ migration/qemu-file.h    |  2 ++
+ migration/savevm-async.c |  5 ++--
+ 3 files changed, 39 insertions(+), 18 deletions(-)
+
+diff --git a/migration/qemu-file.c b/migration/qemu-file.c
+index a10882d47f..19c1de0472 100644
+--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
+@@ -35,8 +35,8 @@
+ #include "rdma.h"
+ #include "io/channel-file.h"
+ 
+-#define IO_BUF_SIZE 32768
+-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+#define DEFAULT_IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
+ 
+ struct QEMUFile {
+     QIOChannel *ioc;
+@@ -44,7 +44,8 @@ struct QEMUFile {
+ 
+     int buf_index;
+     int buf_size; /* 0 when writing */
+-    uint8_t buf[IO_BUF_SIZE];
+    size_t buf_allocated_size;
+    uint8_t *buf;
+ 
+     DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
+     struct iovec iov[MAX_IOV_SIZE];
+@@ -101,7 +102,9 @@ int qemu_file_shutdown(QEMUFile *f)
+     return 0;
+ }
+ 
+-static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+static QEMUFile *qemu_file_new_impl(QIOChannel *ioc,
+                                    bool is_writable,
+                                    size_t buffer_size)
+ {
+     QEMUFile *f;
+ 
+@@ -110,6 +113,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+     object_ref(ioc);
+     f->ioc = ioc;
+     f->is_writable = is_writable;
+    f->buf_allocated_size = buffer_size;
+    f->buf = malloc(buffer_size);
+ 
+     return f;
+ }
+@@ -120,17 +125,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+  */
+ QEMUFile *qemu_file_get_return_path(QEMUFile *f)
+ {
+-    return qemu_file_new_impl(f->ioc, !f->is_writable);
+    return qemu_file_new_impl(f->ioc, !f->is_writable, DEFAULT_IO_BUF_SIZE);
+ }
+ 
+ QEMUFile *qemu_file_new_output(QIOChannel *ioc)
+ {
+-    return qemu_file_new_impl(ioc, true);
+    return qemu_file_new_impl(ioc, true, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size)
+{
+    return qemu_file_new_impl(ioc, true, buffer_size);
+ }
+ 
+ QEMUFile *qemu_file_new_input(QIOChannel *ioc)
+ {
+-    return qemu_file_new_impl(ioc, false);
+    return qemu_file_new_impl(ioc, false, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size)
+{
+    return qemu_file_new_impl(ioc, false, buffer_size);
+ }
+ 
+ /*
+@@ -328,7 +343,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
+     do {
+         len = qio_channel_read(f->ioc,
+                                (char *)f->buf + pending,
+-                               IO_BUF_SIZE - pending,
+                               f->buf_allocated_size - pending,
+                                &local_error);
+         if (len == QIO_CHANNEL_ERR_BLOCK) {
+             if (qemu_in_coroutine()) {
+@@ -368,6 +383,9 @@ int qemu_fclose(QEMUFile *f)
+         ret = ret2;
+     }
+     g_clear_pointer(&f->ioc, object_unref);
+
+    free(f->buf);
+
+     error_free(f->last_error_obj);
+     g_free(f);
+     trace_qemu_file_fclose();
+@@ -416,7 +434,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
+ {
+     if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
+         f->buf_index += len;
+-        if (f->buf_index == IO_BUF_SIZE) {
+        if (f->buf_index == f->buf_allocated_size) {
+             qemu_fflush(f);
+         }
+     }
+@@ -441,7 +459,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
+     }
+ 
+     while (size > 0) {
+-        l = IO_BUF_SIZE - f->buf_index;
+        l = f->buf_allocated_size - f->buf_index;
+         if (l > size) {
+             l = size;
+         }
+@@ -587,8 +605,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si
+     size_t index;
+ 
+     assert(!qemu_file_is_writable(f));
+-    assert(offset < IO_BUF_SIZE);
+-    assert(size <= IO_BUF_SIZE - offset);
+    assert(offset < f->buf_allocated_size);
+    assert(size <= f->buf_allocated_size - offset);
+ 
+     /* The 1st byte to read from */
+     index = f->buf_index + offset;
+@@ -638,7 +656,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
+         size_t res;
+         uint8_t *src;
+ 
+-        res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+        res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
+         if (res == 0) {
+             return done;
+         }
+@@ -672,7 +690,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
+  */
+ size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
+ {
+-    if (size < IO_BUF_SIZE) {
+    if (size < f->buf_allocated_size) {
+         size_t res;
+         uint8_t *src = NULL;
+ 
+@@ -697,7 +715,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
+     int index = f->buf_index + offset;
+ 
+     assert(!qemu_file_is_writable(f));
+-    assert(offset < IO_BUF_SIZE);
+    assert(offset < f->buf_allocated_size);
+ 
+     if (index >= f->buf_size) {
+         qemu_fill_buffer(f);
+@@ -811,7 +829,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
+ ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
+                                   const uint8_t *p, size_t size)
+ {
+-    ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
+    ssize_t blen = f->buf_allocated_size - f->buf_index - sizeof(int32_t);
+ 
+     if (blen < compressBound(size)) {
+         return -1;
+diff --git a/migration/qemu-file.h b/migration/qemu-file.h
+index 32fd4a34fd..36a0cd8cc8 100644
+--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
+@@ -30,7 +30,9 @@
+ #include "io/channel.h"
+ 
+ QEMUFile *qemu_file_new_input(QIOChannel *ioc);
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size);
+ QEMUFile *qemu_file_new_output(QIOChannel *ioc);
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size);
+ int qemu_fclose(QEMUFile *f);
+ 
+ /*
+diff --git a/migration/savevm-async.c b/migration/savevm-async.c
+index 779e4e2a78..bf36fc06d2 100644
+--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
+@@ -379,7 +379,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
+ 
+     QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
+                                                                &snap_state.bs_pos));
+-    snap_state.file = qemu_file_new_output(ioc);
+    snap_state.file = qemu_file_new_output_sized(ioc, 4 * 1024 * 1024);
+ 
+     if (!snap_state.file) {
+         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+@@ -496,7 +496,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
+     blk_op_block_all(be, blocker);
+ 
+     /* restore the VM state */
+-    f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+    f = qemu_file_new_input_sized(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)),
+                                  4 * 1024 * 1024);
+     if (!f) {
+         error_setg(errp, "Could not open VM state file");
+         goto the_end;
--- a/debian/patches/pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
+++ b/debian/patches/pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
@@ -4,30 +4,33 @@ Date: Mon, 6 Apr 2020 12:16:47 +0200
 Subject: [PATCH] PVE: block: add the zeroinit block driver filter

 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: adapt to changed function signatures
+     adhere to block graph lock requirements]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- block/Makefile.objs |   1 +
- block/zeroinit.c    | 197 ++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 198 insertions(+)
+ block/meson.build |   1 +
+ block/zeroinit.c  | 214 ++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 215 insertions(+)
 create mode 100644 block/zeroinit.c

-diff --git a/block/Makefile.objs b/block/Makefile.objs
-index 3635b6b4c1..1282445672 100644
--- a/block/Makefile.objs
-+++ b/block/Makefile.objs
-@@ -11,6 +11,7 @@ block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
- block-obj-$(CONFIG_QED) += qed-check.o
- block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
- block-obj-y += quorum.o
-+block-obj-y += zeroinit.o
- block-obj-y += blkdebug.o blkverify.o blkreplay.o
- block-obj-$(CONFIG_PARALLELS) += parallels.o
- block-obj-y += blklogwrites.o
+diff --git a/block/meson.build b/block/meson.build
+index e1f03fd773..b530e117b5 100644
+--- a/block/meson.build
+++ b/block/meson.build
+@@ -39,6 +39,7 @@ block_ss.add(files(
+   'throttle.c',
+   'throttle-groups.c',
+   'write-threshold.c',
+  'zeroinit.c',
+ ), zstd, zlib, gnutls)
+ 
+ system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
 diff --git a/block/zeroinit.c b/block/zeroinit.c
 new file mode 100644
-index 0000000000..ff38388d94
+index 0000000000..696558d8d6
 --- /dev/null
 +++ b/block/zeroinit.c
-@@ -0,0 +1,197 @@
+@@ -0,0 +1,214 @@
 +/*
 + * Filter to fake a zero-initialized block device.
 + *
@@ -41,6 +44,8 @@ index 0000000000..ff38388d94
 +#include "qemu/osdep.h"
 +#include "qapi/error.h"
 +#include "block/block_int.h"
+#include "block/block-io.h"
+#include "block/graph-lock.h"
 +#include "qapi/qmp/qdict.h"
 +#include "qapi/qmp/qstring.h"
 +#include "qemu/cutils.h"
@@ -91,6 +96,7 @@ index 0000000000..ff38388d94
 +                          Error **errp)
 +{
 +    BDRVZeroinitState *s = bs->opaque;
+    BdrvChild *file = NULL;
 +    QemuOpts *opts;
 +    Error *local_err = NULL;
 +    int ret;
@@ -106,8 +112,13 @@ index 0000000000..ff38388d94
 +    }
 +
 +    /* Open the raw file */
-+    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options, "next",
-+                               bs, &child_file, false, &local_err);
+    file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options, "next", bs,
+                           &child_of_bds,
+                           BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false,
+                           &local_err);
+    bdrv_graph_wrlock();
+    bs->file = file;
+    bdrv_graph_wrunlock();
 +    if (local_err) {
 +        ret = -EINVAL;
 +        error_propagate(errp, local_err);
@@ -120,7 +131,9 @@ index 0000000000..ff38388d94
 +    ret = 0;
 +fail:
 +    if (ret < 0) {
+        bdrv_graph_wrlock();
 +        bdrv_unref_child(bs, bs->file);
+        bdrv_graph_wrunlock();
 +    }
 +    qemu_opts_del(opts);
 +    return ret;
@@ -132,28 +145,32 @@ index 0000000000..ff38388d94
 +    (void)s;
 +}
 +
-+static int64_t zeroinit_getlength(BlockDriverState *bs)
+static coroutine_fn int64_t GRAPH_RDLOCK
+zeroinit_co_getlength(BlockDriverState *bs)
 +{
-+    return bdrv_getlength(bs->file->bs);
+    return bdrv_co_getlength(bs->file->bs);
 +}
 +
-+static int coroutine_fn zeroinit_co_preadv(BlockDriverState *bs,
-+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, BdrvRequestFlags flags)
 +{
 +    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 +}
 +
-+static int coroutine_fn zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-+                                                 int count, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 +{
 +    BDRVZeroinitState *s = bs->opaque;
 +    if (offset >= s->extents)
 +        return 0;
-+    return bdrv_pwrite_zeroes(bs->file, offset, count, flags);
+    return bdrv_pwrite_zeroes(bs->file, offset, bytes, flags);
 +}
 +
-+static int coroutine_fn zeroinit_co_pwritev(BlockDriverState *bs,
-+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 +{
 +    BDRVZeroinitState *s = bs->opaque;
 +    int64_t extents = offset + bytes;
@@ -162,32 +179,37 @@ index 0000000000..ff38388d94
 +    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 +}
 +
-+static coroutine_fn int zeroinit_co_flush(BlockDriverState *bs)
+static coroutine_fn int GRAPH_RDLOCK
+zeroinit_co_flush(BlockDriverState *bs)
 +{
 +    return bdrv_co_flush(bs->file->bs);
 +}
 +
-+static int zeroinit_has_zero_init(BlockDriverState *bs)
+static int GRAPH_RDLOCK
+zeroinit_has_zero_init(BlockDriverState *bs)
 +{
 +    BDRVZeroinitState *s = bs->opaque;
 +    return s->has_zero_init;
 +}
 +
-+static int coroutine_fn zeroinit_co_pdiscard(BlockDriverState *bs,
-+                                             int64_t offset, int count)
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 +{
-+    return bdrv_co_pdiscard(bs->file, offset, count);
+    return bdrv_co_pdiscard(bs->file, offset, bytes);
 +}
 +
-+static int zeroinit_co_truncate(BlockDriverState *bs, int64_t offset,
-+                                _Bool exact, PreallocMode prealloc, Error **errp)
+static int GRAPH_RDLOCK
+zeroinit_co_truncate(BlockDriverState *bs, int64_t offset, _Bool exact,
+                     PreallocMode prealloc, BdrvRequestFlags req_flags,
+                     Error **errp)
 +{
-+    return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
+    return bdrv_co_truncate(bs->file, offset, exact, prealloc, req_flags, errp);
 +}
 +
-+static int zeroinit_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static coroutine_fn int GRAPH_RDLOCK
+zeroinit_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 +{
-+    return bdrv_get_info(bs->file->bs, bdi);
+    return bdrv_co_get_info(bs->file->bs, bdi);
 +}
 +
 +static BlockDriver bdrv_zeroinit = {
@@ -198,8 +220,8 @@ index 0000000000..ff38388d94
 +    .bdrv_parse_filename              = zeroinit_parse_filename,
 +    .bdrv_file_open                   = zeroinit_open,
 +    .bdrv_close                       = zeroinit_close,
-+    .bdrv_getlength                   = zeroinit_getlength,
-+    .bdrv_child_perm                  = bdrv_filter_default_perms,
+    .bdrv_co_getlength                = zeroinit_co_getlength,
+    .bdrv_child_perm                  = bdrv_default_perms,
 +    .bdrv_co_flush_to_disk            = zeroinit_co_flush,
 +
 +    .bdrv_co_pwrite_zeroes            = zeroinit_co_pwrite_zeroes,
@@ -211,12 +233,10 @@ index 0000000000..ff38388d94
 +
 +    .bdrv_has_zero_init               = zeroinit_has_zero_init,
 +
-+    .bdrv_co_block_status             = bdrv_co_block_status_from_file,
-+
 +    .bdrv_co_pdiscard                 = zeroinit_co_pdiscard,
 +
 +    .bdrv_co_truncate                 = zeroinit_co_truncate,
-+    .bdrv_get_info                    = zeroinit_get_info,
+    .bdrv_co_get_info                 = zeroinit_co_get_info,
 +};
 +
 +static void bdrv_zeroinit_init(void)
--- a/debian/patches/pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
+++ b/debian/patches/pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
@@ -10,16 +10,16 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
 qemu-options.hx | 3 +++
- softmmu/vl.c    | 8 ++++++++
+ system/vl.c     | 8 ++++++++
 2 files changed, 11 insertions(+)

 diff --git a/qemu-options.hx b/qemu-options.hx
-index 55eef64ddf..e11b4f8ff5 100644
+index 511ab9415e..92e301d545 100644
 --- a/qemu-options.hx
 +++ b/qemu-options.hx
-@@ -904,6 +904,9 @@ DEFHEADING()
+@@ -1237,6 +1237,9 @@ legacy PC, they are not recommended for modern configurations.
 
- DEFHEADING(Block device options:)
+ ERST
 
 +DEF("id", HAS_ARG, QEMU_OPTION_id,
 +    "-id n           set the VMID", QEMU_ARCH_ALL)
@@ -27,21 +27,21 @@ index 55eef64ddf..e11b4f8ff5 100644
 DEF("fda", HAS_ARG, QEMU_OPTION_fda,
     "-fda/-fdb file  use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
 DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
-diff --git a/softmmu/vl.c b/softmmu/vl.c
-index 4b45eb0c37..9de81875fd 100644
--- a/softmmu/vl.c
-+++ b/softmmu/vl.c
-@@ -2815,6 +2815,7 @@ static void create_default_memdev(MachineState *ms, const char *path)
- void qemu_init(int argc, char **argv, char **envp)
- {
-     int i;
+diff --git a/system/vl.c b/system/vl.c
+index 2738ab7c91..20ebf2c920 100644
+--- a/system/vl.c
+++ b/system/vl.c
+@@ -2748,6 +2748,7 @@ void qemu_init(int argc, char **argv)
+     MachineClass *machine_class;
+     bool userconfig = true;
+     FILE *vmstate_dump_file = NULL;
 +    long vm_id;
-     int snapshot, linux_boot;
-     const char *initrd_filename;
-     const char *kernel_filename, *kernel_cmdline;
-@@ -3518,6 +3519,13 @@ void qemu_init(int argc, char **argv, char **envp)
-                     exit(1);
-                 }
+ 
+     qemu_add_opts(&qemu_drive_opts);
+     qemu_add_drive_opts(&qemu_legacy_drive_opts);
+@@ -3371,6 +3372,13 @@ void qemu_init(int argc, char **argv)
+                 machine_parse_property_opt(qemu_find_opts("smp-opts"),
+                                            "smp", optarg);
                 break;
 +            case QEMU_OPTION_id:
 +                vm_id = strtol(optarg, (char **)&optarg, 10);
@@ -50,6 +50,6 @@ index 4b45eb0c37..9de81875fd 100644
 +                    exit(1);
 +                }
 +                break;
+ #ifdef CONFIG_VNC
             case QEMU_OPTION_vnc:
-                 vnc_parse(optarg, &error_fatal);
-                 break;
+                 vnc_parse(optarg);
--- a/debian/patches/pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
+++ b/debian/patches/pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 9 insertions(+)

 diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
-index 9ec0f2deb2..a00d45251f 100644
+index d8fc1e2815..789694b8b3 100644
 --- a/hw/intc/apic_common.c
 +++ b/hw/intc/apic_common.c
-@@ -259,6 +259,15 @@ static void apic_reset_common(DeviceState *dev)
+@@ -263,6 +263,15 @@ static void apic_reset_common(DeviceState *dev)
     info->vapic_base_update(s);
 
     apic_init_reset(dev);
--- a/debian/patches/pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
+++ b/debian/patches/pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
@@ -8,15 +8,15 @@ Otherwise creating images on nfs/cifs can be problematic.
 Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
- block/file-posix.c   | 61 +++++++++++++++++++++++++++++---------------
- qapi/block-core.json |  3 ++-
- 2 files changed, 43 insertions(+), 21 deletions(-)
+ block/file-posix.c   | 59 ++++++++++++++++++++++++++++++--------------
+ qapi/block-core.json |  7 +++++-
+ 2 files changed, 46 insertions(+), 20 deletions(-)

 diff --git a/block/file-posix.c b/block/file-posix.c
-index b527e82a82..36ebd0967e 100644
+index 43bc0bd520..60e98c87f1 100644
 --- a/block/file-posix.c
 +++ b/block/file-posix.c
-@@ -2309,6 +2309,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
+@@ -2876,6 +2876,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
     int fd;
     uint64_t perm, shared;
     int result = 0;
@@ -24,7 +24,7 @@ index b527e82a82..36ebd0967e 100644
 
     /* Validate options and set default values */
     assert(options->driver == BLOCKDEV_DRIVER_FILE);
-@@ -2342,19 +2343,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
+@@ -2916,19 +2917,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
     perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
     shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
 
@@ -59,7 +59,7 @@ index b527e82a82..36ebd0967e 100644
     }
 
     /* Clear the file by truncating it to 0 */
-@@ -2387,13 +2391,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
+@@ -2982,13 +2986,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
     }
 
 out_unlock:
@@ -82,7 +82,7 @@ index b527e82a82..36ebd0967e 100644
     }
 
 out_close:
-@@ -2416,6 +2422,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
+@@ -3012,6 +3018,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
     PreallocMode prealloc;
     char *buf = NULL;
     Error *local_err = NULL;
@@ -90,7 +90,7 @@ index b527e82a82..36ebd0967e 100644
 
     /* Skip file: protocol prefix */
     strstart(filename, "file:", &filename);
-@@ -2433,6 +2440,18 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
+@@ -3034,6 +3041,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
         return -EINVAL;
     }
 
@@ -109,34 +109,36 @@ index b527e82a82..36ebd0967e 100644
     options = (BlockdevCreateOptions) {
         .driver     = BLOCKDEV_DRIVER_FILE,
         .u.file     = {
-@@ -2442,6 +2461,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
-             .preallocation      = prealloc,
-             .has_nocow          = true,
+@@ -3045,6 +3064,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
             .nocow              = nocow,
+             .has_extent_size_hint = has_extent_size_hint,
+             .extent_size_hint   = extent_size_hint,
 +            .has_locking        = true,
 +            .locking            = locking,
         },
     };
     return raw_co_create(&options, errp);
-@@ -2983,7 +3004,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
-     }
- 
-     /* Copy locks to the new fd */
-    if (s->perm_change_fd) {
-+    if (s->use_lock && s->perm_change_fd) {
-         ret = raw_apply_lock_bytes(NULL, s->perm_change_fd, perm, ~shared,
-                                    false, errp);
-         if (ret < 0) {
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 943df1926a..4c55464f86 100644
+index 45ab548dfe..f7c2b63c5d 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -4183,7 +4183,8 @@
-   'data': { 'filename':         'str',
-             'size':             'size',
-             '*preallocation':   'PreallocMode',
-            '*nocow':           'bool' } }
-+            '*nocow':           'bool',
+@@ -4956,6 +4956,10 @@
+ # @extent-size-hint: Extent size hint to add to the image file; 0 for
+ #     not adding an extent size hint (default: 1 MB, since 5.1)
+ #
+# @locking: whether to enable file locking.  If set to 'auto', only
+#     enable when Open File Descriptor (OFD) locking API is available
+#     (default: auto).
+#
+ # Since: 2.12
+ ##
+ { 'struct': 'BlockdevCreateOptionsFile',
+@@ -4963,7 +4967,8 @@
+             'size':                 'size',
+             '*preallocation':       'PreallocMode',
+             '*nocow':               'bool',
+-            '*extent-size-hint':    'size'} }
+            '*extent-size-hint':    'size',
 +            '*locking':         'OnOffAuto' } }
 
 ##
--- a/debian/patches/pve/0022-PVE-savevm-async-kick-AIO-wait-on-block-state-write.patch
+++ b/debian/patches/pve/0022-PVE-savevm-async-kick-AIO-wait-on-block-state-write.patch
@@ -1,22 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Lamprecht <t.lamprecht@proxmox.com>
-Date: Mon, 6 Apr 2020 12:16:51 +0200
-Subject: [PATCH] PVE: savevm-async: kick AIO wait on block state write
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- savevm-async.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/savevm-async.c b/savevm-async.c
-index 54ceeae26c..393d55af2a 100644
--- a/savevm-async.c
-+++ b/savevm-async.c
-@@ -158,6 +158,7 @@ static void coroutine_fn block_state_write_entry(void *opaque) {
-     BlkRwCo *rwco = opaque;
-     rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
-                                rwco->qiov, 0);
-+    aio_wait_kick();
- }
- 
- static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
--- a/debian/patches/pve/0023-PVE-monitor-disable-oob-capability.patch
+++ b/debian/patches/pve/0023-PVE-monitor-disable-oob-capability.patch
@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 1 insertion(+), 2 deletions(-)

 diff --git a/monitor/qmp.c b/monitor/qmp.c
-index f89e7daf27..ed5e39fcf7 100644
+index 589c9524f8..2505dd658a 100644
 --- a/monitor/qmp.c
 +++ b/monitor/qmp.c
-@@ -406,8 +406,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
+@@ -536,8 +536,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
     qemu_chr_fe_set_echo(&mon->common.chr, true);
 
     /* Note: we run QMP monitor in I/O thread when @chr supports that */
--- a/debian/patches/pve/0023-PVE-move-snapshot-cleanup-into-bottom-half.patch
+++ b/debian/patches/pve/0023-PVE-move-snapshot-cleanup-into-bottom-half.patch
@@ -1,38 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Mon, 6 Apr 2020 12:16:52 +0200
-Subject: [PATCH] PVE: move snapshot cleanup into bottom half
-
-as per:
-    (0ceccd858a8d) migration: qemu_savevm_state_cleanup() in cleanup
-
-may affect held locks and therefore change assumptions made
-by that function!
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- savevm-async.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/savevm-async.c b/savevm-async.c
-index 393d55af2a..790e27ae37 100644
--- a/savevm-async.c
-+++ b/savevm-async.c
-@@ -201,6 +201,8 @@ static void process_savevm_cleanup(void *opaque)
-     int ret;
-     qemu_bh_delete(snap_state.cleanup_bh);
-     snap_state.cleanup_bh = NULL;
-+    qemu_savevm_state_cleanup();
-+
-     qemu_mutex_unlock_iothread();
-     qemu_thread_join(&snap_state.thread);
-     qemu_mutex_lock_iothread();
-@@ -277,7 +279,6 @@ static void *process_savevm_thread(void *opaque)
-                     save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
-                     break;
-             }
-            qemu_savevm_state_cleanup();
-             DPRINTF("save complete\n");
-             break;
-         }
--- a/debian/patches/pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
+++ b/debian/patches/pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
@@ -26,10 +26,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/hw/core/machine.c b/hw/core/machine.c
-index c1a444cb75..9f56ecc4e8 100644
+index 4273de16a0..83f1fc0293 100644
 --- a/hw/core/machine.c
 +++ b/hw/core/machine.c
-@@ -56,7 +56,8 @@ GlobalProperty hw_compat_4_0[] = {
+@@ -162,7 +162,8 @@ GlobalProperty hw_compat_4_0[] = {
     { "virtio-vga",     "edid", "false" },
     { "virtio-gpu-device", "edid", "false" },
     { "virtio-device", "use-started", "false" },
--- a/debian/patches/pve/0025-PVE-Allow-version-code-in-machine-type.patch
+++ b/debian/patches/pve/0025-PVE-Allow-version-code-in-machine-type.patch
@@ -0,0 +1,129 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dietmar Maurer <dietmar@proxmox.com>
+Date: Mon, 6 Apr 2020 12:16:55 +0200
+Subject: [PATCH] PVE: Allow version code in machine type
+
+E.g. pc-i440fx-4.0+pve3 would print 'pve3' as version code while
+selecting pc-i440fx-4.0 as machine type.
+
+Version is made available as 'pve-version' in query-machines (same as,
+and only if 'is-current').
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: adapt to QAPI changes]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/core/machine-qmp-cmds.c |  5 +++++
+ include/hw/boards.h        |  2 ++
+ qapi/machine.json          |  4 +++-
+ system/vl.c                | 25 +++++++++++++++++++++++++
+ 4 files changed, 35 insertions(+), 1 deletion(-)
+
+diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
+index 314351cdff..628a3537c5 100644
+--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
+@@ -94,6 +94,11 @@ MachineInfoList *qmp_query_machines(Error **errp)
+         if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
+             info->has_is_current = true;
+             info->is_current = true;
+
+            // PVE version string only exists for current machine
+            if (mc->pve_version) {
+                info->pve_version = g_strdup(mc->pve_version);
+            }
+         }
+ 
+         if (mc->default_cpu_type) {
+diff --git a/include/hw/boards.h b/include/hw/boards.h
+index 8b8f6d5c00..dd6d0a1447 100644
+--- a/include/hw/boards.h
+++ b/include/hw/boards.h
+@@ -246,6 +246,8 @@ struct MachineClass {
+     const char *desc;
+     const char *deprecation_reason;
+ 
+    const char *pve_version;
+
+     void (*init)(MachineState *state);
+     void (*reset)(MachineState *state, ShutdownCause reason);
+     void (*wakeup)(MachineState *state);
+diff --git a/qapi/machine.json b/qapi/machine.json
+index a024d5b05d..1d69bffaa0 100644
+--- a/qapi/machine.json
+++ b/qapi/machine.json
+@@ -168,6 +168,8 @@
+ #
+ # @acpi: machine type supports ACPI (since 8.0)
+ #
+# @pve-version: custom PVE version suffix specified as 'machine+pveN'
+#
+ # Since: 1.2
+ ##
+ { 'struct': 'MachineInfo',
+@@ -175,7 +177,7 @@
+             '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
+             'hotpluggable-cpus': 'bool',  'numa-mem-supported': 'bool',
+             'deprecated': 'bool', '*default-cpu-type': 'str',
+-            '*default-ram-id': 'str', 'acpi': 'bool' } }
+            '*default-ram-id': 'str', 'acpi': 'bool', '*pve-version': 'str' } }
+ 
+ ##
+ # @query-machines:
+diff --git a/system/vl.c b/system/vl.c
+index 20ebf2c920..4d39e32097 100644
+--- a/system/vl.c
+++ b/system/vl.c
+@@ -1659,6 +1659,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
+ static MachineClass *select_machine(QDict *qdict, Error **errp)
+ {
+     const char *machine_type = qdict_get_try_str(qdict, "type");
+    const char *pvever = qdict_get_try_str(qdict, "pvever");
+     GSList *machines = object_class_get_list(TYPE_MACHINE, false);
+     MachineClass *machine_class;
+     Error *local_err = NULL;
+@@ -1676,6 +1677,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
+         }
+     }
+ 
+    if (machine_class) {
+        machine_class->pve_version = g_strdup(pvever);
+        qdict_del(qdict, "pvever");
+    }
+
+     g_slist_free(machines);
+     if (local_err) {
+         error_append_hint(&local_err, "Use -machine help to list supported machines\n");
+@@ -3313,12 +3319,31 @@ void qemu_init(int argc, char **argv)
+             case QEMU_OPTION_machine:
+                 {
+                     bool help;
+                    size_t pvever_index, name_len;
+                    const gchar *name;
+                    gchar *name_clean, *pvever;
+ 
+                     keyval_parse_into(machine_opts_dict, optarg, "type", &help, &error_fatal);
+                     if (help) {
+                         machine_help_func(machine_opts_dict);
+                         exit(EXIT_SUCCESS);
+                     }
+
+                    // PVE version is specified with '+' as separator, e.g. pc-i440fx+pvever
+                    name = qdict_get_try_str(machine_opts_dict, "type");
+                    if (name != NULL) {
+                        name_len = strlen(name);
+                        pvever_index = strcspn(name, "+");
+                        if (pvever_index < name_len) {
+                            name_clean = g_strndup(name, pvever_index);
+                            pvever = g_strndup(name + pvever_index + 1, name_len - pvever_index - 1);
+                            qdict_put_str(machine_opts_dict, "pvever", pvever);
+                            qdict_put_str(machine_opts_dict, "type", name_clean);
+                            g_free(name_clean);
+                            g_free(pvever);
+                        }
+                    }
+
+                     break;
+                 }
+             case QEMU_OPTION_accel:
--- a/debian/patches/pve/0026-PVE-Allow-version-code-in-machine-type.patch
+++ b/debian/patches/pve/0026-PVE-Allow-version-code-in-machine-type.patch
@@ -1,101 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dietmar Maurer <dietmar@proxmox.com>
-Date: Mon, 6 Apr 2020 12:16:55 +0200
-Subject: [PATCH] PVE: Allow version code in machine type
-
-E.g. pc-i440fx-4.0+pve3 would print 'pve3' as version code while
-selecting pc-i440fx-4.0 as machine type.
-
-Version is made available as 'pve-version' in query-machines (same as,
-and only if 'is-current').
-
-Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
- hw/core/machine-qmp-cmds.c |  6 ++++++
- include/hw/boards.h        |  2 ++
- qapi/machine.json          |  3 ++-
- softmmu/vl.c               | 15 ++++++++++++++-
- 4 files changed, 24 insertions(+), 2 deletions(-)
-
-diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
-index 1953633e82..ca8c0dc53d 100644
--- a/hw/core/machine-qmp-cmds.c
-+++ b/hw/core/machine-qmp-cmds.c
-@@ -234,6 +234,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
-         if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
-             info->has_is_current = true;
-             info->is_current = true;
-+
-+            // PVE version string only exists for current machine
-+            if (mc->pve_version) {
-+                info->has_pve_version = true;
-+                info->pve_version = g_strdup(mc->pve_version);
-+            }
-         }
- 
-         if (mc->default_cpu_type) {
-diff --git a/include/hw/boards.h b/include/hw/boards.h
-index fd4d62b501..dd395e9232 100644
--- a/include/hw/boards.h
-+++ b/include/hw/boards.h
-@@ -170,6 +170,8 @@ struct MachineClass {
-     const char *desc;
-     const char *deprecation_reason;
- 
-+    const char *pve_version;
-+
-     void (*init)(MachineState *state);
-     void (*reset)(MachineState *state);
-     void (*wakeup)(MachineState *state);
-diff --git a/qapi/machine.json b/qapi/machine.json
-index f6cf28f9fd..a7f9c79a59 100644
--- a/qapi/machine.json
-+++ b/qapi/machine.json
-@@ -363,7 +363,8 @@
-   'data': { 'name': 'str', '*alias': 'str',
-             '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
-             'hotpluggable-cpus': 'bool',  'numa-mem-supported': 'bool',
-            'deprecated': 'bool', '*default-cpu-type': 'str' } }
-+            'deprecated': 'bool', '*default-cpu-type': 'str',
-+            '*pve-version': 'str' } }
- 
- ##
- # @query-machines:
-diff --git a/softmmu/vl.c b/softmmu/vl.c
-index 9de81875fd..8340c4ca53 100644
--- a/softmmu/vl.c
-+++ b/softmmu/vl.c
-@@ -2300,6 +2300,8 @@ static MachineClass *machine_parse(const char *name, GSList *machines)
- {
-     MachineClass *mc;
-     GSList *el;
-+    size_t pvever_index = 0;
-+    gchar *name_clean;
- 
-     if (is_help_option(name)) {
-         printf("Supported machines are:\n");
-@@ -2316,12 +2318,23 @@ static MachineClass *machine_parse(const char *name, GSList *machines)
-         exit(0);
-     }
- 
-    mc = find_machine(name, machines);
-+    // PVE version is specified with '+' as seperator, e.g. pc-i440fx+pvever
-+    pvever_index = strcspn(name, "+");
-+
-+    name_clean = g_strndup(name, pvever_index);
-+    mc = find_machine(name_clean, machines);
-+    g_free(name_clean);
-+
-     if (!mc) {
-         error_report("unsupported machine type");
-         error_printf("Use -machine help to list supported machines\n");
-         exit(1);
-     }
-+
-+    if (pvever_index < strlen(name)) {
-+        mc->pve_version = &name[pvever_index+1];
-+    }
-+
-     return mc;
- }
- 
--- a/debian/patches/pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
+++ b/debian/patches/pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
@@ -0,0 +1,59 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fabian Ebner <f.ebner@proxmox.com>
+Date: Wed, 2 Mar 2022 08:35:05 +0100
+Subject: [PATCH] block/backup: move bcs bitmap initialization to job creation
+
+For backing up the state of multiple disks from the same time, a job
+for each disk has to be created. It's convenient if the jobs don't
+have to be started at the same time and if operation of the VM can be
+resumed after job creation. This would lead to a window between job
+creation and running the job, where writes can happen. But no writes
+should happen between setting up the copy-before-write filter and
+setting up the block copy state bitmap, because then new writes would
+just pass through.
+
+Commit 06e0a9c16405c0a4c1eca33cf286cc04c42066a2 moved initialization of
+the bitmap to setting up the copy-before-write filter when sync_mode
+is not MIRROR_SYNC_MODE_BITMAP. Ensure that the bitmap is initialized
+upon job creation for the remaining case too, by moving the
+backup_init_bcs_bitmap call to backup_job_create.
+
+Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index ec29d6b810..270957c0cd 100644
+--- a/block/backup.c
+++ b/block/backup.c
+@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
+                                          true);
+     } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
+         /*
+-         * We can't hog the coroutine to initialize this thoroughly.
+-         * Set a flag and resume work when we are able to yield safely.
+         * Initialization is costly here. Simply set a flag and let the
+         * backup_run coroutine resume work once it can yield safely.
+          */
+         block_copy_set_skip_unallocated(job->bcs, true);
+     }
+@@ -252,8 +252,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
+     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
+     int ret;
+ 
+-    backup_init_bcs_bitmap(s);
+-
+     if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
+         int64_t offset = 0;
+         int64_t count;
+@@ -501,6 +499,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+                        &error_abort);
+     bdrv_graph_wrunlock();
+ 
+    backup_init_bcs_bitmap(job);
+
+     return &job->common;
+ 
+  error:
--- a/debian/patches/pve/0027-PVE-Backup-add-vma-backup-format-code.patch
+++ b/debian/patches/pve/0027-PVE-Backup-add-vma-backup-format-code.patch
@@ -3,58 +3,70 @@ From: Dietmar Maurer <dietmar@proxmox.com>
 Date: Mon, 6 Apr 2020 12:16:57 +0200
 Subject: [PATCH] PVE-Backup: add vma backup format code

+Notes about partial restoring: skipping a certain drive is done via a
+map line of the form skip=drive-scsi0. Since in PVE, most archives are
+compressed and piped to vma for restore, it's not easily possible to
+skip reads.
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: improvements during create
+     allow partial restore]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- Makefile      |   3 +-
- Makefile.objs |   1 +
- vma-reader.c  | 857 ++++++++++++++++++++++++++++++++++++++++++++++++++
- vma-writer.c  | 771 +++++++++++++++++++++++++++++++++++++++++++++
- vma.c         | 837 ++++++++++++++++++++++++++++++++++++++++++++++++
- vma.h         | 150 +++++++++
- 6 files changed, 2618 insertions(+), 1 deletion(-)
+ block/meson.build |   2 +
+ meson.build       |   5 +
+ vma-reader.c      | 870 ++++++++++++++++++++++++++++++++++++++++++++
+ vma-writer.c      | 818 +++++++++++++++++++++++++++++++++++++++++
+ vma.c             | 901 ++++++++++++++++++++++++++++++++++++++++++++++
+ vma.h             | 150 ++++++++
+ 6 files changed, 2746 insertions(+)
 create mode 100644 vma-reader.c
 create mode 100644 vma-writer.c
 create mode 100644 vma.c
 create mode 100644 vma.h

-diff --git a/Makefile b/Makefile
-index 8a9113e666..74c2039005 100644
--- a/Makefile
-+++ b/Makefile
-@@ -479,7 +479,7 @@ dummy := $(call unnest-vars,, \
+diff --git a/block/meson.build b/block/meson.build
+index b530e117b5..b245daa98e 100644
+--- a/block/meson.build
+++ b/block/meson.build
+@@ -42,6 +42,8 @@ block_ss.add(files(
+   'zeroinit.c',
+ ), zstd, zlib, gnutls)
 
- include $(SRC_PATH)/tests/Makefile.include
+block_ss.add(files('../vma-writer.c'), libuuid)
+
+ system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
+ system_ss.add(files('block-ram-registrar.c'))
 
-all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
-+all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
+diff --git a/meson.build b/meson.build
+index 91a0aa64c6..620cc594b2 100644
+--- a/meson.build
+++ b/meson.build
+@@ -1922,6 +1922,8 @@ endif
 
- qemu-version.h: FORCE
- 	$(call quiet-command, \
-@@ -608,6 +608,7 @@ qemu-img$(EXESUF): qemu-img.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io
- qemu-nbd$(EXESUF): qemu-nbd.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
- qemu-io$(EXESUF): qemu-io.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
- qemu-storage-daemon$(EXESUF): qemu-storage-daemon.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(chardev-obj-y) $(io-obj-y) $(qom-obj-y) $(storage-daemon-obj-y) $(COMMON_LDADDS)
-+vma$(EXESUF): vma.o vma-reader.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+ has_gettid = cc.has_function('gettid')
 
- qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
+libuuid = cc.find_library('uuid', required: true)
+
+ # libselinux
+ selinux = dependency('libselinux',
+                      required: get_option('selinux'),
+@@ -4023,6 +4025,9 @@ if have_tools
+                dependencies: [blockdev, qemuutil, gnutls, selinux],
+                install: true)
 
-diff --git a/Makefile.objs b/Makefile.objs
-index d0b4dde836..05031a3da7 100644
--- a/Makefile.objs
-+++ b/Makefile.objs
-@@ -18,6 +18,7 @@ block-obj-y += block.o blockjob.o job.o
- block-obj-y += block/ scsi/
- block-obj-y += qemu-io-cmds.o
- block-obj-$(CONFIG_REPLICATION) += replication.o
-+block-obj-y += vma-writer.o
- 
- block-obj-m = block/
+  vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
+                   dependencies: [authz, block, crypto, io, qom], install: true)
+
+   subdir('storage-daemon')
 
+   foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
 diff --git a/vma-reader.c b/vma-reader.c
 new file mode 100644
-index 0000000000..2b1d1cdab3
+index 0000000000..d0b6721812
 --- /dev/null
 +++ b/vma-reader.c
-@@ -0,0 +1,857 @@
+@@ -0,0 +1,870 @@
 +/*
 + * VMA: Virtual Machine Archive
 + *
@@ -72,11 +84,11 @@ index 0000000000..2b1d1cdab3
 +#include <glib.h>
 +#include <uuid/uuid.h>
 +
-+#include "qemu-common.h"
 +#include "qemu/timer.h"
 +#include "qemu/ratelimit.h"
 +#include "vma.h"
 +#include "block/block.h"
+#include "block/graph-lock.h"
 +#include "sysemu/block-backend.h"
 +
 +static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
@@ -86,6 +98,7 @@ index 0000000000..2b1d1cdab3
 +    bool write_zeroes;
 +    unsigned long *bitmap;
 +    int bitmap_size;
+    bool skip;
 +}  VmaRestoreState;
 +
 +struct VmaReader {
@@ -249,6 +262,9 @@ index 0000000000..2b1d1cdab3
 +        if (vmar->rstate[i].bitmap) {
 +            g_free(vmar->rstate[i].bitmap);
 +        }
+        if (vmar->rstate[i].target) {
+            blk_unref(vmar->rstate[i].target);
+        }
 +    }
 +
 +    if (vmar->md5csum) {
@@ -480,13 +496,14 @@ index 0000000000..2b1d1cdab3
 +}
 +
 +static void allocate_rstate(VmaReader *vmar,  guint8 dev_id,
-+                            BlockBackend *target, bool write_zeroes)
+                            BlockBackend *target, bool write_zeroes, bool skip)
 +{
 +    assert(vmar);
 +    assert(dev_id);
 +
 +    vmar->rstate[dev_id].target = target;
 +    vmar->rstate[dev_id].write_zeroes = write_zeroes;
+    vmar->rstate[dev_id].skip = skip;
 +
 +    int64_t size = vmar->devinfo[dev_id].size;
 +
@@ -501,28 +518,30 @@ index 0000000000..2b1d1cdab3
 +}
 +
 +int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
-+                           bool write_zeroes, Error **errp)
+                           bool write_zeroes, bool skip, Error **errp)
 +{
 +    assert(vmar);
-+    assert(target != NULL);
+    assert(target != NULL || skip);
 +    assert(dev_id);
-+    assert(vmar->rstate[dev_id].target == NULL);
+    assert(vmar->rstate[dev_id].target == NULL && !vmar->rstate[dev_id].skip);
 +
-+    int64_t size = blk_getlength(target);
-+    int64_t size_diff = size - vmar->devinfo[dev_id].size;
+    if (target != NULL) {
+        int64_t size = blk_getlength(target);
+        int64_t size_diff = size - vmar->devinfo[dev_id].size;
 +
-+    /* storage types can have different size restrictions, so it
-+     * is not always possible to create an image with exact size.
-+     * So we tolerate a size difference up to 4MB.
-+     */
-+    if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
-+        error_setg(errp, "vma_reader_register_bs for stream %s failed - "
-+                   "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
-+                   size, vmar->devinfo[dev_id].size);
-+        return -1;
+        /* storage types can have different size restrictions, so it
+         * is not always possible to create an image with exact size.
+         * So we tolerate a size difference up to 4MB.
+         */
+        if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+            error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+                       "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+                       size, vmar->devinfo[dev_id].size);
+            return -1;
+        }
 +    }
 +
-+    allocate_rstate(vmar, dev_id, target, write_zeroes);
+    allocate_rstate(vmar, dev_id, target, write_zeroes, skip);
 +
 +    return 0;
 +}
@@ -580,10 +599,12 @@ index 0000000000..2b1d1cdab3
 +            }
 +        }
 +    } else {
-+        int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, buf, nb_sectors * BDRV_SECTOR_SIZE, 0);
+        int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
 +        if (res < 0) {
+            bdrv_graph_rdlock_main_loop();
 +            error_setg(errp, "blk_pwrite to %s failed (%d)",
 +                       bdrv_get_device_name(blk_bs(target)), res);
+            bdrv_graph_rdunlock_main_loop();
 +            return -1;
 +        }
 +    }
@@ -615,19 +636,23 @@ index 0000000000..2b1d1cdab3
 +        VmaRestoreState *rstate = &vmar->rstate[dev_id];
 +        BlockBackend *target = NULL;
 +
+        bool skip = rstate->skip;
+
 +        if (dev_id != vmar->vmstate_stream) {
 +            target = rstate->target;
-+            if (!verify && !target) {
+            if (!verify && !target && !skip) {
 +                error_setg(errp, "got wrong dev id %d", dev_id);
 +                return -1;
 +            }
 +
-+            if (vma_reader_get_bitmap(rstate, cluster_num)) {
-+                error_setg(errp, "found duplicated cluster %zd for stream %s",
-+                          cluster_num, vmar->devinfo[dev_id].devname);
-+                return -1;
+            if (!skip) {
+                if (vma_reader_get_bitmap(rstate, cluster_num)) {
+                    error_setg(errp, "found duplicated cluster %zd for stream %s",
+                              cluster_num, vmar->devinfo[dev_id].devname);
+                    return -1;
+                }
+                vma_reader_set_bitmap(rstate, cluster_num, 1);
 +            }
-+            vma_reader_set_bitmap(rstate, cluster_num, 1);
 +
 +            max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
 +        } else {
@@ -673,7 +698,7 @@ index 0000000000..2b1d1cdab3
 +                return -1;
 +            }
 +
-+            if (!verify) {
+            if (!verify && !skip) {
 +                int nb_sectors = end_sector - sector_num;
 +                if (restore_write_data(vmar, dev_id, target, vmstate_fd,
 +                                       buf + start, sector_num, nb_sectors,
@@ -709,7 +734,7 @@ index 0000000000..2b1d1cdab3
 +                        return -1;
 +                    }
 +
-+                    if (!verify) {
+                    if (!verify && !skip) {
 +                        int nb_sectors = end_sector - sector_num;
 +                        if (restore_write_data(vmar, dev_id, target, vmstate_fd,
 +                                               buf + start, sector_num,
@@ -734,7 +759,7 @@ index 0000000000..2b1d1cdab3
 +                            vmar->partial_zero_cluster_data += zero_size;
 +                        }
 +
-+                        if (rstate->write_zeroes && !verify) {
+                        if (rstate->write_zeroes && !verify && !skip) {
 +                            if (restore_write_data(vmar, dev_id, target, vmstate_fd,
 +                                                   zero_vma_block, sector_num,
 +                                                   nb_sectors, errp) < 0) {
@@ -905,7 +930,7 @@ index 0000000000..2b1d1cdab3
 +
 +    for (dev_id = 1; dev_id < 255; dev_id++) {
 +        if (vma_reader_get_device_info(vmar, dev_id)) {
-+            allocate_rstate(vmar, dev_id, NULL, false);
+            allocate_rstate(vmar, dev_id, NULL, false, false);
 +        }
 +    }
 +
@@ -914,10 +939,10 @@ index 0000000000..2b1d1cdab3
 +
 diff --git a/vma-writer.c b/vma-writer.c
 new file mode 100644
-index 0000000000..fe86b18a60
+index 0000000000..126b296647
 --- /dev/null
 +++ b/vma-writer.c
-@@ -0,0 +1,771 @@
+@@ -0,0 +1,818 @@
 +/*
 + * VMA: Virtual Machine Archive
 + *
@@ -933,6 +958,8 @@ index 0000000000..fe86b18a60
 +
 +#include "qemu/osdep.h"
 +#include <glib.h>
+#include <linux/magic.h>
+#include <sys/vfs.h>
 +#include <uuid/uuid.h>
 +
 +#include "vma.h"
@@ -941,6 +968,8 @@ index 0000000000..fe86b18a60
 +#include "qemu/main-loop.h"
 +#include "qemu/coroutine.h"
 +#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/memalign.h"
 +
 +#define DEBUG_VMA 0
 +
@@ -1123,10 +1152,10 @@ index 0000000000..fe86b18a60
 +{
 +    assert(qemu_in_coroutine());
 +    AioContext *ctx = qemu_get_current_aio_context();
-+    aio_set_fd_handler(ctx, fd, false, NULL, (IOHandler *)qemu_coroutine_enter,
+    aio_set_fd_handler(ctx, fd, NULL, (IOHandler *)qemu_coroutine_enter, NULL,
 +                       NULL, qemu_coroutine_self());
 +    qemu_coroutine_yield();
-+    aio_set_fd_handler(ctx, fd, false, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(ctx, fd, NULL, NULL, NULL, NULL, NULL);
 +}
 +
 +static ssize_t coroutine_fn
@@ -1175,6 +1204,23 @@ index 0000000000..fe86b18a60
 +    return (done == bytes) ? bytes : -1;
 +}
 +
+static bool is_path_tmpfs(const char *path) {
+    struct statfs fs;
+    int ret;
+
+    do {
+        ret = statfs(path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        warn_report("statfs call for %s failed, assuming not tmpfs - %s\n",
+                    path, strerror(errno));
+        return false;
+    }
+
+    return fs.f_type == TMPFS_MAGIC;
+}
+
 +VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
 +{
 +    const char *p;
@@ -1213,23 +1259,30 @@ index 0000000000..fe86b18a60
 +
 +        if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) {
 +            oflags = O_NONBLOCK|O_WRONLY;
-+            vmaw->fd = qemu_open(filename, oflags, 0644);
+            vmaw->fd = qemu_open(filename, oflags, errp);
 +        } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) {
 +            oflags = O_NONBLOCK|O_WRONLY;
-+            vmaw->fd = qemu_open(filename, oflags, 0644);
+            vmaw->fd = qemu_open(filename, oflags, errp);
 +        } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) {
-+            vmaw->fd = monitor_get_fd(cur_mon, tmp_id_str, errp);
+            vmaw->fd = monitor_get_fd(monitor_cur(), tmp_id_str, errp);
 +            if (vmaw->fd < 0) {
 +                goto err;
 +            }
 +            /* try to use O_NONBLOCK */
 +            fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
-+        } else  {
-+            oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_CREAT|O_EXCL;
-+            vmaw->fd = qemu_open(filename, oflags, 0644);
+        } else {
+            gchar *dirname = g_path_get_dirname(filename);
+            oflags = O_NONBLOCK|O_WRONLY|O_EXCL;
+            if (!is_path_tmpfs(dirname)) {
+                oflags |= O_DIRECT;
+            }
+            g_free(dirname);
+            vmaw->fd = qemu_create(filename, oflags, 0644, errp);
 +        }
 +
 +        if (vmaw->fd < 0) {
+            error_free(*errp);
+            *errp = NULL;
 +            error_setg(errp, "can't open file %s - %s\n", filename,
 +                       g_strerror(errno));
 +            goto err;
@@ -1553,17 +1606,33 @@ index 0000000000..fe86b18a60
 +
 +    DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num);
 +
+    uint64_t dev_size = vmaw->stream_info[dev_id].size;
 +    uint16_t mask = 0;
 +
 +    if (buf) {
 +        int i;
 +        int bit = 1;
+        uint64_t byte_offset = cluster_num * VMA_CLUSTER_SIZE;
 +        for (i = 0; i < 16; i++) {
 +            const unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
-+            if (!buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) {
+
+            // Note: If the source is not 64k-aligned, we might reach 4k blocks
+            // after the end of the device. Always mark these as zero in the
+            // mask, so the restore handles them correctly.
+            if (byte_offset < dev_size &&
+                !buffer_is_zero(vmablock, VMA_BLOCK_SIZE))
+            {
 +                mask |= bit;
 +                memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock,
 +                       VMA_BLOCK_SIZE);
+
+                // prevent memory leakage on unaligned last block
+                if (byte_offset + VMA_BLOCK_SIZE > dev_size) {
+                    uint64_t real_data_in_block = dev_size - byte_offset;
+                    memset(vmaw->outbuf + vmaw->outbuf_pos + real_data_in_block,
+                           0, VMA_BLOCK_SIZE - real_data_in_block);
+                }
+
 +                vmaw->outbuf_pos += VMA_BLOCK_SIZE;
 +            } else {
 +                DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
@@ -1571,6 +1640,7 @@ index 0000000000..fe86b18a60
 +                *zero_bytes += VMA_BLOCK_SIZE;
 +            }
 +
+            byte_offset += VMA_BLOCK_SIZE;
 +            bit = bit << 1;
 +        }
 +    } else {
@@ -1596,8 +1666,8 @@ index 0000000000..fe86b18a60
 +
 +    if (dev_id != vmaw->vmstate_stream) {
 +        uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
-+        if (last > vmaw->stream_info[dev_id].size) {
-+            uint64_t diff = last - vmaw->stream_info[dev_id].size;
+        if (last > dev_size) {
+            uint64_t diff = last - dev_size;
 +            if (diff >= VMA_CLUSTER_SIZE) {
 +                vma_writer_set_error(vmaw, "vma_writer_write: "
 +                                     "read after last cluster");
@@ -1687,14 +1757,16 @@ index 0000000000..fe86b18a60
 +        g_checksum_free(vmaw->md5csum);
 +    }
 +
+    qemu_vfree(vmaw->headerbuf);
+    qemu_vfree(vmaw->outbuf);
 +    g_free(vmaw);
 +}
 diff --git a/vma.c b/vma.c
 new file mode 100644
-index 0000000000..a82752448a
+index 0000000000..bb715e9061
 --- /dev/null
 +++ b/vma.c
-@@ -0,0 +1,837 @@
+@@ -0,0 +1,901 @@
 +/*
 + * VMA: Virtual Machine Archive
 + *
@@ -1712,11 +1784,11 @@ index 0000000000..a82752448a
 +#include <glib.h>
 +
 +#include "vma.h"
-+#include "qemu-common.h"
 +#include "qemu/module.h"
 +#include "qemu/error-report.h"
 +#include "qemu/main-loop.h"
 +#include "qemu/cutils.h"
+#include "qemu/memalign.h"
 +#include "qapi/qmp/qdict.h"
 +#include "sysemu/block-backend.h"
 +
@@ -1728,7 +1800,7 @@ index 0000000000..a82752448a
 +        "vma list <filename>\n"
 +        "vma config <filename> [-c config]\n"
 +        "vma create <filename> [-c config] pathname ...\n"
-+        "vma extract <filename> [-r <fifo>] <targetdir>\n"
+        "vma extract <filename> [-d <drive-list>] [-r <fifo>] <targetdir>\n"
 +        "vma verify <filename> [-v]\n"
 +        ;
 +
@@ -1835,6 +1907,7 @@ index 0000000000..a82752448a
 +    char *throttling_group;
 +    char *cache;
 +    bool write_zero;
+    bool skip;
 +} RestoreMap;
 +
 +static bool try_parse_option(char **line, const char *optname, char **out, const char *inbuf) {
@@ -1872,9 +1945,10 @@ index 0000000000..a82752448a
 +    const char *filename;
 +    const char *dirname;
 +    const char *readmap = NULL;
+    gchar **drive_list = NULL;
 +
 +    for (;;) {
-+        c = getopt(argc, argv, "hvr:");
+        c = getopt(argc, argv, "hvd:r:");
 +        if (c == -1) {
 +            break;
 +        }
@@ -1883,6 +1957,9 @@ index 0000000000..a82752448a
 +        case 'h':
 +            help();
 +            break;
+        case 'd':
+            drive_list = g_strsplit(optarg, ",", 254);
+            break;
 +        case 'r':
 +            readmap = optarg;
 +            break;
@@ -1942,76 +2019,89 @@ index 0000000000..a82752448a
 +            char *bps = NULL;
 +            char *group = NULL;
 +            char *cache = NULL;
+            char *devname = NULL;
+            bool skip = false;
+            uint64_t bps_value = 0;
+            const char *path = NULL;
+            bool write_zero = true;
+
 +            if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
 +                break;
 +            }
 +            int len = strlen(line);
 +            if (line[len - 1] == '\n') {
 +                line[len - 1] = '\0';
-+                if (len == 1) {
+                len = len - 1;
+                if (len == 0) {
 +                    break;
 +                }
 +            }
 +
-+            while (1) {
-+                if (!try_parse_option(&line, "format", &format, inbuf) &&
-+                    !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
-+                    !try_parse_option(&line, "throttling.group", &group, inbuf) &&
-+                    !try_parse_option(&line, "cache", &cache, inbuf))
-+                {
-+                    break;
+            if (strncmp(line, "skip", 4) == 0) {
+                if (len < 6 || line[4] != '=') {
+                    g_error("read map failed - option 'skip' has no value ('%s')",
+                            inbuf);
+                } else {
+                    devname = line + 5;
+                    skip = true;
 +                }
-+            }
-+
-+            uint64_t bps_value = 0;
-+            if (bps) {
-+                bps_value = verify_u64(bps);
-+                g_free(bps);
-+            }
-+
-+            const char *path;
-+            bool write_zero;
-+            if (line[0] == '0' && line[1] == ':') {
-+                path = line + 2;
-+                write_zero = false;
-+            } else if (line[0] == '1' && line[1] == ':') {
-+                path = line + 2;
-+                write_zero = true;
 +            } else {
-+                g_error("read map failed - parse error ('%s')", inbuf);
+                while (1) {
+                    if (!try_parse_option(&line, "format", &format, inbuf) &&
+                        !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+                        !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+                        !try_parse_option(&line, "cache", &cache, inbuf))
+                    {
+                        break;
+                    }
+                }
+
+                if (bps) {
+                    bps_value = verify_u64(bps);
+                    g_free(bps);
+                }
+
+                if (line[0] == '0' && line[1] == ':') {
+                    path = line + 2;
+                    write_zero = false;
+                } else if (line[0] == '1' && line[1] == ':') {
+                    path = line + 2;
+                    write_zero = true;
+                } else {
+                    g_error("read map failed - parse error ('%s')", inbuf);
+                }
+
+                path = extract_devname(path, &devname, -1);
 +            }
 +
-+            char *devname = NULL;
-+            path = extract_devname(path, &devname, -1);
 +            if (!devname) {
 +                g_error("read map failed - no dev name specified ('%s')",
 +                        inbuf);
 +            }
 +
-+            RestoreMap *map = g_new0(RestoreMap, 1);
-+            map->devname = g_strdup(devname);
-+            map->path = g_strdup(path);
-+            map->format = format;
-+            map->throttling_bps = bps_value;
-+            map->throttling_group = group;
-+            map->cache = cache;
-+            map->write_zero = write_zero;
+            RestoreMap *restore_map = g_new0(RestoreMap, 1);
+            restore_map->devname = g_strdup(devname);
+            restore_map->path = g_strdup(path);
+            restore_map->format = format;
+            restore_map->throttling_bps = bps_value;
+            restore_map->throttling_group = group;
+            restore_map->cache = cache;
+            restore_map->write_zero = write_zero;
+            restore_map->skip = skip;
 +
-+            g_hash_table_insert(devmap, map->devname, map);
+            g_hash_table_insert(devmap, restore_map->devname, restore_map);
 +
 +        };
 +    }
 +
 +    int i;
 +    int vmstate_fd = -1;
-+    guint8 vmstate_stream = 0;
-+
-+    BlockBackend *blk = NULL;
+    bool drive_rename_bitmap[255];
+    memset(drive_rename_bitmap, 0, sizeof(drive_rename_bitmap));
 +
 +    for (i = 1; i < 255; i++) {
 +        VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
 +        if (di && (strcmp(di->devname, "vmstate") == 0)) {
-+            vmstate_stream = i;
 +            char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
 +            vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
 +            if (vmstate_fd < 0) {
@@ -2027,8 +2117,25 @@ index 0000000000..a82752448a
 +            const char *cache = NULL;
 +            int flags = BDRV_O_RDWR;
 +            bool write_zero = true;
+            bool skip = false;
 +
-+            if (readmap) {
+            BlockBackend *blk = NULL;
+
+            if (drive_list) {
+                skip = true;
+                int j;
+                for (j = 0; drive_list[j]; j++) {
+                    if (strcmp(drive_list[j], di->devname) == 0) {
+                        skip = false;
+                        drive_rename_bitmap[i] = true;
+                        break;
+                    }
+                }
+            } else {
+                drive_rename_bitmap[i] = true;
+            }
+
+            if (!skip && readmap) {
 +                RestoreMap *map;
 +                map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
 +                if (map == NULL) {
@@ -2040,7 +2147,8 @@ index 0000000000..a82752448a
 +                throttling_group = map->throttling_group;
 +                cache = map->cache;
 +                write_zero = map->write_zero;
-+            } else {
+                skip = map->skip;
+            } else if (!skip) {
 +                devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
 +                                        dirname, di->devname);
 +                printf("DEVINFO %s %zd\n", devfn, di->size);
@@ -2058,57 +2166,60 @@ index 0000000000..a82752448a
 +                write_zero = false;
 +            }
 +
-+	    size_t devlen = strlen(devfn);
-+	    QDict *options = NULL;
-+            bool writethrough;
-+            if (format) {
-+                /* explicit format from commandline */
-+                options = qdict_new();
-+                qdict_put_str(options, "driver", format);
-+            } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
-+	               strncmp(devfn, "/dev/", 5) == 0)
-+	    {
-+                /* This part is now deprecated for PVE as well (just as qemu
-+                 * deprecated not specifying an explicit raw format, too.
-+                 */
-+		/* explicit raw format */
-+		options = qdict_new();
-+		qdict_put_str(options, "driver", "raw");
-+	    }
-+            if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
-+                g_error("invalid cache option: %s\n", cache);
-+            }
-+
-+	    if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
-+                g_error("can't open file %s - %s", devfn,
-+                        error_get_pretty(errp));
-+            }
-+
-+            if (cache) {
-+                blk_set_enable_write_cache(blk, !writethrough);
-+            }
-+
-+            if (throttling_group) {
-+                blk_io_limits_enable(blk, throttling_group);
-+            }
-+
-+            if (throttling_bps) {
-+                if (!throttling_group) {
-+                    blk_io_limits_enable(blk, devfn);
+            if (!skip) {
+                size_t devlen = strlen(devfn);
+                QDict *options = NULL;
+                bool writethrough;
+                if (format) {
+                    /* explicit format from commandline */
+                    options = qdict_new();
+                    qdict_put_str(options, "driver", format);
+                } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+                    strncmp(devfn, "/dev/", 5) == 0)
+                {
+                    /* This part is now deprecated for PVE as well (just as qemu
+                     * deprecated not specifying an explicit raw format, too).
+                     */
+                    /* explicit raw format */
+                    options = qdict_new();
+                    qdict_put_str(options, "driver", "raw");
 +                }
 +
-+                ThrottleConfig cfg;
-+                throttle_config_init(&cfg);
-+                cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
-+                Error *err = NULL;
-+                if (!throttle_is_valid(&cfg, &err)) {
-+                    error_report_err(err);
-+                    g_error("failed to apply throttling");
+                if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+                    g_error("invalid cache option: %s\n", cache);
+                }
+
+                if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+                    g_error("can't open file %s - %s", devfn,
+                            error_get_pretty(errp));
+                }
+
+                if (cache) {
+                    blk_set_enable_write_cache(blk, !writethrough);
+                }
+
+                if (throttling_group) {
+                    blk_io_limits_enable(blk, throttling_group);
+                }
+
+                if (throttling_bps) {
+                    if (!throttling_group) {
+                        blk_io_limits_enable(blk, devfn);
+                    }
+
+                    ThrottleConfig cfg;
+                    throttle_config_init(&cfg);
+                    cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+                    Error *err = NULL;
+                    if (!throttle_is_valid(&cfg, &err)) {
+                        error_report_err(err);
+                        g_error("failed to apply throttling");
+                    }
+                    blk_set_io_limits(blk, &cfg);
 +                }
-+                blk_set_io_limits(blk, &cfg);
 +            }
 +
-+            if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
+            if (vma_reader_register_bs(vmar, i, blk, write_zero, skip, &errp) < 0) {
 +                g_error("%s", error_get_pretty(errp));
 +            }
 +
@@ -2118,6 +2229,10 @@ index 0000000000..a82752448a
 +        }
 +    }
 +
+    if (drive_list) {
+        g_strfreev(drive_list);
+    }
+
 +    if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
 +        g_error("restore failed - %s", error_get_pretty(errp));
 +    }
@@ -2125,7 +2240,7 @@ index 0000000000..a82752448a
 +    if (!readmap) {
 +        for (i = 1; i < 255; i++) {
 +            VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
-+            if (di && (i != vmstate_stream)) {
+            if (di && drive_rename_bitmap[i]) {
 +                char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
 +                                              dirname, di->devname);
 +                char *fn = g_strdup_printf("%s/disk-%s.raw",
@@ -2140,8 +2255,6 @@ index 0000000000..a82752448a
 +
 +    vma_reader_destroy(vmar);
 +
-+    blk_unref(blk);
-+
 +    bdrv_close_all();
 +
 +    return ret;
@@ -2226,7 +2339,7 @@ index 0000000000..a82752448a
 +    struct iovec iov;
 +    QEMUIOVector qiov;
 +
-+    int64_t start, end;
+    int64_t start, end, readlen;
 +    int ret = 0;
 +
 +    unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
@@ -2240,16 +2353,24 @@ index 0000000000..a82752448a
 +        iov.iov_len = VMA_CLUSTER_SIZE;
 +        qemu_iovec_init_external(&qiov, &iov, 1);
 +
+        if (start + 1 == end) {
+            memset(buf, 0, VMA_CLUSTER_SIZE);
+            readlen = job->len - start * VMA_CLUSTER_SIZE;
+            assert(readlen > 0 && readlen <= VMA_CLUSTER_SIZE);
+        } else {
+            readlen = VMA_CLUSTER_SIZE;
+        }
+
 +        ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
-+                            VMA_CLUSTER_SIZE, &qiov, 0);
+                            readlen, &qiov, 0);
 +        if (ret < 0) {
-+            vma_writer_set_error(job->vmaw, "read error", -1);
+            vma_writer_set_error(job->vmaw, "read error");
 +            goto out;
 +        }
 +
 +        size_t zb = 0;
 +        if (vma_writer_write(job->vmaw, job->dev_id, start, buf, &zb) < 0) {
-+            vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed", -1);
+            vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed");
 +            goto out;
 +        }
 +    }
@@ -2262,13 +2383,15 @@ index 0000000000..a82752448a
 +            g_warning("vma_writer_close failed %s", error_get_pretty(err));
 +        }
 +    }
+    qemu_vfree(buf);
 +}
 +
 +static int create_archive(int argc, char **argv)
 +{
-+    int i, c;
+    int c;
 +    int verbose = 0;
 +    const char *archivename;
+    GList *backup_coroutines = NULL;
 +    GList *config_files = NULL;
 +
 +    for (;;) {
@@ -2357,7 +2480,9 @@ index 0000000000..a82752448a
 +        job->dev_id = dev_id;
 +
 +        Coroutine *co = qemu_coroutine_create(backup_run, job);
-+        qemu_coroutine_enter(co);
+        // Don't enter coroutine yet, because it might write the header before
+        // all streams can be registered.
+        backup_coroutines = g_list_append(backup_coroutines, co);
 +    }
 +
 +    VmaStatus vmastat;
@@ -2365,6 +2490,13 @@ index 0000000000..a82752448a
 +    int last_percent = -1;
 +
 +    if (devcount) {
+        GList *entry = backup_coroutines;
+        while (entry && entry->data) {
+            Coroutine *co = entry->data;
+            qemu_coroutine_enter(co);
+            entry = g_list_next(entry);
+        }
+
 +        while (1) {
 +            main_loop_wait(false);
 +            vma_writer_get_status(vmaw, &vmastat);
@@ -2414,6 +2546,7 @@ index 0000000000..a82752448a
 +    vma_writer_get_status(vmaw, &vmastat);
 +
 +    if (verbose) {
+        int i;
 +        for (i = 0; i < 256; i++) {
 +            VmaStreamInfo *si = &vmastat.stream_info[i];
 +            if (si->size) {
@@ -2429,6 +2562,9 @@ index 0000000000..a82752448a
 +        g_error("creating vma archive failed");
 +    }
 +
+    g_list_free(backup_coroutines);
+    g_list_free(config_files);
+    vma_writer_destroy(vmaw);
 +    return 0;
 +}
 +
@@ -2534,7 +2670,7 @@ index 0000000000..a82752448a
 +}
 diff --git a/vma.h b/vma.h
 new file mode 100644
-index 0000000000..c895c97f6d
+index 0000000000..86d2873aa5
 --- /dev/null
 +++ b/vma.h
@@ -0,0 +1,150 @@
@@ -2672,7 +2808,7 @@ index 0000000000..c895c97f6d
 +int coroutine_fn vma_writer_flush_output(VmaWriter *vmaw);
 +
 +int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
-+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
 +
 +
 +VmaReader *vma_reader_create(const char *filename, Error **errp);
@@ -2682,7 +2818,7 @@ index 0000000000..c895c97f6d
 +VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
 +int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
 +                           BlockBackend *target, bool write_zeroes,
-+                           Error **errp);
+                           bool skip, Error **errp);
 +int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
 +                       Error **errp);
 +int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);
--- a/debian/patches/pve/0027-PVE-Backup-modify-job-api.patch
+++ b/debian/patches/pve/0027-PVE-Backup-modify-job-api.patch
@@ -1,92 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Mon, 6 Apr 2020 12:16:56 +0200
-Subject: [PATCH] PVE-Backup: modify job api
-
-Introduce a pause_count parameter to start a backup in
-paused mode. This way backups of multiple drives can be
-started up sequentially via the completion callback while
-having been started at the same point in time.
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
- block/backup.c            | 3 +++
- block/replication.c       | 2 +-
- blockdev.c                | 3 ++-
- include/block/block_int.h | 1 +
- job.c                     | 2 +-
- 5 files changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/block/backup.c b/block/backup.c
-index a7a7dcaf4c..ecd93e91e0 100644
--- a/block/backup.c
-+++ b/block/backup.c
-@@ -338,6 +338,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-                   BlockdevOnError on_target_error,
-                   int creation_flags,
-                   BlockCompletionFunc *cb, void *opaque,
-+                  int pause_count,
-                   JobTxn *txn, Error **errp)
- {
-     int64_t len;
-@@ -459,6 +460,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
-                        &error_abort);
- 
-+    job->common.job.pause_count += pause_count;
-+
-     return &job->common;
- 
-  error:
-diff --git a/block/replication.c b/block/replication.c
-index da013c2041..17246a822c 100644
--- a/block/replication.c
-+++ b/block/replication.c
-@@ -554,7 +554,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
-                                 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
-                                 BLOCKDEV_ON_ERROR_REPORT,
-                                 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
-                                backup_job_completed, bs, NULL, &local_err);
-+                                backup_job_completed, bs, 0, NULL, &local_err);
-         if (local_err) {
-             error_propagate(errp, local_err);
-             backup_job_cleanup(bs);
-diff --git a/blockdev.c b/blockdev.c
-index 5faddaa705..65c358e4ef 100644
--- a/blockdev.c
-+++ b/blockdev.c
-@@ -3114,7 +3114,8 @@ static BlockJob *do_backup_common(BackupCommon *backup,
-                             backup->filter_node_name,
-                             backup->on_source_error,
-                             backup->on_target_error,
-                            job_flags, NULL, NULL, txn, errp);
-+                            job_flags, NULL, NULL, 0, txn, errp);
-+
-     return job;
- }
- 
-diff --git a/include/block/block_int.h b/include/block/block_int.h
-index 4c3587ea19..336f71e69d 100644
--- a/include/block/block_int.h
-+++ b/include/block/block_int.h
-@@ -1219,6 +1219,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-                             BlockdevOnError on_target_error,
-                             int creation_flags,
-                             BlockCompletionFunc *cb, void *opaque,
-+                            int pause_count,
-                             JobTxn *txn, Error **errp);
- 
- BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
-diff --git a/job.c b/job.c
-index 53be57a3a0..e82253e041 100644
--- a/job.c
-+++ b/job.c
-@@ -918,7 +918,7 @@ void job_start(Job *job)
-     job->co = qemu_coroutine_create(job_co_entry, job);
-     job->pause_count--;
-     job->busy = true;
-    job->paused = false;
-+    job->paused = job->pause_count > 0;
-     job_state_transition(job, JOB_STATUS_RUNNING);
-     aio_co_enter(job->aio_context, job->co);
- }
--- a/debian/patches/pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
+++ b/debian/patches/pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
@@ -7,33 +7,25 @@ Subject: [PATCH] PVE-Backup: add backup-dump block driver
 - move BackupBlockJob declaration from block/backup.c to include/block/block_int.h
 - block/backup.c - backup-job-create: also consider source cluster size
 - job.c: make job_should_pause non-static
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: adapt to coroutine changes]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
- block/Makefile.objs       |   1 +
- block/backup-dump.c       | 169 ++++++++++++++++++++++++++++++++++++++
- block/backup.c            |  23 ++----
- include/block/block_int.h |  30 +++++++
- job.c                     |   3 +-
- 5 files changed, 207 insertions(+), 19 deletions(-)
+ block/backup-dump.c              | 172 +++++++++++++++++++++++++++++++
+ block/backup.c                   |  30 ++----
+ block/meson.build                |   1 +
+ include/block/block_int-common.h |  35 +++++++
+ job.c                            |   3 +-
+ 5 files changed, 218 insertions(+), 23 deletions(-)
 create mode 100644 block/backup-dump.c

-diff --git a/block/Makefile.objs b/block/Makefile.objs
-index 1282445672..8af7073c83 100644
--- a/block/Makefile.objs
-+++ b/block/Makefile.objs
-@@ -34,6 +34,7 @@ block-obj-$(CONFIG_RBD) += rbd.o
- block-obj-$(CONFIG_GLUSTERFS) += gluster.o
- block-obj-$(CONFIG_VXHS) += vxhs.o
- block-obj-$(CONFIG_LIBSSH) += ssh.o
-+block-obj-y += backup-dump.o
- block-obj-y += accounting.o dirty-bitmap.o
- block-obj-y += write-threshold.o
- block-obj-y += backup.o
 diff --git a/block/backup-dump.c b/block/backup-dump.c
 new file mode 100644
-index 0000000000..3066ab0698
+index 0000000000..e46abf1070
 --- /dev/null
 +++ b/block/backup-dump.c
-@@ -0,0 +1,169 @@
+@@ -0,0 +1,172 @@
 +/*
 + * BlockDriver to send backup data stream to a callback function
 + *
@@ -45,7 +37,8 @@ index 0000000000..3066ab0698
 + */
 +
 +#include "qemu/osdep.h"
-+#include "qemu-common.h"
+
+#include "qapi/qmp/qdict.h"
 +#include "qom/object_interfaces.h"
 +#include "block/block_int.h"
 +
@@ -56,12 +49,12 @@ index 0000000000..3066ab0698
 +    void           *dump_cb_data;
 +} BDRVBackupDumpState;
 +
-+static int qemu_backup_dump_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static coroutine_fn int qemu_backup_dump_co_get_info(BlockDriverState *bs,
+                                                     BlockDriverInfo *bdi)
 +{
 +    BDRVBackupDumpState *s = bs->opaque;
 +
 +    bdi->cluster_size = s->dump_cb_block_size;
-+    bdi->unallocated_blocks_are_zero = true;
 +    return 0;
 +}
 +
@@ -98,7 +91,7 @@ index 0000000000..3066ab0698
 +    /* Nothing to do. */
 +}
 +
-+static int64_t qemu_backup_dump_getlength(BlockDriverState *bs)
+static coroutine_fn int64_t qemu_backup_dump_co_getlength(BlockDriverState *bs)
 +{
 +    BDRVBackupDumpState *s = bs->opaque;
 +
@@ -142,7 +135,7 @@ index 0000000000..3066ab0698
 +static void qemu_backup_dump_child_perm(
 +    BlockDriverState *bs,
 +    BdrvChild *c,
-+    const BdrvChildRole *role,
+    BdrvChildRole role,
 +    BlockReopenQueue *reopen_queue,
 +    uint64_t perm, uint64_t shared,
 +    uint64_t *nperm, uint64_t *nshared)
@@ -158,8 +151,8 @@ index 0000000000..3066ab0698
 +
 +    .bdrv_close                   = qemu_backup_dump_close,
 +    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
-+    .bdrv_getlength               = qemu_backup_dump_getlength,
-+    .bdrv_get_info                = qemu_backup_dump_get_info,
+    .bdrv_co_getlength            = qemu_backup_dump_co_getlength,
+    .bdrv_co_get_info             = qemu_backup_dump_co_get_info,
 +
 +    .bdrv_co_writev               = qemu_backup_dump_co_writev,
 +
@@ -178,7 +171,7 @@ index 0000000000..3066ab0698
 +block_init(bdrv_backup_dump_init);
 +
 +
-+BlockDriverState *bdrv_backup_dump_create(
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
 +    int dump_cb_block_size,
 +    uint64_t byte_size,
 +    BackupDumpFunc *dump_cb,
@@ -186,9 +179,11 @@ index 0000000000..3066ab0698
 +    Error **errp)
 +{
 +    BDRVBackupDumpState *state;
-+    BlockDriverState *bs = bdrv_new_open_driver(
-+        &bdrv_backup_dump_drive, NULL, BDRV_O_RDWR, errp);
 +
+    QDict *options = qdict_new();
+    qdict_put_str(options, "driver", "backup-dump-drive");
+
+    BlockDriverState *bs = bdrv_co_open(NULL, NULL, options, BDRV_O_RDWR, errp);
 +    if (!bs) {
 +        return NULL;
 +    }
@@ -204,17 +199,18 @@ index 0000000000..3066ab0698
 +    return bs;
 +}
 diff --git a/block/backup.c b/block/backup.c
-index ecd93e91e0..cf8f5ad25d 100644
+index 270957c0cd..16d611c4ca 100644
 --- a/block/backup.c
 +++ b/block/backup.c
-@@ -32,24 +32,6 @@
+@@ -29,28 +29,6 @@
 
- #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
+ #include "block/copy-before-write.h"
 
 -typedef struct BackupBlockJob {
 -    BlockJob common;
-    BlockDriverState *backup_top;
+-    BlockDriverState *cbw;
 -    BlockDriverState *source_bs;
+-    BlockDriverState *target_bs;
 -
 -    BdrvDirtyBitmap *sync_bitmap;
 -
@@ -223,38 +219,64 @@ index ecd93e91e0..cf8f5ad25d 100644
 -    BlockdevOnError on_source_error;
 -    BlockdevOnError on_target_error;
 -    uint64_t len;
-    uint64_t bytes_read;
 -    int64_t cluster_size;
+-    BackupPerf perf;
 -
 -    BlockCopyState *bcs;
+-
+-    bool wait;
+-    BlockCopyCallState *bg_bcs_call;
 -} BackupBlockJob;
 -
 static const BlockJobDriver backup_job_driver;
 
- static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
-@@ -411,6 +393,11 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-         goto error;
+ static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
+@@ -461,6 +439,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     }
 
+     cluster_size = block_copy_cluster_size(bcs);
+    if (cluster_size < 0) {
+        goto error;
+    }
+
 +    BlockDriverInfo bdi;
 +    if (bdrv_get_info(bs, &bdi) == 0) {
 +        cluster_size = MAX(cluster_size, bdi.cluster_size);
 +    }
-+
-     /*
-      * If source is in backing chain of target assume that target is going to be
-      * used for "image fleecing", i.e. it should represent a kind of snapshot of
-diff --git a/include/block/block_int.h b/include/block/block_int.h
-index 336f71e69d..62e5579723 100644
--- a/include/block/block_int.h
-+++ b/include/block/block_int.h
-@@ -60,6 +60,36 @@
+ 
+     if (perf->max_chunk && perf->max_chunk < cluster_size) {
+         error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
+diff --git a/block/meson.build b/block/meson.build
+index b245daa98e..e99914eaa4 100644
+--- a/block/meson.build
+++ b/block/meson.build
+@@ -4,6 +4,7 @@ block_ss.add(files(
+   'aio_task.c',
+   'amend.c',
+   'backup.c',
+  'backup-dump.c',
+   'blkdebug.c',
+   'blklogwrites.c',
+   'blkverify.c',
+diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
+index 761276127e..b3e6697613 100644
+--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
+@@ -26,6 +26,7 @@
+ 
+ #include "block/aio.h"
+ #include "block/block-common.h"
+#include "block/block-copy.h"
+ #include "block/block-global-state.h"
+ #include "block/snapshot.h"
+ #include "qemu/iov.h"
+@@ -60,6 +61,40 @@
 
 #define BLOCK_PROBE_BUF_SIZE        512
 
 +typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
 +
-+BlockDriverState *bdrv_backuo_dump_create(
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
 +    int dump_cb_block_size,
 +    uint64_t byte_size,
 +    BackupDumpFunc *dump_cb,
@@ -266,8 +288,9 @@ index 336f71e69d..62e5579723 100644
 +typedef struct BlockCopyState BlockCopyState;
 +typedef struct BackupBlockJob {
 +    BlockJob common;
-+    BlockDriverState *backup_top;
+    BlockDriverState *cbw;
 +    BlockDriverState *source_bs;
+    BlockDriverState *target_bs;
 +
 +    BdrvDirtyBitmap *sync_bitmap;
 +
@@ -276,26 +299,29 @@ index 336f71e69d..62e5579723 100644
 +    BlockdevOnError on_source_error;
 +    BlockdevOnError on_target_error;
 +    uint64_t len;
-+    uint64_t bytes_read;
 +    int64_t cluster_size;
+    BackupPerf perf;
 +
 +    BlockCopyState *bcs;
+
+    bool wait;
+    BlockCopyCallState *bg_bcs_call;
 +} BackupBlockJob;
 +
 enum BdrvTrackedRequestType {
     BDRV_TRACKED_READ,
     BDRV_TRACKED_WRITE,
 diff --git a/job.c b/job.c
-index e82253e041..bcbbb0be02 100644
+index 660ce22c56..baf54c8d60 100644
 --- a/job.c
 +++ b/job.c
-@@ -269,7 +269,8 @@ static bool job_started(Job *job)
-     return job->co;
+@@ -331,7 +331,8 @@ static bool job_started_locked(Job *job)
 }
 
-static bool job_should_pause(Job *job)
-+bool job_should_pause(Job *job);
-+bool job_should_pause(Job *job)
+ /* Called with job_mutex held. */
+-static bool job_should_pause_locked(Job *job)
+bool job_should_pause_locked(Job *job);
+bool job_should_pause_locked(Job *job)
 {
     return job->pause_count > 0;
 }
--- a/debian/patches/pve/0029-PVE-Add-sequential-job-transaction-support.patch
+++ b/debian/patches/pve/0029-PVE-Add-sequential-job-transaction-support.patch
@@ -0,0 +1,101 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stefan Reiter <s.reiter@proxmox.com>
+Date: Thu, 20 Aug 2020 14:31:59 +0200
+Subject: [PATCH] PVE: Add sequential job transaction support
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ include/qemu/job.h | 12 ++++++++++++
+ job.c              | 34 ++++++++++++++++++++++++++++++++++
+ 2 files changed, 46 insertions(+)
+
+diff --git a/include/qemu/job.h b/include/qemu/job.h
+index 2b873f2576..528cd6acb9 100644
+--- a/include/qemu/job.h
+++ b/include/qemu/job.h
+@@ -362,6 +362,18 @@ void job_unlock(void);
+  */
+ JobTxn *job_txn_new(void);
+ 
+/**
+ * Create a new transaction and set it to sequential mode, i.e. run all jobs
+ * one after the other instead of at the same time.
+ */
+JobTxn *job_txn_new_seq(void);
+
+/**
+ * Helper method to start the first job in a sequential transaction to kick it
+ * off. Other jobs will be run after this one completes.
+ */
+void job_txn_start_seq(JobTxn *txn);
+
+ /**
+  * Release a reference that was previously acquired with job_txn_add_job or
+  * job_txn_new. If it's the last reference to the object, it will be freed.
+diff --git a/job.c b/job.c
+index baf54c8d60..3ac5e5cde2 100644
+--- a/job.c
+++ b/job.c
+@@ -94,6 +94,8 @@ struct JobTxn {
+ 
+     /* Reference count */
+     int refcnt;
+
+    bool sequential;
+ };
+ 
+ void job_lock(void)
+@@ -119,6 +121,25 @@ JobTxn *job_txn_new(void)
+     return txn;
+ }
+ 
+JobTxn *job_txn_new_seq(void)
+{
+    JobTxn *txn = job_txn_new();
+    txn->sequential = true;
+    return txn;
+}
+
+void job_txn_start_seq(JobTxn *txn)
+{
+    assert(txn->sequential);
+    assert(!txn->aborting);
+
+    Job *first = QLIST_FIRST(&txn->jobs);
+    assert(first);
+    assert(first->status == JOB_STATUS_CREATED);
+
+    job_start(first);
+}
+
+ /* Called with job_mutex held. */
+ static void job_txn_ref_locked(JobTxn *txn)
+ {
+@@ -1042,6 +1063,12 @@ static void job_completed_txn_success_locked(Job *job)
+      */
+     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
+         if (!job_is_completed_locked(other_job)) {
+            if (txn->sequential) {
+                job_unlock();
+                /* Needs to be called without holding the job lock */
+                job_start(other_job);
+                job_lock();
+            }
+             return;
+         }
+         assert(other_job->ret == 0);
+@@ -1253,6 +1280,13 @@ int job_finish_sync_locked(Job *job,
+         return -EBUSY;
+     }
+ 
+    /* In a sequential transaction, jobs with status CREATED can still exist
+     * when cancelling. They have not begun work, so job_enter() won't do
+     * anything; ensure they are marked as ABORTING if required. */
+    if (job->status == JOB_STATUS_CREATED && job->txn->sequential) {
+        job_update_rc_locked(job);
+    }
+
+     job_unlock();
+     AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
+                             (job_enter(job), !job_is_completed(job)));
--- a/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
+++ b/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
--- a/debian/patches/pve/0030-PVE-Backup-proxmox-backup-patches-for-qemu.patch
+++ b/debian/patches/pve/0030-PVE-Backup-proxmox-backup-patches-for-qemu.patch
--- a/debian/patches/pve/0031-PVE-Backup-aquire-aio_context-before-calling-backup_.patch
+++ b/debian/patches/pve/0031-PVE-Backup-aquire-aio_context-before-calling-backup_.patch
@@ -1,39 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dietmar Maurer <dietmar@proxmox.com>
-Date: Mon, 6 Apr 2020 12:17:00 +0200
-Subject: [PATCH] PVE-Backup: aquire aio_context before calling
- backup_job_create
-
-And do not set target in same aoi_context as source, because
-this is already done in bdrv_backup_top_append ...
-
-Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
---
- pve-backup.c | 8 +++-----
- 1 file changed, 3 insertions(+), 5 deletions(-)
-
-diff --git a/pve-backup.c b/pve-backup.c
-index 9ae89fb679..38dd33e28b 100644
--- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -757,17 +757,15 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
-         PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-         l = g_list_next(l);
- 
-        // make sure target runs in same aoi_context as source
-         AioContext *aio_context = bdrv_get_aio_context(di->bs);
-         aio_context_acquire(aio_context);
-        GSList *ignore = NULL;
-        bdrv_set_aio_context_ignore(di->target, aio_context, &ignore);
-        g_slist_free(ignore);
-        aio_context_release(aio_context);
- 
-         job = backup_job_create(NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
-                                 BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
-                                 JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
-+
-+        aio_context_release(aio_context);
-+
-         if (!job || local_err != NULL) {
-             qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-             error_setg(&backup_state.stat.error, "backup_job_create failed");
--- a/debian/patches/pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
+++ b/debian/patches/pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
@@ -4,40 +4,36 @@ Date: Mon, 6 Apr 2020 12:17:01 +0200
 Subject: [PATCH] PVE-Backup: pbs-restore - new command to restore from proxmox
 backup server

+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[WB: add namespace support]
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
 ---
- Makefile      |   4 +-
- pbs-restore.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 211 insertions(+), 1 deletion(-)
+ meson.build   |   4 +
+ pbs-restore.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 240 insertions(+)
 create mode 100644 pbs-restore.c

-diff --git a/Makefile b/Makefile
-index dbd9542ae4..7c1fb58e18 100644
--- a/Makefile
-+++ b/Makefile
-@@ -479,7 +479,7 @@ dummy := $(call unnest-vars,, \
+diff --git a/meson.build b/meson.build
+index d16b97cf3c..6de51c34cb 100644
+--- a/meson.build
+++ b/meson.build
+@@ -4029,6 +4029,10 @@ if have_tools
+   vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
+                    dependencies: [authz, block, crypto, io, qom], install: true)
 
- include $(SRC_PATH)/tests/Makefile.include
- 
-all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
-+all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) vma$(EXESUF) pbs-restore$(EXESUF) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
- 
- qemu-version.h: FORCE
- 	$(call quiet-command, \
-@@ -610,6 +610,8 @@ qemu-io$(EXESUF): qemu-io.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-o
- qemu-storage-daemon$(EXESUF): qemu-storage-daemon.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(chardev-obj-y) $(io-obj-y) $(qom-obj-y) $(storage-daemon-obj-y) $(COMMON_LDADDS)
- qemu-storage-daemon$(EXESUF): LIBS += -lproxmox_backup_qemu
- vma$(EXESUF): vma.o vma-reader.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
-+pbs-restore$(EXESUF): pbs-restore.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
-+pbs-restore$(EXESUF): LIBS += -lproxmox_backup_qemu
- 
- qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
+  pbs_restore = executable('pbs-restore', files('pbs-restore.c') + genh,
+                  dependencies: [authz, block, crypto, io, qom,
+                    libproxmox_backup_qemu], install: true)
+
+   subdir('storage-daemon')
 
+   foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
 diff --git a/pbs-restore.c b/pbs-restore.c
 new file mode 100644
-index 0000000000..f65de8b890
+index 0000000000..f03d9bab8d
 --- /dev/null
 +++ b/pbs-restore.c
-@@ -0,0 +1,208 @@
+@@ -0,0 +1,236 @@
 +/*
 + * Qemu image restore helper for Proxmox Backup
 + *
@@ -56,7 +52,6 @@ index 0000000000..f65de8b890
 +#include <getopt.h>
 +#include <string.h>
 +
-+#include "qemu-common.h"
 +#include "qemu/module.h"
 +#include "qemu/error-report.h"
 +#include "qemu/main-loop.h"
@@ -70,7 +65,7 @@ index 0000000000..f65de8b890
 +static void help(void)
 +{
 +    const char *help_msg =
-+        "usage: pbs-restore [--repository <repo>] snapshot archive-name target [command options]\n"
+        "usage: pbs-restore [--repository <repo>] [--ns namespace] snapshot archive-name target [command options]\n"
 +        ;
 +
 +    printf("%s", help_msg);
@@ -102,7 +97,7 @@ index 0000000000..f65de8b890
 +        }
 +        res = blk_pwrite_zeroes(callback_data->target, offset, data_len, 0);
 +    } else {
-+        res = blk_pwrite(callback_data->target, offset, data, data_len, 0);
+        res = blk_pwrite(callback_data->target, offset, data_len, data, 0);
 +    }
 +
 +    if (res < 0) {
@@ -118,19 +113,21 @@ index 0000000000..f65de8b890
 +    Error *main_loop_err = NULL;
 +    const char *format = "raw";
 +    const char *repository = NULL;
+    const char *backup_ns = NULL;
 +    const char *keyfile = NULL;
 +    int verbose = false;
 +    bool skip_zero = false;
 +
 +    error_init(argv[0]);
 +
-+    for(;;) {
+    for (;;) {
 +        static const struct option long_options[] = {
 +            {"help", no_argument, 0, 'h'},
 +            {"skip-zero", no_argument, 0, 'S'},
 +            {"verbose", no_argument, 0, 'v'},
 +            {"format", required_argument, 0, 'f'},
 +            {"repository", required_argument, 0, 'r'},
+            {"ns", required_argument, 0, 'n'},
 +            {"keyfile", required_argument, 0, 'k'},
 +            {0, 0, 0, 0}
 +        };
@@ -138,31 +135,34 @@ index 0000000000..f65de8b890
 +        if (c == -1) {
 +            break;
 +        }
-+        switch(c) {
-+        case ':':
-+            fprintf(stderr, "missing argument for option '%s'", argv[optind - 1]);
-+            return -1;
-+        case '?':
-+            fprintf(stderr, "unrecognized option '%s'", argv[optind - 1]);
-+            return -1;
-+        case 'f':
-+            format = g_strdup(argv[optind - 1]);
-+            break;
-+        case 'r':
-+            repository = g_strdup(argv[optind - 1]);
-+            break;
-+        case 'k':
-+            keyfile = g_strdup(argv[optind - 1]);
-+            break;
-+        case 'v':
-+            verbose = true;
-+            break;
-+        case 'S':
-+            skip_zero = true;
-+            break;
-+        case 'h':
-+            help();
-+            return 0;
+        switch (c) {
+            case ':':
+                fprintf(stderr, "missing argument for option '%s'\n", argv[optind - 1]);
+                return -1;
+            case '?':
+                fprintf(stderr, "unrecognized option '%s'\n", argv[optind - 1]);
+                return -1;
+            case 'f':
+                format = g_strdup(argv[optind - 1]);
+                break;
+            case 'r':
+                repository = g_strdup(argv[optind - 1]);
+                break;
+            case 'n':
+                backup_ns = g_strdup(argv[optind - 1]);
+                break;
+            case 'k':
+                keyfile = g_strdup(argv[optind - 1]);
+                break;
+            case 'v':
+                verbose = true;
+                break;
+            case 'S':
+                skip_zero = true;
+                break;
+            case 'h':
+                help();
+                return 0;
 +        }
 +    }
 +
@@ -197,31 +197,50 @@ index 0000000000..f65de8b890
 +    bdrv_init();
 +    module_call_init(MODULE_INIT_QOM);
 +
+    if (verbose) {
+        fprintf(stderr, "connecting to repository '%s'\n", repository);
+    }
 +    char *pbs_error = NULL;
-+    ProxmoxRestoreHandle *conn = proxmox_restore_connect(
-+        repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
+    ProxmoxRestoreHandle *conn = proxmox_restore_new_ns(
+        repository,
+        snapshot,
+        backup_ns,
+        password,
+        keyfile,
+        key_password,
+        fingerprint,
+        &pbs_error
+    );
 +    if (conn == NULL) {
 +        fprintf(stderr, "restore failed: %s\n", pbs_error);
 +        return -1;
 +    }
 +
+    int res = proxmox_restore_connect(conn, &pbs_error);
+    if (res < 0 || pbs_error) {
+        fprintf(stderr, "restore failed (connection error): %s\n", pbs_error);
+        return -1;
+    }
+
 +    QDict *options = qdict_new();
-+    qdict_put_str(options, "driver", format);
 +
 +    if (format) {
 +        qdict_put_str(options, "driver", format);
 +    }
 +
+
+    if (verbose) {
+        fprintf(stderr, "open block backend for target '%s'\n", target);
+    }
 +    Error *local_err = NULL;
 +    int flags = BDRV_O_RDWR;
-+
 +    BlockBackend *blk = blk_new_open(target, NULL, options, flags, &local_err);
 +    if (!blk) {
 +        fprintf(stderr, "%s\n", error_get_pretty(local_err));
 +        return -1;
 +    }
 +
-+    CallbackData *callback_data = calloc(sizeof( CallbackData), 1);
+    CallbackData *callback_data = calloc(sizeof(CallbackData), 1);
 +
 +    callback_data->target = blk;
 +    callback_data->skip_zero = skip_zero;
@@ -229,7 +248,11 @@ index 0000000000..f65de8b890
 +
 +    // blk_set_enable_write_cache(blk, !writethrough);
 +
-+    int res = proxmox_restore_image(
+    if (verbose) {
+        fprintf(stderr, "starting to restore snapshot '%s'\n", snapshot);
+        fflush(stderr); // ensure we do not get printed after the progress log
+    }
+    res = proxmox_restore_image(
 +        conn,
 +        archive_name,
 +        write_callback,
@@ -238,6 +261,7 @@ index 0000000000..f65de8b890
 +        verbose);
 +
 +    proxmox_restore_disconnect(conn);
+    blk_unref(blk);
 +
 +    if (res < 0) {
 +        fprintf(stderr, "restore failed: %s\n", pbs_error);
--- a/debian/patches/pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
+++ b/debian/patches/pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
@@ -0,0 +1,429 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stefan Reiter <s.reiter@proxmox.com>
+Date: Wed, 8 Jul 2020 09:50:54 +0200
+Subject: [PATCH] PVE: Add PBS block driver to map backup archives into VMs
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+[error cleanups, file_open implementation]
+Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[WB: add namespace support]
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+[FE: adapt to changed function signatures
+     make pbs_co_preadv return values consistent with QEMU
+     getlength is now a coroutine function]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/meson.build    |   2 +
+ block/pbs.c          | 307 +++++++++++++++++++++++++++++++++++++++++++
+ meson.build          |   2 +-
+ qapi/block-core.json |  29 ++++
+ qapi/pragma.json     |   1 +
+ 5 files changed, 340 insertions(+), 1 deletion(-)
+ create mode 100644 block/pbs.c
+
+diff --git a/block/meson.build b/block/meson.build
+index 6bba803f94..1945e04eeb 100644
+--- a/block/meson.build
+++ b/block/meson.build
+@@ -49,6 +49,8 @@ block_ss.add(files(
+   '../pve-backup.c',
+ ), libproxmox_backup_qemu)
+ 
+block_ss.add(files('pbs.c'), libproxmox_backup_qemu)
+
+ 
+ system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
+ system_ss.add(files('block-ram-registrar.c'))
+diff --git a/block/pbs.c b/block/pbs.c
+new file mode 100644
+index 0000000000..dd72356bd3
+--- /dev/null
+++ b/block/pbs.c
+@@ -0,0 +1,307 @@
+/*
+ * Proxmox Backup Server read-only block driver
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+#include "qemu/cutils.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+
+#include <proxmox-backup-qemu.h>
+
+#define PBS_OPT_REPOSITORY "repository"
+#define PBS_OPT_NAMESPACE "namespace"
+#define PBS_OPT_SNAPSHOT "snapshot"
+#define PBS_OPT_ARCHIVE "archive"
+#define PBS_OPT_KEYFILE "keyfile"
+#define PBS_OPT_PASSWORD "password"
+#define PBS_OPT_FINGERPRINT "fingerprint"
+#define PBS_OPT_ENCRYPTION_PASSWORD "key_password"
+
+/*
+ * Per-node state of the PBS block driver (stored in bs->opaque).
+ */
+typedef struct {
+    /* restore handle created in pbs_open(), released in pbs_close() */
+    ProxmoxRestoreHandle *conn;
+    /*
+     * Image handle obtained from proxmox_restore_open_image(). Kept as 'int'
+     * instead of plain 'char': the signedness of 'char' is implementation
+     * defined, so a negative (error) value could not be stored reliably.
+     */
+    int aid;
+    /* image length as reported by proxmox_restore_get_image_length() */
+    int64_t length;
+
+    /* g_strdup()'ed copies of the open options, freed in pbs_close() */
+    char *repository;
+    char *namespace; /* only assigned when a namespace option was given */
+    char *snapshot;
+    char *archive;
+} BDRVPBSState;
+
+/*
+ * Options understood by the PBS driver. The list is used both for parsing
+ * "pbs:" pseudo-filenames (pbs_parse_filename) and for absorbing the options
+ * QDict in pbs_open(). All options are plain strings.
+ */
+static QemuOptsList runtime_opts = {
+    .name = "pbs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = PBS_OPT_REPOSITORY,
+            .type = QEMU_OPT_STRING,
+            .help = "The server address and repository to connect to.",
+        },
+        {
+            .name = PBS_OPT_NAMESPACE,
+            .type = QEMU_OPT_STRING,
+            .help = "Optional: The snapshot's namespace.",
+        },
+        {
+            .name = PBS_OPT_SNAPSHOT,
+            .type = QEMU_OPT_STRING,
+            .help = "The snapshot to read.",
+        },
+        {
+            .name = PBS_OPT_ARCHIVE,
+            .type = QEMU_OPT_STRING,
+            .help = "Which archive within the snapshot should be accessed.",
+        },
+        {
+            /* pbs_open() falls back to getenv("PBS_PASSWORD") when unset */
+            .name = PBS_OPT_PASSWORD,
+            .type = QEMU_OPT_STRING,
+            .help = "Server password. Can be passed as env var 'PBS_PASSWORD'.",
+        },
+        {
+            /* pbs_open() falls back to getenv("PBS_FINGERPRINT") when unset */
+            .name = PBS_OPT_FINGERPRINT,
+            .type = QEMU_OPT_STRING,
+            .help = "Server fingerprint. Can be passed as env var 'PBS_FINGERPRINT'.",
+        },
+        {
+            /* pbs_open() falls back to getenv("PBS_ENCRYPTION_PASSWORD") when unset */
+            .name = PBS_OPT_ENCRYPTION_PASSWORD,
+            .type = QEMU_OPT_STRING,
+            .help = "Optional: Key password. Can be passed as env var 'PBS_ENCRYPTION_PASSWORD'.",
+        },
+        {
+            .name = PBS_OPT_KEYFILE,
+            .type = QEMU_OPT_STRING,
+            .help = "Optional: The path to the keyfile to use.",
+        },
+        { /* end of list */ }
+    },
+};
+
+
+/*
+ * Parse a "pbs:" pseudo-filename into driver options.
+ *
+ * filename format:
+ * pbs:repository=<repo>,namespace=<ns>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+ *
+ * The text after the "pbs:" prefix is parsed against runtime_opts and the
+ * resulting key/value pairs are added to @options. On failure @errp is set
+ * (when non-NULL) and @options is not modified.
+ */
+static void pbs_parse_filename(const char *filename, QDict *options,
+                                     Error **errp)
+{
+    QemuOpts *opts;
+
+    if (!strstart(filename, "pbs:", &filename)) {
+        if (errp) error_setg(errp, "pbs_parse_filename failed - missing 'pbs:' prefix");
+        /* bail out: parsing on could try to set @errp a second time below */
+        return;
+    }
+
+    opts = qemu_opts_parse_noisily(&runtime_opts, filename, false);
+    if (!opts) {
+        if (errp) error_setg(errp, "pbs_parse_filename failed");
+        return;
+    }
+
+    qemu_opts_to_qdict(opts, options);
+
+    qemu_opts_del(opts);
+}
+
+/*
+ * Open a PBS archive as a block node.
+ *
+ * Absorbs the driver options from @options, creates a restore handle,
+ * connects to the server, opens the requested archive and caches its length
+ * in the driver state.
+ *
+ * Password, fingerprint and key password fall back to the environment
+ * variables PBS_PASSWORD, PBS_FINGERPRINT and PBS_ENCRYPTION_PASSWORD when
+ * the corresponding option is not set.
+ *
+ * Returns 0 on success or a negative errno value on failure. On failure
+ * everything acquired so far (option string copies, restore handle) is
+ * released again, and @errp is set if the library supplied an error message.
+ *
+ * NOTE(review): the failure cleanup assumes the block layer does not call
+ * .bdrv_close after a failed open - confirm against bdrv_open_driver().
+ */
+static int pbs_open(BlockDriverState *bs, QDict *options, int flags,
+                    Error **errp)
+{
+    QemuOpts *opts;
+    BDRVPBSState *s = bs->opaque;
+    char *pbs_error = NULL;
+    int ret;
+    int aid;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &error_abort);
+
+    s->repository = g_strdup(qemu_opt_get(opts, PBS_OPT_REPOSITORY));
+    s->snapshot = g_strdup(qemu_opt_get(opts, PBS_OPT_SNAPSHOT));
+    s->archive = g_strdup(qemu_opt_get(opts, PBS_OPT_ARCHIVE));
+    const char *keyfile = qemu_opt_get(opts, PBS_OPT_KEYFILE);
+    const char *password = qemu_opt_get(opts, PBS_OPT_PASSWORD);
+    const char *namespace = qemu_opt_get(opts, PBS_OPT_NAMESPACE);
+    const char *fingerprint = qemu_opt_get(opts, PBS_OPT_FINGERPRINT);
+    const char *key_password = qemu_opt_get(opts, PBS_OPT_ENCRYPTION_PASSWORD);
+
+    if (!password) {
+        password = getenv("PBS_PASSWORD");
+    }
+    if (!fingerprint) {
+        fingerprint = getenv("PBS_FINGERPRINT");
+    }
+    if (!key_password) {
+        key_password = getenv("PBS_ENCRYPTION_PASSWORD");
+    }
+    if (namespace) {
+        s->namespace = g_strdup(namespace);
+    }
+
+    /* connect to PBS server in read mode */
+    s->conn = proxmox_restore_new_ns(s->repository, s->snapshot, s->namespace, password,
+        keyfile, key_password, fingerprint, &pbs_error);
+
+    /* invalidates qemu_opt_get char pointers from above */
+    qemu_opts_del(opts);
+
+    if (!s->conn) {
+        if (pbs_error && errp) error_setg(errp, "PBS restore_new failed: %s", pbs_error);
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    if (proxmox_restore_connect(s->conn, &pbs_error) < 0) {
+        if (pbs_error && errp) error_setg(errp, "PBS connect failed: %s", pbs_error);
+        ret = -ECONNREFUSED;
+        goto fail;
+    }
+
+    /*
+     * acquire handle and length; the handle is checked through a local 'int'
+     * so the error test does not depend on the type of the state field
+     */
+    aid = proxmox_restore_open_image(s->conn, s->archive, &pbs_error);
+    if (aid < 0) {
+        if (pbs_error && errp) error_setg(errp, "PBS open_image failed: %s", pbs_error);
+        ret = -ENODEV;
+        goto fail;
+    }
+    s->aid = aid;
+
+    s->length = proxmox_restore_get_image_length(s->conn, s->aid, &pbs_error);
+    if (s->length < 0) {
+        if (pbs_error && errp) error_setg(errp, "PBS get_image_length failed: %s", pbs_error);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    if (pbs_error) proxmox_backup_free_error(pbs_error);
+
+    /* undo everything acquired above so a failed open does not leak */
+    if (s->conn) {
+        proxmox_restore_disconnect(s->conn);
+        s->conn = NULL;
+    }
+    g_free(s->repository);
+    s->repository = NULL;
+    g_free(s->namespace);
+    s->namespace = NULL;
+    g_free(s->snapshot);
+    s->snapshot = NULL;
+    g_free(s->archive);
+    s->archive = NULL;
+
+    return ret;
+}
+
+/*
+ * Entry point registered as .bdrv_file_open; forwards all arguments unchanged
+ * to pbs_open() and returns its result.
+ */
+static int pbs_file_open(BlockDriverState *bs, QDict *options, int flags,
+                         Error **errp)
+{
+    return pbs_open(bs, options, flags, errp);
+}
+
+/*
+ * Release everything held in the driver state: the duplicated option
+ * strings and, last, the restore connection.
+ */
+static void pbs_close(BlockDriverState *bs)
+{
+    BDRVPBSState *state = bs->opaque;
+
+    /* the strings are independent of each other, so free order is irrelevant */
+    g_free(state->archive);
+    g_free(state->snapshot);
+    g_free(state->namespace);
+    g_free(state->repository);
+
+    proxmox_restore_disconnect(state->conn);
+}
+
+/*
+ * Return the image length that pbs_open() stored in the driver state.
+ * No request is sent to the server here.
+ */
+static coroutine_fn int64_t GRAPH_RDLOCK
+pbs_co_getlength(BlockDriverState *bs)
+{
+    BDRVPBSState *s = bs->opaque;
+    return s->length;
+}
+
+/*
+ * Context handed to read_callback(): identifies the coroutine that issued an
+ * asynchronous read and the AioContext it is rescheduled in.
+ */
+typedef struct ReadCallbackData {
+    Coroutine *co;   /* coroutine waiting for the read (see pbs_co_preadv) */
+    AioContext *ctx; /* context passed to aio_co_schedule() */
+} ReadCallbackData;
+
+/*
+ * Completion callback for proxmox_restore_read_image_at_async():
+ * reschedules the coroutine recorded in @callback_data (a ReadCallbackData)
+ * in its AioContext.
+ */
+static void read_callback(void *callback_data)
+{
+    const ReadCallbackData *waiter = callback_data;
+
+    aio_co_schedule(waiter->ctx, waiter->co);
+}
+
+/*
+ * Read @bytes bytes starting at @offset from the opened archive into @qiov.
+ *
+ * A single-element I/O vector that is large enough is filled directly;
+ * otherwise the data is read into a temporary bounce buffer and copied into
+ * @qiov afterwards. The coroutine yields while the asynchronous read is in
+ * flight and is rescheduled by read_callback().
+ *
+ * Returns 0 on success and -EIO on failure (negative arguments or a read
+ * error reported by the library).
+ */
+static coroutine_fn int GRAPH_RDLOCK
+pbs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    BDRVPBSState *s = bs->opaque;
+    int ret;
+    char *pbs_error = NULL;
+    uint8_t *buf;
+    bool inline_buf = true;
+
+    /*
+     * Validate before touching 'bytes': a negative value must neither be
+     * compared against the unsigned iov_len nor be passed to g_malloc(), and
+     * returning here cannot leak a bounce buffer.
+     */
+    if (offset < 0 || bytes < 0) {
+        fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+        return -EIO;
+    }
+
+    /* for single-buffer IO vectors we can fast-path the write directly to it */
+    if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+        buf = qiov->iov->iov_base;
+    } else {
+        inline_buf = false;
+        buf = g_malloc(bytes);
+    }
+
+    ReadCallbackData rcb = {
+        .co = qemu_coroutine_self(),
+        .ctx = bdrv_get_aio_context(bs),
+    };
+
+    /* 'ret' and 'pbs_error' are filled in by the library for this request */
+    proxmox_restore_read_image_at_async(s->conn, s->aid, buf, (uint64_t)offset, (uint64_t)bytes,
+                                        read_callback, (void *) &rcb, &ret, &pbs_error);
+
+    qemu_coroutine_yield();
+
+    if (ret < 0) {
+        fprintf(stderr, "error during PBS read: %s\n", pbs_error ? pbs_error : "unknown error");
+        if (pbs_error) proxmox_backup_free_error(pbs_error);
+        /* the bounce buffer is ours; release it on the error path as well */
+        if (!inline_buf) {
+            g_free(buf);
+        }
+        return -EIO;
+    }
+
+    if (!inline_buf) {
+        qemu_iovec_from_buf(qiov, 0, buf, bytes);
+        g_free(buf);
+    }
+
+    return 0;
+}
+
+/*
+ * Write handler: the driver only supports reading, so every write request is
+ * rejected with -EPERM after printing a hint to stderr.
+ */
+static coroutine_fn int GRAPH_RDLOCK
+pbs_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+               QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    static const char read_only_hint[] =
+        "pbs-bdrv: cannot write to backup file, make sure "
+        "any attached disk devices are set to read-only!\n";
+
+    fputs(read_only_hint, stderr);
+
+    return -EPERM;
+}
+
+/*
+ * Rebuild bs->exact_filename from the stored options:
+ *   "<repository>/<namespace>:<snapshot>(<archive>)" when a namespace is set,
+ *   "<repository>/<snapshot>(<archive>)" otherwise.
+ */
+static void GRAPH_RDLOCK
+pbs_refresh_filename(BlockDriverState *bs)
+{
+    BDRVPBSState *state = bs->opaque;
+    char *out = bs->exact_filename;
+    const size_t out_size = sizeof(bs->exact_filename);
+
+    if (!state->namespace) {
+        snprintf(out, out_size, "%s/%s(%s)",
+                 state->repository, state->snapshot, state->archive);
+        return;
+    }
+
+    snprintf(out, out_size, "%s/%s:%s(%s)",
+             state->repository, state->namespace, state->snapshot,
+             state->archive);
+}
+
+/* NULL-terminated list of strong runtime options; the PBS driver declares none. */
+static const char *const pbs_strong_runtime_opts[] = {
+    NULL
+};
+
+/*
+ * Driver description for the "pbs" format/protocol; registered with the
+ * block layer by bdrv_pbs_init().
+ */
+static BlockDriver bdrv_pbs_co = {
+    .format_name            = "pbs",
+    .protocol_name          = "pbs",
+    .instance_size          = sizeof(BDRVPBSState),
+
+    .bdrv_parse_filename    = pbs_parse_filename,
+
+    /* both open hooks end up in pbs_open() */
+    .bdrv_file_open         = pbs_file_open,
+    .bdrv_open              = pbs_open,
+    .bdrv_close             = pbs_close,
+    .bdrv_co_getlength      = pbs_co_getlength,
+
+    .bdrv_co_preadv         = pbs_co_preadv,
+    /* writes are always rejected with -EPERM */
+    .bdrv_co_pwritev        = pbs_co_pwritev,
+
+    .bdrv_refresh_filename  = pbs_refresh_filename,
+    .strong_runtime_opts    = pbs_strong_runtime_opts,
+};
+
+/* Register the PBS driver with the block layer; hooked up via block_init() below. */
+static void bdrv_pbs_init(void)
+{
+    bdrv_register(&bdrv_pbs_co);
+}
+
+block_init(bdrv_pbs_init);
+diff --git a/meson.build b/meson.build
+index 6de51c34cb..3bc039f60f 100644
+--- a/meson.build
+++ b/meson.build
+@@ -4477,7 +4477,7 @@ summary_info += {'bzip2 support':     libbzip2}
+ summary_info += {'lzfse support':     liblzfse}
+ summary_info += {'zstd support':      zstd}
+ summary_info += {'NUMA host support': numa}
+-summary_info += {'capstone':          capstone}
+summary_info += {'PBS bdrv support':  config_host.has_key('CONFIG_PBS_BDRV')}
+ summary_info += {'libpmem support':   libpmem}
+ summary_info += {'libdaxctl support': libdaxctl}
+ summary_info += {'libudev':           libudev}
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index e49c7b5bc9..fc32ff9957 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -3457,6 +3457,7 @@
+             'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
+             'raw', 'rbd',
+             { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
+            'pbs',
+             'ssh', 'throttle', 'vdi', 'vhdx',
+             { 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
+             { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
+@@ -3543,6 +3544,33 @@
+ { 'struct': 'BlockdevOptionsNull',
+   'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
+ 
+##
+# @BlockdevOptionsPbs:
+#
+# Driver specific block device options for the PBS backend.
+#
+# @repository: Proxmox Backup Server repository.
+#
+# @snapshot: backup snapshot ID.
+#
+# @archive: archive name.
+#
+# @keyfile: keyfile to use for encryption.
+#
+# @password: password to use for connection.
+#
+# @fingerprint: backup server fingerprint.
+#
+# @key_password: password to unlock key.
+#
+# @namespace: namespace where backup snapshot lives.
+#
+##
+{ 'struct': 'BlockdevOptionsPbs',
+  'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
+            '*keyfile': 'str', '*password': 'str', '*fingerprint': 'str',
+            '*key_password': 'str', '*namespace': 'str' } }
+
+ ##
+ # @BlockdevOptionsNVMe:
+ #
+@@ -4977,6 +5005,7 @@
+       'nfs':        'BlockdevOptionsNfs',
+       'null-aio':   'BlockdevOptionsNull',
+       'null-co':    'BlockdevOptionsNull',
+      'pbs':        'BlockdevOptionsPbs',
+       'nvme':       'BlockdevOptionsNVMe',
+       'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
+                          'if': 'CONFIG_BLKIO' },
+diff --git a/qapi/pragma.json b/qapi/pragma.json
+index be8fa304c5..7ff46bd128 100644
+--- a/qapi/pragma.json
+++ b/qapi/pragma.json
+@@ -100,6 +100,7 @@
+         'BlockInfo',                # query-block
+         'BlockdevAioOptions',       # blockdev-add, -blockdev
+         'BlockdevDriver',           # blockdev-add, query-blockstats, ...
+        'BlockdevOptionsPbs',       # for PBS backwards compat
+         'BlockdevVmdkAdapterType',  # blockdev-create (to match VMDK spec)
+         'BlockdevVmdkSubformat',    # blockdev-create (to match VMDK spec)
+         'ColoCompareProperties',    # object_add, -object
--- a/debian/patches/pve/0033-PVE-Backup-avoid-coroutines-to-fix-AIO-freeze-cleanu.patch
+++ b/debian/patches/pve/0033-PVE-Backup-avoid-coroutines-to-fix-AIO-freeze-cleanu.patch
@@ -1,884 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dietmar Maurer <dietmar@proxmox.com>
-Date: Mon, 6 Apr 2020 12:17:02 +0200
-Subject: [PATCH] PVE-Backup: avoid coroutines to fix AIO freeze, cleanups
-
-We observed various AIO pool loop freezes, so we decided to avoid
-coroutines and restrict ourselfes using similar code as upstream
-(see blockdev.c: do_backup_common).
-
-* avoid coroutine for job related code (causes hangs with iothreads)
-    - this imply to use normal QemuRecMutex instead of CoMutex
-
-* split pvebackup_co_dump_cb into:
-    - pvebackup_co_dump_pbs_cb and
-    - pvebackup_co_dump_pbs_cb
-
-* new helper functions
-    - pvebackup_propagate_error
-    - pvebackup_error_or_canceled
-    - pvebackup_add_transfered_bytes
-
-* avoid cancel flag (not needed)
-
-* simplify backup_cancel logic
-
-There is progress on upstream to support running qmp commands inside
-coroutines, see:
-
-https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04852.html
-
-We should consider using that when it is available in upstream qemu.
---
- pve-backup.c | 611 +++++++++++++++++++++++++--------------------------
- 1 file changed, 299 insertions(+), 312 deletions(-)
-
-diff --git a/pve-backup.c b/pve-backup.c
-index 38dd33e28b..169f0c68d0 100644
--- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -11,11 +11,10 @@
- 
- /* PVE backup state and related function */
- 
-
- static struct PVEBackupState {
-     struct {
-        // Everithing accessed from qmp command, protected using rwlock
-        CoRwlock rwlock;
-+        // Everithing accessed from qmp_backup_query command is protected using lock
-+        QemuRecMutex lock;
-         Error *error;
-         time_t start_time;
-         time_t end_time;
-@@ -25,19 +24,18 @@ static struct PVEBackupState {
-         size_t total;
-         size_t transferred;
-         size_t zero_bytes;
-        bool cancel;
-     } stat;
-     int64_t speed;
-     VmaWriter *vmaw;
-     ProxmoxBackupHandle *pbs;
-     GList *di_list;
-    CoMutex backup_mutex;
-+    QemuRecMutex backup_mutex;
- } backup_state;
- 
- static void pvebackup_init(void)
- {
-    qemu_co_rwlock_init(&backup_state.stat.rwlock);
-    qemu_co_mutex_init(&backup_state.backup_mutex);
-+    qemu_rec_mutex_init(&backup_state.stat.lock);
-+    qemu_rec_mutex_init(&backup_state.backup_mutex);
- }
- 
- // initialize PVEBackupState at startup
-@@ -52,10 +50,54 @@ typedef struct PVEBackupDevInfo {
-     BlockDriverState *target;
- } PVEBackupDevInfo;
- 
-static void pvebackup_co_run_next_job(void);
-+static void pvebackup_run_next_job(void);
-+
-+static BlockJob *
-+lookup_active_block_job(PVEBackupDevInfo *di)
-+{
-+    if (!di->completed && di->bs) {
-+        for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
-+            if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
-+                continue;
-+            }
-+
-+            BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
-+            if (bjob && bjob->source_bs == di->bs) {
-+                return job;
-+            }
-+        }
-+    }
-+    return NULL;
-+}
-+
-+static void pvebackup_propagate_error(Error *err)
-+{
-+    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    error_propagate(&backup_state.stat.error, err);
-+    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+}
-+
-+static bool pvebackup_error_or_canceled(void)
-+{
-+    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    bool error_or_canceled = !!backup_state.stat.error;
-+    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+
-+    return error_or_canceled;
-+}
- 
-+static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
-+{
-+    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    backup_state.stat.zero_bytes += zero_bytes;
-+    backup_state.stat.transferred += transferred;
-+    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+}
-+
-+// This may get called from multiple coroutines in multiple io-threads
-+// Note1: this may get called after job_cancel()
- static int coroutine_fn
-pvebackup_co_dump_cb(
-+pvebackup_co_dump_pbs_cb(
-     void *opaque,
-     uint64_t start,
-     uint64_t bytes,
-@@ -67,137 +109,129 @@ pvebackup_co_dump_cb(
-     const unsigned char *buf = pbuf;
-     PVEBackupDevInfo *di = opaque;
- 
-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-    bool cancel = backup_state.stat.cancel;
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-+    assert(backup_state.pbs);
-+
-+    Error *local_err = NULL;
-+    int pbs_res = -1;
-+
-+    qemu_rec_mutex_lock(&backup_state.backup_mutex);
- 
-    if (cancel) {
-        return size; // return success
-+    // avoid deadlock if job is cancelled
-+    if (pvebackup_error_or_canceled()) {
-+        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+        return -1;
-     }
- 
-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-+    pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
-+    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- 
-    int ret = -1;
-+    if (pbs_res < 0) {
-+        pvebackup_propagate_error(local_err);
-+        return pbs_res;
-+    } else {
-+        pvebackup_add_transfered_bytes(size, !buf ? size : 0);
-+    }
- 
-    if (backup_state.vmaw) {
-        size_t zero_bytes = 0;
-        uint64_t remaining = size;
-
-        uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
-        if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
-            qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-            if (!backup_state.stat.error) {
-                qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-                error_setg(&backup_state.stat.error,
-                           "got unaligned write inside backup dump "
-                           "callback (sector %ld)", start);
-            }
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-            qemu_co_mutex_unlock(&backup_state.backup_mutex);
-            return -1; // not aligned to cluster size
-        }
-+    return size;
-+}
- 
-        while (remaining > 0) {
-            ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num,
-                                   buf, &zero_bytes);
-            ++cluster_num;
-            if (buf) {
-                buf += VMA_CLUSTER_SIZE;
-            }
-            if (ret < 0) {
-                qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-                if (!backup_state.stat.error) {
-                    qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-                    vma_writer_error_propagate(backup_state.vmaw, &backup_state.stat.error);
-                }
-                qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-+// This may get called from multiple coroutines in multiple io-threads
-+static int coroutine_fn
-+pvebackup_co_dump_vma_cb(
-+    void *opaque,
-+    uint64_t start,
-+    uint64_t bytes,
-+    const void *pbuf)
-+{
-+    assert(qemu_in_coroutine());
- 
-                qemu_co_mutex_unlock(&backup_state.backup_mutex);
-                return ret;
-            } else {
-                qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-                backup_state.stat.zero_bytes += zero_bytes;
-                if (remaining >= VMA_CLUSTER_SIZE) {
-                    backup_state.stat.transferred += VMA_CLUSTER_SIZE;
-                    remaining -= VMA_CLUSTER_SIZE;
-                } else {
-                    backup_state.stat.transferred += remaining;
-                    remaining = 0;
-                }
-                qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-            }
-        }
-    } else if (backup_state.pbs) {
-        Error *local_err = NULL;
-        int pbs_res = -1;
-+    const uint64_t size = bytes;
-+    const unsigned char *buf = pbuf;
-+    PVEBackupDevInfo *di = opaque;
- 
-        pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
- 
-        qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-+    int ret = -1;
- 
-        if (pbs_res < 0) {
-            error_propagate(&backup_state.stat.error, local_err);
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-            qemu_co_mutex_unlock(&backup_state.backup_mutex);
-            return pbs_res;
-        } else {
-            if (!buf) {
-                backup_state.stat.zero_bytes += size;
-            }
-            backup_state.stat.transferred += size;
-+    assert(backup_state.vmaw);
-+
-+    uint64_t remaining = size;
-+
-+    uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
-+    if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
-+        Error *local_err = NULL;
-+        error_setg(&local_err,
-+                   "got unaligned write inside backup dump "
-+                   "callback (sector %ld)", start);
-+        pvebackup_propagate_error(local_err);
-+        return -1; // not aligned to cluster size
-+    }
-+
-+    while (remaining > 0) {
-+        qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+        // avoid deadlock if job is cancelled
-+        if (pvebackup_error_or_canceled()) {
-+            qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+            return -1;
-         }
- 
-        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-+        size_t zero_bytes = 0;
-+        ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes);
-+        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- 
-    } else {
-        qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-        if (!buf) {
-            backup_state.stat.zero_bytes += size;
-+        ++cluster_num;
-+        if (buf) {
-+            buf += VMA_CLUSTER_SIZE;
-+        }
-+        if (ret < 0) {
-+            Error *local_err = NULL;
-+            vma_writer_error_propagate(backup_state.vmaw, &local_err);
-+            pvebackup_propagate_error(local_err);
-+            return ret;
-+        } else {
-+            if (remaining >= VMA_CLUSTER_SIZE) {
-+                assert(ret == VMA_CLUSTER_SIZE);
-+                pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes);
-+                remaining -= VMA_CLUSTER_SIZE;
-+            } else {
-+                assert(ret == remaining);
-+                pvebackup_add_transfered_bytes(remaining, zero_bytes);
-+                remaining = 0;
-+            }
-         }
-        backup_state.stat.transferred += size;
-        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-     }
- 
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-
-     return size;
- }
- 
-static void coroutine_fn pvebackup_co_cleanup(void)
-+static void coroutine_fn pvebackup_co_cleanup(void *unused)
- {
-     assert(qemu_in_coroutine());
- 
-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-+    qemu_rec_mutex_lock(&backup_state.backup_mutex);
- 
-    qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-+    qemu_rec_mutex_lock(&backup_state.stat.lock);
-     backup_state.stat.end_time = time(NULL);
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-+    qemu_rec_mutex_unlock(&backup_state.stat.lock);
- 
-     if (backup_state.vmaw) {
-         Error *local_err = NULL;
-         vma_writer_close(backup_state.vmaw, &local_err);
- 
-         if (local_err != NULL) {
-            qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-            error_propagate(&backup_state.stat.error, local_err);
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-        }
-+            pvebackup_propagate_error(local_err);
-+         }
- 
-         backup_state.vmaw = NULL;
-     }
- 
-     if (backup_state.pbs) {
-        qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-        bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
-        if (!error_or_canceled) {
-+        if (!pvebackup_error_or_canceled()) {
-             Error *local_err = NULL;
-             proxmox_backup_co_finish(backup_state.pbs, &local_err);
-             if (local_err != NULL) {
-                qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-                error_propagate(&backup_state.stat.error, local_err);
-             }
-+                pvebackup_propagate_error(local_err);
-+            }
-         }
-        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- 
-         proxmox_backup_disconnect(backup_state.pbs);
-         backup_state.pbs = NULL;
-@@ -205,43 +239,14 @@ static void coroutine_fn pvebackup_co_cleanup(void)
- 
-     g_list_free(backup_state.di_list);
-     backup_state.di_list = NULL;
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-+    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- }
- 
-typedef struct PVEBackupCompeteCallbackData {
-    PVEBackupDevInfo *di;
-    int result;
-} PVEBackupCompeteCallbackData;
-
-static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
-+static void coroutine_fn pvebackup_complete_stream(void *opaque)
- {
-    assert(qemu_in_coroutine());
-
-    PVEBackupCompeteCallbackData *cb_data = opaque;
-
-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-
-    PVEBackupDevInfo *di = cb_data->di;
-    int ret = cb_data->result;
-
-    di->completed = true;
-
-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-    bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
-
-    if (ret < 0 && !backup_state.stat.error) {
-        qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-        error_setg(&backup_state.stat.error, "job failed with err %d - %s",
-                   ret, strerror(-ret));
-    }
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
-    di->bs = NULL;
-+    PVEBackupDevInfo *di = opaque;
- 
-    if (di->target) {
-        bdrv_unref(di->target);
-        di->target = NULL;
-    }
-+    bool error_or_canceled = pvebackup_error_or_canceled();
- 
-     if (backup_state.vmaw) {
-         vma_writer_close_stream(backup_state.vmaw, di->dev_id);
-@@ -251,108 +256,96 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
-         Error *local_err = NULL;
-         proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
-         if (local_err != NULL) {
-            qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-            error_propagate(&backup_state.stat.error, local_err);
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-+            pvebackup_propagate_error(local_err);
-         }
-     }
-+}
- 
-    // remove self from job queue
-    backup_state.di_list = g_list_remove(backup_state.di_list, di);
-    g_free(di);
-+static void pvebackup_complete_cb(void *opaque, int ret)
-+{
-+    assert(!qemu_in_coroutine());
-+
-+    PVEBackupDevInfo *di = opaque;
- 
-    int pending_jobs = g_list_length(backup_state.di_list);
-+    qemu_rec_mutex_lock(&backup_state.backup_mutex);
- 
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-+    di->completed = true;
- 
-    if (pending_jobs > 0) {
-        pvebackup_co_run_next_job();
-    } else {
-        pvebackup_co_cleanup();
-+    if (ret < 0) {
-+        Error *local_err = NULL;
-+        error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
-+        pvebackup_propagate_error(local_err);
-     }
-}
- 
-static void pvebackup_complete_cb(void *opaque, int ret)
-{
-    // This can be called from the main loop, or from a coroutine
-    PVEBackupCompeteCallbackData cb_data = {
-        .di = opaque,
-        .result = ret,
-    };
-+    di->bs = NULL;
- 
-    if (qemu_in_coroutine()) {
-        pvebackup_co_complete_cb(&cb_data);
-    } else {
-        block_on_coroutine_fn(pvebackup_co_complete_cb, &cb_data);
-    }
-}
-+    assert(di->target == NULL);
- 
-static void coroutine_fn pvebackup_co_cancel(void *opaque)
-{
-    assert(qemu_in_coroutine());
-+    block_on_coroutine_fn(pvebackup_complete_stream, di);
- 
-    qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-    backup_state.stat.cancel = true;
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-+    // remove self from job queue
-+    backup_state.di_list = g_list_remove(backup_state.di_list, di);
- 
-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-+    g_free(di);
- 
-    // Avoid race between block jobs and backup-cancel command:
-    if (!(backup_state.vmaw || backup_state.pbs)) {
-        qemu_co_mutex_unlock(&backup_state.backup_mutex);
-        return;
-    }
-+    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- 
-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-    if (!backup_state.stat.error) {
-        qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-        error_setg(&backup_state.stat.error, "backup cancelled");
-    }
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-+    pvebackup_run_next_job();
-+}
-+
-+static void pvebackup_cancel(void)
-+{
-+    Error *cancel_err = NULL;
-+    error_setg(&cancel_err, "backup canceled");
-+    pvebackup_propagate_error(cancel_err);
-+
-+    qemu_rec_mutex_lock(&backup_state.backup_mutex);
- 
-     if (backup_state.vmaw) {
-         /* make sure vma writer does not block anymore */
-        vma_writer_set_error(backup_state.vmaw, "backup cancelled");
-+        vma_writer_set_error(backup_state.vmaw, "backup canceled");
-     }
- 
-     if (backup_state.pbs) {
-        proxmox_backup_abort(backup_state.pbs, "backup cancelled");
-+        proxmox_backup_abort(backup_state.pbs, "backup canceled");
-     }
- 
-    bool running_jobs = 0;
-    GList *l = backup_state.di_list;
-    while (l) {
-        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-        l = g_list_next(l);
-        if (!di->completed && di->bs) {
-            for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
-                if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
-                    continue;
-                }
-+    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- 
-                BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
-                if (bjob && bjob->source_bs == di->bs) {
-                    AioContext *aio_context = job->job.aio_context;
-                    aio_context_acquire(aio_context);
-+    for(;;) {
- 
-                    if (!di->completed) {
-                        running_jobs += 1;
-                        job_cancel(&job->job, false);
-                    }
-                    aio_context_release(aio_context);
-                }
-+        BlockJob *next_job = NULL;
-+
-+        qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+
-+        GList *l = backup_state.di_list;
-+        while (l) {
-+            PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-+            l = g_list_next(l);
-+
-+            BlockJob *job = lookup_active_block_job(di);
-+            if (job != NULL) {
-+                next_job = job;
-+                break;
-             }
-         }
-    }
- 
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-+        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- 
-    if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler
-+        if (next_job) {
-+            AioContext *aio_context = next_job->job.aio_context;
-+            aio_context_acquire(aio_context);
-+            job_cancel_sync(&next_job->job);
-+            aio_context_release(aio_context);
-+        } else {
-+            break;
-+        }
-+    }
- }
- 
- void qmp_backup_cancel(Error **errp)
- {
-    block_on_coroutine_fn(pvebackup_co_cancel, NULL);
-+    pvebackup_cancel();
- }
- 
- static int coroutine_fn pvebackup_co_add_config(
-@@ -406,46 +399,97 @@ static int coroutine_fn pvebackup_co_add_config(
- 
- bool job_should_pause(Job *job);
- 
-static void coroutine_fn pvebackup_co_run_next_job(void)
-+static void pvebackup_run_next_job(void)
- {
-    assert(qemu_in_coroutine());
-+    assert(!qemu_in_coroutine());
- 
-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-+    qemu_rec_mutex_lock(&backup_state.backup_mutex);
- 
-     GList *l = backup_state.di_list;
-     while (l) {
-         PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-         l = g_list_next(l);
-        if (!di->completed && di->bs) {
-            for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
-                if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
-                    continue;
-                }
- 
-                BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
-                if (bjob && bjob->source_bs == di->bs) {
-                    AioContext *aio_context = job->job.aio_context;
-                    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-                    aio_context_acquire(aio_context);
-
-                    if (job_should_pause(&job->job)) {
-                        qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-                        bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
-                        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
-                        if (error_or_canceled) {
-                            job_cancel(&job->job, false);
-                        } else {
-                            job_resume(&job->job);
-                        }
-                    }
-                    aio_context_release(aio_context);
-                    return;
-+        BlockJob *job = lookup_active_block_job(di);
-+
-+        if (job) {
-+            qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+
-+            AioContext *aio_context = job->job.aio_context;
-+            aio_context_acquire(aio_context);
-+
-+            if (job_should_pause(&job->job)) {
-+                bool error_or_canceled = pvebackup_error_or_canceled();
-+                if (error_or_canceled) {
-+                    job_cancel_sync(&job->job);
-+                } else {
-+                    job_resume(&job->job);
-                 }
-             }
-+            aio_context_release(aio_context);
-+            return;
-         }
-     }
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-+
-+    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+
-+    block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
-+}
-+
-+static bool create_backup_jobs(void) {
-+
-+    assert(!qemu_in_coroutine());
-+
-+    Error *local_err = NULL;
-+
-+    /* create and start all jobs (paused state) */
-+    GList *l =  backup_state.di_list;
-+    while (l) {
-+        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-+        l = g_list_next(l);
-+
-+        assert(di->target != NULL);
-+
-+        AioContext *aio_context = bdrv_get_aio_context(di->bs);
-+        aio_context_acquire(aio_context);
-+
-+        BlockJob *job = backup_job_create(
-+            NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
-+            BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
-+            JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
-+
-+        aio_context_release(aio_context);
-+
-+        if (!job || local_err != NULL) {
-+            Error *create_job_err = NULL;
-+            error_setg(&create_job_err, "backup_job_create failed: %s",
-+                       local_err ? error_get_pretty(local_err) : "null");
-+
-+            pvebackup_propagate_error(create_job_err);
-+            break;
-+        }
-+        job_start(&job->job);
-+
-+        bdrv_unref(di->target);
-+        di->target = NULL;
-+    }
-+
-+    bool errors = pvebackup_error_or_canceled();
-+
-+    if (errors) {
-+        l = backup_state.di_list;
-+        while (l) {
-+            PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-+            l = g_list_next(l);
-+
-+            if (di->target) {
-+                bdrv_unref(di->target);
-+                di->target = NULL;
-+            }
-+        }
-+    }
-+
-+    return errors;
- }
- 
- typedef struct QmpBackupTask {
-@@ -476,7 +520,7 @@ typedef struct QmpBackupTask {
-     UuidInfo *result;
- } QmpBackupTask;
- 
-static void coroutine_fn pvebackup_co_start(void *opaque)
-+static void coroutine_fn pvebackup_co_prepare(void *opaque)
- {
-     assert(qemu_in_coroutine());
- 
-@@ -495,15 +539,14 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
-     GList *di_list = NULL;
-     GList *l;
-     UuidInfo *uuid_info;
-    BlockJob *job;
- 
-     const char *config_name = "qemu-server.conf";
-     const char *firewall_name = "qemu-server.fw";
- 
-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-+    qemu_rec_mutex_lock(&backup_state.backup_mutex);
- 
-     if (backup_state.di_list) {
-        qemu_co_mutex_unlock(&backup_state.backup_mutex);
-+        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-         error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
-                   "previous backup not finished");
-         return;
-@@ -631,7 +674,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
-             if (dev_id < 0)
-                 goto err;
- 
-            if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_cb, di, task->errp))) {
-+            if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
-                 goto err;
-             }
- 
-@@ -652,7 +695,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
-             PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-             l = g_list_next(l);
- 
-            if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_cb, di, task->errp))) {
-+            if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
-                 goto err;
-             }
- 
-@@ -717,9 +760,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
-     }
-     /* initialize global backup_state now */
- 
-    qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-
-    backup_state.stat.cancel = false;
-+    qemu_rec_mutex_lock(&backup_state.stat.lock);
- 
-     if (backup_state.stat.error) {
-         error_free(backup_state.stat.error);
-@@ -742,7 +783,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
-     backup_state.stat.transferred = 0;
-     backup_state.stat.zero_bytes = 0;
- 
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-+    qemu_rec_mutex_unlock(&backup_state.stat.lock);
- 
-     backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
- 
-@@ -751,45 +792,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
- 
-     backup_state.di_list = di_list;
- 
-    /* start all jobs (paused state) */
-    l = di_list;
-    while (l) {
-        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-        l = g_list_next(l);
-
-        AioContext *aio_context = bdrv_get_aio_context(di->bs);
-        aio_context_acquire(aio_context);
-
-        job = backup_job_create(NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
-                                BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
-                                JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
-
-        aio_context_release(aio_context);
-
-        if (!job || local_err != NULL) {
-            qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-            error_setg(&backup_state.stat.error, "backup_job_create failed");
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-            break;
-        }
-        job_start(&job->job);
-        if (di->target) {
-            bdrv_unref(di->target);
-            di->target = NULL;
-        }
-    }
-
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-
-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-    bool no_errors = !backup_state.stat.error;
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
-    if (no_errors) {
-        pvebackup_co_run_next_job(); // run one job
-    } else {
-        pvebackup_co_cancel(NULL);
-    }
-+    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- 
-     uuid_info = g_malloc0(sizeof(*uuid_info));
-     uuid_info->UUID = uuid_str;
-@@ -833,7 +836,7 @@ err:
-         rmdir(backup_dir);
-     }
- 
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-+    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- 
-     task->result = NULL;
-     return;
-@@ -878,32 +881,28 @@ UuidInfo *qmp_backup(
-         .errp = errp,
-     };
- 
-    block_on_coroutine_fn(pvebackup_co_start, &task);
-+    block_on_coroutine_fn(pvebackup_co_prepare, &task);
-+
-+    if (*errp == NULL) {
-+        qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+        create_backup_jobs();
-+        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+        pvebackup_run_next_job();
-+    }
- 
-     return task.result;
- }
- 
-
-typedef struct QmpQueryBackupTask {
-    Error **errp;
-    BackupStatus *result;
-} QmpQueryBackupTask;
-
-static void coroutine_fn pvebackup_co_query(void *opaque)
-+BackupStatus *qmp_query_backup(Error **errp)
- {
-    assert(qemu_in_coroutine());
-
-    QmpQueryBackupTask *task = opaque;
-
-     BackupStatus *info = g_malloc0(sizeof(*info));
- 
-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-+    qemu_rec_mutex_lock(&backup_state.stat.lock);
- 
-     if (!backup_state.stat.start_time) {
-         /* not started, return {} */
-        task->result = info;
-        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-        return;
-+        qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+        return info;
-     }
- 
-     info->has_status = true;
-@@ -939,19 +938,7 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
-     info->has_transferred = true;
-     info->transferred = backup_state.stat.transferred;
- 
-    task->result = info;
-+    qemu_rec_mutex_unlock(&backup_state.stat.lock);
- 
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-}
-
-BackupStatus *qmp_query_backup(Error **errp)
-{
-    QmpQueryBackupTask task = {
-        .errp = errp,
-        .result = NULL,
-    };
-
-    block_on_coroutine_fn(pvebackup_co_query, &task);
-
-    return task.result;
-+    return info;
- }
--- a/debian/patches/pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
+++ b/debian/patches/pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
@@ -0,0 +1,62 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stefan Reiter <s.reiter@proxmox.com>
+Date: Tue, 12 Jan 2021 14:12:20 +0100
+Subject: [PATCH] PVE: redirect stderr to journal when daemonized
+
+QEMU uses the logging for error messages usually, so LOG_ERR is most
+fitting.
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ meson.build | 3 ++-
+ os-posix.c  | 7 +++++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/meson.build b/meson.build
+index 3bc039f60f..067e8956a7 100644
+--- a/meson.build
+++ b/meson.build
+@@ -1923,6 +1923,7 @@ endif
+ has_gettid = cc.has_function('gettid')
+ 
+ libuuid = cc.find_library('uuid', required: true)
+libsystemd = cc.find_library('systemd', required: true)
+ libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true)
+ 
+ # libselinux
+@@ -3530,7 +3531,7 @@ if have_block
+   if host_os == 'windows'
+     system_ss.add(files('os-win32.c'))
+   else
+-    blockdev_ss.add(files('os-posix.c'))
+    blockdev_ss.add(files('os-posix.c'), libsystemd)
+   endif
+ endif
+ 
+diff --git a/os-posix.c b/os-posix.c
+index a4284e2c07..197a2120fd 100644
+--- a/os-posix.c
+++ b/os-posix.c
+@@ -29,6 +29,8 @@
+ #include <pwd.h>
+ #include <grp.h>
+ #include <libgen.h>
+#include <systemd/sd-journal.h>
+#include <syslog.h>
+ 
+ #include "qemu/error-report.h"
+ #include "qemu/log.h"
+@@ -302,9 +304,10 @@ void os_setup_post(void)
+ 
+         dup2(fd, 0);
+         dup2(fd, 1);
+-        /* In case -D is given do not redirect stderr to /dev/null */
+        /* In case -D is given do not redirect stderr to journal */
+         if (!qemu_log_enabled()) {
+-            dup2(fd, 2);
+            int journal_fd = sd_journal_stream_fd("QEMU", LOG_ERR, 0);
+            dup2(journal_fd, 2);
+         }
+ 
+         close(fd);
--- a/debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
+++ b/debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
@@ -0,0 +1,211 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stefan Reiter <s.reiter@proxmox.com>
+Date: Thu, 22 Oct 2020 17:34:18 +0200
+Subject: [PATCH] PVE: Migrate dirty bitmap state via savevm
+
+QEMU provides 'savevm' registrations as a mechanism for arbitrary state
+to be migrated along with a VM. Use this to send a serialized version of
+dirty bitmap state data from proxmox-backup-qemu, and restore it on the
+target node.
+
+Also add a flag to query-proxmox-support so qemu-server can determine if
+safe migration is possible and makes sense.
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: split up state_pending for 8.0]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ include/migration/misc.h |   3 ++
+ migration/meson.build    |   2 +
+ migration/migration.c    |   1 +
+ migration/pbs-state.c    | 104 +++++++++++++++++++++++++++++++++++++++
+ pve-backup.c             |   1 +
+ qapi/block-core.json     |   6 +++
+ 6 files changed, 117 insertions(+)
+ create mode 100644 migration/pbs-state.c
+
+diff --git a/include/migration/misc.h b/include/migration/misc.h
+index c9e200f4eb..12c99ebc69 100644
+--- a/include/migration/misc.h
+++ b/include/migration/misc.h
+@@ -117,4 +117,7 @@ bool migration_in_bg_snapshot(void);
+ /* migration/block-dirty-bitmap.c */
+ void dirty_bitmap_mig_init(void);
+ 
+/* migration/pbs-state.c */
+void pbs_state_mig_init(void);
+
+ #endif
+diff --git a/migration/meson.build b/migration/meson.build
+index 800f12a60d..35a4306183 100644
+--- a/migration/meson.build
+++ b/migration/meson.build
+@@ -7,7 +7,9 @@ migration_files = files(
+   'vmstate.c',
+   'qemu-file.c',
+   'yank_functions.c',
+  'pbs-state.c',
+ )
+system_ss.add(libproxmox_backup_qemu)
+ 
+ system_ss.add(files(
+   'block-dirty-bitmap.c',
+diff --git a/migration/migration.c b/migration/migration.c
+index 86bf76e925..b8d7e471a4 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -239,6 +239,7 @@ void migration_object_init(void)
+     blk_mig_init();
+     ram_mig_init();
+     dirty_bitmap_mig_init();
+    pbs_state_mig_init();
+ }
+ 
+ typedef struct {
+diff --git a/migration/pbs-state.c b/migration/pbs-state.c
+new file mode 100644
+index 0000000000..887e998b9e
+--- /dev/null
+++ b/migration/pbs-state.c
+@@ -0,0 +1,104 @@
+/*
+ * PBS (dirty-bitmap) state migration
+ */
+
+#include "qemu/osdep.h"
+#include "migration/misc.h"
+#include "qemu-file.h"
+#include "migration/vmstate.h"
+#include "migration/register.h"
+#include "proxmox-backup-qemu.h"
+
+typedef struct PBSState {
+    bool active; /* true from init until save_setup or save_cleanup ran */
+} PBSState;
+
+/* state is accessed via this static variable directly, 'opaque' is NULL */
+static PBSState pbs_state;
+
+static void pbs_state_pending(void *opaque, uint64_t *must_precopy,
+                              uint64_t *can_postcopy)
+{
+    /* everything is sent in save_setup, so neither counter is modified */
+}
+
+/* receive PBS state via f and deserialize, called on target; returns 0 on
+ * success or -EIO if the stream ends before the announced length */
+static int pbs_state_load(QEMUFile *f, void *opaque, int version_id)
+{
+    /* safe cast, we cannot migrate to target with less bits than source */
+    size_t buf_size = (size_t)qemu_get_be64(f);
+    uint8_t *buf = (uint8_t *)malloc(buf_size);
+    int ret = 0;
+
+    if (qemu_get_buffer(f, buf, buf_size) < buf_size) {
+        fprintf(stderr, "error receiving PBS state: not enough data\n");
+        ret = -EIO; /* fall through to free(buf) instead of leaking it */
+    } else {
+        proxmox_import_state(buf, buf_size);
+    }
+
+    free(buf);
+    return ret;
+}
+
+/* serialize PBS state and send to target via f, called on source */
+static int pbs_state_save_setup(QEMUFile *f, void *opaque)
+{
+    size_t buf_size;
+    uint8_t *buf = proxmox_export_state(&buf_size);
+
+    /* LV encoding: 64-bit big-endian length, followed by the raw buffer */
+    qemu_put_be64(f, buf_size);
+    qemu_put_buffer(f, buf, buf_size);
+
+    proxmox_free_state_buf(buf);
+    pbs_state.active = false; /* sent; makes pbs_state_is_active() false */
+    return 0;
+}
+
+static bool pbs_state_is_active(void *opaque)
+{
+    /* we need to report active exactly once, else .save_setup is never called;
+     * always returning true would stall the migration, since it would keep
+     * waiting for us */
+    return pbs_state.active;
+}
+
+static bool pbs_state_is_active_iterate(void *opaque)
+{
+    /* we don't iterate, everything is sent in save_setup; same as is_active */
+    return pbs_state_is_active(opaque);
+}
+
+static bool pbs_state_has_postcopy(void *opaque)
+{
+    /* PBS state can't change during a migration (since that's blocking any
+     * potential backups), so it is fully copied up front; never postcopy */
+    return false;
+}
+
+static void pbs_state_save_cleanup(void *opaque)
+{
+    /* clear active once migration ended, whether it succeeded or failed */
+    pbs_state.active = false; /* NOTE(review): only mig_init sets it true */
+}
+
+static SaveVMHandlers savevm_pbs_state_handlers = {
+    .save_setup = pbs_state_save_setup,
+    .has_postcopy = pbs_state_has_postcopy,
+    .state_pending_exact = pbs_state_pending, /* shared no-op callback */
+    .state_pending_estimate = pbs_state_pending, /* shared no-op callback */
+    .is_active_iterate = pbs_state_is_active_iterate,
+    .load_state = pbs_state_load,
+    .is_active = pbs_state_is_active,
+    .save_cleanup = pbs_state_save_cleanup,
+};
+
+void pbs_state_mig_init(void)
+{
+    pbs_state.active = true; /* so .is_active lets .save_setup run */
+    register_savevm_live("pbs-state", 0, 1,
+                         &savevm_pbs_state_handlers,
+                         NULL); /* opaque unused, handlers read pbs_state */
+}
+diff --git a/pve-backup.c b/pve-backup.c
+index 9c13a92623..9d480a8eec 100644
+--- a/pve-backup.c
+++ b/pve-backup.c
+@@ -1091,6 +1091,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+     ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
+     ret->pbs_dirty_bitmap = true;
+     ret->pbs_dirty_bitmap_savevm = true;
+    ret->pbs_dirty_bitmap_migration = true;
+     ret->query_bitmap_info = true;
+     ret->pbs_masterkey = true;
+     ret->backup_max_workers = true;
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index fc32ff9957..f516d8e95a 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -1004,6 +1004,11 @@
+ # @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
+ #                           safely be set for savevm-async.
+ #
+# @pbs-dirty-bitmap-migration: True if safe migration of dirty-bitmaps including
+#                              PBS state is supported. Enabling 'dirty-bitmaps'
+#                              migration cap if this is false/unset may lead
+#                              to crashes on migration!
+#
+ # @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
+ #                 parameter.
+ #
+@@ -1017,6 +1022,7 @@
+   'data': { 'pbs-dirty-bitmap': 'bool',
+             'query-bitmap-info': 'bool',
+             'pbs-dirty-bitmap-savevm': 'bool',
+            'pbs-dirty-bitmap-migration': 'bool',
+             'pbs-masterkey': 'bool',
+             'pbs-library-version': 'str',
+             'backup-max-workers': 'bool' } }
--- a/debian/patches/pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
+++ b/debian/patches/pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
@@ -0,0 +1,33 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stefan Reiter <s.reiter@proxmox.com>
+Date: Tue, 3 Nov 2020 14:57:32 +0100
+Subject: [PATCH] migration/block-dirty-bitmap: migrate other bitmaps even if
+ one fails
+
+If the checks in bdrv_dirty_bitmap_check fail, that only means that this
+one specific bitmap cannot be migrated. That is not an error condition
+for any other bitmaps on the same block device.
+
+Fixes dirty-bitmap migration with sync=bitmap, as the bitmaps used for
+that are obviously marked as "busy", which would cause none at all to be
+transferred.
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ migration/block-dirty-bitmap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
+index 2708abf3d7..fb17c01308 100644
+--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
+@@ -540,7 +540,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
+ 
+         if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
+             error_report_err(local_err);
+-            return -1;
+            continue;
+         }
+ 
+         if (bitmap_aliases) {
--- a/debian/patches/pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
+++ b/debian/patches/pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
@@ -0,0 +1,69 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fabian Ebner <f.ebner@proxmox.com>
+Date: Tue, 17 Nov 2020 10:51:05 +0100
+Subject: [PATCH] PVE: fall back to open-iscsi initiatorname
+
+When no explicit option is given, try reading the initiator name from
+/etc/iscsi/initiatorname.iscsi and only use the generic fallback, i.e.
+iqn.2008-11.org.linux-kvmXXX, as a third alternative.
+
+This avoids the need to add an explicit option for vma and to explicitly set it
+for each call to qemu that deals with iSCSI disks, while still allowing to set
+the option if a different name is needed.
+
+According to RFC 3720, an initiator name is at most 223 bytes long, so the
+4 KiB buffer is big enough, even if many whitespaces are used.
+
+Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/iscsi.c | 30 ++++++++++++++++++++++++++++++
+ 1 file changed, 30 insertions(+)
+
+diff --git a/block/iscsi.c b/block/iscsi.c
+index 2ff14b7472..46f275fbf7 100644
+--- a/block/iscsi.c
+++ b/block/iscsi.c
+@@ -1392,12 +1392,42 @@ static char *get_initiator_name(QemuOpts *opts)
+     const char *name;
+     char *iscsi_name;
+     UuidInfo *uuid_info;
+    FILE *name_fh;
+ 
+     name = qemu_opt_get(opts, "initiator-name");
+     if (name) {
+         return g_strdup(name);
+     }
+ 
+    name_fh = fopen("/etc/iscsi/initiatorname.iscsi", "r");
+    if (name_fh) {
+        const char *key = "InitiatorName";
+        char buffer[4096];
+        char *line;
+
+        while ((line = fgets(buffer, sizeof(buffer), name_fh))) {
+            line = g_strstrip(line);
+            if (!strncmp(line, key, strlen(key))) {
+                line = strchr(line, '=');
+                if (!line || strlen(line) == 1) {
+                    continue;
+                }
+                line++;
+                g_strstrip(line);
+                if (!strlen(line)) {
+                    continue;
+                }
+                name = line;
+                break;
+            }
+        }
+        fclose(name_fh);
+
+        if (name) {
+            return g_strdup(name);
+        }
+    }
+
+     uuid_info = qmp_query_uuid(NULL);
+     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
+         name = qemu_get_vm_name();
--- a/debian/patches/pve/0037-PVE-block-stream-increase-chunk-size.patch
+++ b/debian/patches/pve/0037-PVE-block-stream-increase-chunk-size.patch
@@ -0,0 +1,25 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stefan Reiter <s.reiter@proxmox.com>
+Date: Tue, 2 Mar 2021 16:34:28 +0100
+Subject: [PATCH] PVE: block/stream: increase chunk size
+
+Ceph favors bigger chunks, so increase to 4M.
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/stream.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/stream.c b/block/stream.c
+index 7031eef12b..d2da83ae7c 100644
+--- a/block/stream.c
+++ b/block/stream.c
+@@ -27,7 +27,7 @@ enum {
+      * large enough to process multiple clusters in a single call, so
+      * that populating contiguous regions of the image is efficient.
+      */
+-    STREAM_CHUNK = 512 * 1024, /* in bytes */
+    STREAM_CHUNK = 4 * 1024 * 1024, /* in bytes */
+ };
+ 
+ typedef struct StreamBlockJob {
--- a/debian/patches/pve/0038-block-add-alloc-track-driver.patch
+++ b/debian/patches/pve/0038-block-add-alloc-track-driver.patch
@@ -0,0 +1,471 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stefan Reiter <s.reiter@proxmox.com>
+Date: Mon, 7 Dec 2020 15:21:03 +0100
+Subject: [PATCH] block: add alloc-track driver
+
+Add a new filter node 'alloc-track', which separates reads and writes to
+different children, thus allowing to put a backing image behind any
+blockdev (regardless of driver support). Since we can't detect any
+pre-allocated blocks, we can only track new writes, hence the write
+target ('file') for this node must always be empty.
+
+Intended use case is for live restoring, i.e. add a backup image as a
+block device into a VM, then put an alloc-track on the restore target
+and set the backup as backing. With this, one can use a regular
+'block-stream' to restore the image, while the VM can already run in the
+background. Copy-on-read will help make progress as the VM reads as
+well.
+
+This only worked if the target supports backing images, so up until now
+only for qcow2, with alloc-track any driver for the target can be used.
+
+Replacing the node cannot be done in the
+track_co_change_backing_file() callback, because replacing a node
+cannot happen in a coroutine and requires the block graph lock
+exclusively. Could either become a special option for the stream job,
+or maybe the upcoming blockdev-replace QMP command can be used in the
+future.
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+[FE: adapt to changed function signatures
+     make error return value consistent with QEMU
+     avoid premature break during read
+     adhere to block graph lock requirements]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/alloc-track.c | 366 ++++++++++++++++++++++++++++++++++++++++++++
+ block/meson.build   |   1 +
+ block/stream.c      |  34 ++++
+ 3 files changed, 401 insertions(+)
+ create mode 100644 block/alloc-track.c
+
+diff --git a/block/alloc-track.c b/block/alloc-track.c
+new file mode 100644
+index 0000000000..b9f8ea9137
+--- /dev/null
+++ b/block/alloc-track.c
+@@ -0,0 +1,366 @@
+/*
+ * Node to allow backing images to be applied to any node. Assumes a blank
+ * image to begin with, only new writes are tracked as allocated, thus this
+ * must never be put on a node that already contains data.
+ *
+ * Copyright (c) 2020 Proxmox Server Solutions GmbH
+ * Copyright (c) 2020 Stefan Reiter <s.reiter@proxmox.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/dirty-bitmap.h"
+#include "block/graph-lock.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "sysemu/block-backend.h"
+
+#define TRACK_OPT_AUTO_REMOVE "auto-remove"
+
+typedef enum DropState {
+    DropNone,
+    DropInProgress,
+} DropState;
+
+typedef struct {
+    BdrvDirtyBitmap *bitmap;
+    uint64_t granularity;
+    DropState drop_state;
+    bool auto_remove;
+} BDRVAllocTrackState;
+
+static QemuOptsList runtime_opts = {
+    .name = "alloc-track",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = TRACK_OPT_AUTO_REMOVE,
+            .type = QEMU_OPT_BOOL,
+            .help = "automatically replace this node with 'file' when 'backing'"
+                    " is detached",
+        },
+        { /* end of list */ }
+    },
+};
+
+static void GRAPH_RDLOCK
+track_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    BDRVAllocTrackState *s = bs->opaque;
+
+    if (!bs->file) {
+        return;
+    }
+
+    /*
+     * Always use alignment from underlying write device so RMW cycle for
+     * bdrv_pwritev reads data from our backing via track_co_preadv. Also use at
+     * least the bitmap granularity.
+     */
+    bs->bl.request_alignment = MAX(bs->file->bs->bl.request_alignment,
+                                   s->granularity);
+}
+
+static int track_open(BlockDriverState *bs, QDict *options, int flags,
+                      Error **errp)
+{
+    BDRVAllocTrackState *s = bs->opaque;
+    BdrvChild *file = NULL;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    int ret = 0;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
+
+    /* open the target (write) node, backing will be attached by block layer */
+    file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+                           BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
+                           &local_err);
+    bdrv_graph_wrlock();
+    bs->file = file;
+    bdrv_graph_wrunlock();
+    if (local_err) {
+        ret = -EINVAL;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    bdrv_graph_rdlock_main_loop();
+    BlockDriverInfo bdi = {0};
+    ret = bdrv_get_info(bs->file->bs, &bdi);
+    if (ret < 0) {
+        /*
+         * Not a hard failure. Worst that can happen is partial cluster
+         * allocation in the write target. However, the driver here returns its
+         * allocation status based on the dirty bitmap, so any other data that
+         * maps to such a cluster will still be copied later by a stream job (or
+         * during writes to that cluster).
+         */
+        warn_report("alloc-track: unable to query cluster size for write target: %s",
+                    strerror(-ret)); /* ret is a negative errno */
+    }
+    ret = 0;
+    /*
+     * Always consider alignment from underlying write device so RMW cycle for
+     * bdrv_pwritev reads data from our backing via track_co_preadv. Also try to
+     * avoid partial cluster allocation in the write target by considering the
+     * cluster size.
+     */
+    s->granularity = MAX(bs->file->bs->bl.request_alignment,
+                         MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+    track_refresh_limits(bs, errp);
+    s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, s->granularity, NULL,
+                                         &local_err);
+    bdrv_graph_rdunlock_main_loop();
+    if (local_err) {
+        ret = -EIO;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    s->drop_state = DropNone;
+
+fail:
+    if (ret < 0) {
+        bdrv_graph_wrlock();
+        bdrv_unref_child(bs, bs->file);
+        bdrv_graph_wrunlock();
+        if (s->bitmap) {
+            bdrv_release_dirty_bitmap(s->bitmap);
+        }
+    }
+    qemu_opts_del(opts);
+    return ret;
+}
+
+static void track_close(BlockDriverState *bs)
+{
+    BDRVAllocTrackState *s = bs->opaque;
+    if (s->bitmap) {
+        bdrv_release_dirty_bitmap(s->bitmap);
+    }
+}
+
+static coroutine_fn int64_t GRAPH_RDLOCK
+track_co_getlength(BlockDriverState *bs)
+{
+    return bdrv_co_getlength(bs->file->bs);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    BDRVAllocTrackState *s = bs->opaque;
+    QEMUIOVector local_qiov;
+    int ret = 0;
+
+    /* 'cur_offset' is relative to 'offset', 'local_offset' to image start */
+    uint64_t cur_offset, local_offset;
+    int64_t local_bytes;
+    bool alloc;
+
+    if (offset < 0 || bytes < 0) {
+        fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+        return -EIO;
+    }
+
+    /* a read request can span multiple granularity-sized chunks, and can thus
+     * contain blocks with different allocation status - we could just iterate
+     * granularity-wise, but for better performance use bdrv_dirty_bitmap_next_X
+     * to find the next flip and consider everything up to that in one go */
+    for (cur_offset = 0; cur_offset < bytes; cur_offset += local_bytes) {
+        local_offset = offset + cur_offset;
+        alloc = bdrv_dirty_bitmap_get(s->bitmap, local_offset);
+        if (alloc) {
+            local_bytes = bdrv_dirty_bitmap_next_zero(s->bitmap, local_offset,
+                                                      bytes - cur_offset);
+        } else {
+            local_bytes = bdrv_dirty_bitmap_next_dirty(s->bitmap, local_offset,
+                                                       bytes - cur_offset);
+        }
+
+        /* _bitmap_next_X return is -1 if no end found within limit, otherwise
+         * offset of next flip (to start of image) */
+        local_bytes = local_bytes < 0 ?
+            bytes - cur_offset :
+            local_bytes - local_offset;
+
+        qemu_iovec_init_slice(&local_qiov, qiov, cur_offset, local_bytes);
+
+        if (alloc) {
+            ret = bdrv_co_preadv(bs->file, local_offset, local_bytes,
+                                 &local_qiov, flags);
+        } else if (bs->backing) {
+            ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
+                                 &local_qiov, flags);
+        } else {
+            qemu_iovec_memset(&local_qiov, 0, 0, local_bytes);
+            ret = 0;
+        }
+        qemu_iovec_destroy(&local_qiov);
+        if (ret != 0) {
+            break;
+        }
+    }
+
+    return ret;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                 QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       BdrvRequestFlags flags)
+{
+    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    return bdrv_co_pdiscard(bs->file, offset, bytes);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+track_co_flush(BlockDriverState *bs)
+{
+    return bdrv_co_flush(bs->file->bs);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_block_status(BlockDriverState *bs, bool want_zero,
+                                            int64_t offset,
+                                            int64_t bytes,
+                                            int64_t *pnum,
+                                            int64_t *map,
+                                            BlockDriverState **file)
+{
+    BDRVAllocTrackState *s = bs->opaque;
+
+    bool alloc = bdrv_dirty_bitmap_get(s->bitmap, offset);
+    int64_t next_flipped;
+    if (alloc) {
+        next_flipped = bdrv_dirty_bitmap_next_zero(s->bitmap, offset, bytes);
+    } else {
+        next_flipped = bdrv_dirty_bitmap_next_dirty(s->bitmap, offset, bytes);
+    }
+
+    /* in case not the entire region has the same state, we need to set pnum to
+     * indicate for how many bytes our result is valid */
+    *pnum = next_flipped == -1 ? bytes : next_flipped - offset;
+    *map = offset;
+
+    if (alloc) {
+        *file = bs->file->bs;
+        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
+    } else if (bs->backing) {
+        *file = bs->backing->bs;
+    }
+    return 0;
+}
+
+static void GRAPH_RDLOCK
+track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                 BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared,
+                 uint64_t *nperm, uint64_t *nshared)
+{
+    BDRVAllocTrackState *s = bs->opaque;
+
+    *nshared = BLK_PERM_ALL;
+
+    /* in case we're currently dropping ourselves, claim to not use any
+     * permissions at all - which is fine, since from this point on we will
+     * never issue a read or write anymore */
+    if (s->drop_state == DropInProgress) {
+        *nperm = 0;
+        return;
+    }
+
+    if (role & BDRV_CHILD_DATA) {
+        *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+    } else {
+        /* 'backing' is also a child of our BDS, but we don't expect it to be
+         * writeable, so we only forward 'consistent read' */
+        *nperm = perm & BLK_PERM_CONSISTENT_READ;
+    }
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+                             const char *backing_fmt)
+{
+    /*
+     * Note that the actual backing file graph change is already done in the
+     * stream job itself with bdrv_set_backing_hd_drained(), so no need to
+     * actually do anything here. But still needs to be implemented, to make
+     * our caller (i.e. bdrv_co_change_backing_file()) do the right thing.
+     *
+     * FIXME
+     * We'd like to auto-remove ourselves from the block graph, but it cannot
+     * be done from a coroutine. Currently done in the stream job, where it
+     * kinda fits better, but in the long-term, a special parameter would be
+     * nice (or done via qemu-server via upcoming blockdev-replace QMP command).
+     */
+    if (backing_file == NULL) {
+        BDRVAllocTrackState *s = bs->opaque;
+        bdrv_drained_begin(bs);
+        s->drop_state = DropInProgress;
+        bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+        bdrv_drained_end(bs);
+    }
+
+    return 0;
+}
+
+static BlockDriver bdrv_alloc_track = {
+    .format_name                      = "alloc-track",
+    .instance_size                    = sizeof(BDRVAllocTrackState),
+
+    .bdrv_file_open                   = track_open,
+    .bdrv_close                       = track_close,
+    .bdrv_co_getlength                = track_co_getlength,
+    .bdrv_child_perm                  = track_child_perm,
+    .bdrv_refresh_limits              = track_refresh_limits,
+
+    .bdrv_co_pwrite_zeroes            = track_co_pwrite_zeroes,
+    .bdrv_co_pwritev                  = track_co_pwritev,
+    .bdrv_co_preadv                   = track_co_preadv,
+    .bdrv_co_pdiscard                 = track_co_pdiscard,
+
+    .bdrv_co_flush                    = track_co_flush,
+    .bdrv_co_flush_to_disk            = track_co_flush,
+
+    .supports_backing                 = true,
+
+    .bdrv_co_block_status             = track_co_block_status,
+    .bdrv_co_change_backing_file      = track_co_change_backing_file,
+};
+
+static void bdrv_alloc_track_init(void)
+{
+    bdrv_register(&bdrv_alloc_track);
+}
+
+block_init(bdrv_alloc_track_init);
+diff --git a/block/meson.build b/block/meson.build
+index 1945e04eeb..2873f3a25a 100644
+--- a/block/meson.build
+++ b/block/meson.build
+@@ -2,6 +2,7 @@ block_ss.add(genh)
+ block_ss.add(files(
+   'accounting.c',
+   'aio_task.c',
+  'alloc-track.c',
+   'amend.c',
+   'backup.c',
+   'backup-dump.c',
+diff --git a/block/stream.c b/block/stream.c
+index d2da83ae7c..f941cba14e 100644
+--- a/block/stream.c
+++ b/block/stream.c
+@@ -120,6 +120,40 @@ static int stream_prepare(Job *job)
+             ret = -EPERM;
+             goto out;
+         }
+
+        /*
+         * This cannot be done in the co_change_backing_file callback, because
+         * bdrv_replace_node() cannot be done in a coroutine. The latter also
+         * requires the graph lock exclusively. Only required for the
+         * alloc-track driver.
+         *
+         * The long-term plan is to either have an explicit parameter for the
+         * stream job or use the upcoming blockdev-replace QMP command.
+         */
+        if (base_id == NULL && strcmp(unfiltered_bs->drv->format_name, "alloc-track") == 0) {
+            BlockDriverState *file_bs;
+
+            bdrv_graph_rdlock_main_loop();
+            file_bs = unfiltered_bs->file->bs;
+            bdrv_graph_rdunlock_main_loop();
+
+            bdrv_ref(unfiltered_bs); // unrefed by bdrv_replace_node()
+            bdrv_drained_begin(file_bs);
+            bdrv_graph_wrlock();
+
+            bdrv_replace_node(unfiltered_bs, file_bs, &local_err);
+
+            bdrv_graph_wrunlock();
+            bdrv_drained_end(file_bs);
+            bdrv_unref(unfiltered_bs);
+
+            if (local_err) {
+                error_prepend(&local_err, "failed to replace alloc-track node: ");
+                error_report_err(local_err);
+                ret = -EPERM;
+                goto out;
+            }
+        }
+     }
+ 
+ out:
--- a/debian/patches/pve/0039-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
+++ b/debian/patches/pve/0039-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
@@ -0,0 +1,81 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fabian Ebner <f.ebner@proxmox.com>
+Date: Thu, 23 Jun 2022 14:00:05 +0200
+Subject: [PATCH] Revert "block/rbd: workaround for ceph issue #53784"
+
+This reverts commit fc176116cdea816ceb8dd969080b2b95f58edbc0 in
+preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
+
+Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/rbd.c | 42 ++----------------------------------------
+ 1 file changed, 2 insertions(+), 40 deletions(-)
+
+diff --git a/block/rbd.c b/block/rbd.c
+index 63f60d41be..367db42dce 100644
+--- a/block/rbd.c
+++ b/block/rbd.c
+@@ -1515,7 +1515,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+     int status, r;
+     RBDDiffIterateReq req = { .offs = offset };
+     uint64_t features, flags;
+-    uint64_t head = 0;
+ 
+     assert(offset + bytes <= s->image_size);
+ 
+@@ -1543,43 +1542,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+         return status;
+     }
+ 
+-#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
+-    /*
+-     * librbd had a bug until early 2022 that affected all versions of ceph that
+-     * supported fast-diff. This bug results in reporting of incorrect offsets
+-     * if the offset parameter to rbd_diff_iterate2 is not object aligned.
+-     * Work around this bug by rounding down the offset to object boundaries.
+-     * This is OK because we call rbd_diff_iterate2 with whole_object = true.
+-     * However, this workaround only works for non cloned images with default
+-     * striping.
+-     *
+-     * See: https://tracker.ceph.com/issues/53784
+-     */
+-
+-    /* check if RBD image has non-default striping enabled */
+-    if (features & RBD_FEATURE_STRIPINGV2) {
+-        return status;
+-    }
+-
+-#pragma GCC diagnostic push
+-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+-    /*
+-     * check if RBD image is a clone (= has a parent).
+-     *
+-     * rbd_get_parent_info is deprecated from Nautilus onwards, but the
+-     * replacement rbd_get_parent is not present in Luminous and Mimic.
+-     */
+-    if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
+-        return status;
+-    }
+-#pragma GCC diagnostic pop
+-
+-    head = req.offs & (s->object_size - 1);
+-    req.offs -= head;
+-    bytes += head;
+-#endif
+-
+-    r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
+    r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+                           qemu_rbd_diff_iterate_cb, &req);
+     if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
+         return status;
+@@ -1598,8 +1561,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+         status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
+     }
+ 
+-    assert(req.bytes > head);
+-    *pnum = req.bytes - head;
+    *pnum = req.bytes;
+     return status;
+ }
+ 
--- a/debian/patches/pve/0040-PVE-savevm-async-set-up-migration-state.patch
+++ b/debian/patches/pve/0040-PVE-savevm-async-set-up-migration-state.patch
@@ -1,88 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Stefan Reiter <s.reiter@proxmox.com>
-Date: Wed, 8 Apr 2020 15:29:03 +0200
-Subject: [PATCH] PVE: savevm-async: set up migration state
-
-code mostly adapted from upstream savevm.c
-
-Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
- savevm-async.c | 30 ++++++++++++++++++++++++++++--
- 1 file changed, 28 insertions(+), 2 deletions(-)
-
-diff --git a/savevm-async.c b/savevm-async.c
-index 790e27ae37..a38b15d652 100644
--- a/savevm-async.c
-+++ b/savevm-async.c
-@@ -225,6 +225,7 @@ static void *process_savevm_thread(void *opaque)
- {
-     int ret;
-     int64_t maxlen;
-+    MigrationState *ms = migrate_get_current();
- 
-     rcu_register_thread();
- 
-@@ -234,8 +235,7 @@ static void *process_savevm_thread(void *opaque)
- 
-     if (ret < 0) {
-         save_snapshot_error("qemu_savevm_state_setup failed");
-        rcu_unregister_thread();
-        return NULL;
-+        goto out;
-     }
- 
-     while (snap_state.state == SAVE_STATE_ACTIVE) {
-@@ -287,6 +287,12 @@ static void *process_savevm_thread(void *opaque)
-     qemu_bh_schedule(snap_state.cleanup_bh);
-     qemu_mutex_unlock_iothread();
- 
-+out:
-+    /* set migration state accordingly and clear soon-to-be stale file */
-+    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
-+                      ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
-+    ms->to_dst_file = NULL;
-+
-     rcu_unregister_thread();
-     return NULL;
- }
-@@ -294,6 +300,7 @@ static void *process_savevm_thread(void *opaque)
- void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
- {
-     Error *local_err = NULL;
-+    MigrationState *ms = migrate_get_current();
- 
-     int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
- 
-@@ -303,6 +310,17 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
-         return;
-     }
- 
-+    if (migration_is_running(ms->state)) {
-+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
-+        return;
-+    }
-+
-+    if (migrate_use_block()) {
-+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
-+                  "Block migration and snapshots are incompatible");
-+        return;
-+    }
-+
-     /* initialize snapshot info */
-     snap_state.saved_vm_running = runstate_is_running();
-     snap_state.bs_pos = 0;
-@@ -341,6 +359,14 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
-         goto restart;
-     }
- 
-+    /*
-+     * qemu_savevm_* paths use migration code and expect a migration state.
-+     * State is cleared in process_savevm_thread, but has to be initialized
-+     * here (blocking main thread, from QMP) to avoid race conditions.
-+     */
-+    migrate_init(ms);
-+    memset(&ram_counters, 0, sizeof(ram_counters));
-+    ms->to_dst_file = snap_state.file;
- 
-     error_setg(&snap_state.blocker, "block device is in use by savevm");
-     blk_op_block_all(snap_state.target, snap_state.blocker);
--- a/debian/patches/pve/0040-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
+++ b/debian/patches/pve/0040-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
@@ -0,0 +1,36 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fabian Ebner <f.ebner@proxmox.com>
+Date: Thu, 23 Jun 2022 14:00:07 +0200
+Subject: [PATCH] Revert "block/rbd: fix handling of holes in
+ .bdrv_co_block_status"
+
+This reverts commit 9e302f64bb407a9bb097b626da97228c2654cfee in
+preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
+
+Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/rbd.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/block/rbd.c b/block/rbd.c
+index 367db42dce..347b121626 100644
+--- a/block/rbd.c
+++ b/block/rbd.c
+@@ -1474,11 +1474,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
+     RBDDiffIterateReq *req = opaque;
+ 
+     assert(req->offs + req->bytes <= offs);
+-
+-    /* treat a hole like an unallocated area and bail out */
+-    if (!exists) {
+-        return 0;
+-    }
+    /*
+     * we do not diff against a snapshot so we should never receive a callback
+     * for a hole.
+     */
+    assert(exists);
+ 
+     if (!req->exists && offs > req->offs) {
+         /*
--- a/debian/patches/pve/0041-PVE-Backup-avoid-use-QemuRecMutex-inside-coroutines.patch
+++ b/debian/patches/pve/0041-PVE-Backup-avoid-use-QemuRecMutex-inside-coroutines.patch
@@ -1,211 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dietmar Maurer <dietmar@proxmox.com>
-Date: Fri, 17 Apr 2020 08:57:47 +0200
-Subject: [PATCH] PVE Backup: avoid use QemuRecMutex inside coroutines
-
---
- pve-backup.c | 59 +++++++++++++++++++++++++++++++++-------------------
- 1 file changed, 38 insertions(+), 21 deletions(-)
-
-diff --git a/pve-backup.c b/pve-backup.c
-index 169f0c68d0..dddf430399 100644
--- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -11,6 +11,23 @@
- 
- /* PVE backup state and related function */
- 
-+/*
-+ * Note: A resume from a qemu_coroutine_yield can happen in a different thread,
-+ * so you may not use normal mutexes within coroutines:
-+ *
-+ * ---bad-example---
-+ * qemu_rec_mutex_lock(lock)
-+ * ...
-+ * qemu_coroutine_yield() // wait for something
-+ * // we are now inside a different thread
-+ * qemu_rec_mutex_unlock(lock) // Crash - wrong thread!!
-+ * ---end-bad-example--
-+ *
-+ * ==> Always use CoMutext inside coroutines.
-+ * ==> Never acquire/release AioContext withing coroutines (because that use QemuRecMutex)
-+ *
-+ */
-+
- static struct PVEBackupState {
-     struct {
-         // Everithing accessed from qmp_backup_query command is protected using lock
-@@ -30,12 +47,14 @@ static struct PVEBackupState {
-     ProxmoxBackupHandle *pbs;
-     GList *di_list;
-     QemuRecMutex backup_mutex;
-+    CoMutex dump_callback_mutex;
- } backup_state;
- 
- static void pvebackup_init(void)
- {
-     qemu_rec_mutex_init(&backup_state.stat.lock);
-     qemu_rec_mutex_init(&backup_state.backup_mutex);
-+    qemu_co_mutex_init(&backup_state.dump_callback_mutex);
- }
- 
- // initialize PVEBackupState at startup
-@@ -114,16 +133,16 @@ pvebackup_co_dump_pbs_cb(
-     Error *local_err = NULL;
-     int pbs_res = -1;
- 
-    qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+    qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
- 
-     // avoid deadlock if job is cancelled
-     if (pvebackup_error_or_canceled()) {
-        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+        qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
-         return -1;
-     }
- 
-     pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
-    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+    qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
- 
-     if (pbs_res < 0) {
-         pvebackup_propagate_error(local_err);
-@@ -149,7 +168,6 @@ pvebackup_co_dump_vma_cb(
-     const unsigned char *buf = pbuf;
-     PVEBackupDevInfo *di = opaque;
- 
-
-     int ret = -1;
- 
-     assert(backup_state.vmaw);
-@@ -167,16 +185,16 @@ pvebackup_co_dump_vma_cb(
-     }
- 
-     while (remaining > 0) {
-        qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+        qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
-         // avoid deadlock if job is cancelled
-         if (pvebackup_error_or_canceled()) {
-            qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+            qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
-             return -1;
-         }
- 
-         size_t zero_bytes = 0;
-         ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes);
-        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+        qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
- 
-         ++cluster_num;
-         if (buf) {
-@@ -203,12 +221,11 @@ pvebackup_co_dump_vma_cb(
-     return size;
- }
- 
-+// assumes the caller holds backup_mutex
- static void coroutine_fn pvebackup_co_cleanup(void *unused)
- {
-     assert(qemu_in_coroutine());
- 
-    qemu_rec_mutex_lock(&backup_state.backup_mutex);
-
-     qemu_rec_mutex_lock(&backup_state.stat.lock);
-     backup_state.stat.end_time = time(NULL);
-     qemu_rec_mutex_unlock(&backup_state.stat.lock);
-@@ -239,9 +256,9 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
- 
-     g_list_free(backup_state.di_list);
-     backup_state.di_list = NULL;
-    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- }
- 
-+// assumes the caller holds backup_mutex
- static void coroutine_fn pvebackup_complete_stream(void *opaque)
- {
-     PVEBackupDevInfo *di = opaque;
-@@ -295,6 +312,8 @@ static void pvebackup_complete_cb(void *opaque, int ret)
- 
- static void pvebackup_cancel(void)
- {
-+    assert(!qemu_in_coroutine());
-+
-     Error *cancel_err = NULL;
-     error_setg(&cancel_err, "backup canceled");
-     pvebackup_propagate_error(cancel_err);
-@@ -348,6 +367,7 @@ void qmp_backup_cancel(Error **errp)
-     pvebackup_cancel();
- }
- 
-+// assumes the caller holds backup_mutex
- static int coroutine_fn pvebackup_co_add_config(
-     const char *file,
-     const char *name,
-@@ -431,9 +451,9 @@ static void pvebackup_run_next_job(void)
-         }
-     }
- 
-    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
-     block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
-+
-+    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- }
- 
- static bool create_backup_jobs(void) {
-@@ -520,6 +540,7 @@ typedef struct QmpBackupTask {
-     UuidInfo *result;
- } QmpBackupTask;
- 
-+// assumes the caller holds backup_mutex
- static void coroutine_fn pvebackup_co_prepare(void *opaque)
- {
-     assert(qemu_in_coroutine());
-@@ -543,11 +564,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
-     const char *config_name = "qemu-server.conf";
-     const char *firewall_name = "qemu-server.fw";
- 
-    qemu_rec_mutex_lock(&backup_state.backup_mutex);
-
-     if (backup_state.di_list) {
-        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-        error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
-+         error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
-                   "previous backup not finished");
-         return;
-     }
-@@ -792,8 +810,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
- 
-     backup_state.di_list = di_list;
- 
-    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
-     uuid_info = g_malloc0(sizeof(*uuid_info));
-     uuid_info->UUID = uuid_str;
- 
-@@ -836,8 +852,6 @@ err:
-         rmdir(backup_dir);
-     }
- 
-    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
-     task->result = NULL;
-     return;
- }
-@@ -881,13 +895,16 @@ UuidInfo *qmp_backup(
-         .errp = errp,
-     };
- 
-+    qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+
-     block_on_coroutine_fn(pvebackup_co_prepare, &task);
- 
-     if (*errp == NULL) {
-        qemu_rec_mutex_lock(&backup_state.backup_mutex);
-         create_backup_jobs();
-         qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-         pvebackup_run_next_job();
-+    } else {
-+        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-     }
- 
-     return task.result;
--- a/debian/patches/pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
+++ b/debian/patches/pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
@@ -0,0 +1,162 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fabian Ebner <f.ebner@proxmox.com>
+Date: Tue, 17 May 2022 09:46:02 +0200
+Subject: [PATCH] Revert "block/rbd: implement bdrv_co_block_status"
+
+During backup, bdrv_co_block_status is called for each block copy
+chunk. When RBD is used, the current implementation with
+rbd_diff_iterate2() using whole_object=true takes about linearly more
+time, depending on the image size. Since there are linearly more
+chunks, the slowdown is quadratic, becoming unacceptable for large
+images (starting somewhere between 500-1000 GiB in my testing).
+
+This reverts commit 0347a8fd4c3faaedf119be04c197804be40a384b as a
+stop-gap measure, until it's clear how to make the implementation
+more efficient.
+
+Upstream bug report:
+https://gitlab.com/qemu-project/qemu/-/issues/1026
+
+Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/rbd.c | 112 ----------------------------------------------------
+ 1 file changed, 112 deletions(-)
+
+diff --git a/block/rbd.c b/block/rbd.c
+index 347b121626..e61b359b97 100644
+--- a/block/rbd.c
+++ b/block/rbd.c
+@@ -108,12 +108,6 @@ typedef struct RBDTask {
+     int64_t ret;
+ } RBDTask;
+ 
+-typedef struct RBDDiffIterateReq {
+-    uint64_t offs;
+-    uint64_t bytes;
+-    bool exists;
+-} RBDDiffIterateReq;
+-
+ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
+                             BlockdevOptionsRbd *opts, bool cache,
+                             const char *keypairs, const char *secretid,
+@@ -1460,111 +1454,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
+     return spec_info;
+ }
+ 
+-/*
+- * rbd_diff_iterate2 allows to interrupt the exection by returning a negative
+- * value in the callback routine. Choose a value that does not conflict with
+- * an existing exitcode and return it if we want to prematurely stop the
+- * execution because we detected a change in the allocation status.
+- */
+-#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
+-
+-static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
+-                                    int exists, void *opaque)
+-{
+-    RBDDiffIterateReq *req = opaque;
+-
+-    assert(req->offs + req->bytes <= offs);
+-    /*
+-     * we do not diff against a snapshot so we should never receive a callback
+-     * for a hole.
+-     */
+-    assert(exists);
+-
+-    if (!req->exists && offs > req->offs) {
+-        /*
+-         * we started in an unallocated area and hit the first allocated
+-         * block. req->bytes must be set to the length of the unallocated area
+-         * before the allocated area. stop further processing.
+-         */
+-        req->bytes = offs - req->offs;
+-        return QEMU_RBD_EXIT_DIFF_ITERATE2;
+-    }
+-
+-    if (req->exists && offs > req->offs + req->bytes) {
+-        /*
+-         * we started in an allocated area and jumped over an unallocated area,
+-         * req->bytes contains the length of the allocated area before the
+-         * unallocated area. stop further processing.
+-         */
+-        return QEMU_RBD_EXIT_DIFF_ITERATE2;
+-    }
+-
+-    req->bytes += len;
+-    req->exists = true;
+-
+-    return 0;
+-}
+-
+-static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+-                                                 bool want_zero, int64_t offset,
+-                                                 int64_t bytes, int64_t *pnum,
+-                                                 int64_t *map,
+-                                                 BlockDriverState **file)
+-{
+-    BDRVRBDState *s = bs->opaque;
+-    int status, r;
+-    RBDDiffIterateReq req = { .offs = offset };
+-    uint64_t features, flags;
+-
+-    assert(offset + bytes <= s->image_size);
+-
+-    /* default to all sectors allocated */
+-    status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+-    *map = offset;
+-    *file = bs;
+-    *pnum = bytes;
+-
+-    /* check if RBD image supports fast-diff */
+-    r = rbd_get_features(s->image, &features);
+-    if (r < 0) {
+-        return status;
+-    }
+-    if (!(features & RBD_FEATURE_FAST_DIFF)) {
+-        return status;
+-    }
+-
+-    /* check if RBD fast-diff result is valid */
+-    r = rbd_get_flags(s->image, &flags);
+-    if (r < 0) {
+-        return status;
+-    }
+-    if (flags & RBD_FLAG_FAST_DIFF_INVALID) {
+-        return status;
+-    }
+-
+-    r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+-                          qemu_rbd_diff_iterate_cb, &req);
+-    if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
+-        return status;
+-    }
+-    assert(req.bytes <= bytes);
+-    if (!req.exists) {
+-        if (r == 0) {
+-            /*
+-             * rbd_diff_iterate2 does not invoke callbacks for unallocated
+-             * areas. This here catches the case where no callback was
+-             * invoked at all (req.bytes == 0).
+-             */
+-            assert(req.bytes == 0);
+-            req.bytes = bytes;
+-        }
+-        status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
+-    }
+-
+-    *pnum = req.bytes;
+-    return status;
+-}
+-
+ static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs)
+ {
+     BDRVRBDState *s = bs->opaque;
+@@ -1800,7 +1689,6 @@ static BlockDriver bdrv_rbd = {
+ #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+     .bdrv_co_pwrite_zeroes  = qemu_rbd_co_pwrite_zeroes,
+ #endif
+-    .bdrv_co_block_status   = qemu_rbd_co_block_status,
+ 
+     .bdrv_snapshot_create   = qemu_rbd_snap_create,
+     .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
--- a/debian/patches/pve/0042-PVE-Backup-use-QemuMutex-instead-of-QemuRecMutex.patch
+++ b/debian/patches/pve/0042-PVE-Backup-use-QemuMutex-instead-of-QemuRecMutex.patch
@@ -1,227 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dietmar Maurer <dietmar@proxmox.com>
-Date: Fri, 17 Apr 2020 08:57:48 +0200
-Subject: [PATCH] PVE Backup: use QemuMutex instead of QemuRecMutex
-
-We acquire/release all mutexes outside coroutines now, so we can now
-correctly use a normal mutex.
---
- pve-backup.c | 58 ++++++++++++++++++++++++++--------------------------
- 1 file changed, 29 insertions(+), 29 deletions(-)
-
-diff --git a/pve-backup.c b/pve-backup.c
-index dddf430399..bb917ee972 100644
--- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -31,7 +31,7 @@
- static struct PVEBackupState {
-     struct {
-         // Everithing accessed from qmp_backup_query command is protected using lock
-        QemuRecMutex lock;
-+        QemuMutex lock;
-         Error *error;
-         time_t start_time;
-         time_t end_time;
-@@ -46,14 +46,14 @@ static struct PVEBackupState {
-     VmaWriter *vmaw;
-     ProxmoxBackupHandle *pbs;
-     GList *di_list;
-    QemuRecMutex backup_mutex;
-+    QemuMutex backup_mutex;
-     CoMutex dump_callback_mutex;
- } backup_state;
- 
- static void pvebackup_init(void)
- {
-    qemu_rec_mutex_init(&backup_state.stat.lock);
-    qemu_rec_mutex_init(&backup_state.backup_mutex);
-+    qemu_mutex_init(&backup_state.stat.lock);
-+    qemu_mutex_init(&backup_state.backup_mutex);
-     qemu_co_mutex_init(&backup_state.dump_callback_mutex);
- }
- 
-@@ -91,26 +91,26 @@ lookup_active_block_job(PVEBackupDevInfo *di)
- 
- static void pvebackup_propagate_error(Error *err)
- {
-    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    qemu_mutex_lock(&backup_state.stat.lock);
-     error_propagate(&backup_state.stat.error, err);
-    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+    qemu_mutex_unlock(&backup_state.stat.lock);
- }
- 
- static bool pvebackup_error_or_canceled(void)
- {
-    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    qemu_mutex_lock(&backup_state.stat.lock);
-     bool error_or_canceled = !!backup_state.stat.error;
-    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+    qemu_mutex_unlock(&backup_state.stat.lock);
- 
-     return error_or_canceled;
- }
- 
- static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
- {
-    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    qemu_mutex_lock(&backup_state.stat.lock);
-     backup_state.stat.zero_bytes += zero_bytes;
-     backup_state.stat.transferred += transferred;
-    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+    qemu_mutex_unlock(&backup_state.stat.lock);
- }
- 
- // This may get called from multiple coroutines in multiple io-threads
-@@ -226,9 +226,9 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
- {
-     assert(qemu_in_coroutine());
- 
-    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    qemu_mutex_lock(&backup_state.stat.lock);
-     backup_state.stat.end_time = time(NULL);
-    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+    qemu_mutex_unlock(&backup_state.stat.lock);
- 
-     if (backup_state.vmaw) {
-         Error *local_err = NULL;
-@@ -284,7 +284,7 @@ static void pvebackup_complete_cb(void *opaque, int ret)
- 
-     PVEBackupDevInfo *di = opaque;
- 
-    qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+    qemu_mutex_lock(&backup_state.backup_mutex);
- 
-     di->completed = true;
- 
-@@ -305,7 +305,7 @@ static void pvebackup_complete_cb(void *opaque, int ret)
- 
-     g_free(di);
- 
-    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+    qemu_mutex_unlock(&backup_state.backup_mutex);
- 
-     pvebackup_run_next_job();
- }
-@@ -318,7 +318,7 @@ static void pvebackup_cancel(void)
-     error_setg(&cancel_err, "backup canceled");
-     pvebackup_propagate_error(cancel_err);
- 
-    qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+    qemu_mutex_lock(&backup_state.backup_mutex);
- 
-     if (backup_state.vmaw) {
-         /* make sure vma writer does not block anymore */
-@@ -329,13 +329,13 @@ static void pvebackup_cancel(void)
-         proxmox_backup_abort(backup_state.pbs, "backup canceled");
-     }
- 
-    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+    qemu_mutex_unlock(&backup_state.backup_mutex);
- 
-     for(;;) {
- 
-         BlockJob *next_job = NULL;
- 
-        qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+        qemu_mutex_lock(&backup_state.backup_mutex);
- 
-         GList *l = backup_state.di_list;
-         while (l) {
-@@ -349,7 +349,7 @@ static void pvebackup_cancel(void)
-             }
-         }
- 
-        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+        qemu_mutex_unlock(&backup_state.backup_mutex);
- 
-         if (next_job) {
-             AioContext *aio_context = next_job->job.aio_context;
-@@ -423,7 +423,7 @@ static void pvebackup_run_next_job(void)
- {
-     assert(!qemu_in_coroutine());
- 
-    qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+    qemu_mutex_lock(&backup_state.backup_mutex);
- 
-     GList *l = backup_state.di_list;
-     while (l) {
-@@ -433,7 +433,7 @@ static void pvebackup_run_next_job(void)
-         BlockJob *job = lookup_active_block_job(di);
- 
-         if (job) {
-            qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+            qemu_mutex_unlock(&backup_state.backup_mutex);
- 
-             AioContext *aio_context = job->job.aio_context;
-             aio_context_acquire(aio_context);
-@@ -453,7 +453,7 @@ static void pvebackup_run_next_job(void)
- 
-     block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
- 
-    qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+    qemu_mutex_unlock(&backup_state.backup_mutex);
- }
- 
- static bool create_backup_jobs(void) {
-@@ -778,7 +778,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
-     }
-     /* initialize global backup_state now */
- 
-    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    qemu_mutex_lock(&backup_state.stat.lock);
- 
-     if (backup_state.stat.error) {
-         error_free(backup_state.stat.error);
-@@ -801,7 +801,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
-     backup_state.stat.transferred = 0;
-     backup_state.stat.zero_bytes = 0;
- 
-    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+    qemu_mutex_unlock(&backup_state.stat.lock);
- 
-     backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
- 
-@@ -895,16 +895,16 @@ UuidInfo *qmp_backup(
-         .errp = errp,
-     };
- 
-    qemu_rec_mutex_lock(&backup_state.backup_mutex);
-+    qemu_mutex_lock(&backup_state.backup_mutex);
- 
-     block_on_coroutine_fn(pvebackup_co_prepare, &task);
- 
-     if (*errp == NULL) {
-         create_backup_jobs();
-        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+        qemu_mutex_unlock(&backup_state.backup_mutex);
-         pvebackup_run_next_job();
-     } else {
-        qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-+        qemu_mutex_unlock(&backup_state.backup_mutex);
-     }
- 
-     return task.result;
-@@ -914,11 +914,11 @@ BackupStatus *qmp_query_backup(Error **errp)
- {
-     BackupStatus *info = g_malloc0(sizeof(*info));
- 
-    qemu_rec_mutex_lock(&backup_state.stat.lock);
-+    qemu_mutex_lock(&backup_state.stat.lock);
- 
-     if (!backup_state.stat.start_time) {
-         /* not started, return {} */
-        qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+        qemu_mutex_unlock(&backup_state.stat.lock);
-         return info;
-     }
- 
-@@ -955,7 +955,7 @@ BackupStatus *qmp_query_backup(Error **errp)
-     info->has_transferred = true;
-     info->transferred = backup_state.stat.transferred;
- 
-    qemu_rec_mutex_unlock(&backup_state.stat.lock);
-+    qemu_mutex_unlock(&backup_state.stat.lock);
- 
-     return info;
- }
--- a/debian/patches/pve/0042-alloc-track-error-out-when-auto-remove-is-not-set.patch
+++ b/debian/patches/pve/0042-alloc-track-error-out-when-auto-remove-is-not-set.patch
@@ -0,0 +1,43 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Tue, 26 Mar 2024 14:57:51 +0100
+Subject: [PATCH] alloc-track: error out when auto-remove is not set
+
+Since replacing the node now happens in the stream job, where the
+option cannot be read from (it's internal to the driver), it will
+always be treated as on.
+
+qemu-server will always set it, make sure to have other users notice
+the change (should they even exist). The option can be fully dropped
+in the future while adding a version guard in qemu-server.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/alloc-track.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/block/alloc-track.c b/block/alloc-track.c
+index b9f8ea9137..f3ed2935c4 100644
+--- a/block/alloc-track.c
+++ b/block/alloc-track.c
+@@ -34,7 +34,6 @@ typedef struct {
+     BdrvDirtyBitmap *bitmap;
+     uint64_t granularity;
+     DropState drop_state;
+-    bool auto_remove;
+ } BDRVAllocTrackState;
+ 
+ static QemuOptsList runtime_opts = {
+@@ -86,7 +85,11 @@ static int track_open(BlockDriverState *bs, QDict *options, int flags,
+         goto fail;
+     }
+ 
+-    s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
++    if (!qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false)) {
++        error_setg(errp, "alloc-track: requires auto-remove option to be set to on");
++        ret = -EINVAL;
++        goto fail;
++    }
+ 
+     /* open the target (write) node, backing will be attached by block layer */
+     file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
--- a/debian/patches/pve/0043-alloc-track-avoid-seemingly-superfluous-child-permis.patch
+++ b/debian/patches/pve/0043-alloc-track-avoid-seemingly-superfluous-child-permis.patch
@@ -0,0 +1,84 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Wed, 27 Mar 2024 11:15:39 +0100
+Subject: [PATCH] alloc-track: avoid seemingly superfluous child permission
+ update
+
+Doesn't seem necessary nowadays (maybe after commit "alloc-track: fix
+deadlock during drop" where the dropping is not rescheduled and delayed
+anymore or some upstream change). Should there really be some issue,
+instead of having a drop state, this could also be just based off the
+fact whether there is still a backing child.
+
+Dumping the cumulative (shared) permissions for the BDS with a debug
+print yields the same values after this patch and with QEMU 8.1,
+namely 3 and 5.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/alloc-track.c | 26 --------------------------
+ 1 file changed, 26 deletions(-)
+
+diff --git a/block/alloc-track.c b/block/alloc-track.c
+index f3ed2935c4..29138dcc49 100644
+--- a/block/alloc-track.c
+++ b/block/alloc-track.c
+@@ -25,15 +25,9 @@
+ 
+ #define TRACK_OPT_AUTO_REMOVE "auto-remove"
+ 
+-typedef enum DropState {
+-    DropNone,
+-    DropInProgress,
+-} DropState;
+-
+ typedef struct {
+     BdrvDirtyBitmap *bitmap;
+     uint64_t granularity;
+-    DropState drop_state;
+ } BDRVAllocTrackState;
+ 
+ static QemuOptsList runtime_opts = {
+@@ -137,8 +131,6 @@ static int track_open(BlockDriverState *bs, QDict *options, int flags,
+         goto fail;
+     }
+ 
+-    s->drop_state = DropNone;
+-
+ fail:
+     if (ret < 0) {
+         bdrv_graph_wrlock();
+@@ -289,18 +281,8 @@ track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                  BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared,
+                  uint64_t *nperm, uint64_t *nshared)
+ {
+-    BDRVAllocTrackState *s = bs->opaque;
+-
+     *nshared = BLK_PERM_ALL;
+ 
+-    /* in case we're currently dropping ourselves, claim to not use any
+-     * permissions at all - which is fine, since from this point on we will
+-     * never issue a read or write anymore */
+-    if (s->drop_state == DropInProgress) {
+-        *nperm = 0;
+-        return;
+-    }
+-
+     if (role & BDRV_CHILD_DATA) {
+         *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+     } else {
+@@ -326,14 +308,6 @@ track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+      * kinda fits better, but in the long-term, a special parameter would be
+      * nice (or done via qemu-server via upcoming blockdev-replace QMP command).
+      */
+-    if (backing_file == NULL) {
+-        BDRVAllocTrackState *s = bs->opaque;
+-        bdrv_drained_begin(bs);
+-        s->drop_state = DropInProgress;
+-        bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+-        bdrv_drained_end(bs);
+-    }
+-
+     return 0;
+ }
+ 
--- a/debian/patches/pve/0043-move-savevm-async-back-into-a-coroutine.patch
+++ b/debian/patches/pve/0043-move-savevm-async-back-into-a-coroutine.patch
@@ -1,111 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Thu, 30 Apr 2020 15:55:37 +0200
-Subject: [PATCH] move savevm-async back into a coroutine
-
-Move qemu_savevm_state_{header,setup} into the main loop and
-the rest of the iteration into a coroutine. The former need
-to lock the iothread (and we can't unlock it in the
-coroutine), and the latter can't deal with being in a
-separate thread, so a coroutine it must be.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
- savevm-async.c | 28 +++++++++-------------------
- 1 file changed, 9 insertions(+), 19 deletions(-)
-
-diff --git a/savevm-async.c b/savevm-async.c
-index a38b15d652..af865b9a0a 100644
--- a/savevm-async.c
-+++ b/savevm-async.c
-@@ -51,7 +51,7 @@ static struct SnapshotState {
-     QEMUFile *file;
-     int64_t total_time;
-     QEMUBH *cleanup_bh;
-    QemuThread thread;
-+    Coroutine *co;
- } snap_state;
- 
- SaveVMInfo *qmp_query_savevm(Error **errp)
-@@ -201,11 +201,9 @@ static void process_savevm_cleanup(void *opaque)
-     int ret;
-     qemu_bh_delete(snap_state.cleanup_bh);
-     snap_state.cleanup_bh = NULL;
-+    snap_state.co = NULL;
-     qemu_savevm_state_cleanup();
- 
-    qemu_mutex_unlock_iothread();
-    qemu_thread_join(&snap_state.thread);
-    qemu_mutex_lock_iothread();
-     ret = save_snapshot_cleanup();
-     if (ret < 0) {
-         save_snapshot_error("save_snapshot_cleanup error %d", ret);
-@@ -221,18 +219,13 @@ static void process_savevm_cleanup(void *opaque)
-     }
- }
- 
-static void *process_savevm_thread(void *opaque)
-+static void process_savevm_coro(void *opaque)
- {
-     int ret;
-     int64_t maxlen;
-     MigrationState *ms = migrate_get_current();
- 
-    rcu_register_thread();
-
-    qemu_savevm_state_header(snap_state.file);
-    qemu_savevm_state_setup(snap_state.file);
-     ret = qemu_file_get_error(snap_state.file);
-
-     if (ret < 0) {
-         save_snapshot_error("qemu_savevm_state_setup failed");
-         goto out;
-@@ -247,16 +240,13 @@ static void *process_savevm_thread(void *opaque)
-         maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
- 
-         if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
-            qemu_mutex_lock_iothread();
-             ret = qemu_savevm_state_iterate(snap_state.file, false);
-             if (ret < 0) {
-                 save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
-                 break;
-             }
-            qemu_mutex_unlock_iothread();
-             DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
-         } else {
-            qemu_mutex_lock_iothread();
-             qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
-             ret = global_state_store();
-             if (ret) {
-@@ -285,16 +275,12 @@ static void *process_savevm_thread(void *opaque)
-     }
- 
-     qemu_bh_schedule(snap_state.cleanup_bh);
-    qemu_mutex_unlock_iothread();
- 
- out:
-     /* set migration state accordingly and clear soon-to-be stale file */
-     migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
-                       ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
-     ms->to_dst_file = NULL;
-
-    rcu_unregister_thread();
-    return NULL;
- }
- 
- void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
-@@ -373,8 +359,12 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
- 
-     snap_state.state = SAVE_STATE_ACTIVE;
-     snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
-    qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
-                       NULL, QEMU_THREAD_JOINABLE);
-+    snap_state.co = qemu_coroutine_create(&process_savevm_coro, NULL);
-+    qemu_mutex_unlock_iothread();
-+    qemu_savevm_state_header(snap_state.file);
-+    qemu_savevm_state_setup(snap_state.file);
-+    qemu_mutex_lock_iothread();
-+    aio_co_schedule(iohandler_get_aio_context(), snap_state.co);
- 
-     return;
- 
--- a/debian/patches/pve/0044-add-optional-buffer-size-to-QEMUFile.patch
+++ b/debian/patches/pve/0044-add-optional-buffer-size-to-QEMUFile.patch
@@ -1,183 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Mon, 4 May 2020 11:05:08 +0200
-Subject: [PATCH] add optional buffer size to QEMUFile
-
-So we can use a 4M buffer for savevm-async which should
-increase performance storing the state onto ceph.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
- migration/qemu-file.c | 36 ++++++++++++++++++++++++------------
- migration/qemu-file.h |  1 +
- savevm-async.c        |  4 ++--
- 3 files changed, 27 insertions(+), 14 deletions(-)
-
-diff --git a/migration/qemu-file.c b/migration/qemu-file.c
-index 1c3a358a14..7362e51c71 100644
--- a/migration/qemu-file.c
-+++ b/migration/qemu-file.c
-@@ -30,7 +30,7 @@
- #include "trace.h"
- #include "qapi/error.h"
- 
-#define IO_BUF_SIZE 32768
-+#define DEFAULT_IO_BUF_SIZE 32768
- #define MAX_IOV_SIZE MIN(IOV_MAX, 64)
- 
- struct QEMUFile {
-@@ -45,7 +45,8 @@ struct QEMUFile {
-                     when reading */
-     int buf_index;
-     int buf_size; /* 0 when writing */
-    uint8_t buf[IO_BUF_SIZE];
-+    size_t buf_allocated_size;
-+    uint8_t *buf;
- 
-     DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
-     struct iovec iov[MAX_IOV_SIZE];
-@@ -101,7 +102,7 @@ bool qemu_file_mode_is_not_valid(const char *mode)
-     return false;
- }
- 
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
-+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, size_t buffer_size)
- {
-     QEMUFile *f;
- 
-@@ -109,9 +110,17 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
- 
-     f->opaque = opaque;
-     f->ops = ops;
-+    f->buf_allocated_size = buffer_size;
-+    f->buf = malloc(buffer_size);
-+
-     return f;
- }
- 
-+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
-+{
-+    return qemu_fopen_ops_sized(opaque, ops, DEFAULT_IO_BUF_SIZE);
-+}
-+
- 
- void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
- {
-@@ -346,7 +355,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
-     }
- 
-     len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
-                             IO_BUF_SIZE - pending, &local_error);
-+                             f->buf_allocated_size - pending, &local_error);
-     if (len > 0) {
-         f->buf_size += len;
-         f->pos += len;
-@@ -386,6 +395,9 @@ int qemu_fclose(QEMUFile *f)
-             ret = ret2;
-         }
-     }
-+
-+    free(f->buf);
-+
-     /* If any error was spotted before closing, we should report it
-      * instead of the close() return value.
-      */
-@@ -435,7 +447,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
- {
-     if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
-         f->buf_index += len;
-        if (f->buf_index == IO_BUF_SIZE) {
-+        if (f->buf_index == f->buf_allocated_size) {
-             qemu_fflush(f);
-         }
-     }
-@@ -461,7 +473,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
-     }
- 
-     while (size > 0) {
-        l = IO_BUF_SIZE - f->buf_index;
-+        l = f->buf_allocated_size - f->buf_index;
-         if (l > size) {
-             l = size;
-         }
-@@ -508,8 +520,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
-     size_t index;
- 
-     assert(!qemu_file_is_writable(f));
-    assert(offset < IO_BUF_SIZE);
-    assert(size <= IO_BUF_SIZE - offset);
-+    assert(offset < f->buf_allocated_size);
-+    assert(size <= f->buf_allocated_size - offset);
- 
-     /* The 1st byte to read from */
-     index = f->buf_index + offset;
-@@ -559,7 +571,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
-         size_t res;
-         uint8_t *src;
- 
-        res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
-+        res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
-         if (res == 0) {
-             return done;
-         }
-@@ -593,7 +605,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
-  */
- size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
- {
-    if (size < IO_BUF_SIZE) {
-+    if (size < f->buf_allocated_size) {
-         size_t res;
-         uint8_t *src;
- 
-@@ -618,7 +630,7 @@ int qemu_peek_byte(QEMUFile *f, int offset)
-     int index = f->buf_index + offset;
- 
-     assert(!qemu_file_is_writable(f));
-    assert(offset < IO_BUF_SIZE);
-+    assert(offset < f->buf_allocated_size);
- 
-     if (index >= f->buf_size) {
-         qemu_fill_buffer(f);
-@@ -770,7 +782,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
- ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
-                                   const uint8_t *p, size_t size)
- {
-    ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
-+    ssize_t blen = f->buf_allocated_size - f->buf_index - sizeof(int32_t);
- 
-     if (blen < compressBound(size)) {
-         return -1;
-diff --git a/migration/qemu-file.h b/migration/qemu-file.h
-index a9b6d6ccb7..8752d27c74 100644
--- a/migration/qemu-file.h
-+++ b/migration/qemu-file.h
-@@ -120,6 +120,7 @@ typedef struct QEMUFileHooks {
- } QEMUFileHooks;
- 
- QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
-+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, size_t buffer_size);
- void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
- int qemu_get_fd(QEMUFile *f);
- int qemu_fclose(QEMUFile *f);
-diff --git a/savevm-async.c b/savevm-async.c
-index af865b9a0a..c3fe741c38 100644
--- a/savevm-async.c
-+++ b/savevm-async.c
-@@ -338,7 +338,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
-         goto restart;
-     }
- 
-    snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
-+    snap_state.file = qemu_fopen_ops_sized(&snap_state, &block_file_ops, 4 * 1024 * 1024);
- 
-     if (!snap_state.file) {
-         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
-@@ -454,7 +454,7 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
-     blk_op_block_all(be, blocker);
- 
-     /* restore the VM state */
-    f = qemu_fopen_ops(be, &loadstate_file_ops);
-+    f = qemu_fopen_ops_sized(be, &loadstate_file_ops, 4 * 1024 * 1024);
-     if (!f) {
-         error_setg(errp, "Could not open VM state file");
-         goto the_end;
--- a/debian/patches/pve/0044-block-copy-before-write-fix-permission.patch
+++ b/debian/patches/pve/0044-block-copy-before-write-fix-permission.patch
@@ -0,0 +1,55 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:22 +0200
+Subject: [PATCH] block/copy-before-write: fix permission
+
+In case when source node does not have any parents, the condition still
+works as required: backup job does create the parent by
+
+  block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child
+
+Still, in this case checking @perm variable doesn't work, as backup job
+creates the root blk with empty permissions (as it relies on CBW filter
+to require correct permissions and doesn't want to create extra
+conflicts).
+
+So, we should not check @perm.
+
+The hack may be dropped entirely when transactional insertion of
+filter (when we don't try to recalculate permissions in intermediate
+state, when filter does conflict with original parent of the source
+node) is merged (old big series
+"[PATCH v5 00/45] Transactional block-graph modifying API"[1] and its
+current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2])
+
+[1] https://patchew.org/QEMU/20220330212902.590099-1-vsementsov@openvz.org/
+[2] https://patchew.org/QEMU/20231017184444.932733-1-vsementsov@yandex-team.ru/
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 026fa9840f..5a9456d426 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -364,9 +364,13 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                            perm, shared, nperm, nshared);
+ 
+         if (!QLIST_EMPTY(&bs->parents)) {
+-            if (perm & BLK_PERM_WRITE) {
+-                *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+-            }
++            /*
++             * Note, that source child may be shared with backup job. Backup job
++             * does create own blk parent on copy-before-write node, so this
++             * works even if source node does not have any parents before backup
++             * start
++             */
++            *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+         }
+     }
--- a/debian/patches/pve/0045-block-copy-before-write-support-unligned-snapshot-di.patch
+++ b/debian/patches/pve/0045-block-copy-before-write-support-unligned-snapshot-di.patch
@@ -0,0 +1,48 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:23 +0200
+Subject: [PATCH] block/copy-before-write: support unaligned snapshot-discard
+
+The first thing that crashes on unaligned access here is
+bdrv_reset_dirty_bitmap(). The correct way is to align down the
+snapshot-discard request.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 5a9456d426..c0e70669a2 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
+ cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
+ {
+     BDRVCopyBeforeWriteState *s = bs->opaque;
+    uint32_t cluster_size = block_copy_cluster_size(s->bcs);
+    int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
+    int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
+    int64_t aligned_bytes;
+
+    if (aligned_end <= aligned_offset) {
+        return 0;
+    }
+    aligned_bytes = aligned_end - aligned_offset;
+ 
+     WITH_QEMU_LOCK_GUARD(&s->lock) {
+-        bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
+        bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
+                                aligned_bytes);
+     }
+ 
+-    block_copy_reset(s->bcs, offset, bytes);
+    block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
+ 
+-    return bdrv_co_pdiscard(s->target, offset, bytes);
+    return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
+ }
+ 
+ static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)
--- a/debian/patches/pve/0046-block-copy-before-write-create-block_copy-bitmap-in-.patch
+++ b/debian/patches/pve/0046-block-copy-before-write-create-block_copy-bitmap-in-.patch
@@ -0,0 +1,373 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:24 +0200
+Subject: [PATCH] block/copy-before-write: create block_copy bitmap in filter
+ node
+
+Currently block_copy creates copy_bitmap in the source node. But that
+interacts badly with .independent_close=true of the copy-before-write
+filter: the source node may be detached and removed before the
+.bdrv_close() handler is called, which should call
+block_copy_state_free(), which in turn should remove copy_bitmap.
+
+That's all not ideal: it would be better if internal bitmap of
+block-copy object is not attached to any node. But that is not possible
+now.
+
+The simplest solution is to just create copy_bitmap in the filter node,
+where the two other bitmaps are created anyway.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c         |   3 +-
+ block/copy-before-write.c  |   2 +-
+ include/block/block-copy.h |   1 +
+ tests/qemu-iotests/257.out | 112 ++++++++++++++++++-------------------
+ 4 files changed, 60 insertions(+), 58 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 9ee3dd7ef5..8fca2c3698 100644
+--- a/block/block-copy.c
+++ b/block/block-copy.c
+@@ -351,6 +351,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ }
+ 
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      Error **errp)
+ {
+@@ -367,7 +368,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+         return NULL;
+     }
+ 
+-    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
+    copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
+                                            errp);
+     if (!copy_bitmap) {
+         return NULL;
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index c0e70669a2..94db31512d 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -468,7 +468,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+              bs->file->bs->supported_zero_flags);
+ 
+-    s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
+    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 0700953ab8..8b41643bfa 100644
+--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
+@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
+ typedef struct BlockCopyCallState BlockCopyCallState;
+ 
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      Error **errp);
+ 
+diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
+index aa76131ca9..c33dd7f3a9 100644
+--- a/tests/qemu-iotests/257.out
+++ b/tests/qemu-iotests/257.out
+@@ -120,16 +120,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -596,16 +596,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -865,16 +865,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2086,16 +2086,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2355,16 +2355,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2831,16 +2831,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3100,16 +3100,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3576,16 +3576,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3845,16 +3845,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -4321,16 +4321,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -4590,16 +4590,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -5066,16 +5066,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
+      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
+      }
+    ],
+    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
--- a/debian/patches/pve/0047-qapi-blockdev-backup-add-discard-source-parameter.patch
+++ b/debian/patches/pve/0047-qapi-blockdev-backup-add-discard-source-parameter.patch
@@ -0,0 +1,277 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:25 +0200
+Subject: [PATCH] qapi: blockdev-backup: add discard-source parameter
+
+Add a parameter that enables discard-after-copy. That is mostly useful
+in "push backup with fleecing" scheme, when source is snapshot-access
+format driver node, based on copy-before-write filter snapshot-access
+API:
+
+[guest]      [snapshot-access] ~~ blockdev-backup ~~> [backup target]
+   |            |
+   | root       | file
+   v            v
+[copy-before-write]
+   |             |
+   | file        | target
+   v             v
+[active disk]   [temp.img]
+
+In this case discard-after-copy does two things:
+
+ - discard data in temp.img to save disk space
+ - avoid further copy-before-write operation in discarded area
+
+Note that we have to declare WRITE permission on source in
+copy-before-write filter, for discard to work. Still we can't take it
+unconditionally, as it will break normal backup from RO source. So, we
+have to add a parameter and pass it through bdrv_open flags.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c                         |  5 +++--
+ block/block-copy.c                     |  9 +++++++++
+ block/copy-before-write.c              | 15 +++++++++++++--
+ block/copy-before-write.h              |  1 +
+ block/replication.c                    |  4 ++--
+ blockdev.c                             |  2 +-
+ include/block/block-common.h           |  2 ++
+ include/block/block-copy.h             |  1 +
+ include/block/block_int-global-state.h |  2 +-
+ qapi/block-core.json                   |  4 ++++
+ 10 files changed, 37 insertions(+), 8 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index 16d611c4ca..1963e47ab9 100644
+--- a/block/backup.c
+++ b/block/backup.c
+@@ -332,7 +332,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+                   BlockDriverState *target, int64_t speed,
+                   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
+                   BitmapSyncMode bitmap_mode,
+-                  bool compress,
+                  bool compress, bool discard_source,
+                   const char *filter_node_name,
+                   BackupPerf *perf,
+                   BlockdevOnError on_source_error,
+@@ -433,7 +433,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+         goto error;
+     }
+ 
+-    cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
+    cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+                          &bcs, errp);
+     if (!cbw) {
+         goto error;
+     }
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 8fca2c3698..7e3b378528 100644
+--- a/block/block-copy.c
+++ b/block/block-copy.c
+@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
+     CoMutex lock;
+     int64_t in_flight_bytes;
+     BlockCopyMethod method;
+    bool discard_source;
+     BlockReqList reqs;
+     QLIST_HEAD(, BlockCopyCallState) calls;
+     /*
+@@ -353,6 +354,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                     bool discard_source,
+                                      Error **errp)
+ {
+     ERRP_GUARD();
+@@ -418,6 +420,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     cluster_size),
+     };
+ 
+    s->discard_source = discard_source;
+     block_copy_set_copy_opts(s, false, false);
+ 
+     ratelimit_init(&s->rate_limit);
+@@ -589,6 +592,12 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
+     co_put_to_shres(s->mem, t->req.bytes);
+     block_copy_task_end(t, ret);
+ 
+    if (s->discard_source && ret == 0) {
+        int64_t nbytes =
+            MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
+        bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
+    }
+
+     return ret;
+ }
+ 
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 94db31512d..853e01a1eb 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
+     BdrvChild *target;
+     OnCbwError on_cbw_error;
+     uint64_t cbw_timeout_ns;
+    bool discard_source;
+ 
+     /*
+      * @lock: protects access to @access_bitmap, @done_bitmap and
+@@ -357,6 +358,8 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                uint64_t perm, uint64_t shared,
+                uint64_t *nperm, uint64_t *nshared)
+ {
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+
+     if (!(role & BDRV_CHILD_FILTERED)) {
+         /*
+          * Target child
+@@ -381,6 +384,10 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+              * start
+              */
+             *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+            if (s->discard_source) {
+                *nperm = *nperm | BLK_PERM_WRITE;
+            }
+
+             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+         }
+     }
+@@ -468,7 +475,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+              bs->file->bs->supported_zero_flags);
+ 
+-    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
+    s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
+    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
+                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+@@ -535,12 +544,14 @@ static BlockDriver bdrv_cbw_filter = {
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                  bool discard_source,
+                                   BlockCopyState **bcs,
+                                   Error **errp)
+ {
+     BDRVCopyBeforeWriteState *state;
+     BlockDriverState *top;
+     QDict *opts;
+    int flags = BDRV_O_RDWR | (discard_source ? BDRV_O_CBW_DISCARD_SOURCE : 0);
+ 
+     assert(source->total_sectors == target->total_sectors);
+     GLOBAL_STATE_CODE();
+@@ -553,7 +564,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+     qdict_put_str(opts, "file", bdrv_get_node_name(source));
+     qdict_put_str(opts, "target", bdrv_get_node_name(target));
+ 
+-    top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
+    top = bdrv_insert_node(source, opts, flags, errp);
+     if (!top) {
+         return NULL;
+     }
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 6e72bb25e9..01af0cd3c4 100644
+--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
+@@ -39,6 +39,7 @@
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                  bool discard_source,
+                                   BlockCopyState **bcs,
+                                   Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/block/replication.c b/block/replication.c
+index ca6bd0a720..0415a5e8b7 100644
+--- a/block/replication.c
+++ b/block/replication.c
+@@ -582,8 +582,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
+ 
+         s->backup_job = backup_job_create(
+                                 NULL, s->secondary_disk->bs, s->hidden_disk->bs,
+-                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
+-                                &perf,
+                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
+                                NULL, &perf,
+                                 BLOCKDEV_ON_ERROR_REPORT,
+                                 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
+                                 backup_job_completed, bs, NULL, &local_err);
+diff --git a/blockdev.c b/blockdev.c
+index 5e5dbc1da9..1054a69279 100644
+--- a/blockdev.c
+++ b/blockdev.c
+@@ -2727,7 +2727,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+ 
+     job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
+                             backup->sync, bmap, backup->bitmap_mode,
+-                            backup->compress,
+                            backup->compress, backup->discard_source,
+                             backup->filter_node_name,
+                             &perf,
+                             backup->on_source_error,
+diff --git a/include/block/block-common.h b/include/block/block-common.h
+index a846023a09..338fe5ff7a 100644
+--- a/include/block/block-common.h
+++ b/include/block/block-common.h
+@@ -243,6 +243,8 @@ typedef enum {
+                                       read-write fails */
+ #define BDRV_O_IO_URING    0x40000 /* use io_uring instead of the thread pool */
+ 
+#define BDRV_O_CBW_DISCARD_SOURCE 0x80000 /* for copy-before-write filter */
+
+ #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+ 
+ 
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 8b41643bfa..bdc703bacd 100644
+--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
+@@ -27,6 +27,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                     bool discard_source,
+                                      Error **errp);
+ 
+ /* Function should be called prior any actual copy request */
+diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
+index cc1387ae02..f0c642b194 100644
+--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
+@@ -195,7 +195,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+                             MirrorSyncMode sync_mode,
+                             BdrvDirtyBitmap *sync_bitmap,
+                             BitmapSyncMode bitmap_mode,
+-                            bool compress,
+                            bool compress, bool discard_source,
+                             const char *filter_node_name,
+                             BackupPerf *perf,
+                             BlockdevOnError on_source_error,
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index f516d8e95a..d796d49abb 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -1849,6 +1849,9 @@
+ #     node specified by @drive.  If this option is not given, a node
+ #     name is autogenerated.  (Since: 4.2)
+ #
+# @discard-source: Discard blocks on source which are already copied
+#     to the target.  (Since 9.0)
+#
+ # @x-perf: Performance options.  (Since 6.0)
+ #
+ # Features:
+@@ -1870,6 +1873,7 @@
+             '*on-target-error': 'BlockdevOnError',
+             '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
+             '*filter-node-name': 'str',
+            '*discard-source': 'bool',
+             '*x-perf': { 'type': 'BackupPerf',
+                          'features': [ 'unstable' ] } } }
+ 
--- a/debian/patches/pve/0048-copy-before-write-allow-specifying-minimum-cluster-s.patch
+++ b/debian/patches/pve/0048-copy-before-write-allow-specifying-minimum-cluster-s.patch
@@ -0,0 +1,133 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:26 +0200
+Subject: [PATCH] copy-before-write: allow specifying minimum cluster size
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Copy-before-write operations will use at least this granularity and in
+particular, discard requests to the source node will too. If the
+granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+The QAPI uses uint32 so the value will be non-negative, but still fit
+into an int64_t.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c         | 17 +++++++++++++----
+ block/copy-before-write.c  |  3 ++-
+ include/block/block-copy.h |  1 +
+ qapi/block-core.json       |  8 +++++++-
+ 4 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 7e3b378528..adb1cbb440 100644
+--- a/block/block-copy.c
+++ b/block/block-copy.c
+@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+ }
+ 
+ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+                                                 int64_t min_cluster_size,
+                                                  Error **errp)
+ {
+     int ret;
+@@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+                     "used. If the actual block size of the target exceeds "
+                     "this default, the backup may be unusable",
+                     BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+     } else if (ret < 0 && !target_does_cow) {
+         error_setg_errno(errp, -ret,
+             "Couldn't determine the cluster size of the target image, "
+@@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+         return ret;
+     } else if (ret < 0 && target_does_cow) {
+         /* Not fatal; just trudge on ahead. */
+-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+     }
+ 
+-    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+    return MAX(min_cluster_size,
+               MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
+ }
+ 
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      bool discard_source,
+                                     int64_t min_cluster_size,
+                                      Error **errp)
+ {
+     ERRP_GUARD();
+@@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ 
+     GLOBAL_STATE_CODE();
+ 
+-    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
+    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
+        error_setg(errp, "min-cluster-size needs to be a power of 2");
+        return NULL;
+    }
+
+    cluster_size = block_copy_calculate_cluster_size(target->bs,
+                                                     min_cluster_size, errp);
+     if (cluster_size < 0) {
+         return NULL;
+     }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 853e01a1eb..47b3cdd09f 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -477,7 +477,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+ 
+     s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
+     s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
+-                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+                                  flags & BDRV_O_CBW_DISCARD_SOURCE,
+                                  opts->min_cluster_size, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index bdc703bacd..77857c6c68 100644
+--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
+@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      bool discard_source,
+                                     int64_t min_cluster_size,
+                                      Error **errp);
+ 
+ /* Function should be called prior any actual copy request */
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index d796d49abb..edbf6e78b9 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -4930,12 +4930,18 @@
+ #     @on-cbw-error parameter will decide how this failure is handled.
+ #     Default 0.  (Since 7.1)
+ #
+# @min-cluster-size: Minimum size of blocks used by copy-before-write
+#     operations.  Has to be a power of 2.  No effect if smaller than
+#     the maximum of the target's cluster size and 64 KiB.  Default 0.
+#     (Since 8.1)
+#
+ # Since: 6.2
+ ##
+ { 'struct': 'BlockdevOptionsCbw',
+   'base': 'BlockdevOptionsGenericFormat',
+   'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
+-            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
+            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
+            '*min-cluster-size': 'uint32' } }
+ 
+ ##
+ # @BlockdevOptions:
--- a/debian/patches/pve/0049-backup-add-minimum-cluster-size-to-performance-optio.patch
+++ b/debian/patches/pve/0049-backup-add-minimum-cluster-size-to-performance-optio.patch
@@ -0,0 +1,106 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:27 +0200
+Subject: [PATCH] backup: add minimum cluster size to performance options
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Backup/block-copy will use at least this granularity for copy operations
+and in particular, discard requests to the backup source will too. If
+the granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c            | 2 +-
+ block/copy-before-write.c | 2 ++
+ block/copy-before-write.h | 1 +
+ blockdev.c                | 3 +++
+ qapi/block-core.json      | 9 +++++++--
+ 5 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index 1963e47ab9..fe69723ada 100644
+--- a/block/backup.c
+++ b/block/backup.c
+@@ -434,7 +434,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+     }
+ 
+     cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+-                          &bcs, errp);
+                          perf->min_cluster_size, &bcs, errp);
+     if (!cbw) {
+         goto error;
+     }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 47b3cdd09f..bba58326d7 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -546,6 +546,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                   bool discard_source,
+                                  int64_t min_cluster_size,
+                                   BlockCopyState **bcs,
+                                   Error **errp)
+ {
+@@ -564,6 +565,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+     }
+     qdict_put_str(opts, "file", bdrv_get_node_name(source));
+     qdict_put_str(opts, "target", bdrv_get_node_name(target));
+    qdict_put_int(opts, "min-cluster-size", min_cluster_size);
+ 
+     top = bdrv_insert_node(source, opts, flags, errp);
+     if (!top) {
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 01af0cd3c4..dc6cafe7fa 100644
+--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
+@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                   bool discard_source,
+                                  int64_t min_cluster_size,
+                                   BlockCopyState **bcs,
+                                   Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/blockdev.c b/blockdev.c
+index 1054a69279..cbe224387b 100644
+--- a/blockdev.c
+++ b/blockdev.c
+@@ -2654,6 +2654,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+         if (backup->x_perf->has_max_chunk) {
+             perf.max_chunk = backup->x_perf->max_chunk;
+         }
+        if (backup->x_perf->has_min_cluster_size) {
+            perf.min_cluster_size = backup->x_perf->min_cluster_size;
+        }
+     }
+ 
+     if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index edbf6e78b9..6e7ee87633 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -1790,11 +1790,16 @@
+ #     it should not be less than job cluster size which is calculated
+ #     as maximum of target image cluster size and 64k.  Default 0.
+ #
+# @min-cluster-size: Minimum size of blocks used by copy-before-write
+#     and background copy operations.  Has to be a power of 2.  No
+#     effect if smaller than the maximum of the target's cluster size
+#     and 64 KiB.  Default 0. (Since 8.1)
+#
+ # Since: 6.0
+ ##
+ { 'struct': 'BackupPerf',
+-  'data': { '*use-copy-range': 'bool',
+-            '*max-workers': 'int', '*max-chunk': 'int64' } }
+  'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
+            '*max-chunk': 'int64', '*min-cluster-size': 'uint32' } }
+ 
+ ##
+ # @BackupCommon:
--- a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
+++ b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
@@ -0,0 +1,345 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:28 +0200
+Subject: [PATCH] PVE backup: add fleecing option
+
+When a fleecing option is given, it is expected that each device has
+a corresponding "-fleecing" block device already attached, except for
+EFI disk and TPM state, where fleecing is never used.
+
+The following graph was adapted from [0] which also contains more
+details about fleecing.
+
+[guest]
+   |
+   | root
+   v                 file
+[copy-before-write]<------[snapshot-access]
+   |           |
+   | file      | target
+   v           v
+[source] [fleecing]
+
+For fleecing, a copy-before-write filter is inserted on top of the
+source node, as well as a snapshot-access node pointing to the filter
+node which allows reading the consistent state of the image at the
+time it was inserted. New guest writes are passed through the
+copy-before-write filter which will first copy over old data to the
+fleecing image in case that old data is still needed by the
+snapshot-access node.
+
+The backup process will sequentially read from the snapshot access,
+which has a bitmap and knows whether to read from the original image
+or the fleecing image to get the "snapshot" state, i.e. data from the
+source image at the time when the copy-before-write filter was
+inserted. After reading, the copied sections are discarded from the
+fleecing image to reduce space usage.
+
+All of this can be restricted by an initial dirty bitmap to parts of
+the source image that are required for an incremental backup.
+
+For discard to work, it is necessary that the fleecing image does not
+have a larger cluster size than the backup job granularity. Since
+querying that size does not always work (e.g. for RBD with krbd, the
+cluster size will not be reported), a minimum of 4 MiB is used. A job
+with PBS target already has at least this granularity, so it's just
+relevant for other targets. I.e. edge cases where this minimum is not
+enough should be very rare in practice. If ever necessary in the
+future, a passed-in value for the backup QMP command can still be
+added to override it.
+
+Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
+are set when installing the copy-before-write filter and
+snapshot-access. When an error or timeout occurs, the problematic (and
+each further) snapshot operation will fail and thus cancel the backup
+instead of breaking the guest write.
+
+Note that job_id cannot be inferred from the snapshot-access bs because
+it has no parent, so just pass the one from the original bs.
+
+[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/monitor/block-hmp-cmds.c |   1 +
+ pve-backup.c                   | 143 ++++++++++++++++++++++++++++++++-
+ qapi/block-core.json           |  10 ++-
+ 3 files changed, 150 insertions(+), 4 deletions(-)
+
+diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
+index 5000c084c5..70b3de4c7e 100644
+--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
+@@ -1043,6 +1043,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
+         NULL, NULL,
+         devlist, qdict_haskey(qdict, "speed"), speed,
+         false, 0, // BackupPerf max-workers
+        false, false, // fleecing
+         &error);
+ 
+     hmp_handle_error(mon, error);
+diff --git a/pve-backup.c b/pve-backup.c
+index 9d480a8eec..7cc1dd3724 100644
+--- a/pve-backup.c
+++ b/pve-backup.c
+@@ -7,9 +7,11 @@
+ #include "sysemu/blockdev.h"
+ #include "block/block_int-global-state.h"
+ #include "block/blockjob.h"
+#include "block/copy-before-write.h"
+ #include "block/dirty-bitmap.h"
+ #include "block/graph-lock.h"
+ #include "qapi/qapi-commands-block.h"
+#include "qapi/qmp/qdict.h"
+ #include "qapi/qmp/qerror.h"
+ #include "qemu/cutils.h"
+ 
+@@ -81,8 +83,15 @@ static void pvebackup_init(void)
+ // initialize PVEBackupState at startup
+ opts_init(pvebackup_init);
+ 
+typedef struct PVEBackupFleecingInfo {
+    BlockDriverState *bs;
+    BlockDriverState *cbw;
+    BlockDriverState *snapshot_access;
+} PVEBackupFleecingInfo;
+
+ typedef struct PVEBackupDevInfo {
+     BlockDriverState *bs;
+    PVEBackupFleecingInfo fleecing;
+     size_t size;
+     uint64_t block_size;
+     uint8_t dev_id;
+@@ -355,6 +364,25 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+     PVEBackupDevInfo *di = opaque;
+     di->completed_ret = ret;
+ 
+    /*
+     * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
+     * won't be done as a coroutine anyways:
+     * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
+     *   just spawn a BH calling bdrv_unref().
+     * - For cbw, draining would need to spawn a BH.
+     *
+     * Note that the AioContext lock is already acquired by our caller, i.e.
+     * job_finalize_single_locked()
+     */
+    if (di->fleecing.snapshot_access) {
+        bdrv_unref(di->fleecing.snapshot_access);
+        di->fleecing.snapshot_access = NULL;
+    }
+    if (di->fleecing.cbw) {
+        bdrv_cbw_drop(di->fleecing.cbw);
+        di->fleecing.cbw = NULL;
+    }
+
+     /*
+      * Needs to happen outside of coroutine, because it takes the graph write lock.
+      */
+@@ -522,9 +550,82 @@ static void create_backup_jobs_bh(void *opaque) {
+         }
+         bdrv_drained_begin(di->bs);
+ 
+        BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
+
+        BlockDriverState *source_bs = di->bs;
+        bool discard_source = false;
+        bdrv_graph_co_rdlock();
+        const char *job_id = bdrv_get_device_name(di->bs);
+        bdrv_graph_co_rdunlock();
+        if (di->fleecing.bs) {
+            QDict *cbw_opts = qdict_new();
+            qdict_put_str(cbw_opts, "driver", "copy-before-write");
+            qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
+            qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
+
+            if (di->bitmap) {
+                /*
+                 * Only guest writes to parts relevant for the backup need to be intercepted with
+                 * old data being copied to the fleecing image.
+                 */
+                qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
+                qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
+            }
+            /*
+             * Fleecing storage is supposed to be fast and it's better to break backup than guest
+             * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
+             * abort a bit before that.
+             */
+            qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
+            qdict_put_int(cbw_opts, "cbw-timeout", 45);
+
+            di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
+
+            if (!di->fleecing.cbw) {
+                error_setg(errp, "appending cbw node for fleecing failed: %s",
+                           local_err ? error_get_pretty(local_err) : "unknown error");
+                break;
+            }
+
+            QDict *snapshot_access_opts = qdict_new();
+            qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
+            qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
+
+            /*
+             * Holding the AioContext lock here would cause a deadlock, because bdrv_open_driver()
+             * will acquire it a second time. But it's allowed to be held exactly once when polling
+             * and that happens when the bdrv_refresh_total_sectors() call is made there.
+             */
+            di->fleecing.snapshot_access =
+                bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
+            if (!di->fleecing.snapshot_access) {
+                error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+                           local_err ? error_get_pretty(local_err) : "unknown error");
+                break;
+            }
+            source_bs = di->fleecing.snapshot_access;
+            discard_source = true;
+
+            /*
+             * bdrv_get_info() just returns 0 (= doesn't matter) for RBD when using krbd. But
+             * discard on the fleecing image won't work if the backup job's granularity is less than
+             * the RBD object size (default 4 MiB), so it does matter. Always use at least 4 MiB.
+             * With a PBS target, the backup job granularity would already be at least this much.
+             */
+            perf.min_cluster_size = 4 * 1024 * 1024;
+            /*
+             * For discard to work, cluster size for the backup job must be at least the same as for
+             * the fleecing image.
+             */
+            BlockDriverInfo bdi;
+            if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
+                perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
+            }
+        }
+
+         BlockJob *job = backup_job_create(
+-            NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+-            bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
+            job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+            bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
+             BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
+             &local_err);
+ 
+@@ -580,6 +681,14 @@ static void create_backup_jobs_bh(void *opaque) {
+     aio_co_enter(data->ctx, data->co);
+ }
+ 
+/*
+ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
+ */
+static bool device_uses_fleecing(const char *device_id)
+{
+    return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
+}
+
+ /*
+  * Returns a list of device infos, which needs to be freed by the caller. In
+  * case of an error, errp will be set, but the returned value might still be a
+@@ -587,6 +696,7 @@ static void create_backup_jobs_bh(void *opaque) {
+  */
+ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+     const char *devlist,
+    bool fleecing,
+     Error **errp)
+ {
+     gchar **devs = NULL;
+@@ -610,6 +720,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+             }
+             PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+             di->bs = bs;
+
+            if (fleecing && device_uses_fleecing(*d)) {
+                g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
+                BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
+                if (!fleecing_blk) {
+                    error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                              "Device '%s' not found", fleecing_devid);
+                    goto err;
+                }
+                BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
+                if (!bdrv_co_is_inserted(fleecing_bs)) {
+                    error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, fleecing_devid);
+                    goto err;
+                }
+                /*
+                 * Fleecing image needs to be the same size to act as a cbw target.
+                 */
+                if (bs->total_sectors != fleecing_bs->total_sectors) {
+                    error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
+                               fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
+                    goto err;
+                }
+                di->fleecing.bs = fleecing_bs;
+            }
+
+             di_list = g_list_append(di_list, di);
+             d++;
+         }
+@@ -659,6 +794,7 @@ UuidInfo coroutine_fn *qmp_backup(
+     const char *devlist,
+     bool has_speed, int64_t speed,
+     bool has_max_workers, int64_t max_workers,
+    bool has_fleecing, bool fleecing,
+     Error **errp)
+ {
+     assert(qemu_in_coroutine());
+@@ -687,7 +823,7 @@ UuidInfo coroutine_fn *qmp_backup(
+     format = has_format ? format : BACKUP_FORMAT_VMA;
+ 
+     bdrv_graph_co_rdlock();
+-    di_list = get_device_info(devlist, &local_err);
+    di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
+     bdrv_graph_co_rdunlock();
+     if (local_err) {
+         error_propagate(errp, local_err);
+@@ -1095,5 +1231,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+     ret->query_bitmap_info = true;
+     ret->pbs_masterkey = true;
+     ret->backup_max_workers = true;
+    ret->backup_fleecing = true;
+     return ret;
+ }
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 6e7ee87633..dc5f75cd39 100644
+--- a/qapi/block-core.json
+++ b/qapi/block-core.json
+@@ -948,6 +948,10 @@
+ #
+ # @max-workers: see @BackupPerf for details. Default 16.
+ #
+# @fleecing: perform a backup with fleecing. For each device in @devlist, except
+#            EFI disk and TPM state, a corresponding '-fleecing' device with the
+#            same size already needs to be present.
+#
+ # Returns: the uuid of the backup job
+ #
+ ##
+@@ -968,7 +972,8 @@
+                                     '*firewall-file': 'str',
+                                     '*devlist': 'str',
+                                     '*speed': 'int',
+-                                    '*max-workers': 'int' },
+                                    '*max-workers': 'int',
+                                    '*fleecing': 'bool' },
+   'returns': 'UuidInfo', 'coroutine': true }
+ 
+ ##
+@@ -1014,6 +1019,8 @@
+ #
+ # @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+ #
+# @backup-fleecing: Whether backup fleecing is supported or not.
+#
+ # @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
+ #     supported or not.
+ #
+@@ -1025,6 +1032,7 @@
+             'pbs-dirty-bitmap-migration': 'bool',
+             'pbs-masterkey': 'bool',
+             'pbs-library-version': 'str',
+            'backup-fleecing': 'bool',
+             'backup-max-workers': 'bool' } }
+ 
+ ##
--- a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+++ b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
@@ -0,0 +1,117 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 29 Apr 2024 14:43:58 +0200
+Subject: [PATCH] PVE backup: improve error when copy-before-write fails for
+ fleecing
+
+With fleecing, failure for copy-before-write does not fail the guest
+write, but only sets the snapshot error that is associated with the
+copy-before-write filter, making further requests to the snapshot
+access fail with EACCES, which then also fails the job. But that error
+code is not the root cause of why the backup failed, so bubble up the
+original snapshot error instead.
+
+Reported-by: Friedrich Weber <f.weber@proxmox.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Tested-by: Friedrich Weber <f.weber@proxmox.com>
+---
+ block/copy-before-write.c | 18 ++++++++++++------
+ block/copy-before-write.h |  1 +
+ pve-backup.c              |  9 +++++++++
+ 3 files changed, 22 insertions(+), 6 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index bba58326d7..50cc4c7aae 100644
+--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
+@@ -27,6 +27,7 @@
+ #include "qapi/qmp/qjson.h"
+ 
+ #include "sysemu/block-backend.h"
+#include "qemu/atomic.h"
+ #include "qemu/cutils.h"
+ #include "qapi/error.h"
+ #include "block/block_int.h"
+@@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
+      * @snapshot_error is normally zero. But on first copy-before-write failure
+      * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
+      * value of this error (<0). After that all in-flight and further
+-     * snapshot-API requests will fail with that error.
+     * snapshot-API requests will fail with that error. To be accessed with
+     * atomics.
+      */
+     int snapshot_error;
+ } BDRVCopyBeforeWriteState;
+@@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+         return 0;
+     }
+ 
+-    if (s->snapshot_error) {
+    if (qatomic_read(&s->snapshot_error)) {
+         return 0;
+     }
+ 
+@@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+     WITH_QEMU_LOCK_GUARD(&s->lock) {
+         if (ret < 0) {
+             assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
+-            if (!s->snapshot_error) {
+-                s->snapshot_error = ret;
+-            }
+            qatomic_cmpxchg(&s->snapshot_error, 0, ret);
+         } else {
+             bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
+         }
+@@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ 
+     QEMU_LOCK_GUARD(&s->lock);
+ 
+-    if (s->snapshot_error) {
+    if (qatomic_read(&s->snapshot_error)) {
+         g_free(req);
+         return NULL;
+     }
+@@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
+     bdrv_unref(bs);
+ }
+ 
+int bdrv_cbw_snapshot_error(BlockDriverState *bs)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+    return qatomic_read(&s->snapshot_error);
+}
+
+ static void cbw_init(void)
+ {
+     bdrv_register(&bdrv_cbw_filter);
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index dc6cafe7fa..a27d2d7d9f 100644
+--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
+@@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockCopyState **bcs,
+                                   Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+int bdrv_cbw_snapshot_error(BlockDriverState *bs);
+ 
+ #endif /* COPY_BEFORE_WRITE_H */
+diff --git a/pve-backup.c b/pve-backup.c
+index 7cc1dd3724..07709aa350 100644
+--- a/pve-backup.c
+++ b/pve-backup.c
+@@ -379,6 +379,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+         di->fleecing.snapshot_access = NULL;
+     }
+     if (di->fleecing.cbw) {
+        /*
+         * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
+         * error, making further requests to the snapshot fail with EACCES, which then also fail the
+         * job. But that code is not the root cause and just confusing, so update it.
+         */
+        int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
+        if (di->completed_ret == -EACCES && snapshot_error) {
+            di->completed_ret = snapshot_error;
+        }
+         bdrv_cbw_drop(di->fleecing.cbw);
+         di->fleecing.cbw = NULL;
+     }
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,44 +1,70 @@
+extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
+extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
+extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
+extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
+extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
+extra/0006-virtio-gpu-fix-v2-migration.patch
+extra/0007-hw-pflash-fix-block-write-start.patch
+extra/0008-target-i386-fix-operand-size-for-DATA16-REX.W-POPCNT.patch
+extra/0009-target-i386-rdpkru-wrpkru-are-no-prefix-instructions.patch
+extra/0010-target-i386-fix-feature-dependency-for-WAITPKG.patch
+extra/0011-Revert-virtio-pci-fix-use-of-a-released-vector.patch
+extra/0012-hw-core-machine-move-compatibility-flags-for-VirtIO-.patch
+bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
+bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
+bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
+bitmap-mirror/0004-mirror-switch-to-bdrv_dirty_bitmap_merge_internal.patch
+bitmap-mirror/0005-iotests-add-test-for-bitmap-mirror.patch
+bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
 pve/0001-PVE-Config-block-file-change-locking-default-to-off.patch
 pve/0002-PVE-Config-Adjust-network-script-path-to-etc-kvm.patch
 pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
 pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
-pve/0005-PVE-Config-smm_available-false.patch
-pve/0006-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
-pve/0007-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
-pve/0008-PVE-Up-qmp-add-get_link_status.patch
-pve/0009-PVE-Up-glusterfs-allow-partial-reads.patch
-pve/0010-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
-pve/0011-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
-pve/0012-PVE-Up-qemu-img-dd-add-isize-parameter.patch
-pve/0013-PVE-Up-qemu-img-dd-add-n-skip_create.patch
-pve/0014-PVE-virtio-balloon-improve-query-balloon.patch
-pve/0015-PVE-qapi-modify-query-machines.patch
-pve/0016-PVE-qapi-modify-spice-query.patch
-pve/0017-PVE-internal-snapshot-async.patch
-pve/0018-PVE-block-add-the-zeroinit-block-driver-filter.patch
-pve/0019-PVE-Add-dummy-id-command-line-parameter.patch
-pve/0020-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
-pve/0021-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
-pve/0022-PVE-savevm-async-kick-AIO-wait-on-block-state-write.patch
-pve/0023-PVE-move-snapshot-cleanup-into-bottom-half.patch
-pve/0024-PVE-monitor-disable-oob-capability.patch
-pve/0025-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
-pve/0026-PVE-Allow-version-code-in-machine-type.patch
-pve/0027-PVE-Backup-modify-job-api.patch
-pve/0028-PVE-Backup-add-vma-backup-format-code.patch
-pve/0029-PVE-Backup-add-backup-dump-block-driver.patch
-pve/0030-PVE-Backup-proxmox-backup-patches-for-qemu.patch
-pve/0031-PVE-Backup-aquire-aio_context-before-calling-backup_.patch
-pve/0032-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
-pve/0033-PVE-Backup-avoid-coroutines-to-fix-AIO-freeze-cleanu.patch
-pve/0034-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
-pve/0035-drive-mirror-add-support-for-conditional-and-always-.patch
-pve/0036-mirror-add-check-for-bitmap-mode-without-bitmap.patch
-pve/0037-mirror-switch-to-bdrv_dirty_bitmap_merge_internal.patch
-pve/0038-iotests-add-test-for-bitmap-mirror.patch
-pve/0039-mirror-move-some-checks-to-qmp.patch
-pve/0040-PVE-savevm-async-set-up-migration-state.patch
-pve/0041-PVE-Backup-avoid-use-QemuRecMutex-inside-coroutines.patch
-pve/0042-PVE-Backup-use-QemuMutex-instead-of-QemuRecMutex.patch
-pve/0043-move-savevm-async-back-into-a-coroutine.patch
-pve/0044-add-optional-buffer-size-to-QEMUFile.patch
+pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
+pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
+pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
+pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
+pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
+pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
+pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
+pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
+pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
+pve/0014-PVE-qapi-modify-query-machines.patch
+pve/0015-PVE-qapi-modify-spice-query.patch
+pve/0016-PVE-add-IOChannel-implementation-for-savevm-async.patch
+pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
+pve/0018-PVE-add-optional-buffer-size-to-QEMUFile.patch
+pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
+pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
+pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
+pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
+pve/0023-PVE-monitor-disable-oob-capability.patch
+pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
+pve/0025-PVE-Allow-version-code-in-machine-type.patch
+pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
+pve/0027-PVE-Backup-add-vma-backup-format-code.patch
+pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
+pve/0029-PVE-Add-sequential-job-transaction-support.patch
+pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
+pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
+pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
+pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
+pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
+pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
+pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
+pve/0037-PVE-block-stream-increase-chunk-size.patch
+pve/0038-block-add-alloc-track-driver.patch
+pve/0039-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
+pve/0040-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
+pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
+pve/0042-alloc-track-error-out-when-auto-remove-is-not-set.patch
+pve/0043-alloc-track-avoid-seemingly-superfluous-child-permis.patch
+pve/0044-block-copy-before-write-fix-permission.patch
+pve/0045-block-copy-before-write-support-unligned-snapshot-di.patch
+pve/0046-block-copy-before-write-create-block_copy-bitmap-in-.patch
+pve/0047-qapi-blockdev-backup-add-discard-source-parameter.patch
+pve/0048-copy-before-write-allow-specifying-minimum-cluster-s.patch
+pve/0049-backup-add-minimum-cluster-size-to-performance-optio.patch
+pve/0050-PVE-backup-add-fleecing-option.patch
+pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+pve-qemu-9.0-vitastor.patch
--- a/debian/pve-qemu-kvm.install
+++ b/debian/pve-qemu-kvm.install
@@ -1,8 +1,6 @@
-# install the userspace utilities
-vma usr/bin/
-debian/kvm-ifup etc/kvm/
-debian/kvm-ifdown etc/kvm/
-
 #install ovmf uefi rom
 debian/OVMF_CODE-pure-efi.fd usr/share/kvm/
 debian/OVMF_VARS-pure-efi.fd usr/share/kvm/
+debian/kvm-ifdown etc/kvm/
+# install the userspace utilities
+debian/kvm-ifup etc/kvm/
--- a/debian/pve-qemu-kvm.links
+++ b/debian/pve-qemu-kvm.links
@@ -1,16 +1,13 @@
-usr/bin/qemu-system-x86_64 usr/bin/kvm
-
-# qemu-system-i386 and qemu-system-x86_64 provides the same hardware emulation
-usr/bin/qemu-system-x86_64 usr/bin/qemu-system-i386
-
 # also use aarch64 for 32 bit arm
 usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
-
+usr/bin/qemu-system-x86_64 usr/bin/kvm
+# qemu-system-i386 and qemu-system-x86_64 provides the same hardware emulation
+usr/bin/qemu-system-x86_64 usr/bin/qemu-system-i386
 # upstream provides a qemu man page,
 # we symlink to kvm for backward compatibility
 # and to qemu-system-{i386,x86_64} to fullfill our 'Provides: qemu-system-x86'
 usr/share/man/man1/qemu.1  usr/share/man/man1/kvm.1
+usr/share/man/man1/qemu.1  usr/share/man/man1/qemu-system-aarch64.1
+usr/share/man/man1/qemu.1  usr/share/man/man1/qemu-system-arm.1
 usr/share/man/man1/qemu.1  usr/share/man/man1/qemu-system-i386.1
 usr/share/man/man1/qemu.1  usr/share/man/man1/qemu-system-x86_64.1
-usr/share/man/man1/qemu.1  usr/share/man/man1/qemu-system-arm.1
-usr/share/man/man1/qemu.1  usr/share/man/man1/qemu-system-aarch64.1
--- a/debian/pve-qemu-kvm.lintian-overrides
+++ b/debian/pve-qemu-kvm.lintian-overrides
@@ -1,4 +1,7 @@
-pve-qemu-kvm: arch-dependent-file-in-usr-share usr/share/kvm/hppa-firmware.img
-pve-qemu-kvm: binary-from-other-architecture usr/share/kvm/hppa-firmware.img
-pve-qemu-kvm: unstripped-binary-or-object usr/share/kvm/hppa-firmware.img
-pve-qemu-kvm: statically-linked-binary usr/share/kvm/hppa-firmware.img
+pve-qemu-kvm: arch-dependent-file-in-usr-share [usr/share/kvm/hppa-firmware.img]
+pve-qemu-kvm: binary-from-other-architecture [usr/share/kvm/hppa-firmware.img]
+pve-qemu-kvm: embedded-javascript-library please use * [usr/share/doc/pve-qemu-kvm/kvm/_static/*]
+pve-qemu-kvm: groff-message *: warning [*]: can't break line [usr/share/man/*]
+pve-qemu-kvm: groff-message *: warning [*]: cannot adjust line [usr/share/man/*]
+pve-qemu-kvm: statically-linked-binary [usr/share/kvm/hppa-firmware.img]
+pve-qemu-kvm: unstripped-binary-or-object [usr/share/kvm/hppa-firmware.img]
--- a/debian/rules
+++ b/debian/rules
@@ -1,80 +1,89 @@
 #!/usr/bin/make -f
 # -*- makefile -*-
-# Sample debian/rules that uses debhelper.
-# This file was originally written by Joey Hess and Craig Small.
-# As a special exception, when this file is copied by dh-make into a
-# dh-make output file, you may use that output file without restriction.
-# This special exception was added by Craig Small in version 0.37 of dh-make.

 # Uncomment this to turn on verbose mode.
 #export DH_VERBOSE=1

-include /usr/share/dpkg/pkg-info.mk
+include /usr/share/dpkg/default.mk

-# These are used for cross-compiling and for saving the configure script
-# from having to guess our platform (since we know it already)
-DEB_HOST_GNU_TYPE   ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
-DEB_BUILD_GNU_TYPE  ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
-
-ARCH ?= $(shell dpkg-architecture -qDEB_HOST_GNU_CPU)
+HOST_CPU ?= $(DEB_HOST_GNU_CPU)

 PACKAGE=pve-qemu-kvm
 destdir := $(CURDIR)/debian/$(PACKAGE)

 flagfile := $(destdir)/usr/share/kvm/recognized-CPUID-flags-x86_64
+machinefile := $(destdir)/usr/share/kvm/machine-versions-x86_64.json

-CFLAGS = -Wall
+# default QEMU out-of-tree build directory is ./build
+BUILDDIR=build

-ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
-	CFLAGS += -O0
-else
-	CFLAGS += -O2
-endif
+# FIXME: should CFLAGS be passed to configure via --extra-cflags (and LDFLAGS via --extra-ldflags)?
+CFLAGS += -Wall

-config.status: configure
+export CFLAGS
+
+# honor DEB_BUILD_OPTIONS=parallel=N by adding -jN to MAKEFLAGS
+MAKEFLAGS += $(subst parallel=,-j,$(filter parallel=%,${DEB_BUILD_OPTIONS}))
+
+${BUILDDIR}/config.status: configure
 	dh_testdir
 	# Add here commands to configure the package.

+ifneq "$(wildcard /usr/share/misc/config.sub)" ""
+	cp -f /usr/share/misc/config.sub config.sub
+endif
+ifneq "$(wildcard /usr/share/misc/config.guess)" ""
+	cp -f /usr/share/misc/config.guess config.guess
+endif
+
 	# guest-agent is only required for guest systems
 	./configure \
-	--docdir=/usr/share/doc/pve-qemu-kvm \
-	--localstatedir=/var \
-	--prefix=/usr \
-	--sysconfdir=/etc \
-	--target-list=$(ARCH)-softmmu,aarch64-softmmu \
-	--with-confsuffix="/kvm" \
-	--with-pkgversion="${DEB_SOURCE}_${DEB_VERSION_UPSTREAM}" \
-	--audio-drv-list="alsa" \
-	--datadir=/usr/share \
-	--disable-capstone \
-	--disable-gtk \
-	--disable-guest-agent \
-	--disable-guest-agent-msi \
-	--disable-libnfs \
-	--disable-libxml2 \
-	--disable-sdl \
-	--disable-smartcard \
-	--disable-strip \
-	--disable-xen \
-	--enable-curl \
-	--enable-docs \
-	--enable-glusterfs \
-	--enable-gnutls \
-	--enable-jemalloc \
-	--enable-libiscsi \
-	--enable-libusb \
-	--enable-linux-aio \
-	--enable-numa \
-	--enable-rbd \
-	--enable-seccomp \
-	--enable-spice \
-	--enable-usb-redir \
-	--enable-virtfs \
-	--enable-xfsctl
+	    --disable-download \
+	    --docdir=/usr/share/doc/pve-qemu-kvm \
+	    --localstatedir=/var \
+	    --prefix=/usr \
+	    --sysconfdir=/etc \
+	    --target-list=$(HOST_CPU)-softmmu,aarch64-softmmu \
+	    --with-suffix="kvm" \
+	    --with-pkgversion="${DEB_SOURCE}_${DEB_VERSION_UPSTREAM_REVISION}" \
+	    --audio-drv-list="alsa" \
+	    --datadir=/usr/share \
+	    --libexecdir=/usr/lib/kvm \
+	    --disable-capstone \
+	    --disable-gtk \
+	    --disable-guest-agent \
+	    --disable-guest-agent-msi \
+	    --disable-libnfs \
+	    --disable-libssh \
+	    --disable-sdl \
+	    --disable-smartcard \
+	    --disable-strip \
+	    --disable-xen \
+	    --enable-curl \
+	    --enable-docs \
+	    --enable-glusterfs \
+	    --enable-gnutls \
+	    --enable-libiscsi \
+	    --enable-libusb \
+	    --enable-linux-aio \
+	    --enable-linux-io-uring \
+	    --enable-numa \
+	    --enable-opengl \
+	    --enable-rbd \
+	    --enable-seccomp \
+	    --enable-slirp \
+	    --enable-spice \
+	    --enable-usb-redir \
+	    --enable-virglrenderer \
+	    --enable-virtfs \
+	    --enable-zstd

+build: build-arch build-indep
+build-arch: build-stamp
+build-indep: build-stamp
 build: build-stamp

-build-stamp:  config.status
+build-stamp: ${BUILDDIR}/config.status
 	dh_testdir

 	# Add here commands to compile the package.
@@ -89,15 +98,8 @@ clean:
 	dh_testroot
 	rm -f build-stamp

-	# Add here commands to clean up after the build process.
+	# Add here commands to clean up before the build process.
 	-$(MAKE) distclean
-ifneq "$(wildcard /usr/share/misc/config.sub)" ""
-	cp -f /usr/share/misc/config.sub config.sub
-endif
-ifneq "$(wildcard /usr/share/misc/config.guess)" ""
-	cp -f /usr/share/misc/config.guess config.guess
-endif
-

 	dh_clean

@@ -111,25 +113,15 @@ install: build
 	# Add here commands to install the package into debian/pve-kvm.
 	$(MAKE) DESTDIR=$(destdir) install

-	# we do not need openbios files (sparc/ppc)
-	rm -rf $(destdir)/usr/share/kvm/openbios-*
-	# remove ppc files
-	rm $(destdir)/usr/share/kvm/*.dtb
-	rm $(destdir)/usr/share/kvm/s390-ccw.img
-	rm $(destdir)/usr/share/kvm/s390-netboot.img
-	rm $(destdir)/usr/share/kvm/qemu_vga.ndrv
-	rm $(destdir)/usr/share/kvm/slof.bin
-	rm $(destdir)/usr/share/kvm/u-boot.e500
-	# remove Aplha files
-	rm $(destdir)/usr/share/kvm/palcode-clipper
-
 	# Remove things we don't package at all, would be a "kvm-dev" package
 	rm -Rf $(destdir)/usr/include/linux/
 	rm -Rf $(destdir)/usr/include
-	rm -Rf $(destdir)/usr/lib*
+	rm -f $(destdir)/usr/lib/kvm/qemu-bridge-helper
+	rm -f $(destdir)/usr/lib/kvm/virtfs-proxy-helper

 	# CPU flags are static for QEMU version, allows avoiding more costly checks
 	$(destdir)/usr/bin/qemu-system-x86_64 -cpu help | ./debian/parse-cpu-flags.pl > $(flagfile)
+	$(destdir)/usr/bin/qemu-system-x86_64 -machine help | ./debian/parse-machines.pl > $(machinefile)

 # Build architecture-independent files here.
 binary-indep: build install
@@ -156,7 +148,7 @@ binary-arch: build install
 #	dh_installinfo
 	dh_installman
 	dh_link
-	dh_strip --dbg-package=pve-qemu-kvm-dbg
+	dh_strip --dbgsym-migration='pve-qemu-kvm-dbg (<<8.0.0-1~)'
 	dh_compress
 	dh_fixperms
 #	dh_perl
--- a/debian/source/include-binaries
+++ b/debian/source/include-binaries
@@ -0,0 +1,3 @@
+debian/OVMF_CODE-pure-efi.fd
+debian/OVMF_VARS-pure-efi.fd
+debian/Logo.bmp
--- a/debian/source/lintian-overrides
+++ b/debian/source/lintian-overrides
@@ -0,0 +1,2 @@
+source-is-missing [roms/SLOF/*.oco]
+source-is-missing [linux-user/*/vdso-*.so]
--- a/Show More
+++ b/Show More