Compare commits

..

2 Commits

Author SHA1 Message Date
0115e4efb0 Add bdrv_co_block_status 2023-01-13 23:50:40 +03:00
4948452f3c Add Vitastor support 2022-12-14 19:16:08 +03:00
120 changed files with 12965 additions and 8693 deletions

7
.gitignore vendored
View File

@@ -1,7 +0,0 @@
/*.build
/*.buildinfo
/*.changes
/*.deb
/*.dsc
/*.tar*
/pve-qemu-kvm-*.*/

View File

@@ -1,89 +1,60 @@
include /usr/share/dpkg/default.mk
include /usr/share/dpkg/pkg-info.mk
include /usr/share/dpkg/architecture.mk
PACKAGE = pve-qemu-kvm
SRCDIR := qemu
BUILDDIR ?= $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
ORIG_SRC_TAR=$(PACKAGE)_$(DEB_VERSION_UPSTREAM).orig.tar.gz
BUILDDIR ?= ${PACKAGE}-${DEB_VERSION_UPSTREAM}
GITVERSION := $(shell git rev-parse HEAD)
DSC=$(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION).dsc
DEB = $(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
DEB_DBG = $(PACKAGE)-dbgsym_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
DEB = ${PACKAGE}_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
DEB_DBG = ${PACKAGE}-dbg_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
DEBS = $(DEB) $(DEB_DBG)
all: $(DEBS)
.PHONY: submodule
submodule:
ifeq ($(shell test -f "$(SRCDIR)/configure" && echo 1 || echo 0), 0)
git submodule update --init --recursive
cd $(SRCDIR); meson subprojects download
endif
test -f "${SRCDIR}/configure" || git submodule update --init --recursive
PC_BIOS_FW_PURGE_LIST_IN = \
hppa-firmware.img \
hppa-firmware64.img \
openbios-ppc \
openbios-sparc32 \
openbios-sparc64 \
palcode-clipper \
s390-ccw.img \
s390-netboot.img \
u-boot.e500 \
.*[a-zA-Z0-9]\.dtb \
.*[a-zA-Z0-9]\.dts \
qemu_vga.ndrv \
slof.bin \
opensbi-riscv.*-generic-fw_dynamic.bin \
BLOB_PURGE_SED_CMDS = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "/$(FILE)/d")
BLOB_PURGE_FILTER = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "$(FILE)")
$(BUILDDIR): submodule
$(BUILDDIR): keycodemapdb | submodule
# check if qemu/ was used for a build
# if so, please run 'make distclean' in the submodule and try again
test ! -f $(SRCDIR)/build/config.status
rm -rf $@.tmp $@
cp -a $(SRCDIR) $@.tmp
cp -a debian $@.tmp/debian
rm -rf $@.tmp/roms/edk2 # packaged separately
find $@.tmp/pc-bios -type f | grep $(BLOB_PURGE_FILTER) | xargs rm -f
sed -i $(BLOB_PURGE_SED_CMDS) $@.tmp/pc-bios/meson.build
echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $@.tmp/debian/SOURCE
mv $@.tmp $@
rm -rf $(BUILDDIR)
cp -a $(SRCDIR) $(BUILDDIR)
cp -a debian $(BUILDDIR)/debian
rm -rf $(BUILDDIR)/ui/keycodemapdb
cp -a keycodemapdb $(BUILDDIR)/ui/
echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $(BUILDDIR)/debian/SOURCE
.PHONY: deb kvm
deb kvm: $(DEBS)
$(DEBS) &: $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc
$(DEB_DBG): $(DEB)
$(DEB): $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j
lintian $(DEBS)
sbuild: $(DSC)
sbuild $(DSC)
$(ORIG_SRC_TAR): $(BUILDDIR)
tar czf $(ORIG_SRC_TAR) --exclude="$(BUILDDIR)/debian" $(BUILDDIR)
.PHONY: dsc
dsc:
rm -rf *.dsc $(BUILDDIR)
$(MAKE) $(DSC)
lintian $(DSC)
$(DSC): $(ORIG_SRC_TAR) $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -S -us -uc -d
.PHONY: update
update:
cd $(SRCDIR) && git submodule deinit ui/keycodemapdb || true
rm -rf $(SRCDIR)/ui/keycodemapdb
mkdir $(SRCDIR)/ui/keycodemapdb
cd $(SRCDIR) && git submodule update --init ui/keycodemapdb
rm -rf keycodemapdb
mkdir keycodemapdb
cp -R $(SRCDIR)/ui/keycodemapdb/* keycodemapdb/
git add keycodemapdb
.PHONY: upload
upload: UPLOAD_DIST ?= $(DEB_DISTRIBUTION)
upload: $(DEBS)
tar cf - $(DEBS) | ssh repoman@repo.proxmox.com upload --product pve --dist $(UPLOAD_DIST)
tar cf - ${DEBS} | ssh repoman@repo.proxmox.com upload --product pve --dist bullseye
.PHONY: distclean clean
distclean: clean
clean:
rm -rf $(PACKAGE)-[0-9]*/ $(PACKAGE)*.tar* *.deb *.dsc *.build *.buildinfo *.changes
rm -rf $(BUILDDIR) $(PACKAGE)*.deb *.buildinfo *.changes
.PHONY: dinstall
dinstall: $(DEBS)

626
debian/changelog vendored
View File

@@ -1,628 +1,14 @@
pve-qemu-kvm (10.0.2-4) trixie; urgency=medium
pve-qemu-kvm (6.1.1-2+vitastor2) bullseye; urgency=medium
* savevm-async: reuse migration blocker check for snapshots/hibernation to
avoid crashing a VM when on these actions if its configuration does not
support them.
* Add bdrv_co_block_status implementation for QCOW2 export support
-- Proxmox Support Team <support@proxmox.com> Thu, 17 Jul 2025 23:08:46 +0200
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 13 Jan 2023 20:20:16 +0300
pve-qemu-kvm (10.0.2-3) trixie; urgency=medium
pve-qemu-kvm (6.1.1-2+vitastor1) bullseye; urgency=medium
* add backup/zeroinit/track-alloc blockdev patches
* Add Vitastor support
-- Proxmox Support Team <support@proxmox.com> Thu, 03 Jul 2025 12:02:03 +0200
pve-qemu-kvm (10.0.2-2) trixie; urgency=medium
* drop support for accessing Gluster based storage directly due to its
effective end of support and maintenance. The last upstream release
happened over 2.5 years ago and there's currently no one providing
enterprise support or security updates. Further, upstream QEMU will remove
the integration in one of the next releases, so use the upcoming PVE 9
major release to provide a clean cut.
User can either stay on Proxmox VE 8 until its end-of-life (probably end
of June 2026), or mount GlusterFS "manually" (e.g., /etc/fstab) and add it
as directory storage to Proxmox VE.
We recommend moving to other actively maintained storage technology
altogether though.
-- Proxmox Support Team <support@proxmox.com> Wed, 25 Jun 2025 19:23:30 +0200
pve-qemu-kvm (10.0.2-1) trixie; urgency=medium
* update QEMU and downstream patches for 10.0.2 release.
-- Proxmox Support Team <support@proxmox.com> Tue, 17 Jun 2025 08:38:51 +0200
pve-qemu-kvm (9.2.0-5) bookworm; urgency=medium
* pve backup: backup-access api: simplify bitmap logic
-- Proxmox Support Team <support@proxmox.com> Fri, 04 Apr 2025 16:15:58 +0200
pve-qemu-kvm (9.2.0-4) bookworm; urgency=medium
* various async snapshot improvements, including using a dedicated IO thread
for the state file when doing a live snapshot. That should reduce load on
the main thread and for it to get stuck on IO, i.e. same benefits as using
a dedicated IO thread for regular drives. This is particularly interesting
when the VM state storage is a network storage like NFS. It should also
address #6262.
* pve backup: implement basic features and API in preparation for external
backup provider storage plugins.
-- Proxmox Support Team <support@proxmox.com> Thu, 03 Apr 2025 17:00:34 +0200
pve-qemu-kvm (9.2.0-3) bookworm; urgency=medium
* revert changes to the High Precision Event Timer (HPET) to fix performance
regression
-- Proxmox Support Team <support@proxmox.com> Wed, 26 Mar 2025 09:56:01 +0100
pve-qemu-kvm (9.2.0-2) bookworm; urgency=medium
* fix assertion failure when migrating a VM with multiple disks on a
replicated ZFS.
-- Proxmox Support Team <support@proxmox.com> Mon, 24 Feb 2025 17:33:34 +0100
pve-qemu-kvm (9.2.0-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.2.0
-- Proxmox Support Team <support@proxmox.com> Tue, 04 Feb 2025 08:49:20 +0100
pve-qemu-kvm (9.1.2-3) bookworm; urgency=medium
* async snapshot: explicitly specify raw format when loading the VM state
file
* vma create: rework CLI parameters for passing disk to a more structured
style and use that to allow explicitly specifying the format
-- Proxmox Support Team <support@proxmox.com> Fri, 24 Jan 2025 16:12:34 +0100
pve-qemu-kvm (9.1.2-2) bookworm; urgency=medium
* adapt machine version deprecation for Proxmox VE release and support
cycle.
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Jan 2025 16:34:06 +0100
pve-qemu-kvm (9.1.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.1.2
* improve error handling and edge cases with fleecing backups.
-- Proxmox Support Team <support@proxmox.com> Wed, 11 Dec 2024 16:47:21 +0100
pve-qemu-kvm (9.0.2-4) bookworm; urgency=medium
* async snapshot: ensure any dynamic vCPU-throttling applied for
auto-converge gets always disabled again after finishing the snapshot.
-- Proxmox Support Team <support@proxmox.com> Sun, 10 Nov 2024 11:23:09 +0100
pve-qemu-kvm (9.0.2-3) bookworm; urgency=medium
* pick up fix for VirtIO PCI regressions
* pick up stable fixes for 9.0, including fixes for VirtIO-net, ARM and
x86(_64) emulation, CVEs to harden NBD server against malicious clients,
as well as a few others (VNC, physmem, Intel IOMMU, ...).
-- Proxmox Support Team <support@proxmox.com> Fri, 06 Sep 2024 16:21:42 +0200
pve-qemu-kvm (9.0.2-2) bookworm; urgency=medium
* actually update submodule to QEMU 9.0.2. The previous release was still
based on 9.0.0 by mistake.
-- Proxmox Support Team <support@proxmox.com> Wed, 07 Aug 2024 10:16:01 +0200
pve-qemu-kvm (9.0.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.0.2. While our version had most
stable fixes included already, there are new fixes for VirtIO and VGA
display screen blanking (#4786)
* backport fix for a regression with the LSI-53c895a controller and one for
the boot order getting ignored for USB storage
-- Proxmox Support Team <support@proxmox.com> Mon, 29 Jul 2024 18:59:40 +0200
pve-qemu-kvm (9.0.0-6) bookworm; urgency=medium
* fix a regression in the zeroinit block driver that prevented importing and
cloning disks to RBD storages which are not using the krbd setting
-- Proxmox Support Team <support@proxmox.com> Mon, 08 Jul 2024 16:11:15 +0200
pve-qemu-kvm (9.0.0-5) bookworm; urgency=medium
* backport fix for CVE-2024-4467 to prevent malicious qcow2 image files from
already causing bad effects if being queried via 'qemu-img info'. For
Proxmox VE, this is an additional safe guard, as currently it directly
creates and manages the qcow2 images used by VMs and does not allow
unprivileged users to import them
* fix #4726: code cleanup: avoid superfluous check in vma backup code
-- Proxmox Support Team <support@proxmox.com> Wed, 03 Jul 2024 13:13:35 +0200
pve-qemu-kvm (9.0.0-4) bookworm; urgency=medium
* fix crash after saving a snapshot without including VM state when a VirtIO
block device with iothread is configured.
* fix edge case in error handling when opening a block device from PBS fails
* minor code cleanup in backup code
-- Proxmox Support Team <support@proxmox.com> Mon, 01 Jul 2024 11:26:11 +0200
pve-qemu-kvm (9.0.0-3) bookworm; urgency=medium
* fix crash when doing resize after hotplugging a disk using io_uring
* fix some minor issues in software CPU emulation (i.e. non-KVM) for ARM and
x86(_64)
-- Proxmox Support Team <support@proxmox.com> Wed, 29 May 2024 15:55:44 +0200
pve-qemu-kvm (9.0.0-2) bookworm; urgency=medium
* fix #5409: backup: fix copy-before-write timeout
* backup: improve error when copy-before-write fails for fleecing
* fix forwards and backwards migration with VirtIO-GPU display
* fix a regression in pflash device introduced in 8.2
* revert a commit for VirtIO PCI devices that turned out to cause more
potential security issues than what it fixed
* move compatibility flags for a new VirtIO-net feature to the correct
machine type. The feature was introduced in QEMU 8.2, but the
compatibility flags got added to machine version 8.0 instead of 8.1. This
breaks backwards migration with machine version 8.1 from a 8.2/9.0 binary
to an 8.1 binary, in cases where the guest kernel enables the feature
(e.g. Ubuntu 23.10).
While that breaks migration with machine version 8.1 from an unpatched to
a patched binary, Proxmox VE only ever had 8.2 on the test repository and
9.0 not yet in any public repository.
-- Proxmox Support Team <support@proxmox.com> Fri, 17 May 2024 17:04:52 +0200
pve-qemu-kvm (9.0.0-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.0.0
-- Proxmox Support Team <support@proxmox.com> Mon, 29 Apr 2024 10:51:37 +0200
pve-qemu-kvm (8.2.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 8.2.2
-- Proxmox Support Team <support@proxmox.com> Sat, 27 Apr 2024 12:44:30 +0200
pve-qemu-kvm (8.1.5-5) bookworm; urgency=medium
* implement support for backup fleecing
-- Proxmox Support Team <support@proxmox.com> Thu, 11 Apr 2024 17:46:48 +0200
pve-qemu-kvm (8.1.5-4) bookworm; urgency=medium
* fix live-import for certain kinds of VMDK images that rely on padding
* backup: avoid bubbling up first error if it's an ECANCELED one, as those
are often a result of canceling the job due to running into an actual
issue.
* backup: factor out & clean up gathering device info into helper
-- Proxmox Support Team <support@proxmox.com> Tue, 12 Mar 2024 14:08:40 +0100
pve-qemu-kvm (8.1.5-3) bookworm; urgency=medium
* backport fix for potential deadlock during QMP stop command if the VM has
disks attached through VirtIO-Block and IO-Thread enabled
* fix #4507: add patch to automatically increase NOFILE soft limit
-- Proxmox Support Team <support@proxmox.com> Wed, 21 Feb 2024 20:11:23 +0100
pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium
* work around for a situation where guest IO might get stuck, if the VM is
configured with iothread and VirtIO block/SCSI
-- Proxmox Support Team <support@proxmox.com> Fri, 02 Feb 2024 19:41:27 +0100
pve-qemu-kvm (8.1.5-1) bookworm; urgency=medium
* update to 8.1.5 stable release, including more relevant fixes like:
- virtio-net: correctly copy vnet header when flushing TX
- hw/pflash: implement update buffer for block writes
- Fixes to i386 emulation and ARM emulation.
-- Proxmox Support Team <support@proxmox.com> Fri, 02 Feb 2024 19:08:13 +0100
pve-qemu-kvm (8.1.2-6) bookworm; urgency=medium
* revert attempted fix to avoid rare issue with stuck guest IO when using
iothread, because it caused a much more common issue with iothreads
consuming too much CPU
-- Proxmox Support Team <support@proxmox.com> Fri, 15 Dec 2023 14:22:06 +0100
pve-qemu-kvm (8.1.2-5) bookworm; urgency=medium
* backport workaround for stuck guest IO with iothread and VirtIO block/SCSI
in some rare edge cases
* backport fix for potential deadlock when issuing the "resize" QMP command
for a disk that is using iothread
-- Proxmox Support Team <support@proxmox.com> Mon, 11 Dec 2023 16:58:27 +0100
pve-qemu-kvm (8.1.2-4) bookworm; urgency=medium
* fix vnc clipboard in the host to guest direction
-- Proxmox Support Team <support@proxmox.com> Wed, 22 Nov 2023 14:28:21 +0100
pve-qemu-kvm (8.1.2-3) bookworm; urgency=medium
* fix #5054: backport fix for software reset with SATA, avoiding breakage
with, e.g., some FreeBSD VMs
-- Proxmox Support Team <support@proxmox.com> Mon, 20 Nov 2023 10:24:50 +0100
pve-qemu-kvm (8.1.2-2) bookworm; urgency=medium
* revert "x86: acpi: workaround Windows not handling name references in
Package properly" as that seems to have broken networking (and possibly
other things) on some localized variants of Windows (e.g., the German
versions).
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Nov 2023 11:55:23 +0100
pve-qemu-kvm (8.1.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 8.1.2
* use QEMU's keycode-map-db again instead of our static copy from QEMU 6.0
* disable graph locking, newly introduced in the 8.1 release, as it has
still various deadlock issues, e.g., during canceling backup jobs.
-- Proxmox Support Team <support@proxmox.com> Tue, 24 Oct 2023 13:42:45 +0200
pve-qemu-kvm (8.0.2-7) bookworm; urgency=medium
* fix #2874: SATA: avoid unsolicited write to sector 0 during reset
-- Proxmox Support Team <support@proxmox.com> Wed, 04 Oct 2023 08:33:35 +0200
pve-qemu-kvm (8.0.2-6) bookworm; urgency=medium
* fix #1534: vma: add extract-filter for disk images allowing users to pass
a comma separated list of the disks they want to extract from an archive.
* backup: create jobs in a drained section to avoid subtle bugs where
something interferes with the block-copy-state bitmap on initialization
* backup: drop experimental, and since a while also fully broken, directory
backup format (BACKUP_FORMAT_DIR). This format was never exposed via the
Proxmox VE API, but only available via QMP, as it's broken since QEMU 8 and
we got zero reports about that, it's safe to assume that there are no
public users, so just remove it completely.
-- Proxmox Support Team <support@proxmox.com> Wed, 06 Sep 2023 17:03:59 +0200
pve-qemu-kvm (8.0.2-5) bookworm; urgency=medium
* improve memory footprint after backup by not keeping as much memory
resident.
* fix file descriptor leak for vhost (used by default by vNICs).
-- Proxmox Support Team <support@proxmox.com> Wed, 16 Aug 2023 11:52:24 +0200
pve-qemu-kvm (8.0.2-4) bookworm; urgency=medium
* fix resume for snapshot and hibernate in combination with iothread and
dirty bitmap
-- Proxmox Support Team <support@proxmox.com> Fri, 28 Jul 2023 12:58:22 +0200
pve-qemu-kvm (8.0.2-3) bookworm; urgency=medium
* fix regression in QEMU 8.0 for drive mirror with bitmap
-- Proxmox Support Team <support@proxmox.com> Thu, 15 Jun 2023 13:57:46 +0200
pve-qemu-kvm (8.0.2-2) bookworm; urgency=medium
* drop custom get_link_status QMP command, was never really used.
* drop custom & deprecated drive snapshot QMP commands, we use a better
alternative since a while.
-- Proxmox Support Team <support@proxmox.com> Fri, 09 Jun 2023 07:57:56 +0200
pve-qemu-kvm (8.0.2-1) bookworm; urgency=medium
* update to QEMU stable release 8.0.2
* update patches for avoiding issues with DMA reentrancy to current,
slightly optimized version.
-- Proxmox Support Team <support@proxmox.com> Tue, 06 Jun 2023 16:34:50 +0200
pve-qemu-kvm (8.0.0-1) bookworm; urgency=medium
* update to QEMU stable release 8.0.0
* re-build for Proxmox VE 8 / Debian 12 Bookworm
* adapt to the local virtiofsd C variant being dropped, it has been
rewritten in Rust and is now hosted in a separate source repository.
-- Proxmox Support Team <support@proxmox.com> Mon, 22 May 2023 13:45:49 +0200
pve-qemu-kvm (7.2.0-8) bullseye; urgency=medium
* backport fix for ACPI CPU hotplug issue with TCG
* cherry-pick TCG-related stable fixes for 7.2 for users that turned off KVM
HW acceleration
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Mar 2023 15:47:08 +0100
pve-qemu-kvm (7.2.0-7) bullseye; urgency=medium
* improve fix for potential deadlock with trim for IDE/SATA and draining
* backport stable fixes:
- hw/nvme: fix missing endian conversions for doorbell buffers
- hw/smbios: fix field corruption in type 4 table
- virtio-rng-pci: fix transitional migration compat for vectors
- hw/timer/hpet: Fix expiration time overflow
- vhost/vdpa: stop all svq on device deletion
- vhost: avoid a potential use of an uninitialized variable in the call to
vhost_svq_poll
- chardev/char-socket: set s->listener = NULL in char_socket_finalize to
fix a potential crash after live-migration
- intel-iommu: fail MAP notifier without caching mode
- intel-iommu: fail DEVIOTLB_UNMAP without dt mode
* fix a regression for when the LSI SCSI controller is used
-- Proxmox Support Team <support@proxmox.com> Mon, 13 Mar 2023 17:42:49 +0100
pve-qemu-kvm (7.2.0-6) bullseye; urgency=medium
* fix 7.2 regression for Linux boot failures with megasas SCSI
* fix 7.0 regression for a potential deadlock with trim for IDE/SATA and
draining
-- Proxmox Support Team <support@proxmox.com> Wed, 08 Mar 2023 14:32:17 +0100
pve-qemu-kvm (7.2.0-5) bullseye; urgency=medium
* fix #4476: savevm-async: avoid looping without progress
* savevm-async: decrease the boundary for free space for (memory) state left
on target from 30 MiB to 100 MiB, improving the heuristic for when to
enter the final "pause and sync" stage.
* QMP backup: use correct error number when getting blockdrive length fails
* backport fix for some DMA reentrancy issues, better protecting against
malicious guests
* backport fix for iSCSI double free issue leading to crashes
-- Proxmox Support Team <support@proxmox.com> Tue, 21 Feb 2023 13:49:43 +0100
pve-qemu-kvm (7.2.0-4) bullseye; urgency=medium
* backport fix for a 7.2 regression when using VirtIO disk with
detect-zeroes=unmap
-- Proxmox Support Team <support@proxmox.com> Fri, 27 Jan 2023 09:37:49 +0100
pve-qemu-kvm (7.2.0-3) bullseye; urgency=medium
* add fix for live-migration with virtio-rng devices, which regressed in
QEMU 7.2.0.
-- Proxmox Support Team <support@proxmox.com> Thu, 12 Jan 2023 13:13:14 +0100
pve-qemu-kvm (7.2.0-2) bullseye; urgency=medium
* enable slirp again for now, as in qemu-server, user networking is
supported (via CLI/API) when no bridge is set on a virtual NIC
* cherry-pick stable fixes for 7.2. Two for virtio-mem and one for vIOMMU.
Both features are not yet exposed in PVE's qemu-server, but there's work
going on to change that.
-- Proxmox Support Team <support@proxmox.com> Tue, 10 Jan 2023 15:47:48 +0100
pve-qemu-kvm (7.2.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.2.0
* drop 'slirp' networking
-- Proxmox Support Team <support@proxmox.com> Fri, 16 Dec 2022 13:18:21 +0100
pve-qemu-kvm (7.1.0-4) bullseye; urgency=medium
* cherry-pick "block/block-backend: blk_set_enable_write_cache is IO_CODE"
-- Proxmox Support Team <support@proxmox.com> Tue, 22 Nov 2022 09:21:06 +0100
pve-qemu-kvm (7.1.0-3) bullseye; urgency=medium
* init: daemonize: defuse PID file resolve error to a warning at max, fixing
some usecases that regressed with 7.1, like tracking start up in our
file-restore VM.
-- Proxmox Support Team <support@proxmox.com> Fri, 28 Oct 2022 10:27:49 +0200
pve-qemu-kvm (7.1.0-2) bullseye; urgency=medium
* fix an issue with error handling in async backup code
-- Proxmox Support Team <support@proxmox.com> Tue, 18 Oct 2022 15:33:44 +0200
pve-qemu-kvm (7.1.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.1.0
* add fix for io_uring_register_ring_fd from upstream
-- Proxmox Support Team <support@proxmox.com> Fri, 14 Oct 2022 14:54:09 +0200
pve-qemu-kvm (7.0.0-4) bullseye; urgency=medium
* add revision to version output
* PVE Backup: allow passing max-workers performance setting
-- Proxmox Support Team <support@proxmox.com> Mon, 10 Oct 2022 11:55:37 +0200
pve-qemu-kvm (7.0.0-3) bullseye; urgency=medium
* savevm-async: avoid segfault when aborting snapshot creation task
* savevm-async: set SAVE_STATE_DONE when closing state file was successful
allowing one to start a new snapshot task after aborting one.
-- Proxmox Support Team <support@proxmox.com> Tue, 30 Aug 2022 12:54:03 +0200
pve-qemu-kvm (7.0.0-2) bullseye; urgency=medium
* backport "io_uring: fix short read slow path"
* backport "e1000: set RX descriptor status in a separate operation"
-- Proxmox Support Team <support@proxmox.com> Wed, 20 Jul 2022 09:17:07 +0200
pve-qemu-kvm (7.0.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.0.0
-- Proxmox Support Team <support@proxmox.com> Thu, 30 Jun 2022 11:07:37 +0200
pve-qemu-kvm (6.2.0-11) bullseye; urgency=medium
* add 'namespace' to BlockdevOptionsPbs for live-restore support
* vma: create: support 64KiB-unaligned input images like to improve backing
up some VM templates
* block: alloc-track: avoid unlikely, but possible premature break
-- Proxmox Support Team <support@proxmox.com> Wed, 22 Jun 2022 15:54:54 +0200
pve-qemu-kvm (6.2.0-10) bullseye; urgency=medium
* fix #4101: fix backup cancellation bug with iothreads
-- Proxmox Support Team <support@proxmox.com> Thu, 9 Jun 2022 16:35:51 +0200
pve-qemu-kvm (6.2.0-9) bullseye; urgency=medium
* fix possible race conditions during cancellation of a PBS backup
-- Proxmox Support Team <support@proxmox.com> Wed, 08 Jun 2022 14:03:22 +0200
pve-qemu-kvm (6.2.0-8) bullseye; urgency=medium
* revert "block/rbd: implement bdrv_co_block_status" to work around
performance regression when backing up large RBD disk
-- Proxmox Support Team <support@proxmox.com> Thu, 19 May 2022 09:24:45 +0200
pve-qemu-kvm (6.2.0-7) bullseye; urgency=medium
* Proxmox Backup Server namespace support
-- Proxmox Support Team <support@proxmox.com> Thu, 12 May 2022 16:05:56 +0200
pve-qemu-kvm (6.2.0-6) bullseye; urgency=medium
* block/gluster: correctly set max_pdiscard which is int64_t to avoid
triggering assertion
* ui/vnc.c: Fixed a deadlock bug
* display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) and
integer overflow in cursor_alloc (CVE-2021-4206)
-- Proxmox Support Team <support@proxmox.com> Wed, 11 May 2022 10:42:53 +0200
pve-qemu-kvm (6.2.0-5) bullseye; urgency=medium
* vma: allow partial restore by skipping some disk
-- Proxmox Support Team <support@proxmox.com> Mon, 25 Apr 2022 10:13:46 +0200
pve-qemu-kvm (6.2.0-4) bullseye; urgency=medium
* d/control: add libgbm to build dependencies
* d/control: add suggest dependency-hint for libgl1
* various stable backports:
+ virtio-net: fix map leaking on error during receive
+ memory: Fix incorrect calls of log_global_start/stop
+ acpi: fix OEM ID/OEM Table ID padding
+ vhost-vsock: detach the virqueue element in case of error
+ vhost-user: remove VirtQ notifier restore
+ vhost-user: fix VirtQ notifier cleanup
+ virtio: fix the condition for iommu_platform not supported
-- Proxmox Support Team <support@proxmox.com> Fri, 22 Apr 2022 11:52:30 +0200
pve-qemu-kvm (6.2.0-3) bullseye; urgency=medium
* cherry-pick fix for some manually added ACPI table SLIC entries via the
custom args flag.
-- Proxmox Support Team <support@proxmox.com> Fri, 15 Apr 2022 09:09:37 +0200
pve-qemu-kvm (6.2.0-2) bullseye; urgency=medium
* compile in virgl support
* enable zstd support
* drop sdl dependency (it was disabled at compile time already)
* recommend 'numactl'
* fix an issue with multi-disk backups where chunks would be written
multiple times
-- Proxmox Support Team <support@proxmox.com> Thu, 03 Mar 2022 12:03:44 +0100
pve-qemu-kvm (6.2.0-1) bullseye; urgency=medium
* update to QEMU stable release 6.2.0
-- Proxmox Support Team <support@proxmox.com> Thu, 17 Feb 2022 06:23:14 +0100
-- Vitaliy Filippov <vitalif@yourcmc.ru> Thu, 14 Dec 2022 19:15:40 +0300
pve-qemu-kvm (6.1.1-2) bullseye; urgency=medium

1
debian/compat vendored Normal file
View File

@@ -0,0 +1 @@
10

41
debian/control vendored
View File

@@ -2,42 +2,41 @@ Source: pve-qemu-kvm
Section: admin
Priority: optional
Maintainer: Proxmox Support Team <support@proxmox.com>
Build-Depends: debhelper-compat (= 13),
Build-Depends: autotools-dev,
check,
debhelper (>= 9),
libacl1-dev,
libaio-dev,
libattr1-dev,
libcap-ng-dev,
libcurl4-gnutls-dev,
libepoxy-dev,
libfdt-dev,
libgbm-dev,
libglusterfs-dev (>= 5.2-2),
libgnutls28-dev,
libiscsi-dev (>= 1.12.0),
libjemalloc-dev,
libjpeg-dev,
libjson-perl,
libnuma-dev,
libpci-dev,
libpixman-1-dev,
libproxmox-backup-qemu0-dev (>= 1.3.0),
libproxmox-backup-qemu0-dev (>= 1.0.3-1),
librbd-dev (>= 0.48),
libsdl1.2-dev,
libseccomp-dev,
libslirp-dev,
libspice-protocol-dev (>= 0.12.14~),
libspice-server-dev (>= 0.14.0~),
libsystemd-dev,
liburing-dev,
libusb-1.0-0-dev (>= 1.0.17),
libusb-1.0-0-dev (>= 1.0.17-1),
libusbredirparser-dev (>= 0.6-2),
libvirglrenderer-dev,
libzstd-dev,
meson,
python3-minimal,
python3-sphinx,
python3-sphinx-rtd-theme,
python3-venv,
quilt,
texi2html,
texinfo,
uuid-dev,
xfslibs-dev,
Standards-Version: 3.7.2
@@ -46,16 +45,25 @@ Package: pve-qemu-kvm
Architecture: any
Depends: ceph-common (>= 0.48),
iproute2,
libaio1,
libgfapi0 | glusterfs-common (>= 5.6),
libgfchangelog0 | glusterfs-common (>= 5.6),
libgfdb0 | glusterfs-common (>= 5.6),
libgfrpc0 | glusterfs-common (>= 5.6),
libgfxdr0 | glusterfs-common (>= 5.6),
libglusterfs-dev | glusterfs-common (>= 5.6),
libglusterfs0 | glusterfs-common (>= 5.6),
libiscsi4 (>= 1.12.0) | libiscsi7,
libjemalloc2,
libjpeg62-turbo,
libsdl1.2debian,
libspice-server1 (>= 0.14.0~),
libusb-1.0-0 (>= 1.0.17-1),
libusbredirparser1 (>= 0.6-2),
libuuid1,
numactl,
${misc:Depends},
${shlibs:Depends},
Recommends: numactl,
Suggests: libgl1,
Conflicts: kvm,
pve-kvm,
pve-qemu-kvm-2.6.18,
@@ -63,17 +71,22 @@ Conflicts: kvm,
qemu-kvm,
qemu-system-arm,
qemu-system-common,
qemu-system-data,
qemu-system-x86,
qemu-utils,
Provides: qemu-system-arm, qemu-system-x86, qemu-utils,
Provides: qemu-system-arm, qemu-system-x86, qemu-utils
Replaces: pve-kvm,
pve-qemu-kvm-2.6.18,
qemu-system-arm,
qemu-system-x86,
qemu-utils,
Breaks: qemu-server (<= 8.0.6)
Description: Full virtualization on x86 hardware
Using KVM, one can run multiple virtual PCs, each running unmodified Linux or
Windows images. Each virtual machine has private virtualized hardware: a
network card, disk, graphics adapter, etc.
Package: pve-qemu-kvm-dbg
Architecture: any
Section: debug
Depends: pve-qemu-kvm (= ${binary:Version})
Description: pve qemu debugging symbols
This package contains the debugging symbols for pve-qemu-kvm.

2
debian/copyright vendored
View File

@@ -25,7 +25,7 @@ License:
In particular, the QEMU virtual CPU core library (libqemu.a) is
released under the GNU Lesser General Public License version 2 or later.
On Debian systems, the complete text of the GNU Lesser General Public
On Debian systems, the complete text of the GNU Lesser General Public
License can be found in the file /usr/share/common-licenses/LGPL.
Some hardware device emulation sources and other QEMU functionality are

View File

@@ -24,5 +24,4 @@ while (<STDIN>) {
die "no QEMU machine types detected from STDIN input" if scalar (@$machines) <= 0;
print to_json($machines, { utf8 => 1, canonical => 1 })
or die "failed to encode detected machines as JSON - $!\n";
print to_json($machines, { utf8 => 1 }) or die "$!\n";

View File

@@ -27,21 +27,19 @@ Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebased for 9.1.2]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/mirror.c | 99 ++++++++++++++++++++------
blockdev.c | 38 +++++++++-
include/block/block_int-global-state.h | 4 +-
qapi/block-core.json | 25 ++++++-
tests/unit/test-block-iothread.c | 4 +-
5 files changed, 142 insertions(+), 28 deletions(-)
block/mirror.c | 98 +++++++++++++++++++++++++-------
blockdev.c | 39 ++++++++++++-
include/block/block_int.h | 4 +-
qapi/block-core.json | 29 ++++++++--
tests/unit/test-block-iothread.c | 4 +-
5 files changed, 145 insertions(+), 29 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index a53582f17b..fafca1360e 100644
index 85b781bc21..0821214138 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob {
@@ -50,7 +50,7 @@ typedef struct MirrorBlockJob {
BlockDriverState *to_replace;
/* Used to block operations on the drive-mirror-replace target */
Error *replace_blocker;
@@ -50,7 +48,7 @@ index a53582f17b..fafca1360e 100644
BlockMirrorBackingMode backing_mode;
/* Whether the target image requires explicit zero-initialization */
bool zero_target;
@@ -73,6 +73,8 @@ typedef struct MirrorBlockJob {
@@ -65,6 +65,8 @@ typedef struct MirrorBlockJob {
size_t buf_size;
int64_t bdev_length;
unsigned long *cow_bitmap;
@@ -59,9 +57,9 @@ index a53582f17b..fafca1360e 100644
BdrvDirtyBitmap *dirty_bitmap;
BdrvDirtyBitmapIter *dbi;
uint8_t *buf;
@@ -723,7 +725,8 @@ static int mirror_exit_common(Job *job)
@@ -697,7 +699,8 @@ static int mirror_exit_common(Job *job)
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
- BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ BlockDriverState *backing;
@@ -69,7 +67,7 @@ index a53582f17b..fafca1360e 100644
BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
if (bdrv_cow_bs(unfiltered_target) != backing) {
@@ -824,6 +827,16 @@ static void mirror_abort(Job *job)
@@ -802,6 +805,16 @@ static void mirror_abort(Job *job)
assert(ret == 0);
}
@@ -86,7 +84,7 @@ index a53582f17b..fafca1360e 100644
static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -1020,7 +1033,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
@@ -983,7 +996,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_free_init(s);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -96,7 +94,7 @@ index a53582f17b..fafca1360e 100644
ret = mirror_dirty_init(s);
if (ret < 0 || job_is_cancelled(&s->common.job)) {
goto immediate_exit;
@@ -1309,6 +1323,7 @@ static const BlockJobDriver mirror_job_driver = {
@@ -1216,6 +1230,7 @@ static const BlockJobDriver mirror_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
@@ -104,15 +102,15 @@ index a53582f17b..fafca1360e 100644
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = mirror_cancel,
@@ -1327,6 +1342,7 @@ static const BlockJobDriver commit_active_job_driver = {
@@ -1232,6 +1247,7 @@ static const BlockJobDriver commit_active_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
+ .clean = mirror_clean,
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = commit_active_cancel,
@@ -1719,7 +1735,10 @@ static BlockJob *mirror_start_job(
},
@@ -1594,7 +1610,10 @@ static BlockJob *mirror_start_job(
BlockCompletionFunc *cb,
void *opaque,
const BlockJobDriver *driver,
@@ -123,13 +121,12 @@ index a53582f17b..fafca1360e 100644
+ BlockDriverState *base,
bool auto_complete, const char *filter_node_name,
bool is_mirror, MirrorCopyMode copy_mode,
bool base_ro,
@@ -1734,10 +1753,39 @@ static BlockJob *mirror_start_job(
GLOBAL_STATE_CODE();
Error **errp)
@@ -1606,10 +1625,39 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
- if (granularity == 0) {
- granularity = bdrv_get_default_bitmap_granularity(target);
+ if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+ error_setg(errp, "Sync mode '%s' not supported",
+ MirrorSyncMode_str(sync_mode));
@@ -150,8 +147,8 @@ index a53582f17b..fafca1360e 100644
+ "sync mode '%s' is not compatible with bitmaps",
+ MirrorSyncMode_str(sync_mode));
+ return NULL;
}
+ }
+
+ if (bitmap) {
+ if (granularity) {
+ error_setg(errp, "granularity (%d)"
@@ -161,12 +158,13 @@ index a53582f17b..fafca1360e 100644
+ }
+ granularity = bdrv_dirty_bitmap_granularity(bitmap);
+ } else if (granularity == 0) {
+ granularity = bdrv_get_default_bitmap_granularity(target);
+ }
granularity = bdrv_get_default_bitmap_granularity(target);
}
-
assert(is_power_of_2(granularity));
if (buf_size < 0) {
@@ -1878,7 +1926,9 @@ static BlockJob *mirror_start_job(
@@ -1747,7 +1795,9 @@ static BlockJob *mirror_start_job(
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
@@ -176,10 +174,10 @@ index a53582f17b..fafca1360e 100644
+ s->bitmap_mode = bitmap_mode;
s->backing_mode = backing_mode;
s->zero_target = zero_target;
qatomic_set(&s->copy_mode, copy_mode);
@@ -1904,6 +1954,18 @@ static BlockJob *mirror_start_job(
*/
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
s->copy_mode = copy_mode;
@@ -1768,6 +1818,18 @@ static BlockJob *mirror_start_job(
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
}
+ if (s->sync_bitmap) {
+ bdrv_dirty_bitmap_set_busy(s->sync_bitmap, true);
@@ -193,10 +191,10 @@ index a53582f17b..fafca1360e 100644
+ }
+ }
+
bdrv_graph_wrlock();
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
@@ -1986,6 +2048,9 @@ fail:
BLK_PERM_CONSISTENT_READ,
@@ -1845,6 +1907,9 @@ fail:
if (s->dirty_bitmap) {
bdrv_release_dirty_bitmap(s->dirty_bitmap);
}
@@ -206,7 +204,7 @@ index a53582f17b..fafca1360e 100644
job_early_fail(&s->common.job);
}
@@ -2008,35 +2073,28 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1862,29 +1927,23 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -223,31 +221,25 @@ index a53582f17b..fafca1360e 100644
- bool is_none_mode;
BlockDriverState *base;
GLOBAL_STATE_CODE();
- if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
- (mode == MIRROR_SYNC_MODE_BITMAP)) {
- error_setg(errp, "Sync mode '%s' not supported",
- MirrorSyncMode_str(mode));
- return;
- }
-
bdrv_graph_rdlock_main_loop();
- is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
bdrv_graph_rdunlock_main_loop();
mirror_start_job(job_id, bs, creation_flags, target, replaces,
speed, granularity, buf_size, backing_mode, zero_target,
on_source_error, on_target_error, unmap, NULL, NULL,
- &mirror_job_driver, is_none_mode, base, false,
- filter_node_name, true, copy_mode, false, errp);
- filter_node_name, true, copy_mode, errp);
+ &mirror_job_driver, mode, bitmap, bitmap_mode, base,
+ false, filter_node_name, true, copy_mode, false, errp);
+ false, filter_node_name, true, copy_mode, errp);
}
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -2063,7 +2121,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1909,7 +1968,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN, false,
on_error, on_error, true, cb, opaque,
@@ -255,35 +247,36 @@ index a53582f17b..fafca1360e 100644
+ &commit_active_job_driver, MIRROR_SYNC_MODE_FULL,
+ NULL, 0, base, auto_complete,
filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
base_read_only, errp);
errp);
if (!job) {
diff --git a/blockdev.c b/blockdev.c
index 1d1f27cfff..ec45bbaa52 100644
index 3d8ac368a1..03e99264dc 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2797,6 +2797,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2957,6 +2957,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
BlockDriverState *target,
const char *replaces,
bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
+ bool has_bitmap,
+ const char *bitmap_name,
+ bool has_bitmap_mode,
+ BitmapSyncMode bitmap_mode,
BlockMirrorBackingMode backing_mode,
bool zero_target,
bool has_speed, int64_t speed,
@@ -2815,6 +2818,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2976,6 +2980,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
{
BlockDriverState *unfiltered_bs;
int job_flags = JOB_DEFAULT;
+ BdrvDirtyBitmap *bitmap = NULL;
GLOBAL_STATE_CODE();
GRAPH_RDLOCK_GUARD_MAINLOOP();
@@ -2869,6 +2873,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_speed) {
speed = 0;
@@ -3030,6 +3035,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
+ if (bitmap_name) {
+ if (has_bitmap) {
+ if (granularity) {
+ error_setg(errp, "Granularity and bitmap cannot both be set");
+ return;
@@ -306,53 +299,53 @@ index 1d1f27cfff..ec45bbaa52 100644
+ }
+ }
+
if (!replaces) {
if (!has_replaces) {
/* We want to mirror from @bs, but keep implicit filters on top */
unfiltered_bs = bdrv_skip_implicit_filters(bs);
@@ -2910,8 +2937,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3076,8 +3104,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
* and will allow to check whether the node still exist at mirror completion
*/
mirror_start(job_id, bs, target,
- replaces, job_flags,
- has_replaces ? replaces : NULL, job_flags,
- speed, granularity, buf_size, sync, backing_mode, zero_target,
+ replaces, job_flags, speed, granularity, buf_size, sync,
+ bitmap, bitmap_mode, backing_mode, zero_target,
+ has_replaces ? replaces : NULL, job_flags, speed, granularity,
+ buf_size, sync, bitmap, bitmap_mode, backing_mode, zero_target,
on_source_error, on_target_error, unmap, filter_node_name,
copy_mode, errp);
}
@@ -3055,6 +3082,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3222,6 +3250,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
blockdev_mirror_common(arg->job_id, bs, target_bs,
arg->replaces, arg->sync,
+ arg->bitmap,
blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
arg->has_replaces, arg->replaces, arg->sync,
+ arg->has_bitmap, arg->bitmap,
+ arg->has_bitmap_mode, arg->bitmap_mode,
backing_mode, zero_target,
arg->has_speed, arg->speed,
arg->has_granularity, arg->granularity,
@@ -3074,6 +3103,8 @@ void qmp_blockdev_mirror(const char *job_id,
@@ -3243,6 +3273,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
const char *device, const char *target,
const char *replaces,
bool has_replaces, const char *replaces,
MirrorSyncMode sync,
+ const char *bitmap,
+ bool has_bitmap, const char *bitmap,
+ bool has_bitmap_mode, BitmapSyncMode bitmap_mode,
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size,
@@ -3114,7 +3145,8 @@ void qmp_blockdev_mirror(const char *job_id,
@@ -3292,7 +3324,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
}
blockdev_mirror_common(job_id, bs, target_bs,
- replaces, sync, backing_mode,
+ replaces, sync,
blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
- has_replaces, replaces, sync, backing_mode,
+ has_replaces, replaces, sync, has_bitmap,
+ bitmap, has_bitmap_mode, bitmap_mode, backing_mode,
zero_target, has_speed, speed,
has_granularity, granularity,
has_buf_size, buf_size,
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index eb2d92a226..f0c642b194 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -158,7 +158,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c31cbd034a..11442893d0 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1254,7 +1254,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -364,26 +357,31 @@ index eb2d92a226..f0c642b194 100644
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index b1937780e1..0e5f148d30 100644
index 675d8265eb..6356a63695 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2182,6 +2182,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
@@ -1938,10 +1938,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
+# argument must be present for bitmap mode and absent otherwise.
+# The bitmap's granularity is used instead of @granularity (Since
+# 4.1).
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
+# be present for bitmap mode and absent otherwise. The bitmap's
+# granularity is used instead of @granularity (since 4.1).
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+# after the operation concludes. Must be present if sync is
+# "bitmap". Must NOT be present otherwise. (Since 4.1)
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+# the operation concludes. Must be present if sync is "bitmap".
+# Must NOT be present otherwise. (Since 4.1)
+#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
@@ -2224,7 +2233,9 @@
# @granularity: granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M (since 1.4).
+# power of 2 between 512 and 64M. Must not be specified if
+# @bitmap is present (since 1.4).
#
# @buf-size: maximum amount of data in flight from source to
# target (since 1.4).
@@ -1979,7 +1988,9 @@
{ 'struct': 'DriveMirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*format': 'str', '*node-name': 'str', '*replaces': 'str',
@@ -394,23 +392,28 @@ index b1937780e1..0e5f148d30 100644
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
@@ -2503,6 +2514,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
@@ -2247,10 +2258,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
+# argument must be present for bitmap mode and absent otherwise.
+# The bitmap's granularity is used instead of @granularity (since
+# 4.1).
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
+# be present for bitmap mode and absent otherwise. The bitmap's
+# granularity is used instead of @granularity (since 4.1).
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+# after the operation concludes. Must be present if sync is
+# "bitmap". Must NOT be present otherwise. (Since 4.1)
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+# the operation concludes. Must be present if sync is "bitmap".
+# Must NOT be present otherwise. (Since 4.1)
+#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
@@ -2551,7 +2571,8 @@
# @granularity: granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M
+# power of 2 between 512 and 64M. Must not be specified if
+# @bitmap is present.
#
# @buf-size: maximum amount of data in flight from source to
# target
@@ -2299,7 +2319,8 @@
{ 'command': 'blockdev-mirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*replaces': 'str',
@@ -421,10 +424,10 @@ index b1937780e1..0e5f148d30 100644
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index 2b358eaaa8..2a149fe021 100644
index c39e70b2f5..470ef79ae0 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -755,8 +755,8 @@ static void test_propagate_mirror(void)
@@ -617,8 +617,8 @@ static void test_propagate_mirror(void)
/* Start a mirror job */
mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,
@@ -434,4 +437,4 @@ index 2b358eaaa8..2a149fe021 100644
+ false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
&error_abort);
job = job_get("job0");

View File

@@ -24,10 +24,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index fafca1360e..05e738bcce 100644
index 0821214138..c688726fae 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -694,8 +694,6 @@ static int mirror_exit_common(Job *job)
@@ -674,8 +674,6 @@ static int mirror_exit_common(Job *job)
bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
}
@@ -36,9 +36,9 @@ index fafca1360e..05e738bcce 100644
/* Make sure that the source BDS doesn't go away during bdrv_replace_node,
* before we can call bdrv_drained_end */
bdrv_ref(src);
@@ -805,6 +803,18 @@ static int mirror_exit_common(Job *job)
bdrv_drained_end(target_bs);
bdrv_unref(target_bs);
@@ -783,6 +781,18 @@ static int mirror_exit_common(Job *job)
blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
+ if (s->sync_bitmap) {
+ if (s->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS ||
@@ -55,7 +55,7 @@ index fafca1360e..05e738bcce 100644
bs_opaque->job = NULL;
bdrv_drained_end(src);
@@ -1763,10 +1773,6 @@ static BlockJob *mirror_start_job(
@@ -1635,10 +1645,6 @@ static BlockJob *mirror_start_job(
" sync mode",
MirrorSyncMode_str(sync_mode));
return NULL;
@@ -66,7 +66,7 @@ index fafca1360e..05e738bcce 100644
}
} else if (bitmap) {
error_setg(errp,
@@ -1783,6 +1789,12 @@ static BlockJob *mirror_start_job(
@@ -1655,6 +1661,12 @@ static BlockJob *mirror_start_job(
return NULL;
}
granularity = bdrv_dirty_bitmap_granularity(bitmap);

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 3 insertions(+)
diff --git a/blockdev.c b/blockdev.c
index ec45bbaa52..9fab7ec554 100644
index 03e99264dc..9e14feec87 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2894,6 +2894,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3056,6 +3056,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -28,4 +28,4 @@ index ec45bbaa52..9fab7ec554 100644
+ return;
}
if (!replaces) {
if (!has_replaces) {

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 05e738bcce..2a2a227f3b 100644
index c688726fae..a7f829f766 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -809,8 +809,8 @@ static int mirror_exit_common(Job *job)
@@ -787,8 +787,8 @@ static int mirror_exit_common(Job *job)
job->ret == 0 && ret == 0)) {
/* Success; synchronize copy back to sync. */
bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
@@ -30,7 +30,7 @@ index 05e738bcce..2a2a227f3b 100644
}
}
bdrv_release_dirty_bitmap(s->dirty_bitmap);
@@ -1971,11 +1971,8 @@ static BlockJob *mirror_start_job(
@@ -1835,11 +1835,8 @@ static BlockJob *mirror_start_job(
}
if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
@@ -43,4 +43,4 @@ index 05e738bcce..2a2a227f3b 100644
+ NULL, true);
}
bdrv_graph_wrlock();
ret = block_job_add_bdrv(&s->common, "source", bs, 0,

View File

@@ -12,8 +12,6 @@ uniform w.r.t. backup block jobs.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebase for 8.2.2]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/mirror.c | 28 +++------------
blockdev.c | 29 +++++++++++++++
@@ -21,12 +19,12 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
3 files changed, 70 insertions(+), 59 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 2a2a227f3b..87c0856979 100644
index a7f829f766..6a126d18c8 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1763,31 +1763,13 @@ static BlockJob *mirror_start_job(
GLOBAL_STATE_CODE();
@@ -1635,31 +1635,13 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
- if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
- error_setg(errp, "Sync mode '%s' not supported",
@@ -62,17 +60,17 @@ index 2a2a227f3b..87c0856979 100644
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
diff --git a/blockdev.c b/blockdev.c
index 9fab7ec554..158ac9314b 100644
index 9e14feec87..b6f797b41f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2873,7 +2873,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3035,7 +3035,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
+ if ((sync == MIRROR_SYNC_MODE_BITMAP) ||
+ (sync == MIRROR_SYNC_MODE_INCREMENTAL)) {
+ /* done before desugaring 'incremental' to print the right message */
+ if (!bitmap_name) {
+ if (!has_bitmap) {
+ error_setg(errp, "Must provide a valid bitmap name for "
+ "'%s' sync mode", MirrorSyncMode_str(sync));
+ return;
@@ -93,7 +91,7 @@ index 9fab7ec554..158ac9314b 100644
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ }
+
if (bitmap_name) {
if (has_bitmap) {
+ if (sync != MIRROR_SYNC_MODE_BITMAP) {
+ error_setg(errp, "Sync mode '%s' not supported with bitmap.",
+ MirrorSyncMode_str(sync));

View File

@@ -48,7 +48,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6 files changed, 59 insertions(+), 5 deletions(-)
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index c3740ec616..7f38ce6b8b 100644
index 1a8a369b50..2c8a558c67 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts;
@@ -60,10 +60,10 @@ index c3740ec616..7f38ce6b8b 100644
void monitor_init_globals(void);
void monitor_init_globals_core(void);
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
index 5676eb334e..4c452a6aeb 100644
index 9c3a09cb01..a92be8c3f7 100644
--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
@@ -151,6 +151,13 @@ typedef struct {
@@ -144,6 +144,13 @@ typedef struct {
QemuMutex qmp_queue_lock;
/* Input queue that holds all the parsed QMP requests */
GQueue *qmp_requests;
@@ -78,10 +78,10 @@ index 5676eb334e..4c452a6aeb 100644
/**
diff --git a/monitor/monitor.c b/monitor/monitor.c
index c5a5d30877..07775784d4 100644
index 46a171bca6..5ccdd2424b 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -116,6 +116,21 @@ bool monitor_cur_is_qmp(void)
@@ -135,6 +135,21 @@ bool monitor_cur_is_qmp(void)
return cur_mon && monitor_is_qmp(cur_mon);
}
@@ -104,10 +104,10 @@ index c5a5d30877..07775784d4 100644
* Is @mon is using readline?
* Note: not all HMP monitors use readline, e.g., gdbserver has a
diff --git a/monitor/qmp.c b/monitor/qmp.c
index 2f46cf9e49..f093e256e9 100644
index 092c527b6f..6b8cfcf6d8 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -165,6 +165,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
@@ -141,6 +141,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
QDict *rsp;
QDict *error;
@@ -116,7 +116,7 @@ index 2f46cf9e49..f093e256e9 100644
rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
&mon->common);
@@ -180,7 +182,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
@@ -156,7 +158,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
}
}
@@ -135,7 +135,7 @@ index 2f46cf9e49..f093e256e9 100644
qobject_unref(rsp);
}
@@ -461,6 +473,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
@@ -444,6 +456,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
switch (event) {
case CHR_EVENT_OPENED:
@@ -144,10 +144,10 @@ index 2f46cf9e49..f093e256e9 100644
monitor_qmp_caps_reset(mon);
data = qmp_greeting(mon);
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index e569224eae..eb03782e91 100644
index 59600210ce..95602446eb 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -117,16 +117,28 @@ typedef struct QmpDispatchBH {
@@ -120,16 +120,28 @@ typedef struct QmpDispatchBH {
QObject **ret;
Error **errp;
Coroutine *co;
@@ -180,19 +180,19 @@ index e569224eae..eb03782e91 100644
aio_co_wake(data->co);
}
@@ -253,6 +265,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
@@ -243,6 +255,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
.ret = &ret,
.errp = &err,
.co = qemu_coroutine_self(),
+ .conn_nr = monitor_get_connection_nr(cur_mon),
};
aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh,
aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
&data);
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
index 1894cdfe1f..d74d0459f0 100644
index d058a2a00d..3290b58120 100644
--- a/stubs/monitor-core.c
+++ b/stubs/monitor-core.c
@@ -12,6 +12,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
@@ -13,6 +13,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
return NULL;
}
@@ -201,6 +201,6 @@ index 1894cdfe1f..d74d0459f0 100644
+ return -1;
+}
+
void qapi_event_emit(QAPIEvent event, QDict *qdict)
void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
{
}

View File

@@ -1,100 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Tue, 7 Mar 2023 15:03:02 +0100
Subject: [PATCH] ide: avoid potential deadlock when draining during trim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The deadlock can happen as follows:
1. ide_issue_trim is called, and increments the in_flight counter.
2. ide_issue_trim_cb calls blk_aio_pdiscard.
3. Somebody else starts draining (e.g. backup to insert the cbw node).
4. ide_issue_trim_cb is called as the completion callback for
blk_aio_pdiscard.
5. ide_issue_trim_cb issues yet another blk_aio_pdiscard request.
6. The request is added to the wait queue via blk_wait_while_drained,
because draining has been started.
7. Nobody ever decrements the in_flight counter and draining can't
finish. This would be done by ide_trim_bh_cb, which is called after
ide_issue_trim_cb has issued its last request, but
ide_issue_trim_cb is not called anymore, because it's the
completion callback of blk_aio_pdiscard, which waits on draining.
Quoting Hanna Czenczek:
> The point of 7e5cdb345f was that we need any in-flight count to
> accompany a set s->bus->dma->aiocb. While blk_aio_pdiscard() is
> happening, we don't necessarily need another count. But we do need
> it while there is no blk_aio_pdiscard().
> ide_issue_trim_cb() returns in two cases (and, recursively through
> its callers, leaves s->bus->dma->aiocb set):
> 1. After calling blk_aio_pdiscard(), which will keep an in-flight
> count,
> 2. After calling replay_bh_schedule_event() (i.e.
> qemu_bh_schedule()), which does not keep an in-flight count.
Thus, even after moving the blk_inc_in_flight to above the
replay_bh_schedule_event call, the invariant "ide_issue_trim_cb
returns with an accompanying in-flight count" is still satisfied.
However, the issue 7e5cdb345f fixed for canceling resurfaces, because
ide_cancel_dma_sync assumes that it just needs to drain once. But now
the in_flight count is not consistently > 0 during the trim operation.
So, change it to drain until !s->bus->dma->aiocb, which means that the
operation finished (s->bus->dma->aiocb is cleared by ide_set_inactive
via the ide_dma_cb when the end of the transfer is reached).
Discussion here:
https://lists.nongnu.org/archive/html/qemu-devel/2023-03/msg02506.html
Fixes: 7e5cdb345f ("ide: Increment BB in-flight counter for TRIM BH")
Suggested-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/ide/core.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/ide/core.c b/hw/ide/core.c
index b14983ec54..41c543e627 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -456,7 +456,7 @@ static void ide_trim_bh_cb(void *opaque)
iocb->bh = NULL;
qemu_aio_unref(iocb);
- /* Paired with an increment in ide_issue_trim() */
+ /* Paired with an increment in ide_issue_trim_cb() */
blk_dec_in_flight(blk);
}
@@ -516,6 +516,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
done:
iocb->aiocb = NULL;
if (iocb->bh) {
+ /* Paired with a decrement in ide_trim_bh_cb() */
+ blk_inc_in_flight(s->blk);
replay_bh_schedule_event(iocb->bh);
}
}
@@ -528,9 +530,6 @@ BlockAIOCB *ide_issue_trim(
IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
TrimAIOCB *iocb;
- /* Paired with a decrement in ide_trim_bh_cb() */
- blk_inc_in_flight(s->blk);
-
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s;
iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
@@ -754,8 +753,9 @@ void ide_cancel_dma_sync(IDEState *s)
*/
if (s->bus->dma->aiocb) {
trace_ide_cancel_dma_sync_remaining();
- blk_drain(s->blk);
- assert(s->bus->dma->aiocb == NULL);
+ while (s->bus->dma->aiocb) {
+ blk_drain(s->blk);
+ }
}
}

View File

@@ -0,0 +1,55 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 1 Sep 2021 16:51:04 +0200
Subject: [PATCH] monitor/hmp: add support for flag argument with value
Adds support for the "-xS" parameter type, where "-x" denotes a flag
name and the "S" suffix indicates that this flag is supposed to take an
arbitrary string parameter.
These parameters are always optional, the entry in the qdict will be
omitted if the flag is not given.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
monitor/hmp.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/monitor/hmp.c b/monitor/hmp.c
index d50c3124e1..a32dce7a35 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -980,6 +980,7 @@ static QDict *monitor_parse_arguments(Monitor *mon,
{
const char *tmp = p;
int skip_key = 0;
+ int ret;
/* option */
c = *typestr++;
@@ -1002,8 +1003,22 @@ static QDict *monitor_parse_arguments(Monitor *mon,
}
if (skip_key) {
p = tmp;
+ } else if (*typestr == 'S') {
+ /* has option with string value */
+ typestr++;
+ tmp = p++;
+ while (qemu_isspace(*p)) {
+ p++;
+ }
+ ret = get_str(buf, sizeof(buf), &p);
+ if (ret < 0) {
+ monitor_printf(mon, "%s: value expected for -%c\n",
+ cmd->name, *tmp);
+ goto fail;
+ }
+ qdict_put_str(qdict, key, buf);
} else {
- /* has option */
+ /* has boolean option */
p++;
qdict_put_bool(qdict, key, true);
}

View File

@@ -0,0 +1,479 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 25 Aug 2021 11:14:13 +0200
Subject: [PATCH] monitor: refactor set/expire_password and allow VNC display
id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It is possible to specify more than one VNC server on the command line,
either with an explicit ID or the auto-generated ones à la "default",
"vnc2", "vnc3", ...
It is not possible to change the password on one of these extra VNC
displays though. Fix this by adding a "display" parameter to the
"set_password" and "expire_password" QMP and HMP commands.
For HMP, the display is specified using the "-d" value flag.
For QMP, the schema is updated to explicitly express the supported
variants of the commands with protocol-discriminated unions.
Suggested-by: Eric Blake <eblake@redhat.com>
Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hmp-commands.hx | 29 ++++----
monitor/hmp-cmds.c | 57 +++++++++++++++-
monitor/qmp-cmds.c | 62 ++++++-----------
qapi/ui.json | 165 ++++++++++++++++++++++++++++++++++++++-------
4 files changed, 233 insertions(+), 80 deletions(-)
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 8e45bce2cd..d78e4cfc47 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1514,34 +1514,35 @@ ERST
{
.name = "set_password",
- .args_type = "protocol:s,password:s,connected:s?",
- .params = "protocol password action-if-connected",
+ .args_type = "protocol:s,password:s,display:-dS,connected:s?",
+ .params = "protocol password [-d display] [action-if-connected]",
.help = "set spice/vnc password",
.cmd = hmp_set_password,
},
SRST
-``set_password [ vnc | spice ] password [ action-if-connected ]``
- Change spice/vnc password. Use zero to make the password stay valid
- forever. *action-if-connected* specifies what should happen in
- case a connection is established: *fail* makes the password change
- fail. *disconnect* changes the password and disconnects the
- client. *keep* changes the password and keeps the connection up.
- *keep* is the default.
+``set_password [ vnc | spice ] password [ -d display ] [ action-if-connected ]``
+ Change spice/vnc password. *display* can be used with 'vnc' to specify
+ which display to set the password on. *action-if-connected* specifies
+ what should happen in case a connection is established: *fail* makes
+ the password change fail. *disconnect* changes the password and
+ disconnects the client. *keep* changes the password and keeps the
+ connection up. *keep* is the default.
ERST
{
.name = "expire_password",
- .args_type = "protocol:s,time:s",
- .params = "protocol time",
+ .args_type = "protocol:s,time:s,display:-dS",
+ .params = "protocol time [-d display]",
.help = "set spice/vnc password expire-time",
.cmd = hmp_expire_password,
},
SRST
-``expire_password [ vnc | spice ]`` *expire-time*
- Specify when a password for spice/vnc becomes
- invalid. *expire-time* accepts:
+``expire_password [ vnc | spice ] expire-time [ -d display ]``
+ Specify when a password for spice/vnc becomes invalid.
+ *display* behaves the same as in ``set_password``.
+ *expire-time* accepts:
``now``
Invalidate password instantly.
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index a7e197a90b..f4ef58d257 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1451,10 +1451,41 @@ void hmp_set_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");
const char *password = qdict_get_str(qdict, "password");
+ const char *display = qdict_get_try_str(qdict, "display");
const char *connected = qdict_get_try_str(qdict, "connected");
Error *err = NULL;
+ DisplayProtocol proto;
- qmp_set_password(protocol, password, !!connected, connected, &err);
+ SetPasswordOptions opts = {
+ .password = g_strdup(password),
+ .u.vnc.display = NULL,
+ };
+
+    proto = qapi_enum_parse(&DisplayProtocol_lookup, protocol,
+                            DISPLAY_PROTOCOL_VNC, &err);
+    if (err) {
+        goto out;
+    }
+    opts.protocol = proto;
+
+    if (proto == DISPLAY_PROTOCOL_VNC) {
+        opts.u.vnc.has_display = !!display;
+        opts.u.vnc.display = g_strdup(display);
+    } else if (proto == DISPLAY_PROTOCOL_SPICE) {
+        opts.u.spice.has_connected = !!connected;
+        opts.u.spice.connected =
+            qapi_enum_parse(&SetPasswordAction_lookup, connected,
+                            SET_PASSWORD_ACTION_KEEP, &err);
+        if (err) {
+            goto out;
+        }
+    }
+
+    qmp_set_password(&opts, &err);
+out:
+    /* Single exit: the strdup'ed strings are freed on error paths as well. */
+    g_free(opts.password);
+    g_free(opts.u.vnc.display);
hmp_handle_error(mon, err);
}
@@ -1462,9 +1493,31 @@ void hmp_expire_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");
const char *whenstr = qdict_get_str(qdict, "time");
+ const char *display = qdict_get_try_str(qdict, "display");
Error *err = NULL;
+ DisplayProtocol proto;
- qmp_expire_password(protocol, whenstr, &err);
+ ExpirePasswordOptions opts = {
+ .time = g_strdup(whenstr),
+ .u.vnc.display = NULL,
+ };
+
+    proto = qapi_enum_parse(&DisplayProtocol_lookup, protocol,
+                            DISPLAY_PROTOCOL_VNC, &err);
+    if (err) {
+        goto out; /* opts.time is already allocated and must still be freed */
+    }
+    opts.protocol = proto;
+
+    if (proto == DISPLAY_PROTOCOL_VNC) {
+        opts.u.vnc.has_display = !!display;
+        opts.u.vnc.display = g_strdup(display);
+    }
+
+    qmp_expire_password(&opts, &err);
+out:
+    g_free(opts.time);
+    g_free(opts.u.vnc.display);
hmp_handle_error(mon, err);
}
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index f7d64a6457..65882b5997 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -164,45 +164,30 @@ void qmp_system_wakeup(Error **errp)
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, errp);
}
-void qmp_set_password(const char *protocol, const char *password,
- bool has_connected, const char *connected, Error **errp)
+void qmp_set_password(SetPasswordOptions *opts, Error **errp)
{
- int disconnect_if_connected = 0;
- int fail_if_connected = 0;
- int rc;
+ bool disconnect_if_connected = false;
+ bool fail_if_connected = false;
+ int rc = 0;
- if (has_connected) {
- if (strcmp(connected, "fail") == 0) {
- fail_if_connected = 1;
- } else if (strcmp(connected, "disconnect") == 0) {
- disconnect_if_connected = 1;
- } else if (strcmp(connected, "keep") == 0) {
- /* nothing */
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER, "connected");
- return;
- }
- }
-
- if (strcmp(protocol, "spice") == 0) {
+ if (opts->protocol == DISPLAY_PROTOCOL_SPICE) {
if (!qemu_using_spice(errp)) {
return;
}
- rc = qemu_spice.set_passwd(password, fail_if_connected,
+ if (opts->u.spice.has_connected) {
+ fail_if_connected =
+ opts->u.spice.connected == SET_PASSWORD_ACTION_FAIL;
+ disconnect_if_connected =
+ opts->u.spice.connected == SET_PASSWORD_ACTION_DISCONNECT;
+ }
+ rc = qemu_spice.set_passwd(opts->password, fail_if_connected,
disconnect_if_connected);
- } else if (strcmp(protocol, "vnc") == 0) {
- if (fail_if_connected || disconnect_if_connected) {
- /* vnc supports "connected=keep" only */
- error_setg(errp, QERR_INVALID_PARAMETER, "connected");
- return;
- }
+ } else if (opts->protocol == DISPLAY_PROTOCOL_VNC) {
/* Note that setting an empty password will not disable login through
* this interface. */
- rc = vnc_display_password(NULL, password);
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol",
- "'vnc' or 'spice'");
- return;
+ rc = vnc_display_password(
+ opts->u.vnc.has_display ? opts->u.vnc.display : NULL,
+ opts->password);
}
if (rc != 0) {
@@ -210,11 +195,11 @@ void qmp_set_password(const char *protocol, const char *password,
}
}
-void qmp_expire_password(const char *protocol, const char *whenstr,
- Error **errp)
+void qmp_expire_password(ExpirePasswordOptions *opts, Error **errp)
{
time_t when;
int rc;
+ const char* whenstr = opts->time;
if (strcmp(whenstr, "now") == 0) {
when = 0;
@@ -226,17 +211,14 @@ void qmp_expire_password(const char *protocol, const char *whenstr,
when = strtoull(whenstr, NULL, 10);
}
- if (strcmp(protocol, "spice") == 0) {
+ if (opts->protocol == DISPLAY_PROTOCOL_SPICE) {
if (!qemu_using_spice(errp)) {
return;
}
rc = qemu_spice.set_pw_expire(when);
- } else if (strcmp(protocol, "vnc") == 0) {
- rc = vnc_display_pw_expire(NULL, when);
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol",
- "'vnc' or 'spice'");
- return;
+ } else if (opts->protocol == DISPLAY_PROTOCOL_VNC) {
+ rc = vnc_display_pw_expire(
+ opts->u.vnc.has_display ? opts->u.vnc.display : NULL, when);
}
if (rc != 0) {
diff --git a/qapi/ui.json b/qapi/ui.json
index fd9677d48e..cba8665b73 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -9,22 +9,23 @@
{ 'include': 'common.json' }
{ 'include': 'sockets.json' }
+##
+# @DisplayProtocol:
+#
+# Display protocols which support changing password options.
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'DisplayProtocol',
+ 'data': [ { 'name': 'vnc', 'if': 'defined(CONFIG_VNC)' },
+ { 'name': 'spice', 'if': 'defined(CONFIG_SPICE)' } ] }
+
##
# @set_password:
#
# Sets the password of a remote display session.
#
-# @protocol: - 'vnc' to modify the VNC server password
-# - 'spice' to modify the Spice server password
-#
-# @password: the new password
-#
-# @connected: how to handle existing clients when changing the
-# password. If nothing is specified, defaults to 'keep'
-# 'fail' to fail the command if clients are connected
-# 'disconnect' to disconnect existing clients
-# 'keep' to maintain existing clients
-#
# Returns: - Nothing on success
# - If Spice is not enabled, DeviceNotFound
#
@@ -37,16 +38,123 @@
# <- { "return": {} }
#
##
-{ 'command': 'set_password',
- 'data': {'protocol': 'str', 'password': 'str', '*connected': 'str'} }
+{ 'command': 'set_password', 'boxed': true, 'data': 'SetPasswordOptions' }
+
+##
+# @SetPasswordOptions:
+#
+# Data required to set a new password on a display server protocol.
+#
+# @protocol: - 'vnc' to modify the VNC server password
+# - 'spice' to modify the Spice server password
+#
+# @password: the new password
+#
+# Since: 6.2
+#
+##
+{ 'union': 'SetPasswordOptions',
+ 'base': { 'protocol': 'DisplayProtocol',
+ 'password': 'str' },
+ 'discriminator': 'protocol',
+ 'data': { 'vnc': 'SetPasswordOptionsVnc',
+ 'spice': 'SetPasswordOptionsSpice' } }
+
+##
+# @SetPasswordAction:
+#
+# An action to take on changing a password on a connection with active clients.
+#
+# @fail: fail the command if clients are connected
+#
+# @disconnect: disconnect existing clients
+#
+# @keep: maintain existing clients
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'SetPasswordAction',
+ 'data': [ 'fail', 'disconnect', 'keep' ] }
+
+##
+# @SetPasswordActionVnc:
+#
+# See @SetPasswordAction. VNC only supports the keep action. 'connected'
+# should just be omitted for VNC, this is kept for backwards compatibility.
+#
+# @keep: maintain existing clients
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'SetPasswordActionVnc',
+ 'data': [ 'keep' ] }
+
+##
+# @SetPasswordOptionsSpice:
+#
+# Options for set_password specific to the Spice protocol.
+#
+# @connected: How to handle existing clients when changing the
+# password. If nothing is specified, defaults to 'keep'.
+#
+# Since: 6.2
+#
+##
+{ 'struct': 'SetPasswordOptionsSpice',
+ 'data': { '*connected': 'SetPasswordAction' } }
+
+##
+# @SetPasswordOptionsVnc:
+#
+# Options for set_password specific to the VNC protocol.
+#
+# @display: The id of the display where the password should be changed.
+# Defaults to the first.
+#
+# @connected: How to handle existing clients when changing the
+# password.
+#
+# Features:
+# @deprecated: For VNC, @connected will always be 'keep', parameter should be
+# omitted.
+#
+# Since: 6.2
+#
+##
+{ 'struct': 'SetPasswordOptionsVnc',
+ 'data': { '*display': 'str',
+ '*connected': { 'type': 'SetPasswordActionVnc',
+ 'features': ['deprecated'] } } }
##
# @expire_password:
#
# Expire the password of a remote display server.
#
-# @protocol: the name of the remote display protocol 'vnc' or 'spice'
+# Returns: - Nothing on success
+# - If @protocol is 'spice' and Spice is not active, DeviceNotFound
#
+# Since: 0.14
+#
+# Example:
+#
+# -> { "execute": "expire_password", "arguments": { "protocol": "vnc",
+# "time": "+60" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'expire_password', 'boxed': true, 'data': 'ExpirePasswordOptions' }
+
+##
+# @ExpirePasswordOptions:
+#
+# Data required to set password expiration on a display server protocol.
+#
+# @protocol: - 'vnc' to modify the VNC server expiration
+# - 'spice' to modify the Spice server expiration
+#
# @time: when to expire the password.
#
# - 'now' to expire the password immediately
@@ -54,24 +162,33 @@
# - '+INT' where INT is the number of seconds from now (integer)
# - 'INT' where INT is the absolute time in seconds
#
-# Returns: - Nothing on success
-# - If @protocol is 'spice' and Spice is not active, DeviceNotFound
-#
-# Since: 0.14
-#
# Notes: Time is relative to the server and currently there is no way to
# coordinate server time with client time. It is not recommended to
# use the absolute time version of the @time parameter unless you're
# sure you are on the same machine as the QEMU instance.
#
-# Example:
+# Since: 6.2
#
-# -> { "execute": "expire_password", "arguments": { "protocol": "vnc",
-# "time": "+60" } }
-# <- { "return": {} }
+##
+{ 'union': 'ExpirePasswordOptions',
+ 'base': { 'protocol': 'DisplayProtocol',
+ 'time': 'str' },
+ 'discriminator': 'protocol',
+ 'data': { 'vnc': 'ExpirePasswordOptionsVnc' } }
+
+##
+# @ExpirePasswordOptionsVnc:
+#
+# Options for expire_password specific to the VNC protocol.
+#
+# @display: The id of the display where the expiration should be changed.
+# Defaults to the first.
+#
+# Since: 6.2
#
##
-{ 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} }
+{ 'struct': 'ExpirePasswordOptionsVnc',
+ 'data': { '*display': 'str' } }
##
# @screendump:

View File

@@ -0,0 +1,83 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 10 Sep 2021 14:45:33 +0200
Subject: [PATCH] block/mirror: fix NULL pointer dereference in
mirror_wait_on_conflicts()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In mirror_iteration() we call mirror_wait_on_conflicts() with
`self` parameter set to NULL.
Starting from commit d44dae1a7c we dereference `self` pointer in
mirror_wait_on_conflicts() without checks if it is not NULL.
Backtrace:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
at ../block/mirror.c:172
172 self->waiting_for_op = op;
[Current thread is 1 (Thread 0x7f0908931ec0 (LWP 380249))]
(gdb) bt
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
at ../block/mirror.c:172
#1 0x00005610c5d9d631 in mirror_run (job=0x5610c76a2c00, errp=<optimized out>) at ../block/mirror.c:491
#2 0x00005610c5d58726 in job_co_entry (opaque=0x5610c76a2c00) at ../job.c:917
#3 0x00005610c5f046c6 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>)
at ../util/coroutine-ucontext.c:173
#4 0x00007f0909975820 in ?? () at ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91
from /usr/lib64/libc.so.6
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2001404
Fixes: d44dae1a7c ("block/mirror: fix active mirror dead-lock in mirror_wait_on_conflicts")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-Id: <20210910124533.288318-1-sgarzare@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
(cherry picked from commit 66fed30c9cd11854fc878a4eceb507e915d7c9cd)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/mirror.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 98fc66eabf..85b781bc21 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -160,18 +160,25 @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
if (ranges_overlap(self_start_chunk, self_nb_chunks,
op_start_chunk, op_nb_chunks))
{
- /*
- * If the operation is already (indirectly) waiting for us, or
- * will wait for us as soon as it wakes up, then just go on
- * (instead of producing a deadlock in the former case).
- */
- if (op->waiting_for_op) {
- continue;
+ if (self) {
+ /*
+ * If the operation is already (indirectly) waiting for us,
+ * or will wait for us as soon as it wakes up, then just go
+ * on (instead of producing a deadlock in the former case).
+ */
+ if (op->waiting_for_op) {
+ continue;
+ }
+
+ self->waiting_for_op = op;
}
- self->waiting_for_op = op;
qemu_co_queue_wait(&op->waiting_requests, NULL);
- self->waiting_for_op = NULL;
+
+ if (self) {
+ self->waiting_for_op = NULL;
+ }
+
break;
}
}

View File

@@ -0,0 +1,990 @@
Index: pve-qemu-kvm-6.1.0/qapi/block-core.json
===================================================================
--- pve-qemu-kvm-6.1.0.orig/qapi/block-core.json
+++ pve-qemu-kvm-6.1.0/qapi/block-core.json
@@ -3084,7 +3084,7 @@
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -4020,6 +4020,28 @@
'*server': ['InetSocketAddressBase'] } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4392,6 +4414,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4782,6 +4805,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -4977,6 +5011,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: pve-qemu-kvm-6.1.0/block/meson.build
===================================================================
--- pve-qemu-kvm-6.1.0.orig/block/meson.build
+++ pve-qemu-kvm-6.1.0/block/meson.build
@@ -91,6 +91,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: pve-qemu-kvm-6.1.0/configure
===================================================================
--- pve-qemu-kvm-6.1.0.orig/configure
+++ pve-qemu-kvm-6.1.0/configure
@@ -375,6 +375,7 @@ trace_file="trace"
spice="$default_feature"
spice_protocol="auto"
rbd="auto"
+vitastor="auto"
smartcard="auto"
u2f="auto"
libusb="auto"
@@ -1293,6 +1294,10 @@ for opt do
;;
--enable-rbd) rbd="enabled"
;;
+ --disable-vitastor) vitastor="disabled"
+ ;;
+ --enable-vitastor) vitastor="enabled"
+ ;;
--disable-xfsctl) xfs="no"
;;
--enable-xfsctl) xfs="yes"
@@ -1921,6 +1926,7 @@ disabled with --disable-FEATURE, default
spice spice
spice-protocol spice-protocol
rbd rados block device (rbd)
+ vitastor vitastor block device
libiscsi iscsi support
libnfs nfs support
smartcard smartcard support (libcacard)
@@ -5211,7 +5217,7 @@ if test "$skip_meson" = no; then
-Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt -Dbrlapi=$brlapi \
-Dcurl=$curl -Dglusterfs=$glusterfs -Dbzip2=$bzip2 -Dlibiscsi=$libiscsi \
-Dlibnfs=$libnfs -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\
- -Drbd=$rbd -Dlzo=$lzo -Dsnappy=$snappy -Dlzfse=$lzfse -Dlibxml2=$libxml2 \
+ -Drbd=$rbd -Dvitastor=$vitastor -Dlzo=$lzo -Dsnappy=$snappy -Dlzfse=$lzfse -Dlibxml2=$libxml2 \
-Dlibdaxctl=$libdaxctl -Dlibpmem=$libpmem -Dlinux_io_uring=$linux_io_uring \
-Dgnutls=$gnutls -Dnettle=$nettle -Dgcrypt=$gcrypt -Dauth_pam=$auth_pam \
-Dzstd=$zstd -Dseccomp=$seccomp -Dvirtfs=$virtfs -Dcap_ng=$cap_ng \
Index: pve-qemu-kvm-6.1.0/meson.build
===================================================================
--- pve-qemu-kvm-6.1.0.orig/meson.build
+++ pve-qemu-kvm-6.1.0/meson.build
@@ -729,6 +729,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1268,6 +1288,7 @@ config_host_data.set('CONFIG_LIBNFS', li
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
@@ -3087,6 +3108,7 @@ summary_info += {'bpf support': libbpf.f
# TODO: add back protocol and server version
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
summary_info += {'rbd support': rbd.found()}
+summary_info += {'vitastor support': vitastor.found()}
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
summary_info += {'smartcard support': cacard.found()}
summary_info += {'U2F support': u2f.found()}
Index: pve-qemu-kvm-6.1.0/meson_options.txt
===================================================================
--- pve-qemu-kvm-6.1.0.orig/meson_options.txt
+++ pve-qemu-kvm-6.1.0/meson_options.txt
@@ -102,6 +102,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('gtk', type : 'feature', value : 'auto',
description: 'GTK+ user interface')
option('sdl', type : 'feature', value : 'auto',
Index: a/block/vitastor.c
===================================================================
--- /dev/null
+++ a/block/vitastor.c
@@ -0,0 +1,797 @@
+// Copyright (c) Vitaliy Filippov, 2019+
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
+
+// QEMU block driver
+
+#ifdef VITASTOR_SOURCE_TREE
+#define BUILD_DSO
+#define _GNU_SOURCE
+#endif
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "block/block_int.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/uri.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+
+#if QEMU_VERSION_MAJOR >= 3
+#include "qemu/units.h"
+#include "block/qdict.h"
+#include "qemu/cutils.h"
+#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 10
+#include "qemu/cutils.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qjson.h"
+#else
+#include "qapi/qmp/qint.h"
+#define qdict_put_int(options, name, num_val) qdict_put_obj(options, name, QOBJECT(qint_from_int(num_val)))
+#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
+#define qobject_unref QDECREF
+#endif
+
+#include "vitastor_c.h"
+
+#ifdef VITASTOR_SOURCE_TREE
+void qemu_module_dummy(void)
+{
+}
+
+void DSO_STAMP_FUN(void)
+{
+}
+#endif
+
+typedef struct VitastorClient
+{
+ void *proxy;
+ void *watch;
+ char *config_path;
+ char *etcd_host;
+ char *etcd_prefix;
+ char *image;
+ int skip_parents;
+ uint64_t inode;
+ uint64_t pool;
+ uint64_t size;
+ long readonly;
+ int use_rdma;
+ char *rdma_device;
+ int rdma_port_num;
+ int rdma_gid_index;
+ int rdma_mtu;
+ QemuMutex mutex;
+
+ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
+ uint32_t last_bitmap_granularity;
+ uint8_t *last_bitmap;
+} VitastorClient;
+
+typedef struct VitastorRPC
+{
+ BlockDriverState *bs;
+ Coroutine *co;
+ QEMUIOVector *iov;
+ long ret;
+ int complete;
+ uint64_t inode, offset, len;
+ uint32_t bitmap_granularity;
+ uint8_t *bitmap;
+} VitastorRPC;
+
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
+static void vitastor_co_generic_bh_cb(void *opaque, long retval);
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
+static void vitastor_close(BlockDriverState *bs);
+
+// Cut <src> at the first unescaped <delim>; *p gets the remainder or NULL.
+static char *qemu_vitastor_next_tok(char *src, char delim, char **p)
+{
+    char *cur = src;
+    while (*cur && *cur != delim)
+    {
+        if (*cur == '\\' && cur[1] != '\0')
+            cur++; // the character after a backslash is never a delimiter
+        cur++;
+    }
+    *p = NULL;
+    if (*cur == delim)
+    {
+        *p = cur + 1; // remainder starts right after the delimiter
+        *cur = '\0';
+    }
+    return src;
+}
+
+static void qemu_vitastor_unescape(char *src)
+{
+    char *out = src; // write cursor for the in-place copy
+    while (*src)
+    {
+        if (*src == '\\' && src[1] != '\0')
+            src++; // drop the backslash, keep the character it protects
+        *out++ = *src++;
+    }
+    *out = '\0';
+}
+
+// Parse "vitastor:key=value[:key=value]*" into <options>; errors go to <errp>.
+// '_' in key names is normalized to '-'; '\' escapes the following character.
+// vitastor[:etcd_host=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3]
+// vitastor:config_path=/etc/vitastor/vitastor.conf:image=testimg
+static void vitastor_parse_filename(const char *filename, QDict *options, Error **errp)
+{
+    const char *start;
+    char *p, *buf;
+
+    if (!strstart(filename, "vitastor:", &start))
+    {
+        error_setg(errp, "File name must start with 'vitastor:'");
+        return;
+    }
+
+    buf = g_strdup(start);
+    p = buf;
+
+    // The following are all key/value pairs
+    while (p)
+    {
+        int i;
+        char *name, *value;
+        name = qemu_vitastor_next_tok(p, '=', &p);
+        if (!p)
+        {
+            error_setg(errp, "conf option %s has no value", name);
+            goto out; // errp is set: the checks below must not set it a second time
+        }
+        for (i = 0; name[i]; i++)
+            if (name[i] == '_')
+                name[i] = '-';
+        qemu_vitastor_unescape(name);
+        value = qemu_vitastor_next_tok(p, ':', &p);
+        qemu_vitastor_unescape(value);
+        if (!strcmp(name, "inode") ||
+            !strcmp(name, "pool") ||
+            !strcmp(name, "size") ||
+            !strcmp(name, "skip-parents") ||
+            !strcmp(name, "use-rdma") ||
+            !strcmp(name, "rdma-port-num") || // '_' was already turned into '-' above
+            !strcmp(name, "rdma-gid-index") ||
+            !strcmp(name, "rdma-mtu"))
+        {
+            unsigned long long num_val;
+            if (parse_uint_full(value, &num_val, 0))
+            {
+                error_setg(errp, "Illegal %s: %s", name, value);
+                goto out;
+            }
+            qdict_put_int(options, name, num_val);
+        }
+        else
+        {
+            qdict_put_str(options, name, value);
+        }
+    }
+    if (!qdict_get_try_str(options, "image"))
+    {
+        if (!qdict_get_try_int(options, "inode", 0))
+        {
+            error_setg(errp, "one of image (name) and inode (number) must be specified");
+            goto out;
+        }
+        if (!(qdict_get_try_int(options, "inode", 0) >> (64-POOL_ID_BITS)) &&
+            !qdict_get_try_int(options, "pool", 0))
+        {
+            error_setg(errp, "pool number must be specified or included in the inode number");
+            goto out;
+        }
+        if (!qdict_get_try_int(options, "size", 0))
+        {
+            error_setg(errp, "size must be specified when inode number is used instead of image name");
+            goto out;
+        }
+    }
+
+out:
+    g_free(buf);
+}
+
+static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
+{
+ BlockDriverState *bs = task->bs;
+ VitastorClient *client = bs->opaque;
+ task->co = qemu_coroutine_self();
+
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_bh_cb, task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task->complete)
+ {
+ qemu_coroutine_yield();
+ }
+}
+
+static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+ aio_set_fd_handler(ctx, fd,
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
+ 0 /*is_external*/,
+#endif
+ fd_read, fd_write,
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
+ NULL /*io_flush*/,
+#endif
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
+ NULL /*io_poll*/,
+#endif
+#if QEMU_VERSION_MAJOR >= 7
+ NULL /*io_poll_ready*/,
+#endif
+ opaque);
+}
+
+// Open an image: read connection options, start the client and find the size.
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+    VitastorRPC task;
+    VitastorClient *client = bs->opaque;
+    void *image = NULL;
+    int64_t ret = 0;
+    qemu_mutex_init(&client->mutex);
+    client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
+    // FIXME: Rename to etcd_address
+    client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
+    client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
+    client->skip_parents = qdict_get_try_int(options, "skip-parents", 0);
+    client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
+    client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
+    client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
+    client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
+    client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
+    client->proxy = vitastor_c_create_qemu(
+        vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
+        client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
+    );
+    image = client->image = g_strdup(qdict_get_try_str(options, "image"));
+    client->readonly = (flags & BDRV_O_RDWR) ? 0 : 1; // read-only unless opened read-write
+    // Get image metadata (size and readonly flag) or just wait until the client is ready
+    if (!image)
+        client->image = (char*)"x"; // placeholder name, restored to NULL below
+    task.complete = 0;
+    task.bs = bs;
+    if (qemu_in_coroutine())
+    {
+        vitastor_co_get_metadata(&task);
+    }
+    else
+    {
+        bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+        BDRV_POLL_WHILE(bs, !task.complete);
+    }
+    client->image = image;
+    if (client->image)
+    {
+        client->watch = (void*)task.ret;
+        client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
+        client->size = vitastor_c_inode_get_size(client->watch);
+        if (!vitastor_c_inode_get_num(client->watch))
+        {
+            error_setg(errp, "image does not exist");
+            vitastor_close(bs);
+            return -1;
+        }
+        if (!client->size)
+        {
+            client->size = qdict_get_try_int(options, "size", 0);
+        }
+    }
+    else
+    {
+        client->watch = NULL;
+        client->inode = qdict_get_try_int(options, "inode", 0);
+        client->pool = qdict_get_try_int(options, "pool", 0);
+        if (client->pool)
+        {
+            client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
+        }
+        client->size = qdict_get_try_int(options, "size", 0);
+        vitastor_c_close_watch(client->proxy, (void*)task.ret);
+    }
+    if (!client->size)
+    {
+        error_setg(errp, "image size not specified");
+        vitastor_close(bs);
+        return -1;
+    }
+    bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
+    qdict_del(options, "use-rdma");
+    qdict_del(options, "rdma-mtu");
+    qdict_del(options, "rdma-gid-index");
+    qdict_del(options, "rdma-port-num");
+    qdict_del(options, "rdma-device");
+    qdict_del(options, "config-path");
+    qdict_del(options, "etcd-host");
+    qdict_del(options, "etcd-prefix");
+    qdict_del(options, "image");
+    qdict_del(options, "inode");
+    qdict_del(options, "pool");
+    qdict_del(options, "size");
+    qdict_del(options, "skip-parents");
+    return ret;
+}
+
+static void vitastor_close(BlockDriverState *bs)
+{
+    VitastorClient *client = bs->opaque;
+    vitastor_c_destroy(client->proxy);
+    qemu_mutex_destroy(&client->mutex);
+    // Release every string g_strdup()'ed in vitastor_file_open(), including
+    // rdma_device. g_free() accepts NULL, so no guards are needed.
+    g_free(client->config_path);
+    g_free(client->etcd_host);
+    g_free(client->etcd_prefix);
+    g_free(client->rdma_device);
+    g_free(client->image);
+    // last_bitmap is released with plain free(), as in the write path
+    free(client->last_bitmap);
+    client->last_bitmap = NULL;
+}
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
+static int vitastor_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+ bsz->phys = 4096;
+ bsz->log = 512;
+ return 0;
+}
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+static int coroutine_fn vitastor_co_create_opts(
+#if QEMU_VERSION_MAJOR >= 4
+ BlockDriver *drv,
+#endif
+ const char *url, QemuOpts *opts, Error **errp)
+{
+ QDict *options;
+ int ret;
+
+ options = qdict_new();
+ vitastor_parse_filename(url, options, errp);
+ if (*errp)
+ {
+ ret = -1;
+ goto out;
+ }
+
+ // inodes don't require creation in Vitastor. FIXME: They will when there will be some metadata
+
+ ret = 0;
+out:
+ qobject_unref(options);
+ return ret;
+}
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3
+// Record a new image size of <offset> bytes; preallocation is not supported.
+static int coroutine_fn vitastor_co_truncate(BlockDriverState *bs, int64_t offset,
+#if QEMU_VERSION_MAJOR >= 4
+    bool exact,
+#endif
+    PreallocMode prealloc,
+#if QEMU_VERSION_MAJOR >= 5 && QEMU_VERSION_MINOR >= 1 || QEMU_VERSION_MAJOR > 5 || defined RHEL_BDRV_CO_TRUNCATE_FLAGS
+    BdrvRequestFlags flags,
+#endif
+    Error **errp)
+{
+    VitastorClient *client = bs->opaque;
+    if (prealloc != PREALLOC_MODE_OFF)
+    {
+        error_setg(errp, "Unsupported preallocation mode '%s'", PreallocMode_str(prealloc));
+        return -ENOTSUP;
+    }
+
+    // TODO: Resize inode to <offset> bytes
+    // client->size is kept in bytes (vitastor_getlength returns it as is), so no sector division
+    client->size = offset;
+    return 0;
+}
+#endif
+
+static int vitastor_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ bdi->cluster_size = 4096;
+ return 0;
+}
+
+static int64_t vitastor_getlength(BlockDriverState *bs)
+{
+ VitastorClient *client = bs->opaque;
+ return client->size;
+}
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+static void vitastor_refresh_limits(BlockDriverState *bs, Error **errp)
+#else
+static int vitastor_refresh_limits(BlockDriverState *bs)
+#endif
+{
+ bs->bl.request_alignment = 4096;
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 3
+ bs->bl.min_mem_alignment = 4096;
+#endif
+ bs->bl.opt_mem_alignment = 4096;
+#if QEMU_VERSION_MAJOR < 2 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR == 0
+ return 0;
+#endif
+}
+
+//static int64_t vitastor_get_allocated_file_size(BlockDriverState *bs)
+//{
+// return 0;
+//}
+
+// Reset <task> to a blank request descriptor bound to the calling coroutine and to <bs>;
+// every other field ends up zeroed
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task)
+{
+    VitastorRPC blank = {
+        .co = qemu_coroutine_self(),
+        .bs = bs,
+    };
+
+    *task = blank;
+}
+
+// Generic completion callback: stores <retval> in the request descriptor passed as <opaque>,
+// marks it complete and, when called outside of the waiting coroutine, resumes that coroutine.
+static void vitastor_co_generic_bh_cb(void *opaque, long retval)
+{
+    VitastorRPC *task = opaque;
+    task->ret = retval;
+    task->complete = 1;
+    if (qemu_coroutine_self() != task->co)
+    {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+        aio_co_wake(task->co);
+#else
+        // The descriptor is a local variable of the waiting coroutine (the callers pass
+        // the address of a stack VitastorRPC), so it is not ours to release here
+        qemu_coroutine_enter(task->co, NULL);
+#endif
+    }
+}
+
+// Read completion adapter: <version> is ignored, the rest is handled by the generic callback
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
+{
+    (void)version;
+    vitastor_co_generic_bh_cb(opaque, retval);
+}
+
+// Coroutine read: submits the request under the client mutex, then yields until the
+// completion callback marks the request as complete. Returns the result stored by the callback.
+static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
+#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
+    int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
+#else
+    uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
+#endif
+)
+{
+    VitastorClient *cli = bs->opaque;
+    VitastorRPC rpc;
+    uint64_t target_inode;
+
+    vitastor_co_init_task(bs, &rpc);
+    rpc.iov = iov;
+
+    // When an inode watch is present its inode number is used, otherwise the configured one
+    if (cli->watch)
+    {
+        target_inode = vitastor_c_inode_get_num(cli->watch);
+    }
+    else
+    {
+        target_inode = cli->inode;
+    }
+
+    qemu_mutex_lock(&cli->mutex);
+    vitastor_c_read(cli->proxy, target_inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &rpc);
+    qemu_mutex_unlock(&cli->mutex);
+
+    // Sleep until the completion callback flags the request as done
+    for (;;)
+    {
+        if (rpc.complete)
+        {
+            break;
+        }
+        qemu_coroutine_yield();
+    }
+
+    return rpc.ret;
+}
+
+// Coroutine write: drops the cached allocation bitmap, submits the request under the
+// client mutex, then yields until the completion callback marks the request as complete.
+// Returns the result stored by the callback.
+static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
+#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
+    int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
+#else
+    uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
+#endif
+)
+{
+    VitastorClient *cli = bs->opaque;
+    VitastorRPC rpc;
+    uint64_t target_inode;
+
+    vitastor_co_init_task(bs, &rpc);
+    rpc.iov = iov;
+
+    // Invalidate last bitmap on write (free(NULL) is a no-op, so no check is required)
+    free(cli->last_bitmap);
+    cli->last_bitmap = NULL;
+
+    // When an inode watch is present its inode number is used, otherwise the configured one
+    if (cli->watch)
+    {
+        target_inode = vitastor_c_inode_get_num(cli->watch);
+    }
+    else
+    {
+        target_inode = cli->inode;
+    }
+
+    qemu_mutex_lock(&cli->mutex);
+    vitastor_c_write(cli->proxy, target_inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &rpc);
+    qemu_mutex_unlock(&cli->mutex);
+
+    // Sleep until the completion callback flags the request as done
+    for (;;)
+    {
+        if (rpc.complete)
+        {
+            break;
+        }
+        qemu_coroutine_yield();
+    }
+
+    return rpc.ret;
+}
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
+#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
+// Completion callback for bitmap reads: stores the result in the request descriptor and,
+// on success, hands <bitmap> to the request. If the request still matches the range
+// recorded in the client state, the bitmap also replaces the client's cached one.
+// When called outside of the waiting coroutine, that coroutine is resumed.
+static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap)
+{
+    VitastorRPC *task = opaque;
+    VitastorClient *client = task->bs->opaque;
+    task->ret = retval;
+    task->complete = 1;
+    if (retval >= 0)
+    {
+        task->bitmap = bitmap;
+        if (client->last_bitmap_inode == task->inode &&
+            client->last_bitmap_offset == task->offset &&
+            client->last_bitmap_len == task->len)
+        {
+            free(client->last_bitmap);
+            client->last_bitmap = bitmap;
+        }
+    }
+    if (qemu_coroutine_self() != task->co)
+    {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+        aio_co_wake(task->co);
+#else
+        // The descriptor is a local variable of the waiting coroutine (the caller passes
+        // the address of a stack VitastorRPC), so it is not ours to release here
+        qemu_coroutine_enter(task->co, NULL);
+#endif
+    }
+}
+
+// Report whether the range starting at <offset> is allocated, based on the inode's
+// allocation bitmap. The most recently read bitmap is kept in the client state and
+// reused while it still covers the requested range; otherwise a new one is read,
+// rounded to whole inode blocks.
+// With <want_zero> the exact run of equal bits starting at <offset> is reported;
+// without it the whole requested range is reported as one extent that is
+// "allocated" if any bit inside it is set.
+static int coroutine_fn vitastor_co_block_status(
+    BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+    int64_t *pnum, int64_t *map, BlockDriverState **file)
+{
+    // Allocated => return BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID
+    // Not allocated => return 0
+    // Error => return -errno
+    // Set pnum to length of the extent, `*map` = `offset`, `*file` = `bs`
+    VitastorRPC task;
+    VitastorClient *client = bs->opaque;
+    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+    uint8_t bit = 0;
+    if (client->last_bitmap && client->last_bitmap_inode == inode &&
+        client->last_bitmap_offset <= offset &&
+        client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes))
+    {
+        // Use the previously read bitmap
+        task.bitmap_granularity = client->last_bitmap_granularity;
+        task.offset = client->last_bitmap_offset;
+        task.len = client->last_bitmap_len;
+        task.bitmap = client->last_bitmap;
+    }
+    else
+    {
+        // Read bitmap from this position, rounding to full inode PG blocks
+        uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
+        if (!block_size)
+            return -EAGAIN;
+        // Init coroutine
+        vitastor_co_init_task(bs, &task);
+        free(client->last_bitmap);
+        task.inode = client->last_bitmap_inode = inode;
+        task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
+        task.offset = client->last_bitmap_offset = offset / block_size * block_size;
+        task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
+        task.bitmap = client->last_bitmap = NULL;
+        qemu_mutex_lock(&client->mutex);
+        vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
+        qemu_mutex_unlock(&client->mutex);
+        while (!task.complete)
+        {
+            qemu_coroutine_yield();
+        }
+        if (task.ret < 0)
+        {
+            // Error
+            return task.ret;
+        }
+    }
+    if (want_zero)
+    {
+        // Get precise mapping with all holes
+        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
+        uint64_t bmp_len = task.len / task.bitmap_granularity;
+        uint64_t bmp_end = bmp_pos+1;
+        bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
+        while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit)
+        {
+            bmp_end++;
+        }
+        *pnum = (bmp_end-bmp_pos) * task.bitmap_granularity;
+    }
+    else
+    {
+        // Get larger allocated extents, possibly with false positives
+        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
+        // bmp_end is an exclusive bit INDEX, not a bit count: the loop below compares it
+        // with bmp_pos directly, so bmp_pos must not be subtracted from it. Otherwise a
+        // range that does not start at the beginning of the bitmap is scanned only partially
+        // (or not at all) and allocated data is reported as a hole.
+        // The end is rounded up so that a trailing partial granule is checked as well
+        // (assumes the bitmap covers task.len in whole granules - TODO confirm)
+        uint64_t bmp_end = (offset+bytes-task.offset+task.bitmap_granularity-1) / task.bitmap_granularity;
+        while (bmp_pos < bmp_end)
+        {
+            if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8)
+            {
+                // Byte-aligned position with at least 8 bits left: test a whole byte at once
+                bit = bit || task.bitmap[bmp_pos >> 3];
+                bmp_pos += 8;
+            }
+            else
+            {
+                bit = bit || ((task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1);
+                bmp_pos++;
+            }
+        }
+        *pnum = bytes;
+    }
+    if (bit)
+    {
+        *map = offset;
+        *file = bs;
+    }
+    return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
+}
+#endif
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
+// QEMU 1.7-2.11
+// Sector-based adapter around vitastor_co_block_status(): converts the sector range to
+// bytes, always asks for the precise mapping (want_zero = 1) and converts the returned
+// extent length back to sectors. Returns the status/negative errno of the byte-based call.
+static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+{
+    int64_t map = 0;
+    int64_t pnumbytes = 0;
+    // <file> already has the type the byte-based function expects; passing its address
+    // would make the callee store the node pointer into our local parameter instead
+    int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, &pnumbytes, &map, file);
+    *pnum = pnumbytes/BDRV_SECTOR_SIZE;
+    return r;
+}
+#endif
+#endif
+
+#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
+// Sector-based read entry point: converts sectors to bytes and forwards to vitastor_co_preadv()
+static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
+{
+    uint64_t byte_offset = sector_num*BDRV_SECTOR_SIZE;
+    uint64_t byte_count = nb_sectors*BDRV_SECTOR_SIZE;
+
+    return vitastor_co_preadv(bs, byte_offset, byte_count, iov, 0);
+}
+
+// Sector-based write entry point: converts sectors to bytes and forwards to vitastor_co_pwritev()
+static int coroutine_fn vitastor_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
+{
+    uint64_t byte_offset = sector_num*BDRV_SECTOR_SIZE;
+    uint64_t byte_count = nb_sectors*BDRV_SECTOR_SIZE;
+
+    return vitastor_co_pwritev(bs, byte_offset, byte_count, iov, 0);
+}
+#endif
+
+// Coroutine flush: submits a sync request under the client mutex, then yields until the
+// completion callback marks the request as complete. Returns the result stored by the callback.
+static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
+{
+    VitastorClient *cli = bs->opaque;
+    VitastorRPC rpc;
+
+    vitastor_co_init_task(bs, &rpc);
+
+    qemu_mutex_lock(&cli->mutex);
+    vitastor_c_sync(cli->proxy, vitastor_co_generic_bh_cb, &rpc);
+    qemu_mutex_unlock(&cli->mutex);
+
+    // Sleep until the completion callback flags the request as done
+    for (;;)
+    {
+        if (rpc.complete)
+        {
+            break;
+        }
+        qemu_coroutine_yield();
+    }
+
+    return rpc.ret;
+}
+
+// Options accepted by the create operation: only the virtual disk size.
+// The table is a QemuOptsList or a QEMUOptionParameter array depending on the QEMU version;
+// both variants describe the same single option.
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+static QemuOptsList vitastor_create_opts = {
+ .name = "vitastor-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vitastor_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
+};
+#else
+static QEMUOptionParameter vitastor_create_opts[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ // Terminating entry
+ { NULL }
+};
+#endif
+
+// NULL-terminated list of option names installed as .strong_runtime_opts of the driver
+// (only compiled for QEMU >= 4)
+#if QEMU_VERSION_MAJOR >= 4
+static const char *vitastor_strong_runtime_opts[] = {
+ "inode",
+ "pool",
+ "config-path",
+ "etcd-host",
+ "etcd-prefix",
+
+ NULL
+};
+#endif
+
+// Driver descriptor registered by vitastor_block_init().
+// Which callbacks are installed depends on the QEMU version and, for the block status
+// callbacks, on the Vitastor C API version.
+static BlockDriver bdrv_vitastor = {
+ .format_name = "vitastor",
+ .protocol_name = "vitastor",
+
+ .instance_size = sizeof(VitastorClient),
+ .bdrv_parse_filename = vitastor_parse_filename,
+
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_get_info = vitastor_get_info,
+ .bdrv_getlength = vitastor_getlength,
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
+ .bdrv_probe_blocksizes = vitastor_probe_blocksizes,
+#endif
+ .bdrv_refresh_limits = vitastor_refresh_limits,
+
+ // FIXME: Implement it along with per-inode statistics
+ //.bdrv_get_allocated_file_size = vitastor_get_allocated_file_size,
+
+ .bdrv_file_open = vitastor_file_open,
+ .bdrv_close = vitastor_close,
+
+ // Option list for the create operation
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+ .create_opts = &vitastor_create_opts,
+#else
+ .create_options = vitastor_create_opts,
+#endif
+
+ // For qmp_blockdev_create(), used by the qemu monitor / QAPI
+ // Requires patching QAPI IDL, thus unimplemented
+ //.bdrv_co_create = vitastor_co_create,
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+ // For bdrv_create(), used by qemu-img
+ .bdrv_co_create_opts = vitastor_co_create_opts,
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3
+ .bdrv_co_truncate = vitastor_co_truncate,
+#endif
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+ // For snapshot export
+ .bdrv_co_block_status = vitastor_co_block_status,
+#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
+ // Sector-based wrapper around vitastor_co_block_status
+ .bdrv_co_get_block_status = vitastor_co_get_block_status,
+#endif
+#endif
+
+ // Byte-based read/write callbacks where selected by the version check,
+ // sector-based wrappers otherwise
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
+ .bdrv_co_preadv = vitastor_co_preadv,
+ .bdrv_co_pwritev = vitastor_co_pwritev,
+#else
+ .bdrv_co_readv = vitastor_co_readv,
+ .bdrv_co_writev = vitastor_co_writev,
+#endif
+
+ .bdrv_co_flush_to_disk = vitastor_co_flush,
+
+#if QEMU_VERSION_MAJOR >= 4
+ .strong_runtime_opts = vitastor_strong_runtime_opts,
+#endif
+};
+
+// Register the Vitastor driver descriptor with the block layer
+static void vitastor_block_init(void)
+{
+ bdrv_register(&bdrv_vitastor);
+}
+
+// Hook the registration function into block_init()
+block_init(vitastor_block_init);

View File

@@ -14,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index 56d1972d15..cfa0b832ba 100644
index dd295cfc6d..3ac5177cbb 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -565,7 +565,7 @@ static QemuOptsList raw_runtime_opts = {
@@ -533,7 +533,7 @@ static QemuOptsList raw_runtime_opts = {
{
.name = "locking",
.type = QEMU_OPT_STRING,
@@ -26,7 +26,7 @@ index 56d1972d15..cfa0b832ba 100644
},
{
.name = "pr-manager",
@@ -665,7 +665,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
@@ -631,7 +631,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->use_lock = false;
break;
case ON_OFF_AUTO_AUTO:

View File

@@ -9,12 +9,12 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/net/net.h b/include/net/net.h
index cdd5b109b0..653a37e9d1 100644
index 5d1508081f..f665924193 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -305,8 +305,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
@@ -219,8 +219,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
int net_hub_id_for_client(NetClientState *nc, int *id);
NetClientState *net_hub_port_find(int hub_id);
-#define DEFAULT_NETWORK_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifup"
-#define DEFAULT_NETWORK_DOWN_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifdown"

View File

@@ -10,10 +10,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 76f24446a5..2a47d79b49 100644
index 21b33fbe2e..32514193a9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2556,9 +2556,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
@@ -2007,9 +2007,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
#define CPU_RESOLVING_TYPE TYPE_X86_CPU
#ifdef TARGET_X86_64
@@ -24,4 +24,4 @@ index 76f24446a5..2a47d79b49 100644
+#define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("kvm32")
#endif
#define cpu_list x86_cpu_list
#define cpu_signal_handler cpu_x86_signal_handler

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/ui/spice-core.c b/ui/spice-core.c
index 0326c63bec..d523d00200 100644
index 0371055e6c..840cf56923 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -690,32 +690,35 @@ static void qemu_spice_init(void)
@@ -694,32 +694,35 @@ static void qemu_spice_init(void)
if (tls_port) {
x509_dir = qemu_opt_get(opts, "x509-dir");

View File

@@ -9,7 +9,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/block/gluster.c b/block/gluster.c
index c6d25ae733..ccca125c3a 100644
index e8ee14c8e9..3eb6a05500 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -42,7 +42,7 @@
@@ -21,15 +21,15 @@ index c6d25ae733..ccca125c3a 100644
/*
* Several versions of GlusterFS (3.12? -> 6.0.1) fail when the transfer size
* is greater or equal to 1024 MiB, so we are limiting the transfer size to 512
@@ -421,6 +421,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
@@ -424,6 +424,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
int old_errno;
SocketAddressList *server;
uint64_t port;
unsigned long long port;
+ const char *logfile;
glfs = glfs_find_preopened(gconf->volume);
if (glfs) {
@@ -463,9 +464,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
@@ -466,9 +467,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
}
}

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c
index af984fb7db..bf143fac00 100644
index dcf82b15b8..feeec452f0 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -963,6 +963,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@@ -814,6 +814,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
rados_conf_set(*cluster, "rbd_cache", "false");
}

View File

@@ -0,0 +1,88 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:37 +0200
Subject: [PATCH] PVE: [Up] qmp: add get_link_status
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
net/net.c | 27 +++++++++++++++++++++++++++
qapi/net.json | 15 +++++++++++++++
qapi/pragma.json | 1 +
3 files changed, 43 insertions(+)
diff --git a/net/net.c b/net/net.c
index 76bbb7c31b..82e0a768b4 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1314,6 +1314,33 @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
}
}
+/* PVE: 1 if the link of @name is up, 0 if it is down, -1 with @errp set if not found */
+int64_t qmp_get_link_status(const char *name, Error **errp)
+{
+    NetClientState *ncs[MAX_QUEUE_NUM];
+    NetClientState *nc;
+    int queues;
+    bool ret;
+
+    queues = qemu_find_net_clients_except(name, ncs,
+                                          NET_CLIENT_DRIVER__MAX,
+                                          MAX_QUEUE_NUM);
+    if (queues == 0) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "Device '%s' not found", name);
+        return (int64_t) -1;
+    }
+
+    nc = ncs[0];
+    ret = nc->link_down;
+
+    /* guard against a client without a peer; only a NIC peer overrides the state */
+    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+        ret = nc->peer->link_down;
+    }
+    return ret ? 0 : 1;
+}
+
void colo_notify_filters_event(int event, Error **errp)
{
NetClientState *nc;
diff --git a/qapi/net.json b/qapi/net.json
index 7fab2e7cd8..74c9a6109e 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -35,6 +35,21 @@
##
{ 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
+##
+# @get_link_status:
+#
+# Get the current link state of the nics or nic.
+#
+# @name: name of the nic you get the state of
+#
+# Return: If link is up 1
+# If link is down 0
+# If an error occurs an empty string.
+#
+# Notes: this is a Proxmox VE extension and not an official part of QEMU.
+##
+{ 'command': 'get_link_status', 'data': {'name': 'str'} , 'returns': 'int' }
+
##
# @netdev_add:
#
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 3bc0335d1f..7c91ea3685 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -22,6 +22,7 @@
'system_reset',
'system_wakeup' ],
'command-returns-exceptions': [
+ 'get_link_status',
'human-monitor-command',
'qom-get',
'query-tpm-models',

View File

@@ -16,7 +16,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/block/gluster.c b/block/gluster.c
index ccca125c3a..301a653ea7 100644
index 3eb6a05500..b612918ee8 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -57,6 +57,7 @@ typedef struct GlusterAIOCB {
@@ -27,7 +27,7 @@ index ccca125c3a..301a653ea7 100644
} GlusterAIOCB;
typedef struct BDRVGlusterState {
@@ -746,8 +747,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
@@ -752,8 +753,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
acb->ret = 0; /* Success */
} else if (ret < 0) {
acb->ret = -errno; /* Read/Write failed */
@@ -39,15 +39,15 @@ index ccca125c3a..301a653ea7 100644
}
aio_co_schedule(acb->aio_context, acb->coroutine);
@@ -1018,6 +1021,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
@@ -1021,6 +1024,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
+ acb.is_write = true;
ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1198,9 +1202,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
@@ -1202,9 +1206,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
acb.aio_context = bdrv_get_aio_context(bs);
if (write) {
@@ -59,7 +59,7 @@ index ccca125c3a..301a653ea7 100644
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
}
@@ -1263,6 +1269,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
@@ -1268,6 +1274,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
@@ -67,11 +67,11 @@ index ccca125c3a..301a653ea7 100644
ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1311,6 +1318,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
@@ -1314,6 +1321,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
+ acb.is_write = true;
ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/qemu-img.c b/qemu-img.c
index 2044c22a4c..4c8b5412c6 100644
index 908fd0cce5..5dc1d0a2ca 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3075,7 +3075,8 @@ static int img_info(int argc, char **argv)
@@ -2977,7 +2977,8 @@ static int img_info(int argc, char **argv)
list = collect_image_info_list(image_opts, filename, fmt, chain,
force_share);
if (!list) {

View File

@@ -31,17 +31,16 @@ override the output file's size.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qemu-img-cmds.hx | 4 +-
qemu-img.c | 202 ++++++++++++++++++++++++++++++-----------------
2 files changed, 133 insertions(+), 73 deletions(-)
qemu-img.c | 187 +++++++++++++++++++++++++++++------------------
2 files changed, 119 insertions(+), 72 deletions(-)
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index c9dd70a892..048788b23d 100644
index b3620f29e5..e70ef3dc91 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST
@@ -58,9 +58,9 @@ SRST
ERST
DEF("dd", img_dd,
@@ -54,10 +53,10 @@ index c9dd70a892..048788b23d 100644
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 4c8b5412c6..d5fa89a204 100644
index 5dc1d0a2ca..f773182bd0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4997,10 +4997,12 @@ static int img_bitmap(int argc, char **argv)
@@ -4793,10 +4793,12 @@ static int img_bitmap(int argc, char **argv)
#define C_IF 04
#define C_OF 010
#define C_SKIP 020
@@ -70,7 +69,7 @@ index 4c8b5412c6..d5fa89a204 100644
};
struct DdIo {
@@ -5076,6 +5078,19 @@ static int img_dd_skip(const char *arg,
@@ -4872,6 +4874,19 @@ static int img_dd_skip(const char *arg,
return 0;
}
@@ -90,7 +89,7 @@ index 4c8b5412c6..d5fa89a204 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -5116,6 +5131,7 @@ static int img_dd(int argc, char **argv)
@@ -4912,6 +4927,7 @@ static int img_dd(int argc, char **argv)
{ "if", img_dd_if, C_IF },
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
@@ -98,7 +97,7 @@ index 4c8b5412c6..d5fa89a204 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5191,91 +5207,112 @@ static int img_dd(int argc, char **argv)
@@ -4987,91 +5003,112 @@ static int img_dd(int argc, char **argv)
arg = NULL;
}
@@ -135,7 +134,11 @@ index 4c8b5412c6..d5fa89a204 100644
- error_report_err(local_err);
- ret = -1;
- goto out;
- }
+ if (!blk1) {
+ ret = -1;
+ goto out;
+ }
}
- if (!drv->create_opts) {
- error_report("Format driver '%s' does not support image creation",
- drv->format_name);
@@ -147,11 +150,7 @@ index 4c8b5412c6..d5fa89a204 100644
- proto_drv->format_name);
- ret = -1;
- goto out;
+ if (!blk1) {
+ ret = -1;
+ goto out;
+ }
}
- }
- create_opts = qemu_opts_append(create_opts, drv->create_opts);
- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
-
@@ -275,54 +274,41 @@ index 4c8b5412c6..d5fa89a204 100644
}
if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
@@ -5292,20 +5329,43 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
@@ -5089,11 +5126,17 @@ static int img_dd(int argc, char **argv)
for (out_pos = 0; in_pos < size; ) {
+ int in_ret, out_ret;
int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
for (out_pos = 0; in_pos < size; block_count++) {
int in_ret, out_ret;
-
- ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
- if (ret < 0) {
- if (in_pos + in.bsz > size) {
- in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
+ size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+ if (blk1) {
+ in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
+ if (in_ret == 0) {
+ in_ret = bytes;
+ }
+ } else {
+ in_ret = read(STDIN_FILENO, in.buf, bytes);
+ in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
} else {
- in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
+ in_ret = read(STDIN_FILENO, in.buf, in_bsz);
+ if (in_ret == 0) {
+ /* early EOF is considered an error */
+ error_report("Input ended unexpectedly");
+ ret = -1;
+ goto out;
+ }
+ }
+ if (in_ret < 0) {
error_report("error while reading from input image file: %s",
- strerror(-ret));
+ strerror(-in_ret));
+ ret = -1;
goto out;
}
in_pos += bytes;
if (in_ret < 0) {
error_report("error while reading from input image file: %s",
@@ -5103,9 +5146,13 @@ static int img_dd(int argc, char **argv)
}
in_pos += in_ret;
- ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
- if (ret < 0) {
- out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+ if (blk2) {
+ out_ret = blk_pwrite(blk2, out_pos, in_ret, in.buf, 0);
+ if (out_ret == 0) {
+ out_ret = in_ret;
+ }
+ out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+ } else {
+ out_ret = write(STDOUT_FILENO, in.buf, in_ret);
+ }
+
- if (out_ret < 0) {
+ if (out_ret != in_ret) {
error_report("error while writing to output image file: %s",
- strerror(-ret));
+ strerror(-out_ret));
+ ret = -1;
goto out;
}
out_pos += bytes;
strerror(-out_ret));
ret = -1;

View File

@@ -10,16 +10,15 @@ an expected end of input.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qemu-img.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index d5fa89a204..d458e85af2 100644
index f773182bd0..98a6562364 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4998,11 +4998,13 @@ static int img_bitmap(int argc, char **argv)
@@ -4794,11 +4794,13 @@ static int img_bitmap(int argc, char **argv)
#define C_OF 010
#define C_SKIP 020
#define C_OSIZE 040
@@ -33,7 +32,7 @@ index d5fa89a204..d458e85af2 100644
};
struct DdIo {
@@ -5091,6 +5093,19 @@ static int img_dd_osize(const char *arg,
@@ -4887,6 +4889,19 @@ static int img_dd_osize(const char *arg,
return 0;
}
@@ -53,13 +52,13 @@ index d5fa89a204..d458e85af2 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -5105,12 +5120,14 @@ static int img_dd(int argc, char **argv)
@@ -4901,12 +4916,14 @@ static int img_dd(int argc, char **argv)
int c, i;
const char *out_fmt = "raw";
const char *fmt = NULL;
- int64_t size = 0;
+ int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
int64_t block_count = 0, out_pos, in_pos;
bool force_share = false;
struct DdInfo dd = {
.flags = 0,
@@ -69,7 +68,7 @@ index d5fa89a204..d458e85af2 100644
};
struct DdIo in = {
.bsz = 512, /* Block size is by default 512 bytes */
@@ -5132,6 +5149,7 @@ static int img_dd(int argc, char **argv)
@@ -4928,6 +4945,7 @@ static int img_dd(int argc, char **argv)
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
{ "osize", img_dd_osize, C_OSIZE },
@@ -77,22 +76,20 @@ index d5fa89a204..d458e85af2 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5328,9 +5346,10 @@ static int img_dd(int argc, char **argv)
@@ -5124,14 +5142,18 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
- for (out_pos = 0; in_pos < size; ) {
- for (out_pos = 0; in_pos < size; block_count++) {
+ readsize = (dd.isize > 0) ? dd.isize : size;
+ for (out_pos = 0; in_pos < readsize; ) {
+ for (out_pos = 0; in_pos < readsize; block_count++) {
int in_ret, out_ret;
- int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
+ int bytes = (in_pos + in.bsz > readsize) ? readsize - in_pos : in.bsz;
- size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+ size_t in_bsz = in_pos + in.bsz > readsize ? readsize - in_pos : in.bsz;
if (blk1) {
in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
if (in_ret == 0) {
@@ -5339,6 +5358,9 @@ static int img_dd(int argc, char **argv)
in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
} else {
in_ret = read(STDIN_FILENO, in.buf, bytes);
in_ret = read(STDIN_FILENO, in.buf, in_bsz);
if (in_ret == 0) {
+ if (dd.isize == 0) {
+ goto out;

View File

@@ -1,121 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:42 +0200
Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: fix getopt-string + add documentation]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
docs/tools/qemu-img.rst | 11 ++++++++++-
qemu-img-cmds.hx | 4 ++--
qemu-img.c | 23 ++++++++++++++---------
3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 3653adb963..d83e8fb3c0 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -212,6 +212,10 @@ Parameters to convert subcommand:
Parameters to dd subcommand:
+.. option:: -n
+
+ Skip the creation of the target volume
+
.. program:: qemu-img-dd
.. option:: bs=BLOCK_SIZE
@@ -492,7 +496,7 @@ Command description:
it doesn't need to be specified separately in this case.
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
dd copies from *INPUT* file to *OUTPUT* file converting it from
*FMT* format to *OUTPUT_FMT* format.
@@ -503,6 +507,11 @@ Command description:
The size syntax is similar to :manpage:`dd(1)`'s size syntax.
+ If the ``-n`` option is specified, the target volume creation will be
+ skipped. This is useful for formats such as ``rbd`` if the target
+ volume has already been created with site specific options that cannot
+ be supplied through ``qemu-img``.
+
.. option:: info [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [--backing-chain] [-U] FILENAME
Give information about the disk image *FILENAME*. Use it in
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 048788b23d..0b29a67a06 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST
ERST
DEF("dd", img_dd,
- "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
SRST
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
ERST
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index d458e85af2..dc13efba8b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5122,7 +5122,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -5160,7 +5160,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5180,6 +5180,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
+ case 'n':
+ skip_create = true;
+ break;
case 'U':
force_share = true;
break;
@@ -5310,13 +5313,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
+ if (!skip_create) {
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "%s: error while creating output image: ",
+ out.filename);
+ ret = -1;
+ goto out;
+ }
}
/* TODO, we can't honour --image-opts for the target,

View File

@@ -0,0 +1,65 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:42 +0200
Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qemu-img.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 98a6562364..355b3b82f4 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4918,7 +4918,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t block_count = 0, out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -4956,7 +4956,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:U:n", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -4976,6 +4976,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
+ case 'n':
+ skip_create = true;
+ break;
case 'U':
force_share = true;
break;
@@ -5106,13 +5109,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
+ if (!skip_create) {
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "%s: error while creating output image: ",
+ out.filename);
+ ret = -1;
+ goto out;
+ }
}
/* TODO, we can't honour --image-opts for the target,

View File

@@ -1,130 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Mon, 7 Feb 2022 14:21:01 +0100
Subject: [PATCH] qemu-img dd: add -l option for loading a snapshot
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
docs/tools/qemu-img.rst | 6 +++---
qemu-img-cmds.hx | 4 ++--
qemu-img.c | 33 +++++++++++++++++++++++++++++++--
3 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index d83e8fb3c0..61c6b21859 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -496,10 +496,10 @@ Command description:
it doesn't need to be specified separately in this case.
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
- dd copies from *INPUT* file to *OUTPUT* file converting it from
- *FMT* format to *OUTPUT_FMT* format.
+ dd copies from *INPUT* file or snapshot *SNAPSHOT_PARAM* to *OUTPUT* file
+ converting it from *FMT* format to *OUTPUT_FMT* format.
The data is by default read and written using blocks of 512 bytes but can be
modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 0b29a67a06..758f397232 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST
ERST
DEF("dd", img_dd,
- "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [-l snapshot_param] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
SRST
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
ERST
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index dc13efba8b..02f2e0aa45 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5114,6 +5114,7 @@ static int img_dd(int argc, char **argv)
BlockDriver *drv = NULL, *proto_drv = NULL;
BlockBackend *blk1 = NULL, *blk2 = NULL;
QemuOpts *opts = NULL;
+ QemuOpts *sn_opts = NULL;
QemuOptsList *create_opts = NULL;
Error *local_err = NULL;
bool image_opts = false;
@@ -5123,6 +5124,7 @@ static int img_dd(int argc, char **argv)
int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
bool force_share = false, skip_create = false;
+ const char *snapshot_name = NULL;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -5160,7 +5162,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:l:Un", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5183,6 +5185,19 @@ static int img_dd(int argc, char **argv)
case 'n':
skip_create = true;
break;
+ case 'l':
+ if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+ sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
+ optarg, false);
+ if (!sn_opts) {
+ error_report("Failed in parsing snapshot param '%s'",
+ optarg);
+ goto out;
+ }
+ } else {
+ snapshot_name = optarg;
+ }
+ break;
case 'U':
force_share = true;
break;
@@ -5242,11 +5257,24 @@ static int img_dd(int argc, char **argv)
if (dd.flags & C_IF) {
blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
force_share);
-
if (!blk1) {
ret = -1;
goto out;
}
+ if (sn_opts) {
+ bdrv_snapshot_load_tmp(blk_bs(blk1),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+ &local_err);
+ } else if (snapshot_name != NULL) {
+ bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(blk1), snapshot_name,
+ &local_err);
+ }
+ if (local_err) {
+ error_reportf_err(local_err, "Failed to load snapshot: ");
+ ret = -1;
+ goto out;
+ }
}
if (dd.flags & C_OSIZE) {
@@ -5401,6 +5429,7 @@ static int img_dd(int argc, char **argv)
out:
g_free(arg);
qemu_opts_del(opts);
+ qemu_opts_del(sn_opts);
qemu_opts_free(create_opts);
blk_unref(blk1);
blk_unref(blk2);

View File

@@ -7,62 +7,17 @@ Actually provide memory information via the query-balloon
command.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: add BalloonInfo to member name exceptions list
rebase for 8.0 - moved to hw/core/machine-hmp-cmds.c]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/core/machine-hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
hw/virtio/virtio-balloon.c | 33 +++++++++++++++++++++++++++++++--
monitor/hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
qapi/machine.json | 22 +++++++++++++++++++++-
qapi/pragma.json | 1 +
4 files changed, 82 insertions(+), 4 deletions(-)
3 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index c6325cdcaa..7f817d622d 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -179,7 +179,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+ monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+ if (info->has_total_mem) {
+ monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+ }
+ if (info->has_free_mem) {
+ monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+ }
+
+ if (info->has_mem_swapped_in) {
+ monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+ }
+ if (info->has_mem_swapped_out) {
+ monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+ }
+ if (info->has_major_page_faults) {
+ monitor_printf(mon, " major_page_faults=%" PRId64,
+ info->major_page_faults);
+ }
+ if (info->has_minor_page_faults) {
+ monitor_printf(mon, " minor_page_faults=%" PRId64,
+ info->minor_page_faults);
+ }
+ if (info->has_last_update) {
+ monitor_printf(mon, " last_update=%" PRId64,
+ info->last_update);
+ }
+
+ monitor_printf(mon, "\n");
qapi_free_BalloonInfo(info);
}
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 2eb5a14fa2..aa2fd6c32f 100644
index ae7867a8db..956e3f4e46 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -795,8 +795,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
@@ -820,8 +820,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
VirtIOBalloon *dev = opaque;
@@ -102,13 +57,54 @@ index 2eb5a14fa2..aa2fd6c32f 100644
}
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index f4ef58d257..c8b97909e7 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -698,7 +698,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+ monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+ if (info->has_total_mem) {
+ monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+ }
+ if (info->has_free_mem) {
+ monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+ }
+
+ if (info->has_mem_swapped_in) {
+ monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+ }
+ if (info->has_mem_swapped_out) {
+ monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+ }
+ if (info->has_major_page_faults) {
+ monitor_printf(mon, " major_page_faults=%" PRId64,
+ info->major_page_faults);
+ }
+ if (info->has_minor_page_faults) {
+ monitor_printf(mon, " minor_page_faults=%" PRId64,
+ info->minor_page_faults);
+ }
+ if (info->has_last_update) {
+ monitor_printf(mon, " last_update=%" PRId64,
+ info->last_update);
+ }
+
+ monitor_printf(mon, "\n");
qapi_free_BalloonInfo(info);
}
diff --git a/qapi/machine.json b/qapi/machine.json
index a6b8795b09..9f7ed0eaa0 100644
index 157712f006..34035c25d1 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1163,9 +1163,29 @@
# @actual: the logical size of the VM in bytes Formula used:
# logical_vm_size = vm_ram_size - balloon_size
@@ -1018,10 +1018,30 @@
# @actual: the logical size of the VM in bytes
# Formula used: logical_vm_size = vm_ram_size - balloon_size
#
+# @last_update: time when stats got updated from guest
+#
@@ -127,6 +123,7 @@ index a6b8795b09..9f7ed0eaa0 100644
+# @max_mem: amount of memory (in bytes) assigned to the guest
+#
# Since: 0.14
#
##
-{ 'struct': 'BalloonInfo', 'data': {'actual': 'int' } }
+{ 'struct': 'BalloonInfo',
@@ -137,15 +134,3 @@ index a6b8795b09..9f7ed0eaa0 100644
##
# @query-balloon:
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 023a2ef7bc..6aaa9cb975 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -81,6 +81,7 @@
'member-name-exceptions': [ # visible in:
'ACPISlotType', # query-acpi-ospm-status
'AcpiTableOptions', # -acpitable
+ 'BalloonInfo', # query-balloon
'BlkdebugEvent', # blockdev-add, -blockdev
'BlkdebugSetStateOptions', # blockdev-add, -blockdev
'BlockDeviceInfo', # query-block

View File

@@ -13,36 +13,36 @@ Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 1bc21b84a4..93fb4bc24a 100644
index 216fdfaf3a..8f8d5d5276 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -91,6 +91,12 @@ MachineInfoList *qmp_query_machines(bool has_compat_props, bool compat_props,
@@ -98,6 +98,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
info->hotpluggable_cpus = mc->has_hotpluggable_cpus;
info->numa_mem_supported = mc->numa_mem_supported;
info->deprecated = !!mc->deprecation_reason;
info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi");
+
+ if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
+ info->has_is_current = true;
+ info->is_current = true;
+ }
+
if (default_cpu_type) {
info->default_cpu_type = g_strdup(default_cpu_type);
}
if (mc->default_cpu_type) {
info->default_cpu_type = g_strdup(mc->default_cpu_type);
info->has_default_cpu_type = true;
diff --git a/qapi/machine.json b/qapi/machine.json
index 9f7ed0eaa0..16366b774a 100644
index 34035c25d1..cf120ac343 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -167,6 +167,8 @@
@@ -141,6 +141,8 @@
#
# @is-default: whether the machine is default
#
+# @is-current: whether this machine is currently used
+#
# @cpu-max: maximum number of CPUs supported by the machine type
# (since 1.5)
# (since 1.5)
#
@@ -199,7 +201,7 @@
@@ -162,7 +164,7 @@
##
{ 'struct': 'MachineInfo',
'data': { 'name': 'str', '*alias': 'str',
@@ -50,4 +50,4 @@ index 9f7ed0eaa0..16366b774a 100644
+ '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
'*default-ram-id': 'str', 'acpi': 'bool',
'*default-ram-id': 'str' } }

View File

@@ -6,18 +6,16 @@ Subject: [PATCH] PVE: qapi: modify spice query
Provide the last ticket in the SpiceInfo struct optionally.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to QAPI change]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qapi/ui.json | 3 +++
ui/spice-core.c | 4 ++++
2 files changed, 7 insertions(+)
ui/spice-core.c | 5 +++++
2 files changed, 8 insertions(+)
diff --git a/qapi/ui.json b/qapi/ui.json
index c536d4e524..c2df48959b 100644
index cba8665b73..081115ea8a 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -312,11 +312,14 @@
@@ -333,11 +333,14 @@
#
# @channels: a list of @SpiceChannel for each active spice channel
#
@@ -30,17 +28,18 @@ index c536d4e524..c2df48959b 100644
'*tls-port': 'int', '*auth': 'str', '*compiled-version': 'str',
+ '*ticket': 'str',
'mouse-mode': 'SpiceQueryMouseMode', '*channels': ['SpiceChannel']},
'if': 'CONFIG_SPICE' }
'if': 'defined(CONFIG_SPICE)' }
diff --git a/ui/spice-core.c b/ui/spice-core.c
index d523d00200..c76c224706 100644
index 840cf56923..96be349635 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -548,6 +548,10 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
@@ -534,6 +534,11 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
micro = SPICE_SERVER_VERSION & 0xff;
info->compiled_version = g_strdup_printf("%d.%d.%d", major, minor, micro);
+ if (auth_passwd) {
+ info->has_ticket = true;
+ info->ticket = g_strdup(auth_passwd);
+ }
+

View File

@@ -1,284 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 13 Oct 2022 11:33:50 +0200
Subject: [PATCH] PVE: add IOChannel implementation for savevm-async
based on migration/channel-block.c and the implementation that was
present in migration/savevm-async.c before QEMU 7.1.
Passes along read/write requests to the given BlockBackend, while
ensuring that a read request going beyond the end results in a
graceful short read.
Additionally, allows tracking the current position from the outside
(intended to be used for progress tracking).
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/channel-savevm-async.c | 184 +++++++++++++++++++++++++++++++
migration/channel-savevm-async.h | 51 +++++++++
migration/meson.build | 1 +
3 files changed, 236 insertions(+)
create mode 100644 migration/channel-savevm-async.c
create mode 100644 migration/channel-savevm-async.h
diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
new file mode 100644
index 0000000000..e57ab2ae40
--- /dev/null
+++ b/migration/channel-savevm-async.c
@@ -0,0 +1,184 @@
+/*
+ * QIO Channel implementation to be used by savevm-async QMP calls
+ */
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "qapi/error.h"
+#include "system/block-backend.h"
+#include "trace.h"
+
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos)
+{
+ QIOChannelSavevmAsync *ioc;
+
+ ioc = QIO_CHANNEL_SAVEVM_ASYNC(object_new(TYPE_QIO_CHANNEL_SAVEVM_ASYNC));
+
+ bdrv_ref(blk_bs(be));
+ ioc->be = be;
+ ioc->bs_pos = bs_pos;
+
+ return ioc;
+}
+
+
+static void
+qio_channel_savevm_async_finalize(Object *obj)
+{
+ QIOChannelSavevmAsync *ioc = QIO_CHANNEL_SAVEVM_ASYNC(obj);
+
+ if (ioc->be) {
+ bdrv_unref(blk_bs(ioc->be));
+ ioc->be = NULL;
+ }
+ ioc->bs_pos = NULL;
+}
+
+
+static ssize_t
+qio_channel_savevm_async_readv(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ int **fds,
+ size_t *nfds,
+ int flags,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ BlockBackend *be = saioc->be;
+ int64_t maxlen = blk_getlength(be);
+ QEMUIOVector qiov;
+ size_t size;
+ int ret;
+
+ qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+ if (*saioc->bs_pos >= maxlen) {
+ error_setg(errp, "cannot read beyond maxlen");
+ return -1;
+ }
+
+ if (maxlen - *saioc->bs_pos < qiov.size) {
+ size = maxlen - *saioc->bs_pos;
+ } else {
+ size = qiov.size;
+ }
+
+ // returns 0 on success
+ ret = blk_preadv(be, *saioc->bs_pos, size, &qiov, 0);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blk_preadv failed");
+ return -1;
+ }
+
+ *saioc->bs_pos += size;
+ return size;
+}
+
+
+static ssize_t
+qio_channel_savevm_async_writev(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ int *fds,
+ size_t nfds,
+ int flags,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ BlockBackend *be = saioc->be;
+ QEMUIOVector qiov;
+ int ret;
+
+ qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+ if (qemu_in_coroutine()) {
+ ret = blk_co_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
+ aio_wait_kick();
+ } else {
+ ret = blk_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
+ }
+
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blk(_co)_pwritev failed");
+ return -1;
+ }
+
+ *saioc->bs_pos += qiov.size;
+ return qiov.size;
+}
+
+
+static int
+qio_channel_savevm_async_set_blocking(QIOChannel *ioc,
+ bool enabled,
+ Error **errp)
+{
+ if (!enabled) {
+ error_setg(errp, "Non-blocking mode not supported for savevm-async");
+ return -1;
+ }
+ return 0;
+}
+
+
+static int
+qio_channel_savevm_async_close(QIOChannel *ioc,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ int rv = bdrv_flush(blk_bs(saioc->be));
+
+ if (rv < 0) {
+ error_setg_errno(errp, -rv, "Unable to flush VMState");
+ return -1;
+ }
+
+ bdrv_unref(blk_bs(saioc->be));
+ saioc->be = NULL;
+ saioc->bs_pos = NULL;
+
+ return 0;
+}
+
+
+static void
+qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc,
+ AioContext *read_ctx,
+ IOHandler *io_read,
+ AioContext *write_ctx,
+ IOHandler *io_write,
+ void *opaque)
+{
+ // if channel-block starts doing something, check if this needs adaptation
+}
+
+
+static void
+qio_channel_savevm_async_class_init(ObjectClass *klass,
+ void *class_data G_GNUC_UNUSED)
+{
+ QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+ ioc_klass->io_writev = qio_channel_savevm_async_writev;
+ ioc_klass->io_readv = qio_channel_savevm_async_readv;
+ ioc_klass->io_set_blocking = qio_channel_savevm_async_set_blocking;
+ ioc_klass->io_close = qio_channel_savevm_async_close;
+ ioc_klass->io_set_aio_fd_handler = qio_channel_savevm_async_set_aio_fd_handler;
+}
+
+static const TypeInfo qio_channel_savevm_async_info = {
+ .parent = TYPE_QIO_CHANNEL,
+ .name = TYPE_QIO_CHANNEL_SAVEVM_ASYNC,
+ .instance_size = sizeof(QIOChannelSavevmAsync),
+ .instance_finalize = qio_channel_savevm_async_finalize,
+ .class_init = qio_channel_savevm_async_class_init,
+};
+
+static void
+qio_channel_savevm_async_register_types(void)
+{
+ type_register_static(&qio_channel_savevm_async_info);
+}
+
+type_init(qio_channel_savevm_async_register_types);
diff --git a/migration/channel-savevm-async.h b/migration/channel-savevm-async.h
new file mode 100644
index 0000000000..17ae2cb261
--- /dev/null
+++ b/migration/channel-savevm-async.h
@@ -0,0 +1,51 @@
+/*
+ * QEMU I/O channels driver for savevm-async.c
+ *
+ * Copyright (c) 2022 Proxmox Server Solutions
+ *
+ * Authors:
+ * Fiona Ebner (f.ebner@proxmox.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QIO_CHANNEL_SAVEVM_ASYNC_H
+#define QIO_CHANNEL_SAVEVM_ASYNC_H
+
+#include "io/channel.h"
+#include "qom/object.h"
+
+#define TYPE_QIO_CHANNEL_SAVEVM_ASYNC "qio-channel-savevm-async"
+OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelSavevmAsync, QIO_CHANNEL_SAVEVM_ASYNC)
+
+
+/**
+ * QIOChannelSavevmAsync:
+ *
+ * The QIOChannelBlock object provides a channel implementation that is able to
+ * perform I/O on any BlockBackend whose BlockDriverState directly contains a
+ * VMState (as opposed to indirectly, like qcow2). It allows tracking the
+ * current position from the outside.
+ */
+struct QIOChannelSavevmAsync {
+ QIOChannel parent;
+ BlockBackend *be;
+ size_t *bs_pos;
+};
+
+
+/**
+ * qio_channel_savevm_async_new:
+ * @be: the block backend
+ * @bs_pos: used to keep track of the IOChannels current position
+ *
+ * Create a new IO channel object that can perform I/O on a BlockBackend object
+ * whose BlockDriverState directly contains a VMState.
+ *
+ * Returns: the new channel object
+ */
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos);
+
+#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
diff --git a/migration/meson.build b/migration/meson.build
index 9aa48b290e..cf66c78681 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -14,6 +14,7 @@ system_ss.add(files(
'block-active.c',
'channel.c',
'channel-block.c',
+ 'channel-savevm-async.c',
'cpr.c',
'cpr-transfer.c',
'cpu-throttle.c',

View File

@@ -0,0 +1,188 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 4 May 2020 11:05:08 +0200
Subject: [PATCH] PVE: add optional buffer size to QEMUFile
So we can use a 4M buffer for savevm-async which should
increase performance storing the state onto ceph.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[increase max IOV count in QEMUFile to actually write more data]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/qemu-file.c | 38 +++++++++++++++++++++++++-------------
migration/qemu-file.h | 1 +
migration/savevm-async.c | 4 ++--
3 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 6338d8e2ff..6697a93a7e 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -30,8 +30,8 @@
#include "trace.h"
#include "qapi/error.h"
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+#define DEFAULT_IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
struct QEMUFile {
const QEMUFileOps *ops;
@@ -45,7 +45,8 @@ struct QEMUFile {
when reading */
int buf_index;
int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
+ size_t buf_allocated_size;
+ uint8_t *buf;
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -103,7 +104,7 @@ bool qemu_file_mode_is_not_valid(const char *mode)
return false;
}
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, bool has_ioc, size_t buffer_size)
{
QEMUFile *f;
@@ -112,9 +113,17 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
f->opaque = opaque;
f->ops = ops;
f->has_ioc = has_ioc;
+ f->buf_allocated_size = buffer_size;
+ f->buf = malloc(buffer_size);
+
return f;
}
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
+{
+ return qemu_fopen_ops_sized(opaque, ops, has_ioc, DEFAULT_IO_BUF_SIZE);
+}
+
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
{
@@ -349,7 +358,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
}
len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
- IO_BUF_SIZE - pending, &local_error);
+ f->buf_allocated_size - pending, &local_error);
if (len > 0) {
f->buf_size += len;
f->pos += len;
@@ -389,6 +398,9 @@ int qemu_fclose(QEMUFile *f)
ret = ret2;
}
}
+
+ free(f->buf);
+
/* If any error was spotted before closing, we should report it
* instead of the close() return value.
*/
@@ -443,7 +455,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
- if (f->buf_index == IO_BUF_SIZE) {
+ if (f->buf_index == f->buf_allocated_size) {
qemu_fflush(f);
}
}
@@ -469,7 +481,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
- l = IO_BUF_SIZE - f->buf_index;
+ l = f->buf_allocated_size - f->buf_index;
if (l > size) {
l = size;
}
@@ -516,8 +528,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
size_t index;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
- assert(size <= IO_BUF_SIZE - offset);
+ assert(offset < f->buf_allocated_size);
+ assert(size <= f->buf_allocated_size - offset);
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -567,7 +579,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
size_t res;
uint8_t *src;
- res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
if (res == 0) {
return done;
}
@@ -601,7 +613,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
*/
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src = NULL;
@@ -626,7 +638,7 @@ int qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
+ assert(offset < f->buf_allocated_size);
if (index >= f->buf_size) {
qemu_fill_buffer(f);
@@ -778,7 +790,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
const uint8_t *p, size_t size)
{
- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
+ ssize_t blen = f->buf_allocated_size - f->buf_index - sizeof(int32_t);
if (blen < compressBound(size)) {
return -1;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 3f36d4dc8c..67501fd9cf 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -121,6 +121,7 @@ typedef struct QEMUFileHooks {
} QEMUFileHooks;
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc);
+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, bool has_ioc, size_t buffer_size);
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
int qemu_get_fd(QEMUFile *f);
int qemu_fclose(QEMUFile *f);
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 79a0cda906..970ee3b3fc 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -418,7 +418,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
goto restart;
}
- snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+ snap_state.file = qemu_fopen_ops_sized(&snap_state, &block_file_ops, false, 4 * 1024 * 1024);
if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -567,7 +567,7 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker);
/* restore the VM state */
- f = qemu_fopen_ops(be, &loadstate_file_ops);
+ f = qemu_fopen_ops_sized(be, &loadstate_file_ops, false, 4 * 1024 * 1024);
if (!f) {
error_setg(errp, "Could not open VM state file");
goto the_end;

View File

@@ -1,208 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 4 May 2020 11:05:08 +0200
Subject: [PATCH] PVE: add optional buffer size to QEMUFile
So we can use a 4M buffer for savevm-async which should
increase performance storing the state onto ceph.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[increase max IOV count in QEMUFile to actually write more data]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to removal of QEMUFileOps]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/qemu-file.c | 48 +++++++++++++++++++++++++++-------------
migration/qemu-file.h | 2 ++
migration/savevm-async.c | 5 +++--
3 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 1303a5bf58..6e2d58d5c0 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -34,8 +34,8 @@
#include "rdma.h"
#include "io/channel-file.h"
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+#define DEFAULT_IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
typedef struct FdEntry {
QTAILQ_ENTRY(FdEntry) entry;
@@ -48,7 +48,8 @@ struct QEMUFile {
int buf_index;
int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
+ size_t buf_allocated_size;
+ uint8_t *buf;
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -108,7 +109,9 @@ int qemu_file_shutdown(QEMUFile *f)
return 0;
}
-static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+static QEMUFile *qemu_file_new_impl(QIOChannel *ioc,
+ bool is_writable,
+ size_t buffer_size)
{
QEMUFile *f;
@@ -119,6 +122,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
f->is_writable = is_writable;
f->can_pass_fd = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
QTAILQ_INIT(&f->fds);
+ f->buf_allocated_size = buffer_size;
+ f->buf = malloc(buffer_size);
return f;
}
@@ -129,17 +134,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
*/
QEMUFile *qemu_file_get_return_path(QEMUFile *f)
{
- return qemu_file_new_impl(f->ioc, !f->is_writable);
+ return qemu_file_new_impl(f->ioc, !f->is_writable, DEFAULT_IO_BUF_SIZE);
}
QEMUFile *qemu_file_new_output(QIOChannel *ioc)
{
- return qemu_file_new_impl(ioc, true);
+ return qemu_file_new_impl(ioc, true, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size)
+{
+ return qemu_file_new_impl(ioc, true, buffer_size);
}
QEMUFile *qemu_file_new_input(QIOChannel *ioc)
{
- return qemu_file_new_impl(ioc, false);
+ return qemu_file_new_impl(ioc, false, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size)
+{
+ return qemu_file_new_impl(ioc, false, buffer_size);
}
/*
@@ -339,7 +354,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
}
do {
- struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
+ struct iovec iov = { f->buf + pending, f->buf_allocated_size - pending };
len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
&local_error);
if (len == QIO_CHANNEL_ERR_BLOCK) {
@@ -443,6 +458,9 @@ int qemu_fclose(QEMUFile *f)
g_free(fde);
}
g_clear_pointer(&f->ioc, object_unref);
+
+ free(f->buf);
+
error_free(f->last_error_obj);
g_free(f);
trace_qemu_file_fclose();
@@ -491,7 +509,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
- if (f->buf_index == IO_BUF_SIZE) {
+ if (f->buf_index == f->buf_allocated_size) {
qemu_fflush(f);
}
}
@@ -516,7 +534,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
- l = IO_BUF_SIZE - f->buf_index;
+ l = f->buf_allocated_size - f->buf_index;
if (l > size) {
l = size;
}
@@ -662,8 +680,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si
size_t index;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
- assert(size <= IO_BUF_SIZE - offset);
+ assert(offset < f->buf_allocated_size);
+ assert(size <= f->buf_allocated_size - offset);
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -713,7 +731,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
size_t res;
uint8_t *src;
- res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
if (res == 0) {
return done;
}
@@ -747,7 +765,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
*/
size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src = NULL;
@@ -772,7 +790,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
+ assert(offset < f->buf_allocated_size);
if (index >= f->buf_size) {
qemu_fill_buffer(f);
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index f5b9f430e0..0179b90698 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -30,7 +30,9 @@
#include "io/channel.h"
QEMUFile *qemu_file_new_input(QIOChannel *ioc);
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size);
QEMUFile *qemu_file_new_output(QIOChannel *ioc);
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size);
int qemu_fclose(QEMUFile *f);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(QEMUFile, qemu_fclose)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 56e0fa6c69..730b815494 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -409,7 +409,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
&snap_state.bs_pos));
- snap_state.file = qemu_file_new_output(ioc);
+ snap_state.file = qemu_file_new_output_sized(ioc, 4 * 1024 * 1024);
if (!snap_state.file) {
error_setg(errp, "failed to open '%s'", statefile);
@@ -544,7 +544,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
bdrv_op_block_all(bs, blocker);
/* restore the VM state */
- f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+ f = qemu_file_new_input_sized(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)),
+ 4 * 1024 * 1024);
if (!f) {
error_setg(errp, "Could not open VM state file");
goto the_end;

View File

@@ -4,34 +4,30 @@ Date: Mon, 6 Apr 2020 12:16:47 +0200
Subject: [PATCH] PVE: block: add the zeroinit block driver filter
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures
adhere to block graph lock requirements
use dedicated function to open file child]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/meson.build | 1 +
block/zeroinit.c | 207 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 208 insertions(+)
block/zeroinit.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 197 insertions(+)
create mode 100644 block/zeroinit.c
diff --git a/block/meson.build b/block/meson.build
index 34b1b2a306..a21d9a5411 100644
index 0450914c7a..7a0bc3df09 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -39,6 +39,7 @@ block_ss.add(files(
'throttle.c',
'throttle-groups.c',
@@ -41,6 +41,7 @@ block_ss.add(files(
'vmdk.c',
'vpc.c',
'write-threshold.c',
+ 'zeroinit.c',
), zstd, zlib)
), zstd, zlib, gnutls)
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/zeroinit.c b/block/zeroinit.c
new file mode 100644
index 0000000000..f9d513db15
index 0000000000..5529627f7e
--- /dev/null
+++ b/block/zeroinit.c
@@ -0,0 +1,207 @@
@@ -0,0 +1,196 @@
+/*
+ * Filter to fake a zero-initialized block device.
+ *
@@ -45,10 +41,8 @@ index 0000000000..f9d513db15
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+#include "block/graph-lock.h"
+#include "qobject/qdict.h"
+#include "qobject/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
@@ -112,9 +106,10 @@ index 0000000000..f9d513db15
+ }
+
+ /* Open the raw file */
+ ret = bdrv_open_file_child(qemu_opt_get(opts, "x-next"), options, "next",
+ bs, &local_err);
+ if (ret < 0) {
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options, "next",
+ bs, &child_of_bds, BDRV_CHILD_FILTERED, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
@@ -125,9 +120,7 @@ index 0000000000..f9d513db15
+ ret = 0;
+fail:
+ if (ret < 0) {
+ bdrv_graph_wrlock();
+ bdrv_unref_child(bs, bs->file);
+ bdrv_graph_wrunlock();
+ }
+ qemu_opts_del(opts);
+ return ret;
@@ -139,32 +132,28 @@ index 0000000000..f9d513db15
+ (void)s;
+}
+
+static coroutine_fn int64_t GRAPH_RDLOCK
+zeroinit_co_getlength(BlockDriverState *bs)
+static int64_t zeroinit_getlength(BlockDriverState *bs)
+{
+ return bdrv_co_getlength(bs->file->bs);
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn zeroinit_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
+static int coroutine_fn zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ if (offset >= s->extents)
+ return 0;
+ return bdrv_pwrite_zeroes(bs->file, offset, bytes, flags);
+ return bdrv_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn zeroinit_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ int64_t extents = offset + bytes;
@@ -173,37 +162,33 @@ index 0000000000..f9d513db15
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+zeroinit_co_flush(BlockDriverState *bs)
+static coroutine_fn int zeroinit_co_flush(BlockDriverState *bs)
+{
+ return bdrv_co_flush(bs->file->bs);
+}
+
+static int GRAPH_RDLOCK
+zeroinit_has_zero_init(BlockDriverState *bs)
+static int zeroinit_has_zero_init(BlockDriverState *bs)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ return s->has_zero_init;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+static int coroutine_fn zeroinit_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
+{
+ return bdrv_co_pdiscard(bs->file, offset, bytes);
+ return bdrv_co_pdiscard(bs->file, offset, count);
+}
+
+static int GRAPH_RDLOCK
+zeroinit_co_truncate(BlockDriverState *bs, int64_t offset, _Bool exact,
+ PreallocMode prealloc, BdrvRequestFlags req_flags,
+ Error **errp)
+static int zeroinit_co_truncate(BlockDriverState *bs, int64_t offset,
+ _Bool exact, PreallocMode prealloc,
+ BdrvRequestFlags req_flags, Error **errp)
+{
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, req_flags, errp);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+zeroinit_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int zeroinit_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ return bdrv_co_get_info(bs->file->bs, bdi);
+ return bdrv_get_info(bs->file->bs, bdi);
+}
+
+static BlockDriver bdrv_zeroinit = {
@@ -212,9 +197,9 @@ index 0000000000..f9d513db15
+ .instance_size = sizeof(BDRVZeroinitState),
+
+ .bdrv_parse_filename = zeroinit_parse_filename,
+ .bdrv_open = zeroinit_open,
+ .bdrv_file_open = zeroinit_open,
+ .bdrv_close = zeroinit_close,
+ .bdrv_co_getlength = zeroinit_co_getlength,
+ .bdrv_getlength = zeroinit_getlength,
+ .bdrv_child_perm = bdrv_default_perms,
+ .bdrv_co_flush_to_disk = zeroinit_co_flush,
+
@@ -230,7 +215,7 @@ index 0000000000..f9d513db15
+ .bdrv_co_pdiscard = zeroinit_co_pdiscard,
+
+ .bdrv_co_truncate = zeroinit_co_truncate,
+ .bdrv_co_get_info = zeroinit_co_get_info,
+ .bdrv_get_info = zeroinit_get_info,
+};
+
+static void bdrv_zeroinit_init(void)

View File

@@ -10,16 +10,16 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qemu-options.hx | 3 +++
system/vl.c | 8 ++++++++
softmmu/vl.c | 8 ++++++++
2 files changed, 11 insertions(+)
diff --git a/qemu-options.hx b/qemu-options.hx
index defee0c06a..fb980a05cf 100644
index 002ba697e9..a05959b9f1 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1280,6 +1280,9 @@ legacy PC, they are not recommended for modern configurations.
@@ -1005,6 +1005,9 @@ DEFHEADING()
ERST
DEFHEADING(Block device options:)
+DEF("id", HAS_ARG, QEMU_OPTION_id,
+ "-id n set the VMID", QEMU_ARCH_ALL)
@@ -27,11 +27,11 @@ index defee0c06a..fb980a05cf 100644
DEF("fda", HAS_ARG, QEMU_OPTION_fda,
"-fda/-fdb file use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
diff --git a/system/vl.c b/system/vl.c
index 9b36ace6b4..452742ab58 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -2854,6 +2854,7 @@ void qemu_init(int argc, char **argv)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 220c67cd32..d87cf6e103 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2736,6 +2736,7 @@ void qemu_init(int argc, char **argv, char **envp)
MachineClass *machine_class;
bool userconfig = true;
FILE *vmstate_dump_file = NULL;
@@ -39,9 +39,9 @@ index 9b36ace6b4..452742ab58 100644
qemu_add_opts(&qemu_drive_opts);
qemu_add_drive_opts(&qemu_legacy_drive_opts);
@@ -3472,6 +3473,13 @@ void qemu_init(int argc, char **argv)
machine_parse_property_opt(qemu_find_opts("smp-opts"),
"smp", optarg);
@@ -3360,6 +3361,13 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_smp:
machine_parse_property_opt(qemu_find_opts("smp-opts"), "smp", optarg, &error_fatal);
break;
+ case QEMU_OPTION_id:
+ vm_id = strtol(optarg, (char **)&optarg, 10);
@@ -50,6 +50,6 @@ index 9b36ace6b4..452742ab58 100644
+ exit(1);
+ }
+ break;
#ifdef CONFIG_VNC
case QEMU_OPTION_vnc:
vnc_parse(optarg);
break;

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 2a3e878c4d..efbed1aea3 100644
index 2a20982066..7968ad5a93 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -263,6 +263,15 @@ static void apic_reset_common(DeviceState *dev)
@@ -278,6 +278,15 @@ static void apic_reset_common(DeviceState *dev)
info->vapic_base_update(s);
apic_init_reset(dev);

View File

@@ -9,14 +9,14 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/file-posix.c | 59 ++++++++++++++++++++++++++++++--------------
qapi/block-core.json | 7 +++++-
2 files changed, 46 insertions(+), 20 deletions(-)
qapi/block-core.json | 3 ++-
2 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index cfa0b832ba..d5c28cccc9 100644
index 3ac5177cbb..907aa3f22e 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2897,6 +2897,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2443,6 +2443,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
int fd;
uint64_t perm, shared;
int result = 0;
@@ -24,7 +24,7 @@ index cfa0b832ba..d5c28cccc9 100644
/* Validate options and set default values */
assert(options->driver == BLOCKDEV_DRIVER_FILE);
@@ -2937,19 +2938,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2483,19 +2484,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
@@ -59,7 +59,7 @@ index cfa0b832ba..d5c28cccc9 100644
}
/* Clear the file by truncating it to 0 */
@@ -3003,13 +3007,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2549,13 +2553,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
}
out_unlock:
@@ -82,7 +82,7 @@ index cfa0b832ba..d5c28cccc9 100644
}
out_close:
@@ -3033,6 +3039,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2580,6 +2586,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
PreallocMode prealloc;
char *buf = NULL;
Error *local_err = NULL;
@@ -90,7 +90,7 @@ index cfa0b832ba..d5c28cccc9 100644
/* Skip file: protocol prefix */
strstart(filename, "file:", &filename);
@@ -3055,6 +3062,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2602,6 +2609,18 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
return -EINVAL;
}
@@ -109,7 +109,7 @@ index cfa0b832ba..d5c28cccc9 100644
options = (BlockdevCreateOptions) {
.driver = BLOCKDEV_DRIVER_FILE,
.u.file = {
@@ -3066,6 +3085,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2613,6 +2632,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
.nocow = nocow,
.has_extent_size_hint = has_extent_size_hint,
.extent_size_hint = extent_size_hint,
@@ -119,21 +119,10 @@ index cfa0b832ba..d5c28cccc9 100644
};
return raw_co_create(&options, errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0e5f148d30..1c05413916 100644
index 6356a63695..fdfa579d00 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5016,6 +5016,10 @@
# @extent-size-hint: Extent size hint to add to the image file; 0 for
# not adding an extent size hint (default: 1 MB, since 5.1)
#
+# @locking: whether to enable file locking. If set to 'auto', only
+# enable when Open File Descriptor (OFD) locking API is available
+# (default: auto).
+#
# Since: 2.12
##
{ 'struct': 'BlockdevCreateOptionsFile',
@@ -5023,7 +5027,8 @@
@@ -4341,7 +4341,8 @@
'size': 'size',
'*preallocation': 'PreallocMode',
'*nocow': 'bool',

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/monitor/qmp.c b/monitor/qmp.c
index f093e256e9..78f1c8e3c8 100644
index 6b8cfcf6d8..3ec67e32d3 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -534,8 +534,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
@@ -519,8 +519,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
qemu_chr_fe_set_echo(&mon->common.chr, true);
/* Note: we run QMP monitor in I/O thread when @chr supports that */

View File

@@ -26,10 +26,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 63c6ef93d2..9a34017e5a 100644
index 2cf2f321f9..e0f857820d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -193,7 +193,8 @@ GlobalProperty hw_compat_4_0[] = {
@@ -107,7 +107,8 @@ GlobalProperty hw_compat_4_0[] = {
{ "virtio-vga", "edid", "false" },
{ "virtio-gpu-device", "edid", "false" },
{ "virtio-device", "use-started", "false" },

View File

@@ -11,90 +11,90 @@ and only if 'is-current').
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to QAPI changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/core/machine-qmp-cmds.c | 5 +++++
hw/core/machine-qmp-cmds.c | 6 ++++++
include/hw/boards.h | 2 ++
qapi/machine.json | 3 +++
system/vl.c | 24 ++++++++++++++++++++++++
4 files changed, 34 insertions(+)
qapi/machine.json | 4 +++-
softmmu/vl.c | 25 +++++++++++++++++++++++++
4 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 93fb4bc24a..b9999423b4 100644
index 8f8d5d5276..370e66d9cc 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -95,6 +95,11 @@ MachineInfoList *qmp_query_machines(bool has_compat_props, bool compat_props,
@@ -102,6 +102,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
info->has_is_current = true;
info->is_current = true;
+
+ // PVE version string only exists for current machine
+ if (mc->pve_version) {
+ info->has_pve_version = true;
+ info->pve_version = g_strdup(mc->pve_version);
+ }
}
if (default_cpu_type) {
if (mc->default_cpu_type) {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index f22b2e7fc7..8ada4d5832 100644
index accd6eff35..1b16728389 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -271,6 +271,8 @@ struct MachineClass {
@@ -205,6 +205,8 @@ struct MachineClass {
const char *desc;
const char *deprecation_reason;
+ const char *pve_version;
+
void (*init)(MachineState *state);
void (*reset)(MachineState *state, ResetType type);
void (*reset)(MachineState *state);
void (*wakeup)(MachineState *state);
diff --git a/qapi/machine.json b/qapi/machine.json
index 16366b774a..12cfd3f260 100644
index cf120ac343..a6f483af4f 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -189,6 +189,8 @@
@@ -160,6 +160,8 @@
#
# @acpi: machine type supports ACPI (since 8.0)
# @default-ram-id: the default ID of initial RAM memory backend (since 5.2)
#
+# @pve-version: custom PVE version suffix specified as 'machine+pveN'
+#
# @compat-props: The machine type's compatibility properties. Only
# present when query-machines argument @compat-props is true.
# (since 9.1)
@@ -205,6 +207,7 @@
# Since: 1.2
##
{ 'struct': 'MachineInfo',
@@ -167,7 +169,7 @@
'*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
'*default-ram-id': 'str', 'acpi': 'bool',
+ '*pve-version': 'str',
'*compat-props': { 'type': ['CompatProperty'],
'features': ['unstable'] } } }
- '*default-ram-id': 'str' } }
+ '*default-ram-id': 'str', '*pve-version': 'str' } }
diff --git a/system/vl.c b/system/vl.c
index 452742ab58..c3707b2412 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -1674,6 +1674,7 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
##
# @query-machines:
diff --git a/softmmu/vl.c b/softmmu/vl.c
index d87cf6e103..e9d40065bc 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1621,6 +1621,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
static MachineClass *select_machine(QDict *qdict, Error **errp)
{
ERRP_GUARD();
const char *machine_type = qdict_get_try_str(qdict, "type");
const char *optarg = qdict_get_try_str(qdict, "type");
+ const char *pvever = qdict_get_try_str(qdict, "pvever");
g_autoptr(GSList) machines = object_class_get_list(TYPE_MACHINE, false);
MachineClass *machine_class = NULL;
GSList *machines = object_class_get_list(TYPE_MACHINE, false);
MachineClass *machine_class;
Error *local_err = NULL;
@@ -1638,6 +1639,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
}
}
@@ -1693,7 +1694,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
if (!machine_class) {
error_append_hint(errp,
"Use -machine help to list supported machines\n");
+ } else {
+ if (machine_class) {
+ machine_class->pve_version = g_strdup(pvever);
+ qdict_del(qdict, "pvever");
}
+ }
+
return machine_class;
}
@@ -3414,12 +3419,31 @@ void qemu_init(int argc, char **argv)
g_slist_free(machines);
if (local_err) {
error_append_hint(&local_err, "Use -machine help to list supported machines\n");
@@ -3312,12 +3318,31 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_machine:
{
bool help;

View File

@@ -3,71 +3,64 @@ From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:57 +0200
Subject: [PATCH] PVE-Backup: add vma backup format code
Notes about partial restoring: skipping a certain drive is done via a
map line of the form skip=drive-scsi0. Since in PVE, most archives are
compressed and piped to vma for restore, it's not easily possible to
skip reads.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: improvements during create
allow partial restore
allow specifying disk formats for create operation]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
[FE: create: register all streams before entering coroutines]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/meson.build | 2 +
meson.build | 5 +
vma-reader.c | 867 ++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 816 ++++++++++++++++++++++++++++++++++++++++
vma.c | 941 ++++++++++++++++++++++++++++++++++++++++++++++
vma-reader.c | 857 ++++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 790 ++++++++++++++++++++++++++++++++++++++++++
vma.c | 851 +++++++++++++++++++++++++++++++++++++++++++++
vma.h | 150 ++++++++
6 files changed, 2781 insertions(+)
6 files changed, 2655 insertions(+)
create mode 100644 vma-reader.c
create mode 100644 vma-writer.c
create mode 100644 vma.c
create mode 100644 vma.h
diff --git a/block/meson.build b/block/meson.build
index a21d9a5411..1373612c10 100644
index 7a0bc3df09..9ce9246194 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -42,6 +42,8 @@ block_ss.add(files(
@@ -44,6 +44,8 @@ block_ss.add(files(
'zeroinit.c',
), zstd, zlib)
), zstd, zlib, gnutls)
+block_ss.add(files('../vma-writer.c'), libuuid)
+
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
block_ss.add(when: 'CONFIG_QCOW1', if_true: files('qcow.c'))
diff --git a/meson.build b/meson.build
index 8ec796d835..680ab48b9b 100644
index b3e7ec0e92..cc46eabb42 100644
--- a/meson.build
+++ b/meson.build
@@ -2161,6 +2161,8 @@ endif
@@ -1064,6 +1064,8 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
+libuuid = cc.find_library('uuid', required: true)
+
# libselinux
selinux = dependency('libselinux',
required: get_option('selinux'),
@@ -4367,6 +4369,9 @@ if have_tools
dependencies: [blockdev, qemuutil, selinux],
install: true)
# Malloc tests
malloc = []
@@ -2743,6 +2745,9 @@ if have_tools
qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
dependencies: [blockdev, qemuutil, gnutls], install: true)
+ vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
+ dependencies: [authz, block, crypto, io, qemuutil, qom], install: true)
+ dependencies: [authz, block, crypto, io, qom], install: true)
+
subdir('storage-daemon')
foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
subdir('contrib/rdmacm-mux')
subdir('contrib/elf2dmp')
diff --git a/vma-reader.c b/vma-reader.c
new file mode 100644
index 0000000000..1888b21851
index 0000000000..2b1d1cdab3
--- /dev/null
+++ b/vma-reader.c
@@ -0,0 +1,867 @@
@@ -0,0 +1,857 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -85,12 +78,12 @@ index 0000000000..1888b21851
+#include <glib.h>
+#include <uuid/uuid.h>
+
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/ratelimit.h"
+#include "vma.h"
+#include "block/block.h"
+#include "block/graph-lock.h"
+#include "system/block-backend.h"
+#include "sysemu/block-backend.h"
+
+static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
+
@@ -99,7 +92,6 @@ index 0000000000..1888b21851
+ bool write_zeroes;
+ unsigned long *bitmap;
+ int bitmap_size;
+ bool skip;
+} VmaRestoreState;
+
+struct VmaReader {
@@ -263,9 +255,6 @@ index 0000000000..1888b21851
+ if (vmar->rstate[i].bitmap) {
+ g_free(vmar->rstate[i].bitmap);
+ }
+ if (vmar->rstate[i].target) {
+ blk_unref(vmar->rstate[i].target);
+ }
+ }
+
+ if (vmar->md5csum) {
@@ -382,6 +371,7 @@ index 0000000000..1888b21851
+ }
+
+
+ int count = 0;
+ for (i = 1; i < 256; i++) {
+ VmaDeviceInfoHeader *dih = &h->dev_info[i];
+ uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
@@ -389,6 +379,7 @@ index 0000000000..1888b21851
+ const char *devname = get_header_str(vmar, devname_ptr);
+
+ if (size && devname) {
+ count++;
+ vmar->devinfo[i].size = size;
+ vmar->devinfo[i].devname = devname;
+
@@ -495,14 +486,13 @@ index 0000000000..1888b21851
+}
+
+static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
+ BlockBackend *target, bool write_zeroes, bool skip)
+ BlockBackend *target, bool write_zeroes)
+{
+ assert(vmar);
+ assert(dev_id);
+
+ vmar->rstate[dev_id].target = target;
+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
+ vmar->rstate[dev_id].skip = skip;
+
+ int64_t size = vmar->devinfo[dev_id].size;
+
@@ -517,30 +507,28 @@ index 0000000000..1888b21851
+}
+
+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
+ bool write_zeroes, bool skip, Error **errp)
+ bool write_zeroes, Error **errp)
+{
+ assert(vmar);
+ assert(target != NULL || skip);
+ assert(target != NULL);
+ assert(dev_id);
+ assert(vmar->rstate[dev_id].target == NULL && !vmar->rstate[dev_id].skip);
+ assert(vmar->rstate[dev_id].target == NULL);
+
+ if (target != NULL) {
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
+
+ allocate_rstate(vmar, dev_id, target, write_zeroes, skip);
+ allocate_rstate(vmar, dev_id, target, write_zeroes);
+
+ return 0;
+}
@@ -598,12 +586,10 @@ index 0000000000..1888b21851
+ }
+ }
+ } else {
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, buf, nb_sectors * BDRV_SECTOR_SIZE, 0);
+ if (res < 0) {
+ bdrv_graph_rdlock_main_loop();
+ error_setg(errp, "blk_pwrite to %s failed (%d)",
+ bdrv_get_device_name(blk_bs(target)), res);
+ bdrv_graph_rdunlock_main_loop();
+ return -1;
+ }
+ }
@@ -635,23 +621,19 @@ index 0000000000..1888b21851
+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
+ BlockBackend *target = NULL;
+
+ bool skip = rstate->skip;
+
+ if (dev_id != vmar->vmstate_stream) {
+ target = rstate->target;
+ if (!verify && !target && !skip) {
+ if (!verify && !target) {
+ error_setg(errp, "got wrong dev id %d", dev_id);
+ return -1;
+ }
+
+ if (!skip) {
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
+
+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
+ } else {
@@ -697,7 +679,7 @@ index 0000000000..1888b21851
+ return -1;
+ }
+
+ if (!verify && !skip) {
+ if (!verify) {
+ int nb_sectors = end_sector - sector_num;
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ buf + start, sector_num, nb_sectors,
@@ -733,7 +715,7 @@ index 0000000000..1888b21851
+ return -1;
+ }
+
+ if (!verify && !skip) {
+ if (!verify) {
+ int nb_sectors = end_sector - sector_num;
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ buf + start, sector_num,
@@ -758,7 +740,7 @@ index 0000000000..1888b21851
+ vmar->partial_zero_cluster_data += zero_size;
+ }
+
+ if (rstate->write_zeroes && !verify && !skip) {
+ if (rstate->write_zeroes && !verify) {
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ zero_vma_block, sector_num,
+ nb_sectors, errp) < 0) {
@@ -883,7 +865,8 @@ index 0000000000..1888b21851
+
+ int64_t cluster_num, end;
+
+ end = DIV_ROUND_UP(vmar->devinfo[i].size, VMA_CLUSTER_SIZE);
+ end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
+ VMA_CLUSTER_SIZE;
+
+ for (cluster_num = 0; cluster_num < end; cluster_num++) {
+ if (!vma_reader_get_bitmap(rstate, cluster_num)) {
@@ -928,7 +911,7 @@ index 0000000000..1888b21851
+
+ for (dev_id = 1; dev_id < 255; dev_id++) {
+ if (vma_reader_get_device_info(vmar, dev_id)) {
+ allocate_rstate(vmar, dev_id, NULL, false, false);
+ allocate_rstate(vmar, dev_id, NULL, false);
+ }
+ }
+
@@ -937,10 +920,10 @@ index 0000000000..1888b21851
+
diff --git a/vma-writer.c b/vma-writer.c
new file mode 100644
index 0000000000..3f489092df
index 0000000000..11d8321ffd
--- /dev/null
+++ b/vma-writer.c
@@ -0,0 +1,816 @@
@@ -0,0 +1,790 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -956,8 +939,6 @@ index 0000000000..3f489092df
+
+#include "qemu/osdep.h"
+#include <glib.h>
+#include <linux/magic.h>
+#include <sys/vfs.h>
+#include <uuid/uuid.h>
+
+#include "vma.h"
@@ -966,8 +947,6 @@ index 0000000000..3f489092df
+#include "qemu/main-loop.h"
+#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/memalign.h"
+
+#define DEBUG_VMA 0
+
@@ -1134,7 +1113,8 @@ index 0000000000..3f489092df
+ vmaw->stream_info[n].devname = g_strdup(devname);
+ vmaw->stream_info[n].size = size;
+
+ vmaw->stream_info[n].cluster_count = DIV_ROUND_UP(size, VMA_CLUSTER_SIZE);
+ vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
+ VMA_CLUSTER_SIZE;
+
+ vmaw->stream_count = n;
+
@@ -1149,10 +1129,10 @@ index 0000000000..3f489092df
+{
+ assert(qemu_in_coroutine());
+ AioContext *ctx = qemu_get_current_aio_context();
+ aio_set_fd_handler(ctx, fd, NULL, (IOHandler *)qemu_coroutine_enter, NULL,
+ aio_set_fd_handler(ctx, fd, false, NULL, (IOHandler *)qemu_coroutine_enter,
+ NULL, qemu_coroutine_self());
+ qemu_coroutine_yield();
+ aio_set_fd_handler(ctx, fd, NULL, NULL, NULL, NULL, NULL);
+ aio_set_fd_handler(ctx, fd, false, NULL, NULL, NULL, NULL);
+}
+
+static ssize_t coroutine_fn
@@ -1201,23 +1181,6 @@ index 0000000000..3f489092df
+ return (done == bytes) ? bytes : -1;
+}
+
+static bool is_path_tmpfs(const char *path) {
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = statfs(path, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret != 0) {
+ warn_report("statfs call for %s failed, assuming not tmpfs - %s\n",
+ path, strerror(errno));
+ return false;
+ }
+
+ return fs.f_type == TMPFS_MAGIC;
+}
+
+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
+{
+ const char *p;
@@ -1267,19 +1230,12 @@ index 0000000000..3f489092df
+ }
+ /* try to use O_NONBLOCK */
+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
+ } else {
+ gchar *dirname = g_path_get_dirname(filename);
+ oflags = O_NONBLOCK|O_WRONLY|O_EXCL;
+ if (!is_path_tmpfs(dirname)) {
+ oflags |= O_DIRECT;
+ }
+ g_free(dirname);
+ } else {
+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_EXCL;
+ vmaw->fd = qemu_create(filename, oflags, 0644, errp);
+ }
+
+ if (vmaw->fd < 0) {
+ error_free(*errp);
+ *errp = NULL;
+ error_setg(errp, "can't open file %s - %s\n", filename,
+ g_strerror(errno));
+ goto err;
@@ -1514,16 +1470,17 @@ index 0000000000..3f489092df
+ int i;
+
+ g_assert(vmaw != NULL);
+ g_assert(status != NULL);
+
+ status->status = vmaw->status;
+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
+ for (i = 0; i <= 255; i++) {
+ status->stream_info[i] = vmaw->stream_info[i];
+ if (status) {
+ status->status = vmaw->status;
+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
+ for (i = 0; i <= 255; i++) {
+ status->stream_info[i] = vmaw->stream_info[i];
+ }
+
+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
+ }
+
+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
+
+ status->closed = vmaw->closed;
+
+ return vmaw->status;
@@ -1759,10 +1716,10 @@ index 0000000000..3f489092df
+}
diff --git a/vma.c b/vma.c
new file mode 100644
index 0000000000..0e990b5e30
index 0000000000..df542b7732
--- /dev/null
+++ b/vma.c
@@ -0,0 +1,941 @@
@@ -0,0 +1,851 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -1780,13 +1737,13 @@ index 0000000000..0e990b5e30
+#include <glib.h>
+
+#include "vma.h"
+#include "qemu-common.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include "qemu/memalign.h"
+#include "qobject/qdict.h"
+#include "system/block-backend.h"
+#include "qapi/qmp/qdict.h"
+#include "sysemu/block-backend.h"
+
+static void help(void)
+{
@@ -1794,9 +1751,9 @@ index 0000000000..0e990b5e30
+ "usage: vma command [command options]\n"
+ "\n"
+ "vma list <filename>\n"
+ "vma config <filename> [-c <config>]\n"
+ "vma create <filename> [-c <config>] [-d format=<format>:<device name>=<path> [-d ...]]\n"
+ "vma extract <filename> [-d <drive-list>] [-r <fifo>] <targetdir>\n"
+ "vma config <filename> [-c config]\n"
+ "vma create <filename> [-c config] pathname ...\n"
+ "vma extract <filename> [-r <fifo>] <targetdir>\n"
+ "vma verify <filename> [-v]\n"
+ ;
+
@@ -1903,7 +1860,6 @@ index 0000000000..0e990b5e30
+ char *throttling_group;
+ char *cache;
+ bool write_zero;
+ bool skip;
+} RestoreMap;
+
+static bool try_parse_option(char **line, const char *optname, char **out, const char *inbuf) {
@@ -1941,10 +1897,9 @@ index 0000000000..0e990b5e30
+ const char *filename;
+ const char *dirname;
+ const char *readmap = NULL;
+ gchar **drive_list = NULL;
+
+ for (;;) {
+ c = getopt(argc, argv, "hvd:r:");
+ c = getopt(argc, argv, "hvr:");
+ if (c == -1) {
+ break;
+ }
@@ -1953,9 +1908,6 @@ index 0000000000..0e990b5e30
+ case 'h':
+ help();
+ break;
+ case 'd':
+ drive_list = g_strsplit(optarg, ",", 254);
+ break;
+ case 'r':
+ readmap = optarg;
+ break;
@@ -2015,89 +1967,76 @@ index 0000000000..0e990b5e30
+ char *bps = NULL;
+ char *group = NULL;
+ char *cache = NULL;
+ char *devname = NULL;
+ bool skip = false;
+ uint64_t bps_value = 0;
+ const char *path = NULL;
+ bool write_zero = true;
+
+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
+ break;
+ }
+ int len = strlen(line);
+ if (line[len - 1] == '\n') {
+ line[len - 1] = '\0';
+ len = len - 1;
+ if (len == 0) {
+ if (len == 1) {
+ break;
+ }
+ }
+
+ if (strncmp(line, "skip", 4) == 0) {
+ if (len < 6 || line[4] != '=') {
+ g_error("read map failed - option 'skip' has no value ('%s')",
+ inbuf);
+ } else {
+ devname = line + 5;
+ skip = true;
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
+ } else {
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
+ }
+
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
+
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ path = extract_devname(path, &devname, -1);
+ }
+
+ uint64_t bps_value = 0;
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
+
+ const char *path;
+ bool write_zero;
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ char *devname = NULL;
+ path = extract_devname(path, &devname, -1);
+ if (!devname) {
+ g_error("read map failed - no dev name specified ('%s')",
+ inbuf);
+ }
+
+ RestoreMap *restore_map = g_new0(RestoreMap, 1);
+ restore_map->devname = g_strdup(devname);
+ restore_map->path = g_strdup(path);
+ restore_map->format = format;
+ restore_map->throttling_bps = bps_value;
+ restore_map->throttling_group = group;
+ restore_map->cache = cache;
+ restore_map->write_zero = write_zero;
+ restore_map->skip = skip;
+ RestoreMap *map = g_new0(RestoreMap, 1);
+ map->devname = g_strdup(devname);
+ map->path = g_strdup(path);
+ map->format = format;
+ map->throttling_bps = bps_value;
+ map->throttling_group = group;
+ map->cache = cache;
+ map->write_zero = write_zero;
+
+ g_hash_table_insert(devmap, restore_map->devname, restore_map);
+ g_hash_table_insert(devmap, map->devname, map);
+
+ };
+ }
+
+ int i;
+ int vmstate_fd = -1;
+ bool drive_rename_bitmap[255];
+ memset(drive_rename_bitmap, 0, sizeof(drive_rename_bitmap));
+ guint8 vmstate_stream = 0;
+
+ BlockBackend *blk = NULL;
+
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
+ vmstate_stream = i;
+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
+ if (vmstate_fd < 0) {
@@ -2113,25 +2052,8 @@ index 0000000000..0e990b5e30
+ const char *cache = NULL;
+ int flags = BDRV_O_RDWR;
+ bool write_zero = true;
+ bool skip = false;
+
+ BlockBackend *blk = NULL;
+
+ if (drive_list) {
+ skip = true;
+ int j;
+ for (j = 0; drive_list[j]; j++) {
+ if (strcmp(drive_list[j], di->devname) == 0) {
+ skip = false;
+ drive_rename_bitmap[i] = true;
+ break;
+ }
+ }
+ } else {
+ drive_rename_bitmap[i] = true;
+ }
+
+ if (!skip && readmap) {
+ if (readmap) {
+ RestoreMap *map;
+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
+ if (map == NULL) {
@@ -2143,8 +2065,7 @@ index 0000000000..0e990b5e30
+ throttling_group = map->throttling_group;
+ cache = map->cache;
+ write_zero = map->write_zero;
+ skip = map->skip;
+ } else if (!skip) {
+ } else {
+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
+ dirname, di->devname);
+ printf("DEVINFO %s %zd\n", devfn, di->size);
@@ -2162,60 +2083,57 @@ index 0000000000..0e990b5e30
+ write_zero = false;
+ }
+
+ if (!skip) {
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too).
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
+
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
+
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
+
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
+
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
+ }
+
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
+ }
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too).
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
+
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, skip, &errp) < 0) {
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
+
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
+
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
+ }
+
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
+ }
+
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
+ g_error("%s", error_get_pretty(errp));
+ }
+
@@ -2225,10 +2143,6 @@ index 0000000000..0e990b5e30
+ }
+ }
+
+ if (drive_list) {
+ g_strfreev(drive_list);
+ }
+
+ if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
+ g_error("restore failed - %s", error_get_pretty(errp));
+ }
@@ -2236,7 +2150,7 @@ index 0000000000..0e990b5e30
+ if (!readmap) {
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && drive_rename_bitmap[i]) {
+ if (di && (i != vmstate_stream)) {
+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
+ dirname, di->devname);
+ char *fn = g_strdup_printf("%s/disk-%s.raw",
@@ -2251,6 +2165,8 @@ index 0000000000..0e990b5e30
+
+ vma_reader_destroy(vmar);
+
+ blk_unref(blk);
+
+ bdrv_close_all();
+
+ return ret;
@@ -2335,7 +2251,7 @@ index 0000000000..0e990b5e30
+ struct iovec iov;
+ QEMUIOVector qiov;
+
+ int64_t start, end, readlen;
+ int64_t start, end;
+ int ret = 0;
+
+ unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
@@ -2349,24 +2265,16 @@ index 0000000000..0e990b5e30
+ iov.iov_len = VMA_CLUSTER_SIZE;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ if (start + 1 == end) {
+ memset(buf, 0, VMA_CLUSTER_SIZE);
+ readlen = job->len - start * VMA_CLUSTER_SIZE;
+ assert(readlen > 0 && readlen <= VMA_CLUSTER_SIZE);
+ } else {
+ readlen = VMA_CLUSTER_SIZE;
+ }
+
+ ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
+ readlen, &qiov, 0);
+ VMA_CLUSTER_SIZE, &qiov, 0);
+ if (ret < 0) {
+ vma_writer_set_error(job->vmaw, "read error");
+ vma_writer_set_error(job->vmaw, "read error", -1);
+ goto out;
+ }
+
+ size_t zb = 0;
+ if (vma_writer_write(job->vmaw, job->dev_id, start, buf, &zb) < 0) {
+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed");
+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed", -1);
+ goto out;
+ }
+ }
@@ -2384,16 +2292,14 @@ index 0000000000..0e990b5e30
+
+static int create_archive(int argc, char **argv)
+{
+ int c;
+ int i, c;
+ int verbose = 0;
+ bool expect_format = true;
+ const char *archivename;
+ GList *backup_coroutines = NULL;
+ GList *config_files = NULL;
+ GList *disk_infos = NULL;
+
+ for (;;) {
+ c = getopt(argc, argv, "hvc:d:");
+ c = getopt(argc, argv, "hvc:");
+ if (c == -1) {
+ break;
+ }
@@ -2405,9 +2311,6 @@ index 0000000000..0e990b5e30
+ case 'c':
+ config_files = g_list_append(config_files, optarg);
+ break;
+ case 'd':
+ disk_infos = g_list_append(disk_infos, optarg);
+ break;
+ case 'v':
+ verbose = 1;
+ break;
@@ -2453,48 +2356,16 @@ index 0000000000..0e990b5e30
+ l = g_list_next(l);
+ }
+
+ /*
+ * Don't allow mixing new and old way to specify disks.
+ * TODO PVE 9 drop old way and always require format.
+ */
+ if (optind < argc && g_list_first(disk_infos)) {
+ unlink(archivename);
+ g_error("Unexpected extra argument - specify all devices via '-d'");
+ }
+
+ while (optind < argc) {
+ expect_format = false;
+ disk_infos = g_list_append(disk_infos, argv[optind++]);
+ }
+
+ int devcount = 0;
+ GList *disk_l = disk_infos;
+ while (disk_l && disk_l->data) {
+ char *disk_info = disk_l->data;
+ const char *path = NULL;
+ while (optind < argc) {
+ const char *path = argv[optind++];
+ char *devname = NULL;
+ char *format = NULL;
+ QDict *options = qdict_new();
+
+ if (try_parse_option(&disk_info, "format", &format, disk_info)) {
+ qdict_put_str(options, "driver", format);
+ } else {
+ if (expect_format) {
+ unlink(archivename);
+ g_error("No format specified for device: '%s'", disk_info);
+ } else {
+ g_warning("Specifying a device without a format is deprecated"
+ " - use '-d format=<format>:%s'",
+ disk_info);
+ }
+ }
+
+ path = extract_devname(disk_info, &devname, devcount++);
+ path = extract_devname(path, &devname, devcount++);
+
+ Error *errp = NULL;
+ BlockBackend *target;
+
+ target = blk_new_open(path, NULL, options, 0, &errp);
+ target = blk_new_open(path, NULL, NULL, 0, &errp);
+ if (!target) {
+ unlink(archivename);
+ g_error("bdrv_open '%s' failed - %s", path, error_get_pretty(errp));
@@ -2516,8 +2387,6 @@ index 0000000000..0e990b5e30
+ // Don't enter coroutine yet, because it might write the header before
+ // all streams can be registered.
+ backup_coroutines = g_list_append(backup_coroutines, co);
+
+ disk_l = g_list_next(disk_l);
+ }
+
+ VmaStatus vmastat;
@@ -2581,7 +2450,6 @@ index 0000000000..0e990b5e30
+ vma_writer_get_status(vmaw, &vmastat);
+
+ if (verbose) {
+ int i;
+ for (i = 0; i < 256; i++) {
+ VmaStreamInfo *si = &vmastat.stream_info[i];
+ if (si->size) {
@@ -2599,7 +2467,6 @@ index 0000000000..0e990b5e30
+
+ g_list_free(backup_coroutines);
+ g_list_free(config_files);
+ g_list_free(disk_infos);
+ vma_writer_destroy(vmaw);
+ return 0;
+}
@@ -2706,7 +2573,7 @@ index 0000000000..0e990b5e30
+}
diff --git a/vma.h b/vma.h
new file mode 100644
index 0000000000..86d2873aa5
index 0000000000..c895c97f6d
--- /dev/null
+++ b/vma.h
@@ -0,0 +1,150 @@
@@ -2844,7 +2711,7 @@ index 0000000000..86d2873aa5
+int coroutine_fn vma_writer_flush_output(VmaWriter *vmaw);
+
+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
+
+
+VmaReader *vma_reader_create(const char *filename, Error **errp);
@@ -2854,7 +2721,7 @@ index 0000000000..86d2873aa5
+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
+ BlockBackend *target, bool write_zeroes,
+ bool skip, Error **errp);
+ Error **errp);
+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
+ Error **errp);
+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);

View File

@@ -9,23 +9,21 @@ Subject: [PATCH] PVE-Backup: add backup-dump block driver
- job.c: make job_should_pause non-static
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to coroutine changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/backup-dump.c | 172 +++++++++++++++++++++++++++++++
block/backup.c | 30 ++----
block/meson.build | 1 +
include/block/block_int-common.h | 35 +++++++
job.c | 3 +-
5 files changed, 218 insertions(+), 23 deletions(-)
block/backup-dump.c | 168 ++++++++++++++++++++++++++++++++++++++
block/backup.c | 32 +++-----
block/meson.build | 1 +
include/block/block_int.h | 35 ++++++++
job.c | 3 +-
5 files changed, 216 insertions(+), 23 deletions(-)
create mode 100644 block/backup-dump.c
diff --git a/block/backup-dump.c b/block/backup-dump.c
new file mode 100644
index 0000000000..354593bc10
index 0000000000..93d7f46950
--- /dev/null
+++ b/block/backup-dump.c
@@ -0,0 +1,172 @@
@@ -0,0 +1,168 @@
+/*
+ * BlockDriver to send backup data stream to a callback function
+ *
@@ -37,8 +35,7 @@ index 0000000000..354593bc10
+ */
+
+#include "qemu/osdep.h"
+
+#include "qobject/qdict.h"
+#include "qemu-common.h"
+#include "qom/object_interfaces.h"
+#include "block/block_int.h"
+
@@ -49,8 +46,7 @@ index 0000000000..354593bc10
+ void *dump_cb_data;
+} BDRVBackupDumpState;
+
+static coroutine_fn int qemu_backup_dump_co_get_info(BlockDriverState *bs,
+ BlockDriverInfo *bdi)
+static int qemu_backup_dump_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVBackupDumpState *s = bs->opaque;
+
@@ -91,7 +87,7 @@ index 0000000000..354593bc10
+ /* Nothing to do. */
+}
+
+static coroutine_fn int64_t qemu_backup_dump_co_getlength(BlockDriverState *bs)
+static int64_t qemu_backup_dump_getlength(BlockDriverState *bs)
+{
+ BDRVBackupDumpState *s = bs->opaque;
+
@@ -151,8 +147,8 @@ index 0000000000..354593bc10
+
+ .bdrv_close = qemu_backup_dump_close,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_getlength = qemu_backup_dump_co_getlength,
+ .bdrv_co_get_info = qemu_backup_dump_co_get_info,
+ .bdrv_getlength = qemu_backup_dump_getlength,
+ .bdrv_get_info = qemu_backup_dump_get_info,
+
+ .bdrv_co_writev = qemu_backup_dump_co_writev,
+
@@ -171,7 +167,7 @@ index 0000000000..354593bc10
+block_init(bdrv_backup_dump_init);
+
+
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
+BlockDriverState *bdrv_backup_dump_create(
+ int dump_cb_block_size,
+ uint64_t byte_size,
+ BackupDumpFunc *dump_cb,
@@ -179,11 +175,9 @@ index 0000000000..354593bc10
+ Error **errp)
+{
+ BDRVBackupDumpState *state;
+ BlockDriverState *bs = bdrv_new_open_driver(
+ &bdrv_backup_dump_drive, NULL, BDRV_O_RDWR, errp);
+
+ QDict *options = qdict_new();
+ qdict_put_str(options, "driver", "backup-dump-drive");
+
+ BlockDriverState *bs = bdrv_co_open(NULL, NULL, options, BDRV_O_RDWR, errp);
+ if (!bs) {
+ return NULL;
+ }
@@ -199,16 +193,16 @@ index 0000000000..354593bc10
+ return bs;
+}
diff --git a/block/backup.c b/block/backup.c
index cc747e9163..6f7c45f922 100644
index bd3614ce70..8bae9b060e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -29,28 +29,6 @@
@@ -31,28 +31,6 @@
#include "block/copy-before-write.h"
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
-typedef struct BackupBlockJob {
- BlockJob common;
- BlockDriverState *cbw;
- BlockDriverState *backup_top;
- BlockDriverState *source_bs;
- BlockDriverState *target_bs;
-
@@ -231,10 +225,11 @@ index cc747e9163..6f7c45f922 100644
static const BlockJobDriver backup_job_driver;
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -462,6 +440,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
@@ -504,6 +482,16 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
cluster_size = block_copy_cluster_size(bcs);
+ cluster_size = backup_calculate_cluster_size(target, errp);
+ if (cluster_size < 0) {
+ goto error;
+ }
@@ -243,11 +238,12 @@ index cc747e9163..6f7c45f922 100644
+ if (bdrv_get_info(bs, &bdi) == 0) {
+ cluster_size = MAX(cluster_size, bdi.cluster_size);
+ }
if (perf->max_chunk && perf->max_chunk < cluster_size) {
error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
+
/*
* If source is in backing chain of target assume that target is going to be
* used for "image fleecing", i.e. it should represent a kind of snapshot of
diff --git a/block/meson.build b/block/meson.build
index 1373612c10..6278c4af0f 100644
index 9ce9246194..19bc2b7cbb 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,6 +4,7 @@ block_ss.add(files(
@@ -255,28 +251,28 @@ index 1373612c10..6278c4af0f 100644
'amend.c',
'backup.c',
+ 'backup-dump.c',
'backup-top.c',
'blkdebug.c',
'blklogwrites.c',
'blkverify.c',
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index ebb4e56a50..e717a74e5f 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 11442893d0..8f6135e6a5 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
#include "block/aio.h"
#include "block/block-common.h"
#include "block/accounting.h"
#include "block/block.h"
+#include "block/block-copy.h"
#include "block/block-global-state.h"
#include "block/snapshot.h"
#include "qemu/iov.h"
@@ -60,6 +61,40 @@
#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
@@ -63,6 +64,40 @@
#define BLOCK_PROBE_BUF_SIZE 512
+typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
+
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
+BlockDriverState *bdrv_backuo_dump_create(
+ int dump_cb_block_size,
+ uint64_t byte_size,
+ BackupDumpFunc *dump_cb,
@@ -288,7 +284,7 @@ index ebb4e56a50..e717a74e5f 100644
+typedef struct BlockCopyState BlockCopyState;
+typedef struct BackupBlockJob {
+ BlockJob common;
+ BlockDriverState *cbw;
+ BlockDriverState *backup_top;
+ BlockDriverState *source_bs;
+ BlockDriverState *target_bs;
+
@@ -312,16 +308,16 @@ index ebb4e56a50..e717a74e5f 100644
BDRV_TRACKED_READ,
BDRV_TRACKED_WRITE,
diff --git a/job.c b/job.c
index 0653bc2ba6..b981070ee8 100644
index e7a5d28854..44eec9a441 100644
--- a/job.c
+++ b/job.c
@@ -337,7 +337,8 @@ static bool job_started_locked(Job *job)
@@ -269,7 +269,8 @@ static bool job_started(Job *job)
return job->co;
}
/* Called with job_mutex held. */
-static bool job_should_pause_locked(Job *job)
+bool job_should_pause_locked(Job *job);
+bool job_should_pause_locked(Job *job)
-static bool job_should_pause(Job *job)
+bool job_should_pause(Job *job);
+bool job_should_pause(Job *job)
{
return job->pause_count > 0;
}

View File

@@ -1,59 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 2 Mar 2022 08:35:05 +0100
Subject: [PATCH] block/backup: move bcs bitmap initialization to job creation
For backing up the state of multiple disks from the same time, a job
for each disk has to be created. It's convenient if the jobs don't
have to be started at the same time and if operation of the VM can be
resumed after job creation. This would lead to a window between job
creation and running the job, where writes can happen. But no writes
should happen between setting up the copy-before-write filter and
setting up the block copy state bitmap, because then new writes would
just pass through.
Commit 06e0a9c16405c0a4c1eca33cf286cc04c42066a2 moved initialization of
the bitmap to setting up the copy-before-write filter when sync_mode
is not MIRROR_SYNC_MODE_BITMAP. Ensure that the bitmap is initialized
upon job creation for the remaining case too, by moving the
backup_init_bcs_bitmap call to backup_job_create.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/backup.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index 79652bf57b..cc747e9163 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
true);
} else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
/*
- * We can't hog the coroutine to initialize this thoroughly.
- * Set a flag and resume work when we are able to yield safely.
+ * Initialization is costly here. Simply set a flag and let the
+ * backup_run coroutine resume work once it can yield safely.
*/
block_copy_set_skip_unallocated(job->bcs, true);
}
@@ -252,8 +252,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
int ret;
- backup_init_bcs_bitmap(s);
-
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0;
int64_t count;
@@ -502,6 +500,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
&error_abort);
bdrv_graph_wrunlock();
+ backup_init_bcs_bitmap(job);
+
return &job->common;
error:

File diff suppressed because it is too large Load Diff

View File

@@ -5,35 +5,33 @@ Subject: [PATCH] PVE-Backup: pbs-restore - new command to restore from proxmox
backup server
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[WB: add namespace support]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
meson.build | 4 +
pbs-restore.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 240 insertions(+)
pbs-restore.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 228 insertions(+)
create mode 100644 pbs-restore.c
diff --git a/meson.build b/meson.build
index 1f74de1d93..8508aab9c9 100644
index 7d7e474313..dd1c5bdb4e 100644
--- a/meson.build
+++ b/meson.build
@@ -4373,6 +4373,10 @@ if have_tools
@@ -2749,6 +2749,10 @@ if have_tools
vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
dependencies: [authz, block, crypto, io, qemuutil, qom], install: true)
dependencies: [authz, block, crypto, io, qom], install: true)
+ pbs_restore = executable('pbs-restore', files('pbs-restore.c') + genh,
+ dependencies: [authz, block, crypto, io, qemuutil, qom,
+ dependencies: [authz, block, crypto, io, qom,
+ libproxmox_backup_qemu], install: true)
+
subdir('storage-daemon')
foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
subdir('contrib/rdmacm-mux')
subdir('contrib/elf2dmp')
diff --git a/pbs-restore.c b/pbs-restore.c
new file mode 100644
index 0000000000..f165f418af
index 0000000000..4d3f925a1b
--- /dev/null
+++ b/pbs-restore.c
@@ -0,0 +1,236 @@
@@ -0,0 +1,224 @@
+/*
+ * Qemu image restore helper for Proxmox Backup
+ *
@@ -52,20 +50,21 @@ index 0000000000..f165f418af
+#include <getopt.h>
+#include <string.h>
+
+#include "qemu-common.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "qobject/qdict.h"
+#include "system/block-backend.h"
+#include "qapi/qmp/qdict.h"
+#include "sysemu/block-backend.h"
+
+#include <proxmox-backup-qemu.h>
+
+static void help(void)
+{
+ const char *help_msg =
+ "usage: pbs-restore [--repository <repo>] [--ns namespace] snapshot archive-name target [command options]\n"
+ "usage: pbs-restore [--repository <repo>] snapshot archive-name target [command options]\n"
+ ;
+
+ printf("%s", help_msg);
@@ -97,7 +96,7 @@ index 0000000000..f165f418af
+ }
+ res = blk_pwrite_zeroes(callback_data->target, offset, data_len, 0);
+ } else {
+ res = blk_pwrite(callback_data->target, offset, data_len, data, 0);
+ res = blk_pwrite(callback_data->target, offset, data, data_len, 0);
+ }
+
+ if (res < 0) {
@@ -113,7 +112,6 @@ index 0000000000..f165f418af
+ Error *main_loop_err = NULL;
+ const char *format = "raw";
+ const char *repository = NULL;
+ const char *backup_ns = NULL;
+ const char *keyfile = NULL;
+ int verbose = false;
+ bool skip_zero = false;
@@ -127,7 +125,6 @@ index 0000000000..f165f418af
+ {"verbose", no_argument, 0, 'v'},
+ {"format", required_argument, 0, 'f'},
+ {"repository", required_argument, 0, 'r'},
+ {"ns", required_argument, 0, 'n'},
+ {"keyfile", required_argument, 0, 'k'},
+ {0, 0, 0, 0}
+ };
@@ -148,9 +145,6 @@ index 0000000000..f165f418af
+ case 'r':
+ repository = g_strdup(argv[optind - 1]);
+ break;
+ case 'n':
+ backup_ns = g_strdup(argv[optind - 1]);
+ break;
+ case 'k':
+ keyfile = g_strdup(argv[optind - 1]);
+ break;
@@ -201,16 +195,8 @@ index 0000000000..f165f418af
+ fprintf(stderr, "connecting to repository '%s'\n", repository);
+ }
+ char *pbs_error = NULL;
+ ProxmoxRestoreHandle *conn = proxmox_restore_new_ns(
+ repository,
+ snapshot,
+ backup_ns,
+ password,
+ keyfile,
+ key_password,
+ fingerprint,
+ &pbs_error
+ );
+ ProxmoxRestoreHandle *conn = proxmox_restore_new(
+ repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
+ if (conn == NULL) {
+ fprintf(stderr, "restore failed: %s\n", pbs_error);
+ return -1;

View File

@@ -0,0 +1,452 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 29 Jun 2020 11:06:03 +0200
Subject: [PATCH] PVE-Backup: Add dirty-bitmap tracking for incremental backups
Uses QEMU's existing MIRROR_SYNC_MODE_BITMAP and a dirty-bitmap on top
of all backed-up drives. This will only execute the data-write callback
for any changed chunks, the PBS rust code will reuse chunks from the
previous index for everything it doesn't receive if reuse_index is true.
On error or cancellation, remove all dirty bitmaps to ensure
consistency.
Add PBS/incremental specific information to query backup info QMP and
HMP commands.
Only supported for PBS backups.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
monitor/hmp-cmds.c | 45 ++++++++++----
proxmox-backup-client.c | 3 +-
proxmox-backup-client.h | 1 +
pve-backup.c | 103 ++++++++++++++++++++++++++++++---
qapi/block-core.json | 12 +++-
6 files changed, 142 insertions(+), 23 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 1e29681d30..3fca3ce3e9 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,6 +1042,7 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
+ false, false, // PBS incremental
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 7efcd2d641..b2b5f1298b 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -221,19 +221,42 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "End time: %s", ctime(&info->end_time));
}
- int per = (info->has_total && info->total &&
- info->has_transferred && info->transferred) ?
- (info->transferred * 100)/info->total : 0;
- int zero_per = (info->has_total && info->total &&
- info->has_zero_bytes && info->zero_bytes) ?
- (info->zero_bytes * 100)/info->total : 0;
monitor_printf(mon, "Backup file: %s\n", info->backup_file);
monitor_printf(mon, "Backup uuid: %s\n", info->uuid);
- monitor_printf(mon, "Total size: %zd\n", info->total);
- monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
- info->transferred, per);
- monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
- info->zero_bytes, zero_per);
+
+ if (!(info->has_total && info->total)) {
+ // this should not happen normally
+ monitor_printf(mon, "Total size: %d\n", 0);
+ } else {
+ bool incremental = false;
+ size_t total_or_dirty = info->total;
+ if (info->has_transferred) {
+ if (info->has_dirty && info->dirty) {
+ if (info->dirty < info->total) {
+ total_or_dirty = info->dirty;
+ incremental = true;
+ }
+ }
+ }
+
+ int per = (info->transferred * 100)/total_or_dirty;
+
+ monitor_printf(mon, "Backup mode: %s\n", incremental ? "incremental" : "full");
+
+ int zero_per = (info->has_zero_bytes && info->zero_bytes) ?
+ (info->zero_bytes * 100)/info->total : 0;
+ monitor_printf(mon, "Total size: %zd\n", info->total);
+ monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
+ info->transferred, per);
+ monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
+ info->zero_bytes, zero_per);
+
+ if (info->has_reused) {
+ int reused_per = (info->reused * 100)/total_or_dirty;
+ monitor_printf(mon, "Reused bytes: %zd (%d%%)\n",
+ info->reused, reused_per);
+ }
+ }
}
qapi_free_BackupStatus(info);
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index a8f6653a81..4ce7bc0b5e 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -89,6 +89,7 @@ proxmox_backup_co_register_image(
ProxmoxBackupHandle *pbs,
const char *device_name,
uint64_t size,
+ bool incremental,
Error **errp)
{
Coroutine *co = qemu_coroutine_self();
@@ -98,7 +99,7 @@ proxmox_backup_co_register_image(
int pbs_res = -1;
proxmox_backup_register_image_async(
- pbs, device_name, size ,proxmox_backup_schedule_wake, &waker, &pbs_res, &pbs_err);
+ pbs, device_name, size, incremental, proxmox_backup_schedule_wake, &waker, &pbs_res, &pbs_err);
qemu_coroutine_yield();
if (pbs_res < 0) {
if (errp) error_setg(errp, "backup register image failed: %s", pbs_err ? pbs_err : "unknown error");
diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h
index 1dda8b7d8f..8cbf645b2c 100644
--- a/proxmox-backup-client.h
+++ b/proxmox-backup-client.h
@@ -32,6 +32,7 @@ proxmox_backup_co_register_image(
ProxmoxBackupHandle *pbs,
const char *device_name,
uint64_t size,
+ bool incremental,
Error **errp);
diff --git a/pve-backup.c b/pve-backup.c
index 66868dec14..6cdbd40529 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -28,6 +28,8 @@
*
*/
+const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
+
static struct PVEBackupState {
struct {
// Everithing accessed from qmp_backup_query command is protected using lock
@@ -39,7 +41,9 @@ static struct PVEBackupState {
uuid_t uuid;
char uuid_str[37];
size_t total;
+ size_t dirty;
size_t transferred;
+ size_t reused;
size_t zero_bytes;
} stat;
int64_t speed;
@@ -66,6 +70,7 @@ typedef struct PVEBackupDevInfo {
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
+ BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
} PVEBackupDevInfo;
@@ -105,11 +110,12 @@ static bool pvebackup_error_or_canceled(void)
return error_or_canceled;
}
-static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
+static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes, size_t reused)
{
qemu_mutex_lock(&backup_state.stat.lock);
backup_state.stat.zero_bytes += zero_bytes;
backup_state.stat.transferred += transferred;
+ backup_state.stat.reused += reused;
qemu_mutex_unlock(&backup_state.stat.lock);
}
@@ -148,7 +154,8 @@ pvebackup_co_dump_pbs_cb(
pvebackup_propagate_error(local_err);
return pbs_res;
} else {
- pvebackup_add_transfered_bytes(size, !buf ? size : 0);
+ size_t reused = (pbs_res == 0) ? size : 0;
+ pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
}
return size;
@@ -208,11 +215,11 @@ pvebackup_co_dump_vma_cb(
} else {
if (remaining >= VMA_CLUSTER_SIZE) {
assert(ret == VMA_CLUSTER_SIZE);
- pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes);
+ pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes, 0);
remaining -= VMA_CLUSTER_SIZE;
} else {
assert(ret == remaining);
- pvebackup_add_transfered_bytes(remaining, zero_bytes);
+ pvebackup_add_transfered_bytes(remaining, zero_bytes, 0);
remaining = 0;
}
}
@@ -248,6 +255,18 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
if (local_err != NULL) {
pvebackup_propagate_error(local_err);
}
+ } else {
+ // on error or cancel we cannot ensure synchronization of dirty
+ // bitmaps with backup server, so remove all and do full backup next
+ GList *l = backup_state.di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ if (di->bitmap) {
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+ }
}
proxmox_backup_disconnect(backup_state.pbs);
@@ -303,6 +322,12 @@ static void pvebackup_complete_cb(void *opaque, int ret)
// remove self from job queue
backup_state.di_list = g_list_remove(backup_state.di_list, di);
+ if (di->bitmap && ret < 0) {
+ // on error or cancel we cannot ensure synchronization of dirty
+ // bitmaps with backup server, so remove all and do full backup next
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+
g_free(di);
qemu_mutex_unlock(&backup_state.backup_mutex);
@@ -472,12 +497,18 @@ static bool create_backup_jobs(void) {
assert(di->target != NULL);
+ MirrorSyncMode sync_mode = MIRROR_SYNC_MODE_FULL;
+ BitmapSyncMode bitmap_mode = BITMAP_SYNC_MODE_NEVER;
+ if (di->bitmap) {
+ sync_mode = MIRROR_SYNC_MODE_BITMAP;
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ }
AioContext *aio_context = bdrv_get_aio_context(di->bs);
aio_context_acquire(aio_context);
BlockJob *job = backup_job_create(
- NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
- BITMAP_SYNC_MODE_NEVER, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+ NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+ bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
aio_context_release(aio_context);
@@ -528,6 +559,8 @@ typedef struct QmpBackupTask {
const char *fingerprint;
bool has_fingerprint;
int64_t backup_time;
+ bool has_use_dirty_bitmap;
+ bool use_dirty_bitmap;
bool has_format;
BackupFormat format;
bool has_config_file;
@@ -619,6 +652,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
+ size_t dirty = 0;
l = di_list;
while (l) {
@@ -656,6 +690,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
+ bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -675,7 +711,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
goto err;
}
- if (proxmox_backup_co_connect(pbs, task->errp) < 0)
+ int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ if (connect_result < 0)
goto err;
/* register all devices */
@@ -686,9 +723,40 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *devname = bdrv_get_device_name(di->bs);
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, task->errp);
- if (dev_id < 0)
+ BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
+ bool expect_only_dirty = false;
+
+ if (use_dirty_bitmap) {
+ if (bitmap == NULL) {
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ if (!bitmap) {
+ goto err;
+ }
+ } else {
+ expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
+ }
+
+ if (expect_only_dirty) {
+ dirty += bdrv_get_dirty_count(bitmap);
+ } else {
+ /* mark entire bitmap as dirty to make full backup */
+ bdrv_set_dirty_bitmap(bitmap, 0, di->size);
+ dirty += di->size;
+ }
+ di->bitmap = bitmap;
+ } else {
+ dirty += di->size;
+
+ /* after a full backup the old dirty bitmap is invalid anyway */
+ if (bitmap != NULL) {
+ bdrv_release_dirty_bitmap(bitmap);
+ }
+ }
+
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ if (dev_id < 0) {
goto err;
+ }
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
goto err;
@@ -697,6 +765,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->dev_id = dev_id;
}
} else if (format == BACKUP_FORMAT_VMA) {
+ dirty = total;
+
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
@@ -724,6 +794,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
} else if (format == BACKUP_FORMAT_DIR) {
+ dirty = total;
+
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
@@ -796,8 +868,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
+ backup_state.stat.dirty = dirty;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -821,6 +895,10 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ if (di->bitmap) {
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+
if (di->target) {
bdrv_unref(di->target);
}
@@ -862,6 +940,7 @@ UuidInfo *qmp_backup(
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -880,6 +959,8 @@ UuidInfo *qmp_backup(
.backup_id = backup_id,
.has_backup_time = has_backup_time,
.backup_time = backup_time,
+ .has_use_dirty_bitmap = has_use_dirty_bitmap,
+ .use_dirty_bitmap = use_dirty_bitmap,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
@@ -948,10 +1029,14 @@ BackupStatus *qmp_query_backup(Error **errp)
info->has_total = true;
info->total = backup_state.stat.total;
+ info->has_dirty = true;
+ info->dirty = backup_state.stat.dirty;
info->has_zero_bytes = true;
info->zero_bytes = backup_state.stat.zero_bytes;
info->has_transferred = true;
info->transferred = backup_state.stat.transferred;
+ info->has_reused = true;
+ info->reused = backup_state.stat.reused;
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c5d604693f..a138ad08d4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -712,8 +712,13 @@
#
# @total: total amount of bytes involved in the backup process
#
+# @dirty: with incremental mode (PBS) this is the amount of bytes involved
+# in the backup process which are marked dirty.
+#
# @transferred: amount of bytes already backed up.
#
+# @reused: amount of bytes reused due to deduplication.
+#
# @zero-bytes: amount of 'zero' bytes detected.
#
# @start-time: time (epoch) when backup job started.
@@ -726,8 +731,8 @@
#
##
{ 'struct': 'BackupStatus',
- 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int',
- '*transferred': 'int', '*zero-bytes': 'int',
+ 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
+ '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
'*backup-file': 'str', '*uuid': 'str' } }
@@ -770,6 +775,8 @@
#
# @backup-time: backup timestamp (Unix epoch, required for format 'pbs')
#
+# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
+#
# Returns: the uuid of the backup job
#
##
@@ -780,6 +787,7 @@
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
+ '*use-dirty-bitmap': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,219 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Thu, 9 Jul 2020 12:53:08 +0200
Subject: [PATCH] PVE: various PBS fixes
pbs: fix crypt and compress parameters
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
PVE: handle PBS write callback with big blocks correctly
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
PVE: add zero block handling to PBS dump callback
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 ++-
pve-backup.c | 59 ++++++++++++++++++++++++++--------
qapi/block-core.json | 6 ++++
3 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 3fca3ce3e9..69254396d5 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,7 +1042,9 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
- false, false, // PBS incremental
+ false, false, // PBS use-dirty-bitmap
+ false, false, // PBS compress
+ false, false, // PBS encrypt
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/pve-backup.c b/pve-backup.c
index 6cdbd40529..7527885251 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -8,6 +8,7 @@
#include "block/blockjob.h"
#include "qapi/qapi-commands-block.h"
#include "qapi/qmp/qerror.h"
+#include "qemu/cutils.h"
/* PVE backup state and related function */
@@ -67,6 +68,7 @@ opts_init(pvebackup_init);
typedef struct PVEBackupDevInfo {
BlockDriverState *bs;
size_t size;
+ uint64_t block_size;
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
@@ -135,10 +137,13 @@ pvebackup_co_dump_pbs_cb(
PVEBackupDevInfo *di = opaque;
assert(backup_state.pbs);
+ assert(buf);
Error *local_err = NULL;
int pbs_res = -1;
+ bool is_zero_block = size == di->block_size && buffer_is_zero(buf, size);
+
qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
@@ -147,16 +152,28 @@ pvebackup_co_dump_pbs_cb(
return -1;
}
- pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+ uint64_t transferred = 0;
+ uint64_t reused = 0;
+ while (transferred < size) {
+ uint64_t left = size - transferred;
+ uint64_t to_transfer = left < di->block_size ? left : di->block_size;
+
+ pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id,
+ is_zero_block ? NULL : buf + transferred, start + transferred,
+ to_transfer, &local_err);
+ transferred += to_transfer;
+
+ if (pbs_res < 0) {
+ pvebackup_propagate_error(local_err);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return pbs_res;
+ }
+
+ reused += pbs_res == 0 ? to_transfer : 0;
+ }
+
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
-
- if (pbs_res < 0) {
- pvebackup_propagate_error(local_err);
- return pbs_res;
- } else {
- size_t reused = (pbs_res == 0) ? size : 0;
- pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
- }
+ pvebackup_add_transfered_bytes(size, is_zero_block ? size : 0, reused);
return size;
}
@@ -178,6 +195,7 @@ pvebackup_co_dump_vma_cb(
int ret = -1;
assert(backup_state.vmaw);
+ assert(buf);
uint64_t remaining = size;
@@ -204,9 +222,7 @@ pvebackup_co_dump_vma_cb(
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
++cluster_num;
- if (buf) {
- buf += VMA_CLUSTER_SIZE;
- }
+ buf += VMA_CLUSTER_SIZE;
if (ret < 0) {
Error *local_err = NULL;
vma_writer_error_propagate(backup_state.vmaw, &local_err);
@@ -569,6 +585,10 @@ typedef struct QmpBackupTask {
const char *firewall_file;
bool has_devlist;
const char *devlist;
+ bool has_compress;
+ bool compress;
+ bool has_encrypt;
+ bool encrypt;
bool has_speed;
int64_t speed;
Error **errp;
@@ -692,6 +712,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -701,8 +722,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->has_password ? task->password : NULL,
task->has_keyfile ? task->keyfile : NULL,
task->has_key_password ? task->key_password : NULL,
+ task->has_compress ? task->compress : true,
+ task->has_encrypt ? task->encrypt : task->has_keyfile,
task->has_fingerprint ? task->fingerprint : NULL,
- &pbs_err);
+ &pbs_err);
if (!pbs) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
@@ -721,6 +744,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ di->block_size = dump_cb_block_size;
+
const char *devname = bdrv_get_device_name(di->bs);
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
@@ -941,6 +966,8 @@ UuidInfo *qmp_backup(
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -951,6 +978,8 @@ UuidInfo *qmp_backup(
.backup_file = backup_file,
.has_password = has_password,
.password = password,
+ .has_keyfile = has_keyfile,
+ .keyfile = keyfile,
.has_key_password = has_key_password,
.key_password = key_password,
.has_fingerprint = has_fingerprint,
@@ -961,6 +990,10 @@ UuidInfo *qmp_backup(
.backup_time = backup_time,
.has_use_dirty_bitmap = has_use_dirty_bitmap,
.use_dirty_bitmap = use_dirty_bitmap,
+ .has_compress = has_compress,
+ .compress = compress,
+ .has_encrypt = has_encrypt,
+ .encrypt = encrypt,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index a138ad08d4..a75f1b4687 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -777,6 +777,10 @@
#
# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
#
+# @compress: use compression (optional for format 'pbs', defaults to true)
+#
+# @encrypt: use encryption (optional for format 'pbs', defaults to true if there is a keyfile)
+#
# Returns: the uuid of the backup job
#
##
@@ -788,6 +792,8 @@
'*backup-id': 'str',
'*backup-time': 'int',
'*use-dirty-bitmap': 'bool',
+ '*compress': 'bool',
+ '*encrypt': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -7,58 +7,51 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[error cleanups, file_open implementation]
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[WB: add namespace support]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[FE: adapt to changed function signatures
make pbs_co_preadv return values consistent with QEMU
getlength is now a coroutine function]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/meson.build | 2 +
block/pbs.c | 306 +++++++++++++++++++++++++++++++++++++++++++
meson.build | 2 +-
qapi/block-core.json | 29 ++++
qapi/pragma.json | 1 +
5 files changed, 339 insertions(+), 1 deletion(-)
block/meson.build | 3 +
block/pbs.c | 271 +++++++++++++++++++++++++++++++++++++++++++
configure | 9 ++
meson.build | 1 +
qapi/block-core.json | 13 +++
5 files changed, 297 insertions(+)
create mode 100644 block/pbs.c
diff --git a/block/meson.build b/block/meson.build
index d1b16e40e9..d243372c41 100644
index 9e433daf2e..e3ed5ac97c 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -49,6 +49,8 @@ block_ss.add(files(
@@ -51,6 +51,9 @@ block_ss.add(files(
'../pve-backup.c',
), libproxmox_backup_qemu)
+block_ss.add(files('pbs.c'), libproxmox_backup_qemu)
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: files('pbs.c'))
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: libproxmox_backup_qemu)
+
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/pbs.c b/block/pbs.c
new file mode 100644
index 0000000000..3e41421716
index 0000000000..78dad0dcc4
--- /dev/null
+++ b/block/pbs.c
@@ -0,0 +1,306 @@
@@ -0,0 +1,271 @@
+/*
+ * Proxmox Backup Server read-only block driver
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qobject/qdict.h"
+#include "qobject/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+#include "qemu/cutils.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+
+#include <proxmox-backup-qemu.h>
+
+#define PBS_OPT_REPOSITORY "repository"
+#define PBS_OPT_NAMESPACE "namespace"
+#define PBS_OPT_SNAPSHOT "snapshot"
+#define PBS_OPT_ARCHIVE "archive"
+#define PBS_OPT_KEYFILE "keyfile"
@@ -68,11 +61,10 @@ index 0000000000..3e41421716
+
+typedef struct {
+ ProxmoxRestoreHandle *conn;
+ uint8_t aid;
+ char aid;
+ int64_t length;
+
+ char *repository;
+ char *namespace;
+ char *snapshot;
+ char *archive;
+} BDRVPBSState;
@@ -87,11 +79,6 @@ index 0000000000..3e41421716
+ .help = "The server address and repository to connect to.",
+ },
+ {
+ .name = PBS_OPT_NAMESPACE,
+ .type = QEMU_OPT_STRING,
+ .help = "Optional: The snapshot's namespace.",
+ },
+ {
+ .name = PBS_OPT_SNAPSHOT,
+ .type = QEMU_OPT_STRING,
+ .help = "The snapshot to read.",
@@ -127,7 +114,7 @@ index 0000000000..3e41421716
+
+
+// filename format:
+// pbs:repository=<repo>,namespace=<ns>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+// pbs:repository=<repo>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+static void pbs_parse_filename(const char *filename, QDict *options,
+ Error **errp)
+{
@@ -163,7 +150,6 @@ index 0000000000..3e41421716
+ s->archive = g_strdup(qemu_opt_get(opts, PBS_OPT_ARCHIVE));
+ const char *keyfile = qemu_opt_get(opts, PBS_OPT_KEYFILE);
+ const char *password = qemu_opt_get(opts, PBS_OPT_PASSWORD);
+ const char *namespace = qemu_opt_get(opts, PBS_OPT_NAMESPACE);
+ const char *fingerprint = qemu_opt_get(opts, PBS_OPT_FINGERPRINT);
+ const char *key_password = qemu_opt_get(opts, PBS_OPT_ENCRYPTION_PASSWORD);
+
@@ -176,12 +162,9 @@ index 0000000000..3e41421716
+ if (!key_password) {
+ key_password = getenv("PBS_ENCRYPTION_PASSWORD");
+ }
+ if (namespace) {
+ s->namespace = g_strdup(namespace);
+ }
+
+ /* connect to PBS server in read mode */
+ s->conn = proxmox_restore_new_ns(s->repository, s->snapshot, s->namespace, password,
+ s->conn = proxmox_restore_new(s->repository, s->snapshot, password,
+ keyfile, key_password, fingerprint, &pbs_error);
+
+ /* invalidates qemu_opt_get char pointers from above */
@@ -201,18 +184,12 @@ index 0000000000..3e41421716
+ }
+
+ /* acquire handle and length */
+ ret = proxmox_restore_open_image(s->conn, s->archive, &pbs_error);
+ if (ret < 0) {
+ s->aid = proxmox_restore_open_image(s->conn, s->archive, &pbs_error);
+ if (s->aid < 0) {
+ if (pbs_error && errp) error_setg(errp, "PBS open_image failed: %s", pbs_error);
+ if (pbs_error) proxmox_backup_free_error(pbs_error);
+ return -ENODEV;
+ }
+ if (ret > UINT8_MAX) {
+ error_setg(errp, "PBS open_image returned an ID larger than %u", UINT8_MAX);
+ return -ENODEV;
+ }
+ s->aid = ret;
+
+ s->length = proxmox_restore_get_image_length(s->conn, s->aid, &pbs_error);
+ if (s->length < 0) {
+ if (pbs_error && errp) error_setg(errp, "PBS get_image_length failed: %s", pbs_error);
@@ -223,17 +200,21 @@ index 0000000000..3e41421716
+ return 0;
+}
+
+static int pbs_file_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ return pbs_open(bs, options, flags, errp);
+}
+
+static void pbs_close(BlockDriverState *bs) {
+ BDRVPBSState *s = bs->opaque;
+ g_free(s->repository);
+ g_free(s->namespace);
+ g_free(s->snapshot);
+ g_free(s->archive);
+ proxmox_restore_disconnect(s->conn);
+}
+
+static coroutine_fn int64_t GRAPH_RDLOCK
+pbs_co_getlength(BlockDriverState *bs)
+static int64_t pbs_getlength(BlockDriverState *bs)
+{
+ BDRVPBSState *s = bs->opaque;
+ return s->length;
@@ -250,35 +231,21 @@ index 0000000000..3e41421716
+ aio_co_schedule(rcb->ctx, rcb->co);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+pbs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BDRVPBSState *s = bs->opaque;
+ int ret;
+ char *pbs_error = NULL;
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+ return -EIO;
+ }
+ uint8_t *buf = malloc(bytes);
+
+ ReadCallbackData rcb = {
+ .co = qemu_coroutine_self(),
+ .ctx = bdrv_get_aio_context(bs),
+ };
+
+ proxmox_restore_read_image_at_async(s->conn, s->aid, buf, (uint64_t)offset, (uint64_t)bytes,
+ proxmox_restore_read_image_at_async(s->conn, s->aid, buf, offset, bytes,
+ read_callback, (void *) &rcb, &ret, &pbs_error);
+
+ qemu_coroutine_yield();
@@ -289,34 +256,26 @@ index 0000000000..3e41421716
+ return -EIO;
+ }
+
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ free(buf);
+
+ return 0;
+ return ret;
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+pbs_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static coroutine_fn int pbs_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ fprintf(stderr, "pbs-bdrv: cannot write to backup file, make sure "
+ "any attached disk devices are set to read-only!\n");
+ return -EPERM;
+}
+
+static void GRAPH_RDLOCK
+pbs_refresh_filename(BlockDriverState *bs)
+static void pbs_refresh_filename(BlockDriverState *bs)
+{
+ BDRVPBSState *s = bs->opaque;
+ if (s->namespace) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s:%s(%s)",
+ s->repository, s->namespace, s->snapshot, s->archive);
+ } else {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+ }
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+}
+
+static const char *const pbs_strong_runtime_opts[] = {
@@ -330,9 +289,10 @@ index 0000000000..3e41421716
+
+ .bdrv_parse_filename = pbs_parse_filename,
+
+ .bdrv_file_open = pbs_file_open,
+ .bdrv_open = pbs_open,
+ .bdrv_close = pbs_close,
+ .bdrv_co_getlength = pbs_co_getlength,
+ .bdrv_getlength = pbs_getlength,
+
+ .bdrv_co_preadv = pbs_co_preadv,
+ .bdrv_co_pwritev = pbs_co_pwritev,
@@ -347,32 +307,72 @@ index 0000000000..3e41421716
+}
+
+block_init(bdrv_pbs_init);
diff --git a/configure b/configure
index 6e308ed77f..869e97c72f 100755
--- a/configure
+++ b/configure
@@ -428,6 +428,7 @@ vdi=${default_feature:-yes}
vvfat=${default_feature:-yes}
qed=${default_feature:-yes}
parallels=${default_feature:-yes}
+pbs_bdrv="yes"
libxml2="auto"
debug_mutex="no"
libpmem="auto"
@@ -1486,6 +1487,10 @@ for opt do
;;
--enable-parallels) parallels="yes"
;;
+ --disable-pbs-bdrv) pbs_bdrv="no"
+ ;;
+ --enable-pbs-bdrv) pbs_bdrv="yes"
+ ;;
--disable-vhost-user) vhost_user="no"
;;
--enable-vhost-user) vhost_user="yes"
@@ -1956,6 +1961,7 @@ disabled with --disable-FEATURE, default is enabled if available
vvfat vvfat image format support
qed qed image format support
parallels parallels image format support
+ pbs-bdrv Proxmox backup server read-only block driver support
crypto-afalg Linux AF_ALG crypto backend driver
capstone capstone disassembler support
debug-mutex mutex debugging support
@@ -4624,6 +4630,9 @@ fi
if test "$linux_aio" = "yes" ; then
echo "CONFIG_LINUX_AIO=y" >> $config_host_mak
fi
+if test "$pbs_bdrv" = "yes" ; then
+ echo "CONFIG_PBS_BDRV=y" >> $config_host_mak
+fi
if test "$vhost_scsi" = "yes" ; then
echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
fi
diff --git a/meson.build b/meson.build
index 8508aab9c9..9c39f54f86 100644
index dd1c5bdb4e..45c1f2de73 100644
--- a/meson.build
+++ b/meson.build
@@ -4838,7 +4838,7 @@ summary_info += {'Query Processing Library support': qpl}
summary_info += {'UADK Library support': uadk}
summary_info += {'qatzip support': qatzip}
summary_info += {'NUMA host support': numa}
-summary_info += {'capstone': capstone}
@@ -3111,6 +3111,7 @@ summary_info += {'lzfse support': liblzfse.found()}
summary_info += {'zstd support': zstd.found()}
summary_info += {'NUMA host support': config_host.has_key('CONFIG_NUMA')}
summary_info += {'libxml2': libxml2.found()}
+summary_info += {'PBS bdrv support': config_host.has_key('CONFIG_PBS_BDRV')}
summary_info += {'libpmem support': libpmem}
summary_info += {'libdaxctl support': libdaxctl}
summary_info += {'libcbor support': libcbor}
summary_info += {'capstone': capstone_opt == 'disabled' ? false : capstone_opt}
summary_info += {'libpmem support': libpmem.found()}
summary_info += {'libdaxctl support': libdaxctl.found()}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index dd98e03bf1..0c3ebfa74e 100644
index a75f1b4687..e4d0c923a4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3470,6 +3470,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
@@ -2982,6 +2982,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
+ 'pbs',
'ssh', 'throttle', 'vdi', 'vhdx',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
@@ -3556,6 +3557,33 @@
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
##
@@ -3045,6 +3046,17 @@
{ 'struct': 'BlockdevOptionsNull',
'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
@@ -381,48 +381,20 @@ index dd98e03bf1..0c3ebfa74e 100644
+#
+# Driver specific block device options for the PBS backend.
+#
+# @repository: Proxmox Backup Server repository.
+#
+# @snapshot: backup snapshot ID.
+#
+# @archive: archive name.
+#
+# @keyfile: keyfile to use for encryption.
+#
+# @password: password to use for connection.
+#
+# @fingerprint: backup server fingerprint.
+#
+# @key_password: password to unlock key.
+#
+# @namespace: namespace where backup snapshot lives.
+#
+##
+{ 'struct': 'BlockdevOptionsPbs',
+ 'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
+ '*keyfile': 'str', '*password': 'str', '*fingerprint': 'str',
+ '*key_password': 'str', '*namespace': 'str' } }
+ '*key_password': 'str' } }
+
##
# @BlockdevOptionsNVMe:
#
@@ -5003,6 +5031,7 @@
@@ -4263,6 +4275,7 @@
'nfs': 'BlockdevOptionsNfs',
'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull',
+ 'pbs': 'BlockdevOptionsPbs',
'nvme': 'BlockdevOptionsNVMe',
'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
'if': 'CONFIG_BLKIO' },
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 6aaa9cb975..e9c595c4ba 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -91,6 +91,7 @@
'BlockInfo', # query-block
'BlockdevAioOptions', # blockdev-add, -blockdev
'BlockdevDriver', # blockdev-add, query-blockstats, ...
+ 'BlockdevOptionsPbs', # for PBS backwards compat
'BlockdevVmdkAdapterType', # blockdev-create (to match VMDK spec)
'BlockdevVmdkSubformat', # blockdev-create (to match VMDK spec)
'ColoCompareProperties', # object_add, -object
'parallels': 'BlockdevOptionsGenericFormat',
'preallocate':'BlockdevOptionsPreallocate',

View File

@@ -0,0 +1,74 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 8 Jul 2020 11:57:53 +0200
Subject: [PATCH] PVE: add query_proxmox_support QMP command
Generic interface for future use, currently used for PBS dirty-bitmap
backup support.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[PVE: query-proxmox-support: include library version]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 9 +++++++++
qapi/block-core.json | 29 +++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/pve-backup.c b/pve-backup.c
index 7527885251..8cba8e97d3 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1075,3 +1075,12 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
+
+ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+{
+ ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
+ ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
+ ret->pbs_dirty_bitmap = true;
+ ret->pbs_dirty_bitmap_savevm = true;
+ return ret;
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e4d0c923a4..3eebe7ff71 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -822,6 +822,35 @@
##
{ 'command': 'backup-cancel' }
+##
+# @ProxmoxSupportStatus:
+#
+# Contains info about supported features added by Proxmox.
+#
+# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
+# supported.
+#
+# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
+# safely be set for savevm-async.
+#
+# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+#
+##
+{ 'struct': 'ProxmoxSupportStatus',
+ 'data': { 'pbs-dirty-bitmap': 'bool',
+ 'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-library-version': 'str' } }
+
+##
+# @query-proxmox-support:
+#
+# Returns information about supported features added by Proxmox.
+#
+# Returns: @ProxmoxSupportStatus
+#
+##
+{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }
+
##
# @BlockDeviceTimedStats:
#

View File

@@ -0,0 +1,441 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 19 Aug 2020 17:02:00 +0200
Subject: [PATCH] PVE: add query-pbs-bitmap-info QMP call
Returns advanced information about dirty bitmaps used (or not used) for
the latest PBS backup.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
monitor/hmp-cmds.c | 28 ++++++-----
pve-backup.c | 117 ++++++++++++++++++++++++++++++++-----------
qapi/block-core.json | 56 +++++++++++++++++++++
3 files changed, 159 insertions(+), 42 deletions(-)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index b2b5f1298b..7a449edafa 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -198,6 +198,7 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict)
void hmp_info_backup(Monitor *mon, const QDict *qdict)
{
BackupStatus *info;
+ PBSBitmapInfoList *bitmap_info;
info = qmp_query_backup(NULL);
@@ -228,26 +229,29 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
// this should not happen normally
monitor_printf(mon, "Total size: %d\n", 0);
} else {
- bool incremental = false;
size_t total_or_dirty = info->total;
- if (info->has_transferred) {
- if (info->has_dirty && info->dirty) {
- if (info->dirty < info->total) {
- total_or_dirty = info->dirty;
- incremental = true;
- }
- }
+ bitmap_info = qmp_query_pbs_bitmap_info(NULL);
+
+ while (bitmap_info) {
+ monitor_printf(mon, "Drive %s:\n",
+ bitmap_info->value->drive);
+ monitor_printf(mon, " bitmap action: %s\n",
+ PBSBitmapAction_str(bitmap_info->value->action));
+ monitor_printf(mon, " size: %zd\n",
+ bitmap_info->value->size);
+ monitor_printf(mon, " dirty: %zd\n",
+ bitmap_info->value->dirty);
+ bitmap_info = bitmap_info->next;
}
- int per = (info->transferred * 100)/total_or_dirty;
-
- monitor_printf(mon, "Backup mode: %s\n", incremental ? "incremental" : "full");
+ qapi_free_PBSBitmapInfoList(bitmap_info);
int zero_per = (info->has_zero_bytes && info->zero_bytes) ?
(info->zero_bytes * 100)/info->total : 0;
monitor_printf(mon, "Total size: %zd\n", info->total);
+ int trans_per = (info->transferred * 100)/total_or_dirty;
monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
- info->transferred, per);
+ info->transferred, trans_per);
monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
info->zero_bytes, zero_per);
diff --git a/pve-backup.c b/pve-backup.c
index 8cba8e97d3..22420db26a 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -46,6 +46,7 @@ static struct PVEBackupState {
size_t transferred;
size_t reused;
size_t zero_bytes;
+ GList *bitmap_list;
} stat;
int64_t speed;
VmaWriter *vmaw;
@@ -672,7 +673,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
- size_t dirty = 0;
l = di_list;
while (l) {
@@ -693,18 +693,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_generate(uuid);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.reused = 0;
+
+ /* clear previous backup's bitmap_list */
+ if (backup_state.stat.bitmap_list) {
+ GList *bl = backup_state.stat.bitmap_list;
+ while (bl) {
+ g_free(((PBSBitmapInfo *)bl->data)->drive);
+ g_free(bl->data);
+ bl = g_list_next(bl);
+ }
+ g_list_free(backup_state.stat.bitmap_list);
+ backup_state.stat.bitmap_list = NULL;
+ }
+
if (format == BACKUP_FORMAT_PBS) {
if (!task->has_password) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
- goto err;
+ goto err_mutex;
}
if (!task->has_backup_id) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
- goto err;
+ goto err_mutex;
}
if (!task->has_backup_time) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
- goto err;
+ goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
@@ -731,12 +746,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
- goto err;
+ goto err_mutex;
}
int connect_result = proxmox_backup_co_connect(pbs, task->errp);
if (connect_result < 0)
- goto err;
+ goto err_mutex;
/* register all devices */
l = di_list;
@@ -747,6 +762,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->block_size = dump_cb_block_size;
const char *devname = bdrv_get_device_name(di->bs);
+ PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
+ size_t dirty = di->size;
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
@@ -755,49 +772,59 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (bitmap == NULL) {
bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
if (!bitmap) {
- goto err;
+ goto err_mutex;
}
+ action = PBS_BITMAP_ACTION_NEW;
} else {
expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
}
if (expect_only_dirty) {
- dirty += bdrv_get_dirty_count(bitmap);
+ /* track clean chunks as reused */
+ dirty = MIN(bdrv_get_dirty_count(bitmap), di->size);
+ backup_state.stat.reused += di->size - dirty;
+ action = PBS_BITMAP_ACTION_USED;
} else {
/* mark entire bitmap as dirty to make full backup */
bdrv_set_dirty_bitmap(bitmap, 0, di->size);
- dirty += di->size;
+ if (action != PBS_BITMAP_ACTION_NEW) {
+ action = PBS_BITMAP_ACTION_INVALID;
+ }
}
di->bitmap = bitmap;
} else {
- dirty += di->size;
-
/* after a full backup the old dirty bitmap is invalid anyway */
if (bitmap != NULL) {
bdrv_release_dirty_bitmap(bitmap);
+ action = PBS_BITMAP_ACTION_NOT_USED_REMOVED;
}
}
int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
if (dev_id < 0) {
- goto err;
+ goto err_mutex;
}
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
- goto err;
+ goto err_mutex;
}
di->dev_id = dev_id;
+
+ PBSBitmapInfo *info = g_malloc(sizeof(*info));
+ info->drive = g_strdup(devname);
+ info->action = action;
+ info->size = di->size;
+ info->dirty = dirty;
+ backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- dirty = total;
-
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
error_propagate(task->errp, local_err);
}
- goto err;
+ goto err_mutex;
}
/* register all devices for vma writer */
@@ -807,7 +834,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
l = g_list_next(l);
if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
- goto err;
+ goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
@@ -815,16 +842,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (di->dev_id <= 0) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
- goto err;
+ goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- dirty = total;
-
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
- goto err;
+ goto err_mutex;
}
backup_dir = task->backup_file;
@@ -841,18 +866,18 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->size, flags, false, &local_err);
if (local_err) {
error_propagate(task->errp, local_err);
- goto err;
+ goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
error_propagate(task->errp, local_err);
- goto err;
+ goto err_mutex;
}
}
} else {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
- goto err;
+ goto err_mutex;
}
@@ -860,7 +885,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_config_file) {
if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
- goto err;
+ goto err_mutex;
}
}
@@ -868,12 +893,11 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_firewall_file) {
if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
- goto err;
+ goto err_mutex;
}
}
/* initialize global backup_state now */
-
- qemu_mutex_lock(&backup_state.stat.lock);
+ /* note: 'reused' and 'bitmap_list' are initialized earlier */
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
@@ -893,10 +917,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
- backup_state.stat.dirty = dirty;
+ backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
- backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -913,6 +936,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->result = uuid_info;
return;
+err_mutex:
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
err:
l = di_list;
@@ -1076,11 +1102,42 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
+PBSBitmapInfoList *qmp_query_pbs_bitmap_info(Error **errp)
+{
+ PBSBitmapInfoList *head = NULL, **p_next = &head;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+
+ GList *l = backup_state.stat.bitmap_list;
+ while (l) {
+ PBSBitmapInfo *info = (PBSBitmapInfo *)l->data;
+ l = g_list_next(l);
+
+ /* clone bitmap info to avoid auto free after QMP marshalling */
+ PBSBitmapInfo *info_ret = g_malloc0(sizeof(*info_ret));
+ info_ret->drive = g_strdup(info->drive);
+ info_ret->action = info->action;
+ info_ret->size = info->size;
+ info_ret->dirty = info->dirty;
+
+ PBSBitmapInfoList *info_list = g_malloc0(sizeof(*info_list));
+ info_list->value = info_ret;
+
+ *p_next = info_list;
+ p_next = &info_list->next;
+ }
+
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ return head;
+}
+
ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
{
ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->query_bitmap_info = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3eebe7ff71..170c13984d 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -830,6 +830,8 @@
# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
# supported.
#
+# @query-bitmap-info: True if the 'query-pbs-bitmap-info' QMP call is supported.
+#
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
#
@@ -838,6 +840,7 @@
##
{ 'struct': 'ProxmoxSupportStatus',
'data': { 'pbs-dirty-bitmap': 'bool',
+ 'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-library-version': 'str' } }
@@ -851,6 +854,59 @@
##
{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }
+##
+# @PBSBitmapAction:
+#
+# An action taken on a dirty-bitmap when a backup job was started.
+#
+# @not-used: Bitmap mode was not enabled.
+#
+# @not-used-removed: Bitmap mode was not enabled, but a bitmap from a
+# previous backup still existed and was removed.
+#
+# @new: A new bitmap was attached to the drive for this backup.
+#
+# @used: An existing bitmap will be used to only backup changed data.
+#
+# @invalid: A bitmap existed, but had to be cleared since its associated
+# base snapshot did not match the base given for the current job or
+# the crypt mode has changed.
+#
+##
+{ 'enum': 'PBSBitmapAction',
+ 'data': ['not-used', 'not-used-removed', 'new', 'used', 'invalid'] }
+
+##
+# @PBSBitmapInfo:
+#
+# Contains information about dirty bitmaps used for each drive in a PBS backup.
+#
+# @drive: The underlying drive.
+#
+# @action: The action that was taken when the backup started.
+#
+# @size: The total size of the drive.
+#
+# @dirty: How much of the drive is considered dirty and will be backed up,
+# or 'size' if everything will be.
+#
+##
+{ 'struct': 'PBSBitmapInfo',
+ 'data': { 'drive': 'str', 'action': 'PBSBitmapAction', 'size': 'int',
+ 'dirty': 'int' } }
+
+##
+# @query-pbs-bitmap-info:
+#
+# Returns information about dirty bitmaps used on the most recently started
+# backup. Returns nothing when the last backup was not using PBS or if no
+# backup occurred in this session.
+#
+# Returns: a list of @PBSBitmapInfo
+#
+##
+{ 'command': 'query-pbs-bitmap-info', 'returns': ['PBSBitmapInfo'] }
+
##
# @BlockDeviceTimedStats:
#

View File

@@ -9,51 +9,50 @@ fitting.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
meson.build | 3 ++-
meson.build | 2 ++
os-posix.c | 7 +++++--
2 files changed, 7 insertions(+), 3 deletions(-)
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/meson.build b/meson.build
index 9c39f54f86..60af7fa723 100644
index 45c1f2de73..44071acbb7 100644
--- a/meson.build
+++ b/meson.build
@@ -2162,6 +2162,7 @@ endif
@@ -1065,6 +1065,7 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
libuuid = cc.find_library('uuid', required: true)
+libsystemd = cc.find_library('systemd', required: true)
libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true)
# libselinux
@@ -3766,7 +3767,7 @@ if have_block
if host_os == 'windows'
system_ss.add(files('os-win32.c'))
else
- blockdev_ss.add(files('os-posix.c'))
+ blockdev_ss.add(files('os-posix.c'), libsystemd)
endif
# Malloc tests
@@ -2246,6 +2247,7 @@ if have_block
# os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
# os-win32.c does not
blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
+ blockdev_ss.add(when: 'CONFIG_POSIX', if_true: libsystemd)
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
endif
diff --git a/os-posix.c b/os-posix.c
index 52925c23d3..84b96d3da9 100644
index ae6c9f2a5e..36807806bf 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -29,6 +29,8 @@
@@ -28,6 +28,8 @@
#include <pwd.h>
#include <grp.h>
#include <libgen.h>
+#include <systemd/sd-journal.h>
+#include <syslog.h>
#include "qemu/error-report.h"
#include "qemu/log.h"
@@ -306,9 +308,10 @@ void os_setup_post(void)
#include "qemu-common.h"
/* Needed early for CONFIG_BSD etc. */
@@ -291,9 +293,10 @@ void os_setup_post(void)
dup2(fd, 0);
dup2(fd, 1);
- /* In case -D is given do not redirect stderr to /dev/null */
+ /* In case -D is given do not redirect stderr to journal */
if (!qemu_log_enabled()) {
if (!qemu_logfile) {
- dup2(fd, 2);
+ int journal_fd = sd_journal_stream_fd("QEMU", LOG_ERR, 0);
+ dup2(journal_fd, 2);

View File

@@ -7,14 +7,14 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/qemu/job.h | 12 ++++++++++++
job.c | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
job.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/include/qemu/job.h b/include/qemu/job.h
index a5a04155ea..562cc7eaec 100644
index 41162ed494..6662c63519 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -362,6 +362,18 @@ void job_unlock(void);
@@ -285,6 +285,18 @@ typedef enum JobCreateFlags {
*/
JobTxn *job_txn_new(void);
@@ -34,10 +34,10 @@ index a5a04155ea..562cc7eaec 100644
* Release a reference that was previously acquired with job_txn_add_job or
* job_txn_new. If it's the last reference to the object, it will be freed.
diff --git a/job.c b/job.c
index b981070ee8..f4646866ec 100644
index 44eec9a441..a0753ff2f1 100644
--- a/job.c
+++ b/job.c
@@ -94,6 +94,8 @@ struct JobTxn {
@@ -72,6 +72,8 @@ struct JobTxn {
/* Reference count */
int refcnt;
@@ -45,8 +45,8 @@ index b981070ee8..f4646866ec 100644
+ bool sequential;
};
void job_lock(void)
@@ -119,6 +121,25 @@ JobTxn *job_txn_new(void)
/* Right now, this mutex is only needed to synchronize accesses to job->busy
@@ -102,6 +104,25 @@ JobTxn *job_txn_new(void)
return txn;
}
@@ -69,23 +69,20 @@ index b981070ee8..f4646866ec 100644
+ job_start(first);
+}
+
/* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn)
static void job_txn_ref(JobTxn *txn)
{
@@ -1048,6 +1069,12 @@ static void job_completed_txn_success_locked(Job *job)
txn->refcnt++;
@@ -850,6 +871,9 @@ static void job_completed_txn_success(Job *job)
*/
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (!job_is_completed_locked(other_job)) {
if (!job_is_completed(other_job)) {
+ if (txn->sequential) {
+ job_unlock();
+ /* Needs to be called without holding the job lock */
+ job_start(other_job);
+ job_lock();
+ }
return;
}
assert(other_job->ret == 0);
@@ -1259,6 +1286,13 @@ int job_finish_sync_locked(Job *job,
@@ -1020,6 +1044,13 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
return -EBUSY;
}
@@ -93,9 +90,9 @@ index b981070ee8..f4646866ec 100644
+ * of cancelling, these have not begun work so job_enter won't do anything,
+ * let's ensure they are marked as ABORTING if required */
+ if (job->status == JOB_STATUS_CREATED && job->txn->sequential) {
+ job_update_rc_locked(job);
+ job_update_rc(job);
+ }
+
job_unlock();
AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
(job_enter(job), !job_is_completed(job)));
AIO_WAIT_WHILE(job->aio_context,
(job_enter(job), !job_is_completed(job)));

View File

@@ -0,0 +1,294 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 20 Aug 2020 14:25:00 +0200
Subject: [PATCH] PVE-Backup: Use a transaction to synchronize job states
By using a JobTxn, we can sync dirty bitmaps only when *all* jobs were
successful - meaning we don't need to remove them when the backup fails,
since QEMU's BITMAP_SYNC_MODE_ON_SUCCESS will now handle that for us.
To keep the rate-limiting and IO impact from before, we use a sequential
transaction, so drives will still be backed up one after the other.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
pve-backup.c | 169 +++++++++++++++------------------------------------
1 file changed, 50 insertions(+), 119 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 22420db26a..2e628d68e4 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -52,6 +52,7 @@ static struct PVEBackupState {
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
+ JobTxn *txn;
QemuMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
@@ -71,32 +72,12 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
- bool completed;
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
+ BlockJob *job;
} PVEBackupDevInfo;
-static void pvebackup_run_next_job(void);
-
-static BlockJob *
-lookup_active_block_job(PVEBackupDevInfo *di)
-{
- if (!di->completed && di->bs) {
- for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
- if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
- continue;
- }
-
- BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
- if (bjob && bjob->source_bs == di->bs) {
- return job;
- }
- }
- }
- return NULL;
-}
-
static void pvebackup_propagate_error(Error *err)
{
qemu_mutex_lock(&backup_state.stat.lock);
@@ -272,18 +253,6 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
if (local_err != NULL) {
pvebackup_propagate_error(local_err);
}
- } else {
- // on error or cancel we cannot ensure synchronization of dirty
- // bitmaps with backup server, so remove all and do full backup next
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- if (di->bitmap) {
- bdrv_release_dirty_bitmap(di->bitmap);
- }
- }
}
proxmox_backup_disconnect(backup_state.pbs);
@@ -322,8 +291,6 @@ static void pvebackup_complete_cb(void *opaque, int ret)
qemu_mutex_lock(&backup_state.backup_mutex);
- di->completed = true;
-
if (ret < 0) {
Error *local_err = NULL;
error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
@@ -336,20 +303,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
block_on_coroutine_fn(pvebackup_complete_stream, di);
- // remove self from job queue
+ // remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
- if (di->bitmap && ret < 0) {
- // on error or cancel we cannot ensure synchronization of dirty
- // bitmaps with backup server, so remove all and do full backup next
- bdrv_release_dirty_bitmap(di->bitmap);
- }
-
g_free(di);
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ /* call cleanup if we're the last job */
+ if (!g_list_first(backup_state.di_list)) {
+ block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ }
- pvebackup_run_next_job();
+ qemu_mutex_unlock(&backup_state.backup_mutex);
}
static void pvebackup_cancel(void)
@@ -371,36 +335,28 @@ static void pvebackup_cancel(void)
proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
+ /* it's enough to cancel one job in the transaction, the rest will follow
+ * automatically */
+ GList *bdi = g_list_first(backup_state.di_list);
+ BlockJob *cancel_job = bdi && bdi->data ?
+ ((PVEBackupDevInfo *)bdi->data)->job :
+ NULL;
+
+ /* ref the job before releasing the mutex, just to be safe */
+ if (cancel_job) {
+ job_ref(&cancel_job->job);
+ }
+
+ /* job_cancel_sync may enter the job, so we need to release the
+ * backup_mutex to avoid deadlock */
qemu_mutex_unlock(&backup_state.backup_mutex);
- for(;;) {
-
- BlockJob *next_job = NULL;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
-
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- BlockJob *job = lookup_active_block_job(di);
- if (job != NULL) {
- next_job = job;
- break;
- }
- }
-
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (next_job) {
- AioContext *aio_context = next_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&next_job->job);
- aio_context_release(aio_context);
- } else {
- break;
- }
+ if (cancel_job) {
+ AioContext *aio_context = cancel_job->job.aio_context;
+ aio_context_acquire(aio_context);
+ job_cancel_sync(&cancel_job->job);
+ job_unref(&cancel_job->job);
+ aio_context_release(aio_context);
}
}
@@ -459,51 +415,19 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-bool job_should_pause(Job *job);
-
-static void pvebackup_run_next_job(void)
-{
- assert(!qemu_in_coroutine());
-
- qemu_mutex_lock(&backup_state.backup_mutex);
-
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- BlockJob *job = lookup_active_block_job(di);
-
- if (job) {
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- AioContext *aio_context = job->job.aio_context;
- aio_context_acquire(aio_context);
-
- if (job_should_pause(&job->job)) {
- bool error_or_canceled = pvebackup_error_or_canceled();
- if (error_or_canceled) {
- job_cancel_sync(&job->job);
- } else {
- job_resume(&job->job);
- }
- }
- aio_context_release(aio_context);
- return;
- }
- }
-
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
-
- qemu_mutex_unlock(&backup_state.backup_mutex);
-}
-
static bool create_backup_jobs(void) {
assert(!qemu_in_coroutine());
Error *local_err = NULL;
+ /* create job transaction to synchronize bitmap commit and cancel all
+ * jobs in case one errors */
+ if (backup_state.txn) {
+ job_txn_unref(backup_state.txn);
+ }
+ backup_state.txn = job_txn_new_seq();
+
BackupPerf perf = { .max_workers = 16 };
/* create and start all jobs (paused state) */
@@ -526,7 +450,7 @@ static bool create_backup_jobs(void) {
BlockJob *job = backup_job_create(
NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
+ JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
aio_context_release(aio_context);
@@ -538,7 +462,8 @@ static bool create_backup_jobs(void) {
pvebackup_propagate_error(create_job_err);
break;
}
- job_start(&job->job);
+
+ di->job = job;
bdrv_unref(di->target);
di->target = NULL;
@@ -556,6 +481,10 @@ static bool create_backup_jobs(void) {
bdrv_unref(di->target);
di->target = NULL;
}
+
+ if (di->job) {
+ job_unref(&di->job->job);
+ }
}
}
@@ -946,10 +875,6 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (di->bitmap) {
- bdrv_release_dirty_bitmap(di->bitmap);
- }
-
if (di->target) {
bdrv_unref(di->target);
}
@@ -1038,9 +963,15 @@ UuidInfo *qmp_backup(
block_on_coroutine_fn(pvebackup_co_prepare, &task);
if (*errp == NULL) {
- create_backup_jobs();
+ bool errors = create_backup_jobs();
qemu_mutex_unlock(&backup_state.backup_mutex);
- pvebackup_run_next_job();
+
+ if (!errors) {
+ /* start the first job in the transaction
+ * note: this might directly enter the job, so we need to do this
+ * after unlocking the backup_mutex */
+ job_txn_start_seq(backup_state.txn);
+ }
} else {
qemu_mutex_unlock(&backup_state.backup_mutex);
}

View File

@@ -0,0 +1,501 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 28 Sep 2020 13:40:51 +0200
Subject: [PATCH] PVE-Backup: Don't block on finishing and cleanup
create_backup_jobs
proxmox_backup_co_finish is already async, but previously we would wait
for the coroutine using block_on_coroutine_fn(). Avoid this by
scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
as a real coroutine when calling from pvebackup_complete_cb. This is ok,
since complete_stream uses the backup_mutex internally to synchronize,
and other streams can happily continue writing in the meantime anyway.
To accommodate this, backup_mutex is converted to a CoMutex. This means
converting every user to a coroutine. This is not just useful here, but
will come in handy once this series[0] is merged, and QMP calls can be
yield-able coroutines too. Then we can also finally get rid of
block_on_coroutine_fn.
Cases of aio_context_acquire/release from within what is now a coroutine
are changed to aio_co_reschedule_self, which works since a running
coroutine always holds the aio lock for the context it is running in.
job_cancel_sync is called from a BH since it can't be run from a
coroutine (uses AIO_WAIT_WHILE internally).
Same thing for create_backup_jobs, which is converted to a BH too.
To communicate the finishing state, a new property is introduced to
query-backup: 'finishing'. A new state is explicitly not used, since
that would break compatibility with older qemu-server versions.
Also fix create_backup_jobs:
No more weird bool returns, just the standard "errp" format used
everywhere else too. With this, if backup_job_create fails, the error
message is actually returned over QMP and can be shown to the user.
To facilitate correct cleanup on such an error, we call
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
This additionally allows us to actually hold the backup_mutex during
operation.
Also add a job_cancel_sync before job_unref, since a job must be in
STATUS_NULL to be deleted by unref, which could trigger an assert
before.
[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
pve-backup.c | 217 ++++++++++++++++++++++++++++---------------
qapi/block-core.json | 5 +-
2 files changed, 144 insertions(+), 78 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 2e628d68e4..9c20ef3a5e 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
static struct PVEBackupState {
struct {
- // Everithing accessed from qmp_backup_query command is protected using lock
+ // Everything accessed from qmp_backup_query command is protected using
+ // this lock. Do NOT hold this lock for long periods, as it is sometimes
+ // acquired from coroutines, and thus any wait time may block the guest.
QemuMutex lock;
Error *error;
time_t start_time;
@@ -47,20 +49,22 @@ static struct PVEBackupState {
size_t reused;
size_t zero_bytes;
GList *bitmap_list;
+ bool finishing;
+ bool starting;
} stat;
int64_t speed;
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
JobTxn *txn;
- QemuMutex backup_mutex;
+ CoMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
static void pvebackup_init(void)
{
qemu_mutex_init(&backup_state.stat.lock);
- qemu_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.backup_mutex);
qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
@@ -72,6 +76,7 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
+ int completed_ret; // INT_MAX if not completed
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
@@ -227,12 +232,12 @@ pvebackup_co_dump_vma_cb(
}
// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_co_cleanup(void *unused)
+static void coroutine_fn pvebackup_co_cleanup(void)
{
assert(qemu_in_coroutine());
qemu_mutex_lock(&backup_state.stat.lock);
- backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = true;
qemu_mutex_unlock(&backup_state.stat.lock);
if (backup_state.vmaw) {
@@ -261,35 +266,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
}
-// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_complete_stream(void *opaque)
+static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
{
PVEBackupDevInfo *di = opaque;
+ int ret = di->completed_ret;
- bool error_or_canceled = pvebackup_error_or_canceled();
-
- if (backup_state.vmaw) {
- vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ bool starting = backup_state.stat.starting;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ if (starting) {
+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
+ * must) skip all cleanup, as we don't know what has and hasn't been
+ * initialized yet. */
+ return;
}
- if (backup_state.pbs && !error_or_canceled) {
- Error *local_err = NULL;
- proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
- if (local_err != NULL) {
- pvebackup_propagate_error(local_err);
- }
- }
-}
-
-static void pvebackup_complete_cb(void *opaque, int ret)
-{
- assert(!qemu_in_coroutine());
-
- PVEBackupDevInfo *di = opaque;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (ret < 0) {
Error *local_err = NULL;
@@ -301,7 +300,19 @@ static void pvebackup_complete_cb(void *opaque, int ret)
assert(di->target == NULL);
- block_on_coroutine_fn(pvebackup_complete_stream, di);
+ bool error_or_canceled = pvebackup_error_or_canceled();
+
+ if (backup_state.vmaw) {
+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ }
+
+ if (backup_state.pbs && !error_or_canceled) {
+ Error *local_err = NULL;
+ proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
+ if (local_err != NULL) {
+ pvebackup_propagate_error(local_err);
+ }
+ }
// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -310,21 +321,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
/* call cleanup if we're the last job */
if (!g_list_first(backup_state.di_list)) {
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ pvebackup_co_cleanup();
}
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-static void pvebackup_cancel(void)
+static void pvebackup_complete_cb(void *opaque, int ret)
{
- assert(!qemu_in_coroutine());
+ PVEBackupDevInfo *di = opaque;
+ di->completed_ret = ret;
+ /*
+ * Schedule stream cleanup in async coroutine. close_image and finish might
+ * take a while, so we can't block on them here. This way it also doesn't
+ * matter if we're already running in a coroutine or not.
+ * Note: di is a pointer to an entry in the global backup_state struct, so
+ * it stays valid.
+ */
+ Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
+ aio_co_enter(qemu_get_aio_context(), co);
+}
+
+/*
+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
+ * main loop processing via a bottom half.
+ */
+static void job_cancel_bh(void *opaque) {
+ CoCtxData *data = (CoCtxData*)opaque;
+ Job *job = (Job*)data->data;
+ AioContext *job_ctx = job->aio_context;
+ aio_context_acquire(job_ctx);
+ job_cancel_sync(job);
+ aio_context_release(job_ctx);
+ aio_co_enter(data->ctx, data->co);
+}
+
+static void coroutine_fn pvebackup_co_cancel(void *opaque)
+{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
pvebackup_propagate_error(cancel_err);
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
@@ -342,27 +381,22 @@ static void pvebackup_cancel(void)
((PVEBackupDevInfo *)bdi->data)->job :
NULL;
- /* ref the job before releasing the mutex, just to be safe */
if (cancel_job) {
- job_ref(&cancel_job->job);
+ CoCtxData data = {
+ .ctx = qemu_get_current_aio_context(),
+ .co = qemu_coroutine_self(),
+ .data = &cancel_job->job,
+ };
+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+ qemu_coroutine_yield();
}
- /* job_cancel_sync may enter the job, so we need to release the
- * backup_mutex to avoid deadlock */
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (cancel_job) {
- AioContext *aio_context = cancel_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&cancel_job->job);
- job_unref(&cancel_job->job);
- aio_context_release(aio_context);
- }
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
void qmp_backup_cancel(Error **errp)
{
- pvebackup_cancel();
+ block_on_coroutine_fn(pvebackup_co_cancel, NULL);
}
// assumes the caller holds backup_mutex
@@ -415,10 +449,18 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-static bool create_backup_jobs(void) {
+/*
+ * backup_job_create can *not* be run from a coroutine (and requires an
+ * acquired AioContext), so this can't either.
+ * The caller is responsible that backup_mutex is held nonetheless.
+ */
+static void create_backup_jobs_bh(void *opaque) {
assert(!qemu_in_coroutine());
+ CoCtxData *data = (CoCtxData*)opaque;
+ Error **errp = (Error**)data->data;
+
Error *local_err = NULL;
/* create job transaction to synchronize bitmap commit and cancel all
@@ -454,24 +496,19 @@ static bool create_backup_jobs(void) {
aio_context_release(aio_context);
- if (!job || local_err != NULL) {
- Error *create_job_err = NULL;
- error_setg(&create_job_err, "backup_job_create failed: %s",
+ di->job = job;
+
+ if (!job || local_err) {
+ error_setg(errp, "backup_job_create failed: %s",
local_err ? error_get_pretty(local_err) : "null");
-
- pvebackup_propagate_error(create_job_err);
break;
}
- di->job = job;
-
bdrv_unref(di->target);
di->target = NULL;
}
- bool errors = pvebackup_error_or_canceled();
-
- if (errors) {
+ if (*errp) {
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -483,12 +520,17 @@ static bool create_backup_jobs(void) {
}
if (di->job) {
+ AioContext *ctx = di->job->job.aio_context;
+ aio_context_acquire(ctx);
+ job_cancel_sync(&di->job->job);
job_unref(&di->job->job);
+ aio_context_release(ctx);
}
}
}
- return errors;
+ /* return */
+ aio_co_enter(data->ctx, data->co);
}
typedef struct QmpBackupTask {
@@ -525,11 +567,12 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;
-// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_prepare(void *opaque)
{
assert(qemu_in_coroutine());
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
QmpBackupTask *task = opaque;
task->result = NULL; // just to be sure
@@ -550,8 +593,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -618,6 +662,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
di->size = size;
total += size;
+
+ di->completed_ret = INT_MAX;
}
uuid_generate(uuid);
@@ -849,6 +895,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.finishing = false;
+ backup_state.stat.starting = true;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -863,6 +911,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info->UUID = uuid_str;
task->result = uuid_info;
+
+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
+ * points, and we'll release it as soon as the BH reschedules us.
+ */
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &local_err,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
+ qemu_coroutine_yield();
+
+ if (local_err) {
+ error_propagate(task->errp, local_err);
+ goto err;
+ }
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.starting = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ /* start the first job in the transaction */
+ job_txn_start_seq(backup_state.txn);
+
return;
err_mutex:
@@ -885,6 +960,7 @@ err:
g_free(di);
}
g_list_free(di_list);
+ backup_state.di_list = NULL;
if (devs) {
g_strfreev(devs);
@@ -905,6 +981,8 @@ err:
}
task->result = NULL;
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -958,24 +1036,8 @@ UuidInfo *qmp_backup(
.errp = errp,
};
- qemu_mutex_lock(&backup_state.backup_mutex);
-
block_on_coroutine_fn(pvebackup_co_prepare, &task);
- if (*errp == NULL) {
- bool errors = create_backup_jobs();
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (!errors) {
- /* start the first job in the transaction
- * note: this might directly enter the job, so we need to do this
- * after unlocking the backup_mutex */
- job_txn_start_seq(backup_state.txn);
- }
- } else {
- qemu_mutex_unlock(&backup_state.backup_mutex);
- }
-
return task.result;
}
@@ -1027,6 +1089,7 @@ BackupStatus *qmp_query_backup(Error **errp)
info->transferred = backup_state.stat.transferred;
info->has_reused = true;
info->reused = backup_state.stat.reused;
+ info->finishing = backup_state.stat.finishing;
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 170c13984d..a0d1d278e9 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -729,12 +729,15 @@
#
# @uuid: uuid for this backup job
#
+# @finishing: if status='active' and finishing=true, then the backup process is
+# waiting for the target to finish.
+#
##
{ 'struct': 'BackupStatus',
'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
'*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
- '*backup-file': 'str', '*uuid': 'str' } }
+ '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }
##
# @BackupFormat:

View File

@@ -13,68 +13,61 @@ safe migration is possible and makes sense.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: split up state_pending for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/migration/misc.h | 3 ++
migration/meson.build | 2 +
migration/migration.c | 1 +
migration/pbs-state.c | 104 +++++++++++++++++++++++++++++++++++++++
migration/pbs-state.c | 106 +++++++++++++++++++++++++++++++++++++++
pve-backup.c | 1 +
qapi/block-core.json | 6 +++
6 files changed, 117 insertions(+)
6 files changed, 119 insertions(+)
create mode 100644 migration/pbs-state.c
diff --git a/include/migration/misc.h b/include/migration/misc.h
index 8fd36eba1d..e963e93e71 100644
index 465906710d..4f0aeceb6f 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -140,4 +140,7 @@ bool multifd_device_state_save_thread_should_exit(void);
void multifd_abort_device_state_save_threads(void);
bool multifd_join_device_state_save_threads(void);
@@ -75,4 +75,7 @@ bool migration_in_bg_snapshot(void);
/* migration/block-dirty-bitmap.c */
void dirty_bitmap_mig_init(void);
+/* migration/pbs-state.c */
+void pbs_state_mig_init(void);
+
#endif
diff --git a/migration/meson.build b/migration/meson.build
index 46e92249a1..fb3fd7d7d0 100644
index ea9aedeefc..c27dc9bd97 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -8,6 +8,7 @@ migration_files = files(
@@ -7,8 +7,10 @@ migration_files = files(
'qemu-file-channel.c',
'qemu-file.c',
'yank_functions.c',
)
+system_ss.add(libproxmox_backup_qemu)
system_ss.add(files(
'block-dirty-bitmap.c',
@@ -31,6 +32,7 @@ system_ss.add(files(
'multifd-zlib.c',
'multifd-zero-page.c',
'options.c',
+ 'pbs-state.c',
'postcopy-ram.c',
'savevm.c',
'savevm-async.c',
)
softmmu_ss.add(migration_files)
+softmmu_ss.add(libproxmox_backup_qemu)
softmmu_ss.add(files(
'block-dirty-bitmap.c',
diff --git a/migration/migration.c b/migration/migration.c
index d46e776e24..2f3430f440 100644
index 041b8451a6..9df2eed75e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -319,6 +319,7 @@ void migration_object_init(void)
/* Initialize cpu throttle timers */
cpu_throttle_init();
@@ -218,6 +218,7 @@ void migration_object_init(void)
blk_mig_init();
ram_mig_init();
dirty_bitmap_mig_init();
+ pbs_state_mig_init();
}
typedef struct {
void migration_cancel(void)
diff --git a/migration/pbs-state.c b/migration/pbs-state.c
new file mode 100644
index 0000000000..a97187e4d7
index 0000000000..29f2b3860d
--- /dev/null
+++ b/migration/pbs-state.c
@@ -0,0 +1,104 @@
@@ -0,0 +1,106 @@
+/*
+ * PBS (dirty-bitmap) state migration
+ */
@@ -93,8 +86,11 @@ index 0000000000..a97187e4d7
+/* state is accessed via this static variable directly, 'opaque' is NULL */
+static PBSState pbs_state;
+
+static void pbs_state_pending(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
+static void pbs_state_save_pending(QEMUFile *f, void *opaque,
+ uint64_t max_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ /* we send everything in save_setup, so nothing is ever pending */
+}
@@ -120,7 +116,7 @@ index 0000000000..a97187e4d7
+}
+
+/* serialize PBS state and send to target via f, called on source */
+static int pbs_state_save_setup(QEMUFile *f, void *opaque, Error **errp)
+static int pbs_state_save_setup(QEMUFile *f, void *opaque)
+{
+ size_t buf_size;
+ uint8_t *buf = proxmox_export_state(&buf_size);
@@ -164,8 +160,7 @@ index 0000000000..a97187e4d7
+static SaveVMHandlers savevm_pbs_state_handlers = {
+ .save_setup = pbs_state_save_setup,
+ .has_postcopy = pbs_state_has_postcopy,
+ .state_pending_exact = pbs_state_pending,
+ .state_pending_estimate = pbs_state_pending,
+ .save_live_pending = pbs_state_save_pending,
+ .is_active_iterate = pbs_state_is_active_iterate,
+ .load_state = pbs_state_load,
+ .is_active = pbs_state_is_active,
@@ -180,22 +175,22 @@ index 0000000000..a97187e4d7
+ NULL);
+}
diff --git a/pve-backup.c b/pve-backup.c
index e931cb9203..366b015589 100644
index 9c20ef3a5e..59ccb38ceb 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1089,6 +1089,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
@@ -1132,6 +1132,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0c3ebfa74e..6838187607 100644
index a0d1d278e9..e5de769dc1 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1008,6 +1008,11 @@
@@ -838,6 +838,11 @@
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
#
@@ -204,14 +199,14 @@ index 0c3ebfa74e..6838187607 100644
+# migration cap if this is false/unset may lead
+# to crashes on migration!
+#
# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
# parameter.
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
@@ -1021,6 +1026,7 @@
##
@@ -845,6 +850,7 @@
'data': { 'pbs-dirty-bitmap': 'bool',
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
'pbs-library-version': 'str',
'backup-max-workers': 'bool' } }
'pbs-library-version': 'str' } }
##

View File

@@ -1,81 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 23 Jun 2022 14:00:05 +0200
Subject: [PATCH] Revert "block/rbd: workaround for ceph issue #53784"
This reverts commit fc176116cdea816ceb8dd969080b2b95f58edbc0 in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/rbd.c | 42 ++----------------------------------------
1 file changed, 2 insertions(+), 40 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index bf143fac00..70d92966f7 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1515,7 +1515,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r;
RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags;
- uint64_t head = 0;
assert(offset + bytes <= s->image_size);
@@ -1543,43 +1542,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status;
}
-#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
- /*
- * librbd had a bug until early 2022 that affected all versions of ceph that
- * supported fast-diff. This bug results in reporting of incorrect offsets
- * if the offset parameter to rbd_diff_iterate2 is not object aligned.
- * Work around this bug by rounding down the offset to object boundaries.
- * This is OK because we call rbd_diff_iterate2 with whole_object = true.
- * However, this workaround only works for non cloned images with default
- * striping.
- *
- * See: https://tracker.ceph.com/issues/53784
- */
-
- /* check if RBD image has non-default striping enabled */
- if (features & RBD_FEATURE_STRIPINGV2) {
- return status;
- }
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
- /*
- * check if RBD image is a clone (= has a parent).
- *
- * rbd_get_parent_info is deprecated from Nautilus onwards, but the
- * replacement rbd_get_parent is not present in Luminous and Mimic.
- */
- if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
- return status;
- }
-#pragma GCC diagnostic pop
-
- head = req.offs & (s->object_size - 1);
- req.offs -= head;
- bytes += head;
-#endif
-
- r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
+ r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status;
@@ -1598,8 +1561,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
}
- assert(req.bytes > head);
- *pnum = req.bytes - head;
+ *pnum = req.bytes;
return status;
}

View File

@@ -15,22 +15,18 @@ transferred.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/block-dirty-bitmap.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
migration/block-dirty-bitmap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index f2c352d4a7..931a8481e9 100644
index 35f5ef688d..c4640925e7 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -539,7 +539,11 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
}
@@ -538,7 +538,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) {
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
error_report_err(local_err);
- return -1;
+ if (errp != NULL) {
+ error_report_err(*errp);
+ *errp = NULL;
+ }
+ continue;
}

View File

@@ -21,10 +21,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 30 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index 2f0f4dac09..b523137cff 100644
index 4d2a416ce7..c345d30812 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1392,12 +1392,42 @@ static char *get_initiator_name(QemuOpts *opts)
@@ -1372,12 +1372,42 @@ static char *get_initiator_name(QemuOpts *opts)
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;

View File

@@ -1,36 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 23 Jun 2022 14:00:07 +0200
Subject: [PATCH] Revert "block/rbd: fix handling of holes in
.bdrv_co_block_status"
This reverts commit 9e302f64bb407a9bb097b626da97228c2654cfee in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/rbd.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 70d92966f7..931b513828 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1474,11 +1474,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs);
-
- /* treat a hole like an unallocated area and bail out */
- if (!exists) {
- return 0;
- }
+ /*
+ * we do not diff against a snapshot so we should never receive a callback
+ * for a hole.
+ */
+ assert(exists);
if (!req->exists && offs > req->offs) {
/*

View File

@@ -0,0 +1,598 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 26 Jan 2021 15:45:30 +0100
Subject: [PATCH] PVE: Use coroutine QMP for backup/cancel_backup
Finally turn backup QMP calls into coroutines, now that it's possible.
This has the benefit that calls are asynchronous to the main loop, i.e.
long-running operations like connecting to a PBS server will no longer
hang the VM.
Additionally, it allows us to get rid of block_on_coroutine_fn, which
was always a hacky workaround.
While we're already spring cleaning, also remove the QmpBackupTask
struct, since we can now put the 'prepare' function directly into
qmp_backup and thus no longer need those giant walls of text.
(Note that this change is actually required for our patches to work
with 5.2.0: otherwise monitor_get_fd() fails, because we are not in a
QMP coroutine but in one we start ourselves. We could of course set the
monitor for that coroutine ourselves, but let's just fix it the right
way instead.)
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 +-
hmp-commands.hx | 2 +
proxmox-backup-client.c | 31 -----
pve-backup.c | 232 ++++++++++-----------------------
qapi/block-core.json | 4 +-
5 files changed, 77 insertions(+), 196 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 69254396d5..b838586fc0 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1016,7 +1016,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
g_free(global_snapshots);
}
-void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup_cancel(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
@@ -1025,7 +1025,7 @@ void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, error);
}
-void hmp_backup(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 7faba36b39..dca4e58858 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -109,6 +109,7 @@ ERST
"\n\t\t\t Use -d to dump data into a directory instead"
"\n\t\t\t of using VMA format.",
.cmd = hmp_backup,
+ .coroutine = true,
},
SRST
@@ -122,6 +123,7 @@ ERST
.params = "",
.help = "cancel the current VM backup",
.cmd = hmp_backup_cancel,
+ .coroutine = true,
},
SRST
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index 4ce7bc0b5e..0923037dec 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -5,37 +5,6 @@
/* Proxmox Backup Server client bindings using coroutines */
-typedef struct BlockOnCoroutineWrapper {
- AioContext *ctx;
- CoroutineEntry *entry;
- void *entry_arg;
- bool finished;
-} BlockOnCoroutineWrapper;
-
-static void coroutine_fn block_on_coroutine_wrapper(void *opaque)
-{
- BlockOnCoroutineWrapper *wrapper = opaque;
- wrapper->entry(wrapper->entry_arg);
- wrapper->finished = true;
- aio_wait_kick();
-}
-
-void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg)
-{
- assert(!qemu_in_coroutine());
-
- AioContext *ctx = qemu_get_current_aio_context();
- BlockOnCoroutineWrapper wrapper = {
- .finished = false,
- .entry = entry,
- .entry_arg = entry_arg,
- .ctx = ctx,
- };
- Coroutine *wrapper_co = qemu_coroutine_create(block_on_coroutine_wrapper, &wrapper);
- aio_co_enter(ctx, wrapper_co);
- AIO_WAIT_WHILE(ctx, !wrapper.finished);
-}
-
// This is called from another thread, so we use aio_co_schedule()
static void proxmox_backup_schedule_wake(void *data) {
CoCtxData *waker = (CoCtxData *)data;
diff --git a/pve-backup.c b/pve-backup.c
index 59ccb38ceb..f858003a06 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -357,7 +357,7 @@ static void job_cancel_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-static void coroutine_fn pvebackup_co_cancel(void *opaque)
+void coroutine_fn qmp_backup_cancel(Error **errp)
{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
@@ -394,11 +394,6 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-void qmp_backup_cancel(Error **errp)
-{
- block_on_coroutine_fn(pvebackup_co_cancel, NULL);
-}
-
// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config(
const char *file,
@@ -533,50 +528,27 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-typedef struct QmpBackupTask {
- const char *backup_file;
- bool has_password;
- const char *password;
- bool has_keyfile;
- const char *keyfile;
- bool has_key_password;
- const char *key_password;
- bool has_backup_id;
- const char *backup_id;
- bool has_backup_time;
- const char *fingerprint;
- bool has_fingerprint;
- int64_t backup_time;
- bool has_use_dirty_bitmap;
- bool use_dirty_bitmap;
- bool has_format;
- BackupFormat format;
- bool has_config_file;
- const char *config_file;
- bool has_firewall_file;
- const char *firewall_file;
- bool has_devlist;
- const char *devlist;
- bool has_compress;
- bool compress;
- bool has_encrypt;
- bool encrypt;
- bool has_speed;
- int64_t speed;
- Error **errp;
- UuidInfo *result;
-} QmpBackupTask;
-
-static void coroutine_fn pvebackup_co_prepare(void *opaque)
+UuidInfo coroutine_fn *qmp_backup(
+ const char *backup_file,
+ bool has_password, const char *password,
+ bool has_keyfile, const char *keyfile,
+ bool has_key_password, const char *key_password,
+ bool has_fingerprint, const char *fingerprint,
+ bool has_backup_id, const char *backup_id,
+ bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
+ bool has_format, BackupFormat format,
+ bool has_config_file, const char *config_file,
+ bool has_firewall_file, const char *firewall_file,
+ bool has_devlist, const char *devlist,
+ bool has_speed, int64_t speed, Error **errp)
{
assert(qemu_in_coroutine());
qemu_co_mutex_lock(&backup_state.backup_mutex);
- QmpBackupTask *task = opaque;
-
- task->result = NULL; // just to be sure
-
BlockBackend *blk;
BlockDriverState *bs = NULL;
const char *backup_dir = NULL;
@@ -593,17 +565,17 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
+ return NULL;
}
/* Todo: try to auto-detect format based on file name */
- BackupFormat format = task->has_format ? task->format : BACKUP_FORMAT_VMA;
+ format = has_format ? format : BACKUP_FORMAT_VMA;
- if (task->has_devlist) {
- devs = g_strsplit_set(task->devlist, ",;:", -1);
+ if (has_devlist) {
+ devs = g_strsplit_set(devlist, ",;:", -1);
gchar **d = devs;
while (d && *d) {
@@ -611,14 +583,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (blk) {
bs = blk_bs(blk);
if (!bdrv_is_inserted(bs)) {
- error_setg(task->errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
goto err;
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
di_list = g_list_append(di_list, di);
} else {
- error_set(task->errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", *d);
goto err;
}
@@ -641,7 +613,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (!di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
goto err;
}
@@ -651,13 +623,13 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, task->errp)) {
+ if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
goto err;
}
ssize_t size = bdrv_getlength(di->bs);
if (size < 0) {
- error_setg_errno(task->errp, -di->size, "bdrv_getlength failed");
+ error_setg_errno(errp, -di->size, "bdrv_getlength failed");
goto err;
}
di->size = size;
@@ -684,47 +656,44 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (format == BACKUP_FORMAT_PBS) {
- if (!task->has_password) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
+ if (!has_password) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
goto err_mutex;
}
- if (!task->has_backup_id) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
+ if (!has_backup_id) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
goto err_mutex;
}
- if (!task->has_backup_time) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
+ if (!has_backup_time) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
- bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
-
-
char *pbs_err = NULL;
pbs = proxmox_backup_new(
- task->backup_file,
- task->backup_id,
- task->backup_time,
+ backup_file,
+ backup_id,
+ backup_time,
dump_cb_block_size,
- task->has_password ? task->password : NULL,
- task->has_keyfile ? task->keyfile : NULL,
- task->has_key_password ? task->key_password : NULL,
- task->has_compress ? task->compress : true,
- task->has_encrypt ? task->encrypt : task->has_keyfile,
- task->has_fingerprint ? task->fingerprint : NULL,
+ has_password ? password : NULL,
+ has_keyfile ? keyfile : NULL,
+ has_key_password ? key_password : NULL,
+ has_compress ? compress : true,
+ has_encrypt ? encrypt : has_keyfile,
+ has_fingerprint ? fingerprint : NULL,
&pbs_err);
if (!pbs) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
goto err_mutex;
}
- int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ int connect_result = proxmox_backup_co_connect(pbs, errp);
if (connect_result < 0)
goto err_mutex;
@@ -743,9 +712,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
- if (use_dirty_bitmap) {
+ if (has_use_dirty_bitmap && use_dirty_bitmap) {
if (bitmap == NULL) {
- bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, errp);
if (!bitmap) {
goto err_mutex;
}
@@ -775,12 +744,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp);
if (dev_id < 0) {
goto err_mutex;
}
- if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, errp))) {
goto err_mutex;
}
@@ -794,10 +763,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
+ vmaw = vma_writer_create(backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
}
goto err_mutex;
}
@@ -808,25 +777,25 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, errp))) {
goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
if (di->dev_id <= 0) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- if (mkdir(task->backup_file, 0640) != 0) {
- error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
- task->backup_file);
+ if (mkdir(backup_file, 0640) != 0) {
+ error_setg_errno(errp, errno, "can't create directory '%s'\n",
+ backup_file);
goto err_mutex;
}
- backup_dir = task->backup_file;
+ backup_dir = backup_file;
l = di_list;
while (l) {
@@ -840,34 +809,34 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bdrv_img_create(di->targetfile, "raw", NULL, NULL, NULL,
di->size, flags, false, &local_err);
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
}
} else {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
goto err_mutex;
}
/* add configuration file to archive */
- if (task->has_config_file) {
- if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_config_file) {
+ if (pvebackup_co_add_config(config_file, config_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
/* add firewall file to archive */
- if (task->has_firewall_file) {
- if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_firewall_file) {
+ if (pvebackup_co_add_config(firewall_file, firewall_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
@@ -885,7 +854,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (backup_state.stat.backup_file) {
g_free(backup_state.stat.backup_file);
}
- backup_state.stat.backup_file = g_strdup(task->backup_file);
+ backup_state.stat.backup_file = g_strdup(backup_file);
uuid_copy(backup_state.stat.uuid, uuid);
uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
@@ -900,7 +869,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_mutex_unlock(&backup_state.stat.lock);
- backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
+ backup_state.speed = (has_speed && speed > 0) ? speed : 0;
backup_state.vmaw = vmaw;
backup_state.pbs = pbs;
@@ -910,8 +879,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str;
- task->result = uuid_info;
-
/* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
* backup_mutex locked. This is fine, a CoMutex can be held across yield
* points, and we'll release it as soon as the BH reschedules us.
@@ -925,7 +892,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_coroutine_yield();
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err;
}
@@ -938,7 +905,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
/* start the first job in the transaction */
job_txn_start_seq(backup_state.txn);
- return;
+ return uuid_info;
err_mutex:
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -969,7 +936,7 @@ err:
if (vmaw) {
Error *err = NULL;
vma_writer_close(vmaw, &err);
- unlink(task->backup_file);
+ unlink(backup_file);
}
if (pbs) {
@@ -980,65 +947,8 @@ err:
rmdir(backup_dir);
}
- task->result = NULL;
-
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
-}
-
-UuidInfo *qmp_backup(
- const char *backup_file,
- bool has_password, const char *password,
- bool has_keyfile, const char *keyfile,
- bool has_key_password, const char *key_password,
- bool has_fingerprint, const char *fingerprint,
- bool has_backup_id, const char *backup_id,
- bool has_backup_time, int64_t backup_time,
- bool has_use_dirty_bitmap, bool use_dirty_bitmap,
- bool has_compress, bool compress,
- bool has_encrypt, bool encrypt,
- bool has_format, BackupFormat format,
- bool has_config_file, const char *config_file,
- bool has_firewall_file, const char *firewall_file,
- bool has_devlist, const char *devlist,
- bool has_speed, int64_t speed, Error **errp)
-{
- QmpBackupTask task = {
- .backup_file = backup_file,
- .has_password = has_password,
- .password = password,
- .has_keyfile = has_keyfile,
- .keyfile = keyfile,
- .has_key_password = has_key_password,
- .key_password = key_password,
- .has_fingerprint = has_fingerprint,
- .fingerprint = fingerprint,
- .has_backup_id = has_backup_id,
- .backup_id = backup_id,
- .has_backup_time = has_backup_time,
- .backup_time = backup_time,
- .has_use_dirty_bitmap = has_use_dirty_bitmap,
- .use_dirty_bitmap = use_dirty_bitmap,
- .has_compress = has_compress,
- .compress = compress,
- .has_encrypt = has_encrypt,
- .encrypt = encrypt,
- .has_format = has_format,
- .format = format,
- .has_config_file = has_config_file,
- .config_file = config_file,
- .has_firewall_file = has_firewall_file,
- .firewall_file = firewall_file,
- .has_devlist = has_devlist,
- .devlist = devlist,
- .has_speed = has_speed,
- .speed = speed,
- .errp = errp,
- };
-
- block_on_coroutine_fn(pvebackup_co_prepare, &task);
-
- return task.result;
+ return NULL;
}
BackupStatus *qmp_query_backup(Error **errp)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e5de769dc1..afa67c28d2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -801,7 +801,7 @@
'*config-file': 'str',
'*firewall-file': 'str',
'*devlist': 'str', '*speed': 'int' },
- 'returns': 'UuidInfo' }
+ 'returns': 'UuidInfo', 'coroutine': true }
##
# @query-backup:
@@ -823,7 +823,7 @@
# Notes: This command succeeds even if there is no backup process running.
#
##
-{ 'command': 'backup-cancel' }
+{ 'command': 'backup-cancel', 'coroutine': true }
##
# @ProxmoxSupportStatus:

View File

@@ -1,162 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Tue, 17 May 2022 09:46:02 +0200
Subject: [PATCH] Revert "block/rbd: implement bdrv_co_block_status"
During backup, bdrv_co_block_status is called for each block copy
chunk. When RBD is used, the current implementation with
rbd_diff_iterate2() using whole_object=true takes about linearly more
time, depending on the image size. Since there are linearly more
chunks, the slowdown is quadratic, becoming unacceptable for large
images (starting somewhere between 500-1000 GiB in my testing).
This reverts commit 0347a8fd4c3faaedf119be04c197804be40a384b as a
stop-gap measure, until it's clear how to make the implementation
more efficient.
Upstream bug report:
https://gitlab.com/qemu-project/qemu/-/issues/1026
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/rbd.c | 112 ----------------------------------------------------
1 file changed, 112 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 931b513828..4ab9bb5e02 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -108,12 +108,6 @@ typedef struct RBDTask {
int64_t ret;
} RBDTask;
-typedef struct RBDDiffIterateReq {
- uint64_t offs;
- uint64_t bytes;
- bool exists;
-} RBDDiffIterateReq;
-
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
BlockdevOptionsRbd *opts, bool cache,
const char *keypairs, const char *secretid,
@@ -1460,111 +1454,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
return spec_info;
}
-/*
- * rbd_diff_iterate2 allows to interrupt the exection by returning a negative
- * value in the callback routine. Choose a value that does not conflict with
- * an existing exitcode and return it if we want to prematurely stop the
- * execution because we detected a change in the allocation status.
- */
-#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
-
-static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
- int exists, void *opaque)
-{
- RBDDiffIterateReq *req = opaque;
-
- assert(req->offs + req->bytes <= offs);
- /*
- * we do not diff against a snapshot so we should never receive a callback
- * for a hole.
- */
- assert(exists);
-
- if (!req->exists && offs > req->offs) {
- /*
- * we started in an unallocated area and hit the first allocated
- * block. req->bytes must be set to the length of the unallocated area
- * before the allocated area. stop further processing.
- */
- req->bytes = offs - req->offs;
- return QEMU_RBD_EXIT_DIFF_ITERATE2;
- }
-
- if (req->exists && offs > req->offs + req->bytes) {
- /*
- * we started in an allocated area and jumped over an unallocated area,
- * req->bytes contains the length of the allocated area before the
- * unallocated area. stop further processing.
- */
- return QEMU_RBD_EXIT_DIFF_ITERATE2;
- }
-
- req->bytes += len;
- req->exists = true;
-
- return 0;
-}
-
-static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
-{
- BDRVRBDState *s = bs->opaque;
- int status, r;
- RBDDiffIterateReq req = { .offs = offset };
- uint64_t features, flags;
-
- assert(offset + bytes <= s->image_size);
-
- /* default to all sectors allocated */
- status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
- *map = offset;
- *file = bs;
- *pnum = bytes;
-
- /* check if RBD image supports fast-diff */
- r = rbd_get_features(s->image, &features);
- if (r < 0) {
- return status;
- }
- if (!(features & RBD_FEATURE_FAST_DIFF)) {
- return status;
- }
-
- /* check if RBD fast-diff result is valid */
- r = rbd_get_flags(s->image, &flags);
- if (r < 0) {
- return status;
- }
- if (flags & RBD_FLAG_FAST_DIFF_INVALID) {
- return status;
- }
-
- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
- qemu_rbd_diff_iterate_cb, &req);
- if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
- return status;
- }
- assert(req.bytes <= bytes);
- if (!req.exists) {
- if (r == 0) {
- /*
- * rbd_diff_iterate2 does not invoke callbacks for unallocated
- * areas. This here catches the case where no callback was
- * invoked at all (req.bytes == 0).
- */
- assert(req.bytes == 0);
- req.bytes = bytes;
- }
- status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
- }
-
- *pnum = req.bytes;
- return status;
-}
-
static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs)
{
BDRVRBDState *s = bs->opaque;
@@ -1801,7 +1690,6 @@ static BlockDriver bdrv_rbd = {
#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
.bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
#endif
- .bdrv_co_block_status = qemu_rbd_co_block_status,
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,

View File

@@ -0,0 +1,98 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 10 Feb 2021 11:07:06 +0100
Subject: [PATCH] PBS: add master key support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
this requires a new enough libproxmox-backup-qemu0, and allows querying
from the PVE side to avoid QMP calls with unsupported parameters.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
pve-backup.c | 3 +++
qapi/block-core.json | 7 +++++++
3 files changed, 11 insertions(+)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index b838586fc0..5b52b93232 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1039,6 +1039,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS password
false, NULL, // PBS keyfile
false, NULL, // PBS key_password
+ false, NULL, // PBS master_keyfile
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
diff --git a/pve-backup.c b/pve-backup.c
index f858003a06..04ebfc1e33 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -533,6 +533,7 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_password, const char *password,
bool has_keyfile, const char *keyfile,
bool has_key_password, const char *key_password,
+ bool has_master_keyfile, const char *master_keyfile,
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
@@ -681,6 +682,7 @@ UuidInfo coroutine_fn *qmp_backup(
has_password ? password : NULL,
has_keyfile ? keyfile : NULL,
has_key_password ? key_password : NULL,
+ has_master_keyfile ? master_keyfile : NULL,
has_compress ? compress : true,
has_encrypt ? encrypt : has_keyfile,
has_fingerprint ? fingerprint : NULL,
@@ -1044,5 +1046,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_dirty_bitmap_savevm = true;
ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
+ ret->pbs_masterkey = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index afa67c28d2..84e4406d21 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -772,6 +772,8 @@
#
# @key-password: password for keyfile (optional for format 'pbs')
#
+# @master-keyfile: PEM-formatted master public keyfile (optional for format 'pbs')
+#
# @fingerprint: server cert fingerprint (optional for format 'pbs')
#
# @backup-id: backup ID (required for format 'pbs')
@@ -791,6 +793,7 @@
'*password': 'str',
'*keyfile': 'str',
'*key-password': 'str',
+ '*master-keyfile': 'str',
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
@@ -843,6 +846,9 @@
# migration cap if this is false/unset may lead
# to crashes on migration!
#
+# @pbs-masterkey: True if the QMP backup call supports the 'master-keyfile'
+# parameter.
+#
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
##
@@ -851,6 +857,7 @@
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-dirty-bitmap-migration': 'bool',
+ 'pbs-masterkey': 'bool',
'pbs-library-version': 'str' } }
##

View File

@@ -1,472 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 11 Apr 2024 11:29:28 +0200
Subject: [PATCH] PVE backup: add fleecing option
When a fleecing option is given, it is expected that each device has
a corresponding "-fleecing" block device already attached, except for
EFI disk and TPM state, where fleecing is never used.
The following graph was adapted from [0] which also contains more
details about fleecing.
[guest]
|
| root
v file
[copy-before-write]<------[snapshot-access]
| |
| file | target
v v
[source] [fleecing]
For fleecing, a copy-before-write filter is inserted on top of the
source node, as well as a snapshot-access node pointing to the filter
node which allows to read the consistent state of the image at the
time it was inserted. New guest writes are passed through the
copy-before-write filter which will first copy over old data to the
fleecing image in case that old data is still needed by the
snapshot-access node.
The backup process will sequentially read from the snapshot access,
which has a bitmap and knows whether to read from the original image
or the fleecing image to get the "snapshot" state, i.e. data from the
source image at the time when the copy-before-write filter was
inserted. After reading, the copied sections are discarded from the
fleecing image to reduce space usage.
All of this can be restricted by an initial dirty bitmap to parts of
the source image that are required for an incremental backup.
For discard to work, it is necessary that the fleecing image does not
have a larger cluster size than the backup job granularity. Since
querying that size does not always work, e.g. for RBD with krbd, the
cluster size will not be reported, a minimum of 4 MiB is used. A job
with PBS target already has at least this granularity, so it's just
relevant for other targets. I.e. edge cases where this minimum is not
enough should be very rare in practice. If ever necessary in the
future, can still add a passed-in value for the backup QMP command to
override.
Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
are set when installing the copy-before-write filter and
snapshot-access. When an error or timeout occurs, the problematic (and
each further) snapshot operation will fail and thus cancel the backup
instead of breaking the guest write.
Note that job_id cannot be inferred from the snapshot-access bs because
it has no parent, so just pass the one from the original bs.
[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: improve error when cbw fails as reported by Friedrich Weber]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/copy-before-write.c | 18 ++--
block/copy-before-write.h | 1 +
block/monitor/block-hmp-cmds.c | 1 +
pve-backup.c | 175 ++++++++++++++++++++++++++++++++-
qapi/block-core.json | 10 +-
5 files changed, 195 insertions(+), 10 deletions(-)
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index fd470f5f92..5c23b578ef 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -27,6 +27,7 @@
#include "qobject/qjson.h"
#include "system/block-backend.h"
+#include "qemu/atomic.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "block/block_int.h"
@@ -75,7 +76,8 @@ typedef struct BDRVCopyBeforeWriteState {
* @snapshot_error is normally zero. But on first copy-before-write failure
* when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
* value of this error (<0). After that all in-flight and further
- * snapshot-API requests will fail with that error.
+ * snapshot-API requests will fail with that error. To be accessed with
+ * atomics.
*/
int snapshot_error;
} BDRVCopyBeforeWriteState;
@@ -115,7 +117,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
return 0;
}
- if (s->snapshot_error) {
+ if (qatomic_read(&s->snapshot_error)) {
return 0;
}
@@ -139,9 +141,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
WITH_QEMU_LOCK_GUARD(&s->lock) {
if (ret < 0) {
assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
- if (!s->snapshot_error) {
- s->snapshot_error = ret;
- }
+ qatomic_cmpxchg(&s->snapshot_error, 0, ret);
} else {
bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
}
@@ -215,7 +215,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMU_LOCK_GUARD(&s->lock);
- if (s->snapshot_error) {
+ if (qatomic_read(&s->snapshot_error)) {
g_free(req);
return NULL;
}
@@ -595,6 +595,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
bdrv_unref(bs);
}
+int bdrv_cbw_snapshot_error(BlockDriverState *bs)
+{
+ BDRVCopyBeforeWriteState *s = bs->opaque;
+ return qatomic_read(&s->snapshot_error);
+}
+
static void cbw_init(void)
{
bdrv_register(&bdrv_cbw_filter);
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 2a5d4ba693..969da3620f 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockCopyState **bcs,
Error **errp);
void bdrv_cbw_drop(BlockDriverState *bs);
+int bdrv_cbw_snapshot_error(BlockDriverState *bs);
#endif /* COPY_BEFORE_WRITE_H */
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4f30f99644..66d16d342f 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1045,6 +1045,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
NULL, NULL,
devlist, qdict_haskey(qdict, "speed"), speed,
false, 0, // BackupPerf max-workers
+ false, false, // fleecing
&error);
hmp_handle_error(mon, error);
diff --git a/pve-backup.c b/pve-backup.c
index 366b015589..9b66788ab5 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -7,6 +7,7 @@
#include "system/blockdev.h"
#include "block/block_int-global-state.h"
#include "block/blockjob.h"
+#include "block/copy-before-write.h"
#include "block/dirty-bitmap.h"
#include "block/graph-lock.h"
#include "qapi/qapi-commands-block.h"
@@ -81,8 +82,15 @@ static void pvebackup_init(void)
// initialize PVEBackupState at startup
opts_init(pvebackup_init);
+typedef struct PVEBackupFleecingInfo {
+ BlockDriverState *bs;
+ BlockDriverState *cbw;
+ BlockDriverState *snapshot_access;
+} PVEBackupFleecingInfo;
+
typedef struct PVEBackupDevInfo {
BlockDriverState *bs;
+ PVEBackupFleecingInfo fleecing;
size_t size;
uint64_t block_size;
uint8_t dev_id;
@@ -352,11 +360,44 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
+static void cleanup_snapshot_access(PVEBackupDevInfo *di)
+{
+ if (di->fleecing.snapshot_access) {
+ bdrv_unref(di->fleecing.snapshot_access);
+ di->fleecing.snapshot_access = NULL;
+ }
+ if (di->fleecing.cbw) {
+ bdrv_cbw_drop(di->fleecing.cbw);
+ di->fleecing.cbw = NULL;
+ }
+}
+
static void pvebackup_complete_cb(void *opaque, int ret)
{
PVEBackupDevInfo *di = opaque;
di->completed_ret = ret;
+ if (di->fleecing.cbw) {
+ /*
+ * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
+ * error, making further requests to the snapshot fail with EACCES, which then also fail the
+ * job. But that code is not the root cause and just confusing, so update it.
+ */
+ int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
+ if (di->completed_ret == -EACCES && snapshot_error) {
+ di->completed_ret = snapshot_error;
+ }
+ }
+
+ /*
+ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
+ * won't be done as a coroutine anyways:
+ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
+ * just spawn a BH calling bdrv_unref().
+ * - For cbw, draining would need to spawn a BH.
+ */
+ cleanup_snapshot_access(di);
+
/*
* Needs to happen outside of coroutine, because it takes the graph write lock.
*/
@@ -487,6 +528,65 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
+/*
+ * Setup a snapshot-access block node for a device with associated fleecing image.
+ */
+static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
+{
+ Error *local_err = NULL;
+
+ if (!di->fleecing.bs) {
+ error_setg(errp, "no associated fleecing image");
+ return -1;
+ }
+
+ QDict *cbw_opts = qdict_new();
+ qdict_put_str(cbw_opts, "driver", "copy-before-write");
+ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
+ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
+
+ if (di->bitmap) {
+ /*
+ * Only guest writes to parts relevant for the backup need to be intercepted with
+ * old data being copied to the fleecing image.
+ */
+ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
+ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
+ }
+ /*
+ * Fleecing storage is supposed to be fast and it's better to break backup than guest
+ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
+ * abort a bit before that.
+ */
+ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
+ qdict_put_int(cbw_opts, "cbw-timeout", 45);
+
+ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
+
+ if (!di->fleecing.cbw) {
+ error_setg(errp, "appending cbw node for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ return -1;
+ }
+
+ QDict *snapshot_access_opts = qdict_new();
+ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
+ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
+
+ di->fleecing.snapshot_access =
+ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
+ if (!di->fleecing.snapshot_access) {
+ bdrv_cbw_drop(di->fleecing.cbw);
+ di->fleecing.cbw = NULL;
+
+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ return -1;
+ }
+
+ return 0;
+}
+
/*
* backup_job_create can *not* be run from a coroutine, so this can't either.
* The caller is responsible that backup_mutex is held nonetheless.
@@ -523,9 +623,42 @@ static void create_backup_jobs_bh(void *opaque) {
}
bdrv_drained_begin(di->bs);
+ BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
+
+ BlockDriverState *source_bs = di->bs;
+ bool discard_source = false;
+ if (di->fleecing.bs) {
+ if (setup_snapshot_access(di, &local_err) < 0) {
+ error_setg(errp, "%s - setting up snapshot access for fleecing failed: %s",
+ di->device_name,
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ bdrv_drained_end(di->bs);
+ break;
+ }
+
+ source_bs = di->fleecing.snapshot_access;
+ discard_source = true;
+
+ /*
+ * bdrv_get_info() just returns 0 (= doesn't matter) for RBD when using krbd. But discard
+ * on the fleecing image won't work if the backup job's granularity is less than the RBD
+ * object size (default 4 MiB), so it does matter. Always use at least 4 MiB. With a PBS
+ * target, the backup job granularity would already be at least this much.
+ */
+ perf.min_cluster_size = 4 * 1024 * 1024;
+ /*
+ * For discard to work, cluster size for the backup job must be at least the same as for
+ * the fleecing image.
+ */
+ BlockDriverInfo bdi;
+ if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
+ perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
+ }
+ }
+
BlockJob *job = backup_job_create(
- NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
- bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
+ di->device_name, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+ bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
&local_err);
@@ -539,6 +672,7 @@ static void create_backup_jobs_bh(void *opaque) {
}
if (!job || local_err) {
+ cleanup_snapshot_access(di);
error_setg(errp, "backup_job_create failed: %s",
local_err ? error_get_pretty(local_err) : "null");
break;
@@ -581,6 +715,14 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
+/*
+ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
+ */
+static bool device_uses_fleecing(const char *device_id)
+{
+ return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
+}
+
/*
* Returns a list of device infos, which needs to be freed by the caller. In
* case of an error, errp will be set, but the returned value might still be a
@@ -588,6 +730,7 @@ static void create_backup_jobs_bh(void *opaque) {
*/
static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
const char *devlist,
+ bool fleecing,
Error **errp)
{
gchar **devs = NULL;
@@ -613,6 +756,30 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
di->bs = bs;
di->device_name = g_strdup(bdrv_get_device_name(bs));
+ if (fleecing && device_uses_fleecing(*d)) {
+ g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
+ BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
+ if (!fleecing_blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", fleecing_devid);
+ goto err;
+ }
+ BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
+ if (!bdrv_co_is_inserted(fleecing_bs)) {
+ error_setg(errp, "Device '%s' has no medium", fleecing_devid);
+ goto err;
+ }
+ /*
+ * Fleecing image needs to be the same size to act as a cbw target.
+ */
+ if (bs->total_sectors != fleecing_bs->total_sectors) {
+ error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
+ fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
+ goto err;
+ }
+ di->fleecing.bs = fleecing_bs;
+ }
+
di_list = g_list_append(di_list, di);
d++;
}
@@ -663,6 +830,7 @@ UuidInfo coroutine_fn *qmp_backup(
const char *devlist,
bool has_speed, int64_t speed,
bool has_max_workers, int64_t max_workers,
+ bool has_fleecing, bool fleecing,
Error **errp)
{
assert(qemu_in_coroutine());
@@ -691,7 +859,7 @@ UuidInfo coroutine_fn *qmp_backup(
format = has_format ? format : BACKUP_FORMAT_VMA;
bdrv_graph_co_rdlock();
- di_list = get_device_info(devlist, &local_err);
+ di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
bdrv_graph_co_rdunlock();
if (local_err) {
error_propagate(errp, local_err);
@@ -1093,5 +1261,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
+ ret->backup_fleecing = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6838187607..9bdcfa31ea 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -952,6 +952,10 @@
#
# @max-workers: see @BackupPerf for details. Default 16.
#
+# @fleecing: perform a backup with fleecing. For each device in @devlist, a
+# corresponding '-fleecing' device with the same size already needs to
+# be present.
+#
# Returns: the uuid of the backup job
#
##
@@ -972,7 +976,8 @@
'*firewall-file': 'str',
'*devlist': 'str',
'*speed': 'int',
- '*max-workers': 'int' },
+ '*max-workers': 'int',
+ '*fleecing': 'bool' },
'returns': 'UuidInfo', 'coroutine': true }
##
@@ -1018,6 +1023,8 @@
#
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
+# @backup-fleecing: Whether backup fleecing is supported or not.
+#
# @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
# supported or not.
#
@@ -1029,6 +1036,7 @@
'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
'pbs-library-version': 'str',
+ 'backup-fleecing': 'bool',
'backup-max-workers': 'bool' } }
##

View File

@@ -0,0 +1,53 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 9 Dec 2020 11:46:57 +0100
Subject: [PATCH] PVE: block/pbs: fast-path reads without allocation if
possible
...and switch over to g_malloc/g_free while at it to align with other
QEMU code.
Tracing shows the fast-path is taken almost all the time, though not
100% so the slow one is still necessary.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/pbs.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/pbs.c b/block/pbs.c
index 78dad0dcc4..ac54e816c0 100644
--- a/block/pbs.c
+++ b/block/pbs.c
@@ -200,7 +200,16 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
BDRVPBSState *s = bs->opaque;
int ret;
char *pbs_error = NULL;
- uint8_t *buf = malloc(bytes);
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
ReadCallbackData rcb = {
.co = qemu_coroutine_self(),
@@ -218,8 +227,10 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
return -EIO;
}
- qemu_iovec_from_buf(qiov, 0, buf, bytes);
- free(buf);
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
return ret;
}

View File

@@ -1,137 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 3 Jan 2025 14:03:12 +0100
Subject: [PATCH] adapt machine version deprecation for Proxmox VE
In commit a35f8577a0 ("include/hw: add macros for deprecation &
removal of versioned machines"), a new machine version deprecation and
removal policy was introduced. After only 3 years a machine version
will be deprecated while being removed after 6 years.
The deprecation is a bit early considering major PVE releases are
approximately every 2 years. This means that a deprecation warning can
already happen for a machine version that was introduced during the
previous major release. This would scare users for no good reason, so
avoid deprecating machine versions in PVE too early and define a
baseline of machine versions that will be supported throughout a
single major PVE release.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/hw/boards.h | 78 +++++++++++++++++++++++++++++----------------
1 file changed, 51 insertions(+), 27 deletions(-)
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 8ada4d5832..f9f3b75284 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -636,42 +636,66 @@ struct MachineState {
/*
- * How many years/major releases for each phase
- * of the life cycle. Assumes use of versioning
- * scheme where major is bumped each year
+ * Baseline of machine versions that are still considered supported throughout
+ * current major Proxmox VE release. Machine versions older than this are
+ * considered to be deprecated in Proxmox VE.
+ *
+ * Machine versions older than 6 years are removed just like in upstream QEMU.
+ * (policy takes effect with QEMU 10.1). Assumes yearly major QEMU release.
+ *
+ * QEMU release cycle N.0 in ~April, N.1 in ~August, N.2 in ~December
+ * Debian/PVE release cycle ~every two years in summer
+ *
+ * PVE - last QEMU - machine versions dropped - baseline
+ * 8 9.2 2.3 and older 2.4
+ * 9 11.2 5.2 and older 6.0
+ * 10 13.2 7.2 and older 8.0
+ */
+#define MACHINE_VER_BASELINE_PVE_MAJOR 2
+#define MACHINE_VER_BASELINE_PVE_MINOR 4
+#define MACHINE_VER_DELETION_MAJOR (QEMU_VERSION_MAJOR - 6)
+#define MACHINE_VER_DELETION_MINOR QEMU_VERSION_MINOR
+
+/*
+ * Proxmox VE needs to support the baseline throughout a major PVE release. So
+ * a QEMU release where the baseline is already deleted cannot be used.
+ * Removal policy after 6 years takes effect with QEMU 10.1.
*/
-#define MACHINE_VER_DELETION_MAJOR 6
-#define MACHINE_VER_DEPRECATION_MAJOR 3
+#if ((QEMU_VERSION_MAJOR > 10) || ((QEMU_VERSION_MAJOR == 10) && (QEMU_VERSION_MINOR >= 1)))
+#if ((MACHINE_VER_BASELINE_PVE_MAJOR < MACHINE_VER_DELETION_MAJOR) || \
+ ((MACHINE_VER_BASELINE_PVE_MAJOR == MACHINE_VER_DELETION_MAJOR) && \
+ (MACHINE_VER_BASELINE_PVE_MINOR < MACHINE_VER_DELETION_MINOR)))
+#error "Baseline machine version needed by Proxmox VE not supported anymore by this QEMU release"
+#endif
+#endif
/*
* Expands to a static string containing a deprecation
* message for a versioned machine type
*/
#define MACHINE_VER_DEPRECATION_MSG \
- "machines more than " stringify(MACHINE_VER_DEPRECATION_MAJOR) \
- " years old are subject to deletion after " \
- stringify(MACHINE_VER_DELETION_MAJOR) " years"
-
-#define _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor) \
- (((QEMU_VERSION_MAJOR - major) > cutoff) || \
- (((QEMU_VERSION_MAJOR - major) == cutoff) && \
- (QEMU_VERSION_MINOR - minor) >= 0))
-
-#define _MACHINE_VER_IS_EXPIRED2(cutoff, major, minor) \
- _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
-#define _MACHINE_VER_IS_EXPIRED3(cutoff, major, minor, micro) \
- _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
-#define _MACHINE_VER_IS_EXPIRED4(cutoff, major, minor, _unused, tag) \
- _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
-#define _MACHINE_VER_IS_EXPIRED5(cutoff, major, minor, micro, _unused, tag) \
- _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
-
-#define _MACHINE_IS_EXPIRED(cutoff, ...) \
+ "old machine version is subject to deletion during current major Proxmox VE release"
+
+#define _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor) \
+ ((major < baseline_major) || \
+ ((major == baseline_major) && \
+ (minor < baseline_minor)))
+
+#define _MACHINE_VER_IS_EXPIRED2(baseline_major, baseline_minor, major, minor) \
+ _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor)
+#define _MACHINE_VER_IS_EXPIRED3(baseline_major, baseline_minor, major, minor, micro) \
+ _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor)
+#define _MACHINE_VER_IS_EXPIRED4(baseline_major, baseline_minor, major, minor, _unused, tag) \
+ _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor)
+#define _MACHINE_VER_IS_EXPIRED5(baseline_major, baseline_minor, major, minor, micro, _unused, tag) \
+ _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor)
+
+#define _MACHINE_IS_EXPIRED(baseline_major, baseline_minor, ...) \
_MACHINE_VER_PICK(__VA_ARGS__, \
_MACHINE_VER_IS_EXPIRED5, \
_MACHINE_VER_IS_EXPIRED4, \
_MACHINE_VER_IS_EXPIRED3, \
- _MACHINE_VER_IS_EXPIRED2) (cutoff, __VA_ARGS__)
+ _MACHINE_VER_IS_EXPIRED2) (baseline_major, baseline_minor, __VA_ARGS__)
/*
* Evaluates true when a machine type with (major, minor)
@@ -680,7 +704,7 @@ struct MachineState {
* lifecycle rules
*/
#define MACHINE_VER_IS_DEPRECATED(...) \
- _MACHINE_IS_EXPIRED(MACHINE_VER_DEPRECATION_MAJOR, __VA_ARGS__)
+ _MACHINE_IS_EXPIRED(MACHINE_VER_BASELINE_PVE_MAJOR, MACHINE_VER_BASELINE_PVE_MINOR, __VA_ARGS__)
/*
* Evaluates true when a machine type with (major, minor)
@@ -689,7 +713,7 @@ struct MachineState {
* lifecycle rules
*/
#define MACHINE_VER_SHOULD_DELETE(...) \
- _MACHINE_IS_EXPIRED(MACHINE_VER_DELETION_MAJOR, __VA_ARGS__)
+ _MACHINE_IS_EXPIRED(MACHINE_VER_DELETION_MAJOR, MACHINE_VER_DELETION_MINOR, __VA_ARGS__)
/*
* Sets the deprecation reason for a versioned machine based

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/stream.c b/block/stream.c
index 999d9e56d4..e187cd1262 100644
index 97bee482dc..50093c9f57 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -27,7 +27,7 @@ enum {
@@ -28,7 +28,7 @@ enum {
* large enough to process multiple clusters in a single call, so
* that populating contiguous regions of the image is efficient.
*/

View File

@@ -1,50 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 19 Mar 2025 17:31:05 +0100
Subject: [PATCH] Revert "hpet: avoid timer storms on periodic timers"
This reverts commit 7c912ffb59e8137091894d767433e65c3df8b0bf.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/timer/hpet.c | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index ccb97b6806..0f45af8bbe 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -61,7 +61,6 @@ typedef struct HPETTimer { /* timers */
uint8_t wrap_flag; /* timer pop will indicate wrap for one-shot 32-bit
* mode. Next pop will be actual timer expiration.
*/
- uint64_t last; /* last value armed, to avoid timer storms */
} HPETTimer;
struct HPETState {
@@ -262,7 +261,6 @@ static int hpet_post_load(void *opaque, int version_id)
for (i = 0; i < s->num_timers; i++) {
HPETTimer *t = &s->timer[i];
t->cmp64 = hpet_calculate_cmp64(t, s->hpet_counter, t->cmp);
- t->last = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - NANOSECONDS_PER_SECOND;
}
/* Recalculate the offset between the main counter and guest time */
if (!s->hpet_offset_saved) {
@@ -350,15 +348,8 @@ static const VMStateDescription vmstate_hpet = {
static void hpet_arm(HPETTimer *t, uint64_t tick)
{
- uint64_t ns = hpet_get_ns(t->state, tick);
-
- /* Clamp period to reasonable min value (1 us) */
- if (timer_is_periodic(t) && ns - t->last < 1000) {
- ns = t->last + 1000;
- }
-
- t->last = ns;
- timer_mod(t->qemu_timer, ns);
+ /* FIXME: Clamp period to reasonable min value? */
+ timer_mod(t->qemu_timer, hpet_get_ns(t->state, tick));
}
/*

View File

@@ -1,202 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 19 Mar 2025 17:31:08 +0100
Subject: [PATCH] Revert "hpet: store full 64-bit target value of the counter"
This reverts commit 242d665396407f83a6acbffc804882eeb21cfdad.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/timer/hpet.c | 109 +++++++++++++++++++++++++++---------------------
1 file changed, 61 insertions(+), 48 deletions(-)
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 0f45af8bbe..635a060d38 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -56,7 +56,6 @@ typedef struct HPETTimer { /* timers */
uint64_t cmp; /* comparator */
uint64_t fsb; /* FSB route */
/* Hidden register state */
- uint64_t cmp64; /* comparator (extended to counter width) */
uint64_t period; /* Last value written to comparator */
uint8_t wrap_flag; /* timer pop will indicate wrap for one-shot 32-bit
* mode. Next pop will be actual timer expiration.
@@ -119,6 +118,11 @@ static uint32_t timer_enabled(HPETTimer *t)
}
static uint32_t hpet_time_after(uint64_t a, uint64_t b)
+{
+ return ((int32_t)(b - a) < 0);
+}
+
+static uint32_t hpet_time_after64(uint64_t a, uint64_t b)
{
return ((int64_t)(b - a) < 0);
}
@@ -155,32 +159,27 @@ static uint64_t hpet_get_ticks(HPETState *s)
return ns_to_ticks(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->hpet_offset);
}
-static uint64_t hpet_get_ns(HPETState *s, uint64_t tick)
-{
- return ticks_to_ns(tick) - s->hpet_offset;
-}
-
/*
- * calculate next value of the general counter that matches the
- * target (either entirely, or the low 32-bit only depending on
- * the timer mode).
+ * calculate diff between comparator value and current ticks
*/
-static uint64_t hpet_calculate_cmp64(HPETTimer *t, uint64_t cur_tick, uint64_t target)
+static inline uint64_t hpet_calculate_diff(HPETTimer *t, uint64_t current)
{
+
if (t->config & HPET_TN_32BIT) {
- uint64_t result = deposit64(cur_tick, 0, 32, target);
- if (result < cur_tick) {
- result += 0x100000000ULL;
- }
- return result;
+ uint32_t diff, cmp;
+
+ cmp = (uint32_t)t->cmp;
+ diff = cmp - (uint32_t)current;
+ diff = (int32_t)diff > 0 ? diff : (uint32_t)1;
+ return (uint64_t)diff;
} else {
- return target;
- }
-}
+ uint64_t diff, cmp;
-static uint64_t hpet_next_wrap(uint64_t cur_tick)
-{
- return (cur_tick | 0xffffffffU) + 1;
+ cmp = t->cmp;
+ diff = cmp - current;
+ diff = (int64_t)diff > 0 ? diff : (uint64_t)1;
+ return diff;
+ }
}
static void update_irq(struct HPETTimer *timer, int set)
@@ -256,12 +255,7 @@ static bool hpet_validate_num_timers(void *opaque, int version_id)
static int hpet_post_load(void *opaque, int version_id)
{
HPETState *s = opaque;
- int i;
- for (i = 0; i < s->num_timers; i++) {
- HPETTimer *t = &s->timer[i];
- t->cmp64 = hpet_calculate_cmp64(t, s->hpet_counter, t->cmp);
- }
/* Recalculate the offset between the main counter and guest time */
if (!s->hpet_offset_saved) {
s->hpet_offset = ticks_to_ns(s->hpet_counter)
@@ -346,10 +340,14 @@ static const VMStateDescription vmstate_hpet = {
}
};
-static void hpet_arm(HPETTimer *t, uint64_t tick)
+static void hpet_arm(HPETTimer *t, uint64_t ticks)
{
- /* FIXME: Clamp period to reasonable min value? */
- timer_mod(t->qemu_timer, hpet_get_ns(t->state, tick));
+ if (ticks < ns_to_ticks(INT64_MAX / 2)) {
+ timer_mod(t->qemu_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ticks_to_ns(ticks));
+ } else {
+ timer_del(t->qemu_timer);
+ }
}
/*
@@ -358,44 +356,54 @@ static void hpet_arm(HPETTimer *t, uint64_t tick)
static void hpet_timer(void *opaque)
{
HPETTimer *t = opaque;
+ uint64_t diff;
+
uint64_t period = t->period;
uint64_t cur_tick = hpet_get_ticks(t->state);
if (timer_is_periodic(t) && period != 0) {
- while (hpet_time_after(cur_tick, t->cmp64)) {
- t->cmp64 += period;
- }
if (t->config & HPET_TN_32BIT) {
- t->cmp = (uint32_t)t->cmp64;
+ while (hpet_time_after(cur_tick, t->cmp)) {
+ t->cmp = (uint32_t)(t->cmp + t->period);
+ }
} else {
- t->cmp = t->cmp64;
+ while (hpet_time_after64(cur_tick, t->cmp)) {
+ t->cmp += period;
+ }
+ }
+ diff = hpet_calculate_diff(t, cur_tick);
+ hpet_arm(t, diff);
+ } else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
+ if (t->wrap_flag) {
+ diff = hpet_calculate_diff(t, cur_tick);
+ hpet_arm(t, diff);
+ t->wrap_flag = 0;
}
- hpet_arm(t, t->cmp64);
- } else if (t->wrap_flag) {
- t->wrap_flag = 0;
- hpet_arm(t, t->cmp64);
}
update_irq(t, 1);
}
static void hpet_set_timer(HPETTimer *t)
{
+ uint64_t diff;
+ uint32_t wrap_diff; /* how many ticks until we wrap? */
uint64_t cur_tick = hpet_get_ticks(t->state);
+ /* whenever new timer is being set up, make sure wrap_flag is 0 */
t->wrap_flag = 0;
- t->cmp64 = hpet_calculate_cmp64(t, cur_tick, t->cmp);
- if (t->config & HPET_TN_32BIT) {
+ diff = hpet_calculate_diff(t, cur_tick);
- /* hpet spec says in one-shot 32-bit mode, generate an interrupt when
- * counter wraps in addition to an interrupt with comparator match.
- */
- if (!timer_is_periodic(t) && t->cmp64 > hpet_next_wrap(cur_tick)) {
+ /* hpet spec says in one-shot 32-bit mode, generate an interrupt when
+ * counter wraps in addition to an interrupt with comparator match.
+ */
+ if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
+ wrap_diff = 0xffffffff - (uint32_t)cur_tick;
+ if (wrap_diff < (uint32_t)diff) {
+ diff = wrap_diff;
t->wrap_flag = 1;
- hpet_arm(t, hpet_next_wrap(cur_tick));
- return;
}
}
- hpet_arm(t, t->cmp64);
+ hpet_arm(t, diff);
}
static void hpet_del_timer(HPETTimer *t)
@@ -526,7 +534,12 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
timer->cmp = deposit64(timer->cmp, shift, len, value);
}
if (timer_is_periodic(timer)) {
- timer->period = deposit64(timer->period, shift, len, value);
+ /*
+ * FIXME: Clamp period to reasonable min value?
+ * Clamp period to reasonable max value
+ */
+ new_val = deposit64(timer->period, shift, len, value);
+ timer->period = MIN(new_val, (timer->config & HPET_TN_32BIT ? ~0u : ~0ull) >> 1);
}
timer->config &= ~HPET_TN_SETVAL;
if (hpet_enabled(s)) {

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 2 Mar 2021 16:11:54 +0100
Subject: [PATCH] block/io: accept NULL qiov in bdrv_pad_request
Some operations, e.g. block-stream, perform reads while discarding the
results (only copy-on-read matters). In this case they will pass NULL as
the target QEMUIOVector, which will however trip bdrv_pad_request, since
it wants to extend its passed vector.
Simply check for NULL and do nothing, there's no reason to pad the
target if it will be discarded anyway.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/io.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/block/io.c b/block/io.c
index f38e7f81d8..28c3a712b6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1764,6 +1764,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
{
int ret;
+ if (!qiov) {
+ return 0;
+ }
+
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {

View File

@@ -1,281 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 19 Mar 2025 17:31:09 +0100
Subject: [PATCH] Revert "hpet: accept 64-bit reads and writes"
This reverts commit c2366567378dd8fb89329816003801f54e30e6f3.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/timer/hpet.c | 137 +++++++++++++++++++++++++++++-------------
hw/timer/trace-events | 3 +-
2 files changed, 96 insertions(+), 44 deletions(-)
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 635a060d38..5f4bb5667d 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -421,7 +421,6 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
unsigned size)
{
HPETState *s = opaque;
- int shift = (addr & 4) * 8;
uint64_t cur_tick;
trace_hpet_ram_read(addr);
@@ -436,33 +435,52 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
return 0;
}
- switch (addr & 0x18) {
- case HPET_TN_CFG: // including interrupt capabilities
- return timer->config >> shift;
+ switch ((addr - 0x100) % 0x20) {
+ case HPET_TN_CFG:
+ return timer->config;
+ case HPET_TN_CFG + 4: // Interrupt capabilities
+ return timer->config >> 32;
case HPET_TN_CMP: // comparator register
- return timer->cmp >> shift;
+ return timer->cmp;
+ case HPET_TN_CMP + 4:
+ return timer->cmp >> 32;
case HPET_TN_ROUTE:
- return timer->fsb >> shift;
+ return timer->fsb;
+ case HPET_TN_ROUTE + 4:
+ return timer->fsb >> 32;
default:
trace_hpet_ram_read_invalid();
break;
}
} else {
- switch (addr & ~4) {
- case HPET_ID: // including HPET_PERIOD
- return s->capability >> shift;
+ switch (addr) {
+ case HPET_ID:
+ return s->capability;
+ case HPET_PERIOD:
+ return s->capability >> 32;
case HPET_CFG:
- return s->config >> shift;
+ return s->config;
+ case HPET_CFG + 4:
+ trace_hpet_invalid_hpet_cfg(4);
+ return 0;
case HPET_COUNTER:
if (hpet_enabled(s)) {
cur_tick = hpet_get_ticks(s);
} else {
cur_tick = s->hpet_counter;
}
- trace_hpet_ram_read_reading_counter(addr & 4, cur_tick);
- return cur_tick >> shift;
+ trace_hpet_ram_read_reading_counter(0, cur_tick);
+ return cur_tick;
+ case HPET_COUNTER + 4:
+ if (hpet_enabled(s)) {
+ cur_tick = hpet_get_ticks(s);
+ } else {
+ cur_tick = s->hpet_counter;
+ }
+ trace_hpet_ram_read_reading_counter(4, cur_tick);
+ return cur_tick >> 32;
case HPET_STATUS:
- return s->isr >> shift;
+ return s->isr;
default:
trace_hpet_ram_read_invalid();
break;
@@ -476,11 +494,11 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
{
int i;
HPETState *s = opaque;
- int shift = (addr & 4) * 8;
- int len = MIN(size * 8, 64 - shift);
uint64_t old_val, new_val, cleared;
trace_hpet_ram_write(addr, value);
+ old_val = hpet_ram_read(opaque, addr, 4);
+ new_val = value;
/*address range of all TN regs*/
if (addr >= 0x100 && addr <= 0x3ff) {
@@ -492,12 +510,9 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
trace_hpet_timer_id_out_of_range(timer_id);
return;
}
- switch (addr & 0x18) {
+ switch ((addr - 0x100) % 0x20) {
case HPET_TN_CFG:
- trace_hpet_ram_write_tn_cfg(addr & 4);
- old_val = timer->config;
- new_val = deposit64(old_val, shift, len, value);
- new_val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
+ trace_hpet_ram_write_tn_cfg();
if (deactivating_bit(old_val, new_val, HPET_TN_TYPE_LEVEL)) {
/*
* Do this before changing timer->config; otherwise, if
@@ -505,7 +520,8 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
*/
update_irq(timer, 0);
}
- timer->config = new_val;
+ new_val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
+ timer->config = (timer->config & 0xffffffff00000000ULL) | new_val;
if (activating_bit(old_val, new_val, HPET_TN_ENABLE)
&& (s->isr & (1 << timer_id))) {
update_irq(timer, 1);
@@ -518,28 +534,56 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
hpet_set_timer(timer);
}
break;
+ case HPET_TN_CFG + 4: // Interrupt capabilities
+ trace_hpet_ram_write_invalid_tn_cfg(4);
+ break;
case HPET_TN_CMP: // comparator register
+ trace_hpet_ram_write_tn_cmp(0);
if (timer->config & HPET_TN_32BIT) {
- /* High 32-bits are zero, leave them untouched. */
- if (shift) {
- trace_hpet_ram_write_invalid_tn_cmp();
- break;
+ new_val = (uint32_t)new_val;
+ }
+ if (!timer_is_periodic(timer)
+ || (timer->config & HPET_TN_SETVAL)) {
+ timer->cmp = (timer->cmp & 0xffffffff00000000ULL) | new_val;
+ }
+ if (timer_is_periodic(timer)) {
+ /*
+ * FIXME: Clamp period to reasonable min value?
+ * Clamp period to reasonable max value
+ */
+ if (timer->config & HPET_TN_32BIT) {
+ new_val = MIN(new_val, ~0u >> 1);
}
- len = 64;
- value = (uint32_t) value;
+ timer->period =
+ (timer->period & 0xffffffff00000000ULL) | new_val;
+ }
+ /*
+ * FIXME: on a 64-bit write, HPET_TN_SETVAL should apply to the
+ * high bits part as well.
+ */
+ timer->config &= ~HPET_TN_SETVAL;
+ if (hpet_enabled(s)) {
+ hpet_set_timer(timer);
}
- trace_hpet_ram_write_tn_cmp(addr & 4);
+ break;
+ case HPET_TN_CMP + 4: // comparator register high order
+ if (timer->config & HPET_TN_32BIT) {
+ trace_hpet_ram_write_invalid_tn_cmp();
+ break;
+ }
+ trace_hpet_ram_write_tn_cmp(4);
if (!timer_is_periodic(timer)
|| (timer->config & HPET_TN_SETVAL)) {
- timer->cmp = deposit64(timer->cmp, shift, len, value);
+ timer->cmp = (timer->cmp & 0xffffffffULL) | new_val << 32;
}
if (timer_is_periodic(timer)) {
/*
* FIXME: Clamp period to reasonable min value?
* Clamp period to reasonable max value
*/
- new_val = deposit64(timer->period, shift, len, value);
- timer->period = MIN(new_val, (timer->config & HPET_TN_32BIT ? ~0u : ~0ull) >> 1);
+ new_val = MIN(new_val, ~0u >> 1);
+ timer->period =
+ (timer->period & 0xffffffffULL) | new_val << 32;
}
timer->config &= ~HPET_TN_SETVAL;
if (hpet_enabled(s)) {
@@ -547,7 +591,10 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
}
break;
case HPET_TN_ROUTE:
- timer->fsb = deposit64(timer->fsb, shift, len, value);
+ timer->fsb = (timer->fsb & 0xffffffff00000000ULL) | new_val;
+ break;
+ case HPET_TN_ROUTE + 4:
+ timer->fsb = (new_val << 32) | (timer->fsb & 0xffffffff);
break;
default:
trace_hpet_ram_write_invalid();
@@ -555,14 +602,12 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
}
return;
} else {
- switch (addr & ~4) {
+ switch (addr) {
case HPET_ID:
return;
case HPET_CFG:
- old_val = s->config;
- new_val = deposit64(old_val, shift, len, value);
new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
- s->config = new_val;
+ s->config = (s->config & 0xffffffff00000000ULL) | new_val;
if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
/* Enable main counter and interrupt generation. */
s->hpet_offset =
@@ -592,8 +637,10 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
}
break;
+ case HPET_CFG + 4:
+ trace_hpet_invalid_hpet_cfg(4);
+ break;
case HPET_STATUS:
- new_val = value << shift;
cleared = new_val & s->isr;
for (i = 0; i < s->num_timers; i++) {
if (cleared & (1 << i)) {
@@ -605,7 +652,15 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
if (hpet_enabled(s)) {
trace_hpet_ram_write_counter_write_while_enabled();
}
- s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
+ s->hpet_counter =
+ (s->hpet_counter & 0xffffffff00000000ULL) | value;
+ trace_hpet_ram_write_counter_written(0, value, s->hpet_counter);
+ break;
+ case HPET_COUNTER + 4:
+ trace_hpet_ram_write_counter_write_while_enabled();
+ s->hpet_counter =
+ (s->hpet_counter & 0xffffffffULL) | (((uint64_t)value) << 32);
+ trace_hpet_ram_write_counter_written(4, value, s->hpet_counter);
break;
default:
trace_hpet_ram_write_invalid();
@@ -619,11 +674,7 @@ static const MemoryRegionOps hpet_ram_ops = {
.write = hpet_ram_write,
.valid = {
.min_access_size = 4,
- .max_access_size = 8,
- },
- .impl = {
- .min_access_size = 4,
- .max_access_size = 8,
+ .max_access_size = 4,
},
.endianness = DEVICE_NATIVE_ENDIAN,
};
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
index c5b6db49f5..dd8a53c690 100644
--- a/hw/timer/trace-events
+++ b/hw/timer/trace-events
@@ -114,7 +114,8 @@ hpet_ram_read_reading_counter(uint8_t reg_off, uint64_t cur_tick) "reading count
hpet_ram_read_invalid(void) "invalid hpet_ram_readl"
hpet_ram_write(uint64_t addr, uint64_t value) "enter hpet_ram_writel at 0x%" PRIx64 " = 0x%" PRIx64
hpet_ram_write_timer_id(uint64_t timer_id) "hpet_ram_writel timer_id = 0x%" PRIx64
-hpet_ram_write_tn_cfg(uint8_t reg_off) "hpet_ram_writel HPET_TN_CFG + %" PRIu8
+hpet_ram_write_tn_cfg(void) "hpet_ram_writel HPET_TN_CFG"
+hpet_ram_write_invalid_tn_cfg(uint8_t reg_off) "invalid HPET_TN_CFG + %" PRIu8 " write"
hpet_ram_write_tn_cmp(uint8_t reg_off) "hpet_ram_writel HPET_TN_CMP + %" PRIu8
hpet_ram_write_invalid_tn_cmp(void) "invalid HPET_TN_CMP + 4 write"
hpet_ram_write_invalid(void) "invalid hpet_ram_writel"

View File

@@ -19,34 +19,23 @@ well.
This only worked if the target supports backing images, so up until now
only for qcow2, with alloc-track any driver for the target can be used.
Replacing the node cannot be done in the
track_co_change_backing_file() callback, because replacing a node
cannot happen in a coroutine and requires the block graph lock
exclusively. Could either become a special option for the stream job,
or maybe the upcoming blockdev-replace QMP command can be used in the
future.
If 'auto-remove' is set, alloc-track will automatically detach itself
once the backing image is removed. It will be replaced by 'file'.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures
make error return value consistent with QEMU
avoid premature break during read
adhere to block graph lock requirements
avoid superfluous child permission update]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 343 ++++++++++++++++++++++++++++++++++++++++++++
block/alloc-track.c | 345 ++++++++++++++++++++++++++++++++++++++++++++
block/meson.build | 1 +
block/stream.c | 34 +++++
3 files changed, 378 insertions(+)
2 files changed, 346 insertions(+)
create mode 100644 block/alloc-track.c
diff --git a/block/alloc-track.c b/block/alloc-track.c
new file mode 100644
index 0000000000..718aaabf2a
index 0000000000..35f2737c89
--- /dev/null
+++ b/block/alloc-track.c
@@ -0,0 +1,343 @@
@@ -0,0 +1,345 @@
+/*
+ * Node to allow backing images to be applied to any node. Assumes a blank
+ * image to begin with, only new writes are tracked as allocated, thus this
@@ -62,21 +51,25 @@ index 0000000000..718aaabf2a
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/dirty-bitmap.h"
+#include "block/graph-lock.h"
+#include "qobject/qdict.h"
+#include "qobject/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "system/block-backend.h"
+#include "sysemu/block-backend.h"
+
+#define TRACK_OPT_AUTO_REMOVE "auto-remove"
+
+typedef enum DropState {
+ DropNone,
+ DropRequested,
+ DropInProgress,
+} DropState;
+
+typedef struct {
+ BdrvDirtyBitmap *bitmap;
+ uint64_t granularity;
+ DropState drop_state;
+ bool auto_remove;
+} BDRVAllocTrackState;
+
+static QemuOptsList runtime_opts = {
@@ -93,29 +86,26 @@ index 0000000000..718aaabf2a
+ },
+};
+
+static void GRAPH_RDLOCK
+track_refresh_limits(BlockDriverState *bs, Error **errp)
+static void track_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ BlockDriverInfo bdi;
+
+ if (!bs->file) {
+ return;
+ }
+
+ /*
+ * Always use alignment from underlying write device so RMW cycle for
+ * bdrv_pwritev reads data from our backing via track_co_preadv. Also use at
+ * least the bitmap granularity.
+ */
+ /* always use alignment from underlying write device so RMW cycle for
+ * bdrv_pwritev reads data from our backing via track_co_preadv (no partial
+ * cluster allocation in 'file') */
+ bdrv_get_info(bs->file->bs, &bdi);
+ bs->bl.request_alignment = MAX(bs->file->bs->bl.request_alignment,
+ s->granularity);
+ MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+}
+
+static int track_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ BdrvChild *file = NULL;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+ int ret = 0;
@@ -128,63 +118,32 @@ index 0000000000..718aaabf2a
+ goto fail;
+ }
+
+ if (!qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false)) {
+ error_setg(errp, "alloc-track: requires auto-remove option to be set to on");
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
+
+ /* open the target (write) node, backing will be attached by block layer */
+ file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+ BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
+ &local_err);
+ bdrv_graph_wrlock();
+ bs->file = file;
+ bdrv_graph_wrunlock();
+ bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+ BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
+ &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ bdrv_graph_rdlock_main_loop();
+ BlockDriverInfo bdi = {0};
+ ret = bdrv_get_info(bs->file->bs, &bdi);
+ if (ret < 0) {
+ /*
+ * Not a hard failure. Worst that can happen is partial cluster
+ * allocation in the write target. However, the driver here returns its
+ * allocation status based on the dirty bitmap, so any other data that
+ * maps to such a cluster will still be copied later by a stream job (or
+ * during writes to that cluster).
+ */
+ warn_report("alloc-track: unable to query cluster size for write target: %s",
+ strerror(ret));
+ }
+ ret = 0;
+ /*
+ * Always consider alignment from underlying write device so RMW cycle for
+ * bdrv_pwritev reads data from our backing via track_co_preadv. Also try to
+ * avoid partial cluster allocation in the write target by considering the
+ * cluster size.
+ */
+ s->granularity = MAX(bs->file->bs->bl.request_alignment,
+ MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+ track_refresh_limits(bs, errp);
+ s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, s->granularity, NULL,
+ &local_err);
+ bdrv_graph_rdunlock_main_loop();
+ uint64_t gran = bs->bl.request_alignment;
+ s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, gran, NULL, &local_err);
+ if (local_err) {
+ ret = -EIO;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ s->drop_state = DropNone;
+
+fail:
+ if (ret < 0) {
+ bdrv_graph_wrlock();
+ bdrv_unref_child(bs, bs->file);
+ bdrv_graph_wrunlock();
+ if (s->bitmap) {
+ bdrv_release_dirty_bitmap(s->bitmap);
+ }
@@ -201,15 +160,13 @@ index 0000000000..718aaabf2a
+ }
+}
+
+static coroutine_fn int64_t GRAPH_RDLOCK
+track_co_getlength(BlockDriverState *bs)
+static int64_t track_getlength(BlockDriverState *bs)
+{
+ return bdrv_co_getlength(bs->file->bs);
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn track_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ QEMUIOVector local_qiov;
@@ -220,11 +177,6 @@ index 0000000000..718aaabf2a
+ int64_t local_bytes;
+ bool alloc;
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+ return -EIO;
+ }
+
+ /* a read request can span multiple granularity-sized chunks, and can thus
+ * contain blocks with different allocation status - we could just iterate
+ * granularity-wise, but for better performance use bdrv_dirty_bitmap_next_X
@@ -255,8 +207,7 @@ index 0000000000..718aaabf2a
+ ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
+ &local_qiov, flags);
+ } else {
+ qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ ret = 0;
+ ret = qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ }
+
+ if (ret != 0) {
@@ -267,39 +218,36 @@ index 0000000000..718aaabf2a
+ return ret;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn track_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
+static int coroutine_fn track_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
+{
+ return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+ return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+static int coroutine_fn track_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
+{
+ return bdrv_co_pdiscard(bs->file, offset, bytes);
+ return bdrv_co_pdiscard(bs->file, offset, count);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+track_co_flush(BlockDriverState *bs)
+static coroutine_fn int track_co_flush(BlockDriverState *bs)
+{
+ return bdrv_co_flush(bs->file->bs);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_block_status(BlockDriverState *bs, bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+static int coroutine_fn track_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+
@@ -325,13 +273,23 @@ index 0000000000..718aaabf2a
+ return 0;
+}
+
+static void GRAPH_RDLOCK
+track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+ BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role, BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+
+ *nshared = BLK_PERM_ALL;
+
+ /* in case we're currently dropping ourselves, claim to not use any
+ * permissions at all - which is fine, since from this point on we will
+ * never issue a read or write anymore */
+ if (s->drop_state == DropInProgress) {
+ *nperm = 0;
+ return;
+ }
+
+ if (role & BDRV_CHILD_DATA) {
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+ } else {
@@ -341,22 +299,55 @@ index 0000000000..718aaabf2a
+ }
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+ const char *backing_fmt)
+static void track_drop(void *opaque)
+{
+ /*
+ * Note that the actual backing file graph change is already done in the
+ * stream job itself with bdrv_set_backing_hd_drained(), so no need to
+ * actually do anything here. But still needs to be implemented, to make
+ * our caller (i.e. bdrv_co_change_backing_file() do the right thing).
+ *
+ * FIXME
+ * We'd like to auto-remove ourselves from the block graph, but it cannot
+ * be done from a coroutine. Currently done in the stream job, where it
+ * kinda fits better, but in the long-term, a special parameter would be
+ * nice (or done via qemu-server via upcoming blockdev-replace QMP command).
+ */
+ BlockDriverState *bs = (BlockDriverState*)opaque;
+ BlockDriverState *file = bs->file->bs;
+ BDRVAllocTrackState *s = bs->opaque;
+
+ assert(file);
+
+ /* we rely on the fact that we're not used anywhere else, so let's wait
+ * until we're only used once - in the drive connected to the guest (and one
+ * ref is held by bdrv_ref in track_change_backing_file) */
+ if (bs->refcnt > 2) {
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
+ return;
+ }
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_drained_begin(bs);
+
+ /* now that we're drained, we can safely set 'DropInProgress' */
+ s->drop_state = DropInProgress;
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+
+ bdrv_replace_node(bs, file, &error_abort);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
+ bdrv_drained_end(bs);
+ bdrv_unref(bs);
+ aio_context_release(aio_context);
+}
+
+static int track_change_backing_file(BlockDriverState *bs,
+ const char *backing_file,
+ const char *backing_fmt)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ if (s->auto_remove && s->drop_state == DropNone &&
+ backing_file == NULL && backing_fmt == NULL)
+ {
+ /* backing file has been disconnected, there's no longer any use for
+ * this node, so let's remove ourselves from the block graph - we need
+ * to schedule this for later however, since when this function is
+ * called, the blockjob modifying us is probably not done yet and has a
+ * blocker on 'bs' */
+ s->drop_state = DropRequested;
+ bdrv_ref(bs);
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
+ }
+
+ return 0;
+}
+
@@ -364,9 +355,9 @@ index 0000000000..718aaabf2a
+ .format_name = "alloc-track",
+ .instance_size = sizeof(BDRVAllocTrackState),
+
+ .bdrv_open = track_open,
+ .bdrv_file_open = track_open,
+ .bdrv_close = track_close,
+ .bdrv_co_getlength = track_co_getlength,
+ .bdrv_getlength = track_getlength,
+ .bdrv_child_perm = track_child_perm,
+ .bdrv_refresh_limits = track_refresh_limits,
+
@@ -381,7 +372,7 @@ index 0000000000..718aaabf2a
+ .supports_backing = true,
+
+ .bdrv_co_block_status = track_co_block_status,
+ .bdrv_co_change_backing_file = track_co_change_backing_file,
+ .bdrv_change_backing_file = track_change_backing_file,
+};
+
+static void bdrv_alloc_track_init(void)
@@ -391,7 +382,7 @@ index 0000000000..718aaabf2a
+
+block_init(bdrv_alloc_track_init);
diff --git a/block/meson.build b/block/meson.build
index d243372c41..9b45b5256d 100644
index e3ed5ac97c..d1ee260048 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -2,6 +2,7 @@ block_ss.add(genh)
@@ -402,48 +393,3 @@ index d243372c41..9b45b5256d 100644
'amend.c',
'backup.c',
'backup-dump.c',
diff --git a/block/stream.c b/block/stream.c
index e187cd1262..0b61029399 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -120,6 +120,40 @@ static int stream_prepare(Job *job)
ret = -EPERM;
goto out;
}
+
+ /*
+ * This cannot be done in the co_change_backing_file callback, because
+ * bdrv_replace_node() cannot be done in a coroutine. The latter also
+ * requires the graph lock exclusively. Only required for the
+ * alloc-track driver.
+ *
+ * The long-term plan is to either have an explicit parameter for the
+ * stream job or use the upcoming blockdev-replace QMP command.
+ */
+ if (base_id == NULL && strcmp(unfiltered_bs->drv->format_name, "alloc-track") == 0) {
+ BlockDriverState *file_bs;
+
+ bdrv_graph_rdlock_main_loop();
+ file_bs = unfiltered_bs->file->bs;
+ bdrv_graph_rdunlock_main_loop();
+
+ bdrv_ref(unfiltered_bs); // unrefed by bdrv_replace_node()
+ bdrv_drained_begin(file_bs);
+ bdrv_graph_wrlock();
+
+ bdrv_replace_node(unfiltered_bs, file_bs, &local_err);
+
+ bdrv_graph_wrunlock();
+ bdrv_drained_end(file_bs);
+ bdrv_unref(unfiltered_bs);
+
+ if (local_err) {
+ error_prepend(&local_err, "failed to replace alloc-track node: ");
+ error_report_err(local_err);
+ ret = -EPERM;
+ goto out;
+ }
+ }
}
out:

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 15:26:30 +0200
Subject: [PATCH] PVE: whitelist 'invalid' QAPI names for backwards compat
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qapi/pragma.json | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 7c91ea3685..c3888d654c 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -12,6 +12,7 @@
'device_add',
'device_del',
'expire_password',
+ 'get_link_status',
'migrate_cancel',
'netdev_add',
'netdev_del',
@@ -60,6 +61,8 @@
'SysEmuTarget', # query-cpu-fast, query-target
'UuidInfo', # query-uuid
'VncClientInfo', # query-vnc, query-vnc-servers, ...
- 'X86CPURegister32' # qom-get of x86 CPU properties
+ 'X86CPURegister32', # qom-get of x86 CPU properties
# feature-words, filtered-features
+ 'BlockdevOptionsPbs', # for PBS backwards compat
+ 'BalloonInfo'
] } }

View File

@@ -1,64 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 19 Mar 2025 17:31:10 +0100
Subject: [PATCH] Revert "hpet: place read-only bits directly in "new_val""
This reverts commit ba88935b0fac2588b0a739f810b58dfabf7f92c8.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/timer/hpet.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 5f4bb5667d..5e3bf1f153 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -494,7 +494,7 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
{
int i;
HPETState *s = opaque;
- uint64_t old_val, new_val, cleared;
+ uint64_t old_val, new_val, val;
trace_hpet_ram_write(addr, value);
old_val = hpet_ram_read(opaque, addr, 4);
@@ -520,12 +520,13 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
*/
update_irq(timer, 0);
}
- new_val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
- timer->config = (timer->config & 0xffffffff00000000ULL) | new_val;
+ val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
+ timer->config = (timer->config & 0xffffffff00000000ULL) | val;
if (activating_bit(old_val, new_val, HPET_TN_ENABLE)
&& (s->isr & (1 << timer_id))) {
update_irq(timer, 1);
}
+
if (new_val & HPET_TN_32BIT) {
timer->cmp = (uint32_t)timer->cmp;
timer->period = (uint32_t)timer->period;
@@ -606,8 +607,8 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
case HPET_ID:
return;
case HPET_CFG:
- new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
- s->config = (s->config & 0xffffffff00000000ULL) | new_val;
+ val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
+ s->config = (s->config & 0xffffffff00000000ULL) | val;
if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
/* Enable main counter and interrupt generation. */
s->hpet_offset =
@@ -641,9 +642,9 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
trace_hpet_invalid_hpet_cfg(4);
break;
case HPET_STATUS:
- cleared = new_val & s->isr;
+ val = new_val & s->isr;
for (i = 0; i < s->num_timers; i++) {
- if (cleared & (1 << i)) {
+ if (val & (1 << i)) {
update_irq(&s->timer[i], 0);
}
}

View File

@@ -0,0 +1,35 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 17:36:55 +0200
Subject: [PATCH] PVE: savevm-async: register yank before
migration_incoming_state_destroy
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/savevm-async.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 970ee3b3fc..b3ccc069f1 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -19,6 +19,7 @@
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
+#include "qemu/yank.h"
/* #define DEBUG_SAVEVM_STATE */
@@ -580,6 +581,10 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
dirty_bitmap_mig_before_vm_start();
qemu_fclose(f);
+
+ /* state_destroy assumes a real migration which would have added a yank */
+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
migration_incoming_state_destroy();
if (ret < 0) {
error_setg_errno(errp, -ret, "Error while loading VM state");

View File

@@ -1,68 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 19 Mar 2025 17:31:11 +0100
Subject: [PATCH] Revert "hpet: remove unnecessary variable "index""
This reverts commit 5895879aca252f4ebb2d1078eaf836c61ec54e9b.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/timer/hpet.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 5e3bf1f153..daef12c8cf 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -421,12 +421,12 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
unsigned size)
{
HPETState *s = opaque;
- uint64_t cur_tick;
+ uint64_t cur_tick, index;
trace_hpet_ram_read(addr);
-
+ index = addr;
/*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
+ if (index >= 0x100 && index <= 0x3ff) {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@@ -453,7 +453,7 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
break;
}
} else {
- switch (addr) {
+ switch (index) {
case HPET_ID:
return s->capability;
case HPET_PERIOD:
@@ -494,14 +494,15 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
{
int i;
HPETState *s = opaque;
- uint64_t old_val, new_val, val;
+ uint64_t old_val, new_val, val, index;
trace_hpet_ram_write(addr, value);
+ index = addr;
old_val = hpet_ram_read(opaque, addr, 4);
new_val = value;
/*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
+ if (index >= 0x100 && index <= 0x3ff) {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@@ -603,7 +604,7 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
}
return;
} else {
- switch (addr) {
+ switch (index) {
case HPET_ID:
return;
case HPET_CFG:

View File

@@ -1,40 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 19 Mar 2025 17:31:12 +0100
Subject: [PATCH] Revert "hpet: ignore high bits of comparator in 32-bit mode"
This reverts commit 9eb7fad3546a89ee7cf0e90f5b1daccf89725cea.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/timer/hpet.c | 4 ----
hw/timer/trace-events | 1 -
2 files changed, 5 deletions(-)
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index daef12c8cf..927263e2ff 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -569,10 +569,6 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
}
break;
case HPET_TN_CMP + 4: // comparator register high order
- if (timer->config & HPET_TN_32BIT) {
- trace_hpet_ram_write_invalid_tn_cmp();
- break;
- }
trace_hpet_ram_write_tn_cmp(4);
if (!timer_is_periodic(timer)
|| (timer->config & HPET_TN_SETVAL)) {
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
index dd8a53c690..2b81ee0812 100644
--- a/hw/timer/trace-events
+++ b/hw/timer/trace-events
@@ -117,7 +117,6 @@ hpet_ram_write_timer_id(uint64_t timer_id) "hpet_ram_writel timer_id = 0x%" PRIx
hpet_ram_write_tn_cfg(void) "hpet_ram_writel HPET_TN_CFG"
hpet_ram_write_invalid_tn_cfg(uint8_t reg_off) "invalid HPET_TN_CFG + %" PRIu8 " write"
hpet_ram_write_tn_cmp(uint8_t reg_off) "hpet_ram_writel HPET_TN_CMP + %" PRIu8
-hpet_ram_write_invalid_tn_cmp(void) "invalid HPET_TN_CMP + 4 write"
hpet_ram_write_invalid(void) "invalid hpet_ram_writel"
hpet_ram_write_counter_write_while_enabled(void) "Writing counter while HPET enabled!"
hpet_ram_write_counter_written(uint8_t reg_off, uint64_t value, uint64_t counter) "HPET counter + %" PRIu8 "written. crt = 0x%" PRIx64 " -> 0x%" PRIx64

View File

@@ -1,120 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 19 Mar 2025 17:31:13 +0100
Subject: [PATCH] Revert "hpet: fix and cleanup persistence of interrupt
status"
This reverts commit f0ccf770789e48b7a73497b465fdc892d28c1339.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/timer/hpet.c | 60 ++++++++++++++++---------------------------------
1 file changed, 19 insertions(+), 41 deletions(-)
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 927263e2ff..5aae09f166 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -199,31 +199,21 @@ static void update_irq(struct HPETTimer *timer, int set)
}
s = timer->state;
mask = 1 << timer->tn;
-
- if (set && (timer->config & HPET_TN_TYPE_LEVEL)) {
- /*
- * If HPET_TN_ENABLE bit is 0, "the timer will still operate and
- * generate appropriate status bits, but will not cause an interrupt"
- */
- s->isr |= mask;
- } else {
+ if (!set || !timer_enabled(timer) || !hpet_enabled(timer->state)) {
s->isr &= ~mask;
- }
-
- if (set && timer_enabled(timer) && hpet_enabled(s)) {
- if (timer_fsb_route(timer)) {
- address_space_stl_le(&address_space_memory, timer->fsb >> 32,
- timer->fsb & 0xffffffff, MEMTXATTRS_UNSPECIFIED,
- NULL);
- } else if (timer->config & HPET_TN_TYPE_LEVEL) {
- qemu_irq_raise(s->irqs[route]);
- } else {
- qemu_irq_pulse(s->irqs[route]);
- }
- } else {
if (!timer_fsb_route(timer)) {
qemu_irq_lower(s->irqs[route]);
}
+ } else if (timer_fsb_route(timer)) {
+ address_space_stl_le(&address_space_memory, timer->fsb >> 32,
+ timer->fsb & 0xffffffff, MEMTXATTRS_UNSPECIFIED,
+ NULL);
+ } else if (timer->config & HPET_TN_TYPE_LEVEL) {
+ s->isr |= mask;
+ qemu_irq_raise(s->irqs[route]);
+ } else {
+ s->isr &= ~mask;
+ qemu_irq_pulse(s->irqs[route]);
}
}
@@ -408,13 +398,8 @@ static void hpet_set_timer(HPETTimer *t)
static void hpet_del_timer(HPETTimer *t)
{
- HPETState *s = t->state;
timer_del(t->qemu_timer);
-
- if (s->isr & (1 << t->tn)) {
- /* For level-triggered interrupt, this leaves ISR set but lowers irq. */
- update_irq(t, 1);
- }
+ update_irq(t, 0);
}
static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
@@ -514,26 +499,20 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
switch ((addr - 0x100) % 0x20) {
case HPET_TN_CFG:
trace_hpet_ram_write_tn_cfg();
- if (deactivating_bit(old_val, new_val, HPET_TN_TYPE_LEVEL)) {
- /*
- * Do this before changing timer->config; otherwise, if
- * HPET_TN_FSB is set, update_irq will not lower the qemu_irq.
- */
+ if (activating_bit(old_val, new_val, HPET_TN_FSB_ENABLE)) {
update_irq(timer, 0);
}
val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
timer->config = (timer->config & 0xffffffff00000000ULL) | val;
- if (activating_bit(old_val, new_val, HPET_TN_ENABLE)
- && (s->isr & (1 << timer_id))) {
- update_irq(timer, 1);
- }
-
if (new_val & HPET_TN_32BIT) {
timer->cmp = (uint32_t)timer->cmp;
timer->period = (uint32_t)timer->period;
}
- if (hpet_enabled(s)) {
+ if (activating_bit(old_val, new_val, HPET_TN_ENABLE) &&
+ hpet_enabled(s)) {
hpet_set_timer(timer);
+ } else if (deactivating_bit(old_val, new_val, HPET_TN_ENABLE)) {
+ hpet_del_timer(timer);
}
break;
case HPET_TN_CFG + 4: // Interrupt capabilities
@@ -611,10 +590,9 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
s->hpet_offset =
ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
for (i = 0; i < s->num_timers; i++) {
- if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
- update_irq(&s->timer[i], 1);
+ if ((&s->timer[i])->cmp != ~0ULL) {
+ hpet_set_timer(&s->timer[i]);
}
- hpet_set_timer(&s->timer[i]);
}
} else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
/* Halt main counter and disable interrupt generation. */

View File

@@ -1,59 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 3 Apr 2025 14:30:42 +0200
Subject: [PATCH] PVE backup: factor out helper to clear backup state's bitmap
list
Suggested-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 9b66788ab5..588ee98ffc 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -811,6 +811,23 @@ err:
return di_list;
}
+/*
+ * To be called with the backup_state.stat mutex held.
+ */
+static void clear_backup_state_bitmap_list(void) {
+
+ if (backup_state.stat.bitmap_list) {
+ GList *bl = backup_state.stat.bitmap_list;
+ while (bl) {
+ g_free(((PBSBitmapInfo *)bl->data)->drive);
+ g_free(bl->data);
+ bl = g_list_next(bl);
+ }
+ g_list_free(backup_state.stat.bitmap_list);
+ backup_state.stat.bitmap_list = NULL;
+ }
+}
+
UuidInfo coroutine_fn *qmp_backup(
const char *backup_file,
const char *password,
@@ -898,16 +915,7 @@ UuidInfo coroutine_fn *qmp_backup(
backup_state.stat.reused = 0;
/* clear previous backup's bitmap_list */
- if (backup_state.stat.bitmap_list) {
- GList *bl = backup_state.stat.bitmap_list;
- while (bl) {
- g_free(((PBSBitmapInfo *)bl->data)->drive);
- g_free(bl->data);
- bl = g_list_next(bl);
- }
- g_list_free(backup_state.stat.bitmap_list);
- backup_state.stat.bitmap_list = NULL;
- }
+ clear_backup_state_bitmap_list();
if (format == BACKUP_FORMAT_PBS) {
if (!password) {

View File

@@ -1,95 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 3 Apr 2025 14:30:43 +0200
Subject: [PATCH] PVE backup: factor out helper to initialize backup state stat
struct
Suggested-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 62 ++++++++++++++++++++++++++++++++--------------------
1 file changed, 38 insertions(+), 24 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 588ee98ffc..3be9930ad3 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -828,6 +828,43 @@ static void clear_backup_state_bitmap_list(void) {
}
}
+/*
+ * Initializes most of the backup state 'stat' struct. Note that 'reused' and
+ * 'bitmap_list' are not changed by this function and need to be handled by
+ * the caller. In particular, 'reused' needs to be set before calling this
+ * function.
+ *
+ * To be called with the backup_state.stat mutex held.
+ */
+static void initialize_backup_state_stat(
+ const char *backup_file,
+ uuid_t uuid,
+ size_t total)
+{
+ if (backup_state.stat.error) {
+ error_free(backup_state.stat.error);
+ backup_state.stat.error = NULL;
+ }
+
+ backup_state.stat.start_time = time(NULL);
+ backup_state.stat.end_time = 0;
+
+ if (backup_state.stat.backup_file) {
+ g_free(backup_state.stat.backup_file);
+ }
+ backup_state.stat.backup_file = g_strdup(backup_file);
+
+ uuid_copy(backup_state.stat.uuid, uuid);
+ uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
+
+ backup_state.stat.total = total;
+ backup_state.stat.dirty = total - backup_state.stat.reused;
+ backup_state.stat.transferred = 0;
+ backup_state.stat.zero_bytes = 0;
+ backup_state.stat.finishing = false;
+ backup_state.stat.starting = true;
+}
+
UuidInfo coroutine_fn *qmp_backup(
const char *backup_file,
const char *password,
@@ -1070,32 +1107,9 @@ UuidInfo coroutine_fn *qmp_backup(
}
}
/* initialize global backup_state now */
- /* note: 'reused' and 'bitmap_list' are initialized earlier */
-
- if (backup_state.stat.error) {
- error_free(backup_state.stat.error);
- backup_state.stat.error = NULL;
- }
-
- backup_state.stat.start_time = time(NULL);
- backup_state.stat.end_time = 0;
-
- if (backup_state.stat.backup_file) {
- g_free(backup_state.stat.backup_file);
- }
- backup_state.stat.backup_file = g_strdup(backup_file);
-
- uuid_copy(backup_state.stat.uuid, uuid);
- uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
+ initialize_backup_state_stat(backup_file, uuid, total);
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
- backup_state.stat.total = total;
- backup_state.stat.dirty = total - backup_state.stat.reused;
- backup_state.stat.transferred = 0;
- backup_state.stat.zero_bytes = 0;
- backup_state.stat.finishing = false;
- backup_state.stat.starting = true;
-
qemu_mutex_unlock(&backup_state.stat.lock);
backup_state.speed = (has_speed && speed > 0) ? speed : 0;

View File

@@ -1,63 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 3 Apr 2025 14:30:44 +0200
Subject: [PATCH] PVE backup: add target ID in backup state
In preparation for allowing multiple backup providers and potentially
multiple targets for a given provider. Each backup target can then
have its own dirty bitmap and there can be additional checks that the
current backup state is actually associated to the expected target.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/pve-backup.c b/pve-backup.c
index 3be9930ad3..87778f7e76 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -70,6 +70,7 @@ static struct PVEBackupState {
JobTxn *txn;
CoMutex backup_mutex;
CoMutex dump_callback_mutex;
+ char *target_id;
} backup_state;
static void pvebackup_init(void)
@@ -865,6 +866,16 @@ static void initialize_backup_state_stat(
backup_state.stat.starting = true;
}
+/*
+ * To be called with the backup_state mutex held.
+ */
+static void backup_state_set_target_id(const char *target_id) {
+ if (backup_state.target_id) {
+ g_free(backup_state.target_id);
+ }
+ backup_state.target_id = g_strdup(target_id);
+}
+
UuidInfo coroutine_fn *qmp_backup(
const char *backup_file,
const char *password,
@@ -904,7 +915,7 @@ UuidInfo coroutine_fn *qmp_backup(
if (backup_state.di_list) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR,
- "previous backup not finished");
+ "previous backup for target '%s' not finished", backup_state.target_id);
qemu_co_mutex_unlock(&backup_state.backup_mutex);
return NULL;
}
@@ -1122,6 +1133,8 @@ UuidInfo coroutine_fn *qmp_backup(
backup_state.vmaw = vmaw;
backup_state.pbs = pbs;
+ backup_state_set_target_id("Proxmox");
+
backup_state.di_list = di_list;
uuid_info = g_malloc0(sizeof(*uuid_info));

View File

@@ -1,57 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 3 Apr 2025 14:30:45 +0200
Subject: [PATCH] PVE backup: get device info: allow caller to specify filter
for which devices use fleecing
For providing snapshot-access to external backup providers, EFI and
TPM also need an associated fleecing image. The new caller will thus
need a different filter.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 87778f7e76..bd81621d51 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -719,7 +719,7 @@ static void create_backup_jobs_bh(void *opaque) {
/*
* EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
*/
-static bool device_uses_fleecing(const char *device_id)
+static bool fleecing_no_efi_tpm(const char *device_id)
{
return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
}
@@ -731,7 +731,7 @@ static bool device_uses_fleecing(const char *device_id)
*/
static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
const char *devlist,
- bool fleecing,
+ bool (*device_uses_fleecing)(const char*),
Error **errp)
{
gchar **devs = NULL;
@@ -757,7 +757,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
di->bs = bs;
di->device_name = g_strdup(bdrv_get_device_name(bs));
- if (fleecing && device_uses_fleecing(*d)) {
+ if (device_uses_fleecing && device_uses_fleecing(*d)) {
g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
if (!fleecing_blk) {
@@ -924,7 +924,8 @@ UuidInfo coroutine_fn *qmp_backup(
format = has_format ? format : BACKUP_FORMAT_VMA;
bdrv_graph_co_rdlock();
- di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
+ di_list = get_device_info(devlist, (has_fleecing && fleecing) ? fleecing_no_efi_tpm : NULL,
+ &local_err);
bdrv_graph_co_rdunlock();
if (local_err) {
error_propagate(errp, local_err);

View File

@@ -1,898 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 3 Apr 2025 14:30:46 +0200
Subject: [PATCH] PVE backup: implement backup access setup and teardown API
for external providers
For external backup providers, the state of the VM's disk images at
the time the backup is started is preserved via a snapshot-access
block node. Old data is moved to the fleecing image when new guest
writes come in. The snapshot-access block node, as well as the
associated bitmap in case of incremental backup, will be exported via
NBD to the external provider. The NBD export will be done by the
management layer, the missing functionality is setting up and tearing
down the snapshot-access block nodes, which this patch adds.
It is necessary to also set up fleecing for EFI and TPM disks, so that
old data can be moved out of the way when a new guest write comes in.
There can only be one regular backup or one active backup access at
a time, because both require replacing the original block node of the
drive. Thus the backup state is re-used, and checks are added to
prohibit regular backup while snapshot access is active and vice
versa.
The block nodes added by the backup-access-setup QMP call are not
tracked anywhere else (there is no job they are associated to like for
regular backup). This requires adding a callback for teardown when
QEMU exits, i.e. in qemu_cleanup(). Otherwise, there will be an
assertion failure that the block graph is not empty when QEMU exits
before the backup-access-teardown QMP command is called.
The code for the qmp_backup_access_setup() was based on the existing
qmp_backup() routine.
The return value for the setup QMP command contains information about
the snapshot-access block nodes that can be used by the management
layer to set up the NBD exports.
There can be one dirty bitmap for each backup target ID for each
device (which are tracked in the backup_access_bitmaps hash table).
The QMP user can specify the ID of the bitmap it likes to use. This ID
is then compared to the current one for the given target and device.
If they match, the bitmap is re-used (should it still exist on the
drive, otherwise re-created). If there is a mismatch, the old bitmap
is removed and a new one is created.
The return value of the QMP command includes information about what
bitmap action was taken. Similar to what the query-backup QMP command
returns for regular backup. It also includes the bitmap name and
associated block node, so the management layer can then set up an NBD
export with the bitmap.
While the backup access is active, a background bitmap is also
required. This is necessary to implement bitmap handling according to
the original reference [0]. In particular:
- in the error case, new writes since the backup access was set up are
in the background bitmap. Because of failure, the previously tracked
writes from the backup access bitmap are still required too. Thus,
the bitmap is merged with the background bitmap to get all new
writes since the last backup.
- in the success case, continue tracking for the next incremental
backup in the backup access bitmap. New writes since the backup
access was set up are in the background bitmap. Because the backup
was successfully, clear the backup access bitmap and merge back the
background bitmap to get only the new writes.
Since QEMU cannot know if the backup was successful or not (except if
failure already happens during the setup QMP command), the management
layer needs to tell it via the teardown QMP command.
The bitmap action is also recorded in the device info now.
The backup-access api keeps track of what bitmap names got used for
which devices and thus knows when a bitmap went missing. Propagate
this information to the QMP user with a new 'missing-recreated'
variant for the taken bitmap action.
[0]: https://lore.kernel.org/qemu-devel/b68833dd-8864-4d72-7c61-c134a9835036@ya.ru/
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 519 +++++++++++++++++++++++++++++++++++++++----
pve-backup.h | 16 ++
qapi/block-core.json | 99 ++++++++-
system/runstate.c | 6 +
4 files changed, 596 insertions(+), 44 deletions(-)
create mode 100644 pve-backup.h
diff --git a/pve-backup.c b/pve-backup.c
index bd81621d51..0450303017 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1,4 +1,5 @@
#include "proxmox-backup-client.h"
+#include "pve-backup.h"
#include "vma.h"
#include "qemu/osdep.h"
@@ -14,6 +15,7 @@
#include "qobject/qdict.h"
#include "qapi/qmp/qerror.h"
#include "qemu/cutils.h"
+#include "qemu/error-report.h"
#if defined(CONFIG_MALLOC_TRIM)
#include <malloc.h>
@@ -40,6 +42,7 @@
*/
const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
+const char *BACKGROUND_BITMAP_NAME = "backup-access-background-bitmap";
static struct PVEBackupState {
struct {
@@ -98,8 +101,11 @@ typedef struct PVEBackupDevInfo {
char* device_name;
int completed_ret; // INT_MAX if not completed
BdrvDirtyBitmap *bitmap;
+ BdrvDirtyBitmap *background_bitmap; // used for external backup access
+ PBSBitmapAction bitmap_action;
BlockDriverState *target;
BlockJob *job;
+ BackupAccessSetupBitmapMode requested_bitmap_mode;
} PVEBackupDevInfo;
static void pvebackup_propagate_error(Error *err)
@@ -361,6 +367,67 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
+/*
+ * New writes since the backup access was set up are in the background bitmap. Because of failure,
+ * the previously tracked writes in di->bitmap are still required too. Thus, merge with the
+ * background bitmap to get all new writes since the last backup.
+ */
+static void handle_backup_access_bitmaps_in_error_case(PVEBackupDevInfo *di)
+{
+ Error *local_err = NULL;
+
+ if (di->bs && di->background_bitmap) {
+ bdrv_drained_begin(di->bs);
+ if (di->bitmap) {
+ bdrv_enable_dirty_bitmap(di->bitmap);
+ if (!bdrv_merge_dirty_bitmap(di->bitmap, di->background_bitmap, NULL, &local_err)) {
+ warn_report("backup access: %s - could not merge bitmaps in error path - %s",
+ di->device_name,
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ /*
+ * Could not merge, drop original bitmap too.
+ */
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+ } else {
+ warn_report("backup access: %s - expected bitmap not present", di->device_name);
+ }
+ bdrv_release_dirty_bitmap(di->background_bitmap);
+ bdrv_drained_end(di->bs);
+ }
+}
+
+/*
+ * Continue tracking for next incremental backup in di->bitmap. New writes since the backup access
+ * was set up are in the background bitmap. Because the backup was successful, clear di->bitmap and
+ * merge back the background bitmap to get only the new writes.
+ */
+static void handle_backup_access_bitmaps_after_success(PVEBackupDevInfo *di)
+{
+ Error *local_err = NULL;
+
+ if (di->bs && di->background_bitmap) {
+ bdrv_drained_begin(di->bs);
+ if (di->bitmap) {
+ bdrv_enable_dirty_bitmap(di->bitmap);
+ bdrv_clear_dirty_bitmap(di->bitmap, NULL);
+ if (!bdrv_merge_dirty_bitmap(di->bitmap, di->background_bitmap, NULL, &local_err)) {
+ warn_report("backup access: %s - could not merge bitmaps after backup - %s",
+ di->device_name,
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ /*
+ * Could not merge, drop original bitmap too.
+ */
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+ } else {
+ warn_report("backup access: %s - expected bitmap not present", di->device_name);
+ }
+ bdrv_release_dirty_bitmap(di->background_bitmap);
+ bdrv_drained_end(di->bs);
+ }
+}
+
static void cleanup_snapshot_access(PVEBackupDevInfo *di)
{
if (di->fleecing.snapshot_access) {
@@ -588,6 +655,51 @@ static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
return 0;
}
+static void setup_all_snapshot_access_bh(void *opaque)
+{
+ assert(!qemu_in_coroutine());
+
+ CoCtxData *data = (CoCtxData*)opaque;
+ Error **errp = (Error**)data->data;
+
+ Error *local_err = NULL;
+
+ GList *l = backup_state.di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ bdrv_drained_begin(di->bs);
+
+ if (di->bitmap) {
+ BdrvDirtyBitmap *background_bitmap =
+ bdrv_create_dirty_bitmap(di->bs, PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE,
+ BACKGROUND_BITMAP_NAME, &local_err);
+ if (!background_bitmap) {
+ error_setg(errp, "%s - creating background bitmap for backup access failed: %s",
+ di->device_name,
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ bdrv_drained_end(di->bs);
+ break;
+ }
+ di->background_bitmap = background_bitmap;
+ bdrv_disable_dirty_bitmap(di->bitmap);
+ }
+
+ if (setup_snapshot_access(di, &local_err) < 0) {
+ bdrv_drained_end(di->bs);
+ error_setg(errp, "%s - setting up snapshot access failed: %s", di->device_name,
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ break;
+ }
+
+ bdrv_drained_end(di->bs);
+ }
+
+ /* return */
+ aio_co_enter(data->ctx, data->co);
+}
+
/*
* backup_job_create can *not* be run from a coroutine, so this can't either.
* The caller is responsible that backup_mutex is held nonetheless.
@@ -724,6 +836,62 @@ static bool fleecing_no_efi_tpm(const char *device_id)
return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
}
+static bool fleecing_all(const char *device_id)
+{
+ return true;
+}
+
+static PVEBackupDevInfo coroutine_fn GRAPH_RDLOCK *get_single_device_info(
+ const char *device,
+ bool (*device_uses_fleecing)(const char*),
+ Error **errp)
+{
+ BlockBackend *blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
+ return NULL;
+ }
+ BlockDriverState *bs = blk_bs(blk);
+ if (!bdrv_co_is_inserted(bs)) {
+ error_setg(errp, "Device '%s' has no medium", device);
+ return NULL;
+ }
+ PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+ di->bs = bs;
+ di->device_name = g_strdup(bdrv_get_device_name(bs));
+
+ if (device_uses_fleecing && device_uses_fleecing(device)) {
+ g_autofree gchar *fleecing_devid = g_strconcat(device, "-fleecing", NULL);
+ BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
+ if (!fleecing_blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", fleecing_devid);
+ goto fail;
+ }
+ BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
+ if (!bdrv_co_is_inserted(fleecing_bs)) {
+ error_setg(errp, "Device '%s' has no medium", fleecing_devid);
+ goto fail;
+ }
+ /*
+ * Fleecing image needs to be the same size to act as a cbw target.
+ */
+ if (bs->total_sectors != fleecing_bs->total_sectors) {
+ error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
+ fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
+ goto fail;
+ }
+ di->fleecing.bs = fleecing_bs;
+ }
+
+ return di;
+fail:
+ g_free(di->device_name);
+ g_free(di);
+ return NULL;
+}
+
/*
* Returns a list of device infos, which needs to be freed by the caller. In
* case of an error, errp will be set, but the returned value might still be a
@@ -742,45 +910,10 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
gchar **d = devs;
while (d && *d) {
- BlockBackend *blk = blk_by_name(*d);
- if (!blk) {
- error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
- "Device '%s' not found", *d);
- goto err;
- }
- BlockDriverState *bs = blk_bs(blk);
- if (!bdrv_co_is_inserted(bs)) {
- error_setg(errp, "Device '%s' has no medium", *d);
+ PVEBackupDevInfo *di = get_single_device_info(*d, device_uses_fleecing, errp);
+ if (!di) {
goto err;
}
- PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
- di->bs = bs;
- di->device_name = g_strdup(bdrv_get_device_name(bs));
-
- if (device_uses_fleecing && device_uses_fleecing(*d)) {
- g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
- BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
- if (!fleecing_blk) {
- error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
- "Device '%s' not found", fleecing_devid);
- goto err;
- }
- BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
- if (!bdrv_co_is_inserted(fleecing_bs)) {
- error_setg(errp, "Device '%s' has no medium", fleecing_devid);
- goto err;
- }
- /*
- * Fleecing image needs to be the same size to act as a cbw target.
- */
- if (bs->total_sectors != fleecing_bs->total_sectors) {
- error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
- fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
- goto err;
- }
- di->fleecing.bs = fleecing_bs;
- }
-
di_list = g_list_append(di_list, di);
d++;
}
@@ -839,8 +972,9 @@ static void clear_backup_state_bitmap_list(void) {
*/
static void initialize_backup_state_stat(
const char *backup_file,
- uuid_t uuid,
- size_t total)
+ uuid_t *uuid,
+ size_t total,
+ bool starting)
{
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
@@ -855,15 +989,19 @@ static void initialize_backup_state_stat(
}
backup_state.stat.backup_file = g_strdup(backup_file);
- uuid_copy(backup_state.stat.uuid, uuid);
- uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
+ if (uuid) {
+ uuid_copy(backup_state.stat.uuid, *uuid);
+ uuid_unparse_lower(*uuid, backup_state.stat.uuid_str);
+ } else {
+ backup_state.stat.uuid_str[0] = '\0';
+ }
backup_state.stat.total = total;
backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
backup_state.stat.finishing = false;
- backup_state.stat.starting = true;
+ backup_state.stat.starting = starting;
}
/*
@@ -876,6 +1014,299 @@ static void backup_state_set_target_id(const char *target_id) {
backup_state.target_id = g_strdup(target_id);
}
+BackupAccessInfoList *coroutine_fn qmp_backup_access_setup(
+ const char *target_id,
+ BackupAccessSourceDeviceList *devices,
+ Error **errp)
+{
+ assert(qemu_in_coroutine());
+
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
+ Error *local_err = NULL;
+ GList *di_list = NULL;
+ GList *l;
+
+ if (backup_state.di_list) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "previous backup for target '%s' not finished", backup_state.target_id);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ return NULL;
+ }
+
+ bdrv_graph_co_rdlock();
+ for (BackupAccessSourceDeviceList *it = devices; it; it = it->next) {
+ PVEBackupDevInfo *di = get_single_device_info(it->value->device, fleecing_all, &local_err);
+ if (!di) {
+ bdrv_graph_co_rdunlock();
+ error_propagate(errp, local_err);
+ goto err;
+ }
+ di->requested_bitmap_mode = it->value->bitmap_mode;
+ di_list = g_list_append(di_list, di);
+ }
+ bdrv_graph_co_rdunlock();
+ assert(di_list);
+
+ size_t total = 0;
+
+ l = di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ ssize_t size = bdrv_getlength(di->bs);
+ if (size < 0) {
+ error_setg_errno(errp, -size, "bdrv_getlength failed");
+ goto err;
+ }
+ di->size = size;
+ total += size;
+
+ di->completed_ret = INT_MAX;
+ }
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.reused = 0;
+
+ /* clear previous backup's bitmap_list */
+ clear_backup_state_bitmap_list();
+
+ const char *bitmap_name = target_id;
+
+ /* create bitmaps if requested */
+ l = di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ di->block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE;
+
+ PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
+ size_t dirty = di->size;
+
+ if (di->requested_bitmap_mode == BACKUP_ACCESS_SETUP_BITMAP_MODE_NONE ||
+ di->requested_bitmap_mode == BACKUP_ACCESS_SETUP_BITMAP_MODE_NEW) {
+ BdrvDirtyBitmap *old_bitmap = bdrv_find_dirty_bitmap(di->bs, bitmap_name);
+ if (old_bitmap) {
+ bdrv_release_dirty_bitmap(old_bitmap);
+ action = PBS_BITMAP_ACTION_NOT_USED_REMOVED; // set below for new
+ }
+ }
+
+ BdrvDirtyBitmap *bitmap = NULL;
+ if (di->requested_bitmap_mode == BACKUP_ACCESS_SETUP_BITMAP_MODE_NEW ||
+ di->requested_bitmap_mode == BACKUP_ACCESS_SETUP_BITMAP_MODE_USE) {
+ bitmap = bdrv_find_dirty_bitmap(di->bs, bitmap_name);
+ if (!bitmap) {
+ bitmap = bdrv_create_dirty_bitmap(di->bs, PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE,
+ bitmap_name, errp);
+ if (!bitmap) {
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ goto err;
+ }
+ bdrv_set_dirty_bitmap(bitmap, 0, di->size);
+ if (di->requested_bitmap_mode == BACKUP_ACCESS_SETUP_BITMAP_MODE_USE) {
+ action = PBS_BITMAP_ACTION_MISSING_RECREATED;
+ } else {
+ action = PBS_BITMAP_ACTION_NEW;
+ }
+ } else {
+ if (di->requested_bitmap_mode == BACKUP_ACCESS_SETUP_BITMAP_MODE_NEW) {
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ error_setg(errp, "internal error - removed old bitmap still present");
+ goto err;
+ }
+ /* track clean chunks as reused */
+ dirty = MIN(bdrv_get_dirty_count(bitmap), di->size);
+ backup_state.stat.reused += di->size - dirty;
+ action = PBS_BITMAP_ACTION_USED;
+ }
+ }
+
+ PBSBitmapInfo *info = g_malloc(sizeof(*info));
+ info->drive = g_strdup(di->device_name);
+ info->action = action;
+ info->size = di->size;
+ info->dirty = dirty;
+ backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
+
+ di->bitmap = bitmap;
+ di->bitmap_action = action;
+ }
+
+ /* starting=false, because there is no associated QEMU job */
+ initialize_backup_state_stat(NULL, NULL, total, false);
+
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ backup_state_set_target_id(target_id);
+
+ backup_state.vmaw = NULL;
+ backup_state.pbs = NULL;
+
+ backup_state.di_list = di_list;
+
+ /* Run setup_all_snapshot_access_bh outside of coroutine (in BH) but keep
+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
+ * points, and we'll release it as soon as the BH reschedules us.
+ */
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &local_err,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, setup_all_snapshot_access_bh, &waker);
+ qemu_coroutine_yield();
+
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto err;
+ }
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ BackupAccessInfoList *bai_head = NULL, **p_bai_next = &bai_head;
+
+ l = di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ BackupAccessInfoList *info = g_malloc0(sizeof(*info));
+ info->value = g_malloc0(sizeof(*info->value));
+ info->value->node_name = g_strdup(bdrv_get_node_name(di->fleecing.snapshot_access));
+ info->value->device = g_strdup(di->device_name);
+ info->value->size = di->size;
+ if (di->bitmap) {
+ info->value->bitmap_node_name = g_strdup(bdrv_get_node_name(di->bs));
+ info->value->bitmap_name = g_strdup(bitmap_name);
+ info->value->bitmap_action = di->bitmap_action;
+ info->value->has_bitmap_action = true;
+ }
+
+ *p_bai_next = info;
+ p_bai_next = &info->next;
+ }
+
+ return bai_head;
+
+err:
+
+ l = di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ handle_backup_access_bitmaps_in_error_case(di);
+
+ g_free(di->device_name);
+ di->device_name = NULL;
+
+ g_free(di);
+ }
+ g_list_free(di_list);
+ backup_state.di_list = NULL;
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ return NULL;
+}
+
+/*
+ * Caller needs to hold the backup mutex or the BQL.
+ */
+void backup_access_teardown(bool success)
+{
+ GList *l = backup_state.di_list;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.finishing = true;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ if (di->fleecing.snapshot_access) {
+ bdrv_unref(di->fleecing.snapshot_access);
+ di->fleecing.snapshot_access = NULL;
+ }
+ if (di->fleecing.cbw) {
+ bdrv_cbw_drop(di->fleecing.cbw);
+ di->fleecing.cbw = NULL;
+ }
+
+ if (success) {
+ handle_backup_access_bitmaps_after_success(di);
+ } else {
+ handle_backup_access_bitmaps_in_error_case(di);
+ }
+
+ g_free(di->device_name);
+ di->device_name = NULL;
+
+ g_free(di);
+ }
+ g_list_free(backup_state.di_list);
+ backup_state.di_list = NULL;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+}
+
+// Not done in a coroutine, because bdrv_co_unref() and cbw_drop() would just spawn BHs anyways.
+// Caller needs to hold the backup_state.backup_mutex lock
+static void backup_access_teardown_bh(void *opaque)
+{
+ CoCtxData *data = (CoCtxData*)opaque;
+
+ backup_access_teardown(*((bool*)data->data));
+
+ /* return */
+ aio_co_enter(data->ctx, data->co);
+}
+
+void coroutine_fn qmp_backup_access_teardown(const char *target_id, bool success, Error **errp)
+{
+ assert(qemu_in_coroutine());
+
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
+ if (!backup_state.target_id) { // nothing to do
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ return;
+ }
+
+ /*
+ * Continue with target_id == NULL, used by the callback registered for qemu_cleanup()
+ */
+ if (target_id && strcmp(backup_state.target_id, target_id)) {
+ error_setg(errp, "cannot teardown backup access - got target %s instead of %s",
+ target_id, backup_state.target_id);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ return;
+ }
+
+ if (!strcmp(backup_state.target_id, "Proxmox VE")) {
+ error_setg(errp, "cannot teardown backup access for PVE - use backup-cancel instead");
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ return;
+ }
+
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &success,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, backup_access_teardown_bh, &waker);
+ qemu_coroutine_yield();
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ return;
+}
+
UuidInfo coroutine_fn *qmp_backup(
const char *backup_file,
const char *password,
@@ -1068,6 +1499,7 @@ UuidInfo coroutine_fn *qmp_backup(
}
di->dev_id = dev_id;
+ di->bitmap_action = action;
PBSBitmapInfo *info = g_malloc(sizeof(*info));
info->drive = g_strdup(di->device_name);
@@ -1119,7 +1551,7 @@ UuidInfo coroutine_fn *qmp_backup(
}
}
/* initialize global backup_state now */
- initialize_backup_state_stat(backup_file, uuid, total);
+ initialize_backup_state_stat(backup_file, &uuid, total, true);
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -1298,5 +1730,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
ret->backup_fleecing = true;
+ ret->backup_access_api = true;
return ret;
}
diff --git a/pve-backup.h b/pve-backup.h
new file mode 100644
index 0000000000..9ebeef7c8f
--- /dev/null
+++ b/pve-backup.h
@@ -0,0 +1,16 @@
+/*
+ * Backup code used by Proxmox VE
+ *
+ * Copyright (C) Proxmox Server Solutions
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PVE_BACKUP_H
+#define PVE_BACKUP_H
+
+void backup_access_teardown(bool success);
+
+#endif /* PVE_BACKUP_H */
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 9bdcfa31ea..2fb51215f2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1023,6 +1023,9 @@
#
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
+# @backup-access-api: Whether backup access API for external providers is
+# supported or not.
+#
# @backup-fleecing: Whether backup fleecing is supported or not.
#
# @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
@@ -1036,6 +1039,7 @@
'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
'pbs-library-version': 'str',
+ 'backup-access-api': 'bool',
'backup-fleecing': 'bool',
'backup-max-workers': 'bool' } }
@@ -1067,9 +1071,16 @@
# base snapshot did not match the base given for the current job or
# the crypt mode has changed.
#
+# @missing-recreated: A bitmap for incremental backup was expected to be
+# present, but was missing and thus got recreated. For example, this can
+# happen if the drive was re-attached or if the bitmap was deleted for some
+# other reason. PBS does not currently keep track of this; the backup-access
+# mechanism does.
+#
##
{ 'enum': 'PBSBitmapAction',
- 'data': ['not-used', 'not-used-removed', 'new', 'used', 'invalid'] }
+ 'data': ['not-used', 'not-used-removed', 'new', 'used', 'invalid',
+ 'missing-recreated'] }
##
# @PBSBitmapInfo:
@@ -1102,6 +1113,92 @@
##
{ 'command': 'query-pbs-bitmap-info', 'returns': ['PBSBitmapInfo'] }
+##
+# @BackupAccessInfo:
+#
+# Info associated to a snapshot access for backup. For more information about
+# the bitmap see @BackupAccessSetupBitmapMode.
+#
+# @node-name: the block node name of the snapshot-access node.
+#
+# @device: the device on top of which the snapshot access was created.
+#
+# @size: the size of the block device in bytes.
+#
+# @bitmap-node-name: the block node name the dirty bitmap is associated to.
+#
+# @bitmap-name: the name of the dirty bitmap associated to the backup access.
+#
+# @bitmap-action: the action taken on the dirty bitmap.
+#
+##
+{ 'struct': 'BackupAccessInfo',
+ 'data': { 'node-name': 'str', 'device': 'str', 'size': 'size',
+ '*bitmap-node-name': 'str', '*bitmap-name': 'str',
+ '*bitmap-action': 'PBSBitmapAction' } }
+
+##
+# @BackupAccessSourceDevice:
+#
+# Source block device information for creating a backup access.
+#
+# @device: the block device name.
+#
+# @bitmap-mode: used to control whether the bitmap should be reused or
+# recreated or not used. Default is not using a bitmap.
+#
+##
+{ 'struct': 'BackupAccessSourceDevice',
+ 'data': { 'device': 'str', '*bitmap-mode': 'BackupAccessSetupBitmapMode' } }
+
+##
+# @BackupAccessSetupBitmapMode:
+#
+# How to set up a bitmap for a device for @backup-access-setup.
+#
+# @none: do not use a bitmap. Removes an existing bitmap if present.
+#
+# @new: create and use a new bitmap.
+#
+# @use: try to re-use an existing bitmap. Create a new one if it doesn't exist.
+##
+{ 'enum': 'BackupAccessSetupBitmapMode',
+ 'data': ['none', 'new', 'use' ] }
+
+##
+# @backup-access-setup:
+#
+# Set up snapshot access to VM drives for an external backup provider. No other
+# backup or backup access can be done before tearing down the backup access.
+#
+# @target-id: the unique ID of the backup target.
+#
+# @devices: list of devices for which to create the backup access. Also
+# controls whether to use/create a bitmap for the device. Check the
+# @bitmap-action in the result to see what action was actually taken for the
+# bitmap. Each target controls its own bitmaps.
+#
+# Returns: a list of @BackupAccessInfo, one for each device.
+#
+##
+{ 'command': 'backup-access-setup',
+ 'data': { 'target-id': 'str', 'devices': [ 'BackupAccessSourceDevice' ] },
+ 'returns': [ 'BackupAccessInfo' ], 'coroutine': true }
+
+##
+# @backup-access-teardown:
+#
+# Tear down previously set up snapshot access for the same target.
+#
+# @target-id: the ID of the backup target.
+#
+# @success: whether the backup done by the external provider was successful.
+#
+##
+{ 'command': 'backup-access-teardown',
+ 'data': { 'target-id': 'str', 'success': 'bool' },
+ 'coroutine': true }
+
##
# @BlockDeviceTimedStats:
#
diff --git a/system/runstate.c b/system/runstate.c
index 272801d307..cf775213bd 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -60,6 +60,7 @@
#include "system/system.h"
#include "system/tpm.h"
#include "trace.h"
+#include "pve-backup.h"
static NotifierList exit_notifiers =
NOTIFIER_LIST_INITIALIZER(exit_notifiers);
@@ -921,6 +922,11 @@ void qemu_cleanup(int status)
* requests happening from here on anyway.
*/
bdrv_drain_all_begin();
+ /*
+ * The backup access is set up by a QMP command, but is neither owned by a monitor nor
+ * associated to a BlockBackend. Need to tear it down manually here.
+ */
+ backup_access_teardown(false);
job_cancel_sync_all();
bdrv_close_all();

View File

@@ -1,106 +0,0 @@
From 5a8cf9e98ba1668a6a20c2fcda1704de4103ff58 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 2 Jul 2025 18:27:34 +0200
Subject: [PATCH 56/59] PVE backup: prepare for the switch to using blockdev
rather than drive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Also allow finding block nodes by their node name rather than just via
an associated block backend, which might not exist for block nodes.
For regular drives, it is essential to not use the throttle group,
because otherwise the limits intended only for the guest would also
apply to the backup job.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
pve-backup.c | 51 +++++++++++++++++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 12 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 0450303017..457fcb7e5c 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -847,29 +847,56 @@ static PVEBackupDevInfo coroutine_fn GRAPH_RDLOCK *get_single_device_info(
Error **errp)
{
BlockBackend *blk = blk_by_name(device);
- if (!blk) {
- error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
- "Device '%s' not found", device);
- return NULL;
+ BlockDriverState *root_bs, *bs;
+
+ if (blk) {
+ root_bs = bs = blk_bs(blk);
+ } else {
+ /* TODO PVE 10 - fleecing will always be attached without blk */
+ root_bs = bs = bdrv_find_node(device);
+ if (!bs) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
+ return NULL;
+ }
+ /* For TPM, bs is already correct, otherwise need the file child. */
+ if (!strncmp(bs->drv->format_name, "throttle", 8)) {
+ if (!bs->file || !bs->file->bs) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found (no file child)", device);
+ return NULL;
+ }
+ bs = bs->file->bs;
+ }
}
- BlockDriverState *bs = blk_bs(blk);
+
if (!bdrv_co_is_inserted(bs)) {
error_setg(errp, "Device '%s' has no medium", device);
return NULL;
}
+
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
- di->device_name = g_strdup(bdrv_get_device_name(bs));
+ /* Need the name of the root node, e.g. drive-scsi0 */
+ di->device_name = g_strdup(bdrv_get_device_or_node_name(root_bs));
if (device_uses_fleecing && device_uses_fleecing(device)) {
g_autofree gchar *fleecing_devid = g_strconcat(device, "-fleecing", NULL);
+ BlockDriverState *fleecing_bs;
+
BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
- if (!fleecing_blk) {
- error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
- "Device '%s' not found", fleecing_devid);
- goto fail;
+ if (fleecing_blk) {
+ fleecing_bs = blk_bs(fleecing_blk);
+ } else {
+ /* TODO PVE 10 - fleecing will always be attached without blk */
+ fleecing_bs = bdrv_find_node(fleecing_devid);
+ if (!fleecing_bs) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", fleecing_devid);
+ goto fail;
+ }
}
- BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
+
if (!bdrv_co_is_inserted(fleecing_bs)) {
error_setg(errp, "Device '%s' has no medium", fleecing_devid);
goto fail;
@@ -927,7 +954,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
- di->device_name = g_strdup(bdrv_get_device_name(bs));
+ di->device_name = g_strdup(bdrv_get_device_or_node_name(bs));
di_list = g_list_append(di_list, di);
}
}
--
2.39.5

View File

@@ -1,71 +0,0 @@
From 5beb1f48555d74f468b6c0ca657d3be44c8ea8e3 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 2 Jul 2025 18:27:35 +0200
Subject: [PATCH 57/59] block/zeroinit: support using as blockdev driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
block/zeroinit.c | 12 +++++++++---
qapi/block-core.json | 5 +++--
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/block/zeroinit.c b/block/zeroinit.c
index f9d513db15..036edb17f5 100644
--- a/block/zeroinit.c
+++ b/block/zeroinit.c
@@ -66,6 +66,7 @@ static int zeroinit_open(BlockDriverState *bs, QDict *options, int flags,
QemuOpts *opts;
Error *local_err = NULL;
int ret;
+ const char *next = NULL;
s->extents = 0;
@@ -77,9 +78,14 @@ static int zeroinit_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- /* Open the raw file */
- ret = bdrv_open_file_child(qemu_opt_get(opts, "x-next"), options, "next",
- bs, &local_err);
+
+ next = qemu_opt_get(opts, "x-next");
+
+ if (next) {
+ ret = bdrv_open_file_child(next, options, "next", bs, &local_err);
+ } else { /* when opened as a blockdev, there is no 'next' option */
+ ret = bdrv_open_file_child(NULL, options, "file", bs, &local_err);
+ }
if (ret < 0) {
error_propagate(errp, local_err);
goto fail;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2fb51215f2..f8ed564cf0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3586,7 +3586,7 @@
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
- 'vmdk', 'vpc', 'vvfat' ] }
+ 'vmdk', 'vpc', 'vvfat', 'zeroinit' ] }
##
# @BlockdevOptionsFile:
@@ -5172,7 +5172,8 @@
'if': 'CONFIG_BLKIO' },
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
- 'vvfat': 'BlockdevOptionsVVFAT'
+ 'vvfat': 'BlockdevOptionsVVFAT',
+ 'zeroinit': 'BlockdevOptionsGenericFormat'
} }
##
--
2.39.5

View File

@@ -1,61 +0,0 @@
From d180b059731818ae34e43e11495c8ac081ab89b9 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 2 Jul 2025 18:27:36 +0200
Subject: [PATCH 58/59] block/alloc-track: support using as blockdev driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
qapi/block-core.json | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f8ed564cf0..07c5773717 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3567,7 +3567,8 @@
# Since: 2.9
##
{ 'enum': 'BlockdevDriver',
- 'data': [ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs',
+ 'data': [ 'alloc-track',
+ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs',
'cloop', 'compress', 'copy-before-write', 'copy-on-read', 'dmg',
'file', 'snapshot-access', 'ftp', 'ftps',
{'name': 'gluster', 'features': [ 'deprecated' ] },
@@ -3668,6 +3669,21 @@
{ 'struct': 'BlockdevOptionsNull',
'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
+##
+# @BlockdevOptionsAllocTrack:
+#
+# Driver specific block device options for the alloc-track backend.
+#
+# @backing: backing file with the data.
+#
+# @auto-remove: whether the alloc-track driver should drop itself
+# after completing the stream.
+#
+##
+{ 'struct': 'BlockdevOptionsAllocTrack',
+ 'base': 'BlockdevOptionsGenericFormat',
+ 'data': { 'auto-remove': 'bool', 'backing': 'BlockdevRefOrNull' } }
+
##
# @BlockdevOptionsPbs:
#
@@ -5114,6 +5130,7 @@
'*detect-zeroes': 'BlockdevDetectZeroesOptions' },
'discriminator': 'driver',
'data': {
+ 'alloc-track':'BlockdevOptionsAllocTrack',
'blkdebug': 'BlockdevOptionsBlkdebug',
'blklogwrites':'BlockdevOptionsBlklogwrites',
'blkverify': 'BlockdevOptionsBlkverify',
--
2.39.5

View File

@@ -1,137 +0,0 @@
From 76442f3eafa8cbe647fe2d39e78e817ec681143c Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 2 Jul 2025 18:27:37 +0200
Subject: [PATCH 59/59] block/qapi: include child references in block device
info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In combination with using a throttle filter to enforce IO limits for
a guest device, knowing the 'file' child of a block device can be
useful. If the throttle filter is only intended for guest IO, block
jobs should not also be limited by the throttle filter, so the
block operations need to be done with the 'file' child of the top
throttle node as the target. In combination with mirroring, the name
of that child is not fixed.
Another scenario is when unplugging a guest device after mirroring
below a top throttle node, where the mirror target is added explicitly
via blockdev-add. After mirroring, the target becomes the new 'file'
child of the throttle node. For unplugging, both the top throttle node
and the mirror target need to be deleted, because only implicitly
added child nodes are deleted automatically, and the current 'file'
child of the throttle node was explicitly added (as the mirror
target).
In other scenarios, it could be useful to follow the backing chain.
Note that iotests 191 and 273 use _filter_img_info, so the 'children'
information is filtered out there.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
block/qapi.c | 10 ++++++++++
qapi/block-core.json | 16 ++++++++++++++++
tests/qemu-iotests/184.out | 8 ++++++++
3 files changed, 34 insertions(+)
diff --git a/block/qapi.c b/block/qapi.c
index 2c50a6bf3b..e08a1e970f 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -51,6 +51,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
ImageInfo *backing_info;
BlockDriverState *backing;
BlockDeviceInfo *info;
+ BlockdevChildList **children_list_tail;
+ BdrvChild *child;
if (!bs->drv) {
error_setg(errp, "Block device %s is ejected", bs->node_name);
@@ -77,6 +79,14 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->node_name = g_strdup(bs->node_name);
}
+ children_list_tail = &info->children;
+ QLIST_FOREACH(child, &bs->children, next) {
+ BlockdevChild *child_ref = g_new0(BlockdevChild, 1);
+ child_ref->child = g_strdup(child->name);
+ child_ref->node_name = g_strdup(child->bs->node_name);
+ QAPI_LIST_APPEND(children_list_tail, child_ref);
+ }
+
backing = bdrv_cow_bs(bs);
if (backing) {
info->backing_file = g_strdup(backing->filename);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 07c5773717..4db27f5819 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -461,6 +461,19 @@
'direct': 'bool',
'no-flush': 'bool' } }
+##
+# @BlockdevChild:
+#
+# @child: The name of the child, for example 'file' or 'backing'.
+#
+# @node-name: The name of the child's block driver node.
+#
+# Since: 10.1
+##
+{ 'struct': 'BlockdevChild',
+ 'data': { 'child': 'str',
+ 'node-name': 'str' } }
+
##
# @BlockDeviceInfo:
#
@@ -486,6 +499,8 @@
# @backing_file_depth: number of files in the backing file chain
# (since: 1.2)
#
+# @children: Information about child block nodes. (since: 10.1)
+#
# @active: true if the backend is active; typical cases for inactive backends
# are on the migration source instance after migration completes and on the
# destination before it completes. (since: 10.0)
@@ -560,6 +575,7 @@
{ 'struct': 'BlockDeviceInfo',
'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
'*backing_file': 'str', 'backing_file_depth': 'int',
+ 'children': ['BlockdevChild'],
'active': 'bool', 'encrypted': 'bool',
'detect_zeroes': 'BlockdevDetectZeroesOptions',
'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
diff --git a/tests/qemu-iotests/184.out b/tests/qemu-iotests/184.out
index 52692b6b3b..ef99bb2e9a 100644
--- a/tests/qemu-iotests/184.out
+++ b/tests/qemu-iotests/184.out
@@ -41,6 +41,12 @@ Testing:
},
"iops_wr": 0,
"ro": false,
+ "children": [
+ {
+ "node-name": "disk0",
+ "child": "file"
+ }
+ ],
"node-name": "throttle0",
"backing_file_depth": 1,
"drv": "throttle",
@@ -69,6 +75,8 @@ Testing:
},
"iops_wr": 0,
"ro": false,
+ "children": [
+ ],
"node-name": "disk0",
"backing_file_depth": 0,
"drv": "null-co",
--
2.39.5

View File

@@ -1,150 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 18 Jun 2025 12:25:31 +0200
Subject: [PATCH] savevm-async: reuse migration blocker check for
snapshots/hibernation
Same rationale as with upstream QEMU commit 5aaac46793 ("migration:
savevm: consult migration blockers"), migration and (async) snapshot
are essentially the same operation and thus snapshot also needs to
check for migration blockers. For example, this catches passed-through
PCI devices, where the driver does not support migration and VirtIO-GL
display, which also does not support migration yet.
In the case of VirtIO-GL, there were crashes [0].
However, the commit notes:
> There is really no difference between live migration and savevm, except
> that savevm does not require bdrv_invalidate_cache to be implemented
> by all disks. However, it is unlikely that savevm is used with anything
> except qcow2 disks, so the penalty is small and worth the improvement
> in catching bad usage of savevm.
and for Proxmox VE, suspend-to-disk with VMDK does use savevm-async
and would be broken by simply using migration_is_blocked(). To keep
this working, introduce a new helper that skips blockers whose message
matches the one used by the VMDK migration blocker.
The function qemu_savevm_state_blocked() is called as part of
savevm_async_is_blocked() so no check is lost with this
patch. The helper is defined in migration/migration.c to be able to
access the 'migration_blockers'.
The VMDK blocker message is declared via a '#define', because using a
'const char*' led the linker to complain about multiple
declarations. The message does not include the reference to the block
node anymore, but users can still easily find a VMDK disk in the VM
configuration.
Note, this also "breaks" snapshot and hibernate with VNC clipboard by
preventing it. Previously, this would "work", because the Proxmox VE
API has no check yet, but the clipboard will be broken after rollback,
in the sense that it cannot be used anymore, not just lost contents.
So some users might consider adding the check here a breaking change
even if it's technically correct to prevent snapshot and hibernate
with VNC clipboard. But other users might rightfully complain about
broken clipboard. And again, the check also prevents blockers from
passed-through PCI devices, etc. so it seems worth tolerating that
breakage.
[0]: https://forum.proxmox.com/threads/136976/
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Message-ID: <20250618102531.57444-1-f.ebner@proxmox.com>
---
block/vmdk.c | 4 +---
include/migration/blocker.h | 2 ++
migration/migration.c | 24 ++++++++++++++++++++++++
migration/migration.h | 1 +
migration/savevm-async.c | 2 +-
5 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/block/vmdk.c b/block/vmdk.c
index 2adec49912..80696a8d27 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1402,9 +1402,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
qemu_co_mutex_init(&s->lock);
/* Disable migration when VMDK images are used */
- error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
- "does not support live migration",
- bdrv_get_device_or_node_name(bs));
+ error_setg(&s->migration_blocker, "%s", MIGRATION_BLOCKER_VMDK);
ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
if (ret < 0) {
goto fail;
diff --git a/include/migration/blocker.h b/include/migration/blocker.h
index a687ac0efe..f36bfb2df1 100644
--- a/include/migration/blocker.h
+++ b/include/migration/blocker.h
@@ -18,6 +18,8 @@
#define MIG_MODE_ALL MIG_MODE__MAX
+#define MIGRATION_BLOCKER_VMDK "The vmdk format used by a disk does not support live migration"
+
/**
* @migrate_add_blocker - prevent all modes of migration from proceeding
*
diff --git a/migration/migration.c b/migration/migration.c
index 2f3430f440..ecad1aca32 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2030,6 +2030,30 @@ bool migration_is_blocked(Error **errp)
return false;
}
+bool savevm_async_is_blocked(Error **errp)
+{
+ GSList *blockers = migration_blockers[migrate_mode()];
+
+ if (qemu_savevm_state_blocked(errp)) {
+ return true;
+ }
+
+ /*
+ * The limitation for VMDK images only applies to live-migration, not
+ * snapshots, see commit 5aaac46793 ("migration: savevm: consult migration
+ * blockers").
+ */
+ while (blockers) {
+ if (strcmp(error_get_pretty(blockers->data), MIGRATION_BLOCKER_VMDK)) {
+ error_propagate(errp, error_copy(blockers->data));
+ return true;
+ }
+ blockers = g_slist_next(blockers);
+ }
+
+ return false;
+}
+
/* Returns true if continue to migrate, or false if error detected */
static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
{
diff --git a/migration/migration.h b/migration/migration.h
index d53f7cad84..b772073572 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -531,6 +531,7 @@ int migration_call_notifiers(MigrationState *s, MigrationEventType type,
int migrate_init(MigrationState *s, Error **errp);
bool migration_is_blocked(Error **errp);
+bool savevm_async_is_blocked(Error **errp);
/* True if outgoing migration has entered postcopy phase */
bool migration_in_postcopy(void);
bool migration_postcopy_is_alive(MigrationStatus state);
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 730b815494..6cb91dca27 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -375,7 +375,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
return;
}
- if (qemu_savevm_state_blocked(errp)) {
+ if (savevm_async_is_blocked(errp)) {
goto fail;
}

95
debian/patches/series vendored
View File

@@ -1,5 +1,7 @@
extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
extra/0002-ide-avoid-potential-deadlock-when-draining-during-tr.patch
extra/0002-monitor-hmp-add-support-for-flag-argument-with-value.patch
extra/0003-monitor-refactor-set-expire_password-and-allow-VNC-d.patch
extra/0004-block-mirror-fix-NULL-pointer-dereference-in-mirror_.patch
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
@@ -12,57 +14,46 @@ pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
pve/0007-PVE-Up-qmp-add-get_link_status.patch
pve/0008-PVE-Up-glusterfs-allow-partial-reads.patch
pve/0009-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
pve/0010-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
pve/0011-PVE-Up-qemu-img-dd-add-isize-parameter.patch
pve/0012-PVE-Up-qemu-img-dd-add-n-skip_create.patch
pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
pve/0014-PVE-qapi-modify-query-machines.patch
pve/0015-PVE-qapi-modify-spice-query.patch
pve/0016-PVE-add-IOChannel-implementation-for-savevm-async.patch
pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
pve/0018-PVE-add-optional-buffer-size-to-QEMUFile.patch
pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
pve/0023-PVE-monitor-disable-oob-capability.patch
pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
pve/0025-PVE-Allow-version-code-in-machine-type.patch
pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
pve/0027-PVE-Backup-add-vma-backup-format-code.patch
pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
pve/0029-PVE-Add-sequential-job-transaction-support.patch
pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
pve/0037-PVE-block-stream-increase-chunk-size.patch
pve/0038-block-add-alloc-track-driver.patch
pve/0039-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
pve/0040-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
pve/0042-PVE-backup-add-fleecing-option.patch
pve/0043-adapt-machine-version-deprecation-for-Proxmox-VE.patch
pve/0044-Revert-hpet-avoid-timer-storms-on-periodic-timers.patch
pve/0045-Revert-hpet-store-full-64-bit-target-value-of-the-co.patch
pve/0046-Revert-hpet-accept-64-bit-reads-and-writes.patch
pve/0047-Revert-hpet-place-read-only-bits-directly-in-new_val.patch
pve/0048-Revert-hpet-remove-unnecessary-variable-index.patch
pve/0049-Revert-hpet-ignore-high-bits-of-comparator-in-32-bit.patch
pve/0050-Revert-hpet-fix-and-cleanup-persistence-of-interrupt.patch
pve/0051-PVE-backup-factor-out-helper-to-clear-backup-state-s.patch
pve/0052-PVE-backup-factor-out-helper-to-initialize-backup-st.patch
pve/0053-PVE-backup-add-target-ID-in-backup-state.patch
pve/0054-PVE-backup-get-device-info-allow-caller-to-specify-f.patch
pve/0055-PVE-backup-implement-backup-access-setup-and-teardow.patch
pve/0056-PVE-backup-prepare-for-the-switch-to-using-blockdev-.patch
pve/0057-block-zeroinit-support-using-as-blockdev-driver.patch
pve/0058-block-alloc-track-support-using-as-blockdev-driver.patch
pve/0059-block-qapi-include-child-references-in-block-device-.patch
pve/0060-savevm-async-reuse-migration-blocker-check-for-snaps.patch
pve/0016-PVE-add-savevm-async-for-background-state-snapshots.patch
pve/0017-PVE-add-optional-buffer-size-to-QEMUFile.patch
pve/0018-PVE-block-add-the-zeroinit-block-driver-filter.patch
pve/0019-PVE-Add-dummy-id-command-line-parameter.patch
pve/0020-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
pve/0021-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
pve/0022-PVE-monitor-disable-oob-capability.patch
pve/0023-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
pve/0024-PVE-Allow-version-code-in-machine-type.patch
pve/0025-PVE-Backup-add-vma-backup-format-code.patch
pve/0026-PVE-Backup-add-backup-dump-block-driver.patch
pve/0027-PVE-Backup-proxmox-backup-patches-for-qemu.patch
pve/0028-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0029-PVE-Backup-Add-dirty-bitmap-tracking-for-incremental.patch
pve/0030-PVE-various-PBS-fixes.patch
pve/0031-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
pve/0032-PVE-add-query_proxmox_support-QMP-command.patch
pve/0033-PVE-add-query-pbs-bitmap-info-QMP-call.patch
pve/0034-PVE-redirect-stderr-to-journal-when-daemonized.patch
pve/0035-PVE-Add-sequential-job-transaction-support.patch
pve/0036-PVE-Backup-Use-a-transaction-to-synchronize-job-stat.patch
pve/0037-PVE-Backup-Don-t-block-on-finishing-and-cleanup-crea.patch
pve/0038-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
pve/0039-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
pve/0040-PVE-fall-back-to-open-iscsi-initiatorname.patch
pve/0041-PVE-Use-coroutine-QMP-for-backup-cancel_backup.patch
pve/0042-PBS-add-master-key-support.patch
pve/0043-PVE-block-pbs-fast-path-reads-without-allocation-if-.patch
pve/0044-PVE-block-stream-increase-chunk-size.patch
pve/0045-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
pve/0046-block-add-alloc-track-driver.patch
pve/0047-PVE-whitelist-invalid-QAPI-names-for-backwards-compa.patch
pve/0048-PVE-savevm-async-register-yank-before-migration_inco.patch
pve-qemu-6.1-vitastor.patch

View File

@@ -1,6 +1,7 @@
# install the userspace utilities
debian/kvm-ifup etc/kvm/
debian/kvm-ifdown etc/kvm/
#install ovmf uefi rom
debian/OVMF_CODE-pure-efi.fd usr/share/kvm/
debian/OVMF_VARS-pure-efi.fd usr/share/kvm/
debian/kvm-ifdown etc/kvm/
# install the userspace utilities
debian/kvm-ifup etc/kvm/

View File

@@ -1,13 +1,16 @@
# also use aarch64 for 32 bit arm
usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
usr/bin/qemu-system-x86_64 usr/bin/kvm
# qemu-system-i386 and qemu-system-x86_64 provide the same hardware emulation
usr/bin/qemu-system-x86_64 usr/bin/qemu-system-i386
# also use aarch64 for 32 bit arm
usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
# upstream provides a qemu man page,
# we symlink to kvm for backward compatibility
# and to qemu-system-{i386,x86_64} to fulfill our 'Provides: qemu-system-x86'
usr/share/man/man1/qemu.1 usr/share/man/man1/kvm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-aarch64.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-arm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-i386.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-x86_64.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-arm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-aarch64.1

View File

@@ -1,7 +1,4 @@
pve-qemu-kvm: arch-dependent-file-in-usr-share [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: binary-from-other-architecture [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: embedded-javascript-library please use * [usr/share/doc/pve-qemu-kvm/kvm/_static/*]
pve-qemu-kvm: groff-message *: warning [*]: can't break line [usr/share/man/*]
pve-qemu-kvm: groff-message *: warning [*]: cannot adjust line [usr/share/man/*]
pve-qemu-kvm: statically-linked-binary [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: unstripped-binary-or-object [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: arch-dependent-file-in-usr-share usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: binary-from-other-architecture usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: unstripped-binary-or-object usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: statically-linked-binary usr/share/kvm/hppa-firmware.img

140
debian/rules vendored
View File

@@ -1,12 +1,22 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Sample debian/rules that uses debhelper.
# This file was originally written by Joey Hess and Craig Small.
# As a special exception, when this file is copied by dh-make into a
# dh-make output file, you may use that output file without restriction.
# This special exception was added by Craig Small in version 0.37 of dh-make.
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
include /usr/share/dpkg/default.mk
include /usr/share/dpkg/pkg-info.mk
HOST_CPU ?= $(DEB_HOST_GNU_CPU)
# These are used for cross-compiling and for saving the configure script
# from having to guess our platform (since we know it already)
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
ARCH ?= $(shell dpkg-architecture -qDEB_HOST_GNU_CPU)
PACKAGE=pve-qemu-kvm
destdir := $(CURDIR)/debian/$(PACKAGE)
@@ -17,69 +27,58 @@ machinefile := $(destdir)/usr/share/kvm/machine-versions-x86_64.json
# default QEMU out-of-tree build directory is ./build
BUILDDIR=build
# FIXME: pass to configure as --extra-cflags=CFLAGS ?! also LDFLAGS?
CFLAGS += -Wall
CFLAGS = -Wall
export CFLAGS
# DEB_BUILD_OPTIONS=parallel=N
MAKEFLAGS += $(subst parallel=,-j,$(filter parallel=%,${DEB_BUILD_OPTIONS}))
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
CFLAGS += -O0
else
CFLAGS += -O2
endif
${BUILDDIR}/config.status: configure
dh_testdir
# Add here commands to configure the package.
ifneq "$(wildcard /usr/share/misc/config.sub)" ""
cp -f /usr/share/misc/config.sub config.sub
endif
ifneq "$(wildcard /usr/share/misc/config.guess)" ""
cp -f /usr/share/misc/config.guess config.guess
endif
# guest-agent is only required for guest systems
./configure \
--disable-download \
--docdir=/usr/share/doc/pve-qemu-kvm \
--localstatedir=/var \
--prefix=/usr \
--sysconfdir=/etc \
--target-list=$(HOST_CPU)-softmmu,aarch64-softmmu \
--with-suffix="kvm" \
--with-pkgversion="${DEB_SOURCE}_${DEB_VERSION_UPSTREAM_REVISION}" \
--audio-drv-list="alsa" \
--datadir=/usr/share \
--libexecdir=/usr/lib/kvm \
--disable-capstone \
--disable-gtk \
--disable-guest-agent \
--disable-guest-agent-msi \
--disable-libnfs \
--disable-libssh \
--disable-sdl \
--disable-smartcard \
--disable-strip \
--disable-xen \
--enable-curl \
--enable-docs \
--enable-gnutls \
--enable-libiscsi \
--enable-libusb \
--enable-linux-aio \
--enable-linux-io-uring \
--enable-numa \
--enable-opengl \
--enable-rbd \
--enable-seccomp \
--enable-slirp \
--enable-spice \
--enable-usb-redir \
--enable-virglrenderer \
--enable-virtfs \
--enable-zstd
--with-git-submodules=ignore \
--docdir=/usr/share/doc/pve-qemu-kvm \
--localstatedir=/var \
--prefix=/usr \
--sysconfdir=/etc \
--target-list=$(ARCH)-softmmu,aarch64-softmmu \
--with-suffix="kvm" \
--with-pkgversion="${DEB_SOURCE}_${DEB_VERSION_UPSTREAM}" \
--audio-drv-list="alsa" \
--datadir=/usr/share \
--libexecdir=/usr/lib/kvm \
--disable-capstone \
--disable-gtk \
--disable-guest-agent \
--disable-guest-agent-msi \
--disable-libnfs \
--disable-libxml2 \
--disable-sdl \
--disable-smartcard \
--disable-strip \
--disable-xen \
--enable-curl \
--enable-docs \
--enable-glusterfs \
--enable-gnutls \
--enable-libiscsi \
--enable-libusb \
--enable-linux-aio \
--enable-linux-io-uring \
--enable-numa \
--enable-rbd \
--enable-seccomp \
--enable-spice \
--enable-usb-redir \
--enable-virtfs \
--enable-virtiofsd \
--enable-xfsctl
build: build-arch build-indep
build-arch: build-stamp
build-indep: build-stamp
build: build-stamp
build-stamp: ${BUILDDIR}/config.status
@@ -97,8 +96,15 @@ clean:
dh_testroot
rm -f build-stamp
# Add here commands to clean up before the build process.
# Add here commands to clean up after the build process.
-$(MAKE) distclean
ifneq "$(wildcard /usr/share/misc/config.sub)" ""
cp -f /usr/share/misc/config.sub config.sub
endif
ifneq "$(wildcard /usr/share/misc/config.guess)" ""
cp -f /usr/share/misc/config.guess config.guess
endif
dh_clean
@@ -112,6 +118,21 @@ install: build
# Add here commands to install the package into debian/pve-kvm.
$(MAKE) DESTDIR=$(destdir) install
# we do not need openbios files (sparc/ppc)
rm -rf $(destdir)/usr/share/kvm/openbios-*
# remove ppc files
rm $(destdir)/usr/share/kvm/*.dtb
rm $(destdir)/usr/share/kvm/s390-ccw.img
rm $(destdir)/usr/share/kvm/s390-netboot.img
rm $(destdir)/usr/share/kvm/qemu_vga.ndrv
rm $(destdir)/usr/share/kvm/slof.bin
rm $(destdir)/usr/share/kvm/u-boot.e500
# remove Alpha files
rm $(destdir)/usr/share/kvm/palcode-clipper
# remove RISC-V files
rm $(destdir)/usr/share/kvm/opensbi-riscv32-generic-fw_dynamic.elf
rm $(destdir)/usr/share/kvm/opensbi-riscv64-generic-fw_dynamic.elf
# Remove things we don't package at all, would be a "kvm-dev" package
rm -Rf $(destdir)/usr/include/linux/
rm -Rf $(destdir)/usr/include
@@ -130,7 +151,8 @@ binary-indep: build install
binary-arch: build install
dh_testdir
dh_testroot
dh_installchangelogs
# exclude historic Changelog file, which stops at release 0.14
dh_installchangelogs --exclude=Changelog
dh_installdocs
dh_installexamples
dh_install
@@ -146,7 +168,7 @@ binary-arch: build install
# dh_installinfo
dh_installman
dh_link
dh_strip --dbgsym-migration='pve-qemu-kvm-dbg (<<8.0.0-1~)'
dh_strip --dbg-package=pve-qemu-kvm-dbg
dh_compress
dh_fixperms
# dh_perl

View File

@@ -1,3 +0,0 @@
debian/OVMF_CODE-pure-efi.fd
debian/OVMF_VARS-pure-efi.fd
debian/Logo.bmp

Some files were not shown because too many files have changed in this diff Show More