Compare commits

..

2 Commits

Author SHA1 Message Date
0115e4efb0 Add bdrv_co_block_status 2023-01-13 23:50:40 +03:00
4948452f3c Add Vitastor support 2022-12-14 19:16:08 +03:00
119 changed files with 12885 additions and 7202 deletions

7
.gitignore vendored
View File

@@ -1,7 +0,0 @@
/*.build
/*.buildinfo
/*.changes
/*.deb
/*.dsc
/*.tar*
/pve-qemu-kvm-*.*/

View File

@@ -1,90 +1,60 @@
include /usr/share/dpkg/default.mk
include /usr/share/dpkg/pkg-info.mk
include /usr/share/dpkg/architecture.mk
PACKAGE = pve-qemu-kvm
SRCDIR := qemu
BUILDDIR ?= $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
ORIG_SRC_TAR=$(PACKAGE)_$(DEB_VERSION_UPSTREAM).orig.tar.gz
BUILDDIR ?= ${PACKAGE}-${DEB_VERSION_UPSTREAM}
GITVERSION := $(shell git rev-parse HEAD)
DSC=$(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION).dsc
DEB = $(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
DEB_DBG = $(PACKAGE)-dbgsym_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
DEB = ${PACKAGE}_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
DEB_DBG = ${PACKAGE}-dbg_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
DEBS = $(DEB) $(DEB_DBG)
all: $(DEBS)
.PHONY: submodule
submodule:
ifeq ($(shell test -f "$(SRCDIR)/configure" && echo 1 || echo 0), 0)
git submodule update --init --recursive
cd $(SRCDIR); meson subprojects download
endif
test -f "${SRCDIR}/configure" || git submodule update --init --recursive
PC_BIOS_FW_PURGE_LIST_IN = \
hppa-firmware.img \
hppa-firmware64.img \
openbios-ppc \
openbios-sparc32 \
openbios-sparc64 \
palcode-clipper \
s390-ccw.img \
s390-netboot.img \
u-boot.e500 \
.*[a-zA-Z0-9]\.dtb \
.*[a-zA-Z0-9]\.dts \
qemu_vga.ndrv \
slof.bin \
opensbi-riscv.*-generic-fw_dynamic.bin \
BLOB_PURGE_SED_CMDS = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "/$(FILE)/d")
BLOB_PURGE_FILTER = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "$(FILE)")
$(BUILDDIR): submodule
$(BUILDDIR): keycodemapdb | submodule
# check if qemu/ was used for a build
# if so, please run 'make distclean' in the submodule and try again
test ! -f $(SRCDIR)/build/config.status
rm -rf $@.tmp $@
cp -a $(SRCDIR) $@.tmp
cp -a debian $@.tmp/debian
rm -rf $@.tmp/roms/edk2 # packaged separately
find $@.tmp/pc-bios -type f | grep $(BLOB_PURGE_FILTER) | xargs rm -f
sed -i $(BLOB_PURGE_SED_CMDS) $@.tmp/pc-bios/meson.build
echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $@.tmp/debian/SOURCE
mv $@.tmp $@
rm -rf $(BUILDDIR)
cp -a $(SRCDIR) $(BUILDDIR)
cp -a debian $(BUILDDIR)/debian
rm -rf $(BUILDDIR)/ui/keycodemapdb
cp -a keycodemapdb $(BUILDDIR)/ui/
echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $(BUILDDIR)/debian/SOURCE
.PHONY: deb kvm
deb kvm: $(DEBS)
$(DEB_DBG): $(DEB)
$(DEB): $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j
lintian $(DEBS)
sbuild: $(DSC)
sbuild $(DSC)
$(ORIG_SRC_TAR): $(BUILDDIR)
tar czf $(ORIG_SRC_TAR) --exclude="$(BUILDDIR)/debian" $(BUILDDIR)
.PHONY: dsc
dsc:
rm -rf *.dsc $(BUILDDIR)
$(MAKE) $(DSC)
lintian $(DSC)
$(DSC): $(ORIG_SRC_TAR) $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -S -us -uc -d
.PHONY: update
update:
cd $(SRCDIR) && git submodule deinit ui/keycodemapdb || true
rm -rf $(SRCDIR)/ui/keycodemapdb
mkdir $(SRCDIR)/ui/keycodemapdb
cd $(SRCDIR) && git submodule update --init ui/keycodemapdb
rm -rf keycodemapdb
mkdir keycodemapdb
cp -R $(SRCDIR)/ui/keycodemapdb/* keycodemapdb/
git add keycodemapdb
.PHONY: upload
upload: UPLOAD_DIST ?= $(DEB_DISTRIBUTION)
upload: $(DEBS)
tar cf - $(DEBS) | ssh repoman@repo.proxmox.com upload --product pve --dist $(UPLOAD_DIST)
tar cf - ${DEBS} | ssh repoman@repo.proxmox.com upload --product pve --dist bullseye
.PHONY: distclean clean
distclean: clean
clean:
rm -rf $(PACKAGE)-[0-9]*/ $(PACKAGE)*.tar* *.deb *.dsc *.build *.buildinfo *.changes
rm -rf $(BUILDDIR) $(PACKAGE)*.deb *.buildinfo *.changes
.PHONY: dinstall
dinstall: $(DEBS)

563
debian/changelog vendored
View File

@@ -1,565 +1,14 @@
pve-qemu-kvm (9.2.0-2) bookworm; urgency=medium
pve-qemu-kvm (6.1.1-2+vitastor2) bullseye; urgency=medium
* fix assertion failure when migrating a VM with multiple disks on a
replicated ZFS.
* Add bdrv_co_block_status implementation for QCOW2 export support
-- Proxmox Support Team <support@proxmox.com> Mon, 24 Feb 2025 17:33:34 +0100
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 13 Jan 2023 20:20:16 +0300
pve-qemu-kvm (9.2.0-1) bookworm; urgency=medium
pve-qemu-kvm (6.1.1-2+vitastor1) bullseye; urgency=medium
* update submodule and patches to QEMU 9.2.0
* Add Vitastor support
-- Proxmox Support Team <support@proxmox.com> Tue, 04 Feb 2025 08:49:20 +0100
pve-qemu-kvm (9.1.2-3) bookworm; urgency=medium
* async snapshot: explicitly specify raw format when loading the VM state
file
* vma create: rework CLI parameters for passing disk to a more structured
style and use that to allow explicitly specifying the format
-- Proxmox Support Team <support@proxmox.com> Fri, 24 Jan 2025 16:12:34 +0100
pve-qemu-kvm (9.1.2-2) bookworm; urgency=medium
* adapt machine version deprecation for Proxmox VE release and support
cycle.
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Jan 2025 16:34:06 +0100
pve-qemu-kvm (9.1.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.1.2
* improve error handling and edge cases with fleecing backups.
-- Proxmox Support Team <support@proxmox.com> Wed, 11 Dec 2024 16:47:21 +0100
pve-qemu-kvm (9.0.2-4) bookworm; urgency=medium
* async snapshot: ensure any dynamic vCPU-throttling applied for
auto-converge gets always disabled again after finishing the snapshot.
-- Proxmox Support Team <support@proxmox.com> Sun, 10 Nov 2024 11:23:09 +0100
pve-qemu-kvm (9.0.2-3) bookworm; urgency=medium
* pick up fix for VirtIO PCI regressions
* pick up stable fixes for 9.0, including fixes for VirtIO-net, ARM and
x86(_64) emulation, CVEs to harden NBD server against malicious clients,
as well as a few others (VNC, physmem, Intel IOMMU, ...).
-- Proxmox Support Team <support@proxmox.com> Fri, 06 Sep 2024 16:21:42 +0200
pve-qemu-kvm (9.0.2-2) bookworm; urgency=medium
* actually update submodule to QEMU 9.0.2. The previous release was still
based on 9.0.0 by mistake.
-- Proxmox Support Team <support@proxmox.com> Wed, 07 Aug 2024 10:16:01 +0200
pve-qemu-kvm (9.0.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.0.2. While our version had most
stable fixes included already, there are new fixes for VirtIO and VGA
display screen blanking (#4786)
* backport fix for a regression with the LSI-53c895a controller and one for
the boot order getting ignored for USB storage
-- Proxmox Support Team <support@proxmox.com> Mon, 29 Jul 2024 18:59:40 +0200
pve-qemu-kvm (9.0.0-6) bookworm; urgency=medium
* fix a regression in the zeroinit block driver that prevented importing and
cloning disks to RBD storages which are not using the krbd setting
-- Proxmox Support Team <support@proxmox.com> Mon, 08 Jul 2024 16:11:15 +0200
pve-qemu-kvm (9.0.0-5) bookworm; urgency=medium
* backport fix for CVE-2024-4467 to prevent malicious qcow2 image files from
already causing bad effects if being queried via 'qemu-img info'. For
Proxmox VE, this is an additional safe guard, as currently it directly
creates and manages the qcow2 images used by VMs and does not allow
unprivileged users to import them
* fix #4726: code cleanup: avoid superfluous check in vma backup code
-- Proxmox Support Team <support@proxmox.com> Wed, 03 Jul 2024 13:13:35 +0200
pve-qemu-kvm (9.0.0-4) bookworm; urgency=medium
* fix crash after saving a snapshot without including VM state when a VirtIO
block device with iothread is configured.
* fix edge case in error handling when opening a block device from PBS fails
* minor code cleanup in backup code
-- Proxmox Support Team <support@proxmox.com> Mon, 01 Jul 2024 11:26:11 +0200
pve-qemu-kvm (9.0.0-3) bookworm; urgency=medium
* fix crash when doing resize after hotplugging a disk using io_uring
* fix some minor issues in software CPU emulation (i.e. non-KVM) for ARM and
x86(_64)
-- Proxmox Support Team <support@proxmox.com> Wed, 29 May 2024 15:55:44 +0200
pve-qemu-kvm (9.0.0-2) bookworm; urgency=medium
* fix #5409: backup: fix copy-before-write timeout
* backup: improve error when copy-before-write fails for fleecing
* fix forwards and backwards migration with VirtIO-GPU display
* fix a regression in pflash device introduced in 8.2
* revert a commit for VirtIO PCI devices that turned out to cause more
potential security issues than what it fixed
* move compatibility flags for a new VirtIO-net feature to the correct
machine type. The feature was introduced in QEMU 8.2, but the
compatibility flags got added to machine version 8.0 instead of 8.1. This
breaks backwards migration with machine version 8.1 from a 8.2/9.0 binary
to an 8.1 binary, in cases where the guest kernel enables the feature
(e.g. Ubuntu 23.10).
While that breaks migration with machine version 8.1 from an unpatched to
a patched binary, Proxmox VE only ever had 8.2 on the test repository and
9.0 not yet in any public repository.
-- Proxmox Support Team <support@proxmox.com> Fri, 17 May 2024 17:04:52 +0200
pve-qemu-kvm (9.0.0-1) bookworm; urgency=medium
* update submodule and patches to QEMU 9.0.0
-- Proxmox Support Team <support@proxmox.com> Mon, 29 Apr 2024 10:51:37 +0200
pve-qemu-kvm (8.2.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 8.2.2
-- Proxmox Support Team <support@proxmox.com> Sat, 27 Apr 2024 12:44:30 +0200
pve-qemu-kvm (8.1.5-5) bookworm; urgency=medium
* implement support for backup fleecing
-- Proxmox Support Team <support@proxmox.com> Thu, 11 Apr 2024 17:46:48 +0200
pve-qemu-kvm (8.1.5-4) bookworm; urgency=medium
* fix live-import for certain kinds of VMDK images that rely on padding
* backup: avoid bubbling up first error if it's an ECANCELED one, as those
are often a result of cancling the job due to running into an actual
issue.
* backup: factor out & clean up gathering device info into helper
-- Proxmox Support Team <support@proxmox.com> Tue, 12 Mar 2024 14:08:40 +0100
pve-qemu-kvm (8.1.5-3) bookworm; urgency=medium
* backport fix for potential deadlock during QMP stop command if the VM has
disks attached through VirtIO-Block and IO-Thread enabled
* fix #4507: add patch to automatically increase NOFILE soft limit
-- Proxmox Support Team <support@proxmox.com> Wed, 21 Feb 2024 20:11:23 +0100
pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium
* work around for a situation where guest IO might get stuck, if the VM is
configure with iothread and VirtIO block/SCSI
-- Proxmox Support Team <support@proxmox.com> Fri, 02 Feb 2024 19:41:27 +0100
pve-qemu-kvm (8.1.5-1) bookworm; urgency=medium
* update to 8.1.5 stable release, including more relevant fixes like:
- virtio-net: correctly copy vnet header when flushing TX
- hw/pflash: implement update buffer for block writes
- Fixes to i386 emulation and ARM emulation.
-- Proxmox Support Team <support@proxmox.com> Fri, 02 Feb 2024 19:08:13 +0100
pve-qemu-kvm (8.1.2-6) bookworm; urgency=medium
* revert attempted fix to avoid rare issue with stuck guest IO when using
iothread, because it caused a much more common issue with iothreads
consuming too much CPU
-- Proxmox Support Team <support@proxmox.com> Fri, 15 Dec 2023 14:22:06 +0100
pve-qemu-kvm (8.1.2-5) bookworm; urgency=medium
* backport workaround for stuck guest IO with iothread and VirtIO block/SCSI
in some rare edge cases
* backport fix for potential deadlock when issuing the "resize" QMP command
for a disk that is using iothread
-- Proxmox Support Team <support@proxmox.com> Mon, 11 Dec 2023 16:58:27 +0100
pve-qemu-kvm (8.1.2-4) bookworm; urgency=medium
* fix vnc clipboard in the host to guest direction
-- Proxmox Support Team <support@proxmox.com> Wed, 22 Nov 2023 14:28:21 +0100
pve-qemu-kvm (8.1.2-3) bookworm; urgency=medium
* fix #5054: backport fix for software reset with SATA, avoiding breakage
with, e.g., some FreeBSD VMs
-- Proxmox Support Team <support@proxmox.com> Mon, 20 Nov 2023 10:24:50 +0100
pve-qemu-kvm (8.1.2-2) bookworm; urgency=medium
* revert "x86: acpi: workaround Windows not handling name references in
Package properly" as that seems to have broken networking (and possibly
other things) one some localized variants of Windows (e.g., the German
versions).
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Nov 2023 11:55:23 +0100
pve-qemu-kvm (8.1.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 8.1.2
* use QEMU's keycode-map-db again instead of our static copy from QEMU 6.0
* disable graph locking, newly introduced in the 8.1 release, as it has
still various deadlock issuess, e.g., during canceling backup jobs.
-- Proxmox Support Team <support@proxmox.com> Tue, 24 Oct 2023 13:42:45 +0200
pve-qemu-kvm (8.0.2-7) bookworm; urgency=medium
* fix #2874: SATA: avoid unsolicited write to sector 0 during reset
-- Proxmox Support Team <support@proxmox.com> Wed, 04 Oct 2023 08:33:35 +0200
pve-qemu-kvm (8.0.2-6) bookworm; urgency=medium
* fix #1534: vma: add extract-filter for disk images allowing users to pass
a comma separated list of the disks they want to extract from an archive.
* backup: create jobs in a drained section to avoid subtle bugs where
something interferes with the block-copy-state bitmap on initialization
* backup: drop experimental, and since a while also fully broken, directory
backup format (BACKUP_FORMAT_DIR). This format was never exposed via the
Proxmox VE API, but only available via QMP, as its broken since QEMU 8 and
we got zero reports about that, it's safe to assume that there are no
public users, so just remove it completely.
-- Proxmox Support Team <support@proxmox.com> Wed, 06 Sep 2023 17:03:59 +0200
pve-qemu-kvm (8.0.2-5) bookworm; urgency=medium
* improve memory footprint after backup by not keeping as much memory
resident.
* fix file descriptor leak for vhost (used by default by vNICs).
-- Proxmox Support Team <support@proxmox.com> Wed, 16 Aug 2023 11:52:24 +0200
pve-qemu-kvm (8.0.2-4) bookworm; urgency=medium
* fix resume for snapshot and hibernate in combination with iothread and
dirty bitmap
-- Proxmox Support Team <support@proxmox.com> Fri, 28 Jul 2023 12:58:22 +0200
pve-qemu-kvm (8.0.2-3) bookworm; urgency=medium
* fix regression in QEMU 8.0 for drive mirror with bitmap
-- Proxmox Support Team <support@proxmox.com> Thu, 15 Jun 2023 13:57:46 +0200
pve-qemu-kvm (8.0.2-2) bookworm; urgency=medium
* drop custom get_link_status QMP command, was never really used.
* drop custom & deprecated drive snapshot QMP commands, we use a better
alternative since a while.
-- Proxmox Support Team <support@proxmox.com> Fri, 09 Jun 2023 07:57:56 +0200
pve-qemu-kvm (8.0.2-1) bookworm; urgency=medium
* update to QEMU stable release 8.0.2
* update patches for avoiding issues with DMA reentrancy to current,
slightly optimized version.
-- Proxmox Support Team <support@proxmox.com> Tue, 06 Jun 2023 16:34:50 +0200
pve-qemu-kvm (8.0.0-1) bookworm; urgency=medium
* update to QEMU stable release 8.0.0
* re-build for Proxmox VE 8 / Debian 12 Bookworm
* adapt to the local virtiofsd C variant being dropped, it has been
rewritten in Rust and is now hosted in a separate source repository.
-- Proxmox Support Team <support@proxmox.com> Mon, 22 May 2023 13:45:49 +0200
pve-qemu-kvm (7.2.0-8) bullseye; urgency=medium
* backport fix for ACPI CPU hotplug issue with TCG
* cherry-pick TCG-related stable fixes for 7.2 for users that turned off KVM
HW acceleration
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Mar 2023 15:47:08 +0100
pve-qemu-kvm (7.2.0-7) bullseye; urgency=medium
* improve fix for potential deadlock with trim for IDE/SATA and draining
* backport stable fixes:
- hw/nvme: fix missing endian conversions for doorbell buffers
- hw/smbios: fix field corruption in type 4 table
- virtio-rng-pci: fix transitional migration compat for vectors
- hw/timer/hpet: Fix expiration time overflow
- vhost/vdpa: stop all svq on device deletion
- vhost: avoid a potential use of an uninitialized variable in the call to
vhost_svq_poll
- chardev/char-socket: set s->listener = NULL in char_socket_finalize to
fix a potential crash after live-migration
- intel-iommu: fail MAP notifier without caching mode
- intel-iommu: fail DEVIOTLB_UNMAP without dt mode
* fix a regression for when the LSI SCSI controller is used
-- Proxmox Support Team <support@proxmox.com> Mon, 13 Mar 2023 17:42:49 +0100
pve-qemu-kvm (7.2.0-6) bullseye; urgency=medium
* fix 7.2 regression for Linux boot failures with megasas SCSI
* fix 7.0 regression for a potential deadlock with trim for IDE/SATA and
draining
-- Proxmox Support Team <support@proxmox.com> Wed, 08 Mar 2023 14:32:17 +0100
pve-qemu-kvm (7.2.0-5) bullseye; urgency=medium
* fix #4476: savevm-async: avoid looping without progress
* savevm-async: decrease the boundary for free space for (memory) state left
on target from 30 MiB to 100 MiB, improving the heuristic for when to
enter the final "pause and sync" stage.
* QMP backup: use correct error number when getting blockdrive length fails
* backport fix for some DMA reentrancy issues, better protecting against
malicious guests
* backport fix for iSCSI double free issue leading to crashes
-- Proxmox Support Team <support@proxmox.com> Tue, 21 Feb 2023 13:49:43 +0100
pve-qemu-kvm (7.2.0-4) bullseye; urgency=medium
* backport fix for a 7.2 regression when using VirtIO disk with
detect-zeroes=unmap
-- Proxmox Support Team <support@proxmox.com> Fri, 27 Jan 2023 09:37:49 +0100
pve-qemu-kvm (7.2.0-3) bullseye; urgency=medium
* add fix for live-migration with virtio-rng devices, which regressed in
QEMU 7.2.0.
-- Proxmox Support Team <support@proxmox.com> Thu, 12 Jan 2023 13:13:14 +0100
pve-qemu-kvm (7.2.0-2) bullseye; urgency=medium
* enable slirp again for now, as in qemu-server, user networking is
supported (via CLI/API) when no bridge is set on a virtual NIC
* cherry-pick stable fixes for 7.2. Two for virtio-mem and one for vIOMMU.
Both features are not yet exposed in PVE's qemu-server, but there's work
going on to change that.
-- Proxmox Support Team <support@proxmox.com> Tue, 10 Jan 2023 15:47:48 +0100
pve-qemu-kvm (7.2.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.2.0
* drop 'slirp' networking
-- Proxmox Support Team <support@proxmox.com> Fri, 16 Dec 2022 13:18:21 +0100
pve-qemu-kvm (7.1.0-4) bullseye; urgency=medium
* cherry-pick "block/block-backend: blk_set_enable_write_cache is IO_CODE"
-- Proxmox Support Team <support@proxmox.com> Tue, 22 Nov 2022 09:21:06 +0100
pve-qemu-kvm (7.1.0-3) bullseye; urgency=medium
* init: daemonize: defuse PID file resolve error to a warning at max, fixing
some usecases that regressed with 7.1, like tracking start up in our
file-restore VM.
-- Proxmox Support Team <support@proxmox.com> Fri, 28 Oct 2022 10:27:49 +0200
pve-qemu-kvm (7.1.0-2) bullseye; urgency=medium
* fix an issue with error handling in async backup code
-- Proxmox Support Team <support@proxmox.com> Tue, 18 Oct 2022 15:33:44 +0200
pve-qemu-kvm (7.1.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.1.0
* add fix for io_uring_register_ring_fd from upstream
-- Proxmox Support Team <support@proxmox.com> Fri, 14 Oct 2022 14:54:09 +0200
pve-qemu-kvm (7.0.0-4) bullseye; urgency=medium
* add revision to version output
* PVE Backup: allow passing max-workers performance setting
-- Proxmox Support Team <support@proxmox.com> Mon, 10 Oct 2022 11:55:37 +0200
pve-qemu-kvm (7.0.0-3) bullseye; urgency=medium
* savevm-async: avoid segfault when aborting snapshot creation task
* savevm-async: set SAVE_STATE_DONE when closing state file was successful
allowing one to start a new snapshot task after aborting one.
-- Proxmox Support Team <support@proxmox.com> Tue, 30 Aug 2022 12:54:03 +0200
pve-qemu-kvm (7.0.0-2) bullseye; urgency=medium
* backport "io_uring: fix short read slow path"
* backport "e1000: set RX descriptor status in a separate operation"
-- Proxmox Support Team <support@proxmox.com> Wed, 20 Jul 2022 09:17:07 +0200
pve-qemu-kvm (7.0.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.0.0
-- Proxmox Support Team <support@proxmox.com> Thu, 30 Jun 2022 11:07:37 +0200
pve-qemu-kvm (6.2.0-11) bullseye; urgency=medium
* add 'namespace' to BlockdevOptionsPbs for live-restore support
* vma: create: support 64KiB-unaligned input images like to improve backing
up some VM templates
* block: alloc-track: avoid unlikely, but possible premature break
-- Proxmox Support Team <support@proxmox.com> Wed, 22 Jun 2022 15:54:54 +0200
pve-qemu-kvm (6.2.0-10) bullseye; urgency=medium
* fix #4101: fix backup cancellation bug with iothreads
-- Proxmox Support Team <support@proxmox.com> Thu, 9 Jun 2022 16:35:51 +0200
pve-qemu-kvm (6.2.0-9) bullseye; urgency=medium
* fix possible race conditions during cancellation of a PBS backup
-- Proxmox Support Team <support@proxmox.com> Wed, 08 Jun 2022 14:03:22 +0200
pve-qemu-kvm (6.2.0-8) bullseye; urgency=medium
* revert "block/rbd: implement bdrv_co_block_status" to work around
performance regression when backing up large RBD disk
-- Proxmox Support Team <support@proxmox.com> Thu, 19 May 2022 09:24:45 +0200
pve-qemu-kvm (6.2.0-7) bullseye; urgency=medium
* Proxmox Backup Server namespace support
-- Proxmox Support Team <support@proxmox.com> Thu, 12 May 2022 16:05:56 +0200
pve-qemu-kvm (6.2.0-6) bullseye; urgency=medium
* block/gluster: correctly set max_pdiscard which is int64_t to avoid
triggering assertion
* ui/vnc.c: Fixed a deadlock bug
* display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) and
integer overflow in cursor_alloc (CVE-2021-4206)
-- Proxmox Support Team <support@proxmox.com> Wed, 11 May 2022 10:42:53 +0200
pve-qemu-kvm (6.2.0-5) bullseye; urgency=medium
* vma: allow partial restore by skipping some disk
-- Proxmox Support Team <support@proxmox.com> Mon, 25 Apr 2022 10:13:46 +0200
pve-qemu-kvm (6.2.0-4) bullseye; urgency=medium
* d/control: add libgbm to build dependencies
* d/control: add suggest dependency-hint for libgl1
* various stable backports:
+ virtio-net: fix map leaking on error during receive
+ memory: Fix incorrect calls of log_global_start/stop
+ acpi: fix OEM ID/OEM Table ID padding
+ vhost-vsock: detach the virqueue element in case of error
+ vhost-user: remove VirtQ notifier restore
+ vhost-user: fix VirtQ notifier cleanup
+ virtio: fix the condition for iommu_platform not supported
-- Proxmox Support Team <support@proxmox.com> Fri, 22 Apr 2022 11:52:30 +0200
pve-qemu-kvm (6.2.0-3) bullseye; urgency=medium
* cherry-pick fix for some manually added ACPI table SLIC entries via the
custom args flag.
-- Proxmox Support Team <support@proxmox.com> Fri, 15 Apr 2022 09:09:37 +0200
pve-qemu-kvm (6.2.0-2) bullseye; urgency=medium
* compile in virgl support
* enable zstd support
* drop sdl dependency (it was disabled at compile time already)
* recommend 'numactl'
* fix an issue with multi-disk backups where chunks would be written
multiple times
-- Proxmox Support Team <support@proxmox.com> Thu, 03 Mar 2022 12:03:44 +0100
pve-qemu-kvm (6.2.0-1) bullseye; urgency=medium
* update to QEMU stable release 6.2.0
-- Proxmox Support Team <support@proxmox.com> Thu, 17 Feb 2022 06:23:14 +0100
-- Vitaliy Filippov <vitalif@yourcmc.ru> Thu, 14 Dec 2022 19:15:40 +0300
pve-qemu-kvm (6.1.1-2) bullseye; urgency=medium

1
debian/compat vendored Normal file
View File

@@ -0,0 +1 @@
10

33
debian/control vendored
View File

@@ -2,43 +2,41 @@ Source: pve-qemu-kvm
Section: admin
Priority: optional
Maintainer: Proxmox Support Team <support@proxmox.com>
Build-Depends: debhelper-compat (= 13),
Build-Depends: autotools-dev,
check,
debhelper (>= 9),
libacl1-dev,
libaio-dev,
libattr1-dev,
libcap-ng-dev,
libcurl4-gnutls-dev,
libepoxy-dev,
libfdt-dev,
libgbm-dev,
libglusterfs-dev (>= 5.2-2),
libgnutls28-dev,
libiscsi-dev (>= 1.12.0),
libjemalloc-dev,
libjpeg-dev,
libjson-perl,
libnuma-dev,
libpci-dev,
libpixman-1-dev,
libproxmox-backup-qemu0-dev (>= 1.3.0),
libproxmox-backup-qemu0-dev (>= 1.0.3-1),
librbd-dev (>= 0.48),
libsdl1.2-dev,
libseccomp-dev,
libslirp-dev,
libspice-protocol-dev (>= 0.12.14~),
libspice-server-dev (>= 0.14.0~),
libsystemd-dev,
liburing-dev,
libusb-1.0-0-dev (>= 1.0.17),
libusb-1.0-0-dev (>= 1.0.17-1),
libusbredirparser-dev (>= 0.6-2),
libvirglrenderer-dev,
libzstd-dev,
meson,
python3-minimal,
python3-sphinx,
python3-sphinx-rtd-theme,
python3-venv,
quilt,
texi2html,
texinfo,
uuid-dev,
xfslibs-dev,
Standards-Version: 3.7.2
@@ -47,6 +45,7 @@ Package: pve-qemu-kvm
Architecture: any
Depends: ceph-common (>= 0.48),
iproute2,
libaio1,
libgfapi0 | glusterfs-common (>= 5.6),
libgfchangelog0 | glusterfs-common (>= 5.6),
libgfdb0 | glusterfs-common (>= 5.6),
@@ -55,15 +54,16 @@ Depends: ceph-common (>= 0.48),
libglusterfs-dev | glusterfs-common (>= 5.6),
libglusterfs0 | glusterfs-common (>= 5.6),
libiscsi4 (>= 1.12.0) | libiscsi7,
libjemalloc2,
libjpeg62-turbo,
libsdl1.2debian,
libspice-server1 (>= 0.14.0~),
libusb-1.0-0 (>= 1.0.17-1),
libusbredirparser1 (>= 0.6-2),
libuuid1,
numactl,
${misc:Depends},
${shlibs:Depends},
Recommends: numactl,
Suggests: libgl1,
Conflicts: kvm,
pve-kvm,
pve-qemu-kvm-2.6.18,
@@ -71,17 +71,22 @@ Conflicts: kvm,
qemu-kvm,
qemu-system-arm,
qemu-system-common,
qemu-system-data,
qemu-system-x86,
qemu-utils,
Provides: qemu-system-arm, qemu-system-x86, qemu-utils,
Provides: qemu-system-arm, qemu-system-x86, qemu-utils
Replaces: pve-kvm,
pve-qemu-kvm-2.6.18,
qemu-system-arm,
qemu-system-x86,
qemu-utils,
Breaks: qemu-server (<= 8.0.6)
Description: Full virtualization on x86 hardware
Using KVM, one can run multiple virtual PCs, each running unmodified Linux or
Windows images. Each virtual machine has private virtualized hardware: a
network card, disk, graphics adapter, etc.
Package: pve-qemu-kvm-dbg
Architecture: any
Section: debug
Depends: pve-qemu-kvm (= ${binary:Version})
Description: pve qemu debugging symbols
This package contains the debugging symbols for pve-qemu-kvm.

2
debian/copyright vendored
View File

@@ -25,7 +25,7 @@ License:
In particular, the QEMU virtual CPU core library (libqemu.a) is
released under the GNU Lesser General Public License version 2 or later.
On Debian systems, the complete text of the GNU Lesser General Public
On Debian systems, the complete text of the GNU Lesser General Public
License can be found in the file /usr/share/common-licenses/LGPL.
Some hardware device emulation sources and other QEMU functionality are

View File

@@ -24,5 +24,4 @@ while (<STDIN>) {
die "no QEMU machine types detected from STDIN input" if scalar (@$machines) <= 0;
print to_json($machines, { utf8 => 1, canonical => 1 })
or die "failed to encode detected machines as JSON - $!\n";
print to_json($machines, { utf8 => 1 }) or die "$!\n";

View File

@@ -27,21 +27,19 @@ Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebased for 9.1.2]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/mirror.c | 99 ++++++++++++++++++++------
blockdev.c | 38 +++++++++-
include/block/block_int-global-state.h | 4 +-
qapi/block-core.json | 25 ++++++-
tests/unit/test-block-iothread.c | 4 +-
5 files changed, 142 insertions(+), 28 deletions(-)
block/mirror.c | 98 +++++++++++++++++++++++++-------
blockdev.c | 39 ++++++++++++-
include/block/block_int.h | 4 +-
qapi/block-core.json | 29 ++++++++--
tests/unit/test-block-iothread.c | 4 +-
5 files changed, 145 insertions(+), 29 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 2afe700b4d..c3d4be9b15 100644
index 85b781bc21..0821214138 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob {
@@ -50,7 +50,7 @@ typedef struct MirrorBlockJob {
BlockDriverState *to_replace;
/* Used to block operations on the drive-mirror-replace target */
Error *replace_blocker;
@@ -50,7 +48,7 @@ index 2afe700b4d..c3d4be9b15 100644
BlockMirrorBackingMode backing_mode;
/* Whether the target image requires explicit zero-initialization */
bool zero_target;
@@ -73,6 +73,8 @@ typedef struct MirrorBlockJob {
@@ -65,6 +65,8 @@ typedef struct MirrorBlockJob {
size_t buf_size;
int64_t bdev_length;
unsigned long *cow_bitmap;
@@ -59,9 +57,9 @@ index 2afe700b4d..c3d4be9b15 100644
BdrvDirtyBitmap *dirty_bitmap;
BdrvDirtyBitmapIter *dbi;
uint8_t *buf;
@@ -723,7 +725,8 @@ static int mirror_exit_common(Job *job)
@@ -697,7 +699,8 @@ static int mirror_exit_common(Job *job)
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
- BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ BlockDriverState *backing;
@@ -69,7 +67,7 @@ index 2afe700b4d..c3d4be9b15 100644
BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
if (bdrv_cow_bs(unfiltered_target) != backing) {
@@ -824,6 +827,16 @@ static void mirror_abort(Job *job)
@@ -802,6 +805,16 @@ static void mirror_abort(Job *job)
assert(ret == 0);
}
@@ -86,7 +84,7 @@ index 2afe700b4d..c3d4be9b15 100644
static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -1020,7 +1033,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
@@ -983,7 +996,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_free_init(s);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -96,7 +94,7 @@ index 2afe700b4d..c3d4be9b15 100644
ret = mirror_dirty_init(s);
if (ret < 0 || job_is_cancelled(&s->common.job)) {
goto immediate_exit;
@@ -1309,6 +1323,7 @@ static const BlockJobDriver mirror_job_driver = {
@@ -1216,6 +1230,7 @@ static const BlockJobDriver mirror_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
@@ -104,15 +102,15 @@ index 2afe700b4d..c3d4be9b15 100644
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = mirror_cancel,
@@ -1327,6 +1342,7 @@ static const BlockJobDriver commit_active_job_driver = {
@@ -1232,6 +1247,7 @@ static const BlockJobDriver commit_active_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
+ .clean = mirror_clean,
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = commit_active_cancel,
@@ -1719,7 +1735,10 @@ static BlockJob *mirror_start_job(
},
@@ -1594,7 +1610,10 @@ static BlockJob *mirror_start_job(
BlockCompletionFunc *cb,
void *opaque,
const BlockJobDriver *driver,
@@ -123,13 +121,12 @@ index 2afe700b4d..c3d4be9b15 100644
+ BlockDriverState *base,
bool auto_complete, const char *filter_node_name,
bool is_mirror, MirrorCopyMode copy_mode,
bool base_ro,
@@ -1734,10 +1753,39 @@ static BlockJob *mirror_start_job(
GLOBAL_STATE_CODE();
Error **errp)
@@ -1606,10 +1625,39 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
- if (granularity == 0) {
- granularity = bdrv_get_default_bitmap_granularity(target);
+ if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+ error_setg(errp, "Sync mode '%s' not supported",
+ MirrorSyncMode_str(sync_mode));
@@ -150,8 +147,8 @@ index 2afe700b4d..c3d4be9b15 100644
+ "sync mode '%s' is not compatible with bitmaps",
+ MirrorSyncMode_str(sync_mode));
+ return NULL;
}
+ }
+
+ if (bitmap) {
+ if (granularity) {
+ error_setg(errp, "granularity (%d)"
@@ -161,12 +158,13 @@ index 2afe700b4d..c3d4be9b15 100644
+ }
+ granularity = bdrv_dirty_bitmap_granularity(bitmap);
+ } else if (granularity == 0) {
+ granularity = bdrv_get_default_bitmap_granularity(target);
+ }
granularity = bdrv_get_default_bitmap_granularity(target);
}
-
assert(is_power_of_2(granularity));
if (buf_size < 0) {
@@ -1878,7 +1926,9 @@ static BlockJob *mirror_start_job(
@@ -1747,7 +1795,9 @@ static BlockJob *mirror_start_job(
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
@@ -176,10 +174,10 @@ index 2afe700b4d..c3d4be9b15 100644
+ s->bitmap_mode = bitmap_mode;
s->backing_mode = backing_mode;
s->zero_target = zero_target;
qatomic_set(&s->copy_mode, copy_mode);
@@ -1904,6 +1954,18 @@ static BlockJob *mirror_start_job(
*/
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
s->copy_mode = copy_mode;
@@ -1768,6 +1818,18 @@ static BlockJob *mirror_start_job(
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
}
+ if (s->sync_bitmap) {
+ bdrv_dirty_bitmap_set_busy(s->sync_bitmap, true);
@@ -193,10 +191,10 @@ index 2afe700b4d..c3d4be9b15 100644
+ }
+ }
+
bdrv_graph_wrlock();
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
@@ -1986,6 +2048,9 @@ fail:
BLK_PERM_CONSISTENT_READ,
@@ -1845,6 +1907,9 @@ fail:
if (s->dirty_bitmap) {
bdrv_release_dirty_bitmap(s->dirty_bitmap);
}
@@ -206,7 +204,7 @@ index 2afe700b4d..c3d4be9b15 100644
job_early_fail(&s->common.job);
}
@@ -2008,35 +2073,28 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1862,29 +1927,23 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -223,31 +221,25 @@ index 2afe700b4d..c3d4be9b15 100644
- bool is_none_mode;
BlockDriverState *base;
GLOBAL_STATE_CODE();
- if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
- (mode == MIRROR_SYNC_MODE_BITMAP)) {
- error_setg(errp, "Sync mode '%s' not supported",
- MirrorSyncMode_str(mode));
- return;
- }
-
bdrv_graph_rdlock_main_loop();
- is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
bdrv_graph_rdunlock_main_loop();
mirror_start_job(job_id, bs, creation_flags, target, replaces,
speed, granularity, buf_size, backing_mode, zero_target,
on_source_error, on_target_error, unmap, NULL, NULL,
- &mirror_job_driver, is_none_mode, base, false,
- filter_node_name, true, copy_mode, false, errp);
- filter_node_name, true, copy_mode, errp);
+ &mirror_job_driver, mode, bitmap, bitmap_mode, base,
+ false, filter_node_name, true, copy_mode, false, errp);
+ false, filter_node_name, true, copy_mode, errp);
}
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -2063,7 +2121,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1909,7 +1968,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN, false,
on_error, on_error, true, cb, opaque,
@@ -255,35 +247,36 @@ index 2afe700b4d..c3d4be9b15 100644
+ &commit_active_job_driver, MIRROR_SYNC_MODE_FULL,
+ NULL, 0, base, auto_complete,
filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
base_read_only, errp);
errp);
if (!job) {
diff --git a/blockdev.c b/blockdev.c
index 6740663fda..38fa63155c 100644
index 3d8ac368a1..03e99264dc 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2781,6 +2781,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2957,6 +2957,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
BlockDriverState *target,
const char *replaces,
bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
+ bool has_bitmap,
+ const char *bitmap_name,
+ bool has_bitmap_mode,
+ BitmapSyncMode bitmap_mode,
BlockMirrorBackingMode backing_mode,
bool zero_target,
bool has_speed, int64_t speed,
@@ -2799,6 +2802,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2976,6 +2980,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
{
BlockDriverState *unfiltered_bs;
int job_flags = JOB_DEFAULT;
+ BdrvDirtyBitmap *bitmap = NULL;
GLOBAL_STATE_CODE();
GRAPH_RDLOCK_GUARD_MAINLOOP();
@@ -2853,6 +2857,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_speed) {
speed = 0;
@@ -3030,6 +3035,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
+ if (bitmap_name) {
+ if (has_bitmap) {
+ if (granularity) {
+ error_setg(errp, "Granularity and bitmap cannot both be set");
+ return;
@@ -306,53 +299,53 @@ index 6740663fda..38fa63155c 100644
+ }
+ }
+
if (!replaces) {
if (!has_replaces) {
/* We want to mirror from @bs, but keep implicit filters on top */
unfiltered_bs = bdrv_skip_implicit_filters(bs);
@@ -2894,8 +2921,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3076,8 +3104,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
* and will allow to check whether the node still exist at mirror completion
*/
mirror_start(job_id, bs, target,
- replaces, job_flags,
- has_replaces ? replaces : NULL, job_flags,
- speed, granularity, buf_size, sync, backing_mode, zero_target,
+ replaces, job_flags, speed, granularity, buf_size, sync,
+ bitmap, bitmap_mode, backing_mode, zero_target,
+ has_replaces ? replaces : NULL, job_flags, speed, granularity,
+ buf_size, sync, bitmap, bitmap_mode, backing_mode, zero_target,
on_source_error, on_target_error, unmap, filter_node_name,
copy_mode, errp);
}
@@ -3039,6 +3066,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3222,6 +3250,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
blockdev_mirror_common(arg->job_id, bs, target_bs,
arg->replaces, arg->sync,
+ arg->bitmap,
blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
arg->has_replaces, arg->replaces, arg->sync,
+ arg->has_bitmap, arg->bitmap,
+ arg->has_bitmap_mode, arg->bitmap_mode,
backing_mode, zero_target,
arg->has_speed, arg->speed,
arg->has_granularity, arg->granularity,
@@ -3058,6 +3087,8 @@ void qmp_blockdev_mirror(const char *job_id,
@@ -3243,6 +3273,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
const char *device, const char *target,
const char *replaces,
bool has_replaces, const char *replaces,
MirrorSyncMode sync,
+ const char *bitmap,
+ bool has_bitmap, const char *bitmap,
+ bool has_bitmap_mode, BitmapSyncMode bitmap_mode,
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size,
@@ -3098,7 +3129,8 @@ void qmp_blockdev_mirror(const char *job_id,
@@ -3292,7 +3324,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
}
blockdev_mirror_common(job_id, bs, target_bs,
- replaces, sync, backing_mode,
+ replaces, sync,
blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
- has_replaces, replaces, sync, backing_mode,
+ has_replaces, replaces, sync, has_bitmap,
+ bitmap, has_bitmap_mode, bitmap_mode, backing_mode,
zero_target, has_speed, speed,
has_granularity, granularity,
has_buf_size, buf_size,
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index eb2d92a226..f0c642b194 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -158,7 +158,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c31cbd034a..11442893d0 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1254,7 +1254,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -364,26 +357,31 @@ index eb2d92a226..f0c642b194 100644
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index fd3bcc1c17..48ba32049f 100644
index 675d8265eb..6356a63695 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2178,6 +2178,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
@@ -1938,10 +1938,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
+# argument must be present for bitmap mode and absent otherwise.
+# The bitmap's granularity is used instead of @granularity (Since
+# 4.1).
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
+# be present for bitmap mode and absent otherwise. The bitmap's
+# granularity is used instead of @granularity (since 4.1).
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+# after the operation concludes. Must be present if sync is
+# "bitmap". Must NOT be present otherwise. (Since 4.1)
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+# the operation concludes. Must be present if sync is "bitmap".
+# Must NOT be present otherwise. (Since 4.1)
+#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
@@ -2220,7 +2229,9 @@
# @granularity: granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M (since 1.4).
+# power of 2 between 512 and 64M. Must not be specified if
+# @bitmap is present (since 1.4).
#
# @buf-size: maximum amount of data in flight from source to
# target (since 1.4).
@@ -1979,7 +1988,9 @@
{ 'struct': 'DriveMirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*format': 'str', '*node-name': 'str', '*replaces': 'str',
@@ -394,23 +392,28 @@ index fd3bcc1c17..48ba32049f 100644
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
@@ -2499,6 +2510,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
@@ -2247,10 +2258,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
+# argument must be present for bitmap mode and absent otherwise.
+# The bitmap's granularity is used instead of @granularity (since
+# 4.1).
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
+# be present for bitmap mode and absent otherwise. The bitmap's
+# granularity is used instead of @granularity (since 4.1).
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+# after the operation concludes. Must be present if sync is
+# "bitmap". Must NOT be present otherwise. (Since 4.1)
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+# the operation concludes. Must be present if sync is "bitmap".
+# Must NOT be present otherwise. (Since 4.1)
+#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
@@ -2547,7 +2567,8 @@
# @granularity: granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M
+# power of 2 between 512 and 64M . Must not be specified if
+# @bitmap is present.
#
# @buf-size: maximum amount of data in flight from source to
# target
@@ -2299,7 +2319,8 @@
{ 'command': 'blockdev-mirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*replaces': 'str',
@@ -421,10 +424,10 @@ index fd3bcc1c17..48ba32049f 100644
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index 20ed54f570..4f50a99334 100644
index c39e70b2f5..470ef79ae0 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -755,8 +755,8 @@ static void test_propagate_mirror(void)
@@ -617,8 +617,8 @@ static void test_propagate_mirror(void)
/* Start a mirror job */
mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,
@@ -434,4 +437,4 @@ index 20ed54f570..4f50a99334 100644
+ false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
&error_abort);
job = job_get("job0");

View File

@@ -24,10 +24,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index c3d4be9b15..7b6f7c0068 100644
index 0821214138..c688726fae 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -694,8 +694,6 @@ static int mirror_exit_common(Job *job)
@@ -674,8 +674,6 @@ static int mirror_exit_common(Job *job)
bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
}
@@ -36,9 +36,9 @@ index c3d4be9b15..7b6f7c0068 100644
/* Make sure that the source BDS doesn't go away during bdrv_replace_node,
* before we can call bdrv_drained_end */
bdrv_ref(src);
@@ -805,6 +803,18 @@ static int mirror_exit_common(Job *job)
bdrv_drained_end(target_bs);
bdrv_unref(target_bs);
@@ -783,6 +781,18 @@ static int mirror_exit_common(Job *job)
blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
+ if (s->sync_bitmap) {
+ if (s->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS ||
@@ -55,7 +55,7 @@ index c3d4be9b15..7b6f7c0068 100644
bs_opaque->job = NULL;
bdrv_drained_end(src);
@@ -1763,10 +1773,6 @@ static BlockJob *mirror_start_job(
@@ -1635,10 +1645,6 @@ static BlockJob *mirror_start_job(
" sync mode",
MirrorSyncMode_str(sync_mode));
return NULL;
@@ -66,7 +66,7 @@ index c3d4be9b15..7b6f7c0068 100644
}
} else if (bitmap) {
error_setg(errp,
@@ -1783,6 +1789,12 @@ static BlockJob *mirror_start_job(
@@ -1655,6 +1661,12 @@ static BlockJob *mirror_start_job(
return NULL;
}
granularity = bdrv_dirty_bitmap_granularity(bitmap);

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 3 insertions(+)
diff --git a/blockdev.c b/blockdev.c
index 38fa63155c..204cf6fad1 100644
index 03e99264dc..9e14feec87 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2878,6 +2878,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3056,6 +3056,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -28,4 +28,4 @@ index 38fa63155c..204cf6fad1 100644
+ return;
}
if (!replaces) {
if (!has_replaces) {

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 7b6f7c0068..2b1c07095d 100644
index c688726fae..a7f829f766 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -809,8 +809,8 @@ static int mirror_exit_common(Job *job)
@@ -787,8 +787,8 @@ static int mirror_exit_common(Job *job)
job->ret == 0 && ret == 0)) {
/* Success; synchronize copy back to sync. */
bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
@@ -30,7 +30,7 @@ index 7b6f7c0068..2b1c07095d 100644
}
}
bdrv_release_dirty_bitmap(s->dirty_bitmap);
@@ -1971,11 +1971,8 @@ static BlockJob *mirror_start_job(
@@ -1835,11 +1835,8 @@ static BlockJob *mirror_start_job(
}
if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
@@ -43,4 +43,4 @@ index 7b6f7c0068..2b1c07095d 100644
+ NULL, true);
}
bdrv_graph_wrlock();
ret = block_job_add_bdrv(&s->common, "source", bs, 0,

View File

@@ -12,8 +12,6 @@ uniform w.r.t. backup block jobs.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebase for 8.2.2]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/mirror.c | 28 +++------------
blockdev.c | 29 +++++++++++++++
@@ -21,12 +19,12 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
3 files changed, 70 insertions(+), 59 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 2b1c07095d..f5787b380c 100644
index a7f829f766..6a126d18c8 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1763,31 +1763,13 @@ static BlockJob *mirror_start_job(
GLOBAL_STATE_CODE();
@@ -1635,31 +1635,13 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
- if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
- error_setg(errp, "Sync mode '%s' not supported",
@@ -62,17 +60,17 @@ index 2b1c07095d..f5787b380c 100644
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
diff --git a/blockdev.c b/blockdev.c
index 204cf6fad1..79d47b1920 100644
index 9e14feec87..b6f797b41f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2857,7 +2857,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3035,7 +3035,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
+ if ((sync == MIRROR_SYNC_MODE_BITMAP) ||
+ (sync == MIRROR_SYNC_MODE_INCREMENTAL)) {
+ /* done before desugaring 'incremental' to print the right message */
+ if (!bitmap_name) {
+ if (!has_bitmap) {
+ error_setg(errp, "Must provide a valid bitmap name for "
+ "'%s' sync mode", MirrorSyncMode_str(sync));
+ return;
@@ -93,7 +91,7 @@ index 204cf6fad1..79d47b1920 100644
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ }
+
if (bitmap_name) {
if (has_bitmap) {
+ if (sync != MIRROR_SYNC_MODE_BITMAP) {
+ error_setg(errp, "Sync mode '%s' not supported with bitmap.",
+ MirrorSyncMode_str(sync));

View File

@@ -48,7 +48,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6 files changed, 59 insertions(+), 5 deletions(-)
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index c3740ec616..7f38ce6b8b 100644
index 1a8a369b50..2c8a558c67 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts;
@@ -60,10 +60,10 @@ index c3740ec616..7f38ce6b8b 100644
void monitor_init_globals(void);
void monitor_init_globals_core(void);
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
index cb628f681d..93dbd62fc2 100644
index 9c3a09cb01..a92be8c3f7 100644
--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
@@ -151,6 +151,13 @@ typedef struct {
@@ -144,6 +144,13 @@ typedef struct {
QemuMutex qmp_queue_lock;
/* Input queue that holds all the parsed QMP requests */
GQueue *qmp_requests;
@@ -78,10 +78,10 @@ index cb628f681d..93dbd62fc2 100644
/**
diff --git a/monitor/monitor.c b/monitor/monitor.c
index 56786c0ccc..30071d0c8a 100644
index 46a171bca6..5ccdd2424b 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -116,6 +116,21 @@ bool monitor_cur_is_qmp(void)
@@ -135,6 +135,21 @@ bool monitor_cur_is_qmp(void)
return cur_mon && monitor_is_qmp(cur_mon);
}
@@ -104,10 +104,10 @@ index 56786c0ccc..30071d0c8a 100644
* Is @mon is using readline?
* Note: not all HMP monitors use readline, e.g., gdbserver has a
diff --git a/monitor/qmp.c b/monitor/qmp.c
index 5e538f34c0..eb181d5979 100644
index 092c527b6f..6b8cfcf6d8 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -165,6 +165,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
@@ -141,6 +141,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
QDict *rsp;
QDict *error;
@@ -116,7 +116,7 @@ index 5e538f34c0..eb181d5979 100644
rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
&mon->common);
@@ -180,7 +182,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
@@ -156,7 +158,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
}
}
@@ -135,7 +135,7 @@ index 5e538f34c0..eb181d5979 100644
qobject_unref(rsp);
}
@@ -461,6 +473,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
@@ -444,6 +456,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
switch (event) {
case CHR_EVENT_OPENED:
@@ -144,10 +144,10 @@ index 5e538f34c0..eb181d5979 100644
monitor_qmp_caps_reset(mon);
data = qmp_greeting(mon);
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 176b549473..790bb7d1da 100644
index 59600210ce..95602446eb 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -117,16 +117,28 @@ typedef struct QmpDispatchBH {
@@ -120,16 +120,28 @@ typedef struct QmpDispatchBH {
QObject **ret;
Error **errp;
Coroutine *co;
@@ -180,19 +180,19 @@ index 176b549473..790bb7d1da 100644
aio_co_wake(data->co);
}
@@ -253,6 +265,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
@@ -243,6 +255,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
.ret = &ret,
.errp = &err,
.co = qemu_coroutine_self(),
+ .conn_nr = monitor_get_connection_nr(cur_mon),
};
aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh,
aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
&data);
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
index 1894cdfe1f..d74d0459f0 100644
index d058a2a00d..3290b58120 100644
--- a/stubs/monitor-core.c
+++ b/stubs/monitor-core.c
@@ -12,6 +12,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
@@ -13,6 +13,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
return NULL;
}
@@ -201,6 +201,6 @@ index 1894cdfe1f..d74d0459f0 100644
+ return -1;
+}
+
void qapi_event_emit(QAPIEvent event, QDict *qdict)
void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
{
}

View File

@@ -1,100 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Tue, 7 Mar 2023 15:03:02 +0100
Subject: [PATCH] ide: avoid potential deadlock when draining during trim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The deadlock can happen as follows:
1. ide_issue_trim is called, and increments the in_flight counter.
2. ide_issue_trim_cb calls blk_aio_pdiscard.
3. Somebody else starts draining (e.g. backup to insert the cbw node).
4. ide_issue_trim_cb is called as the completion callback for
blk_aio_pdiscard.
5. ide_issue_trim_cb issues yet another blk_aio_pdiscard request.
6. The request is added to the wait queue via blk_wait_while_drained,
because draining has been started.
7. Nobody ever decrements the in_flight counter and draining can't
finish. This would be done by ide_trim_bh_cb, which is called after
ide_issue_trim_cb has issued its last request, but
ide_issue_trim_cb is not called anymore, because it's the
completion callback of blk_aio_pdiscard, which waits on draining.
Quoting Hanna Czenczek:
> The point of 7e5cdb345f was that we need any in-flight count to
> accompany a set s->bus->dma->aiocb. While blk_aio_pdiscard() is
> happening, we dont necessarily need another count. But we do need
> it while there is no blk_aio_pdiscard().
> ide_issue_trim_cb() returns in two cases (and, recursively through
> its callers, leaves s->bus->dma->aiocb set):
> 1. After calling blk_aio_pdiscard(), which will keep an in-flight
> count,
> 2. After calling replay_bh_schedule_event() (i.e.
> qemu_bh_schedule()), which does not keep an in-flight count.
Thus, even after moving the blk_inc_in_flight to above the
replay_bh_schedule_event call, the invariant "ide_issue_trim_cb
returns with an accompanying in-flight count" is still satisfied.
However, the issue 7e5cdb345f fixed for canceling resurfaces, because
ide_cancel_dma_sync assumes that it just needs to drain once. But now
the in_flight count is not consistently > 0 during the trim operation.
So, change it to drain until !s->bus->dma->aiocb, which means that the
operation finished (s->bus->dma->aiocb is cleared by ide_set_inactive
via the ide_dma_cb when the end of the transfer is reached).
Discussion here:
https://lists.nongnu.org/archive/html/qemu-devel/2023-03/msg02506.html
Fixes: 7e5cdb345f ("ide: Increment BB in-flight counter for TRIM BH")
Suggested-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/ide/core.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 08d9218455..20d8c0cf66 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -456,7 +456,7 @@ static void ide_trim_bh_cb(void *opaque)
iocb->bh = NULL;
qemu_aio_unref(iocb);
- /* Paired with an increment in ide_issue_trim() */
+ /* Paired with an increment in ide_issue_trim_cb() */
blk_dec_in_flight(blk);
}
@@ -516,6 +516,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
done:
iocb->aiocb = NULL;
if (iocb->bh) {
+ /* Paired with a decrement in ide_trim_bh_cb() */
+ blk_inc_in_flight(s->blk);
replay_bh_schedule_event(iocb->bh);
}
}
@@ -528,9 +530,6 @@ BlockAIOCB *ide_issue_trim(
IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
TrimAIOCB *iocb;
- /* Paired with a decrement in ide_trim_bh_cb() */
- blk_inc_in_flight(s->blk);
-
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s;
iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
@@ -754,8 +753,9 @@ void ide_cancel_dma_sync(IDEState *s)
*/
if (s->bus->dma->aiocb) {
trace_ide_cancel_dma_sync_remaining();
- blk_drain(s->blk);
- assert(s->bus->dma->aiocb == NULL);
+ while (s->bus->dma->aiocb) {
+ blk_drain(s->blk);
+ }
}
}

View File

@@ -0,0 +1,55 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 1 Sep 2021 16:51:04 +0200
Subject: [PATCH] monitor/hmp: add support for flag argument with value
Adds support for the "-xS" parameter type, where "-x" denotes a flag
name and the "S" suffix indicates that this flag is supposed to take an
arbitrary string parameter.
These parameters are always optional, the entry in the qdict will be
omitted if the flag is not given.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
monitor/hmp.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/monitor/hmp.c b/monitor/hmp.c
index d50c3124e1..a32dce7a35 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -980,6 +980,7 @@ static QDict *monitor_parse_arguments(Monitor *mon,
{
const char *tmp = p;
int skip_key = 0;
+ int ret;
/* option */
c = *typestr++;
@@ -1002,8 +1003,22 @@ static QDict *monitor_parse_arguments(Monitor *mon,
}
if (skip_key) {
p = tmp;
+ } else if (*typestr == 'S') {
+ /* has option with string value */
+ typestr++;
+ tmp = p++;
+ while (qemu_isspace(*p)) {
+ p++;
+ }
+ ret = get_str(buf, sizeof(buf), &p);
+ if (ret < 0) {
+ monitor_printf(mon, "%s: value expected for -%c\n",
+ cmd->name, *tmp);
+ goto fail;
+ }
+ qdict_put_str(qdict, key, buf);
} else {
- /* has option */
+ /* has boolean option */
p++;
qdict_put_bool(qdict, key, true);
}

View File

@@ -0,0 +1,479 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 25 Aug 2021 11:14:13 +0200
Subject: [PATCH] monitor: refactor set/expire_password and allow VNC display
id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It is possible to specify more than one VNC server on the command line,
either with an explicit ID or the auto-generated ones à la "default",
"vnc2", "vnc3", ...
It is not possible to change the password on one of these extra VNC
displays though. Fix this by adding a "display" parameter to the
"set_password" and "expire_password" QMP and HMP commands.
For HMP, the display is specified using the "-d" value flag.
For QMP, the schema is updated to explicitly express the supported
variants of the commands with protocol-discriminated unions.
Suggested-by: Eric Blake <eblake@redhat.com>
Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hmp-commands.hx | 29 ++++----
monitor/hmp-cmds.c | 57 +++++++++++++++-
monitor/qmp-cmds.c | 62 ++++++-----------
qapi/ui.json | 165 ++++++++++++++++++++++++++++++++++++++-------
4 files changed, 233 insertions(+), 80 deletions(-)
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 8e45bce2cd..d78e4cfc47 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1514,34 +1514,35 @@ ERST
{
.name = "set_password",
- .args_type = "protocol:s,password:s,connected:s?",
- .params = "protocol password action-if-connected",
+ .args_type = "protocol:s,password:s,display:-dS,connected:s?",
+ .params = "protocol password [-d display] [action-if-connected]",
.help = "set spice/vnc password",
.cmd = hmp_set_password,
},
SRST
-``set_password [ vnc | spice ] password [ action-if-connected ]``
- Change spice/vnc password. Use zero to make the password stay valid
- forever. *action-if-connected* specifies what should happen in
- case a connection is established: *fail* makes the password change
- fail. *disconnect* changes the password and disconnects the
- client. *keep* changes the password and keeps the connection up.
- *keep* is the default.
+``set_password [ vnc | spice ] password [ -d display ] [ action-if-connected ]``
+ Change spice/vnc password. *display* can be used with 'vnc' to specify
+ which display to set the password on. *action-if-connected* specifies
+ what should happen in case a connection is established: *fail* makes
+ the password change fail. *disconnect* changes the password and
+ disconnects the client. *keep* changes the password and keeps the
+ connection up. *keep* is the default.
ERST
{
.name = "expire_password",
- .args_type = "protocol:s,time:s",
- .params = "protocol time",
+ .args_type = "protocol:s,time:s,display:-dS",
+ .params = "protocol time [-d display]",
.help = "set spice/vnc password expire-time",
.cmd = hmp_expire_password,
},
SRST
-``expire_password [ vnc | spice ]`` *expire-time*
- Specify when a password for spice/vnc becomes
- invalid. *expire-time* accepts:
+``expire_password [ vnc | spice ] expire-time [ -d display ]``
+ Specify when a password for spice/vnc becomes invalid.
+ *display* behaves the same as in ``set_password``.
+ *expire-time* accepts:
``now``
Invalidate password instantly.
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index a7e197a90b..f4ef58d257 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1451,10 +1451,41 @@ void hmp_set_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");
const char *password = qdict_get_str(qdict, "password");
+ const char *display = qdict_get_try_str(qdict, "display");
const char *connected = qdict_get_try_str(qdict, "connected");
Error *err = NULL;
+ DisplayProtocol proto;
- qmp_set_password(protocol, password, !!connected, connected, &err);
+ SetPasswordOptions opts = {
+ .password = g_strdup(password),
+ .u.vnc.display = NULL,
+ };
+
+ proto = qapi_enum_parse(&DisplayProtocol_lookup, protocol,
+ DISPLAY_PROTOCOL_VNC, &err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+ opts.protocol = proto;
+
+ if (proto == DISPLAY_PROTOCOL_VNC) {
+ opts.u.vnc.has_display = !!display;
+ opts.u.vnc.display = g_strdup(display);
+ } else if (proto == DISPLAY_PROTOCOL_SPICE) {
+ opts.u.spice.has_connected = !!connected;
+ opts.u.spice.connected =
+ qapi_enum_parse(&SetPasswordAction_lookup, connected,
+ SET_PASSWORD_ACTION_KEEP, &err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+ }
+
+ qmp_set_password(&opts, &err);
+ g_free(opts.password);
+ g_free(opts.u.vnc.display);
hmp_handle_error(mon, err);
}
@@ -1462,9 +1493,31 @@ void hmp_expire_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");
const char *whenstr = qdict_get_str(qdict, "time");
+ const char *display = qdict_get_try_str(qdict, "display");
Error *err = NULL;
+ DisplayProtocol proto;
- qmp_expire_password(protocol, whenstr, &err);
+ ExpirePasswordOptions opts = {
+ .time = g_strdup(whenstr),
+ .u.vnc.display = NULL,
+ };
+
+ proto = qapi_enum_parse(&DisplayProtocol_lookup, protocol,
+ DISPLAY_PROTOCOL_VNC, &err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+ opts.protocol = proto;
+
+ if (proto == DISPLAY_PROTOCOL_VNC) {
+ opts.u.vnc.has_display = !!display;
+ opts.u.vnc.display = g_strdup(display);
+ }
+
+ qmp_expire_password(&opts, &err);
+ g_free(opts.time);
+ g_free(opts.u.vnc.display);
hmp_handle_error(mon, err);
}
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index f7d64a6457..65882b5997 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -164,45 +164,30 @@ void qmp_system_wakeup(Error **errp)
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, errp);
}
-void qmp_set_password(const char *protocol, const char *password,
- bool has_connected, const char *connected, Error **errp)
+void qmp_set_password(SetPasswordOptions *opts, Error **errp)
{
- int disconnect_if_connected = 0;
- int fail_if_connected = 0;
- int rc;
+ bool disconnect_if_connected = false;
+ bool fail_if_connected = false;
+ int rc = 0;
- if (has_connected) {
- if (strcmp(connected, "fail") == 0) {
- fail_if_connected = 1;
- } else if (strcmp(connected, "disconnect") == 0) {
- disconnect_if_connected = 1;
- } else if (strcmp(connected, "keep") == 0) {
- /* nothing */
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER, "connected");
- return;
- }
- }
-
- if (strcmp(protocol, "spice") == 0) {
+ if (opts->protocol == DISPLAY_PROTOCOL_SPICE) {
if (!qemu_using_spice(errp)) {
return;
}
- rc = qemu_spice.set_passwd(password, fail_if_connected,
+ if (opts->u.spice.has_connected) {
+ fail_if_connected =
+ opts->u.spice.connected == SET_PASSWORD_ACTION_FAIL;
+ disconnect_if_connected =
+ opts->u.spice.connected == SET_PASSWORD_ACTION_DISCONNECT;
+ }
+ rc = qemu_spice.set_passwd(opts->password, fail_if_connected,
disconnect_if_connected);
- } else if (strcmp(protocol, "vnc") == 0) {
- if (fail_if_connected || disconnect_if_connected) {
- /* vnc supports "connected=keep" only */
- error_setg(errp, QERR_INVALID_PARAMETER, "connected");
- return;
- }
+ } else if (opts->protocol == DISPLAY_PROTOCOL_VNC) {
/* Note that setting an empty password will not disable login through
* this interface. */
- rc = vnc_display_password(NULL, password);
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol",
- "'vnc' or 'spice'");
- return;
+ rc = vnc_display_password(
+ opts->u.vnc.has_display ? opts->u.vnc.display : NULL,
+ opts->password);
}
if (rc != 0) {
@@ -210,11 +195,11 @@ void qmp_set_password(const char *protocol, const char *password,
}
}
-void qmp_expire_password(const char *protocol, const char *whenstr,
- Error **errp)
+void qmp_expire_password(ExpirePasswordOptions *opts, Error **errp)
{
time_t when;
int rc;
+ const char* whenstr = opts->time;
if (strcmp(whenstr, "now") == 0) {
when = 0;
@@ -226,17 +211,14 @@ void qmp_expire_password(const char *protocol, const char *whenstr,
when = strtoull(whenstr, NULL, 10);
}
- if (strcmp(protocol, "spice") == 0) {
+ if (opts->protocol == DISPLAY_PROTOCOL_SPICE) {
if (!qemu_using_spice(errp)) {
return;
}
rc = qemu_spice.set_pw_expire(when);
- } else if (strcmp(protocol, "vnc") == 0) {
- rc = vnc_display_pw_expire(NULL, when);
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol",
- "'vnc' or 'spice'");
- return;
+ } else if (opts->protocol == DISPLAY_PROTOCOL_VNC) {
+ rc = vnc_display_pw_expire(
+ opts->u.vnc.has_display ? opts->u.vnc.display : NULL, when);
}
if (rc != 0) {
diff --git a/qapi/ui.json b/qapi/ui.json
index fd9677d48e..cba8665b73 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -9,22 +9,23 @@
{ 'include': 'common.json' }
{ 'include': 'sockets.json' }
+##
+# @DisplayProtocol:
+#
+# Display protocols which support changing password options.
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'DisplayProtocol',
+ 'data': [ { 'name': 'vnc', 'if': 'defined(CONFIG_VNC)' },
+ { 'name': 'spice', 'if': 'defined(CONFIG_SPICE)' } ] }
+
##
# @set_password:
#
# Sets the password of a remote display session.
#
-# @protocol: - 'vnc' to modify the VNC server password
-# - 'spice' to modify the Spice server password
-#
-# @password: the new password
-#
-# @connected: how to handle existing clients when changing the
-# password. If nothing is specified, defaults to 'keep'
-# 'fail' to fail the command if clients are connected
-# 'disconnect' to disconnect existing clients
-# 'keep' to maintain existing clients
-#
# Returns: - Nothing on success
# - If Spice is not enabled, DeviceNotFound
#
@@ -37,16 +38,123 @@
# <- { "return": {} }
#
##
-{ 'command': 'set_password',
- 'data': {'protocol': 'str', 'password': 'str', '*connected': 'str'} }
+{ 'command': 'set_password', 'boxed': true, 'data': 'SetPasswordOptions' }
+
+##
+# @SetPasswordOptions:
+#
+# Data required to set a new password on a display server protocol.
+#
+# @protocol: - 'vnc' to modify the VNC server password
+# - 'spice' to modify the Spice server password
+#
+# @password: the new password
+#
+# Since: 6.2
+#
+##
+{ 'union': 'SetPasswordOptions',
+ 'base': { 'protocol': 'DisplayProtocol',
+ 'password': 'str' },
+ 'discriminator': 'protocol',
+ 'data': { 'vnc': 'SetPasswordOptionsVnc',
+ 'spice': 'SetPasswordOptionsSpice' } }
+
+##
+# @SetPasswordAction:
+#
+# An action to take on changing a password on a connection with active clients.
+#
+# @fail: fail the command if clients are connected
+#
+# @disconnect: disconnect existing clients
+#
+# @keep: maintain existing clients
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'SetPasswordAction',
+ 'data': [ 'fail', 'disconnect', 'keep' ] }
+
+##
+# @SetPasswordActionVnc:
+#
+# See @SetPasswordAction. VNC only supports the keep action. 'connection'
+# should just be omitted for VNC, this is kept for backwards compatibility.
+#
+# @keep: maintain existing clients
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'SetPasswordActionVnc',
+ 'data': [ 'keep' ] }
+
+##
+# @SetPasswordOptionsSpice:
+#
+# Options for set_password specific to the VNC procotol.
+#
+# @connected: How to handle existing clients when changing the
+# password. If nothing is specified, defaults to 'keep'.
+#
+# Since: 6.2
+#
+##
+{ 'struct': 'SetPasswordOptionsSpice',
+ 'data': { '*connected': 'SetPasswordAction' } }
+
+##
+# @SetPasswordOptionsVnc:
+#
+# Options for set_password specific to the VNC procotol.
+#
+# @display: The id of the display where the password should be changed.
+# Defaults to the first.
+#
+# @connected: How to handle existing clients when changing the
+# password.
+#
+# Features:
+# @deprecated: For VNC, @connected will always be 'keep', parameter should be
+# omitted.
+#
+# Since: 6.2
+#
+##
+{ 'struct': 'SetPasswordOptionsVnc',
+ 'data': { '*display': 'str',
+ '*connected': { 'type': 'SetPasswordActionVnc',
+ 'features': ['deprecated'] } } }
##
# @expire_password:
#
# Expire the password of a remote display server.
#
-# @protocol: the name of the remote display protocol 'vnc' or 'spice'
+# Returns: - Nothing on success
+# - If @protocol is 'spice' and Spice is not active, DeviceNotFound
#
+# Since: 0.14
+#
+# Example:
+#
+# -> { "execute": "expire_password", "arguments": { "protocol": "vnc",
+# "time": "+60" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'expire_password', 'boxed': true, 'data': 'ExpirePasswordOptions' }
+
+##
+# @ExpirePasswordOptions:
+#
+# Data required to set password expiration on a display server protocol.
+#
+# @protocol: - 'vnc' to modify the VNC server expiration
+# - 'spice' to modify the Spice server expiration
+
# @time: when to expire the password.
#
# - 'now' to expire the password immediately
@@ -54,24 +162,33 @@
# - '+INT' where INT is the number of seconds from now (integer)
# - 'INT' where INT is the absolute time in seconds
#
-# Returns: - Nothing on success
-# - If @protocol is 'spice' and Spice is not active, DeviceNotFound
-#
-# Since: 0.14
-#
# Notes: Time is relative to the server and currently there is no way to
# coordinate server time with client time. It is not recommended to
# use the absolute time version of the @time parameter unless you're
# sure you are on the same machine as the QEMU instance.
#
-# Example:
+# Since: 6.2
#
-# -> { "execute": "expire_password", "arguments": { "protocol": "vnc",
-# "time": "+60" } }
-# <- { "return": {} }
+##
+{ 'union': 'ExpirePasswordOptions',
+ 'base': { 'protocol': 'DisplayProtocol',
+ 'time': 'str' },
+ 'discriminator': 'protocol',
+ 'data': { 'vnc': 'ExpirePasswordOptionsVnc' } }
+
+##
+# @ExpirePasswordOptionsVnc:
+#
+# Options for expire_password specific to the VNC procotol.
+#
+# @display: The id of the display where the expiration should be changed.
+# Defaults to the first.
+#
+# Since: 6.2
#
##
-{ 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} }
+{ 'struct': 'ExpirePasswordOptionsVnc',
+ 'data': { '*display': 'str' } }
##
# @screendump:

View File

@@ -1,82 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 7 Dec 2024 18:14:45 +0000
Subject: [PATCH] tcg: Reset free_temps before tcg_optimize
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When allocating new temps during tcg_optmize, do not re-use
any EBB temps that were used within the TB. We do not have
any idea what span of the TB in which the temp was live.
Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize,
as well as replacing the equivalent in plugin_gen_inject and
tcg_func_start.
Cc: qemu-stable@nongnu.org
Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711
Reported-by: wannacu <wannacu2049@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 04e006ab36a8565b92d4e21dd346367fbade7d74)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
accel/tcg/plugin-gen.c | 2 +-
include/tcg/tcg-temp-internal.h | 6 ++++++
tcg/tcg.c | 5 ++++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 0f47bfbb48..1ef075552c 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -275,7 +275,7 @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
* that might be live within the existing opcode stream.
* The simplest solution is to release them all and create new.
*/
- memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
+ tcg_temp_ebb_reset_freed(tcg_ctx);
QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
switch (op->opc) {
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
index 44192c55a9..98f91e68b7 100644
--- a/include/tcg/tcg-temp-internal.h
+++ b/include/tcg/tcg-temp-internal.h
@@ -42,4 +42,10 @@ TCGv_i64 tcg_temp_ebb_new_i64(void);
TCGv_ptr tcg_temp_ebb_new_ptr(void);
TCGv_i128 tcg_temp_ebb_new_i128(void);
+/* Forget all freed EBB temps, so that new allocations produce new temps. */
+static inline void tcg_temp_ebb_reset_freed(TCGContext *s)
+{
+ memset(s->free_temps, 0, sizeof(s->free_temps));
+}
+
#endif /* TCG_TEMP_FREE_H */
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0babae1b88..4578b185be 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1489,7 +1489,7 @@ void tcg_func_start(TCGContext *s)
s->nb_temps = s->nb_globals;
/* No temps have been previously allocated for size or locality. */
- memset(s->free_temps, 0, sizeof(s->free_temps));
+ tcg_temp_ebb_reset_freed(s);
/* No constant temps have been previously allocated. */
for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
@@ -6120,6 +6120,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
}
#endif
+ /* Do not reuse any EBB that may be allocated within the TB. */
+ tcg_temp_ebb_reset_freed(s);
+
tcg_optimize(s);
reachable_code_pass(s);

View File

@@ -0,0 +1,83 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 10 Sep 2021 14:45:33 +0200
Subject: [PATCH] block/mirror: fix NULL pointer dereference in
mirror_wait_on_conflicts()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In mirror_iteration() we call mirror_wait_on_conflicts() with
`self` parameter set to NULL.
Starting from commit d44dae1a7c we dereference `self` pointer in
mirror_wait_on_conflicts() without checks if it is not NULL.
Backtrace:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
at ../block/mirror.c:172
172 self->waiting_for_op = op;
[Current thread is 1 (Thread 0x7f0908931ec0 (LWP 380249))]
(gdb) bt
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
at ../block/mirror.c:172
#1 0x00005610c5d9d631 in mirror_run (job=0x5610c76a2c00, errp=<optimized out>) at ../block/mirror.c:491
#2 0x00005610c5d58726 in job_co_entry (opaque=0x5610c76a2c00) at ../job.c:917
#3 0x00005610c5f046c6 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>)
at ../util/coroutine-ucontext.c:173
#4 0x00007f0909975820 in ?? () at ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91
from /usr/lib64/libc.so.6
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2001404
Fixes: d44dae1a7c ("block/mirror: fix active mirror dead-lock in mirror_wait_on_conflicts")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-Id: <20210910124533.288318-1-sgarzare@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
(cherry picked from commit 66fed30c9cd11854fc878a4eceb507e915d7c9cd)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/mirror.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 98fc66eabf..85b781bc21 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -160,18 +160,25 @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
if (ranges_overlap(self_start_chunk, self_nb_chunks,
op_start_chunk, op_nb_chunks))
{
- /*
- * If the operation is already (indirectly) waiting for us, or
- * will wait for us as soon as it wakes up, then just go on
- * (instead of producing a deadlock in the former case).
- */
- if (op->waiting_for_op) {
- continue;
+ if (self) {
+ /*
+ * If the operation is already (indirectly) waiting for us,
+ * or will wait for us as soon as it wakes up, then just go
+ * on (instead of producing a deadlock in the former case).
+ */
+ if (op->waiting_for_op) {
+ continue;
+ }
+
+ self->waiting_for_op = op;
}
- self->waiting_for_op = op;
qemu_co_queue_wait(&op->waiting_requests, NULL);
- self->waiting_for_op = NULL;
+
+ if (self) {
+ self->waiting_for_op = NULL;
+ }
+
break;
}
}

View File

@@ -1,149 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Thu, 12 Dec 2024 15:51:15 +0100
Subject: [PATCH] target/i386: Reset TSCs of parked vCPUs too on VM reset
Since commit 5286c3662294 ("target/i386: properly reset TSC on reset")
QEMU writes the special value of "1" to each online vCPU TSC on VM reset
to reset it.
However parked vCPUs don't get that handling and due to that their TSCs
get desynchronized when the VM gets reset.
This in turn causes KVM to turn off PVCLOCK_TSC_STABLE_BIT in its exported
PV clock.
Note that KVM has no understanding of vCPU being currently parked.
Without PVCLOCK_TSC_STABLE_BIT the sched clock is marked unstable in
the guest's kvm_sched_clock_init().
This causes a performance regressions to show in some tests.
Fix this issue by writing the special value of "1" also to TSCs of parked
vCPUs on VM reset.
Reproducing the issue:
1) Boot a VM with "-smp 2,maxcpus=3" or similar
2) device_add host-x86_64-cpu,id=vcpu,node-id=0,socket-id=0,core-id=2,thread-id=0
3) Wait a few seconds
4) device_del vcpu
5) Inside the VM run:
# echo "t" >/proc/sysrq-trigger; dmesg | grep sched_clock_stable
Observe the sched_clock_stable() value is 1.
6) Reboot the VM
7) Once the VM boots once again run inside it:
# echo "t" >/proc/sysrq-trigger; dmesg | grep sched_clock_stable
Observe the sched_clock_stable() value is now 0.
Fixes: 5286c3662294 ("target/i386: properly reset TSC on reset")
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Link: https://lore.kernel.org/r/5a605a88e9a231386dc803c60f5fed9b48108139.1734014926.git.maciej.szmigiero@oracle.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3f2a05b31ee9ce2ddb6c75a9bc3f5e7f7af9a76f)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
accel/kvm/kvm-all.c | 11 +++++++++++
configs/targets/i386-softmmu.mak | 1 +
configs/targets/x86_64-softmmu.mak | 1 +
include/sysemu/kvm.h | 8 ++++++++
target/i386/kvm/kvm.c | 15 +++++++++++++++
5 files changed, 36 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 801cff16a5..dec1d1c16a 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -437,6 +437,16 @@ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
return kvm_fd;
}
+static void kvm_reset_parked_vcpus(void *param)
+{
+ KVMState *s = param;
+ struct KVMParkedVcpu *cpu;
+
+ QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
+ kvm_arch_reset_parked_vcpu(cpu->vcpu_id, cpu->kvm_fd);
+ }
+}
+
int kvm_create_vcpu(CPUState *cpu)
{
unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
@@ -2728,6 +2738,7 @@ static int kvm_init(MachineState *ms)
}
qemu_register_reset(kvm_unpoison_all, NULL);
+ qemu_register_reset(kvm_reset_parked_vcpus, s);
if (s->kernel_irqchip_allowed) {
kvm_irqchip_create(s);
diff --git a/configs/targets/i386-softmmu.mak b/configs/targets/i386-softmmu.mak
index 2ac69d5ba3..2eb0e86250 100644
--- a/configs/targets/i386-softmmu.mak
+++ b/configs/targets/i386-softmmu.mak
@@ -1,4 +1,5 @@
TARGET_ARCH=i386
TARGET_SUPPORTS_MTTCG=y
TARGET_KVM_HAVE_GUEST_DEBUG=y
+TARGET_KVM_HAVE_RESET_PARKED_VCPU=y
TARGET_XML_FILES= gdb-xml/i386-32bit.xml
diff --git a/configs/targets/x86_64-softmmu.mak b/configs/targets/x86_64-softmmu.mak
index e12ac3dc59..920e9a4200 100644
--- a/configs/targets/x86_64-softmmu.mak
+++ b/configs/targets/x86_64-softmmu.mak
@@ -2,4 +2,5 @@ TARGET_ARCH=x86_64
TARGET_BASE_ARCH=i386
TARGET_SUPPORTS_MTTCG=y
TARGET_KVM_HAVE_GUEST_DEBUG=y
+TARGET_KVM_HAVE_RESET_PARKED_VCPU=y
TARGET_XML_FILES= gdb-xml/i386-64bit.xml
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index c3a60b2890..ab17c09a55 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -377,6 +377,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s);
int kvm_arch_init_vcpu(CPUState *cpu);
int kvm_arch_destroy_vcpu(CPUState *cpu);
+#ifdef TARGET_KVM_HAVE_RESET_PARKED_VCPU
+void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd);
+#else
+static inline void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd)
+{
+}
+#endif
+
bool kvm_vcpu_id_is_valid(int vcpu_id);
/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 8e17942c3b..2ff618fbf1 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2415,6 +2415,21 @@ void kvm_arch_after_reset_vcpu(X86CPU *cpu)
}
}
+void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd)
+{
+ g_autofree struct kvm_msrs *msrs = NULL;
+
+ msrs = g_malloc0(sizeof(*msrs) + sizeof(msrs->entries[0]));
+ msrs->entries[0].index = MSR_IA32_TSC;
+ msrs->entries[0].data = 1; /* match the value in x86_cpu_reset() */
+ msrs->nmsrs++;
+
+ if (ioctl(kvm_fd, KVM_SET_MSRS, msrs) != 1) {
+ warn_report("parked vCPU %lu TSC reset failed: %d",
+ vcpu_id, errno);
+ }
+}
+
void kvm_arch_do_init_vcpu(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;

View File

@@ -1,41 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Wed, 6 Nov 2024 11:07:18 +0800
Subject: [PATCH] i386/cpu: Mark avx10_version filtered when prefix is NULL
In x86_cpu_filter_features(), if host doesn't support AVX10, the
configured avx10_version should be marked as filtered regardless of
whether prefix is NULL or not.
Check prefix before warn_report() instead of checking for
have_filtered_features.
Cc: qemu-stable@nongnu.org
Fixes: commit bccfb846fd52 ("target/i386: add AVX10 feature and AVX10 version property")
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Tao Su <tao1.su@linux.intel.com>
Link: https://lore.kernel.org/r/20241106030728.553238-2-zhao1.liu@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit cf4c263551886964c5d58bd7b675b13fd497b402)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
target/i386/cpu.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 3725dbbc4b..1981aeaba5 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7718,8 +7718,10 @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose)
env->avx10_version = version;
have_filtered_features = true;
}
- } else if (env->avx10_version && prefix) {
- warn_report("%s: avx10.%d.", prefix, env->avx10_version);
+ } else if (env->avx10_version) {
+ if (prefix) {
+ warn_report("%s: avx10.%d.", prefix, env->avx10_version);
+ }
have_filtered_features = true;
}

View File

@@ -1,67 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Fri, 17 Jan 2025 12:17:08 +0100
Subject: [PATCH] net: Fix announce_self
b9ad513e1876 ("net: Remove receive_raw()") adds an iovec entry
in qemu_deliver_packet_iov() to add the virtio-net header
in the data when QEMU_NET_PACKET_FLAG_RAW is set but forgets
to increase the number of iovec entries in the array, so
receive_iov() will only send the first entry (the virtio-net
entry, full of 0) and no data. The packet will be discarded.
The only user of QEMU_NET_PACKET_FLAG_RAW is announce_self.
We can see the problem with tcpdump:
- QEMU parameters:
.. -monitor stdio \
-netdev bridge,id=netdev0,br=virbr0 \
-device virtio-net,mac=9a:2b:2c:2d:2e:2f,netdev=netdev0 \
- HMP command:
(qemu) announce_self
- TCP dump:
$ sudo tcpdump -nxi virbr0
without the fix:
<nothing>
with the fix:
ARP, Reverse Request who-is 9a:2b:2c:2d:2e:2f tell 9a:2b:2c:2d:2e:2f, length 46
0x0000: 0001 0800 0604 0003 9a2b 2c2d 2e2f 0000
0x0010: 0000 9a2b 2c2d 2e2f 0000 0000 0000 0000
0x0020: 0000 0000 0000 0000 0000 0000 0000
Reported-by: Xiaohui Li <xiaohli@redhat.com>
Bug: https://issues.redhat.com/browse/RHEL-73891
Fixes: b9ad513e1876 ("net: Remove receive_raw()")
Cc: akihiko.odaki@daynix.com
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
(picked from https://lore.kernel.org/qemu-devel/20250117111709.970789-2-lvivier@redhat.com/)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
net/net.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/net.c b/net/net.c
index 7ef6885876..fefa701bb2 100644
--- a/net/net.c
+++ b/net/net.c
@@ -822,6 +822,7 @@ static ssize_t qemu_deliver_packet_iov(NetClientState *sender,
iov_copy[0].iov_len = nc->vnet_hdr_len;
memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
iov = iov_copy;
+ iovcnt++;
}
if (nc->info->receive_iov) {

View File

@@ -1,67 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Fri, 17 Jan 2025 12:17:09 +0100
Subject: [PATCH] net/dump: Correctly compute Ethernet packet offset
When a packet is sent with QEMU_NET_PACKET_FLAG_RAW by QEMU it
never includes virtio-net header even if qemu_get_vnet_hdr_len()
is not 0, and filter-dump is not managing this case.
The only user of QEMU_NET_PACKET_FLAG_RAW is announce_self,
we can show the problem using it and tcpddump:
- QEMU parameters:
.. -monitor stdio \
-netdev bridge,id=netdev0,br=virbr0 \
-device virtio-net,mac=9a:2b:2c:2d:2e:2f,netdev=netdev0 \
-object filter-dump,netdev=netdev0,file=log.pcap,id=pcap0
- HMP command:
(qemu) announce_self
- TCP dump:
$ tcpdump -nxr log.pcap
without the fix:
08:00:06:04:00:03 > 2e:2f:80:35:00:01, ethertype Unknown (0x9a2b), length 50:
0x0000: 2c2d 2e2f 0000 0000 9a2b 2c2d 2e2f 0000
0x0010: 0000 0000 0000 0000 0000 0000 0000 0000
0x0020: 0000 0000
with the fix:
ARP, Reverse Request who-is 9a:2b:2c:2d:2e:2f tell 9a:2b:2c:2d:2e:2f, length 46
0x0000: 0001 0800 0604 0003 9a2b 2c2d 2e2f 0000
0x0010: 0000 9a2b 2c2d 2e2f 0000 0000 0000 0000
0x0020: 0000 0000 0000 0000 0000 0000 0000
Fixes: 481c52320a26 ("net: Strip virtio-net header when dumping")
Cc: akihiko.odaki@daynix.com
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
(picked from https://lore.kernel.org/qemu-devel/20250117111709.970789-3-lvivier@redhat.com/)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
net/dump.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/dump.c b/net/dump.c
index 956e34a123..42ab8d7716 100644
--- a/net/dump.c
+++ b/net/dump.c
@@ -155,7 +155,8 @@ static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr,
{
NetFilterDumpState *nfds = FILTER_DUMP(nf);
- dump_receive_iov(&nfds->ds, iov, iovcnt, qemu_get_vnet_hdr_len(nf->netdev));
+ dump_receive_iov(&nfds->ds, iov, iovcnt, flags & QEMU_NET_PACKET_FLAG_RAW ?
+ 0 : qemu_get_vnet_hdr_len(nf->netdev));
return 0;
}

View File

@@ -1,96 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Wed, 15 Jan 2025 13:53:41 +0100
Subject: [PATCH] pci: acpi: Windows 'PCI Label Id' bug workaround
Current versions of Windows call _DSM(func=7) regardless
of whether it is supported or not. It leads to NICs having bogus
'PCI Label Id = 0', where none should be set at all.
Also presence of 'PCI Label Id' triggers another Windows bug
on localized versions that leads to hangs. The later bug is fixed
in latest updates for 'Windows Server' but not in consumer
versions of Windows (and there is no plans to fix it
as far as I'm aware).
Given it's easy, implement Microsoft suggested workaround
(return invalid Package) so that affected Windows versions
could boot on QEMU.
This would effectvely remove bogus 'PCI Label Id's on NICs,
but MS teem confirmed that flipping 'PCI Label Id' should not
change 'Network Connection' ennumeration, so it should be safe
for QEMU to change _DSM without any compat code.
Smoke tested with WinXP and WS2022
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/774
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20250115125342.3883374-3-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 0b053391985abcc40b16ac8fc4a7f6588d1d95c1)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/i386/acpi-build.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9fcc2897b8..f7b961e04c 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -654,6 +654,7 @@ static Aml *aml_pci_pdsm(void)
Aml *acpi_index = aml_local(2);
Aml *zero = aml_int(0);
Aml *one = aml_int(1);
+ Aml *not_supp = aml_int(0xFFFFFFFF);
Aml *func = aml_arg(2);
Aml *params = aml_arg(4);
Aml *bnum = aml_derefof(aml_index(params, aml_int(0)));
@@ -678,7 +679,7 @@ static Aml *aml_pci_pdsm(void)
*/
ifctx1 = aml_if(aml_lnot(
aml_or(aml_equal(acpi_index, zero),
- aml_equal(acpi_index, aml_int(0xFFFFFFFF)), NULL)
+ aml_equal(acpi_index, not_supp), NULL)
));
{
/* have supported functions */
@@ -704,18 +705,30 @@ static Aml *aml_pci_pdsm(void)
{
Aml *pkg = aml_package(2);
- aml_append(pkg, zero);
- /*
- * optional, if not impl. should return null string
- */
- aml_append(pkg, aml_string("%s", ""));
- aml_append(ifctx, aml_store(pkg, ret));
-
aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
+ aml_append(ifctx, aml_store(pkg, ret));
/*
- * update acpi-index to actual value
+ * Windows calls func=7 without checking if it's available,
+ * as workaround Microsoft has suggested to return invalid for func7
+ * Package, so return 2 elements package but only initialize elements
+ * when acpi_index is supported and leave them uninitialized, which
+ * leads elements to being Uninitialized ObjectType and should trip
+ * Windows into discarding result as an unexpected and prevent setting
+ * bogus 'PCI Label' on the device.
*/
- aml_append(ifctx, aml_store(acpi_index, aml_index(ret, zero)));
+ ifctx1 = aml_if(aml_lnot(aml_lor(
+ aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp)
+ )));
+ {
+ aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero)));
+ /*
+ * optional, if not impl. should return null string
+ */
+ aml_append(ifctx1, aml_store(aml_string("%s", ""),
+ aml_index(ret, one)));
+ }
+ aml_append(ifctx, ifctx1);
+
aml_append(ifctx, aml_return(ret));
}

View File

@@ -1,53 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Phil Dennis-Jordan <phil@philjordan.eu>
Date: Fri, 13 Dec 2024 17:06:14 +0100
Subject: [PATCH] hw/usb/hcd-xhci-pci: Use modulo to select MSI vector as per
spec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
QEMU would crash with a failed assertion if the XHCI controller
attempted to raise the interrupt on an interrupter corresponding
to a MSI vector with a higher index than the highest configured
for the device by the guest driver.
This behaviour is correct on the MSI/PCI side: per PCI 3.0 spec,
devices must ensure they do not send MSI notifications for
vectors beyond the range of those allocated by the system/driver
software. Unlike MSI-X, there is no generic way for handling
aliasing in the case of fewer allocated vectors than requested,
so the specifics are up to device implementors. (Section
6.8.3.4. "Sending Messages")
It turns out the XHCI spec (Implementation Note in section 4.17,
"Interrupters") requires that the host controller signal the MSI
vector with the number computed by taking the interrupter number
modulo the number of enabled MSI vectors.
This change introduces that modulo calculation, fixing the
failed assertion. This makes the device work correctly in MSI mode
with macOS's XHCI driver, which only allocates a single vector.
Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250112210056.16658-2-phil@philjordan.eu>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit bb5b7fced6b5d3334ab20702fc846e47bb1fb731)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/usb/hcd-xhci-pci.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c
index a039f5778a..516e6909d2 100644
--- a/hw/usb/hcd-xhci-pci.c
+++ b/hw/usb/hcd-xhci-pci.c
@@ -74,6 +74,7 @@ static bool xhci_pci_intr_raise(XHCIState *xhci, int n, bool level)
}
if (msi_enabled(pci_dev) && level) {
+ n %= msi_nr_vectors_allocated(pci_dev);
msi_notify(pci_dev, n);
return true;
}

View File

@@ -1,63 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@redhat.com>
Date: Tue, 3 Dec 2024 13:19:28 +0100
Subject: [PATCH] pci: ensure valid link status bits for downstream ports
PCI hotplug for downstream endpoints on arm fails because Linux'
PCIe hotplug driver doesn't like the QEMU provided LNKSTA:
pcieport 0000:08:01.0: pciehp: Slot(2): Card present
pcieport 0000:08:01.0: pciehp: Slot(2): Link Up
pcieport 0000:08:01.0: pciehp: Slot(2): Cannot train link: status 0x2000
There's 2 cases where LNKSTA isn't setup properly:
* the downstream device has no express capability
* max link width of the bridge is 0
Move the sanity checks added via 88c869198aa63
("pci: Sanity test minimum downstream LNKSTA") outside of the
branch to make sure downstream ports always have a valid LNKSTA.
Signed-off-by: Sebastian Ott <sebott@redhat.com>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-Id: <20241203121928.14861-1-sebott@redhat.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 694632fd44987cc4618612a38ad151047524a590)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/pci/pcie.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 0b455c8654..1b12db6fa2 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1113,18 +1113,22 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) {
lnksta &= ~PCI_EXP_LNKSTA_NLW;
lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW;
- } else if (!(lnksta & PCI_EXP_LNKSTA_NLW)) {
- lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1);
}
if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) {
lnksta &= ~PCI_EXP_LNKSTA_CLS;
lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS;
- } else if (!(lnksta & PCI_EXP_LNKSTA_CLS)) {
- lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT);
}
}
+ if (!(lnksta & PCI_EXP_LNKSTA_NLW)) {
+ lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1);
+ }
+
+ if (!(lnksta & PCI_EXP_LNKSTA_CLS)) {
+ lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT);
+ }
+
pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta &

View File

@@ -1,36 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 12 Dec 2024 22:04:02 +1000
Subject: [PATCH] pci/msix: Fix msix pba read vector poll end calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The end vector calculation has a bug that results in polling fewer
than required vectors when reading at a non-zero offset in PBA memory.
Fixes: bbef882cc193 ("msi: add API to get notified about pending bit poll")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Message-Id: <20241212120402.1475053-1-npiggin@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 42e2a7a0ab23784e44fcb18369e06067abc89305)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/pci/msix.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 487e49834e..cc6e79ec67 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -250,7 +250,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
PCIDevice *dev = opaque;
if (dev->msix_vector_poll_notifier) {
unsigned vector_start = addr * 8;
- unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr);
+ unsigned vector_end = MIN((addr + size) * 8, dev->msix_entries_nr);
dev->msix_vector_poll_notifier(dev, vector_start, vector_end);
}

View File

@@ -0,0 +1,990 @@
Index: pve-qemu-kvm-6.1.0/qapi/block-core.json
===================================================================
--- pve-qemu-kvm-6.1.0.orig/qapi/block-core.json
+++ pve-qemu-kvm-6.1.0/qapi/block-core.json
@@ -3084,7 +3084,7 @@
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -4020,6 +4020,28 @@
'*server': ['InetSocketAddressBase'] } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4392,6 +4414,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4782,6 +4805,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -4977,6 +5011,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: pve-qemu-kvm-6.1.0/block/meson.build
===================================================================
--- pve-qemu-kvm-6.1.0.orig/block/meson.build
+++ pve-qemu-kvm-6.1.0/block/meson.build
@@ -91,6 +91,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: pve-qemu-kvm-6.1.0/configure
===================================================================
--- pve-qemu-kvm-6.1.0.orig/configure
+++ pve-qemu-kvm-6.1.0/configure
@@ -375,6 +375,7 @@ trace_file="trace"
spice="$default_feature"
spice_protocol="auto"
rbd="auto"
+vitastor="auto"
smartcard="auto"
u2f="auto"
libusb="auto"
@@ -1293,6 +1294,10 @@ for opt do
;;
--enable-rbd) rbd="enabled"
;;
+ --disable-vitastor) vitastor="disabled"
+ ;;
+ --enable-vitastor) vitastor="enabled"
+ ;;
--disable-xfsctl) xfs="no"
;;
--enable-xfsctl) xfs="yes"
@@ -1921,6 +1926,7 @@ disabled with --disable-FEATURE, default
spice spice
spice-protocol spice-protocol
rbd rados block device (rbd)
+ vitastor vitastor block device
libiscsi iscsi support
libnfs nfs support
smartcard smartcard support (libcacard)
@@ -5211,7 +5217,7 @@ if test "$skip_meson" = no; then
-Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt -Dbrlapi=$brlapi \
-Dcurl=$curl -Dglusterfs=$glusterfs -Dbzip2=$bzip2 -Dlibiscsi=$libiscsi \
-Dlibnfs=$libnfs -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\
- -Drbd=$rbd -Dlzo=$lzo -Dsnappy=$snappy -Dlzfse=$lzfse -Dlibxml2=$libxml2 \
+ -Drbd=$rbd -Dvitastor=$vitastor -Dlzo=$lzo -Dsnappy=$snappy -Dlzfse=$lzfse -Dlibxml2=$libxml2 \
-Dlibdaxctl=$libdaxctl -Dlibpmem=$libpmem -Dlinux_io_uring=$linux_io_uring \
-Dgnutls=$gnutls -Dnettle=$nettle -Dgcrypt=$gcrypt -Dauth_pam=$auth_pam \
-Dzstd=$zstd -Dseccomp=$seccomp -Dvirtfs=$virtfs -Dcap_ng=$cap_ng \
Index: pve-qemu-kvm-6.1.0/meson.build
===================================================================
--- pve-qemu-kvm-6.1.0.orig/meson.build
+++ pve-qemu-kvm-6.1.0/meson.build
@@ -729,6 +729,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1268,6 +1288,7 @@ config_host_data.set('CONFIG_LIBNFS', li
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
@@ -3087,6 +3108,7 @@ summary_info += {'bpf support': libbpf.f
# TODO: add back protocol and server version
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
summary_info += {'rbd support': rbd.found()}
+summary_info += {'vitastor support': vitastor.found()}
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
summary_info += {'smartcard support': cacard.found()}
summary_info += {'U2F support': u2f.found()}
Index: pve-qemu-kvm-6.1.0/meson_options.txt
===================================================================
--- pve-qemu-kvm-6.1.0.orig/meson_options.txt
+++ pve-qemu-kvm-6.1.0/meson_options.txt
@@ -102,6 +102,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('gtk', type : 'feature', value : 'auto',
description: 'GTK+ user interface')
option('sdl', type : 'feature', value : 'auto',
Index: a/block/vitastor.c
===================================================================
--- /dev/null
+++ a/block/vitastor.c
@@ -0,0 +1,797 @@
+// Copyright (c) Vitaliy Filippov, 2019+
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
+
+// QEMU block driver
+
+#ifdef VITASTOR_SOURCE_TREE
+#define BUILD_DSO
+#define _GNU_SOURCE
+#endif
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "block/block_int.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/uri.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+
+#if QEMU_VERSION_MAJOR >= 3
+#include "qemu/units.h"
+#include "block/qdict.h"
+#include "qemu/cutils.h"
+#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 10
+#include "qemu/cutils.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qjson.h"
+#else
+#include "qapi/qmp/qint.h"
+#define qdict_put_int(options, name, num_val) qdict_put_obj(options, name, QOBJECT(qint_from_int(num_val)))
+#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
+#define qobject_unref QDECREF
+#endif
+
+#include "vitastor_c.h"
+
+#ifdef VITASTOR_SOURCE_TREE
+void qemu_module_dummy(void)
+{
+}
+
+void DSO_STAMP_FUN(void)
+{
+}
+#endif
+
+typedef struct VitastorClient
+{
+ void *proxy;
+ void *watch;
+ char *config_path;
+ char *etcd_host;
+ char *etcd_prefix;
+ char *image;
+ int skip_parents;
+ uint64_t inode;
+ uint64_t pool;
+ uint64_t size;
+ long readonly;
+ int use_rdma;
+ char *rdma_device;
+ int rdma_port_num;
+ int rdma_gid_index;
+ int rdma_mtu;
+ QemuMutex mutex;
+
+ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
+ uint32_t last_bitmap_granularity;
+ uint8_t *last_bitmap;
+} VitastorClient;
+
+typedef struct VitastorRPC
+{
+ BlockDriverState *bs;
+ Coroutine *co;
+ QEMUIOVector *iov;
+ long ret;
+ int complete;
+ uint64_t inode, offset, len;
+ uint32_t bitmap_granularity;
+ uint8_t *bitmap;
+} VitastorRPC;
+
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
+static void vitastor_co_generic_bh_cb(void *opaque, long retval);
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
+static void vitastor_close(BlockDriverState *bs);
+
+static char *qemu_vitastor_next_tok(char *src, char delim, char **p)
+{
+ char *end;
+ *p = NULL;
+ for (end = src; *end; ++end)
+ {
+ if (*end == delim)
+ break;
+ if (*end == '\\' && end[1] != '\0')
+ end++;
+ }
+ if (*end == delim)
+ {
+ *p = end + 1;
+ *end = '\0';
+ }
+ return src;
+}
+
+static void qemu_vitastor_unescape(char *src)
+{
+ char *p;
+ for (p = src; *src; ++src, ++p)
+ {
+ if (*src == '\\' && src[1] != '\0')
+ src++;
+ *p = *src;
+ }
+ *p = '\0';
+}
+
+// vitastor[:key=value]*
+// vitastor[:etcd_host=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3]
+// vitastor:config_path=/etc/vitastor/vitastor.conf:image=testimg
+static void vitastor_parse_filename(const char *filename, QDict *options, Error **errp)
+{
+ const char *start;
+ char *p, *buf;
+
+ if (!strstart(filename, "vitastor:", &start))
+ {
+ error_setg(errp, "File name must start with 'vitastor:'");
+ return;
+ }
+
+ buf = g_strdup(start);
+ p = buf;
+
+ // The following are all key/value pairs
+ while (p)
+ {
+ int i;
+ char *name, *value;
+ name = qemu_vitastor_next_tok(p, '=', &p);
+ if (!p)
+ {
+ error_setg(errp, "conf option %s has no value", name);
+ break;
+ }
+ for (i = 0; i < strlen(name); i++)
+ if (name[i] == '_')
+ name[i] = '-';
+ qemu_vitastor_unescape(name);
+ value = qemu_vitastor_next_tok(p, ':', &p);
+ qemu_vitastor_unescape(value);
+ if (!strcmp(name, "inode") ||
+ !strcmp(name, "pool") ||
+ !strcmp(name, "size") ||
+ !strcmp(name, "skip-parents") ||
+ !strcmp(name, "use-rdma") ||
+ !strcmp(name, "rdma-port_num") ||
+ !strcmp(name, "rdma-gid-index") ||
+ !strcmp(name, "rdma-mtu"))
+ {
+ unsigned long long num_val;
+ if (parse_uint_full(value, &num_val, 0))
+ {
+ error_setg(errp, "Illegal %s: %s", name, value);
+ goto out;
+ }
+ qdict_put_int(options, name, num_val);
+ }
+ else
+ {
+ qdict_put_str(options, name, value);
+ }
+ }
+ if (!qdict_get_try_str(options, "image"))
+ {
+ if (!qdict_get_try_int(options, "inode", 0))
+ {
+ error_setg(errp, "one of image (name) and inode (number) must be specified");
+ goto out;
+ }
+ if (!(qdict_get_try_int(options, "inode", 0) >> (64-POOL_ID_BITS)) &&
+ !qdict_get_try_int(options, "pool", 0))
+ {
+ error_setg(errp, "pool number must be specified or included in the inode number");
+ goto out;
+ }
+ if (!qdict_get_try_int(options, "size", 0))
+ {
+ error_setg(errp, "size must be specified when inode number is used instead of image name");
+ goto out;
+ }
+ }
+
+out:
+ g_free(buf);
+ return;
+}
+
+static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
+{
+ BlockDriverState *bs = task->bs;
+ VitastorClient *client = bs->opaque;
+ task->co = qemu_coroutine_self();
+
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_bh_cb, task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task->complete)
+ {
+ qemu_coroutine_yield();
+ }
+}
+
+static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+ aio_set_fd_handler(ctx, fd,
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
+ 0 /*is_external*/,
+#endif
+ fd_read, fd_write,
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
+ NULL /*io_flush*/,
+#endif
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
+ NULL /*io_poll*/,
+#endif
+#if QEMU_VERSION_MAJOR >= 7
+ NULL /*io_poll_ready*/,
+#endif
+ opaque);
+}
+
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+ VitastorRPC task;
+ VitastorClient *client = bs->opaque;
+ void *image = NULL;
+ int64_t ret = 0;
+ qemu_mutex_init(&client->mutex);
+ client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
+ // FIXME: Rename to etcd_address
+ client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
+ client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
+ client->skip_parents = qdict_get_try_int(options, "skip-parents", 0);
+ client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
+ client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
+ client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
+ client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
+ client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
+ client->proxy = vitastor_c_create_qemu(
+ vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
+ );
+ image = client->image = g_strdup(qdict_get_try_str(options, "image"));
+ client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
+ // Get image metadata (size and readonly flag) or just wait until the client is ready
+ if (!image)
+ client->image = (char*)"x";
+ task.complete = 0;
+ task.bs = bs;
+ if (qemu_in_coroutine())
+ {
+ vitastor_co_get_metadata(&task);
+ }
+ else
+ {
+ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+ BDRV_POLL_WHILE(bs, !task.complete);
+ }
+ client->image = image;
+ if (client->image)
+ {
+ client->watch = (void*)task.ret;
+ client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
+ client->size = vitastor_c_inode_get_size(client->watch);
+ if (!vitastor_c_inode_get_num(client->watch))
+ {
+ error_setg(errp, "image does not exist");
+ vitastor_close(bs);
+ return -1;
+ }
+ if (!client->size)
+ {
+ client->size = qdict_get_try_int(options, "size", 0);
+ }
+ }
+ else
+ {
+ client->watch = NULL;
+ client->inode = qdict_get_try_int(options, "inode", 0);
+ client->pool = qdict_get_try_int(options, "pool", 0);
+ if (client->pool)
+ {
+ client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
+ }
+ client->size = qdict_get_try_int(options, "size", 0);
+ vitastor_c_close_watch(client->proxy, (void*)task.ret);
+ }
+ if (!client->size)
+ {
+ error_setg(errp, "image size not specified");
+ vitastor_close(bs);
+ return -1;
+ }
+ bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
+ //client->aio_context = bdrv_get_aio_context(bs);
+ qdict_del(options, "use-rdma");
+ qdict_del(options, "rdma-mtu");
+ qdict_del(options, "rdma-gid-index");
+ qdict_del(options, "rdma-port-num");
+ qdict_del(options, "rdma-device");
+ qdict_del(options, "config-path");
+ qdict_del(options, "etcd-host");
+ qdict_del(options, "etcd-prefix");
+ qdict_del(options, "image");
+ qdict_del(options, "inode");
+ qdict_del(options, "pool");
+ qdict_del(options, "size");
+ qdict_del(options, "skip-parents");
+ return ret;
+}
+
+static void vitastor_close(BlockDriverState *bs)
+{
+ VitastorClient *client = bs->opaque;
+ vitastor_c_destroy(client->proxy);
+ qemu_mutex_destroy(&client->mutex);
+ if (client->config_path)
+ g_free(client->config_path);
+ if (client->etcd_host)
+ g_free(client->etcd_host);
+ if (client->etcd_prefix)
+ g_free(client->etcd_prefix);
+ if (client->image)
+ g_free(client->image);
+ free(client->last_bitmap);
+ client->last_bitmap = NULL;
+}
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
+static int vitastor_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+ bsz->phys = 4096;
+ bsz->log = 512;
+ return 0;
+}
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+static int coroutine_fn vitastor_co_create_opts(
+#if QEMU_VERSION_MAJOR >= 4
+ BlockDriver *drv,
+#endif
+ const char *url, QemuOpts *opts, Error **errp)
+{
+ QDict *options;
+ int ret;
+
+ options = qdict_new();
+ vitastor_parse_filename(url, options, errp);
+ if (*errp)
+ {
+ ret = -1;
+ goto out;
+ }
+
+ // inodes don't require creation in Vitastor. FIXME: They will when there will be some metadata
+
+ ret = 0;
+out:
+ qobject_unref(options);
+ return ret;
+}
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3
+static int coroutine_fn vitastor_co_truncate(BlockDriverState *bs, int64_t offset,
+#if QEMU_VERSION_MAJOR >= 4
+ bool exact,
+#endif
+ PreallocMode prealloc,
+#if QEMU_VERSION_MAJOR >= 5 && QEMU_VERSION_MINOR >= 1 || QEMU_VERSION_MAJOR > 5 || defined RHEL_BDRV_CO_TRUNCATE_FLAGS
+ BdrvRequestFlags flags,
+#endif
+ Error **errp)
+{
+ VitastorClient *client = bs->opaque;
+
+ if (prealloc != PREALLOC_MODE_OFF)
+ {
+ error_setg(errp, "Unsupported preallocation mode '%s'", PreallocMode_str(prealloc));
+ return -ENOTSUP;
+ }
+
+ // TODO: Resize inode to <offset> bytes
+ client->size = offset / BDRV_SECTOR_SIZE;
+
+ return 0;
+}
+#endif
+
+static int vitastor_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ bdi->cluster_size = 4096;
+ return 0;
+}
+
+static int64_t vitastor_getlength(BlockDriverState *bs)
+{
+ VitastorClient *client = bs->opaque;
+ return client->size;
+}
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+static void vitastor_refresh_limits(BlockDriverState *bs, Error **errp)
+#else
+static int vitastor_refresh_limits(BlockDriverState *bs)
+#endif
+{
+ bs->bl.request_alignment = 4096;
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 3
+ bs->bl.min_mem_alignment = 4096;
+#endif
+ bs->bl.opt_mem_alignment = 4096;
+#if QEMU_VERSION_MAJOR < 2 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR == 0
+ return 0;
+#endif
+}
+
+//static int64_t vitastor_get_allocated_file_size(BlockDriverState *bs)
+//{
+// return 0;
+//}
+
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task)
+{
+ *task = (VitastorRPC) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ };
+}
+
+static void vitastor_co_generic_bh_cb(void *opaque, long retval)
+{
+ VitastorRPC *task = opaque;
+ task->ret = retval;
+ task->complete = 1;
+ if (qemu_coroutine_self() != task->co)
+ {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+ aio_co_wake(task->co);
+#else
+ qemu_coroutine_enter(task->co, NULL);
+ qemu_aio_release(task);
+#endif
+ }
+}
+
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
+{
+ vitastor_co_generic_bh_cb(opaque, retval);
+}
+
+static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
+#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
+ int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
+#else
+ uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
+#endif
+)
+{
+ VitastorClient *client = bs->opaque;
+ VitastorRPC task;
+ vitastor_co_init_task(bs, &task);
+ task.iov = iov;
+
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
+ {
+ qemu_coroutine_yield();
+ }
+
+ return task.ret;
+}
+
+static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
+#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
+ int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
+#else
+ uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
+#endif
+)
+{
+ VitastorClient *client = bs->opaque;
+ VitastorRPC task;
+ vitastor_co_init_task(bs, &task);
+ task.iov = iov;
+
+ if (client->last_bitmap)
+ {
+ // Invalidate last bitmap on write
+ free(client->last_bitmap);
+ client->last_bitmap = NULL;
+ }
+
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
+ {
+ qemu_coroutine_yield();
+ }
+
+ return task.ret;
+}
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
+#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
+static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap)
+{
+ VitastorRPC *task = opaque;
+ VitastorClient *client = task->bs->opaque;
+ task->ret = retval;
+ task->complete = 1;
+ if (retval >= 0)
+ {
+ task->bitmap = bitmap;
+ if (client->last_bitmap_inode == task->inode &&
+ client->last_bitmap_offset == task->offset &&
+ client->last_bitmap_len == task->len)
+ {
+ free(client->last_bitmap);
+ client->last_bitmap = bitmap;
+ }
+ }
+ if (qemu_coroutine_self() != task->co)
+ {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+ aio_co_wake(task->co);
+#else
+ qemu_coroutine_enter(task->co, NULL);
+ qemu_aio_release(task);
+#endif
+ }
+}
+
+static int coroutine_fn vitastor_co_block_status(
+ BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file)
+{
+ // Allocated => return BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID
+ // Not allocated => return 0
+ // Error => return -errno
+ // Set pnum to length of the extent, `*map` = `offset`, `*file` = `bs`
+ VitastorRPC task;
+ VitastorClient *client = bs->opaque;
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ uint8_t bit = 0;
+ if (client->last_bitmap && client->last_bitmap_inode == inode &&
+ client->last_bitmap_offset <= offset &&
+ client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes))
+ {
+ // Use the previously read bitmap
+ task.bitmap_granularity = client->last_bitmap_granularity;
+ task.offset = client->last_bitmap_offset;
+ task.len = client->last_bitmap_len;
+ task.bitmap = client->last_bitmap;
+ }
+ else
+ {
+ // Read bitmap from this position, rounding to full inode PG blocks
+ uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
+ if (!block_size)
+ return -EAGAIN;
+ // Init coroutine
+ vitastor_co_init_task(bs, &task);
+ free(client->last_bitmap);
+ task.inode = client->last_bitmap_inode = inode;
+ task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
+ task.offset = client->last_bitmap_offset = offset / block_size * block_size;
+ task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
+ task.bitmap = client->last_bitmap = NULL;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+ while (!task.complete)
+ {
+ qemu_coroutine_yield();
+ }
+ if (task.ret < 0)
+ {
+ // Error
+ return task.ret;
+ }
+ }
+ if (want_zero)
+ {
+ // Get precise mapping with all holes
+ uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
+ uint64_t bmp_len = task.len / task.bitmap_granularity;
+ uint64_t bmp_end = bmp_pos+1;
+ bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
+ while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit)
+ {
+ bmp_end++;
+ }
+ *pnum = (bmp_end-bmp_pos) * task.bitmap_granularity;
+ }
+ else
+ {
+ // Get larger allocated extents, possibly with false positives
+ uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
+ uint64_t bmp_end = (offset+bytes-task.offset) / task.bitmap_granularity - bmp_pos;
+ while (bmp_pos < bmp_end)
+ {
+ if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8)
+ {
+ bit = bit || task.bitmap[bmp_pos >> 3];
+ bmp_pos += 8;
+ }
+ else
+ {
+ bit = bit || ((task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1);
+ bmp_pos++;
+ }
+ }
+ *pnum = bytes;
+ }
+ if (bit)
+ {
+ *map = offset;
+ *file = bs;
+ }
+ return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
+}
+#endif
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
+// QEMU 1.7-2.11
+static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+{
+ int64_t map = 0;
+ int64_t pnumbytes = 0;
+ int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, &pnumbytes, &map, &file);
+ *pnum = pnumbytes/BDRV_SECTOR_SIZE;
+ return r;
+}
+#endif
+#endif
+
+#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
+static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
+{
+ return vitastor_co_preadv(bs, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, iov, 0);
+}
+
+static int coroutine_fn vitastor_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
+{
+ return vitastor_co_pwritev(bs, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, iov, 0);
+}
+#endif
+
+static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
+{
+ VitastorClient *client = bs->opaque;
+ VitastorRPC task;
+ vitastor_co_init_task(bs, &task);
+
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_sync(client->proxy, vitastor_co_generic_bh_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
+ {
+ qemu_coroutine_yield();
+ }
+
+ return task.ret;
+}
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+static QemuOptsList vitastor_create_opts = {
+ .name = "vitastor-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vitastor_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
+};
+#else
+static QEMUOptionParameter vitastor_create_opts[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { NULL }
+};
+#endif
+
+#if QEMU_VERSION_MAJOR >= 4
+static const char *vitastor_strong_runtime_opts[] = {
+ "inode",
+ "pool",
+ "config-path",
+ "etcd-host",
+ "etcd-prefix",
+
+ NULL
+};
+#endif
+
+static BlockDriver bdrv_vitastor = {
+ .format_name = "vitastor",
+ .protocol_name = "vitastor",
+
+ .instance_size = sizeof(VitastorClient),
+ .bdrv_parse_filename = vitastor_parse_filename,
+
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_get_info = vitastor_get_info,
+ .bdrv_getlength = vitastor_getlength,
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
+ .bdrv_probe_blocksizes = vitastor_probe_blocksizes,
+#endif
+ .bdrv_refresh_limits = vitastor_refresh_limits,
+
+ // FIXME: Implement it along with per-inode statistics
+ //.bdrv_get_allocated_file_size = vitastor_get_allocated_file_size,
+
+ .bdrv_file_open = vitastor_file_open,
+ .bdrv_close = vitastor_close,
+
+ // Option list for the create operation
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
+ .create_opts = &vitastor_create_opts,
+#else
+ .create_options = vitastor_create_opts,
+#endif
+
+ // For qmp_blockdev_create(), used by the qemu monitor / QAPI
+ // Requires patching QAPI IDL, thus unimplemented
+ //.bdrv_co_create = vitastor_co_create,
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+ // For bdrv_create(), used by qemu-img
+ .bdrv_co_create_opts = vitastor_co_create_opts,
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3
+ .bdrv_co_truncate = vitastor_co_truncate,
+#endif
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+ // For snapshot export
+ .bdrv_co_block_status = vitastor_co_block_status,
+#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
+ .bdrv_co_get_block_status = vitastor_co_get_block_status,
+#endif
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
+ .bdrv_co_preadv = vitastor_co_preadv,
+ .bdrv_co_pwritev = vitastor_co_pwritev,
+#else
+ .bdrv_co_readv = vitastor_co_readv,
+ .bdrv_co_writev = vitastor_co_writev,
+#endif
+
+ .bdrv_co_flush_to_disk = vitastor_co_flush,
+
+#if QEMU_VERSION_MAJOR >= 4
+ .strong_runtime_opts = vitastor_strong_runtime_opts,
+#endif
+};
+
+static void vitastor_block_init(void)
+{
+ bdrv_register(&bdrv_vitastor);
+}
+
+block_init(vitastor_block_init);

View File

@@ -14,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index 90fa54352c..e2ea071315 100644
index dd295cfc6d..3ac5177cbb 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -564,7 +564,7 @@ static QemuOptsList raw_runtime_opts = {
@@ -533,7 +533,7 @@ static QemuOptsList raw_runtime_opts = {
{
.name = "locking",
.type = QEMU_OPT_STRING,
@@ -26,7 +26,7 @@ index 90fa54352c..e2ea071315 100644
},
{
.name = "pr-manager",
@@ -664,7 +664,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
@@ -631,7 +631,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->use_lock = false;
break;
case ON_OFF_AUTO_AUTO:

View File

@@ -9,12 +9,12 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/net/net.h b/include/net/net.h
index cdd5b109b0..653a37e9d1 100644
index 5d1508081f..f665924193 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -305,8 +305,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
@@ -219,8 +219,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
int net_hub_id_for_client(NetClientState *nc, int *id);
NetClientState *net_hub_port_find(int hub_id);
-#define DEFAULT_NETWORK_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifup"
-#define DEFAULT_NETWORK_DOWN_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifdown"

View File

@@ -10,10 +10,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 4c239a6970..be09263fb0 100644
index 21b33fbe2e..32514193a9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2475,9 +2475,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
@@ -2007,9 +2007,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
#define CPU_RESOLVING_TYPE TYPE_X86_CPU
#ifdef TARGET_X86_64
@@ -24,4 +24,4 @@ index 4c239a6970..be09263fb0 100644
+#define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("kvm32")
#endif
#define cpu_list x86_cpu_list
#define cpu_signal_handler cpu_x86_signal_handler

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/ui/spice-core.c b/ui/spice-core.c
index bd9dbe03f1..a7ecaad9c7 100644
index 0371055e6c..840cf56923 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -690,32 +690,35 @@ static void qemu_spice_init(void)
@@ -694,32 +694,35 @@ static void qemu_spice_init(void)
if (tls_port) {
x509_dir = qemu_opt_get(opts, "x509-dir");

View File

@@ -9,7 +9,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/block/gluster.c b/block/gluster.c
index e9c038042b..c8457a5014 100644
index e8ee14c8e9..3eb6a05500 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -42,7 +42,7 @@
@@ -21,15 +21,15 @@ index e9c038042b..c8457a5014 100644
/*
* Several versions of GlusterFS (3.12? -> 6.0.1) fail when the transfer size
* is greater or equal to 1024 MiB, so we are limiting the transfer size to 512
@@ -421,6 +421,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
@@ -424,6 +424,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
int old_errno;
SocketAddressList *server;
uint64_t port;
unsigned long long port;
+ const char *logfile;
glfs = glfs_find_preopened(gconf->volume);
if (glfs) {
@@ -463,9 +464,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
@@ -466,9 +467,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
}
}

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c
index 04ed0e242e..728bce3b1e 100644
index dcf82b15b8..feeec452f0 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -963,6 +963,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@@ -814,6 +814,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
rados_conf_set(*cluster, "rbd_cache", "false");
}

View File

@@ -0,0 +1,88 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:37 +0200
Subject: [PATCH] PVE: [Up] qmp: add get_link_status
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
net/net.c | 27 +++++++++++++++++++++++++++
qapi/net.json | 15 +++++++++++++++
qapi/pragma.json | 1 +
3 files changed, 43 insertions(+)
diff --git a/net/net.c b/net/net.c
index 76bbb7c31b..82e0a768b4 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1314,6 +1314,33 @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
}
}
+int64_t qmp_get_link_status(const char *name, Error **errp)
+{
+ NetClientState *ncs[MAX_QUEUE_NUM];
+ NetClientState *nc;
+ int queues;
+ bool ret;
+
+ queues = qemu_find_net_clients_except(name, ncs,
+ NET_CLIENT_DRIVER__MAX,
+ MAX_QUEUE_NUM);
+
+ if (queues == 0) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", name);
+ return (int64_t) -1;
+ }
+
+ nc = ncs[0];
+ ret = ncs[0]->link_down;
+
+ if (nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+ ret = ncs[0]->peer->link_down;
+ }
+
+ return (int64_t) ret ? 0 : 1;
+}
+
void colo_notify_filters_event(int event, Error **errp)
{
NetClientState *nc;
diff --git a/qapi/net.json b/qapi/net.json
index 7fab2e7cd8..74c9a6109e 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -35,6 +35,21 @@
##
{ 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
+##
+# @get_link_status:
+#
+# Get the current link state of the nics or nic.
+#
+# @name: name of the nic you get the state of
+#
+# Return: If link is up 1
+# If link is down 0
+# If an error occure an empty string.
+#
+# Notes: this is an Proxmox VE extension and not offical part of Qemu.
+##
+{ 'command': 'get_link_status', 'data': {'name': 'str'} , 'returns': 'int' }
+
##
# @netdev_add:
#
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 3bc0335d1f..7c91ea3685 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -22,6 +22,7 @@
'system_reset',
'system_wakeup' ],
'command-returns-exceptions': [
+ 'get_link_status',
'human-monitor-command',
'qom-get',
'query-tpm-models',

View File

@@ -16,7 +16,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/block/gluster.c b/block/gluster.c
index c8457a5014..c3a9555591 100644
index 3eb6a05500..b612918ee8 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -57,6 +57,7 @@ typedef struct GlusterAIOCB {
@@ -27,7 +27,7 @@ index c8457a5014..c3a9555591 100644
} GlusterAIOCB;
typedef struct BDRVGlusterState {
@@ -746,8 +747,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
@@ -752,8 +753,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
acb->ret = 0; /* Success */
} else if (ret < 0) {
acb->ret = -errno; /* Read/Write failed */
@@ -39,15 +39,15 @@ index c8457a5014..c3a9555591 100644
}
aio_co_schedule(acb->aio_context, acb->coroutine);
@@ -1018,6 +1021,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
@@ -1021,6 +1024,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
+ acb.is_write = true;
ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1198,9 +1202,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
@@ -1202,9 +1206,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
acb.aio_context = bdrv_get_aio_context(bs);
if (write) {
@@ -59,7 +59,7 @@ index c8457a5014..c3a9555591 100644
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
}
@@ -1263,6 +1269,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
@@ -1268,6 +1274,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
@@ -67,11 +67,11 @@ index c8457a5014..c3a9555591 100644
ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1311,6 +1318,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
@@ -1314,6 +1321,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
+ acb.is_write = true;
ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/qemu-img.c b/qemu-img.c
index 7668f86769..2575e97b43 100644
index 908fd0cce5..5dc1d0a2ca 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3075,7 +3075,8 @@ static int img_info(int argc, char **argv)
@@ -2977,7 +2977,8 @@ static int img_info(int argc, char **argv)
list = collect_image_info_list(image_opts, filename, fmt, chain,
force_share);
if (!list) {

View File

@@ -31,17 +31,16 @@ override the output file's size.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qemu-img-cmds.hx | 4 +-
qemu-img.c | 202 ++++++++++++++++++++++++++++++-----------------
2 files changed, 133 insertions(+), 73 deletions(-)
qemu-img.c | 187 +++++++++++++++++++++++++++++------------------
2 files changed, 119 insertions(+), 72 deletions(-)
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index c9dd70a892..048788b23d 100644
index b3620f29e5..e70ef3dc91 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST
@@ -58,9 +58,9 @@ SRST
ERST
DEF("dd", img_dd,
@@ -54,10 +53,10 @@ index c9dd70a892..048788b23d 100644
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 2575e97b43..8ec68b346f 100644
index 5dc1d0a2ca..f773182bd0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4993,10 +4993,12 @@ static int img_bitmap(int argc, char **argv)
@@ -4793,10 +4793,12 @@ static int img_bitmap(int argc, char **argv)
#define C_IF 04
#define C_OF 010
#define C_SKIP 020
@@ -70,7 +69,7 @@ index 2575e97b43..8ec68b346f 100644
};
struct DdIo {
@@ -5072,6 +5074,19 @@ static int img_dd_skip(const char *arg,
@@ -4872,6 +4874,19 @@ static int img_dd_skip(const char *arg,
return 0;
}
@@ -90,7 +89,7 @@ index 2575e97b43..8ec68b346f 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -5112,6 +5127,7 @@ static int img_dd(int argc, char **argv)
@@ -4912,6 +4927,7 @@ static int img_dd(int argc, char **argv)
{ "if", img_dd_if, C_IF },
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
@@ -98,7 +97,7 @@ index 2575e97b43..8ec68b346f 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5187,91 +5203,112 @@ static int img_dd(int argc, char **argv)
@@ -4987,91 +5003,112 @@ static int img_dd(int argc, char **argv)
arg = NULL;
}
@@ -135,7 +134,11 @@ index 2575e97b43..8ec68b346f 100644
- error_report_err(local_err);
- ret = -1;
- goto out;
- }
+ if (!blk1) {
+ ret = -1;
+ goto out;
+ }
}
- if (!drv->create_opts) {
- error_report("Format driver '%s' does not support image creation",
- drv->format_name);
@@ -147,11 +150,7 @@ index 2575e97b43..8ec68b346f 100644
- proto_drv->format_name);
- ret = -1;
- goto out;
+ if (!blk1) {
+ ret = -1;
+ goto out;
+ }
}
- }
- create_opts = qemu_opts_append(create_opts, drv->create_opts);
- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
-
@@ -275,54 +274,41 @@ index 2575e97b43..8ec68b346f 100644
}
if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
@@ -5288,20 +5325,43 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
@@ -5089,11 +5126,17 @@ static int img_dd(int argc, char **argv)
for (out_pos = 0; in_pos < size; ) {
+ int in_ret, out_ret;
int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
for (out_pos = 0; in_pos < size; block_count++) {
int in_ret, out_ret;
-
- ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
- if (ret < 0) {
- if (in_pos + in.bsz > size) {
- in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
+ size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+ if (blk1) {
+ in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
+ if (in_ret == 0) {
+ in_ret = bytes;
+ }
+ } else {
+ in_ret = read(STDIN_FILENO, in.buf, bytes);
+ in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
} else {
- in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
+ in_ret = read(STDIN_FILENO, in.buf, in_bsz);
+ if (in_ret == 0) {
+ /* early EOF is considered an error */
+ error_report("Input ended unexpectedly");
+ ret = -1;
+ goto out;
+ }
+ }
+ if (in_ret < 0) {
error_report("error while reading from input image file: %s",
- strerror(-ret));
+ strerror(-in_ret));
+ ret = -1;
goto out;
}
in_pos += bytes;
if (in_ret < 0) {
error_report("error while reading from input image file: %s",
@@ -5103,9 +5146,13 @@ static int img_dd(int argc, char **argv)
}
in_pos += in_ret;
- ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
- if (ret < 0) {
- out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+ if (blk2) {
+ out_ret = blk_pwrite(blk2, out_pos, in_ret, in.buf, 0);
+ if (out_ret == 0) {
+ out_ret = in_ret;
+ }
+ out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+ } else {
+ out_ret = write(STDOUT_FILENO, in.buf, in_ret);
+ }
+
- if (out_ret < 0) {
+ if (out_ret != in_ret) {
error_report("error while writing to output image file: %s",
- strerror(-ret));
+ strerror(-out_ret));
+ ret = -1;
goto out;
}
out_pos += bytes;
strerror(-out_ret));
ret = -1;

View File

@@ -10,16 +10,15 @@ an expected end of input.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qemu-img.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 8ec68b346f..b98184bba1 100644
index f773182bd0..98a6562364 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4994,11 +4994,13 @@ static int img_bitmap(int argc, char **argv)
@@ -4794,11 +4794,13 @@ static int img_bitmap(int argc, char **argv)
#define C_OF 010
#define C_SKIP 020
#define C_OSIZE 040
@@ -33,7 +32,7 @@ index 8ec68b346f..b98184bba1 100644
};
struct DdIo {
@@ -5087,6 +5089,19 @@ static int img_dd_osize(const char *arg,
@@ -4887,6 +4889,19 @@ static int img_dd_osize(const char *arg,
return 0;
}
@@ -53,13 +52,13 @@ index 8ec68b346f..b98184bba1 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -5101,12 +5116,14 @@ static int img_dd(int argc, char **argv)
@@ -4901,12 +4916,14 @@ static int img_dd(int argc, char **argv)
int c, i;
const char *out_fmt = "raw";
const char *fmt = NULL;
- int64_t size = 0;
+ int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
int64_t block_count = 0, out_pos, in_pos;
bool force_share = false;
struct DdInfo dd = {
.flags = 0,
@@ -69,7 +68,7 @@ index 8ec68b346f..b98184bba1 100644
};
struct DdIo in = {
.bsz = 512, /* Block size is by default 512 bytes */
@@ -5128,6 +5145,7 @@ static int img_dd(int argc, char **argv)
@@ -4928,6 +4945,7 @@ static int img_dd(int argc, char **argv)
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
{ "osize", img_dd_osize, C_OSIZE },
@@ -77,22 +76,20 @@ index 8ec68b346f..b98184bba1 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5324,9 +5342,10 @@ static int img_dd(int argc, char **argv)
@@ -5124,14 +5142,18 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
- for (out_pos = 0; in_pos < size; ) {
- for (out_pos = 0; in_pos < size; block_count++) {
+ readsize = (dd.isize > 0) ? dd.isize : size;
+ for (out_pos = 0; in_pos < readsize; ) {
+ for (out_pos = 0; in_pos < readsize; block_count++) {
int in_ret, out_ret;
- int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
+ int bytes = (in_pos + in.bsz > readsize) ? readsize - in_pos : in.bsz;
- size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+ size_t in_bsz = in_pos + in.bsz > readsize ? readsize - in_pos : in.bsz;
if (blk1) {
in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
if (in_ret == 0) {
@@ -5335,6 +5354,9 @@ static int img_dd(int argc, char **argv)
in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
} else {
in_ret = read(STDIN_FILENO, in.buf, bytes);
in_ret = read(STDIN_FILENO, in.buf, in_bsz);
if (in_ret == 0) {
+ if (dd.isize == 0) {
+ goto out;

View File

@@ -1,121 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:42 +0200
Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: fix getopt-string + add documentation]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
docs/tools/qemu-img.rst | 11 ++++++++++-
qemu-img-cmds.hx | 4 ++--
qemu-img.c | 23 ++++++++++++++---------
3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 3653adb963..d83e8fb3c0 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -212,6 +212,10 @@ Parameters to convert subcommand:
Parameters to dd subcommand:
+.. option:: -n
+
+ Skip the creation of the target volume
+
.. program:: qemu-img-dd
.. option:: bs=BLOCK_SIZE
@@ -492,7 +496,7 @@ Command description:
it doesn't need to be specified separately in this case.
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
dd copies from *INPUT* file to *OUTPUT* file converting it from
*FMT* format to *OUTPUT_FMT* format.
@@ -503,6 +507,11 @@ Command description:
The size syntax is similar to :manpage:`dd(1)`'s size syntax.
+ If the ``-n`` option is specified, the target volume creation will be
+ skipped. This is useful for formats such as ``rbd`` if the target
+ volume has already been created with site specific options that cannot
+ be supplied through ``qemu-img``.
+
.. option:: info [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [--backing-chain] [-U] FILENAME
Give information about the disk image *FILENAME*. Use it in
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 048788b23d..0b29a67a06 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST
ERST
DEF("dd", img_dd,
- "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
SRST
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
ERST
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index b98184bba1..6fc8384f64 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5118,7 +5118,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -5156,7 +5156,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5176,6 +5176,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
+ case 'n':
+ skip_create = true;
+ break;
case 'U':
force_share = true;
break;
@@ -5306,13 +5309,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
+ if (!skip_create) {
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "%s: error while creating output image: ",
+ out.filename);
+ ret = -1;
+ goto out;
+ }
}
/* TODO, we can't honour --image-opts for the target,

View File

@@ -0,0 +1,65 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:42 +0200
Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qemu-img.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 98a6562364..355b3b82f4 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4918,7 +4918,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t block_count = 0, out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -4956,7 +4956,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:U:n", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -4976,6 +4976,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
+ case 'n':
+ skip_create = true;
+ break;
case 'U':
force_share = true;
break;
@@ -5106,13 +5109,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
+ if (!skip_create) {
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "%s: error while creating output image: ",
+ out.filename);
+ ret = -1;
+ goto out;
+ }
}
/* TODO, we can't honour --image-opts for the target,

View File

@@ -1,130 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Mon, 7 Feb 2022 14:21:01 +0100
Subject: [PATCH] qemu-img dd: add -l option for loading a snapshot
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
docs/tools/qemu-img.rst | 6 +++---
qemu-img-cmds.hx | 4 ++--
qemu-img.c | 33 +++++++++++++++++++++++++++++++--
3 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index d83e8fb3c0..61c6b21859 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -496,10 +496,10 @@ Command description:
it doesn't need to be specified separately in this case.
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
- dd copies from *INPUT* file to *OUTPUT* file converting it from
- *FMT* format to *OUTPUT_FMT* format.
+ dd copies from *INPUT* file or snapshot *SNAPSHOT_PARAM* to *OUTPUT* file
+ converting it from *FMT* format to *OUTPUT_FMT* format.
The data is by default read and written using blocks of 512 bytes but can be
modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 0b29a67a06..758f397232 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -60,9 +60,9 @@ SRST
ERST
DEF("dd", img_dd,
- "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [-l snapshot_param] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
SRST
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
ERST
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 6fc8384f64..a6c88e0860 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5110,6 +5110,7 @@ static int img_dd(int argc, char **argv)
BlockDriver *drv = NULL, *proto_drv = NULL;
BlockBackend *blk1 = NULL, *blk2 = NULL;
QemuOpts *opts = NULL;
+ QemuOpts *sn_opts = NULL;
QemuOptsList *create_opts = NULL;
Error *local_err = NULL;
bool image_opts = false;
@@ -5119,6 +5120,7 @@ static int img_dd(int argc, char **argv)
int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
bool force_share = false, skip_create = false;
+ const char *snapshot_name = NULL;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -5156,7 +5158,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:l:Un", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5179,6 +5181,19 @@ static int img_dd(int argc, char **argv)
case 'n':
skip_create = true;
break;
+ case 'l':
+ if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+ sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
+ optarg, false);
+ if (!sn_opts) {
+ error_report("Failed in parsing snapshot param '%s'",
+ optarg);
+ goto out;
+ }
+ } else {
+ snapshot_name = optarg;
+ }
+ break;
case 'U':
force_share = true;
break;
@@ -5238,11 +5253,24 @@ static int img_dd(int argc, char **argv)
if (dd.flags & C_IF) {
blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
force_share);
-
if (!blk1) {
ret = -1;
goto out;
}
+ if (sn_opts) {
+ bdrv_snapshot_load_tmp(blk_bs(blk1),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+ &local_err);
+ } else if (snapshot_name != NULL) {
+ bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(blk1), snapshot_name,
+ &local_err);
+ }
+ if (local_err) {
+ error_reportf_err(local_err, "Failed to load snapshot: ");
+ ret = -1;
+ goto out;
+ }
}
if (dd.flags & C_OSIZE) {
@@ -5397,6 +5425,7 @@ static int img_dd(int argc, char **argv)
out:
g_free(arg);
qemu_opts_del(opts);
+ qemu_opts_del(sn_opts);
qemu_opts_free(create_opts);
blk_unref(blk1);
blk_unref(blk2);

View File

@@ -7,62 +7,17 @@ Actually provide memory information via the query-balloon
command.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: add BalloonInfo to member name exceptions list
rebase for 8.0 - moved to hw/core/machine-hmp-cmds.c]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/core/machine-hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
hw/virtio/virtio-balloon.c | 33 +++++++++++++++++++++++++++++++--
monitor/hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
qapi/machine.json | 22 +++++++++++++++++++++-
qapi/pragma.json | 1 +
4 files changed, 82 insertions(+), 4 deletions(-)
3 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index 8701f00cc7..3b4c5ef403 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -179,7 +179,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+ monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+ if (info->has_total_mem) {
+ monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+ }
+ if (info->has_free_mem) {
+ monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+ }
+
+ if (info->has_mem_swapped_in) {
+ monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+ }
+ if (info->has_mem_swapped_out) {
+ monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+ }
+ if (info->has_major_page_faults) {
+ monitor_printf(mon, " major_page_faults=%" PRId64,
+ info->major_page_faults);
+ }
+ if (info->has_minor_page_faults) {
+ monitor_printf(mon, " minor_page_faults=%" PRId64,
+ info->minor_page_faults);
+ }
+ if (info->has_last_update) {
+ monitor_printf(mon, " last_update=%" PRId64,
+ info->last_update);
+ }
+
+ monitor_printf(mon, "\n");
qapi_free_BalloonInfo(info);
}
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index afd2ad6dd6..c724218c17 100644
index ae7867a8db..956e3f4e46 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -795,8 +795,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
@@ -820,8 +820,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
VirtIOBalloon *dev = opaque;
@@ -102,13 +57,54 @@ index afd2ad6dd6..c724218c17 100644
}
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index f4ef58d257..c8b97909e7 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -698,7 +698,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+ monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+ if (info->has_total_mem) {
+ monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+ }
+ if (info->has_free_mem) {
+ monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+ }
+
+ if (info->has_mem_swapped_in) {
+ monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+ }
+ if (info->has_mem_swapped_out) {
+ monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+ }
+ if (info->has_major_page_faults) {
+ monitor_printf(mon, " major_page_faults=%" PRId64,
+ info->major_page_faults);
+ }
+ if (info->has_minor_page_faults) {
+ monitor_printf(mon, " minor_page_faults=%" PRId64,
+ info->minor_page_faults);
+ }
+ if (info->has_last_update) {
+ monitor_printf(mon, " last_update=%" PRId64,
+ info->last_update);
+ }
+
+ monitor_printf(mon, "\n");
qapi_free_BalloonInfo(info);
}
diff --git a/qapi/machine.json b/qapi/machine.json
index a6b8795b09..9f7ed0eaa0 100644
index 157712f006..34035c25d1 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1163,9 +1163,29 @@
# @actual: the logical size of the VM in bytes Formula used:
# logical_vm_size = vm_ram_size - balloon_size
@@ -1018,10 +1018,30 @@
# @actual: the logical size of the VM in bytes
# Formula used: logical_vm_size = vm_ram_size - balloon_size
#
+# @last_update: time when stats got updated from guest
+#
@@ -127,6 +123,7 @@ index a6b8795b09..9f7ed0eaa0 100644
+# @max_mem: amount of memory (in bytes) assigned to the guest
+#
# Since: 0.14
#
##
-{ 'struct': 'BalloonInfo', 'data': {'actual': 'int' } }
+{ 'struct': 'BalloonInfo',
@@ -137,15 +134,3 @@ index a6b8795b09..9f7ed0eaa0 100644
##
# @query-balloon:
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 023a2ef7bc..6aaa9cb975 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -81,6 +81,7 @@
'member-name-exceptions': [ # visible in:
'ACPISlotType', # query-acpi-ospm-status
'AcpiTableOptions', # -acpitable
+ 'BalloonInfo', # query-balloon
'BlkdebugEvent', # blockdev-add, -blockdev
'BlkdebugSetStateOptions', # blockdev-add, -blockdev
'BlockDeviceInfo', # query-block

View File

@@ -13,13 +13,13 @@ Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 130217da8f..52a6d74820 100644
index 216fdfaf3a..8f8d5d5276 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -90,6 +90,12 @@ MachineInfoList *qmp_query_machines(bool has_compat_props, bool compat_props,
@@ -98,6 +98,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
info->hotpluggable_cpus = mc->has_hotpluggable_cpus;
info->numa_mem_supported = mc->numa_mem_supported;
info->deprecated = !!mc->deprecation_reason;
info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi");
+
+ if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
+ info->has_is_current = true;
@@ -28,21 +28,21 @@ index 130217da8f..52a6d74820 100644
+
if (mc->default_cpu_type) {
info->default_cpu_type = g_strdup(mc->default_cpu_type);
}
info->has_default_cpu_type = true;
diff --git a/qapi/machine.json b/qapi/machine.json
index 9f7ed0eaa0..16366b774a 100644
index 34035c25d1..cf120ac343 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -167,6 +167,8 @@
@@ -141,6 +141,8 @@
#
# @is-default: whether the machine is default
#
+# @is-current: whether this machine is currently used
+#
# @cpu-max: maximum number of CPUs supported by the machine type
# (since 1.5)
# (since 1.5)
#
@@ -199,7 +201,7 @@
@@ -162,7 +164,7 @@
##
{ 'struct': 'MachineInfo',
'data': { 'name': 'str', '*alias': 'str',
@@ -50,4 +50,4 @@ index 9f7ed0eaa0..16366b774a 100644
+ '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
'*default-ram-id': 'str', 'acpi': 'bool',
'*default-ram-id': 'str' } }

View File

@@ -6,18 +6,16 @@ Subject: [PATCH] PVE: qapi: modify spice query
Provide the last ticket in the SpiceInfo struct optionally.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to QAPI change]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qapi/ui.json | 3 +++
ui/spice-core.c | 4 ++++
2 files changed, 7 insertions(+)
ui/spice-core.c | 5 +++++
2 files changed, 8 insertions(+)
diff --git a/qapi/ui.json b/qapi/ui.json
index 460a26b981..42b911bda3 100644
index cba8665b73..081115ea8a 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -312,11 +312,14 @@
@@ -333,11 +333,14 @@
#
# @channels: a list of @SpiceChannel for each active spice channel
#
@@ -30,17 +28,18 @@ index 460a26b981..42b911bda3 100644
'*tls-port': 'int', '*auth': 'str', '*compiled-version': 'str',
+ '*ticket': 'str',
'mouse-mode': 'SpiceQueryMouseMode', '*channels': ['SpiceChannel']},
'if': 'CONFIG_SPICE' }
'if': 'defined(CONFIG_SPICE)' }
diff --git a/ui/spice-core.c b/ui/spice-core.c
index a7ecaad9c7..fecf002d50 100644
index 840cf56923..96be349635 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -548,6 +548,10 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
@@ -534,6 +534,11 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
micro = SPICE_SERVER_VERSION & 0xff;
info->compiled_version = g_strdup_printf("%d.%d.%d", major, minor, micro);
+ if (auth_passwd) {
+ info->has_ticket = true;
+ info->ticket = g_strdup(auth_passwd);
+ }
+

View File

@@ -1,284 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 13 Oct 2022 11:33:50 +0200
Subject: [PATCH] PVE: add IOChannel implementation for savevm-async
based on migration/channel-block.c and the implementation that was
present in migration/savevm-async.c before QEMU 7.1.
Passes along read/write requests to the given BlockBackend, while
ensuring that a read request going beyond the end results in a
graceful short read.
Additionally, allows tracking the current position from the outside
(intended to be used for progress tracking).
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/channel-savevm-async.c | 184 +++++++++++++++++++++++++++++++
migration/channel-savevm-async.h | 51 +++++++++
migration/meson.build | 1 +
3 files changed, 236 insertions(+)
create mode 100644 migration/channel-savevm-async.c
create mode 100644 migration/channel-savevm-async.h
diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
new file mode 100644
index 0000000000..081a192f49
--- /dev/null
+++ b/migration/channel-savevm-async.c
@@ -0,0 +1,184 @@
+/*
+ * QIO Channel implementation to be used by savevm-async QMP calls
+ */
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "qapi/error.h"
+#include "sysemu/block-backend.h"
+#include "trace.h"
+
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos)
+{
+ QIOChannelSavevmAsync *ioc;
+
+ ioc = QIO_CHANNEL_SAVEVM_ASYNC(object_new(TYPE_QIO_CHANNEL_SAVEVM_ASYNC));
+
+ bdrv_ref(blk_bs(be));
+ ioc->be = be;
+ ioc->bs_pos = bs_pos;
+
+ return ioc;
+}
+
+
+static void
+qio_channel_savevm_async_finalize(Object *obj)
+{
+ QIOChannelSavevmAsync *ioc = QIO_CHANNEL_SAVEVM_ASYNC(obj);
+
+ if (ioc->be) {
+ bdrv_unref(blk_bs(ioc->be));
+ ioc->be = NULL;
+ }
+ ioc->bs_pos = NULL;
+}
+
+
+static ssize_t
+qio_channel_savevm_async_readv(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ int **fds,
+ size_t *nfds,
+ int flags,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ BlockBackend *be = saioc->be;
+ int64_t maxlen = blk_getlength(be);
+ QEMUIOVector qiov;
+ size_t size;
+ int ret;
+
+ qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+ if (*saioc->bs_pos >= maxlen) {
+ error_setg(errp, "cannot read beyond maxlen");
+ return -1;
+ }
+
+ if (maxlen - *saioc->bs_pos < qiov.size) {
+ size = maxlen - *saioc->bs_pos;
+ } else {
+ size = qiov.size;
+ }
+
+ // returns 0 on success
+ ret = blk_preadv(be, *saioc->bs_pos, size, &qiov, 0);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blk_preadv failed");
+ return -1;
+ }
+
+ *saioc->bs_pos += size;
+ return size;
+}
+
+
+static ssize_t
+qio_channel_savevm_async_writev(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ int *fds,
+ size_t nfds,
+ int flags,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ BlockBackend *be = saioc->be;
+ QEMUIOVector qiov;
+ int ret;
+
+ qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+ if (qemu_in_coroutine()) {
+ ret = blk_co_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
+ aio_wait_kick();
+ } else {
+ ret = blk_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
+ }
+
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blk(_co)_pwritev failed");
+ return -1;
+ }
+
+ *saioc->bs_pos += qiov.size;
+ return qiov.size;
+}
+
+
+static int
+qio_channel_savevm_async_set_blocking(QIOChannel *ioc,
+ bool enabled,
+ Error **errp)
+{
+ if (!enabled) {
+ error_setg(errp, "Non-blocking mode not supported for savevm-async");
+ return -1;
+ }
+ return 0;
+}
+
+
+static int
+qio_channel_savevm_async_close(QIOChannel *ioc,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ int rv = bdrv_flush(blk_bs(saioc->be));
+
+ if (rv < 0) {
+ error_setg_errno(errp, -rv, "Unable to flush VMState");
+ return -1;
+ }
+
+ bdrv_unref(blk_bs(saioc->be));
+ saioc->be = NULL;
+ saioc->bs_pos = NULL;
+
+ return 0;
+}
+
+
+static void
+qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc,
+ AioContext *read_ctx,
+ IOHandler *io_read,
+ AioContext *write_ctx,
+ IOHandler *io_write,
+ void *opaque)
+{
+ // if channel-block starts doing something, check if this needs adaptation
+}
+
+
+static void
+qio_channel_savevm_async_class_init(ObjectClass *klass,
+ void *class_data G_GNUC_UNUSED)
+{
+ QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+ ioc_klass->io_writev = qio_channel_savevm_async_writev;
+ ioc_klass->io_readv = qio_channel_savevm_async_readv;
+ ioc_klass->io_set_blocking = qio_channel_savevm_async_set_blocking;
+ ioc_klass->io_close = qio_channel_savevm_async_close;
+ ioc_klass->io_set_aio_fd_handler = qio_channel_savevm_async_set_aio_fd_handler;
+}
+
+static const TypeInfo qio_channel_savevm_async_info = {
+ .parent = TYPE_QIO_CHANNEL,
+ .name = TYPE_QIO_CHANNEL_SAVEVM_ASYNC,
+ .instance_size = sizeof(QIOChannelSavevmAsync),
+ .instance_finalize = qio_channel_savevm_async_finalize,
+ .class_init = qio_channel_savevm_async_class_init,
+};
+
+static void
+qio_channel_savevm_async_register_types(void)
+{
+ type_register_static(&qio_channel_savevm_async_info);
+}
+
+type_init(qio_channel_savevm_async_register_types);
diff --git a/migration/channel-savevm-async.h b/migration/channel-savevm-async.h
new file mode 100644
index 0000000000..17ae2cb261
--- /dev/null
+++ b/migration/channel-savevm-async.h
@@ -0,0 +1,51 @@
+/*
+ * QEMU I/O channels driver for savevm-async.c
+ *
+ * Copyright (c) 2022 Proxmox Server Solutions
+ *
+ * Authors:
+ * Fiona Ebner (f.ebner@proxmox.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QIO_CHANNEL_SAVEVM_ASYNC_H
+#define QIO_CHANNEL_SAVEVM_ASYNC_H
+
+#include "io/channel.h"
+#include "qom/object.h"
+
+#define TYPE_QIO_CHANNEL_SAVEVM_ASYNC "qio-channel-savevm-async"
+OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelSavevmAsync, QIO_CHANNEL_SAVEVM_ASYNC)
+
+
+/**
+ * QIOChannelSavevmAsync:
+ *
+ * The QIOChannelBlock object provides a channel implementation that is able to
+ * perform I/O on any BlockBackend whose BlockDriverState directly contains a
+ * VMState (as opposed to indirectly, like qcow2). It allows tracking the
+ * current position from the outside.
+ */
+struct QIOChannelSavevmAsync {
+ QIOChannel parent;
+ BlockBackend *be;
+ size_t *bs_pos;
+};
+
+
+/**
+ * qio_channel_savevm_async_new:
+ * @be: the block backend
+ * @bs_pos: used to keep track of the IOChannels current position
+ *
+ * Create a new IO channel object that can perform I/O on a BlockBackend object
+ * whose BlockDriverState directly contains a VMState.
+ *
+ * Returns: the new channel object
+ */
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos);
+
+#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
diff --git a/migration/meson.build b/migration/meson.build
index d53cf3417a..b00d58064d 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -13,6 +13,7 @@ system_ss.add(files(
'block-dirty-bitmap.c',
'channel.c',
'channel-block.c',
+ 'channel-savevm-async.c',
'cpu-throttle.c',
'dirtyrate.c',
'exec.c',

View File

@@ -21,37 +21,28 @@ still opened by QEMU.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[SR: improve aborting
register yank before migration_incoming_state_destroy]
[improve aborting]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[FE: further improve aborting
adapt to removal of QEMUFileOps
improve condition for entering final stage
adapt to QAPI and other changes for 8.2
make sure to not call vm_start() from coroutine
stop CPU throttling after finishing
force raw format when loading state as suggested by Friedrich Weber]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hmp-commands-info.hx | 13 +
hmp-commands.hx | 17 ++
hmp-commands.hx | 33 ++
include/migration/snapshot.h | 2 +
include/monitor/hmp.h | 3 +
include/monitor/hmp.h | 5 +
migration/meson.build | 1 +
migration/savevm-async.c | 571 +++++++++++++++++++++++++++++++++++
monitor/hmp-cmds.c | 38 +++
qapi/migration.json | 34 +++
qapi/misc.json | 18 ++
migration/savevm-async.c | 598 +++++++++++++++++++++++++++++++++++
monitor/hmp-cmds.c | 57 ++++
qapi/migration.json | 34 ++
qapi/misc.json | 32 ++
qemu-options.hx | 12 +
system/vl.c | 10 +
11 files changed, 719 insertions(+)
softmmu/vl.c | 10 +
11 files changed, 797 insertions(+)
create mode 100644 migration/savevm-async.c
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index c59cd6637b..d1a7b99add 100644
index 27206ac049..e6dd3be07a 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -512,6 +512,19 @@ SRST
@@ -551,6 +551,19 @@ SRST
Show current migration parameters.
ERST
@@ -72,13 +63,13 @@ index c59cd6637b..d1a7b99add 100644
.name = "balloon",
.args_type = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 06746f0afc..0c7c6f2c16 100644
index d78e4cfc47..42203dbe92 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1859,3 +1859,20 @@ SRST
List event channels in the guest
ERST
#endif
@@ -1744,3 +1744,36 @@ ERST
.help = "start a round of guest dirty rate measurement",
.cmd = hmp_calc_dirty_rate,
},
+
+ {
+ .name = "savevm-start",
@@ -89,6 +80,22 @@ index 06746f0afc..0c7c6f2c16 100644
+ },
+
+ {
+ .name = "snapshot-drive",
+ .args_type = "device:s,name:s",
+ .params = "device name",
+ .help = "Create internal snapshot.",
+ .cmd = hmp_snapshot_drive,
+ },
+
+ {
+ .name = "delete-drive-snapshot",
+ .args_type = "device:s,name:s",
+ .params = "device name",
+ .help = "Delete internal snapshot.",
+ .cmd = hmp_delete_drive_snapshot,
+ },
+
+ {
+ .name = "savevm-end",
+ .args_type = "",
+ .params = "",
@@ -97,21 +104,21 @@ index 06746f0afc..0c7c6f2c16 100644
+ .coroutine = true,
+ },
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index 9e4dcaaa75..2581730d74 100644
index e72083b117..c846d37806 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -68,4 +68,6 @@ bool delete_snapshot(const char *name,
*/
void load_snapshot_resume(RunState state);
@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
bool has_devices, strList *devices,
Error **errp);
+int load_snapshot_from_blockdev(const char *filename, Error **errp);
+
#endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index ae116d9804..2596cc2426 100644
index 3baa1058e2..1247d7362a 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
@@ -25,6 +25,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
void hmp_info_uuid(Monitor *mon, const QDict *qdict);
void hmp_info_chardev(Monitor *mon, const QDict *qdict);
void hmp_info_mice(Monitor *mon, const QDict *qdict);
@@ -119,44 +126,42 @@ index ae116d9804..2596cc2426 100644
void hmp_info_migrate(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -92,6 +93,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
void hmp_mouse_move(Monitor *mon, const QDict *qdict);
void hmp_mouse_button(Monitor *mon, const QDict *qdict);
void hmp_mouse_set(Monitor *mon, const QDict *qdict);
@@ -79,6 +80,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
void hmp_netdev_del(Monitor *mon, const QDict *qdict);
void hmp_getfd(Monitor *mon, const QDict *qdict);
void hmp_closefd(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
void hmp_sendkey(Monitor *mon, const QDict *qdict);
void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build
index b00d58064d..075b013971 100644
index f8714dcb15..ea9aedeefc 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -29,6 +29,7 @@ system_ss.add(files(
'options.c',
@@ -23,6 +23,7 @@ softmmu_ss.add(files(
'multifd-zlib.c',
'postcopy-ram.c',
'savevm.c',
+ 'savevm-async.c',
'socket.c',
'tls.c',
'threadinfo.c',
), gnutls)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
new file mode 100644
index 0000000000..ee8ef316d0
index 0000000000..79a0cda906
--- /dev/null
+++ b/migration/savevm-async.c
@@ -0,0 +1,571 @@
@@ -0,0 +1,598 @@
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "migration/migration.h"
+#include "migration/migration-stats.h"
+#include "migration/options.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
+#include "migration/ram.h"
+#include "migration/qemu-file.h"
+#include "sysemu/cpu-throttle.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+#include "block/block.h"
@@ -168,14 +173,15 @@ index 0000000000..ee8ef316d0
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "qemu/yank.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+/* used while emulated sync operation in progress */
+#define NOT_DONE -EINPROGRESS
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
@@ -204,7 +210,7 @@ index 0000000000..ee8ef316d0
+ int64_t total_time;
+ QEMUBH *finalize_bh;
+ Coroutine *co;
+ QemuCoSleep target_close_wait;
+ QemuCoSleep *target_close_wait;
+} snap_state;
+
+static bool savevm_aborted(void)
@@ -223,20 +229,24 @@ index 0000000000..ee8ef316d0
+ info->bytes = s->bs_pos;
+ switch (s->state) {
+ case SAVE_STATE_ERROR:
+ info->has_status = true;
+ info->status = g_strdup("failed");
+ info->has_total_time = true;
+ info->total_time = s->total_time;
+ if (s->error) {
+ info->has_error = true;
+ info->error = g_strdup(error_get_pretty(s->error));
+ }
+ break;
+ case SAVE_STATE_ACTIVE:
+ info->has_status = true;
+ info->status = g_strdup("active");
+ info->has_total_time = true;
+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+ - s->total_time;
+ break;
+ case SAVE_STATE_COMPLETED:
+ info->has_status = true;
+ info->status = g_strdup("completed");
+ info->has_total_time = true;
+ info->total_time = s->total_time;
@@ -258,11 +268,9 @@ index 0000000000..ee8ef316d0
+
+ if (snap_state.file) {
+ ret = qemu_fclose(snap_state.file);
+ snap_state.file = NULL;
+ }
+
+ if (snap_state.target) {
+ BlockDriverState *target_bs = blk_bs(snap_state.target);
+ if (!savevm_aborted()) {
+ /* try to truncate, but ignore errors (will fail on block devices).
+ * note1: bdrv_read() need whole blocks, so we need to round up
@@ -271,21 +279,21 @@ index 0000000000..ee8ef316d0
+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+ }
+ if (target_bs) {
+ bdrv_op_unblock_all(target_bs, snap_state.blocker);
+ }
+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
+ error_free(snap_state.blocker);
+ snap_state.blocker = NULL;
+ blk_unref(snap_state.target);
+ snap_state.target = NULL;
+
+ qemu_co_sleep_wake(&snap_state.target_close_wait);
+ if (snap_state.target_close_wait) {
+ qemu_co_sleep_wake(snap_state.target_close_wait);
+ }
+ }
+
+ return ret;
+}
+
+static void G_GNUC_PRINTF(1, 2) save_snapshot_error(const char *fmt, ...)
+static void save_snapshot_error(const char *fmt, ...)
+{
+ va_list ap;
+ char *msg;
@@ -297,7 +305,7 @@ index 0000000000..ee8ef316d0
+ DPRINTF("save_snapshot_error: %s\n", msg);
+
+ if (!snap_state.error) {
+ error_setg(&snap_state.error, "%s", msg);
+ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
+ }
+
+ g_free (msg);
@@ -305,9 +313,64 @@ index 0000000000..ee8ef316d0
+ snap_state.state = SAVE_STATE_ERROR;
+}
+
+static int block_state_close(void *opaque, Error **errp)
+{
+ snap_state.file = NULL;
+ return blk_flush(snap_state.target);
+}
+
+typedef struct BlkRwCo {
+ int64_t offset;
+ QEMUIOVector *qiov;
+ ssize_t ret;
+} BlkRwCo;
+
+static void coroutine_fn block_state_write_entry(void *opaque) {
+ BlkRwCo *rwco = opaque;
+ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
+ rwco->qiov, 0);
+ aio_wait_kick();
+}
+
+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
+ int iovcnt, int64_t pos, Error **errp)
+{
+ QEMUIOVector qiov;
+ BlkRwCo rwco;
+
+ assert(pos == snap_state.bs_pos);
+ rwco = (BlkRwCo) {
+ .offset = pos,
+ .qiov = &qiov,
+ .ret = NOT_DONE,
+ };
+
+ qemu_iovec_init_external(&qiov, iov, iovcnt);
+
+ if (qemu_in_coroutine()) {
+ block_state_write_entry(&rwco);
+ } else {
+ Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
+ bdrv_coroutine_enter(blk_bs(snap_state.target), co);
+ BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
+ }
+ if (rwco.ret < 0) {
+ return rwco.ret;
+ }
+
+ snap_state.bs_pos += qiov.size;
+ return qiov.size;
+}
+
+static const QEMUFileOps block_file_ops = {
+ .writev_buffer = block_state_writev_buffer,
+ .close = block_state_close,
+};
+
+static void process_savevm_finalize(void *opaque)
+{
+ int ret;
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
+ MigrationState *ms = migrate_get_current();
+
+ bool aborted = savevm_aborted();
@@ -324,7 +387,9 @@ index 0000000000..ee8ef316d0
+ * so move it back. It can stay in the main context and live out its live
+ * there, since we're done with it after this method ends anyway.
+ */
+ aio_context_acquire(iohandler_ctx);
+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+ aio_context_release(iohandler_ctx);
+
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret < 0) {
@@ -336,7 +401,7 @@ index 0000000000..ee8ef316d0
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
+ }
+
@@ -349,12 +414,6 @@ index 0000000000..ee8ef316d0
+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ms->to_dst_file = NULL;
+
+ /*
+ * Same as in migration_iteration_finish(): saving RAM might've turned on CPU throttling for
+ * auto-converge, make sure to disable it.
+ */
+ cpu_throttle_stop();
+
+ qemu_savevm_state_cleanup();
+
+ ret = save_snapshot_cleanup();
@@ -363,11 +422,8 @@ index 0000000000..ee8ef316d0
+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_COMPLETED;
+ } else if (aborted) {
+ /*
+ * If there was an error, there's no need to set a new one here.
+ * If the snapshot was canceled, leave setting the state to
+ * qmp_savevm_end(), which is waked by save_snapshot_cleanup().
+ */
+ save_snapshot_error("process_savevm_cleanup: found aborted state: %d",
+ snap_state.state);
+ } else {
+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
+ snap_state.state);
@@ -399,32 +455,18 @@ index 0000000000..ee8ef316d0
+ }
+
+ while (snap_state.state == SAVE_STATE_ACTIVE) {
+ uint64_t pending_size, pend_precopy, pend_postcopy;
+ uint64_t threshold = 400 * 1000;
+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
+
+ /*
+ * pending_{estimate,exact} are expected to be called without iothread
+ * lock. Similar to what is done in migration.c, call the exact variant
+ * only once pend_precopy in the estimate is below the threshold.
+ */
+ bql_unlock();
+ qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
+ if (pend_precopy <= threshold) {
+ qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
+ }
+ bql_lock();
+ pending_size = pend_precopy + pend_postcopy;
+ /* pending is expected to be called without iothread lock */
+ qemu_mutex_unlock_iothread();
+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
+ qemu_mutex_lock_iothread();
+
+ /*
+ * A guest reaching this cutoff is dirtying lots of RAM. It should be
+ * large enough so that the guest can't dirty this much between the
+ * check and the guest actually being stopped, but it should be small
+ * enough to avoid long downtimes for non-hibernation snapshots.
+ */
+ maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
+
+ /* Note that there is no progress for pend_postcopy when iterating */
+ if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
+
+ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
+ ret = qemu_savevm_state_iterate(snap_state.file, false);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
@@ -433,7 +475,11 @@ index 0000000000..ee8ef316d0
+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+ } else {
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+ global_state_store();
+ ret = global_state_store();
+ if (ret) {
+ save_snapshot_error("global_state_store error %d", ret);
+ break;
+ }
+
+ DPRINTF("savevm iterate complete\n");
+ break;
@@ -452,25 +498,19 @@ index 0000000000..ee8ef316d0
+ * so move there now and after every flush.
+ */
+ aio_co_reschedule_self(qemu_get_aio_context());
+ bdrv_graph_co_rdlock();
+ bs = bdrv_first(&it);
+ bdrv_graph_co_rdunlock();
+ while (bs) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+ if (bs != blk_bs(snap_state.target)) {
+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
+ if (bs_ctx != qemu_get_aio_context()) {
+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+ aio_co_reschedule_self(bs_ctx);
+ bdrv_graph_co_rdlock();
+ bdrv_flush(bs);
+ bdrv_graph_co_rdunlock();
+ aio_co_reschedule_self(qemu_get_aio_context());
+ }
+ if (bs == blk_bs(snap_state.target)) {
+ continue;
+ }
+
+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
+ if (bs_ctx != qemu_get_aio_context()) {
+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+ aio_co_reschedule_self(bs_ctx);
+ bdrv_flush(bs);
+ aio_co_reschedule_self(qemu_get_aio_context());
+ }
+ bdrv_graph_co_rdlock();
+ bs = bdrv_next(&it);
+ bdrv_graph_co_rdunlock();
+ }
+
+ DPRINTF("timing: async flushing took %ld ms\n",
@@ -479,23 +519,28 @@ index 0000000000..ee8ef316d0
+ qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+void qmp_savevm_start(const char *statefile, Error **errp)
+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+{
+ Error *local_err = NULL;
+ MigrationState *ms = migrate_get_current();
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
+ BlockDriverState *target_bs = NULL;
+ int ret = 0;
+
+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+
+ if (snap_state.state != SAVE_STATE_DONE) {
+ error_setg(errp, "VM snapshot already started\n");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "VM snapshot already started\n");
+ return;
+ }
+
+ if (migration_is_running()) {
+ error_setg(errp, "There's a migration process in progress");
+ if (migration_is_running(ms->state)) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+ return;
+ }
+
+ if (migrate_use_block()) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "Block migration and snapshots are incompatible");
+ return;
+ }
+
@@ -504,14 +549,13 @@ index 0000000000..ee8ef316d0
+ snap_state.bs_pos = 0;
+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ snap_state.blocker = NULL;
+ snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
+
+ if (snap_state.error) {
+ error_free(snap_state.error);
+ snap_state.error = NULL;
+ }
+
+ if (!statefile) {
+ if (!has_statefile) {
+ vm_stop(RUN_STATE_SAVE_VM);
+ snap_state.state = SAVE_STATE_COMPLETED;
+ return;
@@ -527,21 +571,14 @@ index 0000000000..ee8ef316d0
+ qdict_put_str(options, "driver", "raw");
+ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
+ if (!snap_state.target) {
+ error_setg(errp, "failed to open '%s'", statefile);
+ goto restart;
+ }
+ target_bs = blk_bs(snap_state.target);
+ if (!target_bs) {
+ error_setg(errp, "failed to open '%s' - no block driver state", statefile);
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+ goto restart;
+ }
+
+ QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
+ &snap_state.bs_pos));
+ snap_state.file = qemu_file_new_output(ioc);
+ snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+
+ if (!snap_state.file) {
+ error_setg(errp, "failed to open '%s'", statefile);
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+ goto restart;
+ }
+
@@ -550,36 +587,26 @@ index 0000000000..ee8ef316d0
+ * State is cleared in process_savevm_co, but has to be initialized
+ * here (blocking main thread, from QMP) to avoid race conditions.
+ */
+ if (migrate_init(ms, errp)) {
+ return;
+ }
+ memset(&mig_stats, 0, sizeof(mig_stats));
+ migrate_init(ms);
+ memset(&ram_counters, 0, sizeof(ram_counters));
+ ms->to_dst_file = snap_state.file;
+
+ error_setg(&snap_state.blocker, "block device is in use by savevm");
+ bdrv_op_block_all(target_bs, snap_state.blocker);
+ blk_op_block_all(snap_state.target, snap_state.blocker);
+
+ snap_state.state = SAVE_STATE_ACTIVE;
+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+ qemu_mutex_unlock_iothread();
+ qemu_savevm_state_header(snap_state.file);
+ ret = qemu_savevm_state_setup(snap_state.file, &local_err);
+ if (ret != 0) {
+ error_setg_errno(errp, -ret, "savevm state setup failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ return;
+ }
+ qemu_savevm_state_setup(snap_state.file);
+ qemu_mutex_lock_iothread();
+
+ /* Async processing from here on out happens in iohandler context, so let
+ * the target bdrv have its home there.
+ */
+ ret = blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+ if (ret != 0) {
+ warn_report("failed to set iohandler context for VM state target: %s %s",
+ local_err ? error_get_pretty(local_err) : "unknown error",
+ strerror(-ret));
+ }
+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+ aio_co_schedule(iohandler_ctx, snap_state.co);
+
+ return;
@@ -594,50 +621,19 @@ index 0000000000..ee8ef316d0
+ }
+}
+
+static void coroutine_fn wait_for_close_co(void *opaque)
+void coroutine_fn qmp_savevm_end(Error **errp)
+{
+ int64_t timeout;
+
+ if (!snap_state.target) {
+ DPRINTF("savevm-end: no target file open\n");
+ return;
+ }
+
+ /* wait until cleanup is done before returning, this ensures that after this
+ * call exits the statefile will be closed and can be removed immediately */
+ DPRINTF("savevm-end: waiting for cleanup\n");
+ timeout = 30L * 1000 * 1000 * 1000;
+ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
+ QEMU_CLOCK_REALTIME, timeout);
+ if (snap_state.target) {
+ save_snapshot_error("timeout waiting for target file close in "
+ "qmp_savevm_end");
+ /* we cannot assume the snapshot finished in this case, so leave the
+ * state alone - caller has to figure something out */
+ return;
+ }
+
+ // File closed and no other error, so ensure next snapshot can be started.
+ if (snap_state.state != SAVE_STATE_ERROR) {
+ snap_state.state = SAVE_STATE_DONE;
+ }
+
+ DPRINTF("savevm-end: cleanup done\n");
+}
+
+void qmp_savevm_end(Error **errp)
+{
+ if (snap_state.state == SAVE_STATE_DONE) {
+ error_setg(errp, "VM snapshot not started\n");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "VM snapshot not started\n");
+ return;
+ }
+
+ Coroutine *wait_for_close = qemu_coroutine_create(wait_for_close_co, NULL);
+
+ if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_CANCELLED;
+ qemu_coroutine_enter(wait_for_close);
+ return;
+ goto wait_for_close;
+ }
+
+ if (snap_state.saved_vm_running) {
@@ -647,42 +643,88 @@ index 0000000000..ee8ef316d0
+
+ snap_state.state = SAVE_STATE_DONE;
+
+ qemu_coroutine_enter(wait_for_close);
+wait_for_close:
+ if (!snap_state.target) {
+ DPRINTF("savevm-end: no target file open\n");
+ return;
+ }
+
+ /* wait until cleanup is done before returning, this ensures that after this
+ * call exits the statefile will be closed and can be removed immediately */
+ DPRINTF("savevm-end: waiting for cleanup\n");
+ timeout = 30L * 1000 * 1000 * 1000;
+ qemu_co_sleep_ns_wakeable(snap_state.target_close_wait,
+ QEMU_CLOCK_REALTIME, timeout);
+ snap_state.target_close_wait = NULL;
+ if (snap_state.target) {
+ save_snapshot_error("timeout waiting for target file close in "
+ "qmp_savevm_end");
+ /* we cannot assume the snapshot finished in this case, so leave the
+ * state alone - caller has to figure something out */
+ return;
+ }
+
+ DPRINTF("savevm-end: cleanup done\n");
+}
+
+// FIXME: Deprecated
+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
+{
+ // Compatibility to older qemu-server.
+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
+}
+
+// FIXME: Deprecated
+void qmp_delete_drive_snapshot(const char *device, const char *name,
+ Error **errp)
+{
+ // Compatibility to older qemu-server.
+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
+ true, name, errp);
+}
+
+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
+ size_t size, Error **errp)
+{
+ BlockBackend *be = opaque;
+ int64_t maxlen = blk_getlength(be);
+ if (pos > maxlen) {
+ return -EIO;
+ }
+ if ((pos + size) > maxlen) {
+ size = maxlen - pos - 1;
+ }
+ if (size == 0) {
+ return 0;
+ }
+ return blk_pread(be, pos, buf, size);
+}
+
+static const QEMUFileOps loadstate_file_ops = {
+ .get_buffer = loadstate_get_buffer,
+};
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+ BlockBackend *be;
+ BlockDriverState *bs = NULL;
+ Error *local_err = NULL;
+ Error *blocker = NULL;
+ QDict *options;
+
+ QEMUFile *f;
+ size_t bs_pos = 0;
+ int ret = -EINVAL;
+
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+
+ be = blk_new_open(filename, NULL, options, 0, &local_err);
+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
+
+ if (!be) {
+ error_setg(errp, "Could not open VM state file");
+ goto the_end;
+ }
+
+ bs = blk_bs(be);
+ if (!bs) {
+ error_setg(errp, "Could not open VM state file - missing block driver state");
+ goto the_end;
+ }
+
+ error_setg(&blocker, "block device is in use by load state");
+ bdrv_op_block_all(bs, blocker);
+ blk_op_block_all(be, blocker);
+
+ /* restore the VM state */
+ f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+ f = qemu_fopen_ops(be, &loadstate_file_ops);
+ if (!f) {
+ error_setg(errp, "Could not open VM state file");
+ goto the_end;
@@ -695,10 +737,6 @@ index 0000000000..ee8ef316d0
+ dirty_bitmap_mig_before_vm_start();
+
+ qemu_fclose(f);
+
+ /* state_destroy assumes a real migration which would have added a yank */
+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
+ migration_incoming_state_destroy();
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Error while loading VM state");
@@ -709,37 +747,46 @@ index 0000000000..ee8ef316d0
+
+ the_end:
+ if (be) {
+ if (bs) {
+ bdrv_op_unblock_all(bs, blocker);
+ }
+ blk_op_unblock_all(be, blocker);
+ error_free(blocker);
+ blk_unref(be);
+ }
+ return ret;
+}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index f601d06ab8..874084565f 100644
index c8b97909e7..64a84cf4ee 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -24,6 +24,7 @@
#include "qapi/error.h"
#include "qapi/qapi-commands-control.h"
#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qmp/qdict.h"
#include "qemu/cutils.h"
@@ -434,3 +435,40 @@ void hmp_dumpdtb(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "dtb dumped to %s", filename);
@@ -1961,6 +1961,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, err);
}
#endif
+
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+ qmp_savevm_start(statefile, &errp);
+ qmp_savevm_start(statefile != NULL, statefile, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_snapshot_drive(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_delete_drive_snapshot(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
@@ -756,7 +803,7 @@ index f601d06ab8..874084565f 100644
+ SaveVMInfo *info;
+ info = qmp_query_savevm(NULL);
+
+ if (info->status) {
+ if (info->has_status) {
+ monitor_printf(mon, "savevm status: %s\n", info->status);
+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+ info->total_time);
@@ -766,17 +813,21 @@ index f601d06ab8..874084565f 100644
+ if (info->has_bytes) {
+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+ }
+ if (info->error) {
+ if (info->has_error) {
+ monitor_printf(mon, "Error: %s\n", info->error);
+ }
+}
+
void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index a605dc26db..927b1e1c7d 100644
index 1124a2dda8..3d72b3e3f3 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -276,6 +276,40 @@
'*dirty-limit-throttle-time-per-round': 'uint64',
'*dirty-limit-ring-full-time': 'uint64'} }
@@ -247,6 +247,40 @@
'*compression': 'CompressionStats',
'*socket-address': ['SocketAddress'] } }
+##
+# @SaveVMInfo:
@@ -816,10 +867,10 @@ index a605dc26db..927b1e1c7d 100644
# @query-migrate:
#
diff --git a/qapi/misc.json b/qapi/misc.json
index 559b66f201..7959e89c1e 100644
index 5c2ca3b556..9bc14e1032 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -454,6 +454,24 @@
@@ -431,6 +431,38 @@
##
{ 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
@@ -828,27 +879,41 @@ index 559b66f201..7959e89c1e 100644
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+# @statefile: target file that state should be written to.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @snapshot-drive:
+#
+# Create an internal drive snapshot.
+#
+##
+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @delete-drive-snapshot:
+#
+# Delete a drive snapshot.
+#
+##
+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end' }
+{ 'command': 'savevm-end', 'coroutine': true }
+
##
# @CommandLineParameterType:
#
diff --git a/qemu-options.hx b/qemu-options.hx
index dacc9790a4..c05f411599 100644
index 83aa59a920..002ba697e9 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4764,6 +4764,18 @@ SRST
@@ -4131,6 +4131,18 @@ SRST
Start right away with a saved state (``loadvm`` in monitor)
ERST
@@ -867,22 +932,22 @@ index dacc9790a4..c05f411599 100644
#ifndef _WIN32
DEF("daemonize", 0, QEMU_OPTION_daemonize, \
"-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/system/vl.c b/system/vl.c
index 2f855d83fb..39d451bb41 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -164,6 +164,7 @@ static const char *accelerators;
static bool have_custom_ram_size;
static const char *ram_memdev_id;
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 5ca11e7469..220c67cd32 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -150,6 +150,7 @@ static const char *incoming;
static const char *loadvm;
static const char *accelerators;
static QDict *machine_opts_dict;
+static const char *loadstate;
static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
static int display_remote;
@@ -2725,6 +2726,12 @@ void qmp_x_exit_preconfig(Error **errp)
RunState state = autostart ? RUN_STATE_RUNNING : runstate_get();
load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
load_snapshot_resume(state);
static ram_addr_t maxram_size;
static uint64_t ram_slots;
@@ -2700,6 +2701,12 @@ void qmp_x_exit_preconfig(Error **errp)
autostart = 0;
exit(1);
}
+ } else if (loadstate) {
+ Error *local_err = NULL;
+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
@@ -892,7 +957,7 @@ index 2f855d83fb..39d451bb41 100644
}
if (replay_mode != REPLAY_MODE_NONE) {
replay_vmstate_init();
@@ -3262,6 +3269,9 @@ void qemu_init(int argc, char **argv)
@@ -3238,6 +3245,9 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_loadvm:
loadvm = optarg;
break;

View File

@@ -0,0 +1,188 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 4 May 2020 11:05:08 +0200
Subject: [PATCH] PVE: add optional buffer size to QEMUFile
So we can use a 4M buffer for savevm-async which should
increase performance storing the state onto ceph.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[increase max IOV count in QEMUFile to actually write more data]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/qemu-file.c | 38 +++++++++++++++++++++++++-------------
migration/qemu-file.h | 1 +
migration/savevm-async.c | 4 ++--
3 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 6338d8e2ff..6697a93a7e 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -30,8 +30,8 @@
#include "trace.h"
#include "qapi/error.h"
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+#define DEFAULT_IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
struct QEMUFile {
const QEMUFileOps *ops;
@@ -45,7 +45,8 @@ struct QEMUFile {
when reading */
int buf_index;
int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
+ size_t buf_allocated_size;
+ uint8_t *buf;
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -103,7 +104,7 @@ bool qemu_file_mode_is_not_valid(const char *mode)
return false;
}
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, bool has_ioc, size_t buffer_size)
{
QEMUFile *f;
@@ -112,9 +113,17 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
f->opaque = opaque;
f->ops = ops;
f->has_ioc = has_ioc;
+ f->buf_allocated_size = buffer_size;
+ f->buf = malloc(buffer_size);
+
return f;
}
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
+{
+ return qemu_fopen_ops_sized(opaque, ops, has_ioc, DEFAULT_IO_BUF_SIZE);
+}
+
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
{
@@ -349,7 +358,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
}
len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
- IO_BUF_SIZE - pending, &local_error);
+ f->buf_allocated_size - pending, &local_error);
if (len > 0) {
f->buf_size += len;
f->pos += len;
@@ -389,6 +398,9 @@ int qemu_fclose(QEMUFile *f)
ret = ret2;
}
}
+
+ free(f->buf);
+
/* If any error was spotted before closing, we should report it
* instead of the close() return value.
*/
@@ -443,7 +455,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
- if (f->buf_index == IO_BUF_SIZE) {
+ if (f->buf_index == f->buf_allocated_size) {
qemu_fflush(f);
}
}
@@ -469,7 +481,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
- l = IO_BUF_SIZE - f->buf_index;
+ l = f->buf_allocated_size - f->buf_index;
if (l > size) {
l = size;
}
@@ -516,8 +528,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
size_t index;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
- assert(size <= IO_BUF_SIZE - offset);
+ assert(offset < f->buf_allocated_size);
+ assert(size <= f->buf_allocated_size - offset);
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -567,7 +579,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
size_t res;
uint8_t *src;
- res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
if (res == 0) {
return done;
}
@@ -601,7 +613,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
*/
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src = NULL;
@@ -626,7 +638,7 @@ int qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
+ assert(offset < f->buf_allocated_size);
if (index >= f->buf_size) {
qemu_fill_buffer(f);
@@ -778,7 +790,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
const uint8_t *p, size_t size)
{
- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
+ ssize_t blen = f->buf_allocated_size - f->buf_index - sizeof(int32_t);
if (blen < compressBound(size)) {
return -1;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 3f36d4dc8c..67501fd9cf 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -121,6 +121,7 @@ typedef struct QEMUFileHooks {
} QEMUFileHooks;
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc);
+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, bool has_ioc, size_t buffer_size);
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
int qemu_get_fd(QEMUFile *f);
int qemu_fclose(QEMUFile *f);
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 79a0cda906..970ee3b3fc 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -418,7 +418,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
goto restart;
}
- snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+ snap_state.file = qemu_fopen_ops_sized(&snap_state, &block_file_ops, false, 4 * 1024 * 1024);
if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -567,7 +567,7 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker);
/* restore the VM state */
- f = qemu_fopen_ops(be, &loadstate_file_ops);
+ f = qemu_fopen_ops_sized(be, &loadstate_file_ops, false, 4 * 1024 * 1024);
if (!f) {
error_setg(errp, "Could not open VM state file");
goto the_end;

View File

@@ -1,208 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 4 May 2020 11:05:08 +0200
Subject: [PATCH] PVE: add optional buffer size to QEMUFile
So we can use a 4M buffer for savevm-async which should
increase performance storing the state onto ceph.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[increase max IOV count in QEMUFile to actually write more data]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to removal of QEMUFileOps]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/qemu-file.c | 48 +++++++++++++++++++++++++++-------------
migration/qemu-file.h | 2 ++
migration/savevm-async.c | 5 +++--
3 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index b6d2f588bd..754dc0b3f7 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -34,8 +34,8 @@
#include "rdma.h"
#include "io/channel-file.h"
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+#define DEFAULT_IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
struct QEMUFile {
QIOChannel *ioc;
@@ -43,7 +43,8 @@ struct QEMUFile {
int buf_index;
int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
+ size_t buf_allocated_size;
+ uint8_t *buf;
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -100,7 +101,9 @@ int qemu_file_shutdown(QEMUFile *f)
return 0;
}
-static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+static QEMUFile *qemu_file_new_impl(QIOChannel *ioc,
+ bool is_writable,
+ size_t buffer_size)
{
QEMUFile *f;
@@ -109,6 +112,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
object_ref(ioc);
f->ioc = ioc;
f->is_writable = is_writable;
+ f->buf_allocated_size = buffer_size;
+ f->buf = malloc(buffer_size);
return f;
}
@@ -119,17 +124,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
*/
QEMUFile *qemu_file_get_return_path(QEMUFile *f)
{
- return qemu_file_new_impl(f->ioc, !f->is_writable);
+ return qemu_file_new_impl(f->ioc, !f->is_writable, DEFAULT_IO_BUF_SIZE);
}
QEMUFile *qemu_file_new_output(QIOChannel *ioc)
{
- return qemu_file_new_impl(ioc, true);
+ return qemu_file_new_impl(ioc, true, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size)
+{
+ return qemu_file_new_impl(ioc, true, buffer_size);
}
QEMUFile *qemu_file_new_input(QIOChannel *ioc)
{
- return qemu_file_new_impl(ioc, false);
+ return qemu_file_new_impl(ioc, false, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size)
+{
+ return qemu_file_new_impl(ioc, false, buffer_size);
}
/*
@@ -327,7 +342,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
do {
len = qio_channel_read(f->ioc,
(char *)f->buf + pending,
- IO_BUF_SIZE - pending,
+ f->buf_allocated_size - pending,
&local_error);
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
@@ -367,6 +382,9 @@ int qemu_fclose(QEMUFile *f)
ret = ret2;
}
g_clear_pointer(&f->ioc, object_unref);
+
+ free(f->buf);
+
error_free(f->last_error_obj);
g_free(f);
trace_qemu_file_fclose();
@@ -415,7 +433,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
- if (f->buf_index == IO_BUF_SIZE) {
+ if (f->buf_index == f->buf_allocated_size) {
qemu_fflush(f);
}
}
@@ -440,7 +458,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
- l = IO_BUF_SIZE - f->buf_index;
+ l = f->buf_allocated_size - f->buf_index;
if (l > size) {
l = size;
}
@@ -586,8 +604,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si
size_t index;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
- assert(size <= IO_BUF_SIZE - offset);
+ assert(offset < f->buf_allocated_size);
+ assert(size <= f->buf_allocated_size - offset);
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -637,7 +655,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
size_t res;
uint8_t *src;
- res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
if (res == 0) {
return done;
}
@@ -671,7 +689,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
*/
size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src = NULL;
@@ -696,7 +714,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
+ assert(offset < f->buf_allocated_size);
if (index >= f->buf_size) {
qemu_fill_buffer(f);
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 11c2120edd..edf3c5d147 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -30,7 +30,9 @@
#include "io/channel.h"
QEMUFile *qemu_file_new_input(QIOChannel *ioc);
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size);
QEMUFile *qemu_file_new_output(QIOChannel *ioc);
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size);
int qemu_fclose(QEMUFile *f);
/*
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index ee8ef316d0..1e79fce9ba 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -390,7 +390,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
&snap_state.bs_pos));
- snap_state.file = qemu_file_new_output(ioc);
+ snap_state.file = qemu_file_new_output_sized(ioc, 4 * 1024 * 1024);
if (!snap_state.file) {
error_setg(errp, "failed to open '%s'", statefile);
@@ -534,7 +534,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
bdrv_op_block_all(bs, blocker);
/* restore the VM state */
- f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+ f = qemu_file_new_input_sized(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)),
+ 4 * 1024 * 1024);
if (!f) {
error_setg(errp, "Could not open VM state file");
goto the_end;

View File

@@ -4,34 +4,30 @@ Date: Mon, 6 Apr 2020 12:16:47 +0200
Subject: [PATCH] PVE: block: add the zeroinit block driver filter
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures
adhere to block graph lock requirements
use dedicated function to open file child]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/meson.build | 1 +
block/zeroinit.c | 207 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 208 insertions(+)
block/zeroinit.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 197 insertions(+)
create mode 100644 block/zeroinit.c
diff --git a/block/meson.build b/block/meson.build
index f1262ec2ba..6a60b5d6b9 100644
index 0450914c7a..7a0bc3df09 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -39,6 +39,7 @@ block_ss.add(files(
'throttle.c',
'throttle-groups.c',
@@ -41,6 +41,7 @@ block_ss.add(files(
'vmdk.c',
'vpc.c',
'write-threshold.c',
+ 'zeroinit.c',
), zstd, zlib)
), zstd, zlib, gnutls)
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/zeroinit.c b/block/zeroinit.c
new file mode 100644
index 0000000000..2b2b194ccf
index 0000000000..5529627f7e
--- /dev/null
+++ b/block/zeroinit.c
@@ -0,0 +1,207 @@
@@ -0,0 +1,196 @@
+/*
+ * Filter to fake a zero-initialized block device.
+ *
@@ -45,8 +41,6 @@ index 0000000000..2b2b194ccf
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+#include "block/graph-lock.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
@@ -112,9 +106,10 @@ index 0000000000..2b2b194ccf
+ }
+
+ /* Open the raw file */
+ ret = bdrv_open_file_child(qemu_opt_get(opts, "x-next"), options, "next",
+ bs, &local_err);
+ if (ret < 0) {
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options, "next",
+ bs, &child_of_bds, BDRV_CHILD_FILTERED, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
@@ -125,9 +120,7 @@ index 0000000000..2b2b194ccf
+ ret = 0;
+fail:
+ if (ret < 0) {
+ bdrv_graph_wrlock();
+ bdrv_unref_child(bs, bs->file);
+ bdrv_graph_wrunlock();
+ }
+ qemu_opts_del(opts);
+ return ret;
@@ -139,32 +132,28 @@ index 0000000000..2b2b194ccf
+ (void)s;
+}
+
+static coroutine_fn int64_t GRAPH_RDLOCK
+zeroinit_co_getlength(BlockDriverState *bs)
+static int64_t zeroinit_getlength(BlockDriverState *bs)
+{
+ return bdrv_co_getlength(bs->file->bs);
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn zeroinit_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
+static int coroutine_fn zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ if (offset >= s->extents)
+ return 0;
+ return bdrv_pwrite_zeroes(bs->file, offset, bytes, flags);
+ return bdrv_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn zeroinit_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ int64_t extents = offset + bytes;
@@ -173,37 +162,33 @@ index 0000000000..2b2b194ccf
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+zeroinit_co_flush(BlockDriverState *bs)
+static coroutine_fn int zeroinit_co_flush(BlockDriverState *bs)
+{
+ return bdrv_co_flush(bs->file->bs);
+}
+
+static int GRAPH_RDLOCK
+zeroinit_has_zero_init(BlockDriverState *bs)
+static int zeroinit_has_zero_init(BlockDriverState *bs)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ return s->has_zero_init;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+zeroinit_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+static int coroutine_fn zeroinit_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
+{
+ return bdrv_co_pdiscard(bs->file, offset, bytes);
+ return bdrv_co_pdiscard(bs->file, offset, count);
+}
+
+static int GRAPH_RDLOCK
+zeroinit_co_truncate(BlockDriverState *bs, int64_t offset, _Bool exact,
+ PreallocMode prealloc, BdrvRequestFlags req_flags,
+ Error **errp)
+static int zeroinit_co_truncate(BlockDriverState *bs, int64_t offset,
+ _Bool exact, PreallocMode prealloc,
+ BdrvRequestFlags req_flags, Error **errp)
+{
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, req_flags, errp);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+zeroinit_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int zeroinit_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ return bdrv_co_get_info(bs->file->bs, bdi);
+ return bdrv_get_info(bs->file->bs, bdi);
+}
+
+static BlockDriver bdrv_zeroinit = {
@@ -212,9 +197,9 @@ index 0000000000..2b2b194ccf
+ .instance_size = sizeof(BDRVZeroinitState),
+
+ .bdrv_parse_filename = zeroinit_parse_filename,
+ .bdrv_open = zeroinit_open,
+ .bdrv_file_open = zeroinit_open,
+ .bdrv_close = zeroinit_close,
+ .bdrv_co_getlength = zeroinit_co_getlength,
+ .bdrv_getlength = zeroinit_getlength,
+ .bdrv_child_perm = bdrv_default_perms,
+ .bdrv_co_flush_to_disk = zeroinit_co_flush,
+
@@ -230,7 +215,7 @@ index 0000000000..2b2b194ccf
+ .bdrv_co_pdiscard = zeroinit_co_pdiscard,
+
+ .bdrv_co_truncate = zeroinit_co_truncate,
+ .bdrv_co_get_info = zeroinit_co_get_info,
+ .bdrv_get_info = zeroinit_get_info,
+};
+
+static void bdrv_zeroinit_init(void)

View File

@@ -10,16 +10,16 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qemu-options.hx | 3 +++
system/vl.c | 8 ++++++++
softmmu/vl.c | 8 ++++++++
2 files changed, 11 insertions(+)
diff --git a/qemu-options.hx b/qemu-options.hx
index c05f411599..0732077a0e 100644
index 002ba697e9..a05959b9f1 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1239,6 +1239,9 @@ legacy PC, they are not recommended for modern configurations.
@@ -1005,6 +1005,9 @@ DEFHEADING()
ERST
DEFHEADING(Block device options:)
+DEF("id", HAS_ARG, QEMU_OPTION_id,
+ "-id n set the VMID", QEMU_ARCH_ALL)
@@ -27,11 +27,11 @@ index c05f411599..0732077a0e 100644
DEF("fda", HAS_ARG, QEMU_OPTION_fda,
"-fda/-fdb file use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
diff --git a/system/vl.c b/system/vl.c
index 39d451bb41..e7cae51f13 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -2762,6 +2762,7 @@ void qemu_init(int argc, char **argv)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 220c67cd32..d87cf6e103 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2736,6 +2736,7 @@ void qemu_init(int argc, char **argv, char **envp)
MachineClass *machine_class;
bool userconfig = true;
FILE *vmstate_dump_file = NULL;
@@ -39,9 +39,9 @@ index 39d451bb41..e7cae51f13 100644
qemu_add_opts(&qemu_drive_opts);
qemu_add_drive_opts(&qemu_legacy_drive_opts);
@@ -3374,6 +3375,13 @@ void qemu_init(int argc, char **argv)
machine_parse_property_opt(qemu_find_opts("smp-opts"),
"smp", optarg);
@@ -3360,6 +3361,13 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_smp:
machine_parse_property_opt(qemu_find_opts("smp-opts"), "smp", optarg, &error_fatal);
break;
+ case QEMU_OPTION_id:
+ vm_id = strtol(optarg, (char **)&optarg, 10);
@@ -50,6 +50,6 @@ index 39d451bb41..e7cae51f13 100644
+ exit(1);
+ }
+ break;
#ifdef CONFIG_VNC
case QEMU_OPTION_vnc:
vnc_parse(optarg);
break;

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 62f3bbf203..89e0c7d995 100644
index 2a20982066..7968ad5a93 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -263,6 +263,15 @@ static void apic_reset_common(DeviceState *dev)
@@ -278,6 +278,15 @@ static void apic_reset_common(DeviceState *dev)
info->vapic_base_update(s);
apic_init_reset(dev);

View File

@@ -9,14 +9,14 @@ Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/file-posix.c | 59 ++++++++++++++++++++++++++++++--------------
qapi/block-core.json | 7 +++++-
2 files changed, 46 insertions(+), 20 deletions(-)
qapi/block-core.json | 3 ++-
2 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index e2ea071315..4c3dc56c8e 100644
index 3ac5177cbb..907aa3f22e 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2884,6 +2884,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2443,6 +2443,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
int fd;
uint64_t perm, shared;
int result = 0;
@@ -24,7 +24,7 @@ index e2ea071315..4c3dc56c8e 100644
/* Validate options and set default values */
assert(options->driver == BLOCKDEV_DRIVER_FILE);
@@ -2924,19 +2925,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2483,19 +2484,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
@@ -59,7 +59,7 @@ index e2ea071315..4c3dc56c8e 100644
}
/* Clear the file by truncating it to 0 */
@@ -2990,13 +2994,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2549,13 +2553,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
}
out_unlock:
@@ -82,7 +82,7 @@ index e2ea071315..4c3dc56c8e 100644
}
out_close:
@@ -3020,6 +3026,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2580,6 +2586,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
PreallocMode prealloc;
char *buf = NULL;
Error *local_err = NULL;
@@ -90,7 +90,7 @@ index e2ea071315..4c3dc56c8e 100644
/* Skip file: protocol prefix */
strstart(filename, "file:", &filename);
@@ -3042,6 +3049,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2602,6 +2609,18 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
return -EINVAL;
}
@@ -109,7 +109,7 @@ index e2ea071315..4c3dc56c8e 100644
options = (BlockdevCreateOptions) {
.driver = BLOCKDEV_DRIVER_FILE,
.u.file = {
@@ -3053,6 +3072,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2613,6 +2632,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
.nocow = nocow,
.has_extent_size_hint = has_extent_size_hint,
.extent_size_hint = extent_size_hint,
@@ -119,21 +119,10 @@ index e2ea071315..4c3dc56c8e 100644
};
return raw_co_create(&options, errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 48ba32049f..321d1fd0e1 100644
index 6356a63695..fdfa579d00 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4974,6 +4974,10 @@
# @extent-size-hint: Extent size hint to add to the image file; 0 for
# not adding an extent size hint (default: 1 MB, since 5.1)
#
+# @locking: whether to enable file locking. If set to 'auto', only
+# enable when Open File Descriptor (OFD) locking API is available
+# (default: auto).
+#
# Since: 2.12
##
{ 'struct': 'BlockdevCreateOptionsFile',
@@ -4981,7 +4985,8 @@
@@ -4341,7 +4341,8 @@
'size': 'size',
'*preallocation': 'PreallocMode',
'*nocow': 'bool',

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/monitor/qmp.c b/monitor/qmp.c
index eb181d5979..20fc0d20a6 100644
index 6b8cfcf6d8..3ec67e32d3 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -534,8 +534,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
@@ -519,8 +519,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
qemu_chr_fe_set_echo(&mon->common.chr, true);
/* Note: we run QMP monitor in I/O thread when @chr supports that */

View File

@@ -26,10 +26,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index f29fe95964..2c327fc36a 100644
index 2cf2f321f9..e0f857820d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -180,7 +180,8 @@ GlobalProperty hw_compat_4_0[] = {
@@ -107,7 +107,8 @@ GlobalProperty hw_compat_4_0[] = {
{ "virtio-vga", "edid", "false" },
{ "virtio-gpu-device", "edid", "false" },
{ "virtio-device", "use-started", "false" },

View File

@@ -11,90 +11,90 @@ and only if 'is-current').
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to QAPI changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/core/machine-qmp-cmds.c | 5 +++++
hw/core/machine-qmp-cmds.c | 6 ++++++
include/hw/boards.h | 2 ++
qapi/machine.json | 3 +++
system/vl.c | 24 ++++++++++++++++++++++++
4 files changed, 34 insertions(+)
qapi/machine.json | 4 +++-
softmmu/vl.c | 25 +++++++++++++++++++++++++
4 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 52a6d74820..362128842d 100644
index 8f8d5d5276..370e66d9cc 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -94,6 +94,11 @@ MachineInfoList *qmp_query_machines(bool has_compat_props, bool compat_props,
@@ -102,6 +102,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
info->has_is_current = true;
info->is_current = true;
+
+ // PVE version string only exists for current machine
+ if (mc->pve_version) {
+ info->has_pve_version = true;
+ info->pve_version = g_strdup(mc->pve_version);
+ }
}
if (mc->default_cpu_type) {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 36fbb9b59d..d1741ea121 100644
index accd6eff35..1b16728389 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -268,6 +268,8 @@ struct MachineClass {
@@ -205,6 +205,8 @@ struct MachineClass {
const char *desc;
const char *deprecation_reason;
+ const char *pve_version;
+
void (*init)(MachineState *state);
void (*reset)(MachineState *state, ResetType type);
void (*reset)(MachineState *state);
void (*wakeup)(MachineState *state);
diff --git a/qapi/machine.json b/qapi/machine.json
index 16366b774a..12cfd3f260 100644
index cf120ac343..a6f483af4f 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -189,6 +189,8 @@
@@ -160,6 +160,8 @@
#
# @acpi: machine type supports ACPI (since 8.0)
# @default-ram-id: the default ID of initial RAM memory backend (since 5.2)
#
+# @pve-version: custom PVE version suffix specified as 'machine+pveN'
+#
# @compat-props: The machine type's compatibility properties. Only
# present when query-machines argument @compat-props is true.
# (since 9.1)
@@ -205,6 +207,7 @@
# Since: 1.2
##
{ 'struct': 'MachineInfo',
@@ -167,7 +169,7 @@
'*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
'*default-ram-id': 'str', 'acpi': 'bool',
+ '*pve-version': 'str',
'*compat-props': { 'type': ['CompatProperty'],
'features': ['unstable'] } } }
- '*default-ram-id': 'str' } }
+ '*default-ram-id': 'str', '*pve-version': 'str' } }
diff --git a/system/vl.c b/system/vl.c
index e7cae51f13..3f4916ac5a 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -1675,6 +1675,7 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
##
# @query-machines:
diff --git a/softmmu/vl.c b/softmmu/vl.c
index d87cf6e103..e9d40065bc 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1621,6 +1621,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
static MachineClass *select_machine(QDict *qdict, Error **errp)
{
ERRP_GUARD();
const char *machine_type = qdict_get_try_str(qdict, "type");
const char *optarg = qdict_get_try_str(qdict, "type");
+ const char *pvever = qdict_get_try_str(qdict, "pvever");
g_autoptr(GSList) machines = object_class_get_list(TYPE_MACHINE, false);
MachineClass *machine_class = NULL;
GSList *machines = object_class_get_list(TYPE_MACHINE, false);
MachineClass *machine_class;
Error *local_err = NULL;
@@ -1638,6 +1639,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
}
}
@@ -1694,7 +1695,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
if (!machine_class) {
error_append_hint(errp,
"Use -machine help to list supported machines\n");
+ } else {
+ if (machine_class) {
+ machine_class->pve_version = g_strdup(pvever);
+ qdict_del(qdict, "pvever");
}
+ }
+
return machine_class;
}
@@ -3316,12 +3321,31 @@ void qemu_init(int argc, char **argv)
g_slist_free(machines);
if (local_err) {
error_append_hint(&local_err, "Use -machine help to list supported machines\n");
@@ -3312,12 +3318,31 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_machine:
{
bool help;

View File

@@ -3,71 +3,64 @@ From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:57 +0200
Subject: [PATCH] PVE-Backup: add vma backup format code
Notes about partial restoring: skipping a certain drive is done via a
map line of the form skip=drive-scsi0. Since in PVE, most archives are
compressed and piped to vma for restore, it's not easily possible to
skip reads.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: improvements during create
allow partial restore
allow specifying disk formats for create operation]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
[FE: create: register all streams before entering coroutines]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/meson.build | 2 +
meson.build | 5 +
vma-reader.c | 867 ++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 816 ++++++++++++++++++++++++++++++++++++++++
vma.c | 941 ++++++++++++++++++++++++++++++++++++++++++++++
vma-reader.c | 857 ++++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 790 ++++++++++++++++++++++++++++++++++++++++++
vma.c | 851 +++++++++++++++++++++++++++++++++++++++++++++
vma.h | 150 ++++++++
6 files changed, 2781 insertions(+)
6 files changed, 2655 insertions(+)
create mode 100644 vma-reader.c
create mode 100644 vma-writer.c
create mode 100644 vma.c
create mode 100644 vma.h
diff --git a/block/meson.build b/block/meson.build
index 6a60b5d6b9..652c8cbdb7 100644
index 7a0bc3df09..9ce9246194 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -42,6 +42,8 @@ block_ss.add(files(
@@ -44,6 +44,8 @@ block_ss.add(files(
'zeroinit.c',
), zstd, zlib)
), zstd, zlib, gnutls)
+block_ss.add(files('../vma-writer.c'), libuuid)
+
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
block_ss.add(when: 'CONFIG_QCOW1', if_true: files('qcow.c'))
diff --git a/meson.build b/meson.build
index 147097c652..b9b673c271 100644
index b3e7ec0e92..cc46eabb42 100644
--- a/meson.build
+++ b/meson.build
@@ -2129,6 +2129,8 @@ endif
@@ -1064,6 +1064,8 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
+libuuid = cc.find_library('uuid', required: true)
+
# libselinux
selinux = dependency('libselinux',
required: get_option('selinux'),
@@ -4344,6 +4346,9 @@ if have_tools
dependencies: [blockdev, qemuutil, selinux],
install: true)
# Malloc tests
malloc = []
@@ -2743,6 +2745,9 @@ if have_tools
qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
dependencies: [blockdev, qemuutil, gnutls], install: true)
+ vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
+ dependencies: [authz, block, crypto, io, qemuutil, qom], install: true)
+ dependencies: [authz, block, crypto, io, qom], install: true)
+
subdir('storage-daemon')
foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
subdir('contrib/rdmacm-mux')
subdir('contrib/elf2dmp')
diff --git a/vma-reader.c b/vma-reader.c
new file mode 100644
index 0000000000..bb65ad313c
index 0000000000..2b1d1cdab3
--- /dev/null
+++ b/vma-reader.c
@@ -0,0 +1,867 @@
@@ -0,0 +1,857 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -85,11 +78,11 @@ index 0000000000..bb65ad313c
+#include <glib.h>
+#include <uuid/uuid.h>
+
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/ratelimit.h"
+#include "vma.h"
+#include "block/block.h"
+#include "block/graph-lock.h"
+#include "sysemu/block-backend.h"
+
+static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
@@ -99,7 +92,6 @@ index 0000000000..bb65ad313c
+ bool write_zeroes;
+ unsigned long *bitmap;
+ int bitmap_size;
+ bool skip;
+} VmaRestoreState;
+
+struct VmaReader {
@@ -263,9 +255,6 @@ index 0000000000..bb65ad313c
+ if (vmar->rstate[i].bitmap) {
+ g_free(vmar->rstate[i].bitmap);
+ }
+ if (vmar->rstate[i].target) {
+ blk_unref(vmar->rstate[i].target);
+ }
+ }
+
+ if (vmar->md5csum) {
@@ -382,6 +371,7 @@ index 0000000000..bb65ad313c
+ }
+
+
+ int count = 0;
+ for (i = 1; i < 256; i++) {
+ VmaDeviceInfoHeader *dih = &h->dev_info[i];
+ uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
@@ -389,6 +379,7 @@ index 0000000000..bb65ad313c
+ const char *devname = get_header_str(vmar, devname_ptr);
+
+ if (size && devname) {
+ count++;
+ vmar->devinfo[i].size = size;
+ vmar->devinfo[i].devname = devname;
+
@@ -495,14 +486,13 @@ index 0000000000..bb65ad313c
+}
+
+static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
+ BlockBackend *target, bool write_zeroes, bool skip)
+ BlockBackend *target, bool write_zeroes)
+{
+ assert(vmar);
+ assert(dev_id);
+
+ vmar->rstate[dev_id].target = target;
+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
+ vmar->rstate[dev_id].skip = skip;
+
+ int64_t size = vmar->devinfo[dev_id].size;
+
@@ -517,30 +507,28 @@ index 0000000000..bb65ad313c
+}
+
+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
+ bool write_zeroes, bool skip, Error **errp)
+ bool write_zeroes, Error **errp)
+{
+ assert(vmar);
+ assert(target != NULL || skip);
+ assert(target != NULL);
+ assert(dev_id);
+ assert(vmar->rstate[dev_id].target == NULL && !vmar->rstate[dev_id].skip);
+ assert(vmar->rstate[dev_id].target == NULL);
+
+ if (target != NULL) {
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
+
+ allocate_rstate(vmar, dev_id, target, write_zeroes, skip);
+ allocate_rstate(vmar, dev_id, target, write_zeroes);
+
+ return 0;
+}
@@ -598,12 +586,10 @@ index 0000000000..bb65ad313c
+ }
+ }
+ } else {
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, buf, nb_sectors * BDRV_SECTOR_SIZE, 0);
+ if (res < 0) {
+ bdrv_graph_rdlock_main_loop();
+ error_setg(errp, "blk_pwrite to %s failed (%d)",
+ bdrv_get_device_name(blk_bs(target)), res);
+ bdrv_graph_rdunlock_main_loop();
+ return -1;
+ }
+ }
@@ -635,23 +621,19 @@ index 0000000000..bb65ad313c
+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
+ BlockBackend *target = NULL;
+
+ bool skip = rstate->skip;
+
+ if (dev_id != vmar->vmstate_stream) {
+ target = rstate->target;
+ if (!verify && !target && !skip) {
+ if (!verify && !target) {
+ error_setg(errp, "got wrong dev id %d", dev_id);
+ return -1;
+ }
+
+ if (!skip) {
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
+
+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
+ } else {
@@ -697,7 +679,7 @@ index 0000000000..bb65ad313c
+ return -1;
+ }
+
+ if (!verify && !skip) {
+ if (!verify) {
+ int nb_sectors = end_sector - sector_num;
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ buf + start, sector_num, nb_sectors,
@@ -733,7 +715,7 @@ index 0000000000..bb65ad313c
+ return -1;
+ }
+
+ if (!verify && !skip) {
+ if (!verify) {
+ int nb_sectors = end_sector - sector_num;
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ buf + start, sector_num,
@@ -758,7 +740,7 @@ index 0000000000..bb65ad313c
+ vmar->partial_zero_cluster_data += zero_size;
+ }
+
+ if (rstate->write_zeroes && !verify && !skip) {
+ if (rstate->write_zeroes && !verify) {
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ zero_vma_block, sector_num,
+ nb_sectors, errp) < 0) {
@@ -883,7 +865,8 @@ index 0000000000..bb65ad313c
+
+ int64_t cluster_num, end;
+
+ end = DIV_ROUND_UP(vmar->devinfo[i].size, VMA_CLUSTER_SIZE);
+ end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
+ VMA_CLUSTER_SIZE;
+
+ for (cluster_num = 0; cluster_num < end; cluster_num++) {
+ if (!vma_reader_get_bitmap(rstate, cluster_num)) {
@@ -928,7 +911,7 @@ index 0000000000..bb65ad313c
+
+ for (dev_id = 1; dev_id < 255; dev_id++) {
+ if (vma_reader_get_device_info(vmar, dev_id)) {
+ allocate_rstate(vmar, dev_id, NULL, false, false);
+ allocate_rstate(vmar, dev_id, NULL, false);
+ }
+ }
+
@@ -937,10 +920,10 @@ index 0000000000..bb65ad313c
+
diff --git a/vma-writer.c b/vma-writer.c
new file mode 100644
index 0000000000..3f489092df
index 0000000000..11d8321ffd
--- /dev/null
+++ b/vma-writer.c
@@ -0,0 +1,816 @@
@@ -0,0 +1,790 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -956,8 +939,6 @@ index 0000000000..3f489092df
+
+#include "qemu/osdep.h"
+#include <glib.h>
+#include <linux/magic.h>
+#include <sys/vfs.h>
+#include <uuid/uuid.h>
+
+#include "vma.h"
@@ -966,8 +947,6 @@ index 0000000000..3f489092df
+#include "qemu/main-loop.h"
+#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/memalign.h"
+
+#define DEBUG_VMA 0
+
@@ -1134,7 +1113,8 @@ index 0000000000..3f489092df
+ vmaw->stream_info[n].devname = g_strdup(devname);
+ vmaw->stream_info[n].size = size;
+
+ vmaw->stream_info[n].cluster_count = DIV_ROUND_UP(size, VMA_CLUSTER_SIZE);
+ vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
+ VMA_CLUSTER_SIZE;
+
+ vmaw->stream_count = n;
+
@@ -1149,10 +1129,10 @@ index 0000000000..3f489092df
+{
+ assert(qemu_in_coroutine());
+ AioContext *ctx = qemu_get_current_aio_context();
+ aio_set_fd_handler(ctx, fd, NULL, (IOHandler *)qemu_coroutine_enter, NULL,
+ aio_set_fd_handler(ctx, fd, false, NULL, (IOHandler *)qemu_coroutine_enter,
+ NULL, qemu_coroutine_self());
+ qemu_coroutine_yield();
+ aio_set_fd_handler(ctx, fd, NULL, NULL, NULL, NULL, NULL);
+ aio_set_fd_handler(ctx, fd, false, NULL, NULL, NULL, NULL);
+}
+
+static ssize_t coroutine_fn
@@ -1201,23 +1181,6 @@ index 0000000000..3f489092df
+ return (done == bytes) ? bytes : -1;
+}
+
+static bool is_path_tmpfs(const char *path) {
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = statfs(path, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret != 0) {
+ warn_report("statfs call for %s failed, assuming not tmpfs - %s\n",
+ path, strerror(errno));
+ return false;
+ }
+
+ return fs.f_type == TMPFS_MAGIC;
+}
+
+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
+{
+ const char *p;
@@ -1267,19 +1230,12 @@ index 0000000000..3f489092df
+ }
+ /* try to use O_NONBLOCK */
+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
+ } else {
+ gchar *dirname = g_path_get_dirname(filename);
+ oflags = O_NONBLOCK|O_WRONLY|O_EXCL;
+ if (!is_path_tmpfs(dirname)) {
+ oflags |= O_DIRECT;
+ }
+ g_free(dirname);
+ } else {
+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_EXCL;
+ vmaw->fd = qemu_create(filename, oflags, 0644, errp);
+ }
+
+ if (vmaw->fd < 0) {
+ error_free(*errp);
+ *errp = NULL;
+ error_setg(errp, "can't open file %s - %s\n", filename,
+ g_strerror(errno));
+ goto err;
@@ -1514,16 +1470,17 @@ index 0000000000..3f489092df
+ int i;
+
+ g_assert(vmaw != NULL);
+ g_assert(status != NULL);
+
+ status->status = vmaw->status;
+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
+ for (i = 0; i <= 255; i++) {
+ status->stream_info[i] = vmaw->stream_info[i];
+ if (status) {
+ status->status = vmaw->status;
+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
+ for (i = 0; i <= 255; i++) {
+ status->stream_info[i] = vmaw->stream_info[i];
+ }
+
+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
+ }
+
+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
+
+ status->closed = vmaw->closed;
+
+ return vmaw->status;
@@ -1759,10 +1716,10 @@ index 0000000000..3f489092df
+}
diff --git a/vma.c b/vma.c
new file mode 100644
index 0000000000..8d4b4be414
index 0000000000..df542b7732
--- /dev/null
+++ b/vma.c
@@ -0,0 +1,941 @@
@@ -0,0 +1,851 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -1780,11 +1737,11 @@ index 0000000000..8d4b4be414
+#include <glib.h>
+
+#include "vma.h"
+#include "qemu-common.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include "qemu/memalign.h"
+#include "qapi/qmp/qdict.h"
+#include "sysemu/block-backend.h"
+
@@ -1794,9 +1751,9 @@ index 0000000000..8d4b4be414
+ "usage: vma command [command options]\n"
+ "\n"
+ "vma list <filename>\n"
+ "vma config <filename> [-c <config>]\n"
+ "vma create <filename> [-c <config>] [-d format=<format>:<device name>=<path> [-d ...]]\n"
+ "vma extract <filename> [-d <drive-list>] [-r <fifo>] <targetdir>\n"
+ "vma config <filename> [-c config]\n"
+ "vma create <filename> [-c config] pathname ...\n"
+ "vma extract <filename> [-r <fifo>] <targetdir>\n"
+ "vma verify <filename> [-v]\n"
+ ;
+
@@ -1903,7 +1860,6 @@ index 0000000000..8d4b4be414
+ char *throttling_group;
+ char *cache;
+ bool write_zero;
+ bool skip;
+} RestoreMap;
+
+static bool try_parse_option(char **line, const char *optname, char **out, const char *inbuf) {
@@ -1941,10 +1897,9 @@ index 0000000000..8d4b4be414
+ const char *filename;
+ const char *dirname;
+ const char *readmap = NULL;
+ gchar **drive_list = NULL;
+
+ for (;;) {
+ c = getopt(argc, argv, "hvd:r:");
+ c = getopt(argc, argv, "hvr:");
+ if (c == -1) {
+ break;
+ }
@@ -1953,9 +1908,6 @@ index 0000000000..8d4b4be414
+ case 'h':
+ help();
+ break;
+ case 'd':
+ drive_list = g_strsplit(optarg, ",", 254);
+ break;
+ case 'r':
+ readmap = optarg;
+ break;
@@ -2015,89 +1967,76 @@ index 0000000000..8d4b4be414
+ char *bps = NULL;
+ char *group = NULL;
+ char *cache = NULL;
+ char *devname = NULL;
+ bool skip = false;
+ uint64_t bps_value = 0;
+ const char *path = NULL;
+ bool write_zero = true;
+
+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
+ break;
+ }
+ int len = strlen(line);
+ if (line[len - 1] == '\n') {
+ line[len - 1] = '\0';
+ len = len - 1;
+ if (len == 0) {
+ if (len == 1) {
+ break;
+ }
+ }
+
+ if (strncmp(line, "skip", 4) == 0) {
+ if (len < 6 || line[4] != '=') {
+ g_error("read map failed - option 'skip' has no value ('%s')",
+ inbuf);
+ } else {
+ devname = line + 5;
+ skip = true;
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
+ } else {
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
+ }
+
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
+
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ path = extract_devname(path, &devname, -1);
+ }
+
+ uint64_t bps_value = 0;
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
+
+ const char *path;
+ bool write_zero;
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ char *devname = NULL;
+ path = extract_devname(path, &devname, -1);
+ if (!devname) {
+ g_error("read map failed - no dev name specified ('%s')",
+ inbuf);
+ }
+
+ RestoreMap *restore_map = g_new0(RestoreMap, 1);
+ restore_map->devname = g_strdup(devname);
+ restore_map->path = g_strdup(path);
+ restore_map->format = format;
+ restore_map->throttling_bps = bps_value;
+ restore_map->throttling_group = group;
+ restore_map->cache = cache;
+ restore_map->write_zero = write_zero;
+ restore_map->skip = skip;
+ RestoreMap *map = g_new0(RestoreMap, 1);
+ map->devname = g_strdup(devname);
+ map->path = g_strdup(path);
+ map->format = format;
+ map->throttling_bps = bps_value;
+ map->throttling_group = group;
+ map->cache = cache;
+ map->write_zero = write_zero;
+
+ g_hash_table_insert(devmap, restore_map->devname, restore_map);
+ g_hash_table_insert(devmap, map->devname, map);
+
+ };
+ }
+
+ int i;
+ int vmstate_fd = -1;
+ bool drive_rename_bitmap[255];
+ memset(drive_rename_bitmap, 0, sizeof(drive_rename_bitmap));
+ guint8 vmstate_stream = 0;
+
+ BlockBackend *blk = NULL;
+
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
+ vmstate_stream = i;
+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
+ if (vmstate_fd < 0) {
@@ -2113,25 +2052,8 @@ index 0000000000..8d4b4be414
+ const char *cache = NULL;
+ int flags = BDRV_O_RDWR;
+ bool write_zero = true;
+ bool skip = false;
+
+ BlockBackend *blk = NULL;
+
+ if (drive_list) {
+ skip = true;
+ int j;
+ for (j = 0; drive_list[j]; j++) {
+ if (strcmp(drive_list[j], di->devname) == 0) {
+ skip = false;
+ drive_rename_bitmap[i] = true;
+ break;
+ }
+ }
+ } else {
+ drive_rename_bitmap[i] = true;
+ }
+
+ if (!skip && readmap) {
+ if (readmap) {
+ RestoreMap *map;
+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
+ if (map == NULL) {
@@ -2143,8 +2065,7 @@ index 0000000000..8d4b4be414
+ throttling_group = map->throttling_group;
+ cache = map->cache;
+ write_zero = map->write_zero;
+ skip = map->skip;
+ } else if (!skip) {
+ } else {
+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
+ dirname, di->devname);
+ printf("DEVINFO %s %zd\n", devfn, di->size);
@@ -2162,60 +2083,57 @@ index 0000000000..8d4b4be414
+ write_zero = false;
+ }
+
+ if (!skip) {
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too.
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
+
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
+
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
+
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
+
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
+ }
+
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
+ }
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too.
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
+
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, skip, &errp) < 0) {
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
+
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
+
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
+ }
+
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
+ }
+
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
+ g_error("%s", error_get_pretty(errp));
+ }
+
@@ -2225,10 +2143,6 @@ index 0000000000..8d4b4be414
+ }
+ }
+
+ if (drive_list) {
+ g_strfreev(drive_list);
+ }
+
+ if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
+ g_error("restore failed - %s", error_get_pretty(errp));
+ }
@@ -2236,7 +2150,7 @@ index 0000000000..8d4b4be414
+ if (!readmap) {
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && drive_rename_bitmap[i]) {
+ if (di && (i != vmstate_stream)) {
+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
+ dirname, di->devname);
+ char *fn = g_strdup_printf("%s/disk-%s.raw",
@@ -2251,6 +2165,8 @@ index 0000000000..8d4b4be414
+
+ vma_reader_destroy(vmar);
+
+ blk_unref(blk);
+
+ bdrv_close_all();
+
+ return ret;
@@ -2335,7 +2251,7 @@ index 0000000000..8d4b4be414
+ struct iovec iov;
+ QEMUIOVector qiov;
+
+ int64_t start, end, readlen;
+ int64_t start, end;
+ int ret = 0;
+
+ unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
@@ -2349,24 +2265,16 @@ index 0000000000..8d4b4be414
+ iov.iov_len = VMA_CLUSTER_SIZE;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ if (start + 1 == end) {
+ memset(buf, 0, VMA_CLUSTER_SIZE);
+ readlen = job->len - start * VMA_CLUSTER_SIZE;
+ assert(readlen > 0 && readlen <= VMA_CLUSTER_SIZE);
+ } else {
+ readlen = VMA_CLUSTER_SIZE;
+ }
+
+ ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
+ readlen, &qiov, 0);
+ VMA_CLUSTER_SIZE, &qiov, 0);
+ if (ret < 0) {
+ vma_writer_set_error(job->vmaw, "read error");
+ vma_writer_set_error(job->vmaw, "read error", -1);
+ goto out;
+ }
+
+ size_t zb = 0;
+ if (vma_writer_write(job->vmaw, job->dev_id, start, buf, &zb) < 0) {
+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed");
+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed", -1);
+ goto out;
+ }
+ }
@@ -2384,16 +2292,14 @@ index 0000000000..8d4b4be414
+
+static int create_archive(int argc, char **argv)
+{
+ int c;
+ int i, c;
+ int verbose = 0;
+ bool expect_format = true;
+ const char *archivename;
+ GList *backup_coroutines = NULL;
+ GList *config_files = NULL;
+ GList *disk_infos = NULL;
+
+ for (;;) {
+ c = getopt(argc, argv, "hvc:d:");
+ c = getopt(argc, argv, "hvc:");
+ if (c == -1) {
+ break;
+ }
@@ -2405,9 +2311,6 @@ index 0000000000..8d4b4be414
+ case 'c':
+ config_files = g_list_append(config_files, optarg);
+ break;
+ case 'd':
+ disk_infos = g_list_append(disk_infos, optarg);
+ break;
+ case 'v':
+ verbose = 1;
+ break;
@@ -2453,48 +2356,16 @@ index 0000000000..8d4b4be414
+ l = g_list_next(l);
+ }
+
+ /*
+ * Don't allow mixing new and old way to specifiy disks.
+ * TODO PVE 9 drop old way and always require format.
+ */
+ if (optind < argc && g_list_first(disk_infos)) {
+ unlink(archivename);
+ g_error("Unexpected extra argument - specify all devices via '-d'");
+ }
+
+ while (optind < argc) {
+ expect_format = false;
+ disk_infos = g_list_append(disk_infos, argv[optind++]);
+ }
+
+ int devcount = 0;
+ GList *disk_l = disk_infos;
+ while (disk_l && disk_l->data) {
+ char *disk_info = disk_l->data;
+ const char *path = NULL;
+ while (optind < argc) {
+ const char *path = argv[optind++];
+ char *devname = NULL;
+ char *format = NULL;
+ QDict *options = qdict_new();
+
+ if (try_parse_option(&disk_info, "format", &format, disk_info)) {
+ qdict_put_str(options, "driver", format);
+ } else {
+ if (expect_format) {
+ unlink(archivename);
+ g_error("No format specified for device: '%s'", disk_info);
+ } else {
+ g_warning("Specifying a device without a format is deprecated"
+ " - use '-d format=<format>:%s'",
+ disk_info);
+ }
+ }
+
+ path = extract_devname(disk_info, &devname, devcount++);
+ path = extract_devname(path, &devname, devcount++);
+
+ Error *errp = NULL;
+ BlockBackend *target;
+
+ target = blk_new_open(path, NULL, options, 0, &errp);
+ target = blk_new_open(path, NULL, NULL, 0, &errp);
+ if (!target) {
+ unlink(archivename);
+ g_error("bdrv_open '%s' failed - %s", path, error_get_pretty(errp));
@@ -2516,8 +2387,6 @@ index 0000000000..8d4b4be414
+ // Don't enter coroutine yet, because it might write the header before
+ // all streams can be registered.
+ backup_coroutines = g_list_append(backup_coroutines, co);
+
+ disk_l = g_list_next(disk_l);
+ }
+
+ VmaStatus vmastat;
@@ -2581,7 +2450,6 @@ index 0000000000..8d4b4be414
+ vma_writer_get_status(vmaw, &vmastat);
+
+ if (verbose) {
+ int i;
+ for (i = 0; i < 256; i++) {
+ VmaStreamInfo *si = &vmastat.stream_info[i];
+ if (si->size) {
@@ -2599,7 +2467,6 @@ index 0000000000..8d4b4be414
+
+ g_list_free(backup_coroutines);
+ g_list_free(config_files);
+ g_list_free(disk_infos);
+ vma_writer_destroy(vmaw);
+ return 0;
+}
@@ -2706,7 +2573,7 @@ index 0000000000..8d4b4be414
+}
diff --git a/vma.h b/vma.h
new file mode 100644
index 0000000000..86d2873aa5
index 0000000000..c895c97f6d
--- /dev/null
+++ b/vma.h
@@ -0,0 +1,150 @@
@@ -2844,7 +2711,7 @@ index 0000000000..86d2873aa5
+int coroutine_fn vma_writer_flush_output(VmaWriter *vmaw);
+
+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
+
+
+VmaReader *vma_reader_create(const char *filename, Error **errp);
@@ -2854,7 +2721,7 @@ index 0000000000..86d2873aa5
+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
+ BlockBackend *target, bool write_zeroes,
+ bool skip, Error **errp);
+ Error **errp);
+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
+ Error **errp);
+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);

View File

@@ -9,23 +9,21 @@ Subject: [PATCH] PVE-Backup: add backup-dump block driver
- job.c: make job_should_pause non-static
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to coroutine changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/backup-dump.c | 172 +++++++++++++++++++++++++++++++
block/backup.c | 30 ++----
block/meson.build | 1 +
include/block/block_int-common.h | 35 +++++++
job.c | 3 +-
5 files changed, 218 insertions(+), 23 deletions(-)
block/backup-dump.c | 168 ++++++++++++++++++++++++++++++++++++++
block/backup.c | 32 +++-----
block/meson.build | 1 +
include/block/block_int.h | 35 ++++++++
job.c | 3 +-
5 files changed, 216 insertions(+), 23 deletions(-)
create mode 100644 block/backup-dump.c
diff --git a/block/backup-dump.c b/block/backup-dump.c
new file mode 100644
index 0000000000..e46abf1070
index 0000000000..93d7f46950
--- /dev/null
+++ b/block/backup-dump.c
@@ -0,0 +1,172 @@
@@ -0,0 +1,168 @@
+/*
+ * BlockDriver to send backup data stream to a callback function
+ *
@@ -37,8 +35,7 @@ index 0000000000..e46abf1070
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/qmp/qdict.h"
+#include "qemu-common.h"
+#include "qom/object_interfaces.h"
+#include "block/block_int.h"
+
@@ -49,8 +46,7 @@ index 0000000000..e46abf1070
+ void *dump_cb_data;
+} BDRVBackupDumpState;
+
+static coroutine_fn int qemu_backup_dump_co_get_info(BlockDriverState *bs,
+ BlockDriverInfo *bdi)
+static int qemu_backup_dump_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVBackupDumpState *s = bs->opaque;
+
@@ -91,7 +87,7 @@ index 0000000000..e46abf1070
+ /* Nothing to do. */
+}
+
+static coroutine_fn int64_t qemu_backup_dump_co_getlength(BlockDriverState *bs)
+static int64_t qemu_backup_dump_getlength(BlockDriverState *bs)
+{
+ BDRVBackupDumpState *s = bs->opaque;
+
@@ -151,8 +147,8 @@ index 0000000000..e46abf1070
+
+ .bdrv_close = qemu_backup_dump_close,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_getlength = qemu_backup_dump_co_getlength,
+ .bdrv_co_get_info = qemu_backup_dump_co_get_info,
+ .bdrv_getlength = qemu_backup_dump_getlength,
+ .bdrv_get_info = qemu_backup_dump_get_info,
+
+ .bdrv_co_writev = qemu_backup_dump_co_writev,
+
@@ -171,7 +167,7 @@ index 0000000000..e46abf1070
+block_init(bdrv_backup_dump_init);
+
+
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
+BlockDriverState *bdrv_backup_dump_create(
+ int dump_cb_block_size,
+ uint64_t byte_size,
+ BackupDumpFunc *dump_cb,
@@ -179,11 +175,9 @@ index 0000000000..e46abf1070
+ Error **errp)
+{
+ BDRVBackupDumpState *state;
+ BlockDriverState *bs = bdrv_new_open_driver(
+ &bdrv_backup_dump_drive, NULL, BDRV_O_RDWR, errp);
+
+ QDict *options = qdict_new();
+ qdict_put_str(options, "driver", "backup-dump-drive");
+
+ BlockDriverState *bs = bdrv_co_open(NULL, NULL, options, BDRV_O_RDWR, errp);
+ if (!bs) {
+ return NULL;
+ }
@@ -199,16 +193,16 @@ index 0000000000..e46abf1070
+ return bs;
+}
diff --git a/block/backup.c b/block/backup.c
index 2e38b30d67..fe69723ada 100644
index bd3614ce70..8bae9b060e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -29,28 +29,6 @@
@@ -31,28 +31,6 @@
#include "block/copy-before-write.h"
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
-typedef struct BackupBlockJob {
- BlockJob common;
- BlockDriverState *cbw;
- BlockDriverState *backup_top;
- BlockDriverState *source_bs;
- BlockDriverState *target_bs;
-
@@ -231,10 +225,11 @@ index 2e38b30d67..fe69723ada 100644
static const BlockJobDriver backup_job_driver;
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -462,6 +440,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
@@ -504,6 +482,16 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
cluster_size = block_copy_cluster_size(bcs);
+ cluster_size = backup_calculate_cluster_size(target, errp);
+ if (cluster_size < 0) {
+ goto error;
+ }
@@ -243,11 +238,12 @@ index 2e38b30d67..fe69723ada 100644
+ if (bdrv_get_info(bs, &bdi) == 0) {
+ cluster_size = MAX(cluster_size, bdi.cluster_size);
+ }
if (perf->max_chunk && perf->max_chunk < cluster_size) {
error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
+
/*
* If source is in backing chain of target assume that target is going to be
* used for "image fleecing", i.e. it should represent a kind of snapshot of
diff --git a/block/meson.build b/block/meson.build
index 652c8cbdb7..e1cf5a2e65 100644
index 9ce9246194..19bc2b7cbb 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,6 +4,7 @@ block_ss.add(files(
@@ -255,28 +251,28 @@ index 652c8cbdb7..e1cf5a2e65 100644
'amend.c',
'backup.c',
+ 'backup-dump.c',
'backup-top.c',
'blkdebug.c',
'blklogwrites.c',
'blkverify.c',
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index ebb4e56a50..e717a74e5f 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 11442893d0..8f6135e6a5 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
#include "block/aio.h"
#include "block/block-common.h"
#include "block/accounting.h"
#include "block/block.h"
+#include "block/block-copy.h"
#include "block/block-global-state.h"
#include "block/snapshot.h"
#include "qemu/iov.h"
@@ -60,6 +61,40 @@
#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
@@ -63,6 +64,40 @@
#define BLOCK_PROBE_BUF_SIZE 512
+typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
+
+BlockDriverState *coroutine_fn bdrv_co_backup_dump_create(
+BlockDriverState *bdrv_backuo_dump_create(
+ int dump_cb_block_size,
+ uint64_t byte_size,
+ BackupDumpFunc *dump_cb,
@@ -288,7 +284,7 @@ index ebb4e56a50..e717a74e5f 100644
+typedef struct BlockCopyState BlockCopyState;
+typedef struct BackupBlockJob {
+ BlockJob common;
+ BlockDriverState *cbw;
+ BlockDriverState *backup_top;
+ BlockDriverState *source_bs;
+ BlockDriverState *target_bs;
+
@@ -312,16 +308,16 @@ index ebb4e56a50..e717a74e5f 100644
BDRV_TRACKED_READ,
BDRV_TRACKED_WRITE,
diff --git a/job.c b/job.c
index 660ce22c56..baf54c8d60 100644
index e7a5d28854..44eec9a441 100644
--- a/job.c
+++ b/job.c
@@ -331,7 +331,8 @@ static bool job_started_locked(Job *job)
@@ -269,7 +269,8 @@ static bool job_started(Job *job)
return job->co;
}
/* Called with job_mutex held. */
-static bool job_should_pause_locked(Job *job)
+bool job_should_pause_locked(Job *job);
+bool job_should_pause_locked(Job *job)
-static bool job_should_pause(Job *job)
+bool job_should_pause(Job *job);
+bool job_should_pause(Job *job)
{
return job->pause_count > 0;
}

View File

@@ -1,59 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 2 Mar 2022 08:35:05 +0100
Subject: [PATCH] block/backup: move bcs bitmap initialization to job creation
For backing up the state of multiple disks from the same time, a job
for each disk has to be created. It's convenient if the jobs don't
have to be started at the same time and if operation of the VM can be
resumed after job creation. This would lead to a window between job
creation and running the job, where writes can happen. But no writes
should happen between setting up the copy-before-write filter and
setting up the block copy state bitmap, because then new writes would
just pass through.
Commit 06e0a9c16405c0a4c1eca33cf286cc04c42066a2 moved initalization of
the bitmap to setting up the copy-before-write filter when sync_mode
is not MIRROR_SYNC_MODE_BITMAP. Ensure that the bitmap is initialized
upon job creation for the remaining case too, by moving the
backup_init_bcs_bitmap call to backup_job_create.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/backup.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index a1292c01ec..2e38b30d67 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
true);
} else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
/*
- * We can't hog the coroutine to initialize this thoroughly.
- * Set a flag and resume work when we are able to yield safely.
+ * Initialization is costly here. Simply set a flag and let the
+ * backup_run coroutine resume work once it can yield safely.
*/
block_copy_set_skip_unallocated(job->bcs, true);
}
@@ -252,8 +252,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
int ret;
- backup_init_bcs_bitmap(s);
-
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0;
int64_t count;
@@ -502,6 +500,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
&error_abort);
bdrv_graph_wrunlock();
+ backup_init_bcs_bitmap(job);
+
return &job->common;
error:

File diff suppressed because it is too large Load Diff

View File

@@ -5,35 +5,33 @@ Subject: [PATCH] PVE-Backup: pbs-restore - new command to restore from proxmox
backup server
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[WB: add namespace support]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
meson.build | 4 +
pbs-restore.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 240 insertions(+)
pbs-restore.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 228 insertions(+)
create mode 100644 pbs-restore.c
diff --git a/meson.build b/meson.build
index f6fb9b4fd8..f666d0f028 100644
index 7d7e474313..dd1c5bdb4e 100644
--- a/meson.build
+++ b/meson.build
@@ -4350,6 +4350,10 @@ if have_tools
@@ -2749,6 +2749,10 @@ if have_tools
vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
dependencies: [authz, block, crypto, io, qemuutil, qom], install: true)
dependencies: [authz, block, crypto, io, qom], install: true)
+ pbs_restore = executable('pbs-restore', files('pbs-restore.c') + genh,
+ dependencies: [authz, block, crypto, io, qemuutil, qom,
+ dependencies: [authz, block, crypto, io, qom,
+ libproxmox_backup_qemu], install: true)
+
subdir('storage-daemon')
foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
subdir('contrib/rdmacm-mux')
subdir('contrib/elf2dmp')
diff --git a/pbs-restore.c b/pbs-restore.c
new file mode 100644
index 0000000000..f03d9bab8d
index 0000000000..4d3f925a1b
--- /dev/null
+++ b/pbs-restore.c
@@ -0,0 +1,236 @@
@@ -0,0 +1,224 @@
+/*
+ * Qemu image restore helper for Proxmox Backup
+ *
@@ -52,6 +50,7 @@ index 0000000000..f03d9bab8d
+#include <getopt.h>
+#include <string.h>
+
+#include "qemu-common.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
@@ -65,7 +64,7 @@ index 0000000000..f03d9bab8d
+static void help(void)
+{
+ const char *help_msg =
+ "usage: pbs-restore [--repository <repo>] [--ns namespace] snapshot archive-name target [command options]\n"
+ "usage: pbs-restore [--repository <repo>] snapshot archive-name target [command options]\n"
+ ;
+
+ printf("%s", help_msg);
@@ -97,7 +96,7 @@ index 0000000000..f03d9bab8d
+ }
+ res = blk_pwrite_zeroes(callback_data->target, offset, data_len, 0);
+ } else {
+ res = blk_pwrite(callback_data->target, offset, data_len, data, 0);
+ res = blk_pwrite(callback_data->target, offset, data, data_len, 0);
+ }
+
+ if (res < 0) {
@@ -113,7 +112,6 @@ index 0000000000..f03d9bab8d
+ Error *main_loop_err = NULL;
+ const char *format = "raw";
+ const char *repository = NULL;
+ const char *backup_ns = NULL;
+ const char *keyfile = NULL;
+ int verbose = false;
+ bool skip_zero = false;
@@ -127,7 +125,6 @@ index 0000000000..f03d9bab8d
+ {"verbose", no_argument, 0, 'v'},
+ {"format", required_argument, 0, 'f'},
+ {"repository", required_argument, 0, 'r'},
+ {"ns", required_argument, 0, 'n'},
+ {"keyfile", required_argument, 0, 'k'},
+ {0, 0, 0, 0}
+ };
@@ -148,9 +145,6 @@ index 0000000000..f03d9bab8d
+ case 'r':
+ repository = g_strdup(argv[optind - 1]);
+ break;
+ case 'n':
+ backup_ns = g_strdup(argv[optind - 1]);
+ break;
+ case 'k':
+ keyfile = g_strdup(argv[optind - 1]);
+ break;
@@ -201,16 +195,8 @@ index 0000000000..f03d9bab8d
+ fprintf(stderr, "connecting to repository '%s'\n", repository);
+ }
+ char *pbs_error = NULL;
+ ProxmoxRestoreHandle *conn = proxmox_restore_new_ns(
+ repository,
+ snapshot,
+ backup_ns,
+ password,
+ keyfile,
+ key_password,
+ fingerprint,
+ &pbs_error
+ );
+ ProxmoxRestoreHandle *conn = proxmox_restore_new(
+ repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
+ if (conn == NULL) {
+ fprintf(stderr, "restore failed: %s\n", pbs_error);
+ return -1;

View File

@@ -0,0 +1,452 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 29 Jun 2020 11:06:03 +0200
Subject: [PATCH] PVE-Backup: Add dirty-bitmap tracking for incremental backups
Uses QEMU's existing MIRROR_SYNC_MODE_BITMAP and a dirty-bitmap on top
of all backed-up drives. This will only execute the data-write callback
for any changed chunks, the PBS rust code will reuse chunks from the
previous index for everything it doesn't receive if reuse_index is true.
On error or cancellation, remove all dirty bitmaps to ensure
consistency.
Add PBS/incremental specific information to query backup info QMP and
HMP commands.
Only supported for PBS backups.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
monitor/hmp-cmds.c | 45 ++++++++++----
proxmox-backup-client.c | 3 +-
proxmox-backup-client.h | 1 +
pve-backup.c | 103 ++++++++++++++++++++++++++++++---
qapi/block-core.json | 12 +++-
6 files changed, 142 insertions(+), 23 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 1e29681d30..3fca3ce3e9 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,6 +1042,7 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
+ false, false, // PBS incremental
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 7efcd2d641..b2b5f1298b 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -221,19 +221,42 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "End time: %s", ctime(&info->end_time));
}
- int per = (info->has_total && info->total &&
- info->has_transferred && info->transferred) ?
- (info->transferred * 100)/info->total : 0;
- int zero_per = (info->has_total && info->total &&
- info->has_zero_bytes && info->zero_bytes) ?
- (info->zero_bytes * 100)/info->total : 0;
monitor_printf(mon, "Backup file: %s\n", info->backup_file);
monitor_printf(mon, "Backup uuid: %s\n", info->uuid);
- monitor_printf(mon, "Total size: %zd\n", info->total);
- monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
- info->transferred, per);
- monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
- info->zero_bytes, zero_per);
+
+ if (!(info->has_total && info->total)) {
+ // this should not happen normally
+ monitor_printf(mon, "Total size: %d\n", 0);
+ } else {
+ bool incremental = false;
+ size_t total_or_dirty = info->total;
+ if (info->has_transferred) {
+ if (info->has_dirty && info->dirty) {
+ if (info->dirty < info->total) {
+ total_or_dirty = info->dirty;
+ incremental = true;
+ }
+ }
+ }
+
+ int per = (info->transferred * 100)/total_or_dirty;
+
+ monitor_printf(mon, "Backup mode: %s\n", incremental ? "incremental" : "full");
+
+ int zero_per = (info->has_zero_bytes && info->zero_bytes) ?
+ (info->zero_bytes * 100)/info->total : 0;
+ monitor_printf(mon, "Total size: %zd\n", info->total);
+ monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
+ info->transferred, per);
+ monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
+ info->zero_bytes, zero_per);
+
+ if (info->has_reused) {
+ int reused_per = (info->reused * 100)/total_or_dirty;
+ monitor_printf(mon, "Reused bytes: %zd (%d%%)\n",
+ info->reused, reused_per);
+ }
+ }
}
qapi_free_BackupStatus(info);
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index a8f6653a81..4ce7bc0b5e 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -89,6 +89,7 @@ proxmox_backup_co_register_image(
ProxmoxBackupHandle *pbs,
const char *device_name,
uint64_t size,
+ bool incremental,
Error **errp)
{
Coroutine *co = qemu_coroutine_self();
@@ -98,7 +99,7 @@ proxmox_backup_co_register_image(
int pbs_res = -1;
proxmox_backup_register_image_async(
- pbs, device_name, size ,proxmox_backup_schedule_wake, &waker, &pbs_res, &pbs_err);
+ pbs, device_name, size, incremental, proxmox_backup_schedule_wake, &waker, &pbs_res, &pbs_err);
qemu_coroutine_yield();
if (pbs_res < 0) {
if (errp) error_setg(errp, "backup register image failed: %s", pbs_err ? pbs_err : "unknown error");
diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h
index 1dda8b7d8f..8cbf645b2c 100644
--- a/proxmox-backup-client.h
+++ b/proxmox-backup-client.h
@@ -32,6 +32,7 @@ proxmox_backup_co_register_image(
ProxmoxBackupHandle *pbs,
const char *device_name,
uint64_t size,
+ bool incremental,
Error **errp);
diff --git a/pve-backup.c b/pve-backup.c
index 66868dec14..6cdbd40529 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -28,6 +28,8 @@
*
*/
+const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
+
static struct PVEBackupState {
struct {
// Everithing accessed from qmp_backup_query command is protected using lock
@@ -39,7 +41,9 @@ static struct PVEBackupState {
uuid_t uuid;
char uuid_str[37];
size_t total;
+ size_t dirty;
size_t transferred;
+ size_t reused;
size_t zero_bytes;
} stat;
int64_t speed;
@@ -66,6 +70,7 @@ typedef struct PVEBackupDevInfo {
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
+ BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
} PVEBackupDevInfo;
@@ -105,11 +110,12 @@ static bool pvebackup_error_or_canceled(void)
return error_or_canceled;
}
-static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
+static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes, size_t reused)
{
qemu_mutex_lock(&backup_state.stat.lock);
backup_state.stat.zero_bytes += zero_bytes;
backup_state.stat.transferred += transferred;
+ backup_state.stat.reused += reused;
qemu_mutex_unlock(&backup_state.stat.lock);
}
@@ -148,7 +154,8 @@ pvebackup_co_dump_pbs_cb(
pvebackup_propagate_error(local_err);
return pbs_res;
} else {
- pvebackup_add_transfered_bytes(size, !buf ? size : 0);
+ size_t reused = (pbs_res == 0) ? size : 0;
+ pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
}
return size;
@@ -208,11 +215,11 @@ pvebackup_co_dump_vma_cb(
} else {
if (remaining >= VMA_CLUSTER_SIZE) {
assert(ret == VMA_CLUSTER_SIZE);
- pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes);
+ pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes, 0);
remaining -= VMA_CLUSTER_SIZE;
} else {
assert(ret == remaining);
- pvebackup_add_transfered_bytes(remaining, zero_bytes);
+ pvebackup_add_transfered_bytes(remaining, zero_bytes, 0);
remaining = 0;
}
}
@@ -248,6 +255,18 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
if (local_err != NULL) {
pvebackup_propagate_error(local_err);
}
+ } else {
+ // on error or cancel we cannot ensure synchronization of dirty
+ // bitmaps with backup server, so remove all and do full backup next
+ GList *l = backup_state.di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ if (di->bitmap) {
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+ }
}
proxmox_backup_disconnect(backup_state.pbs);
@@ -303,6 +322,12 @@ static void pvebackup_complete_cb(void *opaque, int ret)
// remove self from job queue
backup_state.di_list = g_list_remove(backup_state.di_list, di);
+ if (di->bitmap && ret < 0) {
+ // on error or cancel we cannot ensure synchronization of dirty
+ // bitmaps with backup server, so remove all and do full backup next
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+
g_free(di);
qemu_mutex_unlock(&backup_state.backup_mutex);
@@ -472,12 +497,18 @@ static bool create_backup_jobs(void) {
assert(di->target != NULL);
+ MirrorSyncMode sync_mode = MIRROR_SYNC_MODE_FULL;
+ BitmapSyncMode bitmap_mode = BITMAP_SYNC_MODE_NEVER;
+ if (di->bitmap) {
+ sync_mode = MIRROR_SYNC_MODE_BITMAP;
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ }
AioContext *aio_context = bdrv_get_aio_context(di->bs);
aio_context_acquire(aio_context);
BlockJob *job = backup_job_create(
- NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
- BITMAP_SYNC_MODE_NEVER, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+ NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+ bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
aio_context_release(aio_context);
@@ -528,6 +559,8 @@ typedef struct QmpBackupTask {
const char *fingerprint;
bool has_fingerprint;
int64_t backup_time;
+ bool has_use_dirty_bitmap;
+ bool use_dirty_bitmap;
bool has_format;
BackupFormat format;
bool has_config_file;
@@ -619,6 +652,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
+ size_t dirty = 0;
l = di_list;
while (l) {
@@ -656,6 +690,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
+ bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -675,7 +711,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
goto err;
}
- if (proxmox_backup_co_connect(pbs, task->errp) < 0)
+ int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ if (connect_result < 0)
goto err;
/* register all devices */
@@ -686,9 +723,40 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *devname = bdrv_get_device_name(di->bs);
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, task->errp);
- if (dev_id < 0)
+ BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
+ bool expect_only_dirty = false;
+
+ if (use_dirty_bitmap) {
+ if (bitmap == NULL) {
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ if (!bitmap) {
+ goto err;
+ }
+ } else {
+ expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
+ }
+
+ if (expect_only_dirty) {
+ dirty += bdrv_get_dirty_count(bitmap);
+ } else {
+ /* mark entire bitmap as dirty to make full backup */
+ bdrv_set_dirty_bitmap(bitmap, 0, di->size);
+ dirty += di->size;
+ }
+ di->bitmap = bitmap;
+ } else {
+ dirty += di->size;
+
+ /* after a full backup the old dirty bitmap is invalid anyway */
+ if (bitmap != NULL) {
+ bdrv_release_dirty_bitmap(bitmap);
+ }
+ }
+
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ if (dev_id < 0) {
goto err;
+ }
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
goto err;
@@ -697,6 +765,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->dev_id = dev_id;
}
} else if (format == BACKUP_FORMAT_VMA) {
+ dirty = total;
+
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
@@ -724,6 +794,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
} else if (format == BACKUP_FORMAT_DIR) {
+ dirty = total;
+
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
@@ -796,8 +868,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
+ backup_state.stat.dirty = dirty;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -821,6 +895,10 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ if (di->bitmap) {
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+
if (di->target) {
bdrv_unref(di->target);
}
@@ -862,6 +940,7 @@ UuidInfo *qmp_backup(
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -880,6 +959,8 @@ UuidInfo *qmp_backup(
.backup_id = backup_id,
.has_backup_time = has_backup_time,
.backup_time = backup_time,
+ .has_use_dirty_bitmap = has_use_dirty_bitmap,
+ .use_dirty_bitmap = use_dirty_bitmap,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
@@ -948,10 +1029,14 @@ BackupStatus *qmp_query_backup(Error **errp)
info->has_total = true;
info->total = backup_state.stat.total;
+ info->has_dirty = true;
+ info->dirty = backup_state.stat.dirty;
info->has_zero_bytes = true;
info->zero_bytes = backup_state.stat.zero_bytes;
info->has_transferred = true;
info->transferred = backup_state.stat.transferred;
+ info->has_reused = true;
+ info->reused = backup_state.stat.reused;
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c5d604693f..a138ad08d4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -712,8 +712,13 @@
#
# @total: total amount of bytes involved in the backup process
#
+# @dirty: with incremental mode (PBS) this is the amount of bytes involved
+# in the backup process which are marked dirty.
+#
# @transferred: amount of bytes already backed up.
#
+# @reused: amount of bytes reused due to deduplication.
+#
# @zero-bytes: amount of 'zero' bytes detected.
#
# @start-time: time (epoch) when backup job started.
@@ -726,8 +731,8 @@
#
##
{ 'struct': 'BackupStatus',
- 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int',
- '*transferred': 'int', '*zero-bytes': 'int',
+ 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
+ '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
'*backup-file': 'str', '*uuid': 'str' } }
@@ -770,6 +775,8 @@
#
# @backup-time: backup timestamp (Unix epoch, required for format 'pbs')
#
+# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
+#
# Returns: the uuid of the backup job
#
##
@@ -780,6 +787,7 @@
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
+ '*use-dirty-bitmap': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,219 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Thu, 9 Jul 2020 12:53:08 +0200
Subject: [PATCH] PVE: various PBS fixes
pbs: fix crypt and compress parameters
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
PVE: handle PBS write callback with big blocks correctly
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
PVE: add zero block handling to PBS dump callback
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 ++-
pve-backup.c | 59 ++++++++++++++++++++++++++--------
qapi/block-core.json | 6 ++++
3 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 3fca3ce3e9..69254396d5 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,7 +1042,9 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
- false, false, // PBS incremental
+ false, false, // PBS use-dirty-bitmap
+ false, false, // PBS compress
+ false, false, // PBS encrypt
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/pve-backup.c b/pve-backup.c
index 6cdbd40529..7527885251 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -8,6 +8,7 @@
#include "block/blockjob.h"
#include "qapi/qapi-commands-block.h"
#include "qapi/qmp/qerror.h"
+#include "qemu/cutils.h"
/* PVE backup state and related function */
@@ -67,6 +68,7 @@ opts_init(pvebackup_init);
typedef struct PVEBackupDevInfo {
BlockDriverState *bs;
size_t size;
+ uint64_t block_size;
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
@@ -135,10 +137,13 @@ pvebackup_co_dump_pbs_cb(
PVEBackupDevInfo *di = opaque;
assert(backup_state.pbs);
+ assert(buf);
Error *local_err = NULL;
int pbs_res = -1;
+ bool is_zero_block = size == di->block_size && buffer_is_zero(buf, size);
+
qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
@@ -147,16 +152,28 @@ pvebackup_co_dump_pbs_cb(
return -1;
}
- pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+ uint64_t transferred = 0;
+ uint64_t reused = 0;
+ while (transferred < size) {
+ uint64_t left = size - transferred;
+ uint64_t to_transfer = left < di->block_size ? left : di->block_size;
+
+ pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id,
+ is_zero_block ? NULL : buf + transferred, start + transferred,
+ to_transfer, &local_err);
+ transferred += to_transfer;
+
+ if (pbs_res < 0) {
+ pvebackup_propagate_error(local_err);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return pbs_res;
+ }
+
+ reused += pbs_res == 0 ? to_transfer : 0;
+ }
+
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
-
- if (pbs_res < 0) {
- pvebackup_propagate_error(local_err);
- return pbs_res;
- } else {
- size_t reused = (pbs_res == 0) ? size : 0;
- pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
- }
+ pvebackup_add_transfered_bytes(size, is_zero_block ? size : 0, reused);
return size;
}
@@ -178,6 +195,7 @@ pvebackup_co_dump_vma_cb(
int ret = -1;
assert(backup_state.vmaw);
+ assert(buf);
uint64_t remaining = size;
@@ -204,9 +222,7 @@ pvebackup_co_dump_vma_cb(
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
++cluster_num;
- if (buf) {
- buf += VMA_CLUSTER_SIZE;
- }
+ buf += VMA_CLUSTER_SIZE;
if (ret < 0) {
Error *local_err = NULL;
vma_writer_error_propagate(backup_state.vmaw, &local_err);
@@ -569,6 +585,10 @@ typedef struct QmpBackupTask {
const char *firewall_file;
bool has_devlist;
const char *devlist;
+ bool has_compress;
+ bool compress;
+ bool has_encrypt;
+ bool encrypt;
bool has_speed;
int64_t speed;
Error **errp;
@@ -692,6 +712,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -701,8 +722,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->has_password ? task->password : NULL,
task->has_keyfile ? task->keyfile : NULL,
task->has_key_password ? task->key_password : NULL,
+ task->has_compress ? task->compress : true,
+ task->has_encrypt ? task->encrypt : task->has_keyfile,
task->has_fingerprint ? task->fingerprint : NULL,
- &pbs_err);
+ &pbs_err);
if (!pbs) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
@@ -721,6 +744,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ di->block_size = dump_cb_block_size;
+
const char *devname = bdrv_get_device_name(di->bs);
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
@@ -941,6 +966,8 @@ UuidInfo *qmp_backup(
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -951,6 +978,8 @@ UuidInfo *qmp_backup(
.backup_file = backup_file,
.has_password = has_password,
.password = password,
+ .has_keyfile = has_keyfile,
+ .keyfile = keyfile,
.has_key_password = has_key_password,
.key_password = key_password,
.has_fingerprint = has_fingerprint,
@@ -961,6 +990,10 @@ UuidInfo *qmp_backup(
.backup_time = backup_time,
.has_use_dirty_bitmap = has_use_dirty_bitmap,
.use_dirty_bitmap = use_dirty_bitmap,
+ .has_compress = has_compress,
+ .compress = compress,
+ .has_encrypt = has_encrypt,
+ .encrypt = encrypt,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index a138ad08d4..a75f1b4687 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -777,6 +777,10 @@
#
# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
#
+# @compress: use compression (optional for format 'pbs', defaults to true)
+#
+# @encrypt: use encryption ((optional for format 'pbs', defaults to true if there is a keyfile)
+#
# Returns: the uuid of the backup job
#
##
@@ -788,6 +792,8 @@
'*backup-id': 'str',
'*backup-time': 'int',
'*use-dirty-bitmap': 'bool',
+ '*compress': 'bool',
+ '*encrypt': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -7,40 +7,35 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[error cleanups, file_open implementation]
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[WB: add namespace support]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[FE: adapt to changed function signatures
make pbs_co_preadv return values consistent with QEMU
getlength is now a coroutine function]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/meson.build | 2 +
block/pbs.c | 306 +++++++++++++++++++++++++++++++++++++++++++
meson.build | 2 +-
qapi/block-core.json | 29 ++++
qapi/pragma.json | 1 +
5 files changed, 339 insertions(+), 1 deletion(-)
block/meson.build | 3 +
block/pbs.c | 271 +++++++++++++++++++++++++++++++++++++++++++
configure | 9 ++
meson.build | 1 +
qapi/block-core.json | 13 +++
5 files changed, 297 insertions(+)
create mode 100644 block/pbs.c
diff --git a/block/meson.build b/block/meson.build
index 2367e1ac1b..e178047ec9 100644
index 9e433daf2e..e3ed5ac97c 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -49,6 +49,8 @@ block_ss.add(files(
@@ -51,6 +51,9 @@ block_ss.add(files(
'../pve-backup.c',
), libproxmox_backup_qemu)
+block_ss.add(files('pbs.c'), libproxmox_backup_qemu)
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: files('pbs.c'))
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: libproxmox_backup_qemu)
+
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/pbs.c b/block/pbs.c
new file mode 100644
index 0000000000..2d5e28ce8f
index 0000000000..78dad0dcc4
--- /dev/null
+++ b/block/pbs.c
@@ -0,0 +1,306 @@
@@ -0,0 +1,271 @@
+/*
+ * Proxmox Backup Server read-only block driver
+ */
@@ -53,12 +48,10 @@ index 0000000000..2d5e28ce8f
+#include "qemu/option.h"
+#include "qemu/cutils.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+
+#include <proxmox-backup-qemu.h>
+
+#define PBS_OPT_REPOSITORY "repository"
+#define PBS_OPT_NAMESPACE "namespace"
+#define PBS_OPT_SNAPSHOT "snapshot"
+#define PBS_OPT_ARCHIVE "archive"
+#define PBS_OPT_KEYFILE "keyfile"
@@ -68,11 +61,10 @@ index 0000000000..2d5e28ce8f
+
+typedef struct {
+ ProxmoxRestoreHandle *conn;
+ uint8_t aid;
+ char aid;
+ int64_t length;
+
+ char *repository;
+ char *namespace;
+ char *snapshot;
+ char *archive;
+} BDRVPBSState;
@@ -87,11 +79,6 @@ index 0000000000..2d5e28ce8f
+ .help = "The server address and repository to connect to.",
+ },
+ {
+ .name = PBS_OPT_NAMESPACE,
+ .type = QEMU_OPT_STRING,
+ .help = "Optional: The snapshot's namespace.",
+ },
+ {
+ .name = PBS_OPT_SNAPSHOT,
+ .type = QEMU_OPT_STRING,
+ .help = "The snapshot to read.",
@@ -127,7 +114,7 @@ index 0000000000..2d5e28ce8f
+
+
+// filename format:
+// pbs:repository=<repo>,namespace=<ns>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+// pbs:repository=<repo>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+static void pbs_parse_filename(const char *filename, QDict *options,
+ Error **errp)
+{
@@ -163,7 +150,6 @@ index 0000000000..2d5e28ce8f
+ s->archive = g_strdup(qemu_opt_get(opts, PBS_OPT_ARCHIVE));
+ const char *keyfile = qemu_opt_get(opts, PBS_OPT_KEYFILE);
+ const char *password = qemu_opt_get(opts, PBS_OPT_PASSWORD);
+ const char *namespace = qemu_opt_get(opts, PBS_OPT_NAMESPACE);
+ const char *fingerprint = qemu_opt_get(opts, PBS_OPT_FINGERPRINT);
+ const char *key_password = qemu_opt_get(opts, PBS_OPT_ENCRYPTION_PASSWORD);
+
@@ -176,12 +162,9 @@ index 0000000000..2d5e28ce8f
+ if (!key_password) {
+ key_password = getenv("PBS_ENCRYPTION_PASSWORD");
+ }
+ if (namespace) {
+ s->namespace = g_strdup(namespace);
+ }
+
+ /* connect to PBS server in read mode */
+ s->conn = proxmox_restore_new_ns(s->repository, s->snapshot, s->namespace, password,
+ s->conn = proxmox_restore_new(s->repository, s->snapshot, password,
+ keyfile, key_password, fingerprint, &pbs_error);
+
+ /* invalidates qemu_opt_get char pointers from above */
@@ -201,18 +184,12 @@ index 0000000000..2d5e28ce8f
+ }
+
+ /* acquire handle and length */
+ ret = proxmox_restore_open_image(s->conn, s->archive, &pbs_error);
+ if (ret < 0) {
+ s->aid = proxmox_restore_open_image(s->conn, s->archive, &pbs_error);
+ if (s->aid < 0) {
+ if (pbs_error && errp) error_setg(errp, "PBS open_image failed: %s", pbs_error);
+ if (pbs_error) proxmox_backup_free_error(pbs_error);
+ return -ENODEV;
+ }
+ if (ret > UINT8_MAX) {
+ error_setg(errp, "PBS open_image returned an ID larger than %u", UINT8_MAX);
+ return -ENODEV;
+ }
+ s->aid = ret;
+
+ s->length = proxmox_restore_get_image_length(s->conn, s->aid, &pbs_error);
+ if (s->length < 0) {
+ if (pbs_error && errp) error_setg(errp, "PBS get_image_length failed: %s", pbs_error);
@@ -223,17 +200,21 @@ index 0000000000..2d5e28ce8f
+ return 0;
+}
+
+static int pbs_file_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ return pbs_open(bs, options, flags, errp);
+}
+
+static void pbs_close(BlockDriverState *bs) {
+ BDRVPBSState *s = bs->opaque;
+ g_free(s->repository);
+ g_free(s->namespace);
+ g_free(s->snapshot);
+ g_free(s->archive);
+ proxmox_restore_disconnect(s->conn);
+}
+
+static coroutine_fn int64_t GRAPH_RDLOCK
+pbs_co_getlength(BlockDriverState *bs)
+static int64_t pbs_getlength(BlockDriverState *bs)
+{
+ BDRVPBSState *s = bs->opaque;
+ return s->length;
@@ -250,35 +231,21 @@ index 0000000000..2d5e28ce8f
+ aio_co_schedule(rcb->ctx, rcb->co);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+pbs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BDRVPBSState *s = bs->opaque;
+ int ret;
+ char *pbs_error = NULL;
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+ return -EIO;
+ }
+ uint8_t *buf = malloc(bytes);
+
+ ReadCallbackData rcb = {
+ .co = qemu_coroutine_self(),
+ .ctx = bdrv_get_aio_context(bs),
+ };
+
+ proxmox_restore_read_image_at_async(s->conn, s->aid, buf, (uint64_t)offset, (uint64_t)bytes,
+ proxmox_restore_read_image_at_async(s->conn, s->aid, buf, offset, bytes,
+ read_callback, (void *) &rcb, &ret, &pbs_error);
+
+ qemu_coroutine_yield();
@@ -289,34 +256,26 @@ index 0000000000..2d5e28ce8f
+ return -EIO;
+ }
+
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ free(buf);
+
+ return 0;
+ return ret;
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+pbs_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static coroutine_fn int pbs_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ fprintf(stderr, "pbs-bdrv: cannot write to backup file, make sure "
+ "any attached disk devices are set to read-only!\n");
+ return -EPERM;
+}
+
+static void GRAPH_RDLOCK
+pbs_refresh_filename(BlockDriverState *bs)
+static void pbs_refresh_filename(BlockDriverState *bs)
+{
+ BDRVPBSState *s = bs->opaque;
+ if (s->namespace) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s:%s(%s)",
+ s->repository, s->namespace, s->snapshot, s->archive);
+ } else {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+ }
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+}
+
+static const char *const pbs_strong_runtime_opts[] = {
@@ -330,9 +289,10 @@ index 0000000000..2d5e28ce8f
+
+ .bdrv_parse_filename = pbs_parse_filename,
+
+ .bdrv_file_open = pbs_file_open,
+ .bdrv_open = pbs_open,
+ .bdrv_close = pbs_close,
+ .bdrv_co_getlength = pbs_co_getlength,
+ .bdrv_getlength = pbs_getlength,
+
+ .bdrv_co_preadv = pbs_co_preadv,
+ .bdrv_co_pwritev = pbs_co_pwritev,
@@ -347,32 +307,72 @@ index 0000000000..2d5e28ce8f
+}
+
+block_init(bdrv_pbs_init);
diff --git a/configure b/configure
index 6e308ed77f..869e97c72f 100755
--- a/configure
+++ b/configure
@@ -428,6 +428,7 @@ vdi=${default_feature:-yes}
vvfat=${default_feature:-yes}
qed=${default_feature:-yes}
parallels=${default_feature:-yes}
+pbs_bdrv="yes"
libxml2="auto"
debug_mutex="no"
libpmem="auto"
@@ -1486,6 +1487,10 @@ for opt do
;;
--enable-parallels) parallels="yes"
;;
+ --disable-pbs-bdrv) pbs_bdrv="no"
+ ;;
+ --enable-pbs-bdrv) pbs_bdrv="yes"
+ ;;
--disable-vhost-user) vhost_user="no"
;;
--enable-vhost-user) vhost_user="yes"
@@ -1956,6 +1961,7 @@ disabled with --disable-FEATURE, default is enabled if available
vvfat vvfat image format support
qed qed image format support
parallels parallels image format support
+ pbs-bdrv Proxmox backup server read-only block driver support
crypto-afalg Linux AF_ALG crypto backend driver
capstone capstone disassembler support
debug-mutex mutex debugging support
@@ -4624,6 +4630,9 @@ fi
if test "$linux_aio" = "yes" ; then
echo "CONFIG_LINUX_AIO=y" >> $config_host_mak
fi
+if test "$pbs_bdrv" = "yes" ; then
+ echo "CONFIG_PBS_BDRV=y" >> $config_host_mak
+fi
if test "$vhost_scsi" = "yes" ; then
echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
fi
diff --git a/meson.build b/meson.build
index f666d0f028..4c85736ec3 100644
index dd1c5bdb4e..45c1f2de73 100644
--- a/meson.build
+++ b/meson.build
@@ -4815,7 +4815,7 @@ summary_info += {'Query Processing Library support': qpl}
summary_info += {'UADK Library support': uadk}
summary_info += {'qatzip support': qatzip}
summary_info += {'NUMA host support': numa}
-summary_info += {'capstone': capstone}
@@ -3111,6 +3111,7 @@ summary_info += {'lzfse support': liblzfse.found()}
summary_info += {'zstd support': zstd.found()}
summary_info += {'NUMA host support': config_host.has_key('CONFIG_NUMA')}
summary_info += {'libxml2': libxml2.found()}
+summary_info += {'PBS bdrv support': config_host.has_key('CONFIG_PBS_BDRV')}
summary_info += {'libpmem support': libpmem}
summary_info += {'libdaxctl support': libdaxctl}
summary_info += {'libcbor support': libcbor}
summary_info += {'capstone': capstone_opt == 'disabled' ? false : capstone_opt}
summary_info += {'libpmem support': libpmem.found()}
summary_info += {'libdaxctl support': libdaxctl.found()}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 68caf30084..d45e8975a7 100644
index a75f1b4687..e4d0c923a4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3466,6 +3466,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
@@ -2982,6 +2982,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
+ 'pbs',
'ssh', 'throttle', 'vdi', 'vhdx',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
@@ -3552,6 +3553,33 @@
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
##
@@ -3045,6 +3046,17 @@
{ 'struct': 'BlockdevOptionsNull',
'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
@@ -381,48 +381,20 @@ index 68caf30084..d45e8975a7 100644
+#
+# Driver specific block device options for the PBS backend.
+#
+# @repository: Proxmox Backup Server repository.
+#
+# @snapshot: backup snapshots ID.
+#
+# @archive: archive name.
+#
+# @keyfile: keyfile to use for encryption.
+#
+# @password: password to use for connection.
+#
+# @fingerprint: backup server fingerprint.
+#
+# @key_password: password to unlock key.
+#
+# @namespace: namespace where backup snapshot lives.
+#
+##
+{ 'struct': 'BlockdevOptionsPbs',
+ 'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
+ '*keyfile': 'str', '*password': 'str', '*fingerprint': 'str',
+ '*key_password': 'str', '*namespace': 'str' } }
+ '*key_password': 'str' } }
+
##
# @BlockdevOptionsNVMe:
#
@@ -4993,6 +5021,7 @@
@@ -4263,6 +4275,7 @@
'nfs': 'BlockdevOptionsNfs',
'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull',
+ 'pbs': 'BlockdevOptionsPbs',
'nvme': 'BlockdevOptionsNVMe',
'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
'if': 'CONFIG_BLKIO' },
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 6aaa9cb975..e9c595c4ba 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -91,6 +91,7 @@
'BlockInfo', # query-block
'BlockdevAioOptions', # blockdev-add, -blockdev
'BlockdevDriver', # blockdev-add, query-blockstats, ...
+ 'BlockdevOptionsPbs', # for PBS backwards compat
'BlockdevVmdkAdapterType', # blockdev-create (to match VMDK spec)
'BlockdevVmdkSubformat', # blockdev-create (to match VMDK spec)
'ColoCompareProperties', # object_add, -object
'parallels': 'BlockdevOptionsGenericFormat',
'preallocate':'BlockdevOptionsPreallocate',

View File

@@ -0,0 +1,74 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 8 Jul 2020 11:57:53 +0200
Subject: [PATCH] PVE: add query_proxmox_support QMP command
Generic interface for future use, currently used for PBS dirty-bitmap
backup support.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[PVE: query-proxmox-support: include library version]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 9 +++++++++
qapi/block-core.json | 29 +++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/pve-backup.c b/pve-backup.c
index 7527885251..8cba8e97d3 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1075,3 +1075,12 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
+
+ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+{
+ ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
+ ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
+ ret->pbs_dirty_bitmap = true;
+ ret->pbs_dirty_bitmap_savevm = true;
+ return ret;
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e4d0c923a4..3eebe7ff71 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -822,6 +822,35 @@
##
{ 'command': 'backup-cancel' }
+##
+# @ProxmoxSupportStatus:
+#
+# Contains info about supported features added by Proxmox.
+#
+# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
+# supported.
+#
+# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
+# safely be set for savevm-async.
+#
+# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+#
+##
+{ 'struct': 'ProxmoxSupportStatus',
+ 'data': { 'pbs-dirty-bitmap': 'bool',
+ 'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-library-version': 'str' } }
+
+##
+# @query-proxmox-support:
+#
+# Returns information about supported features added by Proxmox.
+#
+# Returns: @ProxmoxSupportStatus
+#
+##
+{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }
+
##
# @BlockDeviceTimedStats:
#

View File

@@ -0,0 +1,441 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 19 Aug 2020 17:02:00 +0200
Subject: [PATCH] PVE: add query-pbs-bitmap-info QMP call
Returns advanced information about dirty bitmaps used (or not used) for
the latest PBS backup.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
monitor/hmp-cmds.c | 28 ++++++-----
pve-backup.c | 117 ++++++++++++++++++++++++++++++++-----------
qapi/block-core.json | 56 +++++++++++++++++++++
3 files changed, 159 insertions(+), 42 deletions(-)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index b2b5f1298b..7a449edafa 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -198,6 +198,7 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict)
void hmp_info_backup(Monitor *mon, const QDict *qdict)
{
BackupStatus *info;
+ PBSBitmapInfoList *bitmap_info;
info = qmp_query_backup(NULL);
@@ -228,26 +229,29 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
// this should not happen normally
monitor_printf(mon, "Total size: %d\n", 0);
} else {
- bool incremental = false;
size_t total_or_dirty = info->total;
- if (info->has_transferred) {
- if (info->has_dirty && info->dirty) {
- if (info->dirty < info->total) {
- total_or_dirty = info->dirty;
- incremental = true;
- }
- }
+ bitmap_info = qmp_query_pbs_bitmap_info(NULL);
+
+ while (bitmap_info) {
+ monitor_printf(mon, "Drive %s:\n",
+ bitmap_info->value->drive);
+ monitor_printf(mon, " bitmap action: %s\n",
+ PBSBitmapAction_str(bitmap_info->value->action));
+ monitor_printf(mon, " size: %zd\n",
+ bitmap_info->value->size);
+ monitor_printf(mon, " dirty: %zd\n",
+ bitmap_info->value->dirty);
+ bitmap_info = bitmap_info->next;
}
- int per = (info->transferred * 100)/total_or_dirty;
-
- monitor_printf(mon, "Backup mode: %s\n", incremental ? "incremental" : "full");
+ qapi_free_PBSBitmapInfoList(bitmap_info);
int zero_per = (info->has_zero_bytes && info->zero_bytes) ?
(info->zero_bytes * 100)/info->total : 0;
monitor_printf(mon, "Total size: %zd\n", info->total);
+ int trans_per = (info->transferred * 100)/total_or_dirty;
monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
- info->transferred, per);
+ info->transferred, trans_per);
monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
info->zero_bytes, zero_per);
diff --git a/pve-backup.c b/pve-backup.c
index 8cba8e97d3..22420db26a 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -46,6 +46,7 @@ static struct PVEBackupState {
size_t transferred;
size_t reused;
size_t zero_bytes;
+ GList *bitmap_list;
} stat;
int64_t speed;
VmaWriter *vmaw;
@@ -672,7 +673,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
- size_t dirty = 0;
l = di_list;
while (l) {
@@ -693,18 +693,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_generate(uuid);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.reused = 0;
+
+ /* clear previous backup's bitmap_list */
+ if (backup_state.stat.bitmap_list) {
+ GList *bl = backup_state.stat.bitmap_list;
+ while (bl) {
+ g_free(((PBSBitmapInfo *)bl->data)->drive);
+ g_free(bl->data);
+ bl = g_list_next(bl);
+ }
+ g_list_free(backup_state.stat.bitmap_list);
+ backup_state.stat.bitmap_list = NULL;
+ }
+
if (format == BACKUP_FORMAT_PBS) {
if (!task->has_password) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
- goto err;
+ goto err_mutex;
}
if (!task->has_backup_id) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
- goto err;
+ goto err_mutex;
}
if (!task->has_backup_time) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
- goto err;
+ goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
@@ -731,12 +746,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
- goto err;
+ goto err_mutex;
}
int connect_result = proxmox_backup_co_connect(pbs, task->errp);
if (connect_result < 0)
- goto err;
+ goto err_mutex;
/* register all devices */
l = di_list;
@@ -747,6 +762,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->block_size = dump_cb_block_size;
const char *devname = bdrv_get_device_name(di->bs);
+ PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
+ size_t dirty = di->size;
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
@@ -755,49 +772,59 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (bitmap == NULL) {
bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
if (!bitmap) {
- goto err;
+ goto err_mutex;
}
+ action = PBS_BITMAP_ACTION_NEW;
} else {
expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
}
if (expect_only_dirty) {
- dirty += bdrv_get_dirty_count(bitmap);
+ /* track clean chunks as reused */
+ dirty = MIN(bdrv_get_dirty_count(bitmap), di->size);
+ backup_state.stat.reused += di->size - dirty;
+ action = PBS_BITMAP_ACTION_USED;
} else {
/* mark entire bitmap as dirty to make full backup */
bdrv_set_dirty_bitmap(bitmap, 0, di->size);
- dirty += di->size;
+ if (action != PBS_BITMAP_ACTION_NEW) {
+ action = PBS_BITMAP_ACTION_INVALID;
+ }
}
di->bitmap = bitmap;
} else {
- dirty += di->size;
-
/* after a full backup the old dirty bitmap is invalid anyway */
if (bitmap != NULL) {
bdrv_release_dirty_bitmap(bitmap);
+ action = PBS_BITMAP_ACTION_NOT_USED_REMOVED;
}
}
int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
if (dev_id < 0) {
- goto err;
+ goto err_mutex;
}
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
- goto err;
+ goto err_mutex;
}
di->dev_id = dev_id;
+
+ PBSBitmapInfo *info = g_malloc(sizeof(*info));
+ info->drive = g_strdup(devname);
+ info->action = action;
+ info->size = di->size;
+ info->dirty = dirty;
+ backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- dirty = total;
-
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
error_propagate(task->errp, local_err);
}
- goto err;
+ goto err_mutex;
}
/* register all devices for vma writer */
@@ -807,7 +834,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
l = g_list_next(l);
if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
- goto err;
+ goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
@@ -815,16 +842,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (di->dev_id <= 0) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
- goto err;
+ goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- dirty = total;
-
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
- goto err;
+ goto err_mutex;
}
backup_dir = task->backup_file;
@@ -841,18 +866,18 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->size, flags, false, &local_err);
if (local_err) {
error_propagate(task->errp, local_err);
- goto err;
+ goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
error_propagate(task->errp, local_err);
- goto err;
+ goto err_mutex;
}
}
} else {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
- goto err;
+ goto err_mutex;
}
@@ -860,7 +885,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_config_file) {
if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
- goto err;
+ goto err_mutex;
}
}
@@ -868,12 +893,11 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_firewall_file) {
if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
- goto err;
+ goto err_mutex;
}
}
/* initialize global backup_state now */
-
- qemu_mutex_lock(&backup_state.stat.lock);
+ /* note: 'reused' and 'bitmap_list' are initialized earlier */
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
@@ -893,10 +917,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
- backup_state.stat.dirty = dirty;
+ backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
- backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -913,6 +936,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->result = uuid_info;
return;
+err_mutex:
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
err:
l = di_list;
@@ -1076,11 +1102,42 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
+PBSBitmapInfoList *qmp_query_pbs_bitmap_info(Error **errp)
+{
+ PBSBitmapInfoList *head = NULL, **p_next = &head;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+
+ GList *l = backup_state.stat.bitmap_list;
+ while (l) {
+ PBSBitmapInfo *info = (PBSBitmapInfo *)l->data;
+ l = g_list_next(l);
+
+ /* clone bitmap info to avoid auto free after QMP marshalling */
+ PBSBitmapInfo *info_ret = g_malloc0(sizeof(*info_ret));
+ info_ret->drive = g_strdup(info->drive);
+ info_ret->action = info->action;
+ info_ret->size = info->size;
+ info_ret->dirty = info->dirty;
+
+ PBSBitmapInfoList *info_list = g_malloc0(sizeof(*info_list));
+ info_list->value = info_ret;
+
+ *p_next = info_list;
+ p_next = &info_list->next;
+ }
+
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ return head;
+}
+
ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
{
ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->query_bitmap_info = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3eebe7ff71..170c13984d 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -830,6 +830,8 @@
# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
# supported.
#
+# @query-bitmap-info: True if the 'query-pbs-bitmap-info' QMP call is supported.
+#
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
#
@@ -838,6 +840,7 @@
##
{ 'struct': 'ProxmoxSupportStatus',
'data': { 'pbs-dirty-bitmap': 'bool',
+ 'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-library-version': 'str' } }
@@ -851,6 +854,59 @@
##
{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }
+##
+# @PBSBitmapAction:
+#
+# An action taken on a dirty-bitmap when a backup job was started.
+#
+# @not-used: Bitmap mode was not enabled.
+#
+# @not-used-removed: Bitmap mode was not enabled, but a bitmap from a
+# previous backup still existed and was removed.
+#
+# @new: A new bitmap was attached to the drive for this backup.
+#
+# @used: An existing bitmap will be used to only backup changed data.
+#
+# @invalid: A bitmap existed, but had to be cleared since it's associated
+# base snapshot did not match the base given for the current job or
+# the crypt mode has changed.
+#
+##
+{ 'enum': 'PBSBitmapAction',
+ 'data': ['not-used', 'not-used-removed', 'new', 'used', 'invalid'] }
+
+##
+# @PBSBitmapInfo:
+#
+# Contains information about dirty bitmaps used for each drive in a PBS backup.
+#
+# @drive: The underlying drive.
+#
+# @action: The action that was taken when the backup started.
+#
+# @size: The total size of the drive.
+#
+# @dirty: How much of the drive is considered dirty and will be backed up,
+# or 'size' if everything will be.
+#
+##
+{ 'struct': 'PBSBitmapInfo',
+ 'data': { 'drive': 'str', 'action': 'PBSBitmapAction', 'size': 'int',
+ 'dirty': 'int' } }
+
+##
+# @query-pbs-bitmap-info:
+#
+# Returns information about dirty bitmaps used on the most recently started
+# backup. Returns nothing when the last backup was not using PBS or if no
+# backup occured in this session.
+#
+# Returns: @PBSBitmapInfo
+#
+##
+{ 'command': 'query-pbs-bitmap-info', 'returns': ['PBSBitmapInfo'] }
+
##
# @BlockDeviceTimedStats:
#

View File

@@ -9,51 +9,50 @@ fitting.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
meson.build | 3 ++-
meson.build | 2 ++
os-posix.c | 7 +++++--
2 files changed, 7 insertions(+), 3 deletions(-)
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/meson.build b/meson.build
index 4c85736ec3..57f666d722 100644
index 45c1f2de73..44071acbb7 100644
--- a/meson.build
+++ b/meson.build
@@ -2130,6 +2130,7 @@ endif
@@ -1065,6 +1065,7 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
libuuid = cc.find_library('uuid', required: true)
+libsystemd = cc.find_library('systemd', required: true)
libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true)
# libselinux
@@ -3744,7 +3745,7 @@ if have_block
if host_os == 'windows'
system_ss.add(files('os-win32.c'))
else
- blockdev_ss.add(files('os-posix.c'))
+ blockdev_ss.add(files('os-posix.c'), libsystemd)
endif
# Malloc tests
@@ -2246,6 +2247,7 @@ if have_block
# os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
# os-win32.c does not
blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
+ blockdev_ss.add(when: 'CONFIG_POSIX', if_true: libsystemd)
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
endif
diff --git a/os-posix.c b/os-posix.c
index 43f9a43f3f..a47e46d1c2 100644
index ae6c9f2a5e..36807806bf 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -29,6 +29,8 @@
@@ -28,6 +28,8 @@
#include <pwd.h>
#include <grp.h>
#include <libgen.h>
+#include <systemd/sd-journal.h>
+#include <syslog.h>
#include "qemu/error-report.h"
#include "qemu/log.h"
@@ -306,9 +308,10 @@ void os_setup_post(void)
#include "qemu-common.h"
/* Needed early for CONFIG_BSD etc. */
@@ -291,9 +293,10 @@ void os_setup_post(void)
dup2(fd, 0);
dup2(fd, 1);
- /* In case -D is given do not redirect stderr to /dev/null */
+ /* In case -D is given do not redirect stderr to journal */
if (!qemu_log_enabled()) {
if (!qemu_logfile) {
- dup2(fd, 2);
+ int journal_fd = sd_journal_stream_fd("QEMU", LOG_ERR, 0);
+ dup2(journal_fd, 2);

View File

@@ -7,14 +7,14 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/qemu/job.h | 12 ++++++++++++
job.c | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
job.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 2b873f2576..528cd6acb9 100644
index 41162ed494..6662c63519 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -362,6 +362,18 @@ void job_unlock(void);
@@ -285,6 +285,18 @@ typedef enum JobCreateFlags {
*/
JobTxn *job_txn_new(void);
@@ -34,10 +34,10 @@ index 2b873f2576..528cd6acb9 100644
* Release a reference that was previously acquired with job_txn_add_job or
* job_txn_new. If it's the last reference to the object, it will be freed.
diff --git a/job.c b/job.c
index baf54c8d60..3ac5e5cde2 100644
index 44eec9a441..a0753ff2f1 100644
--- a/job.c
+++ b/job.c
@@ -94,6 +94,8 @@ struct JobTxn {
@@ -72,6 +72,8 @@ struct JobTxn {
/* Reference count */
int refcnt;
@@ -45,8 +45,8 @@ index baf54c8d60..3ac5e5cde2 100644
+ bool sequential;
};
void job_lock(void)
@@ -119,6 +121,25 @@ JobTxn *job_txn_new(void)
/* Right now, this mutex is only needed to synchronize accesses to job->busy
@@ -102,6 +104,25 @@ JobTxn *job_txn_new(void)
return txn;
}
@@ -69,23 +69,20 @@ index baf54c8d60..3ac5e5cde2 100644
+ job_start(first);
+}
+
/* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn)
static void job_txn_ref(JobTxn *txn)
{
@@ -1042,6 +1063,12 @@ static void job_completed_txn_success_locked(Job *job)
txn->refcnt++;
@@ -850,6 +871,9 @@ static void job_completed_txn_success(Job *job)
*/
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (!job_is_completed_locked(other_job)) {
if (!job_is_completed(other_job)) {
+ if (txn->sequential) {
+ job_unlock();
+ /* Needs to be called without holding the job lock */
+ job_start(other_job);
+ job_lock();
+ }
return;
}
assert(other_job->ret == 0);
@@ -1253,6 +1280,13 @@ int job_finish_sync_locked(Job *job,
@@ -1020,6 +1044,13 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
return -EBUSY;
}
@@ -93,9 +90,9 @@ index baf54c8d60..3ac5e5cde2 100644
+ * of cancelling, these have not begun work so job_enter won't do anything,
+ * let's ensure they are marked as ABORTING if required */
+ if (job->status == JOB_STATUS_CREATED && job->txn->sequential) {
+ job_update_rc_locked(job);
+ job_update_rc(job);
+ }
+
job_unlock();
AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
(job_enter(job), !job_is_completed(job)));
AIO_WAIT_WHILE(job->aio_context,
(job_enter(job), !job_is_completed(job)));

View File

@@ -0,0 +1,294 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 20 Aug 2020 14:25:00 +0200
Subject: [PATCH] PVE-Backup: Use a transaction to synchronize job states
By using a JobTxn, we can sync dirty bitmaps only when *all* jobs were
successful - meaning we don't need to remove them when the backup fails,
since QEMU's BITMAP_SYNC_MODE_ON_SUCCESS will now handle that for us.
To keep the rate-limiting and IO impact from before, we use a sequential
transaction, so drives will still be backed up one after the other.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
pve-backup.c | 169 +++++++++++++++------------------------------------
1 file changed, 50 insertions(+), 119 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 22420db26a..2e628d68e4 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -52,6 +52,7 @@ static struct PVEBackupState {
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
+ JobTxn *txn;
QemuMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
@@ -71,32 +72,12 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
- bool completed;
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
+ BlockJob *job;
} PVEBackupDevInfo;
-static void pvebackup_run_next_job(void);
-
-static BlockJob *
-lookup_active_block_job(PVEBackupDevInfo *di)
-{
- if (!di->completed && di->bs) {
- for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
- if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
- continue;
- }
-
- BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
- if (bjob && bjob->source_bs == di->bs) {
- return job;
- }
- }
- }
- return NULL;
-}
-
static void pvebackup_propagate_error(Error *err)
{
qemu_mutex_lock(&backup_state.stat.lock);
@@ -272,18 +253,6 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
if (local_err != NULL) {
pvebackup_propagate_error(local_err);
}
- } else {
- // on error or cancel we cannot ensure synchronization of dirty
- // bitmaps with backup server, so remove all and do full backup next
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- if (di->bitmap) {
- bdrv_release_dirty_bitmap(di->bitmap);
- }
- }
}
proxmox_backup_disconnect(backup_state.pbs);
@@ -322,8 +291,6 @@ static void pvebackup_complete_cb(void *opaque, int ret)
qemu_mutex_lock(&backup_state.backup_mutex);
- di->completed = true;
-
if (ret < 0) {
Error *local_err = NULL;
error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
@@ -336,20 +303,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
block_on_coroutine_fn(pvebackup_complete_stream, di);
- // remove self from job queue
+ // remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
- if (di->bitmap && ret < 0) {
- // on error or cancel we cannot ensure synchronization of dirty
- // bitmaps with backup server, so remove all and do full backup next
- bdrv_release_dirty_bitmap(di->bitmap);
- }
-
g_free(di);
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ /* call cleanup if we're the last job */
+ if (!g_list_first(backup_state.di_list)) {
+ block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ }
- pvebackup_run_next_job();
+ qemu_mutex_unlock(&backup_state.backup_mutex);
}
static void pvebackup_cancel(void)
@@ -371,36 +335,28 @@ static void pvebackup_cancel(void)
proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
+ /* it's enough to cancel one job in the transaction, the rest will follow
+ * automatically */
+ GList *bdi = g_list_first(backup_state.di_list);
+ BlockJob *cancel_job = bdi && bdi->data ?
+ ((PVEBackupDevInfo *)bdi->data)->job :
+ NULL;
+
+ /* ref the job before releasing the mutex, just to be safe */
+ if (cancel_job) {
+ job_ref(&cancel_job->job);
+ }
+
+ /* job_cancel_sync may enter the job, so we need to release the
+ * backup_mutex to avoid deadlock */
qemu_mutex_unlock(&backup_state.backup_mutex);
- for(;;) {
-
- BlockJob *next_job = NULL;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
-
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- BlockJob *job = lookup_active_block_job(di);
- if (job != NULL) {
- next_job = job;
- break;
- }
- }
-
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (next_job) {
- AioContext *aio_context = next_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&next_job->job);
- aio_context_release(aio_context);
- } else {
- break;
- }
+ if (cancel_job) {
+ AioContext *aio_context = cancel_job->job.aio_context;
+ aio_context_acquire(aio_context);
+ job_cancel_sync(&cancel_job->job);
+ job_unref(&cancel_job->job);
+ aio_context_release(aio_context);
}
}
@@ -459,51 +415,19 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-bool job_should_pause(Job *job);
-
-static void pvebackup_run_next_job(void)
-{
- assert(!qemu_in_coroutine());
-
- qemu_mutex_lock(&backup_state.backup_mutex);
-
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- BlockJob *job = lookup_active_block_job(di);
-
- if (job) {
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- AioContext *aio_context = job->job.aio_context;
- aio_context_acquire(aio_context);
-
- if (job_should_pause(&job->job)) {
- bool error_or_canceled = pvebackup_error_or_canceled();
- if (error_or_canceled) {
- job_cancel_sync(&job->job);
- } else {
- job_resume(&job->job);
- }
- }
- aio_context_release(aio_context);
- return;
- }
- }
-
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
-
- qemu_mutex_unlock(&backup_state.backup_mutex);
-}
-
static bool create_backup_jobs(void) {
assert(!qemu_in_coroutine());
Error *local_err = NULL;
+ /* create job transaction to synchronize bitmap commit and cancel all
+ * jobs in case one errors */
+ if (backup_state.txn) {
+ job_txn_unref(backup_state.txn);
+ }
+ backup_state.txn = job_txn_new_seq();
+
BackupPerf perf = { .max_workers = 16 };
/* create and start all jobs (paused state) */
@@ -526,7 +450,7 @@ static bool create_backup_jobs(void) {
BlockJob *job = backup_job_create(
NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
+ JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
aio_context_release(aio_context);
@@ -538,7 +462,8 @@ static bool create_backup_jobs(void) {
pvebackup_propagate_error(create_job_err);
break;
}
- job_start(&job->job);
+
+ di->job = job;
bdrv_unref(di->target);
di->target = NULL;
@@ -556,6 +481,10 @@ static bool create_backup_jobs(void) {
bdrv_unref(di->target);
di->target = NULL;
}
+
+ if (di->job) {
+ job_unref(&di->job->job);
+ }
}
}
@@ -946,10 +875,6 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (di->bitmap) {
- bdrv_release_dirty_bitmap(di->bitmap);
- }
-
if (di->target) {
bdrv_unref(di->target);
}
@@ -1038,9 +963,15 @@ UuidInfo *qmp_backup(
block_on_coroutine_fn(pvebackup_co_prepare, &task);
if (*errp == NULL) {
- create_backup_jobs();
+ bool errors = create_backup_jobs();
qemu_mutex_unlock(&backup_state.backup_mutex);
- pvebackup_run_next_job();
+
+ if (!errors) {
+ /* start the first job in the transaction
+ * note: this might directly enter the job, so we need to do this
+ * after unlocking the backup_mutex */
+ job_txn_start_seq(backup_state.txn);
+ }
} else {
qemu_mutex_unlock(&backup_state.backup_mutex);
}

View File

@@ -0,0 +1,501 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 28 Sep 2020 13:40:51 +0200
Subject: [PATCH] PVE-Backup: Don't block on finishing and cleanup
create_backup_jobs
proxmox_backup_co_finish is already async, but previously we would wait
for the coroutine using block_on_coroutine_fn(). Avoid this by
scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
as a real coroutine when calling from pvebackup_complete_cb. This is ok,
since complete_stream uses the backup_mutex internally to synchronize,
and other streams can happily continue writing in the meantime anyway.
To accomodate, backup_mutex is converted to a CoMutex. This means
converting every user to a coroutine. This is not just useful here, but
will come in handy once this series[0] is merged, and QMP calls can be
yield-able coroutines too. Then we can also finally get rid of
block_on_coroutine_fn.
Cases of aio_context_acquire/release from within what is now a coroutine
are changed to aio_co_reschedule_self, which works since a running
coroutine always holds the aio lock for the context it is running in.
job_cancel_sync is called from a BH since it can't be run from a
coroutine (uses AIO_WAIT_WHILE internally).
Same thing for create_backup_jobs, which is converted to a BH too.
To communicate the finishing state, a new property is introduced to
query-backup: 'finishing'. A new state is explicitly not used, since
that would break compatibility with older qemu-server versions.
Also fix create_backup_jobs:
No more weird bool returns, just the standard "errp" format used
everywhere else too. With this, if backup_job_create fails, the error
message is actually returned over QMP and can be shown to the user.
To facilitate correct cleanup on such an error, we call
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
This additionally allows us to actually hold the backup_mutex during
operation.
Also add a job_cancel_sync before job_unref, since a job must be in
STATUS_NULL to be deleted by unref, which could trigger an assert
before.
[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
pve-backup.c | 217 ++++++++++++++++++++++++++++---------------
qapi/block-core.json | 5 +-
2 files changed, 144 insertions(+), 78 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 2e628d68e4..9c20ef3a5e 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
static struct PVEBackupState {
struct {
- // Everithing accessed from qmp_backup_query command is protected using lock
+ // Everything accessed from qmp_backup_query command is protected using
+ // this lock. Do NOT hold this lock for long times, as it is sometimes
+ // acquired from coroutines, and thus any wait time may block the guest.
QemuMutex lock;
Error *error;
time_t start_time;
@@ -47,20 +49,22 @@ static struct PVEBackupState {
size_t reused;
size_t zero_bytes;
GList *bitmap_list;
+ bool finishing;
+ bool starting;
} stat;
int64_t speed;
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
JobTxn *txn;
- QemuMutex backup_mutex;
+ CoMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
static void pvebackup_init(void)
{
qemu_mutex_init(&backup_state.stat.lock);
- qemu_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.backup_mutex);
qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
@@ -72,6 +76,7 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
+ int completed_ret; // INT_MAX if not completed
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
@@ -227,12 +232,12 @@ pvebackup_co_dump_vma_cb(
}
// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_co_cleanup(void *unused)
+static void coroutine_fn pvebackup_co_cleanup(void)
{
assert(qemu_in_coroutine());
qemu_mutex_lock(&backup_state.stat.lock);
- backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = true;
qemu_mutex_unlock(&backup_state.stat.lock);
if (backup_state.vmaw) {
@@ -261,35 +266,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
}
-// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_complete_stream(void *opaque)
+static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
{
PVEBackupDevInfo *di = opaque;
+ int ret = di->completed_ret;
- bool error_or_canceled = pvebackup_error_or_canceled();
-
- if (backup_state.vmaw) {
- vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ bool starting = backup_state.stat.starting;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ if (starting) {
+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
+ * must) skip all cleanup, as we don't know what has and hasn't been
+ * initialized yet. */
+ return;
}
- if (backup_state.pbs && !error_or_canceled) {
- Error *local_err = NULL;
- proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
- if (local_err != NULL) {
- pvebackup_propagate_error(local_err);
- }
- }
-}
-
-static void pvebackup_complete_cb(void *opaque, int ret)
-{
- assert(!qemu_in_coroutine());
-
- PVEBackupDevInfo *di = opaque;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (ret < 0) {
Error *local_err = NULL;
@@ -301,7 +300,19 @@ static void pvebackup_complete_cb(void *opaque, int ret)
assert(di->target == NULL);
- block_on_coroutine_fn(pvebackup_complete_stream, di);
+ bool error_or_canceled = pvebackup_error_or_canceled();
+
+ if (backup_state.vmaw) {
+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ }
+
+ if (backup_state.pbs && !error_or_canceled) {
+ Error *local_err = NULL;
+ proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
+ if (local_err != NULL) {
+ pvebackup_propagate_error(local_err);
+ }
+ }
// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -310,21 +321,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
/* call cleanup if we're the last job */
if (!g_list_first(backup_state.di_list)) {
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ pvebackup_co_cleanup();
}
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-static void pvebackup_cancel(void)
+static void pvebackup_complete_cb(void *opaque, int ret)
{
- assert(!qemu_in_coroutine());
+ PVEBackupDevInfo *di = opaque;
+ di->completed_ret = ret;
+ /*
+ * Schedule stream cleanup in async coroutine. close_image and finish might
+ * take a while, so we can't block on them here. This way it also doesn't
+ * matter if we're already running in a coroutine or not.
+ * Note: di is a pointer to an entry in the global backup_state struct, so
+ * it stays valid.
+ */
+ Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
+ aio_co_enter(qemu_get_aio_context(), co);
+}
+
+/*
+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
+ * main loop processing via a bottom half.
+ */
+static void job_cancel_bh(void *opaque) {
+ CoCtxData *data = (CoCtxData*)opaque;
+ Job *job = (Job*)data->data;
+ AioContext *job_ctx = job->aio_context;
+ aio_context_acquire(job_ctx);
+ job_cancel_sync(job);
+ aio_context_release(job_ctx);
+ aio_co_enter(data->ctx, data->co);
+}
+
+static void coroutine_fn pvebackup_co_cancel(void *opaque)
+{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
pvebackup_propagate_error(cancel_err);
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
@@ -342,27 +381,22 @@ static void pvebackup_cancel(void)
((PVEBackupDevInfo *)bdi->data)->job :
NULL;
- /* ref the job before releasing the mutex, just to be safe */
if (cancel_job) {
- job_ref(&cancel_job->job);
+ CoCtxData data = {
+ .ctx = qemu_get_current_aio_context(),
+ .co = qemu_coroutine_self(),
+ .data = &cancel_job->job,
+ };
+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+ qemu_coroutine_yield();
}
- /* job_cancel_sync may enter the job, so we need to release the
- * backup_mutex to avoid deadlock */
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (cancel_job) {
- AioContext *aio_context = cancel_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&cancel_job->job);
- job_unref(&cancel_job->job);
- aio_context_release(aio_context);
- }
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
void qmp_backup_cancel(Error **errp)
{
- pvebackup_cancel();
+ block_on_coroutine_fn(pvebackup_co_cancel, NULL);
}
// assumes the caller holds backup_mutex
@@ -415,10 +449,18 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-static bool create_backup_jobs(void) {
+/*
+ * backup_job_create can *not* be run from a coroutine (and requires an
+ * acquired AioContext), so this can't either.
+ * The caller is responsible that backup_mutex is held nonetheless.
+ */
+static void create_backup_jobs_bh(void *opaque) {
assert(!qemu_in_coroutine());
+ CoCtxData *data = (CoCtxData*)opaque;
+ Error **errp = (Error**)data->data;
+
Error *local_err = NULL;
/* create job transaction to synchronize bitmap commit and cancel all
@@ -454,24 +496,19 @@ static bool create_backup_jobs(void) {
aio_context_release(aio_context);
- if (!job || local_err != NULL) {
- Error *create_job_err = NULL;
- error_setg(&create_job_err, "backup_job_create failed: %s",
+ di->job = job;
+
+ if (!job || local_err) {
+ error_setg(errp, "backup_job_create failed: %s",
local_err ? error_get_pretty(local_err) : "null");
-
- pvebackup_propagate_error(create_job_err);
break;
}
- di->job = job;
-
bdrv_unref(di->target);
di->target = NULL;
}
- bool errors = pvebackup_error_or_canceled();
-
- if (errors) {
+ if (*errp) {
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -483,12 +520,17 @@ static bool create_backup_jobs(void) {
}
if (di->job) {
+ AioContext *ctx = di->job->job.aio_context;
+ aio_context_acquire(ctx);
+ job_cancel_sync(&di->job->job);
job_unref(&di->job->job);
+ aio_context_release(ctx);
}
}
}
- return errors;
+ /* return */
+ aio_co_enter(data->ctx, data->co);
}
typedef struct QmpBackupTask {
@@ -525,11 +567,12 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;
-// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_prepare(void *opaque)
{
assert(qemu_in_coroutine());
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
QmpBackupTask *task = opaque;
task->result = NULL; // just to be sure
@@ -550,8 +593,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -618,6 +662,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
di->size = size;
total += size;
+
+ di->completed_ret = INT_MAX;
}
uuid_generate(uuid);
@@ -849,6 +895,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.finishing = false;
+ backup_state.stat.starting = true;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -863,6 +911,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info->UUID = uuid_str;
task->result = uuid_info;
+
+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
+ * points, and we'll release it as soon as the BH reschedules us.
+ */
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &local_err,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
+ qemu_coroutine_yield();
+
+ if (local_err) {
+ error_propagate(task->errp, local_err);
+ goto err;
+ }
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.starting = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ /* start the first job in the transaction */
+ job_txn_start_seq(backup_state.txn);
+
return;
err_mutex:
@@ -885,6 +960,7 @@ err:
g_free(di);
}
g_list_free(di_list);
+ backup_state.di_list = NULL;
if (devs) {
g_strfreev(devs);
@@ -905,6 +981,8 @@ err:
}
task->result = NULL;
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -958,24 +1036,8 @@ UuidInfo *qmp_backup(
.errp = errp,
};
- qemu_mutex_lock(&backup_state.backup_mutex);
-
block_on_coroutine_fn(pvebackup_co_prepare, &task);
- if (*errp == NULL) {
- bool errors = create_backup_jobs();
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (!errors) {
- /* start the first job in the transaction
- * note: this might directly enter the job, so we need to do this
- * after unlocking the backup_mutex */
- job_txn_start_seq(backup_state.txn);
- }
- } else {
- qemu_mutex_unlock(&backup_state.backup_mutex);
- }
-
return task.result;
}
@@ -1027,6 +1089,7 @@ BackupStatus *qmp_query_backup(Error **errp)
info->transferred = backup_state.stat.transferred;
info->has_reused = true;
info->reused = backup_state.stat.reused;
+ info->finishing = backup_state.stat.finishing;
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 170c13984d..a0d1d278e9 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -729,12 +729,15 @@
#
# @uuid: uuid for this backup job
#
+# @finishing: if status='active' and finishing=true, then the backup process is
+# waiting for the target to finish.
+#
##
{ 'struct': 'BackupStatus',
'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
'*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
- '*backup-file': 'str', '*uuid': 'str' } }
+ '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }
##
# @BackupFormat:

View File

@@ -13,68 +13,61 @@ safe migration is possible and makes sense.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: split up state_pending for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/migration/misc.h | 3 ++
migration/meson.build | 2 +
migration/migration.c | 1 +
migration/pbs-state.c | 104 +++++++++++++++++++++++++++++++++++++++
migration/pbs-state.c | 106 +++++++++++++++++++++++++++++++++++++++
pve-backup.c | 1 +
qapi/block-core.json | 6 +++
6 files changed, 117 insertions(+)
6 files changed, 119 insertions(+)
create mode 100644 migration/pbs-state.c
diff --git a/include/migration/misc.h b/include/migration/misc.h
index 804eb23c06..c75b146ae6 100644
index 465906710d..4f0aeceb6f 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -106,4 +106,7 @@ bool migration_incoming_postcopy_advised(void);
/* True if background snapshot is active */
bool migration_in_bg_snapshot(void);
@@ -75,4 +75,7 @@ bool migration_in_bg_snapshot(void);
/* migration/block-dirty-bitmap.c */
void dirty_bitmap_mig_init(void);
+/* migration/pbs-state.c */
+void pbs_state_mig_init(void);
+
#endif
diff --git a/migration/meson.build b/migration/meson.build
index 075b013971..eca57cb2a3 100644
index ea9aedeefc..c27dc9bd97 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -8,6 +8,7 @@ migration_files = files(
@@ -7,8 +7,10 @@ migration_files = files(
'qemu-file-channel.c',
'qemu-file.c',
'yank_functions.c',
)
+system_ss.add(libproxmox_backup_qemu)
system_ss.add(files(
'block-dirty-bitmap.c',
@@ -27,6 +28,7 @@ system_ss.add(files(
'multifd-zlib.c',
'multifd-zero-page.c',
'options.c',
+ 'pbs-state.c',
'postcopy-ram.c',
'savevm.c',
'savevm-async.c',
)
softmmu_ss.add(migration_files)
+softmmu_ss.add(libproxmox_backup_qemu)
softmmu_ss.add(files(
'block-dirty-bitmap.c',
diff --git a/migration/migration.c b/migration/migration.c
index 8c5bd0a75c..491d9aa017 100644
index 041b8451a6..9df2eed75e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -266,6 +266,7 @@ void migration_object_init(void)
/* Initialize cpu throttle timers */
cpu_throttle_init();
@@ -218,6 +218,7 @@ void migration_object_init(void)
blk_mig_init();
ram_mig_init();
dirty_bitmap_mig_init();
+ pbs_state_mig_init();
}
typedef struct {
void migration_cancel(void)
diff --git a/migration/pbs-state.c b/migration/pbs-state.c
new file mode 100644
index 0000000000..a97187e4d7
index 0000000000..29f2b3860d
--- /dev/null
+++ b/migration/pbs-state.c
@@ -0,0 +1,104 @@
@@ -0,0 +1,106 @@
+/*
+ * PBS (dirty-bitmap) state migration
+ */
@@ -93,8 +86,11 @@ index 0000000000..a97187e4d7
+/* state is accessed via this static variable directly, 'opaque' is NULL */
+static PBSState pbs_state;
+
+static void pbs_state_pending(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
+static void pbs_state_save_pending(QEMUFile *f, void *opaque,
+ uint64_t max_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ /* we send everything in save_setup, so nothing is ever pending */
+}
@@ -120,7 +116,7 @@ index 0000000000..a97187e4d7
+}
+
+/* serialize PBS state and send to target via f, called on source */
+static int pbs_state_save_setup(QEMUFile *f, void *opaque, Error **errp)
+static int pbs_state_save_setup(QEMUFile *f, void *opaque)
+{
+ size_t buf_size;
+ uint8_t *buf = proxmox_export_state(&buf_size);
@@ -164,8 +160,7 @@ index 0000000000..a97187e4d7
+static SaveVMHandlers savevm_pbs_state_handlers = {
+ .save_setup = pbs_state_save_setup,
+ .has_postcopy = pbs_state_has_postcopy,
+ .state_pending_exact = pbs_state_pending,
+ .state_pending_estimate = pbs_state_pending,
+ .save_live_pending = pbs_state_save_pending,
+ .is_active_iterate = pbs_state_is_active_iterate,
+ .load_state = pbs_state_load,
+ .is_active = pbs_state_is_active,
@@ -180,22 +175,22 @@ index 0000000000..a97187e4d7
+ NULL);
+}
diff --git a/pve-backup.c b/pve-backup.c
index fea0152de0..faa6a9b93c 100644
index 9c20ef3a5e..59ccb38ceb 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1083,6 +1083,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
@@ -1132,6 +1132,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index d45e8975a7..9795247c1f 100644
index a0d1d278e9..e5de769dc1 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1004,6 +1004,11 @@
@@ -838,6 +838,11 @@
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
#
@@ -204,14 +199,14 @@ index d45e8975a7..9795247c1f 100644
+# migration cap if this is false/unset may lead
+# to crashes on migration!
+#
# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
# parameter.
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
@@ -1017,6 +1022,7 @@
##
@@ -845,6 +850,7 @@
'data': { 'pbs-dirty-bitmap': 'bool',
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
'pbs-library-version': 'str',
'backup-max-workers': 'bool' } }
'pbs-library-version': 'str' } }
##

View File

@@ -1,81 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 23 Jun 2022 14:00:05 +0200
Subject: [PATCH] Revert "block/rbd: workaround for ceph issue #53784"
This reverts commit fc176116cdea816ceb8dd969080b2b95f58edbc0 in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/rbd.c | 42 ++----------------------------------------
1 file changed, 2 insertions(+), 40 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 728bce3b1e..6c9a8e0add 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1515,7 +1515,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r;
RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags;
- uint64_t head = 0;
assert(offset + bytes <= s->image_size);
@@ -1543,43 +1542,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status;
}
-#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
- /*
- * librbd had a bug until early 2022 that affected all versions of ceph that
- * supported fast-diff. This bug results in reporting of incorrect offsets
- * if the offset parameter to rbd_diff_iterate2 is not object aligned.
- * Work around this bug by rounding down the offset to object boundaries.
- * This is OK because we call rbd_diff_iterate2 with whole_object = true.
- * However, this workaround only works for non cloned images with default
- * striping.
- *
- * See: https://tracker.ceph.com/issues/53784
- */
-
- /* check if RBD image has non-default striping enabled */
- if (features & RBD_FEATURE_STRIPINGV2) {
- return status;
- }
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
- /*
- * check if RBD image is a clone (= has a parent).
- *
- * rbd_get_parent_info is deprecated from Nautilus onwards, but the
- * replacement rbd_get_parent is not present in Luminous and Mimic.
- */
- if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
- return status;
- }
-#pragma GCC diagnostic pop
-
- head = req.offs & (s->object_size - 1);
- req.offs -= head;
- bytes += head;
-#endif
-
- r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
+ r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status;
@@ -1598,8 +1561,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
}
- assert(req.bytes > head);
- *pnum = req.bytes - head;
+ *pnum = req.bytes;
return status;
}

View File

@@ -15,22 +15,18 @@ transferred.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/block-dirty-bitmap.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
migration/block-dirty-bitmap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index a7d55048c2..44078ea670 100644
index 35f5ef688d..c4640925e7 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -539,7 +539,11 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
}
@@ -538,7 +538,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) {
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
error_report_err(local_err);
- return -1;
+ if (errp != NULL) {
+ error_report_err(*errp);
+ *errp = NULL;
+ }
+ continue;
}

View File

@@ -21,10 +21,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 30 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index 979bf90cb7..961714a4be 100644
index 4d2a416ce7..c345d30812 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1392,12 +1392,42 @@ static char *get_initiator_name(QemuOpts *opts)
@@ -1372,12 +1372,42 @@ static char *get_initiator_name(QemuOpts *opts)
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;

View File

@@ -1,36 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 23 Jun 2022 14:00:07 +0200
Subject: [PATCH] Revert "block/rbd: fix handling of holes in
.bdrv_co_block_status"
This reverts commit 9e302f64bb407a9bb097b626da97228c2654cfee in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/rbd.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 6c9a8e0add..6f5fe90f3a 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1474,11 +1474,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs);
-
- /* treat a hole like an unallocated area and bail out */
- if (!exists) {
- return 0;
- }
+ /*
+ * we do not diff against a snapshot so we should never receive a callback
+ * for a hole.
+ */
+ assert(exists);
if (!req->exists && offs > req->offs) {
/*

View File

@@ -0,0 +1,598 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 26 Jan 2021 15:45:30 +0100
Subject: [PATCH] PVE: Use coroutine QMP for backup/cancel_backup
Finally turn backup QMP calls into coroutines, now that it's possible.
This has the benefit that calls are asynchronous to the main loop, i.e.
long running operations like connecting to a PBS server will no longer
hang the VM.
Additionally, it allows us to get rid of block_on_coroutine_fn, which
was always a hacky workaround.
While we're already spring cleaning, also remove the QmpBackupTask
struct, since we can now put the 'prepare' function directly into
qmp_backup and thus no longer need those giant walls of text.
(Note that for our patches to work with 5.2.0 this change is actually
required, otherwise monitor_get_fd() fails as we're not in a QMP
coroutine, but one we start ourselves - we could of course set the
monitor for that coroutine ourselves, but let's just fix it the right
way instead)
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 +-
hmp-commands.hx | 2 +
proxmox-backup-client.c | 31 -----
pve-backup.c | 232 ++++++++++-----------------------
qapi/block-core.json | 4 +-
5 files changed, 77 insertions(+), 196 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 69254396d5..b838586fc0 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1016,7 +1016,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
g_free(global_snapshots);
}
-void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup_cancel(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
@@ -1025,7 +1025,7 @@ void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, error);
}
-void hmp_backup(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 7faba36b39..dca4e58858 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -109,6 +109,7 @@ ERST
"\n\t\t\t Use -d to dump data into a directory instead"
"\n\t\t\t of using VMA format.",
.cmd = hmp_backup,
+ .coroutine = true,
},
SRST
@@ -122,6 +123,7 @@ ERST
.params = "",
.help = "cancel the current VM backup",
.cmd = hmp_backup_cancel,
+ .coroutine = true,
},
SRST
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index 4ce7bc0b5e..0923037dec 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -5,37 +5,6 @@
/* Proxmox Backup Server client bindings using coroutines */
-typedef struct BlockOnCoroutineWrapper {
- AioContext *ctx;
- CoroutineEntry *entry;
- void *entry_arg;
- bool finished;
-} BlockOnCoroutineWrapper;
-
-static void coroutine_fn block_on_coroutine_wrapper(void *opaque)
-{
- BlockOnCoroutineWrapper *wrapper = opaque;
- wrapper->entry(wrapper->entry_arg);
- wrapper->finished = true;
- aio_wait_kick();
-}
-
-void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg)
-{
- assert(!qemu_in_coroutine());
-
- AioContext *ctx = qemu_get_current_aio_context();
- BlockOnCoroutineWrapper wrapper = {
- .finished = false,
- .entry = entry,
- .entry_arg = entry_arg,
- .ctx = ctx,
- };
- Coroutine *wrapper_co = qemu_coroutine_create(block_on_coroutine_wrapper, &wrapper);
- aio_co_enter(ctx, wrapper_co);
- AIO_WAIT_WHILE(ctx, !wrapper.finished);
-}
-
// This is called from another thread, so we use aio_co_schedule()
static void proxmox_backup_schedule_wake(void *data) {
CoCtxData *waker = (CoCtxData *)data;
diff --git a/pve-backup.c b/pve-backup.c
index 59ccb38ceb..f858003a06 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -357,7 +357,7 @@ static void job_cancel_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-static void coroutine_fn pvebackup_co_cancel(void *opaque)
+void coroutine_fn qmp_backup_cancel(Error **errp)
{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
@@ -394,11 +394,6 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-void qmp_backup_cancel(Error **errp)
-{
- block_on_coroutine_fn(pvebackup_co_cancel, NULL);
-}
-
// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config(
const char *file,
@@ -533,50 +528,27 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-typedef struct QmpBackupTask {
- const char *backup_file;
- bool has_password;
- const char *password;
- bool has_keyfile;
- const char *keyfile;
- bool has_key_password;
- const char *key_password;
- bool has_backup_id;
- const char *backup_id;
- bool has_backup_time;
- const char *fingerprint;
- bool has_fingerprint;
- int64_t backup_time;
- bool has_use_dirty_bitmap;
- bool use_dirty_bitmap;
- bool has_format;
- BackupFormat format;
- bool has_config_file;
- const char *config_file;
- bool has_firewall_file;
- const char *firewall_file;
- bool has_devlist;
- const char *devlist;
- bool has_compress;
- bool compress;
- bool has_encrypt;
- bool encrypt;
- bool has_speed;
- int64_t speed;
- Error **errp;
- UuidInfo *result;
-} QmpBackupTask;
-
-static void coroutine_fn pvebackup_co_prepare(void *opaque)
+UuidInfo coroutine_fn *qmp_backup(
+ const char *backup_file,
+ bool has_password, const char *password,
+ bool has_keyfile, const char *keyfile,
+ bool has_key_password, const char *key_password,
+ bool has_fingerprint, const char *fingerprint,
+ bool has_backup_id, const char *backup_id,
+ bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
+ bool has_format, BackupFormat format,
+ bool has_config_file, const char *config_file,
+ bool has_firewall_file, const char *firewall_file,
+ bool has_devlist, const char *devlist,
+ bool has_speed, int64_t speed, Error **errp)
{
assert(qemu_in_coroutine());
qemu_co_mutex_lock(&backup_state.backup_mutex);
- QmpBackupTask *task = opaque;
-
- task->result = NULL; // just to be sure
-
BlockBackend *blk;
BlockDriverState *bs = NULL;
const char *backup_dir = NULL;
@@ -593,17 +565,17 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
+ return NULL;
}
/* Todo: try to auto-detect format based on file name */
- BackupFormat format = task->has_format ? task->format : BACKUP_FORMAT_VMA;
+ format = has_format ? format : BACKUP_FORMAT_VMA;
- if (task->has_devlist) {
- devs = g_strsplit_set(task->devlist, ",;:", -1);
+ if (has_devlist) {
+ devs = g_strsplit_set(devlist, ",;:", -1);
gchar **d = devs;
while (d && *d) {
@@ -611,14 +583,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (blk) {
bs = blk_bs(blk);
if (!bdrv_is_inserted(bs)) {
- error_setg(task->errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
goto err;
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
di_list = g_list_append(di_list, di);
} else {
- error_set(task->errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", *d);
goto err;
}
@@ -641,7 +613,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (!di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
goto err;
}
@@ -651,13 +623,13 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, task->errp)) {
+ if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
goto err;
}
ssize_t size = bdrv_getlength(di->bs);
if (size < 0) {
- error_setg_errno(task->errp, -di->size, "bdrv_getlength failed");
+ error_setg_errno(errp, -di->size, "bdrv_getlength failed");
goto err;
}
di->size = size;
@@ -684,47 +656,44 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (format == BACKUP_FORMAT_PBS) {
- if (!task->has_password) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
+ if (!has_password) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
goto err_mutex;
}
- if (!task->has_backup_id) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
+ if (!has_backup_id) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
goto err_mutex;
}
- if (!task->has_backup_time) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
+ if (!has_backup_time) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
- bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
-
-
char *pbs_err = NULL;
pbs = proxmox_backup_new(
- task->backup_file,
- task->backup_id,
- task->backup_time,
+ backup_file,
+ backup_id,
+ backup_time,
dump_cb_block_size,
- task->has_password ? task->password : NULL,
- task->has_keyfile ? task->keyfile : NULL,
- task->has_key_password ? task->key_password : NULL,
- task->has_compress ? task->compress : true,
- task->has_encrypt ? task->encrypt : task->has_keyfile,
- task->has_fingerprint ? task->fingerprint : NULL,
+ has_password ? password : NULL,
+ has_keyfile ? keyfile : NULL,
+ has_key_password ? key_password : NULL,
+ has_compress ? compress : true,
+ has_encrypt ? encrypt : has_keyfile,
+ has_fingerprint ? fingerprint : NULL,
&pbs_err);
if (!pbs) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
goto err_mutex;
}
- int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ int connect_result = proxmox_backup_co_connect(pbs, errp);
if (connect_result < 0)
goto err_mutex;
@@ -743,9 +712,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
- if (use_dirty_bitmap) {
+ if (has_use_dirty_bitmap && use_dirty_bitmap) {
if (bitmap == NULL) {
- bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, errp);
if (!bitmap) {
goto err_mutex;
}
@@ -775,12 +744,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp);
if (dev_id < 0) {
goto err_mutex;
}
- if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, errp))) {
goto err_mutex;
}
@@ -794,10 +763,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
+ vmaw = vma_writer_create(backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
}
goto err_mutex;
}
@@ -808,25 +777,25 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, errp))) {
goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
if (di->dev_id <= 0) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- if (mkdir(task->backup_file, 0640) != 0) {
- error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
- task->backup_file);
+ if (mkdir(backup_file, 0640) != 0) {
+ error_setg_errno(errp, errno, "can't create directory '%s'\n",
+ backup_file);
goto err_mutex;
}
- backup_dir = task->backup_file;
+ backup_dir = backup_file;
l = di_list;
while (l) {
@@ -840,34 +809,34 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bdrv_img_create(di->targetfile, "raw", NULL, NULL, NULL,
di->size, flags, false, &local_err);
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
}
} else {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
goto err_mutex;
}
/* add configuration file to archive */
- if (task->has_config_file) {
- if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_config_file) {
+ if (pvebackup_co_add_config(config_file, config_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
/* add firewall file to archive */
- if (task->has_firewall_file) {
- if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_firewall_file) {
+ if (pvebackup_co_add_config(firewall_file, firewall_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
@@ -885,7 +854,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (backup_state.stat.backup_file) {
g_free(backup_state.stat.backup_file);
}
- backup_state.stat.backup_file = g_strdup(task->backup_file);
+ backup_state.stat.backup_file = g_strdup(backup_file);
uuid_copy(backup_state.stat.uuid, uuid);
uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
@@ -900,7 +869,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_mutex_unlock(&backup_state.stat.lock);
- backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
+ backup_state.speed = (has_speed && speed > 0) ? speed : 0;
backup_state.vmaw = vmaw;
backup_state.pbs = pbs;
@@ -910,8 +879,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str;
- task->result = uuid_info;
-
/* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
* backup_mutex locked. This is fine, a CoMutex can be held across yield
* points, and we'll release it as soon as the BH reschedules us.
@@ -925,7 +892,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_coroutine_yield();
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err;
}
@@ -938,7 +905,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
/* start the first job in the transaction */
job_txn_start_seq(backup_state.txn);
- return;
+ return uuid_info;
err_mutex:
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -969,7 +936,7 @@ err:
if (vmaw) {
Error *err = NULL;
vma_writer_close(vmaw, &err);
- unlink(task->backup_file);
+ unlink(backup_file);
}
if (pbs) {
@@ -980,65 +947,8 @@ err:
rmdir(backup_dir);
}
- task->result = NULL;
-
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
-}
-
-UuidInfo *qmp_backup(
- const char *backup_file,
- bool has_password, const char *password,
- bool has_keyfile, const char *keyfile,
- bool has_key_password, const char *key_password,
- bool has_fingerprint, const char *fingerprint,
- bool has_backup_id, const char *backup_id,
- bool has_backup_time, int64_t backup_time,
- bool has_use_dirty_bitmap, bool use_dirty_bitmap,
- bool has_compress, bool compress,
- bool has_encrypt, bool encrypt,
- bool has_format, BackupFormat format,
- bool has_config_file, const char *config_file,
- bool has_firewall_file, const char *firewall_file,
- bool has_devlist, const char *devlist,
- bool has_speed, int64_t speed, Error **errp)
-{
- QmpBackupTask task = {
- .backup_file = backup_file,
- .has_password = has_password,
- .password = password,
- .has_keyfile = has_keyfile,
- .keyfile = keyfile,
- .has_key_password = has_key_password,
- .key_password = key_password,
- .has_fingerprint = has_fingerprint,
- .fingerprint = fingerprint,
- .has_backup_id = has_backup_id,
- .backup_id = backup_id,
- .has_backup_time = has_backup_time,
- .backup_time = backup_time,
- .has_use_dirty_bitmap = has_use_dirty_bitmap,
- .use_dirty_bitmap = use_dirty_bitmap,
- .has_compress = has_compress,
- .compress = compress,
- .has_encrypt = has_encrypt,
- .encrypt = encrypt,
- .has_format = has_format,
- .format = format,
- .has_config_file = has_config_file,
- .config_file = config_file,
- .has_firewall_file = has_firewall_file,
- .firewall_file = firewall_file,
- .has_devlist = has_devlist,
- .devlist = devlist,
- .has_speed = has_speed,
- .speed = speed,
- .errp = errp,
- };
-
- block_on_coroutine_fn(pvebackup_co_prepare, &task);
-
- return task.result;
+ return NULL;
}
BackupStatus *qmp_query_backup(Error **errp)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e5de769dc1..afa67c28d2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -801,7 +801,7 @@
'*config-file': 'str',
'*firewall-file': 'str',
'*devlist': 'str', '*speed': 'int' },
- 'returns': 'UuidInfo' }
+ 'returns': 'UuidInfo', 'coroutine': true }
##
# @query-backup:
@@ -823,7 +823,7 @@
# Notes: This command succeeds even if there is no backup process running.
#
##
-{ 'command': 'backup-cancel' }
+{ 'command': 'backup-cancel', 'coroutine': true }
##
# @ProxmoxSupportStatus:

View File

@@ -1,162 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Tue, 17 May 2022 09:46:02 +0200
Subject: [PATCH] Revert "block/rbd: implement bdrv_co_block_status"
During backup, bdrv_co_block_status is called for each block copy
chunk. When RBD is used, the current implementation with
rbd_diff_iterate2() using whole_object=true takes about linearly more
time, depending on the image size. Since there are linearly more
chunks, the slowdown is quadratic, becoming unacceptable for large
images (starting somewhere between 500-1000 GiB in my testing).
This reverts commit 0347a8fd4c3faaedf119be04c197804be40a384b as a
stop-gap measure, until it's clear how to make the implemenation
more efficient.
Upstream bug report:
https://gitlab.com/qemu-project/qemu/-/issues/1026
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/rbd.c | 112 ----------------------------------------------------
1 file changed, 112 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 6f5fe90f3a..24e820d056 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -108,12 +108,6 @@ typedef struct RBDTask {
int64_t ret;
} RBDTask;
-typedef struct RBDDiffIterateReq {
- uint64_t offs;
- uint64_t bytes;
- bool exists;
-} RBDDiffIterateReq;
-
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
BlockdevOptionsRbd *opts, bool cache,
const char *keypairs, const char *secretid,
@@ -1460,111 +1454,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
return spec_info;
}
-/*
- * rbd_diff_iterate2 allows to interrupt the exection by returning a negative
- * value in the callback routine. Choose a value that does not conflict with
- * an existing exitcode and return it if we want to prematurely stop the
- * execution because we detected a change in the allocation status.
- */
-#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
-
-static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
- int exists, void *opaque)
-{
- RBDDiffIterateReq *req = opaque;
-
- assert(req->offs + req->bytes <= offs);
- /*
- * we do not diff against a snapshot so we should never receive a callback
- * for a hole.
- */
- assert(exists);
-
- if (!req->exists && offs > req->offs) {
- /*
- * we started in an unallocated area and hit the first allocated
- * block. req->bytes must be set to the length of the unallocated area
- * before the allocated area. stop further processing.
- */
- req->bytes = offs - req->offs;
- return QEMU_RBD_EXIT_DIFF_ITERATE2;
- }
-
- if (req->exists && offs > req->offs + req->bytes) {
- /*
- * we started in an allocated area and jumped over an unallocated area,
- * req->bytes contains the length of the allocated area before the
- * unallocated area. stop further processing.
- */
- return QEMU_RBD_EXIT_DIFF_ITERATE2;
- }
-
- req->bytes += len;
- req->exists = true;
-
- return 0;
-}
-
-static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
-{
- BDRVRBDState *s = bs->opaque;
- int status, r;
- RBDDiffIterateReq req = { .offs = offset };
- uint64_t features, flags;
-
- assert(offset + bytes <= s->image_size);
-
- /* default to all sectors allocated */
- status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
- *map = offset;
- *file = bs;
- *pnum = bytes;
-
- /* check if RBD image supports fast-diff */
- r = rbd_get_features(s->image, &features);
- if (r < 0) {
- return status;
- }
- if (!(features & RBD_FEATURE_FAST_DIFF)) {
- return status;
- }
-
- /* check if RBD fast-diff result is valid */
- r = rbd_get_flags(s->image, &flags);
- if (r < 0) {
- return status;
- }
- if (flags & RBD_FLAG_FAST_DIFF_INVALID) {
- return status;
- }
-
- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
- qemu_rbd_diff_iterate_cb, &req);
- if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
- return status;
- }
- assert(req.bytes <= bytes);
- if (!req.exists) {
- if (r == 0) {
- /*
- * rbd_diff_iterate2 does not invoke callbacks for unallocated
- * areas. This here catches the case where no callback was
- * invoked at all (req.bytes == 0).
- */
- assert(req.bytes == 0);
- req.bytes = bytes;
- }
- status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
- }
-
- *pnum = req.bytes;
- return status;
-}
-
static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs)
{
BDRVRBDState *s = bs->opaque;
@@ -1801,7 +1690,6 @@ static BlockDriver bdrv_rbd = {
#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
.bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
#endif
- .bdrv_co_block_status = qemu_rbd_co_block_status,
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,

View File

@@ -0,0 +1,98 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 10 Feb 2021 11:07:06 +0100
Subject: [PATCH] PBS: add master key support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
this requires a new enough libproxmox-backup-qemu0, and allows querying
from the PVE side to avoid QMP calls with unsupported parameters.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
pve-backup.c | 3 +++
qapi/block-core.json | 7 +++++++
3 files changed, 11 insertions(+)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index b838586fc0..5b52b93232 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1039,6 +1039,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS password
false, NULL, // PBS keyfile
false, NULL, // PBS key_password
+ false, NULL, // PBS master_keyfile
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
diff --git a/pve-backup.c b/pve-backup.c
index f858003a06..04ebfc1e33 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -533,6 +533,7 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_password, const char *password,
bool has_keyfile, const char *keyfile,
bool has_key_password, const char *key_password,
+ bool has_master_keyfile, const char *master_keyfile,
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
@@ -681,6 +682,7 @@ UuidInfo coroutine_fn *qmp_backup(
has_password ? password : NULL,
has_keyfile ? keyfile : NULL,
has_key_password ? key_password : NULL,
+ has_master_keyfile ? master_keyfile : NULL,
has_compress ? compress : true,
has_encrypt ? encrypt : has_keyfile,
has_fingerprint ? fingerprint : NULL,
@@ -1044,5 +1046,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_dirty_bitmap_savevm = true;
ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
+ ret->pbs_masterkey = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index afa67c28d2..84e4406d21 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -772,6 +772,8 @@
#
# @key-password: password for keyfile (optional for format 'pbs')
#
+# @master-keyfile: PEM-formatted master public keyfile (optional for format 'pbs')
+#
# @fingerprint: server cert fingerprint (optional for format 'pbs')
#
# @backup-id: backup ID (required for format 'pbs')
@@ -791,6 +793,7 @@
'*password': 'str',
'*keyfile': 'str',
'*key-password': 'str',
+ '*master-keyfile': 'str',
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
@@ -843,6 +846,9 @@
# migration cap if this is false/unset may lead
# to crashes on migration!
#
+# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
+# parameter.
+#
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
##
@@ -851,6 +857,7 @@
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-dirty-bitmap-migration': 'bool',
+ 'pbs-masterkey': 'bool',
'pbs-library-version': 'str' } }
##

View File

@@ -1,43 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Tue, 26 Mar 2024 14:57:51 +0100
Subject: [PATCH] alloc-track: error out when auto-remove is not set
Since replacing the node now happens in the stream job, where the
option cannot be read from (it's internal to the driver), it will
always be treated as on.
qemu-server will always set it, make sure to have other users notice
the change (should they even exist). The option can be fully dropped
in the future while adding a version guard in qemu-server.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index b4a9851144..fc7d58a5d0 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -34,7 +34,6 @@ typedef struct {
BdrvDirtyBitmap *bitmap;
uint64_t granularity;
DropState drop_state;
- bool auto_remove;
} BDRVAllocTrackState;
static QemuOptsList runtime_opts = {
@@ -86,7 +85,11 @@ static int track_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
+ if (!qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false)) {
+ error_setg(errp, "alloc-track: requires auto-remove option to be set to on");
+ ret = -EINVAL;
+ goto fail;
+ }
/* open the target (write) node, backing will be attached by block layer */
file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,

View File

@@ -0,0 +1,53 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 9 Dec 2020 11:46:57 +0100
Subject: [PATCH] PVE: block/pbs: fast-path reads without allocation if
possible
...and switch over to g_malloc/g_free while at it to align with other
QEMU code.
Tracing shows the fast-path is taken almost all the time, though not
100% so the slow one is still necessary.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/pbs.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/pbs.c b/block/pbs.c
index 78dad0dcc4..ac54e816c0 100644
--- a/block/pbs.c
+++ b/block/pbs.c
@@ -200,7 +200,16 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
BDRVPBSState *s = bs->opaque;
int ret;
char *pbs_error = NULL;
- uint8_t *buf = malloc(bytes);
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
ReadCallbackData rcb = {
.co = qemu_coroutine_self(),
@@ -218,8 +227,10 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
return -EIO;
}
- qemu_iovec_from_buf(qiov, 0, buf, bytes);
- free(buf);
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
return ret;
}

View File

@@ -1,84 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 27 Mar 2024 11:15:39 +0100
Subject: [PATCH] alloc-track: avoid seemingly superfluous child permission
update
Doesn't seem necessary nowadays (maybe after commit "alloc-track: fix
deadlock during drop" where the dropping is not rescheduled and delayed
anymore or some upstream change). Should there really be some issue,
instead of having a drop state, this could also be just based off the
fact whether there is still a backing child.
Dumping the cumulative (shared) permissions for the BDS with a debug
print yields the same values after this patch and with QEMU 8.1,
namely 3 and 5.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 26 --------------------------
1 file changed, 26 deletions(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index fc7d58a5d0..b56425b7f0 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -25,15 +25,9 @@
#define TRACK_OPT_AUTO_REMOVE "auto-remove"
-typedef enum DropState {
- DropNone,
- DropInProgress,
-} DropState;
-
typedef struct {
BdrvDirtyBitmap *bitmap;
uint64_t granularity;
- DropState drop_state;
} BDRVAllocTrackState;
static QemuOptsList runtime_opts = {
@@ -137,8 +131,6 @@ static int track_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- s->drop_state = DropNone;
-
fail:
if (ret < 0) {
bdrv_graph_wrlock();
@@ -289,18 +281,8 @@ track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
- BDRVAllocTrackState *s = bs->opaque;
-
*nshared = BLK_PERM_ALL;
- /* in case we're currently dropping ourselves, claim to not use any
- * permissions at all - which is fine, since from this point on we will
- * never issue a read or write anymore */
- if (s->drop_state == DropInProgress) {
- *nperm = 0;
- return;
- }
-
if (role & BDRV_CHILD_DATA) {
*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
} else {
@@ -326,14 +308,6 @@ track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
* kinda fits better, but in the long-term, a special parameter would be
* nice (or done via qemu-server via upcoming blockdev-replace QMP command).
*/
- if (backing_file == NULL) {
- BDRVAllocTrackState *s = bs->opaque;
- bdrv_drained_begin(bs);
- s->drop_state = DropInProgress;
- bdrv_child_refresh_perms(bs, bs->file, &error_abort);
- bdrv_drained_end(bs);
- }
-
return 0;
}

View File

@@ -1,337 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 11 Apr 2024 11:29:28 +0200
Subject: [PATCH] PVE backup: add fleecing option
When a fleecing option is given, it is expected that each device has
a corresponding "-fleecing" block device already attached, except for
EFI disk and TPM state, where fleecing is never used.
The following graph was adapted from [0] which also contains more
details about fleecing.
[guest]
|
| root
v file
[copy-before-write]<------[snapshot-access]
| |
| file | target
v v
[source] [fleecing]
For fleecing, a copy-before-write filter is inserted on top of the
source node, as well as a snapshot-access node pointing to the filter
node which allows to read the consistent state of the image at the
time it was inserted. New guest writes are passed through the
copy-before-write filter which will first copy over old data to the
fleecing image in case that old data is still needed by the
snapshot-access node.
The backup process will sequentially read from the snapshot access,
which has a bitmap and knows whether to read from the original image
or the fleecing image to get the "snapshot" state, i.e. data from the
source image at the time when the copy-before-write filter was
inserted. After reading, the copied sections are discarded from the
fleecing image to reduce space usage.
All of this can be restricted by an initial dirty bitmap to parts of
the source image that are required for an incremental backup.
For discard to work, it is necessary that the fleecing image does not
have a larger cluster size than the backup job granularity. Since
querying that size does not always work, e.g. for RBD with krbd, the
cluster size will not be reported, a minimum of 4 MiB is used. A job
with PBS target already has at least this granularity, so it's just
relevant for other targets. I.e. edge cases where this minimum is not
enough should be very rare in practice. If ever necessary in the
future, can still add a passed-in value for the backup QMP command to
override.
Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
are set when installing the copy-before-write filter and
snapshot-access. When an error or timeout occurs, the problematic (and
each further) snapshot operation will fail and thus cancel the backup
instead of breaking the guest write.
Note that job_id cannot be inferred from the snapshot-access bs because
it has no parent, so just pass the one from the original bs.
[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
pve-backup.c | 135 ++++++++++++++++++++++++++++++++-
qapi/block-core.json | 10 ++-
3 files changed, 142 insertions(+), 4 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 439a7a14c8..d0e7771dcc 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1044,6 +1044,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
NULL, NULL,
devlist, qdict_haskey(qdict, "speed"), speed,
false, 0, // BackupPerf max-workers
+ false, false, // fleecing
&error);
hmp_handle_error(mon, error);
diff --git a/pve-backup.c b/pve-backup.c
index faa6a9b93c..4b0820c8a7 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -7,9 +7,11 @@
#include "sysemu/blockdev.h"
#include "block/block_int-global-state.h"
#include "block/blockjob.h"
+#include "block/copy-before-write.h"
#include "block/dirty-bitmap.h"
#include "block/graph-lock.h"
#include "qapi/qapi-commands-block.h"
+#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qerror.h"
#include "qemu/cutils.h"
@@ -80,8 +82,15 @@ static void pvebackup_init(void)
// initialize PVEBackupState at startup
opts_init(pvebackup_init);
+typedef struct PVEBackupFleecingInfo {
+ BlockDriverState *bs;
+ BlockDriverState *cbw;
+ BlockDriverState *snapshot_access;
+} PVEBackupFleecingInfo;
+
typedef struct PVEBackupDevInfo {
BlockDriverState *bs;
+ PVEBackupFleecingInfo fleecing;
size_t size;
uint64_t block_size;
uint8_t dev_id;
@@ -353,6 +362,22 @@ static void pvebackup_complete_cb(void *opaque, int ret)
PVEBackupDevInfo *di = opaque;
di->completed_ret = ret;
+ /*
+ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
+ * won't be done as a coroutine anyways:
+ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
+ * just spawn a BH calling bdrv_unref().
+ * - For cbw, draining would need to spawn a BH.
+ */
+ if (di->fleecing.snapshot_access) {
+ bdrv_unref(di->fleecing.snapshot_access);
+ di->fleecing.snapshot_access = NULL;
+ }
+ if (di->fleecing.cbw) {
+ bdrv_cbw_drop(di->fleecing.cbw);
+ di->fleecing.cbw = NULL;
+ }
+
/*
* Needs to happen outside of coroutine, because it takes the graph write lock.
*/
@@ -519,9 +544,77 @@ static void create_backup_jobs_bh(void *opaque) {
}
bdrv_drained_begin(di->bs);
+ BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
+
+ BlockDriverState *source_bs = di->bs;
+ bool discard_source = false;
+ bdrv_graph_co_rdlock();
+ const char *job_id = bdrv_get_device_name(di->bs);
+ bdrv_graph_co_rdunlock();
+ if (di->fleecing.bs) {
+ QDict *cbw_opts = qdict_new();
+ qdict_put_str(cbw_opts, "driver", "copy-before-write");
+ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
+ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
+
+ if (di->bitmap) {
+ /*
+ * Only guest writes to parts relevant for the backup need to be intercepted with
+ * old data being copied to the fleecing image.
+ */
+ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
+ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
+ }
+ /*
+ * Fleecing storage is supposed to be fast and it's better to break backup than guest
+ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
+ * abort a bit before that.
+ */
+ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
+ qdict_put_int(cbw_opts, "cbw-timeout", 45);
+
+ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
+
+ if (!di->fleecing.cbw) {
+ error_setg(errp, "appending cbw node for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ break;
+ }
+
+ QDict *snapshot_access_opts = qdict_new();
+ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
+ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
+
+ di->fleecing.snapshot_access =
+ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
+ if (!di->fleecing.snapshot_access) {
+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ break;
+ }
+ source_bs = di->fleecing.snapshot_access;
+ discard_source = true;
+
+ /*
+ * bdrv_get_info() just retuns 0 (= doesn't matter) for RBD when using krbd. But discard
+ * on the fleecing image won't work if the backup job's granularity is less than the RBD
+ * object size (default 4 MiB), so it does matter. Always use at least 4 MiB. With a PBS
+ * target, the backup job granularity would already be at least this much.
+ */
+ perf.min_cluster_size = 4 * 1024 * 1024;
+ /*
+ * For discard to work, cluster size for the backup job must be at least the same as for
+ * the fleecing image.
+ */
+ BlockDriverInfo bdi;
+ if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
+ perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
+ }
+ }
+
BlockJob *job = backup_job_create(
- NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
- bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
+ job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+ bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
&local_err);
@@ -577,6 +670,14 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
+/*
+ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
+ */
+static bool device_uses_fleecing(const char *device_id)
+{
+ return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
+}
+
/*
* Returns a list of device infos, which needs to be freed by the caller. In
* case of an error, errp will be set, but the returned value might still be a
@@ -584,6 +685,7 @@ static void create_backup_jobs_bh(void *opaque) {
*/
static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
const char *devlist,
+ bool fleecing,
Error **errp)
{
gchar **devs = NULL;
@@ -607,6 +709,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
+
+ if (fleecing && device_uses_fleecing(*d)) {
+ g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
+ BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
+ if (!fleecing_blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", fleecing_devid);
+ goto err;
+ }
+ BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
+ if (!bdrv_co_is_inserted(fleecing_bs)) {
+ error_setg(errp, "Device '%s' has no medium", fleecing_devid);
+ goto err;
+ }
+ /*
+ * Fleecing image needs to be the same size to act as a cbw target.
+ */
+ if (bs->total_sectors != fleecing_bs->total_sectors) {
+ error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
+ fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
+ goto err;
+ }
+ di->fleecing.bs = fleecing_bs;
+ }
+
di_list = g_list_append(di_list, di);
d++;
}
@@ -656,6 +783,7 @@ UuidInfo coroutine_fn *qmp_backup(
const char *devlist,
bool has_speed, int64_t speed,
bool has_max_workers, int64_t max_workers,
+ bool has_fleecing, bool fleecing,
Error **errp)
{
assert(qemu_in_coroutine());
@@ -684,7 +812,7 @@ UuidInfo coroutine_fn *qmp_backup(
format = has_format ? format : BACKUP_FORMAT_VMA;
bdrv_graph_co_rdlock();
- di_list = get_device_info(devlist, &local_err);
+ di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
bdrv_graph_co_rdunlock();
if (local_err) {
error_propagate(errp, local_err);
@@ -1087,5 +1215,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
+ ret->backup_fleecing = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 9795247c1f..c581f1f238 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -948,6 +948,10 @@
#
# @max-workers: see @BackupPerf for details. Default 16.
#
+# @fleecing: perform a backup with fleecing. For each device in @devlist, a
+# corresponing '-fleecing' device with the same size already needs to
+# be present.
+#
# Returns: the uuid of the backup job
#
##
@@ -968,7 +972,8 @@
'*firewall-file': 'str',
'*devlist': 'str',
'*speed': 'int',
- '*max-workers': 'int' },
+ '*max-workers': 'int',
+ '*fleecing': 'bool' },
'returns': 'UuidInfo', 'coroutine': true }
##
@@ -1014,6 +1019,8 @@
#
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
+# @backup-fleecing: Whether backup fleecing is supported or not.
+#
# @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
# supported or not.
#
@@ -1025,6 +1032,7 @@
'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
'pbs-library-version': 'str',
+ 'backup-fleecing': 'bool',
'backup-max-workers': 'bool' } }
##

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/stream.c b/block/stream.c
index 9076203193..1d1c65f061 100644
index 97bee482dc..50093c9f57 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -27,7 +27,7 @@ enum {
@@ -28,7 +28,7 @@ enum {
* large enough to process multiple clusters in a single call, so
* that populating contiguous regions of the image is efficient.
*/

View File

@@ -1,117 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Mon, 29 Apr 2024 14:43:58 +0200
Subject: [PATCH] PVE backup: improve error when copy-before-write fails for
fleecing
With fleecing, failure for copy-before-write does not fail the guest
write, but only sets the snapshot error that is associated to the
copy-before-write filter, making further requests to the snapshot
access fail with EACCES, which then also fails the job. But that error
code is not the root cause of why the backup failed, so bubble up the
original snapshot error instead.
Reported-by: Friedrich Weber <f.weber@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Tested-by: Friedrich Weber <f.weber@proxmox.com>
---
block/copy-before-write.c | 18 ++++++++++++------
block/copy-before-write.h | 1 +
pve-backup.c | 9 +++++++++
3 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 81afeff1c7..fdf9cdc0cd 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -27,6 +27,7 @@
#include "qapi/qmp/qjson.h"
#include "sysemu/block-backend.h"
+#include "qemu/atomic.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "block/block_int.h"
@@ -75,7 +76,8 @@ typedef struct BDRVCopyBeforeWriteState {
* @snapshot_error is normally zero. But on first copy-before-write failure
* when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
* value of this error (<0). After that all in-flight and further
- * snapshot-API requests will fail with that error.
+ * snapshot-API requests will fail with that error. To be accessed with
+ * atomics.
*/
int snapshot_error;
} BDRVCopyBeforeWriteState;
@@ -115,7 +117,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
return 0;
}
- if (s->snapshot_error) {
+ if (qatomic_read(&s->snapshot_error)) {
return 0;
}
@@ -139,9 +141,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
WITH_QEMU_LOCK_GUARD(&s->lock) {
if (ret < 0) {
assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
- if (!s->snapshot_error) {
- s->snapshot_error = ret;
- }
+ qatomic_cmpxchg(&s->snapshot_error, 0, ret);
} else {
bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
}
@@ -215,7 +215,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMU_LOCK_GUARD(&s->lock);
- if (s->snapshot_error) {
+ if (qatomic_read(&s->snapshot_error)) {
g_free(req);
return NULL;
}
@@ -595,6 +595,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
bdrv_unref(bs);
}
+int bdrv_cbw_snapshot_error(BlockDriverState *bs)
+{
+ BDRVCopyBeforeWriteState *s = bs->opaque;
+ return qatomic_read(&s->snapshot_error);
+}
+
static void cbw_init(void)
{
bdrv_register(&bdrv_cbw_filter);
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 2a5d4ba693..969da3620f 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockCopyState **bcs,
Error **errp);
void bdrv_cbw_drop(BlockDriverState *bs);
+int bdrv_cbw_snapshot_error(BlockDriverState *bs);
#endif /* COPY_BEFORE_WRITE_H */
diff --git a/pve-backup.c b/pve-backup.c
index 4b0820c8a7..81697d9bf9 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -374,6 +374,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
di->fleecing.snapshot_access = NULL;
}
if (di->fleecing.cbw) {
+ /*
+ * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
+ * error, making further requests to the snapshot fail with EACCES, which then also fail the
+ * job. But that code is not the root cause and just confusing, so update it.
+ */
+ int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
+ if (di->completed_ret == -EACCES && snapshot_error) {
+ di->completed_ret = snapshot_error;
+ }
bdrv_cbw_drop(di->fleecing.cbw);
di->fleecing.cbw = NULL;
}

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 2 Mar 2021 16:11:54 +0100
Subject: [PATCH] block/io: accept NULL qiov in bdrv_pad_request
Some operations, e.g. block-stream, perform reads while discarding the
results (only copy-on-read matters). In this case they will pass NULL as
the target QEMUIOVector, which will however trip bdrv_pad_request, since
it wants to extend its passed vector.
Simply check for NULL and do nothing, there's no reason to pad the
target if it will be discarded anyway.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/io.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/block/io.c b/block/io.c
index f38e7f81d8..28c3a712b6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1764,6 +1764,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
{
int ret;
+ if (!qiov) {
+ return 0;
+ }
+
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {

View File

@@ -1,103 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 7 Nov 2024 17:51:14 +0100
Subject: [PATCH] PVE backup: fixup error handling for fleecing
The drained section needs to be terminated before breaking out of the
loop in the error scenarios. Otherwise, guest IO on the drive would
become stuck.
If the job is created successfully, then the job completion callback
will clean up the snapshot access block nodes. In case failure
happened before the job is created, there was no cleanup for the
snapshot access block nodes yet. Add it.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 38 +++++++++++++++++++++++++-------------
1 file changed, 25 insertions(+), 13 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 81697d9bf9..320c660589 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -357,22 +357,23 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-static void pvebackup_complete_cb(void *opaque, int ret)
+static void cleanup_snapshot_access(PVEBackupDevInfo *di)
{
- PVEBackupDevInfo *di = opaque;
- di->completed_ret = ret;
-
- /*
- * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
- * won't be done as a coroutine anyways:
- * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
- * just spawn a BH calling bdrv_unref().
- * - For cbw, draining would need to spawn a BH.
- */
if (di->fleecing.snapshot_access) {
bdrv_unref(di->fleecing.snapshot_access);
di->fleecing.snapshot_access = NULL;
}
+ if (di->fleecing.cbw) {
+ bdrv_cbw_drop(di->fleecing.cbw);
+ di->fleecing.cbw = NULL;
+ }
+}
+
+static void pvebackup_complete_cb(void *opaque, int ret)
+{
+ PVEBackupDevInfo *di = opaque;
+ di->completed_ret = ret;
+
if (di->fleecing.cbw) {
/*
* With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
@@ -383,10 +384,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
if (di->completed_ret == -EACCES && snapshot_error) {
di->completed_ret = snapshot_error;
}
- bdrv_cbw_drop(di->fleecing.cbw);
- di->fleecing.cbw = NULL;
}
+ /*
+ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
+ * won't be done as a coroutine anyways:
+ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
+ * just spawn a BH calling bdrv_unref().
+ * - For cbw, draining would need to spawn a BH.
+ */
+ cleanup_snapshot_access(di);
+
/*
* Needs to happen outside of coroutine, because it takes the graph write lock.
*/
@@ -587,6 +595,7 @@ static void create_backup_jobs_bh(void *opaque) {
if (!di->fleecing.cbw) {
error_setg(errp, "appending cbw node for fleecing failed: %s",
local_err ? error_get_pretty(local_err) : "unknown error");
+ bdrv_drained_end(di->bs);
break;
}
@@ -599,6 +608,8 @@ static void create_backup_jobs_bh(void *opaque) {
if (!di->fleecing.snapshot_access) {
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
local_err ? error_get_pretty(local_err) : "unknown error");
+ cleanup_snapshot_access(di);
+ bdrv_drained_end(di->bs);
break;
}
source_bs = di->fleecing.snapshot_access;
@@ -637,6 +648,7 @@ static void create_backup_jobs_bh(void *opaque) {
}
if (!job || local_err) {
+ cleanup_snapshot_access(di);
error_setg(errp, "backup_job_create failed: %s",
local_err ? error_get_pretty(local_err) : "null");
break;

View File

@@ -19,33 +19,23 @@ well.
This only worked if the target supports backing images, so up until now
only for qcow2, with alloc-track any driver for the target can be used.
Replacing the node cannot be done in the
track_co_change_backing_file() callback, because replacing a node
cannot happen in a coroutine and requires the block graph lock
exclusively. Could either become a special option for the stream job,
or maybe the upcoming blockdev-replace QMP command can be used in the
future.
If 'auto-remove' is set, alloc-track will automatically detach itself
once the backing image is removed. It will be replaced by 'file'.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures
make error return value consistent with QEMU
avoid premature break during read
adhere to block graph lock requirements]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 366 ++++++++++++++++++++++++++++++++++++++++++++
block/alloc-track.c | 345 ++++++++++++++++++++++++++++++++++++++++++++
block/meson.build | 1 +
block/stream.c | 34 ++++
3 files changed, 401 insertions(+)
2 files changed, 346 insertions(+)
create mode 100644 block/alloc-track.c
diff --git a/block/alloc-track.c b/block/alloc-track.c
new file mode 100644
index 0000000000..b4a9851144
index 0000000000..35f2737c89
--- /dev/null
+++ b/block/alloc-track.c
@@ -0,0 +1,366 @@
@@ -0,0 +1,345 @@
+/*
+ * Node to allow backing images to be applied to any node. Assumes a blank
+ * image to begin with, only new writes are tracked as allocated, thus this
@@ -61,12 +51,9 @@ index 0000000000..b4a9851144
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/dirty-bitmap.h"
+#include "block/graph-lock.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "sysemu/block-backend.h"
@@ -75,12 +62,12 @@ index 0000000000..b4a9851144
+
+typedef enum DropState {
+ DropNone,
+ DropRequested,
+ DropInProgress,
+} DropState;
+
+typedef struct {
+ BdrvDirtyBitmap *bitmap;
+ uint64_t granularity;
+ DropState drop_state;
+ bool auto_remove;
+} BDRVAllocTrackState;
@@ -99,29 +86,26 @@ index 0000000000..b4a9851144
+ },
+};
+
+static void GRAPH_RDLOCK
+track_refresh_limits(BlockDriverState *bs, Error **errp)
+static void track_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ BlockDriverInfo bdi;
+
+ if (!bs->file) {
+ return;
+ }
+
+ /*
+ * Always use alignment from underlying write device so RMW cycle for
+ * bdrv_pwritev reads data from our backing via track_co_preadv. Also use at
+ * least the bitmap granularity.
+ */
+ /* always use alignment from underlying write device so RMW cycle for
+ * bdrv_pwritev reads data from our backing via track_co_preadv (no partial
+ * cluster allocation in 'file') */
+ bdrv_get_info(bs->file->bs, &bdi);
+ bs->bl.request_alignment = MAX(bs->file->bs->bl.request_alignment,
+ s->granularity);
+ MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+}
+
+static int track_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ BdrvChild *file = NULL;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+ int ret = 0;
@@ -137,45 +121,18 @@ index 0000000000..b4a9851144
+ s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
+
+ /* open the target (write) node, backing will be attached by block layer */
+ file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+ BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
+ &local_err);
+ bdrv_graph_wrlock();
+ bs->file = file;
+ bdrv_graph_wrunlock();
+ bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+ BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
+ &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ bdrv_graph_rdlock_main_loop();
+ BlockDriverInfo bdi = {0};
+ ret = bdrv_get_info(bs->file->bs, &bdi);
+ if (ret < 0) {
+ /*
+ * Not a hard failure. Worst that can happen is partial cluster
+ * allocation in the write target. However, the driver here returns its
+ * allocation status based on the dirty bitmap, so any other data that
+ * maps to such a cluster will still be copied later by a stream job (or
+ * during writes to that cluster).
+ */
+ warn_report("alloc-track: unable to query cluster size for write target: %s",
+ strerror(ret));
+ }
+ ret = 0;
+ /*
+ * Always consider alignment from underlying write device so RMW cycle for
+ * bdrv_pwritev reads data from our backing via track_co_preadv. Also try to
+ * avoid partial cluster allocation in the write target by considering the
+ * cluster size.
+ */
+ s->granularity = MAX(bs->file->bs->bl.request_alignment,
+ MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+ track_refresh_limits(bs, errp);
+ s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, s->granularity, NULL,
+ &local_err);
+ bdrv_graph_rdunlock_main_loop();
+ uint64_t gran = bs->bl.request_alignment;
+ s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, gran, NULL, &local_err);
+ if (local_err) {
+ ret = -EIO;
+ error_propagate(errp, local_err);
@@ -186,9 +143,7 @@ index 0000000000..b4a9851144
+
+fail:
+ if (ret < 0) {
+ bdrv_graph_wrlock();
+ bdrv_unref_child(bs, bs->file);
+ bdrv_graph_wrunlock();
+ if (s->bitmap) {
+ bdrv_release_dirty_bitmap(s->bitmap);
+ }
@@ -205,15 +160,13 @@ index 0000000000..b4a9851144
+ }
+}
+
+static coroutine_fn int64_t GRAPH_RDLOCK
+track_co_getlength(BlockDriverState *bs)
+static int64_t track_getlength(BlockDriverState *bs)
+{
+ return bdrv_co_getlength(bs->file->bs);
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn track_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ QEMUIOVector local_qiov;
@@ -224,11 +177,6 @@ index 0000000000..b4a9851144
+ int64_t local_bytes;
+ bool alloc;
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+ return -EIO;
+ }
+
+ /* a read request can span multiple granularity-sized chunks, and can thus
+ * contain blocks with different allocation status - we could just iterate
+ * granularity-wise, but for better performance use bdrv_dirty_bitmap_next_X
@@ -259,8 +207,7 @@ index 0000000000..b4a9851144
+ ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
+ &local_qiov, flags);
+ } else {
+ qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ ret = 0;
+ ret = qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ }
+
+ if (ret != 0) {
@@ -271,39 +218,36 @@ index 0000000000..b4a9851144
+ return ret;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn track_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
+static int coroutine_fn track_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
+{
+ return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+ return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+static int coroutine_fn track_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
+{
+ return bdrv_co_pdiscard(bs->file, offset, bytes);
+ return bdrv_co_pdiscard(bs->file, offset, count);
+}
+
+static coroutine_fn int GRAPH_RDLOCK
+track_co_flush(BlockDriverState *bs)
+static coroutine_fn int track_co_flush(BlockDriverState *bs)
+{
+ return bdrv_co_flush(bs->file->bs);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_block_status(BlockDriverState *bs, bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+static int coroutine_fn track_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+
@@ -329,10 +273,10 @@ index 0000000000..b4a9851144
+ return 0;
+}
+
+static void GRAPH_RDLOCK
+track_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+ BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role, BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+
@@ -355,28 +299,53 @@ index 0000000000..b4a9851144
+ }
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+track_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+ const char *backing_fmt)
+static void track_drop(void *opaque)
+{
+ /*
+ * Note that the actual backing file graph change is already done in the
+ * stream job itself with bdrv_set_backing_hd_drained(), so no need to
+ * actually do anything here. But still needs to be implemented, to make
+ * our caller (i.e. bdrv_co_change_backing_file() do the right thing).
+ *
+ * FIXME
+ * We'd like to auto-remove ourselves from the block graph, but it cannot
+ * be done from a coroutine. Currently done in the stream job, where it
+ * kinda fits better, but in the long-term, a special parameter would be
+ * nice (or done via qemu-server via upcoming blockdev-replace QMP command).
+ */
+ if (backing_file == NULL) {
+ BDRVAllocTrackState *s = bs->opaque;
+ bdrv_drained_begin(bs);
+ s->drop_state = DropInProgress;
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+ bdrv_drained_end(bs);
+ BlockDriverState *bs = (BlockDriverState*)opaque;
+ BlockDriverState *file = bs->file->bs;
+ BDRVAllocTrackState *s = bs->opaque;
+
+ assert(file);
+
+ /* we rely on the fact that we're not used anywhere else, so let's wait
+ * until we're only used once - in the drive connected to the guest (and one
+ * ref is held by bdrv_ref in track_change_backing_file) */
+ if (bs->refcnt > 2) {
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
+ return;
+ }
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_drained_begin(bs);
+
+ /* now that we're drained, we can safely set 'DropInProgress' */
+ s->drop_state = DropInProgress;
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+
+ bdrv_replace_node(bs, file, &error_abort);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
+ bdrv_drained_end(bs);
+ bdrv_unref(bs);
+ aio_context_release(aio_context);
+}
+
+static int track_change_backing_file(BlockDriverState *bs,
+ const char *backing_file,
+ const char *backing_fmt)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ if (s->auto_remove && s->drop_state == DropNone &&
+ backing_file == NULL && backing_fmt == NULL)
+ {
+ /* backing file has been disconnected, there's no longer any use for
+ * this node, so let's remove ourselves from the block graph - we need
+ * to schedule this for later however, since when this function is
+ * called, the blockjob modifying us is probably not done yet and has a
+ * blocker on 'bs' */
+ s->drop_state = DropRequested;
+ bdrv_ref(bs);
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
+ }
+
+ return 0;
@@ -386,9 +355,9 @@ index 0000000000..b4a9851144
+ .format_name = "alloc-track",
+ .instance_size = sizeof(BDRVAllocTrackState),
+
+ .bdrv_open = track_open,
+ .bdrv_file_open = track_open,
+ .bdrv_close = track_close,
+ .bdrv_co_getlength = track_co_getlength,
+ .bdrv_getlength = track_getlength,
+ .bdrv_child_perm = track_child_perm,
+ .bdrv_refresh_limits = track_refresh_limits,
+
@@ -403,7 +372,7 @@ index 0000000000..b4a9851144
+ .supports_backing = true,
+
+ .bdrv_co_block_status = track_co_block_status,
+ .bdrv_co_change_backing_file = track_co_change_backing_file,
+ .bdrv_change_backing_file = track_change_backing_file,
+};
+
+static void bdrv_alloc_track_init(void)
@@ -413,7 +382,7 @@ index 0000000000..b4a9851144
+
+block_init(bdrv_alloc_track_init);
diff --git a/block/meson.build b/block/meson.build
index e178047ec9..7ef7250d31 100644
index e3ed5ac97c..d1ee260048 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -2,6 +2,7 @@ block_ss.add(genh)
@@ -424,48 +393,3 @@ index e178047ec9..7ef7250d31 100644
'amend.c',
'backup.c',
'backup-dump.c',
diff --git a/block/stream.c b/block/stream.c
index 1d1c65f061..d499c8883f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -120,6 +120,40 @@ static int stream_prepare(Job *job)
ret = -EPERM;
goto out;
}
+
+ /*
+ * This cannot be done in the co_change_backing_file callback, because
+ * bdrv_replace_node() cannot be done in a coroutine. The latter also
+ * requires the graph lock exclusively. Only required for the
+ * alloc-track driver.
+ *
+ * The long-term plan is to either have an explicit parameter for the
+ * stream job or use the upcoming blockdev-replace QMP command.
+ */
+ if (base_id == NULL && strcmp(unfiltered_bs->drv->format_name, "alloc-track") == 0) {
+ BlockDriverState *file_bs;
+
+ bdrv_graph_rdlock_main_loop();
+ file_bs = unfiltered_bs->file->bs;
+ bdrv_graph_rdunlock_main_loop();
+
+ bdrv_ref(unfiltered_bs); // unrefed by bdrv_replace_node()
+ bdrv_drained_begin(file_bs);
+ bdrv_graph_wrlock();
+
+ bdrv_replace_node(unfiltered_bs, file_bs, &local_err);
+
+ bdrv_graph_wrunlock();
+ bdrv_drained_end(file_bs);
+ bdrv_unref(unfiltered_bs);
+
+ if (local_err) {
+ error_prepend(&local_err, "failed to replace alloc-track node: ");
+ error_report_err(local_err);
+ ret = -EPERM;
+ goto out;
+ }
+ }
}
out:

View File

@@ -1,135 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 7 Nov 2024 17:51:15 +0100
Subject: [PATCH] PVE backup: factor out setting up snapshot access for
fleecing
Avoids some line bloat in the create_backup_jobs_bh() function and is
in preparation for setting up the snapshot access independently of
fleecing, in particular that will be useful for providing access to
the snapshot via NBD.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 95 ++++++++++++++++++++++++++++++++--------------------
1 file changed, 58 insertions(+), 37 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 320c660589..d8d0c04b0f 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -525,6 +525,62 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
+/*
+ * Setup a snapshot-access block node for a device with associated fleecing image.
+ */
+static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
+{
+ Error *local_err = NULL;
+
+ if (!di->fleecing.bs) {
+ error_setg(errp, "no associated fleecing image");
+ return -1;
+ }
+
+ QDict *cbw_opts = qdict_new();
+ qdict_put_str(cbw_opts, "driver", "copy-before-write");
+ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
+ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
+
+ if (di->bitmap) {
+ /*
+ * Only guest writes to parts relevant for the backup need to be intercepted with
+ * old data being copied to the fleecing image.
+ */
+ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
+ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
+ }
+ /*
+ * Fleecing storage is supposed to be fast and it's better to break backup than guest
+ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
+ * abort a bit before that.
+ */
+ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
+ qdict_put_int(cbw_opts, "cbw-timeout", 45);
+
+ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
+
+ if (!di->fleecing.cbw) {
+ error_setg(errp, "appending cbw node for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ return -1;
+ }
+
+ QDict *snapshot_access_opts = qdict_new();
+ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
+ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
+
+ di->fleecing.snapshot_access =
+ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
+ if (!di->fleecing.snapshot_access) {
+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ return -1;
+ }
+
+ return 0;
+}
+
/*
* backup_job_create can *not* be run from a coroutine, so this can't either.
* The caller is responsible that backup_mutex is held nonetheless.
@@ -569,49 +625,14 @@ static void create_backup_jobs_bh(void *opaque) {
const char *job_id = bdrv_get_device_name(di->bs);
bdrv_graph_co_rdunlock();
if (di->fleecing.bs) {
- QDict *cbw_opts = qdict_new();
- qdict_put_str(cbw_opts, "driver", "copy-before-write");
- qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
- qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
-
- if (di->bitmap) {
- /*
- * Only guest writes to parts relevant for the backup need to be intercepted with
- * old data being copied to the fleecing image.
- */
- qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
- qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
- }
- /*
- * Fleecing storage is supposed to be fast and it's better to break backup than guest
- * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
- * abort a bit before that.
- */
- qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
- qdict_put_int(cbw_opts, "cbw-timeout", 45);
-
- di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
-
- if (!di->fleecing.cbw) {
- error_setg(errp, "appending cbw node for fleecing failed: %s",
- local_err ? error_get_pretty(local_err) : "unknown error");
- bdrv_drained_end(di->bs);
- break;
- }
-
- QDict *snapshot_access_opts = qdict_new();
- qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
- qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
-
- di->fleecing.snapshot_access =
- bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
- if (!di->fleecing.snapshot_access) {
+ if (setup_snapshot_access(di, &local_err) < 0) {
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
local_err ? error_get_pretty(local_err) : "unknown error");
cleanup_snapshot_access(di);
bdrv_drained_end(di->bs);
break;
}
+
source_bs = di->fleecing.snapshot_access;
discard_source = true;

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 15:26:30 +0200
Subject: [PATCH] PVE: whitelist 'invalid' QAPI names for backwards compat
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qapi/pragma.json | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 7c91ea3685..c3888d654c 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -12,6 +12,7 @@
'device_add',
'device_del',
'expire_password',
+ 'get_link_status',
'migrate_cancel',
'netdev_add',
'netdev_del',
@@ -60,6 +61,8 @@
'SysEmuTarget', # query-cpu-fast, query-target
'UuidInfo', # query-uuid
'VncClientInfo', # query-vnc, query-vnc-servers, ...
- 'X86CPURegister32' # qom-get of x86 CPU properties
+ 'X86CPURegister32', # qom-get of x86 CPU properties
# feature-words, filtered-features
+ 'BlockdevOptionsPbs', # for PBS backwards compat
+ 'BalloonInfo'
] } }

View File

@@ -1,135 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 7 Nov 2024 17:51:16 +0100
Subject: [PATCH] PVE backup: save device name in device info structure
The device name needs to be queried while holding the graph read lock
and since it doesn't change during the whole operation, just get it
once during setup and avoid the need to query it again in different
places.
Also in preparation to use it more often in error messages and for the
upcoming external backup access API.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index d8d0c04b0f..e2110ce0db 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -94,6 +94,7 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
+ char* device_name;
int completed_ret; // INT_MAX if not completed
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
@@ -327,6 +328,8 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
}
di->bs = NULL;
+ g_free(di->device_name);
+ di->device_name = NULL;
assert(di->target == NULL);
@@ -621,9 +624,6 @@ static void create_backup_jobs_bh(void *opaque) {
BlockDriverState *source_bs = di->bs;
bool discard_source = false;
- bdrv_graph_co_rdlock();
- const char *job_id = bdrv_get_device_name(di->bs);
- bdrv_graph_co_rdunlock();
if (di->fleecing.bs) {
if (setup_snapshot_access(di, &local_err) < 0) {
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
@@ -654,7 +654,7 @@ static void create_backup_jobs_bh(void *opaque) {
}
BlockJob *job = backup_job_create(
- job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+ di->device_name, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
&local_err);
@@ -751,6 +751,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
+ di->device_name = g_strdup(bdrv_get_device_name(bs));
if (fleecing && device_uses_fleecing(*d)) {
g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
@@ -789,6 +790,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
+ di->device_name = g_strdup(bdrv_get_device_name(bs));
di_list = g_list_append(di_list, di);
}
}
@@ -956,9 +958,6 @@ UuidInfo coroutine_fn *qmp_backup(
di->block_size = dump_cb_block_size;
- bdrv_graph_co_rdlock();
- const char *devname = bdrv_get_device_name(di->bs);
- bdrv_graph_co_rdunlock();
PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
size_t dirty = di->size;
@@ -973,7 +972,8 @@ UuidInfo coroutine_fn *qmp_backup(
}
action = PBS_BITMAP_ACTION_NEW;
} else {
- expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
+ expect_only_dirty =
+ proxmox_backup_check_incremental(pbs, di->device_name, di->size) != 0;
}
if (expect_only_dirty) {
@@ -997,7 +997,8 @@ UuidInfo coroutine_fn *qmp_backup(
}
}
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp);
+ int dev_id = proxmox_backup_co_register_image(pbs, di->device_name, di->size,
+ expect_only_dirty, errp);
if (dev_id < 0) {
goto err_mutex;
}
@@ -1009,7 +1010,7 @@ UuidInfo coroutine_fn *qmp_backup(
di->dev_id = dev_id;
PBSBitmapInfo *info = g_malloc(sizeof(*info));
- info->drive = g_strdup(devname);
+ info->drive = g_strdup(di->device_name);
info->action = action;
info->size = di->size;
info->dirty = dirty;
@@ -1032,10 +1033,7 @@ UuidInfo coroutine_fn *qmp_backup(
goto err_mutex;
}
- bdrv_graph_co_rdlock();
- const char *devname = bdrv_get_device_name(di->bs);
- bdrv_graph_co_rdunlock();
- di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
+ di->dev_id = vma_writer_register_stream(vmaw, di->device_name, di->size);
if (di->dev_id <= 0) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
@@ -1146,6 +1144,9 @@ err:
bdrv_co_unref(di->target);
}
+ g_free(di->device_name);
+ di->device_name = NULL;
+
g_free(di);
}
g_list_free(di_list);

View File

@@ -0,0 +1,35 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 17:36:55 +0200
Subject: [PATCH] PVE: savevm-async: register yank before
migration_incoming_state_destroy
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/savevm-async.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 970ee3b3fc..b3ccc069f1 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -19,6 +19,7 @@
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
+#include "qemu/yank.h"
/* #define DEBUG_SAVEVM_STATE */
@@ -580,6 +581,10 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
dirty_bitmap_mig_before_vm_start();
qemu_fclose(f);
+
+ /* state_destroy assumes a real migration which would have added a yank */
+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
migration_incoming_state_destroy();
if (ret < 0) {
error_setg_errno(errp, -ret, "Error while loading VM state");

View File

@@ -1,25 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 7 Nov 2024 17:51:17 +0100
Subject: [PATCH] PVE backup: include device name in error when setting up
snapshot access fails
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/pve-backup.c b/pve-backup.c
index e2110ce0db..32352fb5ec 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -626,7 +626,8 @@ static void create_backup_jobs_bh(void *opaque) {
bool discard_source = false;
if (di->fleecing.bs) {
if (setup_snapshot_access(di, &local_err) < 0) {
- error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+ error_setg(errp, "%s - setting up snapshot access for fleecing failed: %s",
+ di->device_name,
local_err ? error_get_pretty(local_err) : "unknown error");
cleanup_snapshot_access(di);
bdrv_drained_end(di->bs);

View File

@@ -1,137 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 3 Jan 2025 14:03:12 +0100
Subject: [PATCH] adapt machine version deprecation for Proxmox VE
In commit a35f8577a0 ("include/hw: add macros for deprecation &
removal of versioned machines"), a new machine version deprecation and
removal policy was introduced. After only 3 years a machine version
will be deprecated while being removed after 6 years.
The deprecation is a bit early considering major PVE releases are
approximately every 2 years. This means that a deprecation warning can
already happen for a machine version that was introduced during the
previous major release. This would scare users for no good reason, so
avoid deprecating machine versions in PVE too early and define a
baseline of machine versions that will be supported throughout a
single major PVE release.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/hw/boards.h | 78 +++++++++++++++++++++++++++++----------------
1 file changed, 51 insertions(+), 27 deletions(-)
diff --git a/include/hw/boards.h b/include/hw/boards.h
index d1741ea121..3f9befda14 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -631,42 +631,66 @@ struct MachineState {
/*
- * How many years/major releases for each phase
- * of the life cycle. Assumes use of versioning
- * scheme where major is bumped each year
+ * Baseline of machine versions that are still considered supported throughout
+ * current major Proxmox VE release. Machine versions older than this are
+ * considered to be deprecated in Proxmox VE.
+ *
+ * Machine versions older than 6 years are removed just like in upstream QEMU.
+ * (policy takes effect with QEMU 10.1). Assumes yearly major QEMU release.
+ *
+ * QEMU release cylce N.0 in ~April, N.1 in ~August, N.2 in ~December
+ * Debian/PVE release cylce ~every two years in summer
+ *
+ * PVE - last QEMU - machine versions dropped - baseline
+ * 8 9.2 2.3 and older 2.4
+ * 9 11.2 5.2 and older 6.0
+ * 10 13.2 7.2 and older 8.0
+ */
+#define MACHINE_VER_BASELINE_PVE_MAJOR 2
+#define MACHINE_VER_BASELINE_PVE_MINOR 4
+#define MACHINE_VER_DELETION_MAJOR (QEMU_VERSION_MAJOR - 6)
+#define MACHINE_VER_DELETION_MINOR QEMU_VERSION_MINOR
+
+/*
+ * Proxmox VE needs to support the baseline throughout a major PVE release. So
+ * a QEMU release where the baseline is already deleted cannot be used.
+ * Removal policy after 6 years takes effect with QEMU 10.1.
*/
-#define MACHINE_VER_DELETION_MAJOR 6
-#define MACHINE_VER_DEPRECATION_MAJOR 3
+#if ((QEMU_VERSION_MAJOR > 10) || ((QEMU_VERSION_MAJOR == 10) && (QEMU_VERSION_MINOR >= 1)))
+#if ((MACHINE_VER_BASELINE_PVE_MAJOR < MACHINE_VER_DELETION_MAJOR) || \
+ ((MACHINE_VER_BASELINE_PVE_MAJOR == MACHINE_VER_DELETION_MAJOR) && \
+ (MACHINE_VER_BASELINE_PVE_MINOR < MACHINE_VER_DELETION_MINOR)))
+#error "Baseline machine version needed by Proxmox VE not supported anymore by this QEMU release"
+#endif
+#endif
/*
* Expands to a static string containing a deprecation
* message for a versioned machine type
*/
#define MACHINE_VER_DEPRECATION_MSG \
- "machines more than " stringify(MACHINE_VER_DEPRECATION_MAJOR) \
- " years old are subject to deletion after " \
- stringify(MACHINE_VER_DELETION_MAJOR) " years"
-
-#define _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor) \
- (((QEMU_VERSION_MAJOR - major) > cutoff) || \
- (((QEMU_VERSION_MAJOR - major) == cutoff) && \
- (QEMU_VERSION_MINOR - minor) >= 0))
-
-#define _MACHINE_VER_IS_EXPIRED2(cutoff, major, minor) \
- _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
-#define _MACHINE_VER_IS_EXPIRED3(cutoff, major, minor, micro) \
- _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
-#define _MACHINE_VER_IS_EXPIRED4(cutoff, major, minor, _unused, tag) \
- _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
-#define _MACHINE_VER_IS_EXPIRED5(cutoff, major, minor, micro, _unused, tag) \
- _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
-
-#define _MACHINE_IS_EXPIRED(cutoff, ...) \
+ "old machine version is subject to deletion during current major Proxmox VE release"
+
+#define _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor) \
+ ((major < baseline_major) || \
+ ((major == baseline_major) && \
+ (minor < baseline_minor)))
+
+#define _MACHINE_VER_IS_EXPIRED2(baseline_major, baseline_minor, major, minor) \
+ _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor)
+#define _MACHINE_VER_IS_EXPIRED3(baseline_major, baseline_minor, major, minor, micro) \
+ _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor)
+#define _MACHINE_VER_IS_EXPIRED4(baseline_major, baseline_minor, major, minor, _unused, tag) \
+ _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor)
+#define _MACHINE_VER_IS_EXPIRED5(baseline_major, baseline_minor, major, minor, micro, _unused, tag) \
+ _MACHINE_VER_IS_EXPIRED_IMPL(baseline_major, baseline_minor, major, minor)
+
+#define _MACHINE_IS_EXPIRED(baseline_major, baseline_minor, ...) \
_MACHINE_VER_PICK(__VA_ARGS__, \
_MACHINE_VER_IS_EXPIRED5, \
_MACHINE_VER_IS_EXPIRED4, \
_MACHINE_VER_IS_EXPIRED3, \
- _MACHINE_VER_IS_EXPIRED2) (cutoff, __VA_ARGS__)
+ _MACHINE_VER_IS_EXPIRED2) (baseline_major, baseline_minor, __VA_ARGS__)
/*
* Evaluates true when a machine type with (major, minor)
@@ -675,7 +699,7 @@ struct MachineState {
* lifecycle rules
*/
#define MACHINE_VER_IS_DEPRECATED(...) \
- _MACHINE_IS_EXPIRED(MACHINE_VER_DEPRECATION_MAJOR, __VA_ARGS__)
+ _MACHINE_IS_EXPIRED(MACHINE_VER_BASELINE_PVE_MAJOR, MACHINE_VER_BASELINE_PVE_MINOR, __VA_ARGS__)
/*
* Evaluates true when a machine type with (major, minor)
@@ -684,7 +708,7 @@ struct MachineState {
* lifecycle rules
*/
#define MACHINE_VER_SHOULD_DELETE(...) \
- _MACHINE_IS_EXPIRED(MACHINE_VER_DELETION_MAJOR, __VA_ARGS__)
+ _MACHINE_IS_EXPIRED(MACHINE_VER_DELETION_MAJOR, MACHINE_VER_DELETION_MINOR, __VA_ARGS__)
/*
* Sets the deprecation reason for a versioned machine based

94
debian/patches/series vendored
View File

@@ -1,14 +1,7 @@
extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
extra/0002-ide-avoid-potential-deadlock-when-draining-during-tr.patch
extra/0003-tcg-Reset-free_temps-before-tcg_optimize.patch
extra/0004-target-i386-Reset-TSCs-of-parked-vCPUs-too-on-VM-res.patch
extra/0005-i386-cpu-Mark-avx10_version-filtered-when-prefix-is-.patch
extra/0006-net-Fix-announce_self.patch
extra/0007-net-dump-Correctly-compute-Ethernet-packet-offset.patch
extra/0008-pci-acpi-Windows-PCI-Label-Id-bug-workaround.patch
extra/0009-hw-usb-hcd-xhci-pci-Use-modulo-to-select-MSI-vector-.patch
extra/0010-pci-ensure-valid-link-status-bits-for-downstream-por.patch
extra/0011-pci-msix-Fix-msix-pba-read-vector-poll-end-calculati.patch
extra/0002-monitor-hmp-add-support-for-flag-argument-with-value.patch
extra/0003-monitor-refactor-set-expire_password-and-allow-VNC-d.patch
extra/0004-block-mirror-fix-NULL-pointer-dereference-in-mirror_.patch
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
@@ -21,47 +14,46 @@ pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
pve/0007-PVE-Up-qmp-add-get_link_status.patch
pve/0008-PVE-Up-glusterfs-allow-partial-reads.patch
pve/0009-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
pve/0010-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
pve/0011-PVE-Up-qemu-img-dd-add-isize-parameter.patch
pve/0012-PVE-Up-qemu-img-dd-add-n-skip_create.patch
pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
pve/0014-PVE-qapi-modify-query-machines.patch
pve/0015-PVE-qapi-modify-spice-query.patch
pve/0016-PVE-add-IOChannel-implementation-for-savevm-async.patch
pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
pve/0018-PVE-add-optional-buffer-size-to-QEMUFile.patch
pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
pve/0023-PVE-monitor-disable-oob-capability.patch
pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
pve/0025-PVE-Allow-version-code-in-machine-type.patch
pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
pve/0027-PVE-Backup-add-vma-backup-format-code.patch
pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
pve/0029-PVE-Add-sequential-job-transaction-support.patch
pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
pve/0037-PVE-block-stream-increase-chunk-size.patch
pve/0038-block-add-alloc-track-driver.patch
pve/0039-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
pve/0040-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
pve/0042-alloc-track-error-out-when-auto-remove-is-not-set.patch
pve/0043-alloc-track-avoid-seemingly-superfluous-child-permis.patch
pve/0044-PVE-backup-add-fleecing-option.patch
pve/0045-PVE-backup-improve-error-when-copy-before-write-fail.patch
pve/0046-PVE-backup-fixup-error-handling-for-fleecing.patch
pve/0047-PVE-backup-factor-out-setting-up-snapshot-access-for.patch
pve/0048-PVE-backup-save-device-name-in-device-info-structure.patch
pve/0049-PVE-backup-include-device-name-in-error-when-setting.patch
pve/0050-adapt-machine-version-deprecation-for-Proxmox-VE.patch
pve/0016-PVE-add-savevm-async-for-background-state-snapshots.patch
pve/0017-PVE-add-optional-buffer-size-to-QEMUFile.patch
pve/0018-PVE-block-add-the-zeroinit-block-driver-filter.patch
pve/0019-PVE-Add-dummy-id-command-line-parameter.patch
pve/0020-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
pve/0021-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
pve/0022-PVE-monitor-disable-oob-capability.patch
pve/0023-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
pve/0024-PVE-Allow-version-code-in-machine-type.patch
pve/0025-PVE-Backup-add-vma-backup-format-code.patch
pve/0026-PVE-Backup-add-backup-dump-block-driver.patch
pve/0027-PVE-Backup-proxmox-backup-patches-for-qemu.patch
pve/0028-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0029-PVE-Backup-Add-dirty-bitmap-tracking-for-incremental.patch
pve/0030-PVE-various-PBS-fixes.patch
pve/0031-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
pve/0032-PVE-add-query_proxmox_support-QMP-command.patch
pve/0033-PVE-add-query-pbs-bitmap-info-QMP-call.patch
pve/0034-PVE-redirect-stderr-to-journal-when-daemonized.patch
pve/0035-PVE-Add-sequential-job-transaction-support.patch
pve/0036-PVE-Backup-Use-a-transaction-to-synchronize-job-stat.patch
pve/0037-PVE-Backup-Don-t-block-on-finishing-and-cleanup-crea.patch
pve/0038-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
pve/0039-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
pve/0040-PVE-fall-back-to-open-iscsi-initiatorname.patch
pve/0041-PVE-Use-coroutine-QMP-for-backup-cancel_backup.patch
pve/0042-PBS-add-master-key-support.patch
pve/0043-PVE-block-pbs-fast-path-reads-without-allocation-if-.patch
pve/0044-PVE-block-stream-increase-chunk-size.patch
pve/0045-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
pve/0046-block-add-alloc-track-driver.patch
pve/0047-PVE-whitelist-invalid-QAPI-names-for-backwards-compa.patch
pve/0048-PVE-savevm-async-register-yank-before-migration_inco.patch
pve-qemu-6.1-vitastor.patch

View File

@@ -1,6 +1,7 @@
# install the userspace utilities
debian/kvm-ifup etc/kvm/
debian/kvm-ifdown etc/kvm/
#install ovmf uefi rom
debian/OVMF_CODE-pure-efi.fd usr/share/kvm/
debian/OVMF_VARS-pure-efi.fd usr/share/kvm/
debian/kvm-ifdown etc/kvm/
# install the userspace utilities
debian/kvm-ifup etc/kvm/

View File

@@ -1,13 +1,16 @@
# also use aarch64 for 32 bit arm
usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
usr/bin/qemu-system-x86_64 usr/bin/kvm
# qemu-system-i386 and qemu-system-x86_64 provides the same hardware emulation
usr/bin/qemu-system-x86_64 usr/bin/qemu-system-i386
# also use aarch64 for 32 bit arm
usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
# upstream provides a qemu man page,
# we symlink to kvm for backward compatibility
# and to qemu-system-{i386,x86_64} to fullfill our 'Provides: qemu-system-x86'
usr/share/man/man1/qemu.1 usr/share/man/man1/kvm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-aarch64.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-arm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-i386.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-x86_64.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-arm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-aarch64.1

View File

@@ -1,7 +1,4 @@
pve-qemu-kvm: arch-dependent-file-in-usr-share [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: binary-from-other-architecture [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: embedded-javascript-library please use * [usr/share/doc/pve-qemu-kvm/kvm/_static/*]
pve-qemu-kvm: groff-message *: warning [*]: can't break line [usr/share/man/*]
pve-qemu-kvm: groff-message *: warning [*]: cannot adjust line [usr/share/man/*]
pve-qemu-kvm: statically-linked-binary [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: unstripped-binary-or-object [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: arch-dependent-file-in-usr-share usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: binary-from-other-architecture usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: unstripped-binary-or-object usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: statically-linked-binary usr/share/kvm/hppa-firmware.img

138
debian/rules vendored
View File

@@ -1,12 +1,22 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Sample debian/rules that uses debhelper.
# This file was originally written by Joey Hess and Craig Small.
# As a special exception, when this file is copied by dh-make into a
# dh-make output file, you may use that output file without restriction.
# This special exception was added by Craig Small in version 0.37 of dh-make.
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
include /usr/share/dpkg/default.mk
include /usr/share/dpkg/pkg-info.mk
HOST_CPU ?= $(DEB_HOST_GNU_CPU)
# These are used for cross-compiling and for saving the configure script
# from having to guess our platform (since we know it already)
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
ARCH ?= $(shell dpkg-architecture -qDEB_HOST_GNU_CPU)
PACKAGE=pve-qemu-kvm
destdir := $(CURDIR)/debian/$(PACKAGE)
@@ -17,70 +27,58 @@ machinefile := $(destdir)/usr/share/kvm/machine-versions-x86_64.json
# default QEMU out-of-tree build directory is ./build
BUILDDIR=build
# FIXME: pass to configure as --extra-cflags=CFLAGS ?! also LDFLAGS?
CFLAGS += -Wall
CFLAGS = -Wall
export CFLAGS
# DEB_BUILD_OPTIONS=parallel=N
MAKEFLAGS += $(subst parallel=,-j,$(filter parallel=%,${DEB_BUILD_OPTIONS}))
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
CFLAGS += -O0
else
CFLAGS += -O2
endif
${BUILDDIR}/config.status: configure
dh_testdir
# Add here commands to configure the package.
ifneq "$(wildcard /usr/share/misc/config.sub)" ""
cp -f /usr/share/misc/config.sub config.sub
endif
ifneq "$(wildcard /usr/share/misc/config.guess)" ""
cp -f /usr/share/misc/config.guess config.guess
endif
# guest-agent is only required for guest systems
./configure \
--disable-download \
--docdir=/usr/share/doc/pve-qemu-kvm \
--localstatedir=/var \
--prefix=/usr \
--sysconfdir=/etc \
--target-list=$(HOST_CPU)-softmmu,aarch64-softmmu \
--with-suffix="kvm" \
--with-pkgversion="${DEB_SOURCE}_${DEB_VERSION_UPSTREAM_REVISION}" \
--audio-drv-list="alsa" \
--datadir=/usr/share \
--libexecdir=/usr/lib/kvm \
--disable-capstone \
--disable-gtk \
--disable-guest-agent \
--disable-guest-agent-msi \
--disable-libnfs \
--disable-libssh \
--disable-sdl \
--disable-smartcard \
--disable-strip \
--disable-xen \
--enable-curl \
--enable-docs \
--enable-glusterfs \
--enable-gnutls \
--enable-libiscsi \
--enable-libusb \
--enable-linux-aio \
--enable-linux-io-uring \
--enable-numa \
--enable-opengl \
--enable-rbd \
--enable-seccomp \
--enable-slirp \
--enable-spice \
--enable-usb-redir \
--enable-virglrenderer \
--enable-virtfs \
--enable-zstd
--with-git-submodules=ignore \
--docdir=/usr/share/doc/pve-qemu-kvm \
--localstatedir=/var \
--prefix=/usr \
--sysconfdir=/etc \
--target-list=$(ARCH)-softmmu,aarch64-softmmu \
--with-suffix="kvm" \
--with-pkgversion="${DEB_SOURCE}_${DEB_VERSION_UPSTREAM}" \
--audio-drv-list="alsa" \
--datadir=/usr/share \
--libexecdir=/usr/lib/kvm \
--disable-capstone \
--disable-gtk \
--disable-guest-agent \
--disable-guest-agent-msi \
--disable-libnfs \
--disable-libxml2 \
--disable-sdl \
--disable-smartcard \
--disable-strip \
--disable-xen \
--enable-curl \
--enable-docs \
--enable-glusterfs \
--enable-gnutls \
--enable-libiscsi \
--enable-libusb \
--enable-linux-aio \
--enable-linux-io-uring \
--enable-numa \
--enable-rbd \
--enable-seccomp \
--enable-spice \
--enable-usb-redir \
--enable-virtfs \
--enable-virtiofsd \
--enable-xfsctl
build: build-arch build-indep
build-arch: build-stamp
build-indep: build-stamp
build: build-stamp
build-stamp: ${BUILDDIR}/config.status
@@ -98,8 +96,15 @@ clean:
dh_testroot
rm -f build-stamp
# Add here commands to clean up before the build process.
# Add here commands to clean up after the build process.
-$(MAKE) distclean
ifneq "$(wildcard /usr/share/misc/config.sub)" ""
cp -f /usr/share/misc/config.sub config.sub
endif
ifneq "$(wildcard /usr/share/misc/config.guess)" ""
cp -f /usr/share/misc/config.guess config.guess
endif
dh_clean
@@ -113,6 +118,21 @@ install: build
# Add here commands to install the package into debian/pve-kvm.
$(MAKE) DESTDIR=$(destdir) install
# we do not need openbios files (sparc/ppc)
rm -rf $(destdir)/usr/share/kvm/openbios-*
# remove ppc files
rm $(destdir)/usr/share/kvm/*.dtb
rm $(destdir)/usr/share/kvm/s390-ccw.img
rm $(destdir)/usr/share/kvm/s390-netboot.img
rm $(destdir)/usr/share/kvm/qemu_vga.ndrv
rm $(destdir)/usr/share/kvm/slof.bin
rm $(destdir)/usr/share/kvm/u-boot.e500
# remove Alpha files
rm $(destdir)/usr/share/kvm/palcode-clipper
# remove RISC-V files
rm $(destdir)/usr/share/kvm/opensbi-riscv32-generic-fw_dynamic.elf
rm $(destdir)/usr/share/kvm/opensbi-riscv64-generic-fw_dynamic.elf
# Remove things we don't package at all, would be a "kvm-dev" package
rm -Rf $(destdir)/usr/include/linux/
rm -Rf $(destdir)/usr/include
@@ -148,7 +168,7 @@ binary-arch: build install
# dh_installinfo
dh_installman
dh_link
dh_strip --dbgsym-migration='pve-qemu-kvm-dbg (<<8.0.0-1~)'
dh_strip --dbg-package=pve-qemu-kvm-dbg
dh_compress
dh_fixperms
# dh_perl

View File

@@ -1,3 +0,0 @@
debian/OVMF_CODE-pure-efi.fd
debian/OVMF_VARS-pure-efi.fd
debian/Logo.bmp

View File

@@ -1,2 +0,0 @@
source-is-missing [roms/SLOF/*.oco]
source-is-missing [linux-user/*/vdso-*.so]

Some files were not shown because too many files have changed in this diff Show More