Compare commits

..

2 Commits

Author SHA1 Message Date
0115e4efb0 Add bdrv_co_block_status 2023-01-13 23:50:40 +03:00
4948452f3c Add Vitastor support 2022-12-14 19:16:08 +03:00
115 changed files with 11715 additions and 6487 deletions

7
.gitignore vendored
View File

@@ -1,7 +0,0 @@
/*.build
/*.buildinfo
/*.changes
/*.deb
/*.dsc
/*.tar*
/pve-qemu-kvm-*.*/

View File

@@ -1,88 +1,60 @@
include /usr/share/dpkg/default.mk
include /usr/share/dpkg/pkg-info.mk
include /usr/share/dpkg/architecture.mk
PACKAGE = pve-qemu-kvm
SRCDIR := qemu
BUILDDIR ?= $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
ORIG_SRC_TAR=$(PACKAGE)_$(DEB_VERSION_UPSTREAM).orig.tar.gz
BUILDDIR ?= ${PACKAGE}-${DEB_VERSION_UPSTREAM}
GITVERSION := $(shell git rev-parse HEAD)
DSC=$(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION).dsc
DEB = $(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
DEB_DBG = $(PACKAGE)-dbgsym_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
DEB = ${PACKAGE}_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
DEB_DBG = ${PACKAGE}-dbg_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
DEBS = $(DEB) $(DEB_DBG)
all: $(DEBS)
.PHONY: submodule
submodule:
ifeq ($(shell test -f "$(SRCDIR)/configure" && echo 1 || echo 0), 0)
git submodule update --init --recursive
cd $(SRCDIR); meson subprojects download
endif
test -f "${SRCDIR}/configure" || git submodule update --init --recursive
PC_BIOS_FW_PURGE_LIST_IN = \
hppa-firmware.img \
openbios-ppc \
openbios-sparc32 \
openbios-sparc64 \
palcode-clipper \
s390-ccw.img \
s390-netboot.img \
u-boot.e500 \
.*\.dtb \
qemu_vga.ndrv \
slof.bin \
opensbi-riscv.*-generic-fw_dynamic.bin \
BLOB_PURGE_SED_CMDS = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "/$(FILE)/d")
BLOB_PURGE_FILTER = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "$(FILE)")
$(BUILDDIR): submodule
$(BUILDDIR): keycodemapdb | submodule
# check if qemu/ was used for a build
# if so, please run 'make distclean' in the submodule and try again
test ! -f $(SRCDIR)/build/config.status
rm -rf $@.tmp $@
cp -a $(SRCDIR) $@.tmp
cp -a debian $@.tmp/debian
rm -rf $@.tmp/roms/edk2 # packaged separately
find $@.tmp/pc-bios -type f | grep $(BLOB_PURGE_FILTER) | xargs rm -f
sed -i $(BLOB_PURGE_SED_CMDS) $@.tmp/pc-bios/meson.build
echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $@.tmp/debian/SOURCE
mv $@.tmp $@
rm -rf $(BUILDDIR)
cp -a $(SRCDIR) $(BUILDDIR)
cp -a debian $(BUILDDIR)/debian
rm -rf $(BUILDDIR)/ui/keycodemapdb
cp -a keycodemapdb $(BUILDDIR)/ui/
echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $(BUILDDIR)/debian/SOURCE
.PHONY: deb kvm
deb kvm: $(DEBS)
$(DEB_DBG): $(DEB)
$(DEB): $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j32
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j
lintian $(DEBS)
sbuild: $(DSC)
sbuild $(DSC)
$(ORIG_SRC_TAR): $(BUILDDIR)
tar czf $(ORIG_SRC_TAR) --exclude="$(BUILDDIR)/debian" $(BUILDDIR)
.PHONY: dsc
dsc:
rm -rf *.dsc $(BUILDDIR)
$(MAKE) $(DSC)
lintian $(DSC)
$(DSC): $(ORIG_SRC_TAR) $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -S -us -uc -d
.PHONY: update
update:
cd $(SRCDIR) && git submodule deinit ui/keycodemapdb || true
rm -rf $(SRCDIR)/ui/keycodemapdb
mkdir $(SRCDIR)/ui/keycodemapdb
cd $(SRCDIR) && git submodule update --init ui/keycodemapdb
rm -rf keycodemapdb
mkdir keycodemapdb
cp -R $(SRCDIR)/ui/keycodemapdb/* keycodemapdb/
git add keycodemapdb
.PHONY: upload
upload: UPLOAD_DIST ?= $(DEB_DISTRIBUTION)
upload: $(DEBS)
tar cf - $(DEBS) | ssh repoman@repo.proxmox.com upload --product pve --dist $(UPLOAD_DIST)
tar cf - ${DEBS} | ssh repoman@repo.proxmox.com upload --product pve --dist bullseye
.PHONY: distclean clean
distclean: clean
clean:
rm -rf $(PACKAGE)-[0-9]*/ $(PACKAGE)*.tar* *.deb *.dsc *.build *.buildinfo *.changes
rm -rf $(BUILDDIR) $(PACKAGE)*.deb *.buildinfo *.changes
.PHONY: dinstall
dinstall: $(DEBS)

406
debian/changelog vendored
View File

@@ -1,404 +1,14 @@
pve-qemu-kvm (8.1.5-3+vitastor1) bookworm; urgency=medium
pve-qemu-kvm (6.1.1-2+vitastor2) bullseye; urgency=medium
* Add bdrv_co_block_status implementation for QCOW2 export support
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 13 Jan 2023 20:20:16 +0300
pve-qemu-kvm (6.1.1-2+vitastor1) bullseye; urgency=medium
* Add Vitastor support
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 23 Feb 2024 12:19:05 +0300
pve-qemu-kvm (8.1.5-3) bookworm; urgency=medium
* backport fix for potential deadlock during QMP stop command if the VM has
disks attached through VirtIO-Block and IO-Thread enabled
* fix #4507: add patch to automatically increase NOFILE soft limit
-- Proxmox Support Team <support@proxmox.com> Wed, 21 Feb 2024 20:11:23 +0100
pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium
* work around for a situation where guest IO might get stuck, if the VM is
configured with iothread and VirtIO block/SCSI
-- Proxmox Support Team <support@proxmox.com> Fri, 02 Feb 2024 19:41:27 +0100
pve-qemu-kvm (8.1.5-1) bookworm; urgency=medium
* update to 8.1.5 stable release, including more relevant fixes like:
- virtio-net: correctly copy vnet header when flushing TX
- hw/pflash: implement update buffer for block writes
- Fixes to i386 emulation and ARM emulation.
-- Proxmox Support Team <support@proxmox.com> Fri, 02 Feb 2024 19:08:13 +0100
pve-qemu-kvm (8.1.2-6) bookworm; urgency=medium
* revert attempted fix to avoid rare issue with stuck guest IO when using
iothread, because it caused a much more common issue with iothreads
consuming too much CPU
-- Proxmox Support Team <support@proxmox.com> Fri, 15 Dec 2023 14:22:06 +0100
pve-qemu-kvm (8.1.2-5) bookworm; urgency=medium
* backport workaround for stuck guest IO with iothread and VirtIO block/SCSI
in some rare edge cases
* backport fix for potential deadlock when issuing the "resize" QMP command
for a disk that is using iothread
-- Proxmox Support Team <support@proxmox.com> Mon, 11 Dec 2023 16:58:27 +0100
pve-qemu-kvm (8.1.2-4) bookworm; urgency=medium
* fix vnc clipboard in the host to guest direction
-- Proxmox Support Team <support@proxmox.com> Wed, 22 Nov 2023 14:28:21 +0100
pve-qemu-kvm (8.1.2-3) bookworm; urgency=medium
* fix #5054: backport fix for software reset with SATA, avoiding breakage
with, e.g., some FreeBSD VMs
-- Proxmox Support Team <support@proxmox.com> Mon, 20 Nov 2023 10:24:50 +0100
pve-qemu-kvm (8.1.2-2) bookworm; urgency=medium
* revert "x86: acpi: workaround Windows not handling name references in
Package properly" as that seems to have broken networking (and possibly
other things) on some localized variants of Windows (e.g., the German
versions).
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Nov 2023 11:55:23 +0100
pve-qemu-kvm (8.1.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 8.1.2
* use QEMU's keycode-map-db again instead of our static copy from QEMU 6.0
* disable graph locking, newly introduced in the 8.1 release, as it has
still various deadlock issues, e.g., during canceling backup jobs.
-- Proxmox Support Team <support@proxmox.com> Tue, 24 Oct 2023 13:42:45 +0200
pve-qemu-kvm (8.0.2-7) bookworm; urgency=medium
* fix #2874: SATA: avoid unsolicited write to sector 0 during reset
-- Proxmox Support Team <support@proxmox.com> Wed, 04 Oct 2023 08:33:35 +0200
pve-qemu-kvm (8.0.2-6) bookworm; urgency=medium
* fix #1534: vma: add extract-filter for disk images allowing users to pass
a comma separated list of the disks they want to extract from an archive.
* backup: create jobs in a drained section to avoid subtle bugs where
something interferes with the block-copy-state bitmap on initialization
* backup: drop experimental, and since a while also fully broken, directory
backup format (BACKUP_FORMAT_DIR). This format was never exposed via the
Proxmox VE API, but only available via QMP, as it's broken since QEMU 8 and
we got zero reports about that, it's safe to assume that there are no
public users, so just remove it completely.
-- Proxmox Support Team <support@proxmox.com> Wed, 06 Sep 2023 17:03:59 +0200
pve-qemu-kvm (8.0.2-5) bookworm; urgency=medium
* improve memory footprint after backup by not keeping as much memory
resident.
* fix file descriptor leak for vhost (used by default by vNICs).
-- Proxmox Support Team <support@proxmox.com> Wed, 16 Aug 2023 11:52:24 +0200
pve-qemu-kvm (8.0.2-4) bookworm; urgency=medium
* fix resume for snapshot and hibernate in combination with iothread and
dirty bitmap
-- Proxmox Support Team <support@proxmox.com> Fri, 28 Jul 2023 12:58:22 +0200
pve-qemu-kvm (8.0.2-3) bookworm; urgency=medium
* fix regression in QEMU 8.0 for drive mirror with bitmap
-- Proxmox Support Team <support@proxmox.com> Thu, 15 Jun 2023 13:57:46 +0200
pve-qemu-kvm (8.0.2-2) bookworm; urgency=medium
* drop custom get_link_status QMP command, was never really used.
* drop custom & deprecated drive snapshot QMP commands, we use a better
alternative since a while.
-- Proxmox Support Team <support@proxmox.com> Fri, 09 Jun 2023 07:57:56 +0200
pve-qemu-kvm (8.0.2-1) bookworm; urgency=medium
* update to QEMU stable release 8.0.2
* update patches for avoiding issues with DMA reentrancy to current,
slightly optimized version.
-- Proxmox Support Team <support@proxmox.com> Tue, 06 Jun 2023 16:34:50 +0200
pve-qemu-kvm (8.0.0-1) bookworm; urgency=medium
* update to QEMU stable release 8.0.0
* re-build for Proxmox VE 8 / Debian 12 Bookworm
* adapt to the local virtiofsd C variant being dropped, it has been
rewritten in Rust and is now hosted in a separate source repository.
-- Proxmox Support Team <support@proxmox.com> Mon, 22 May 2023 13:45:49 +0200
pve-qemu-kvm (7.2.0-8) bullseye; urgency=medium
* backport fix for ACPI CPU hotplug issue with TCG
* cherry-pick TCG-related stable fixes for 7.2 for users that turned off KVM
HW acceleration
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Mar 2023 15:47:08 +0100
pve-qemu-kvm (7.2.0-7) bullseye; urgency=medium
* improve fix for potential deadlock with trim for IDE/SATA and draining
* backport stable fixes:
- hw/nvme: fix missing endian conversions for doorbell buffers
- hw/smbios: fix field corruption in type 4 table
- virtio-rng-pci: fix transitional migration compat for vectors
- hw/timer/hpet: Fix expiration time overflow
- vhost/vdpa: stop all svq on device deletion
- vhost: avoid a potential use of an uninitialized variable in the call to
vhost_svq_poll
- chardev/char-socket: set s->listener = NULL in char_socket_finalize to
fix a potential crash after live-migration
- intel-iommu: fail MAP notifier without caching mode
- intel-iommu: fail DEVIOTLB_UNMAP without dt mode
* fix a regression for when the LSI SCSI controller is used
-- Proxmox Support Team <support@proxmox.com> Mon, 13 Mar 2023 17:42:49 +0100
pve-qemu-kvm (7.2.0-6) bullseye; urgency=medium
* fix 7.2 regression for Linux boot failures with megasas SCSI
* fix 7.0 regression for a potential deadlock with trim for IDE/SATA and
draining
-- Proxmox Support Team <support@proxmox.com> Wed, 08 Mar 2023 14:32:17 +0100
pve-qemu-kvm (7.2.0-5) bullseye; urgency=medium
* fix #4476: savevm-async: avoid looping without progress
* savevm-async: decrease the boundary for free space for (memory) state left
on target from 30 MiB to 100 MiB, improving the heuristic for when to
enter the final "pause and sync" stage.
* QMP backup: use correct error number when getting blockdrive length fails
* backport fix for some DMA reentrancy issues, better protecting against
malicious guests
* backport fix for iSCSI double free issue leading to crashes
-- Proxmox Support Team <support@proxmox.com> Tue, 21 Feb 2023 13:49:43 +0100
pve-qemu-kvm (7.2.0-4) bullseye; urgency=medium
* backport fix for a 7.2 regression when using VirtIO disk with
detect-zeroes=unmap
-- Proxmox Support Team <support@proxmox.com> Fri, 27 Jan 2023 09:37:49 +0100
pve-qemu-kvm (7.2.0-3) bullseye; urgency=medium
* add fix for live-migration with virtio-rng devices, which regressed in
QEMU 7.2.0.
-- Proxmox Support Team <support@proxmox.com> Thu, 12 Jan 2023 13:13:14 +0100
pve-qemu-kvm (7.2.0-2) bullseye; urgency=medium
* enable slirp again for now, as in qemu-server, user networking is
supported (via CLI/API) when no bridge is set on a virtual NIC
* cherry-pick stable fixes for 7.2. Two for virtio-mem and one for vIOMMU.
Both features are not yet exposed in PVE's qemu-server, but there's work
going on to change that.
-- Proxmox Support Team <support@proxmox.com> Tue, 10 Jan 2023 15:47:48 +0100
pve-qemu-kvm (7.2.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.2.0
* drop 'slirp' networking
-- Proxmox Support Team <support@proxmox.com> Fri, 16 Dec 2022 13:18:21 +0100
pve-qemu-kvm (7.1.0-4) bullseye; urgency=medium
* cherry-pick "block/block-backend: blk_set_enable_write_cache is IO_CODE"
-- Proxmox Support Team <support@proxmox.com> Tue, 22 Nov 2022 09:21:06 +0100
pve-qemu-kvm (7.1.0-3) bullseye; urgency=medium
* init: daemonize: defuse PID file resolve error to a warning at max, fixing
some usecases that regressed with 7.1, like tracking start up in our
file-restore VM.
-- Proxmox Support Team <support@proxmox.com> Fri, 28 Oct 2022 10:27:49 +0200
pve-qemu-kvm (7.1.0-2) bullseye; urgency=medium
* fix an issue with error handling in async backup code
-- Proxmox Support Team <support@proxmox.com> Tue, 18 Oct 2022 15:33:44 +0200
pve-qemu-kvm (7.1.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.1.0
* add fix for io_uring_register_ring_fd from upstream
-- Proxmox Support Team <support@proxmox.com> Fri, 14 Oct 2022 14:54:09 +0200
pve-qemu-kvm (7.0.0-4) bullseye; urgency=medium
* add revision to version output
* PVE Backup: allow passing max-workers performance setting
-- Proxmox Support Team <support@proxmox.com> Mon, 10 Oct 2022 11:55:37 +0200
pve-qemu-kvm (7.0.0-3) bullseye; urgency=medium
* savevm-async: avoid segfault when aborting snapshot creation task
* savevm-async: set SAVE_STATE_DONE when closing state file was successful
allowing one to start a new snapshot task after aborting one.
-- Proxmox Support Team <support@proxmox.com> Tue, 30 Aug 2022 12:54:03 +0200
pve-qemu-kvm (7.0.0-2) bullseye; urgency=medium
* backport "io_uring: fix short read slow path"
* backport "e1000: set RX descriptor status in a separate operation"
-- Proxmox Support Team <support@proxmox.com> Wed, 20 Jul 2022 09:17:07 +0200
pve-qemu-kvm (7.0.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.0.0
-- Proxmox Support Team <support@proxmox.com> Thu, 30 Jun 2022 11:07:37 +0200
pve-qemu-kvm (6.2.0-11) bullseye; urgency=medium
* add 'namespace' to BlockdevOptionsPbs for live-restore support
* vma: create: support 64KiB-unaligned input images like to improve backing
up some VM templates
* block: alloc-track: avoid unlikely, but possible premature break
-- Proxmox Support Team <support@proxmox.com> Wed, 22 Jun 2022 15:54:54 +0200
pve-qemu-kvm (6.2.0-10) bullseye; urgency=medium
* fix #4101: fix backup cancellation bug with iothreads
-- Proxmox Support Team <support@proxmox.com> Thu, 9 Jun 2022 16:35:51 +0200
pve-qemu-kvm (6.2.0-9) bullseye; urgency=medium
* fix possible race conditions during cancellation of a PBS backup
-- Proxmox Support Team <support@proxmox.com> Wed, 08 Jun 2022 14:03:22 +0200
pve-qemu-kvm (6.2.0-8) bullseye; urgency=medium
* revert "block/rbd: implement bdrv_co_block_status" to work around
performance regression when backing up large RBD disk
-- Proxmox Support Team <support@proxmox.com> Thu, 19 May 2022 09:24:45 +0200
pve-qemu-kvm (6.2.0-7) bullseye; urgency=medium
* Proxmox Backup Server namespace support
-- Proxmox Support Team <support@proxmox.com> Thu, 12 May 2022 16:05:56 +0200
pve-qemu-kvm (6.2.0-6) bullseye; urgency=medium
* block/gluster: correctly set max_pdiscard which is int64_t to avoid
triggering assertion
* ui/vnc.c: Fixed a deadlock bug
* display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) and
integer overflow in cursor_alloc (CVE-2021-4206)
-- Proxmox Support Team <support@proxmox.com> Wed, 11 May 2022 10:42:53 +0200
pve-qemu-kvm (6.2.0-5) bullseye; urgency=medium
* vma: allow partial restore by skipping some disk
-- Proxmox Support Team <support@proxmox.com> Mon, 25 Apr 2022 10:13:46 +0200
pve-qemu-kvm (6.2.0-4) bullseye; urgency=medium
* d/control: add libgbm to build dependencies
* d/control: add suggest dependency-hint for libgl1
* various stable backports:
+ virtio-net: fix map leaking on error during receive
+ memory: Fix incorrect calls of log_global_start/stop
+ acpi: fix OEM ID/OEM Table ID padding
+ vhost-vsock: detach the virqueue element in case of error
+ vhost-user: remove VirtQ notifier restore
+ vhost-user: fix VirtQ notifier cleanup
+ virtio: fix the condition for iommu_platform not supported
-- Proxmox Support Team <support@proxmox.com> Fri, 22 Apr 2022 11:52:30 +0200
pve-qemu-kvm (6.2.0-3) bullseye; urgency=medium
* cherry-pick fix for some manually added ACPI table SLIC entries via the
custom args flag.
-- Proxmox Support Team <support@proxmox.com> Fri, 15 Apr 2022 09:09:37 +0200
pve-qemu-kvm (6.2.0-2) bullseye; urgency=medium
* compile in virgl support
* enable zstd support
* drop sdl dependency (it was disabled at compile time already)
* recommend 'numactl'
* fix an issue with multi-disk backups where chunks would be written
multiple times
-- Proxmox Support Team <support@proxmox.com> Thu, 03 Mar 2022 12:03:44 +0100
pve-qemu-kvm (6.2.0-1) bullseye; urgency=medium
* update to QEMU stable release 6.2.0
-- Proxmox Support Team <support@proxmox.com> Thu, 17 Feb 2022 06:23:14 +0100
-- Vitaliy Filippov <vitalif@yourcmc.ru> Thu, 14 Dec 2022 19:15:40 +0300
pve-qemu-kvm (6.1.1-2) bullseye; urgency=medium

1
debian/compat vendored Normal file
View File

@@ -0,0 +1 @@
10

34
debian/control vendored
View File

@@ -2,43 +2,41 @@ Source: pve-qemu-kvm
Section: admin
Priority: optional
Maintainer: Proxmox Support Team <support@proxmox.com>
Build-Depends: debhelper-compat (= 13),
Build-Depends: autotools-dev,
check,
debhelper (>= 9),
libacl1-dev,
libaio-dev,
libattr1-dev,
libcap-ng-dev,
libcurl4-gnutls-dev,
libepoxy-dev,
libfdt-dev,
libgbm-dev,
libglusterfs-dev (>= 5.2-2),
libgnutls28-dev,
libiscsi-dev (>= 1.12.0),
libjemalloc-dev,
libjpeg-dev,
libjson-perl,
libnuma-dev,
libpci-dev,
libpixman-1-dev,
libproxmox-backup-qemu0-dev (>= 1.3.0),
libproxmox-backup-qemu0-dev (>= 1.0.3-1),
librbd-dev (>= 0.48),
libsdl1.2-dev,
libseccomp-dev,
libslirp-dev,
libspice-protocol-dev (>= 0.12.14~),
libspice-server-dev (>= 0.14.0~),
libsystemd-dev,
liburing-dev,
libusb-1.0-0-dev (>= 1.0.17),
libusb-1.0-0-dev (>= 1.0.17-1),
libusbredirparser-dev (>= 0.6-2),
libvirglrenderer-dev,
libzstd-dev,
meson,
python3-minimal,
python3-sphinx,
python3-sphinx-rtd-theme,
python3-venv,
quilt,
texi2html,
texinfo,
uuid-dev,
xfslibs-dev,
Standards-Version: 3.7.2
@@ -47,6 +45,7 @@ Package: pve-qemu-kvm
Architecture: any
Depends: ceph-common (>= 0.48),
iproute2,
libaio1,
libgfapi0 | glusterfs-common (>= 5.6),
libgfchangelog0 | glusterfs-common (>= 5.6),
libgfdb0 | glusterfs-common (>= 5.6),
@@ -55,16 +54,16 @@ Depends: ceph-common (>= 0.48),
libglusterfs-dev | glusterfs-common (>= 5.6),
libglusterfs0 | glusterfs-common (>= 5.6),
libiscsi4 (>= 1.12.0) | libiscsi7,
libjemalloc2,
libjpeg62-turbo,
libsdl1.2debian,
libspice-server1 (>= 0.14.0~),
libusb-1.0-0 (>= 1.0.17-1),
libusbredirparser1 (>= 0.6-2),
vitastor-client (>= 0.9.4),
libuuid1,
numactl,
${misc:Depends},
${shlibs:Depends},
Recommends: numactl,
Suggests: libgl1,
Conflicts: kvm,
pve-kvm,
pve-qemu-kvm-2.6.18,
@@ -72,17 +71,22 @@ Conflicts: kvm,
qemu-kvm,
qemu-system-arm,
qemu-system-common,
qemu-system-data,
qemu-system-x86,
qemu-utils,
Provides: qemu-system-arm, qemu-system-x86, qemu-utils,
Provides: qemu-system-arm, qemu-system-x86, qemu-utils
Replaces: pve-kvm,
pve-qemu-kvm-2.6.18,
qemu-system-arm,
qemu-system-x86,
qemu-utils,
Breaks: qemu-server (<= 8.0.6)
Description: Full virtualization on x86 hardware
Using KVM, one can run multiple virtual PCs, each running unmodified Linux or
Windows images. Each virtual machine has private virtualized hardware: a
network card, disk, graphics adapter, etc.
Package: pve-qemu-kvm-dbg
Architecture: any
Section: debug
Depends: pve-qemu-kvm (= ${binary:Version})
Description: pve qemu debugging symbols
This package contains the debugging symbols for pve-qemu-kvm.

2
debian/copyright vendored
View File

@@ -25,7 +25,7 @@ License:
In particular, the QEMU virtual CPU core library (libqemu.a) is
released under the GNU Lesser General Public License version 2 or later.
On Debian systems, the complete text of the GNU Lesser General Public
On Debian systems, the complete text of the GNU Lesser General Public
License can be found in the file /usr/share/common-licenses/LGPL.
Some hardware device emulation sources and other QEMU functionality are

View File

@@ -24,5 +24,4 @@ while (<STDIN>) {
die "no QEMU machine types detected from STDIN input" if scalar (@$machines) <= 0;
print to_json($machines, { utf8 => 1, canonical => 1 })
or die "failed to encode detected machines as JSON - $!\n";
print to_json($machines, { utf8 => 1 }) or die "$!\n";

View File

@@ -27,21 +27,19 @@ Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebased for 8.1.1]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/mirror.c | 98 +++++++++++++++++++++-----
blockdev.c | 38 +++++++++-
include/block/block_int-global-state.h | 4 +-
qapi/block-core.json | 25 ++++++-
tests/unit/test-block-iothread.c | 4 +-
5 files changed, 142 insertions(+), 27 deletions(-)
block/mirror.c | 98 +++++++++++++++++++++++++-------
blockdev.c | 39 ++++++++++++-
include/block/block_int.h | 4 +-
qapi/block-core.json | 29 ++++++++--
tests/unit/test-block-iothread.c | 4 +-
5 files changed, 145 insertions(+), 29 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index d3cacd1708..1ff42c8af1 100644
index 85b781bc21..0821214138 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob {
@@ -50,7 +50,7 @@ typedef struct MirrorBlockJob {
BlockDriverState *to_replace;
/* Used to block operations on the drive-mirror-replace target */
Error *replace_blocker;
@@ -59,7 +57,7 @@ index d3cacd1708..1ff42c8af1 100644
BdrvDirtyBitmap *dirty_bitmap;
BdrvDirtyBitmapIter *dbi;
uint8_t *buf;
@@ -705,7 +707,8 @@ static int mirror_exit_common(Job *job)
@@ -697,7 +699,8 @@ static int mirror_exit_common(Job *job)
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
@@ -69,7 +67,7 @@ index d3cacd1708..1ff42c8af1 100644
BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
if (bdrv_cow_bs(unfiltered_target) != backing) {
@@ -809,6 +812,16 @@ static void mirror_abort(Job *job)
@@ -802,6 +805,16 @@ static void mirror_abort(Job *job)
assert(ret == 0);
}
@@ -86,7 +84,7 @@ index d3cacd1708..1ff42c8af1 100644
static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -997,7 +1010,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
@@ -983,7 +996,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_free_init(s);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -96,7 +94,7 @@ index d3cacd1708..1ff42c8af1 100644
ret = mirror_dirty_init(s);
if (ret < 0 || job_is_cancelled(&s->common.job)) {
goto immediate_exit;
@@ -1251,6 +1265,7 @@ static const BlockJobDriver mirror_job_driver = {
@@ -1216,6 +1230,7 @@ static const BlockJobDriver mirror_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
@@ -104,15 +102,15 @@ index d3cacd1708..1ff42c8af1 100644
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = mirror_cancel,
@@ -1267,6 +1282,7 @@ static const BlockJobDriver commit_active_job_driver = {
@@ -1232,6 +1247,7 @@ static const BlockJobDriver commit_active_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
+ .clean = mirror_clean,
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = commit_active_cancel,
@@ -1658,7 +1674,10 @@ static BlockJob *mirror_start_job(
},
@@ -1594,7 +1610,10 @@ static BlockJob *mirror_start_job(
BlockCompletionFunc *cb,
void *opaque,
const BlockJobDriver *driver,
@@ -124,12 +122,11 @@ index d3cacd1708..1ff42c8af1 100644
bool auto_complete, const char *filter_node_name,
bool is_mirror, MirrorCopyMode copy_mode,
Error **errp)
@@ -1670,10 +1689,39 @@ static BlockJob *mirror_start_job(
@@ -1606,10 +1625,39 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
- if (granularity == 0) {
- granularity = bdrv_get_default_bitmap_granularity(target);
+ if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+ error_setg(errp, "Sync mode '%s' not supported",
+ MirrorSyncMode_str(sync_mode));
@@ -150,8 +147,8 @@ index d3cacd1708..1ff42c8af1 100644
+ "sync mode '%s' is not compatible with bitmaps",
+ MirrorSyncMode_str(sync_mode));
+ return NULL;
}
+ }
+
+ if (bitmap) {
+ if (granularity) {
+ error_setg(errp, "granularity (%d)"
@@ -161,12 +158,13 @@ index d3cacd1708..1ff42c8af1 100644
+ }
+ granularity = bdrv_dirty_bitmap_granularity(bitmap);
+ } else if (granularity == 0) {
+ granularity = bdrv_get_default_bitmap_granularity(target);
+ }
granularity = bdrv_get_default_bitmap_granularity(target);
}
-
assert(is_power_of_2(granularity));
if (buf_size < 0) {
@@ -1804,7 +1852,9 @@ static BlockJob *mirror_start_job(
@@ -1747,7 +1795,9 @@ static BlockJob *mirror_start_job(
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
@@ -177,7 +175,7 @@ index d3cacd1708..1ff42c8af1 100644
s->backing_mode = backing_mode;
s->zero_target = zero_target;
s->copy_mode = copy_mode;
@@ -1825,6 +1875,18 @@ static BlockJob *mirror_start_job(
@@ -1768,6 +1818,18 @@ static BlockJob *mirror_start_job(
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
}
@@ -196,7 +194,7 @@ index d3cacd1708..1ff42c8af1 100644
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
BLK_PERM_CONSISTENT_READ,
@@ -1902,6 +1964,9 @@ fail:
@@ -1845,6 +1907,9 @@ fail:
if (s->dirty_bitmap) {
bdrv_release_dirty_bitmap(s->dirty_bitmap);
}
@@ -206,7 +204,7 @@ index d3cacd1708..1ff42c8af1 100644
job_early_fail(&s->common.job);
}
@@ -1919,31 +1984,25 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1862,29 +1927,23 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -223,8 +221,6 @@ index d3cacd1708..1ff42c8af1 100644
- bool is_none_mode;
BlockDriverState *base;
GLOBAL_STATE_CODE();
- if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
- (mode == MIRROR_SYNC_MODE_BITMAP)) {
- error_setg(errp, "Sync mode '%s' not supported",
@@ -243,7 +239,7 @@ index d3cacd1708..1ff42c8af1 100644
}
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1970,7 +2029,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1909,7 +1968,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN, false,
on_error, on_error, true, cb, opaque,
@@ -254,32 +250,33 @@ index d3cacd1708..1ff42c8af1 100644
errp);
if (!job) {
diff --git a/blockdev.c b/blockdev.c
index c28462a633..a402fa4bf7 100644
index 3d8ac368a1..03e99264dc 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2849,6 +2849,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2957,6 +2957,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
BlockDriverState *target,
const char *replaces,
bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
+ bool has_bitmap,
+ const char *bitmap_name,
+ bool has_bitmap_mode,
+ BitmapSyncMode bitmap_mode,
BlockMirrorBackingMode backing_mode,
bool zero_target,
bool has_speed, int64_t speed,
@@ -2867,6 +2870,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2976,6 +2980,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
{
BlockDriverState *unfiltered_bs;
int job_flags = JOB_DEFAULT;
+ BdrvDirtyBitmap *bitmap = NULL;
GLOBAL_STATE_CODE();
GRAPH_RDLOCK_GUARD_MAINLOOP();
@@ -2921,6 +2925,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_speed) {
speed = 0;
@@ -3030,6 +3035,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
+ if (bitmap_name) {
+ if (has_bitmap) {
+ if (granularity) {
+ error_setg(errp, "Granularity and bitmap cannot both be set");
+ return;
@@ -302,53 +299,53 @@ index c28462a633..a402fa4bf7 100644
+ }
+ }
+
if (!replaces) {
if (!has_replaces) {
/* We want to mirror from @bs, but keep implicit filters on top */
unfiltered_bs = bdrv_skip_implicit_filters(bs);
@@ -2966,8 +2993,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3076,8 +3104,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
* and will allow to check whether the node still exist at mirror completion
*/
mirror_start(job_id, bs, target,
- replaces, job_flags,
- has_replaces ? replaces : NULL, job_flags,
- speed, granularity, buf_size, sync, backing_mode, zero_target,
+ replaces, job_flags, speed, granularity, buf_size, sync,
+ bitmap, bitmap_mode, backing_mode, zero_target,
+ has_replaces ? replaces : NULL, job_flags, speed, granularity,
+ buf_size, sync, bitmap, bitmap_mode, backing_mode, zero_target,
on_source_error, on_target_error, unmap, filter_node_name,
copy_mode, errp);
}
@@ -3115,6 +3142,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3222,6 +3250,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
blockdev_mirror_common(arg->job_id, bs, target_bs,
arg->replaces, arg->sync,
+ arg->bitmap,
blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
arg->has_replaces, arg->replaces, arg->sync,
+ arg->has_bitmap, arg->bitmap,
+ arg->has_bitmap_mode, arg->bitmap_mode,
backing_mode, zero_target,
arg->has_speed, arg->speed,
arg->has_granularity, arg->granularity,
@@ -3136,6 +3165,8 @@ void qmp_blockdev_mirror(const char *job_id,
@@ -3243,6 +3273,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
const char *device, const char *target,
const char *replaces,
bool has_replaces, const char *replaces,
MirrorSyncMode sync,
+ const char *bitmap,
+ bool has_bitmap, const char *bitmap,
+ bool has_bitmap_mode, BitmapSyncMode bitmap_mode,
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size,
@@ -3184,7 +3215,8 @@ void qmp_blockdev_mirror(const char *job_id,
@@ -3292,7 +3324,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
}
blockdev_mirror_common(job_id, bs, target_bs,
- replaces, sync, backing_mode,
+ replaces, sync,
blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
- has_replaces, replaces, sync, backing_mode,
+ has_replaces, replaces, sync, has_bitmap,
+ bitmap, has_bitmap_mode, bitmap_mode, backing_mode,
zero_target, has_speed, speed,
has_granularity, granularity,
has_buf_size, buf_size,
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index da5fb31089..32f0f9858a 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -152,7 +152,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c31cbd034a..11442893d0 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1254,7 +1254,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -360,26 +357,31 @@ index da5fb31089..32f0f9858a 100644
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bca1a0c372..a5cea82139 100644
index 675d8265eb..6356a63695 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2145,6 +2145,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
@@ -1938,10 +1938,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
+# argument must be present for bitmap mode and absent otherwise.
+# The bitmap's granularity is used instead of @granularity (Since
+# 4.1).
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
+# be present for bitmap mode and absent otherwise. The bitmap's
+# granularity is used instead of @granularity (since 4.1).
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+# after the operation concludes. Must be present if sync is
+# "bitmap". Must NOT be present otherwise. (Since 4.1)
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+# the operation concludes. Must be present if sync is "bitmap".
+# Must NOT be present otherwise. (Since 4.1)
+#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
@@ -2187,7 +2196,9 @@
# @granularity: granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M (since 1.4).
+# power of 2 between 512 and 64M. Must not be specified if
+# @bitmap is present (since 1.4).
#
# @buf-size: maximum amount of data in flight from source to
# target (since 1.4).
@@ -1979,7 +1988,9 @@
{ 'struct': 'DriveMirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*format': 'str', '*node-name': 'str', '*replaces': 'str',
@@ -390,23 +392,28 @@ index bca1a0c372..a5cea82139 100644
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
@@ -2471,6 +2482,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
@@ -2247,10 +2258,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
+# argument must be present for bitmap mode and absent otherwise.
+# The bitmap's granularity is used instead of @granularity (since
+# 4.1).
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
+# be present for bitmap mode and absent otherwise. The bitmap's
+# granularity is used instead of @granularity (since 4.1).
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+# after the operation concludes. Must be present if sync is
+# "bitmap". Must NOT be present otherwise. (Since 4.1)
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+# the operation concludes. Must be present if sync is "bitmap".
+# Must NOT be present otherwise. (Since 4.1)
+#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
@@ -2521,7 +2541,8 @@
# @granularity: granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M
+# power of 2 between 512 and 64M. Must not be specified if
+# @bitmap is present.
#
# @buf-size: maximum amount of data in flight from source to
# target
@@ -2299,7 +2319,8 @@
{ 'command': 'blockdev-mirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*replaces': 'str',
@@ -417,10 +424,10 @@ index bca1a0c372..a5cea82139 100644
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index d727a5fee8..8a34aa2328 100644
index c39e70b2f5..470ef79ae0 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -757,8 +757,8 @@ static void test_propagate_mirror(void)
@@ -617,8 +617,8 @@ static void test_propagate_mirror(void)
/* Start a mirror job */
mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,
@@ -430,4 +437,4 @@ index d727a5fee8..8a34aa2328 100644
+ false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
&error_abort);
WITH_JOB_LOCK_GUARD() {
job = job_get("job0");

View File

@@ -24,10 +24,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 1ff42c8af1..11b8a8e959 100644
index 0821214138..c688726fae 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -682,8 +682,6 @@ static int mirror_exit_common(Job *job)
@@ -674,8 +674,6 @@ static int mirror_exit_common(Job *job)
bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
}
@@ -36,9 +36,9 @@ index 1ff42c8af1..11b8a8e959 100644
/* Make sure that the source BDS doesn't go away during bdrv_replace_node,
* before we can call bdrv_drained_end */
bdrv_ref(src);
@@ -788,6 +786,18 @@ static int mirror_exit_common(Job *job)
block_job_remove_all_bdrv(bjob);
bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
@@ -783,6 +781,18 @@ static int mirror_exit_common(Job *job)
blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
+ if (s->sync_bitmap) {
+ if (s->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS ||
@@ -55,7 +55,7 @@ index 1ff42c8af1..11b8a8e959 100644
bs_opaque->job = NULL;
bdrv_drained_end(src);
@@ -1699,10 +1709,6 @@ static BlockJob *mirror_start_job(
@@ -1635,10 +1645,6 @@ static BlockJob *mirror_start_job(
" sync mode",
MirrorSyncMode_str(sync_mode));
return NULL;
@@ -66,7 +66,7 @@ index 1ff42c8af1..11b8a8e959 100644
}
} else if (bitmap) {
error_setg(errp,
@@ -1719,6 +1725,12 @@ static BlockJob *mirror_start_job(
@@ -1655,6 +1661,12 @@ static BlockJob *mirror_start_job(
return NULL;
}
granularity = bdrv_dirty_bitmap_granularity(bitmap);

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 3 insertions(+)
diff --git a/blockdev.c b/blockdev.c
index a402fa4bf7..01b0ab0549 100644
index 03e99264dc..9e14feec87 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2946,6 +2946,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3056,6 +3056,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -28,4 +28,4 @@ index a402fa4bf7..01b0ab0549 100644
+ return;
}
if (!replaces) {
if (!has_replaces) {

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 11b8a8e959..00f2665ca4 100644
index c688726fae..a7f829f766 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -792,8 +792,8 @@ static int mirror_exit_common(Job *job)
@@ -787,8 +787,8 @@ static int mirror_exit_common(Job *job)
job->ret == 0 && ret == 0)) {
/* Success; synchronize copy back to sync. */
bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
@@ -30,7 +30,7 @@ index 11b8a8e959..00f2665ca4 100644
}
}
bdrv_release_dirty_bitmap(s->dirty_bitmap);
@@ -1892,11 +1892,8 @@ static BlockJob *mirror_start_job(
@@ -1835,11 +1835,8 @@ static BlockJob *mirror_start_job(
}
if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {

View File

@@ -12,8 +12,6 @@ uniform w.r.t. backup block jobs.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebase for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/mirror.c | 28 +++------------
blockdev.c | 29 +++++++++++++++
@@ -21,10 +19,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
3 files changed, 70 insertions(+), 59 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 00f2665ca4..60cf574de5 100644
index a7f829f766..6a126d18c8 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1699,31 +1699,13 @@ static BlockJob *mirror_start_job(
@@ -1635,31 +1635,13 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
@@ -62,17 +60,17 @@ index 00f2665ca4..60cf574de5 100644
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
diff --git a/blockdev.c b/blockdev.c
index 01b0ab0549..cd5f205ad1 100644
index 9e14feec87..b6f797b41f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2925,7 +2925,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3035,7 +3035,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
+ if ((sync == MIRROR_SYNC_MODE_BITMAP) ||
+ (sync == MIRROR_SYNC_MODE_INCREMENTAL)) {
+ /* done before desugaring 'incremental' to print the right message */
+ if (!bitmap_name) {
+ if (!has_bitmap) {
+ error_setg(errp, "Must provide a valid bitmap name for "
+ "'%s' sync mode", MirrorSyncMode_str(sync));
+ return;
@@ -93,7 +91,7 @@ index 01b0ab0549..cd5f205ad1 100644
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ }
+
if (bitmap_name) {
if (has_bitmap) {
+ if (sync != MIRROR_SYNC_MODE_BITMAP) {
+ error_setg(errp, "Sync mode '%s' not supported with bitmap.",
+ MirrorSyncMode_str(sync));

View File

@@ -48,7 +48,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6 files changed, 59 insertions(+), 5 deletions(-)
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 965f5d5450..e04bd059b6 100644
index 1a8a369b50..2c8a558c67 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts;
@@ -60,10 +60,10 @@ index 965f5d5450..e04bd059b6 100644
void monitor_init_globals(void);
void monitor_init_globals_core(void);
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
index 252de85681..8db28f9272 100644
index 9c3a09cb01..a92be8c3f7 100644
--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
@@ -151,6 +151,13 @@ typedef struct {
@@ -144,6 +144,13 @@ typedef struct {
QemuMutex qmp_queue_lock;
/* Input queue that holds all the parsed QMP requests */
GQueue *qmp_requests;
@@ -78,10 +78,10 @@ index 252de85681..8db28f9272 100644
/**
diff --git a/monitor/monitor.c b/monitor/monitor.c
index dc352f9e9d..56e1307014 100644
index 46a171bca6..5ccdd2424b 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -117,6 +117,21 @@ bool monitor_cur_is_qmp(void)
@@ -135,6 +135,21 @@ bool monitor_cur_is_qmp(void)
return cur_mon && monitor_is_qmp(cur_mon);
}
@@ -104,10 +104,10 @@ index dc352f9e9d..56e1307014 100644
* Is @mon is using readline?
* Note: not all HMP monitors use readline, e.g., gdbserver has a
diff --git a/monitor/qmp.c b/monitor/qmp.c
index a239945e8d..589c9524f8 100644
index 092c527b6f..6b8cfcf6d8 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -165,6 +165,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
@@ -141,6 +141,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
QDict *rsp;
QDict *error;
@@ -116,7 +116,7 @@ index a239945e8d..589c9524f8 100644
rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
&mon->common);
@@ -180,7 +182,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
@@ -156,7 +158,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
}
}
@@ -135,7 +135,7 @@ index a239945e8d..589c9524f8 100644
qobject_unref(rsp);
}
@@ -461,6 +473,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
@@ -444,6 +456,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
switch (event) {
case CHR_EVENT_OPENED:
@@ -144,10 +144,10 @@ index a239945e8d..589c9524f8 100644
monitor_qmp_caps_reset(mon);
data = qmp_greeting(mon);
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 176b549473..790bb7d1da 100644
index 59600210ce..95602446eb 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -117,16 +117,28 @@ typedef struct QmpDispatchBH {
@@ -120,16 +120,28 @@ typedef struct QmpDispatchBH {
QObject **ret;
Error **errp;
Coroutine *co;
@@ -180,19 +180,19 @@ index 176b549473..790bb7d1da 100644
aio_co_wake(data->co);
}
@@ -253,6 +265,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
@@ -243,6 +255,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
.ret = &ret,
.errp = &err,
.co = qemu_coroutine_self(),
+ .conn_nr = monitor_get_connection_nr(cur_mon),
};
aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh,
aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
&data);
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
index afa477aae6..d3ff124bf3 100644
index d058a2a00d..3290b58120 100644
--- a/stubs/monitor-core.c
+++ b/stubs/monitor-core.c
@@ -12,6 +12,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
@@ -13,6 +13,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
return NULL;
}

View File

@@ -0,0 +1,55 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 1 Sep 2021 16:51:04 +0200
Subject: [PATCH] monitor/hmp: add support for flag argument with value
Adds support for the "-xS" parameter type, where "-x" denotes a flag
name and the "S" suffix indicates that this flag is supposed to take an
arbitrary string parameter.
These parameters are always optional, the entry in the qdict will be
omitted if the flag is not given.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
monitor/hmp.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/monitor/hmp.c b/monitor/hmp.c
index d50c3124e1..a32dce7a35 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -980,6 +980,7 @@ static QDict *monitor_parse_arguments(Monitor *mon,
{
const char *tmp = p;
int skip_key = 0;
+ int ret;
/* option */
c = *typestr++;
@@ -1002,8 +1003,22 @@ static QDict *monitor_parse_arguments(Monitor *mon,
}
if (skip_key) {
p = tmp;
+ } else if (*typestr == 'S') {
+ /* has option with string value */
+ typestr++;
+ tmp = p++;
+ while (qemu_isspace(*p)) {
+ p++;
+ }
+ ret = get_str(buf, sizeof(buf), &p);
+ if (ret < 0) {
+ monitor_printf(mon, "%s: value expected for -%c\n",
+ cmd->name, *tmp);
+ goto fail;
+ }
+ qdict_put_str(qdict, key, buf);
} else {
- /* has option */
+ /* has boolean option */
p++;
qdict_put_bool(qdict, key, true);
}

View File

@@ -1,69 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 28 Feb 2023 09:11:29 -0800
Subject: [PATCH] scsi: megasas: Internal cdbs have 16-byte length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Host drivers do not necessarily set cdb_len in megasas io commands.
With commits 6d1511cea0 ("scsi: Reject commands if the CDB length
exceeds buf_len") and fe9d8927e2 ("scsi: Add buf_len parameter to
scsi_req_new()"), this results in failures to boot Linux from affected
SCSI drives because cdb_len is set to 0 by the host driver.
Set the cdb length to its actual size to solve the problem.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(picked-up from https://lists.nongnu.org/archive/html/qemu-devel/2023-02/msg08653.html)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/scsi/megasas.c | 14 ++------------
1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 32c70c9e99..984b6a3145 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -1781,7 +1781,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
uint8_t cdb[16];
int len;
struct SCSIDevice *sdev = NULL;
- int target_id, lun_id, cdb_len;
+ int target_id, lun_id;
lba_count = le32_to_cpu(cmd->frame->io.header.data_len);
lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo);
@@ -1790,7 +1790,6 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
target_id = cmd->frame->header.target_id;
lun_id = cmd->frame->header.lun_id;
- cdb_len = cmd->frame->header.cdb_len;
if (target_id < MFI_MAX_LD && lun_id == 0) {
sdev = scsi_device_find(&s->bus, 0, target_id, lun_id);
@@ -1805,15 +1804,6 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
return MFI_STAT_DEVICE_NOT_FOUND;
}
- if (cdb_len > 16) {
- trace_megasas_scsi_invalid_cdb_len(
- mfi_frame_desc(frame_cmd), 1, target_id, lun_id, cdb_len);
- megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
- cmd->frame->header.scsi_status = CHECK_CONDITION;
- s->event_count++;
- return MFI_STAT_SCSI_DONE_WITH_ERROR;
- }
-
cmd->iov_size = lba_count * sdev->blocksize;
if (megasas_map_sgl(s, cmd, &cmd->frame->io.sgl)) {
megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE));
@@ -1824,7 +1814,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
megasas_encode_lba(cdb, lba_start, lba_count, is_write);
cmd->req = scsi_req_new(sdev, cmd->index,
- lun_id, cdb, cdb_len, cmd);
+ lun_id, cdb, sizeof(cdb), cmd);
if (!cmd->req) {
trace_megasas_scsi_req_alloc_failed(
mfi_frame_desc(frame_cmd), target_id, lun_id);

View File

@@ -1,100 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Tue, 7 Mar 2023 15:03:02 +0100
Subject: [PATCH] ide: avoid potential deadlock when draining during trim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The deadlock can happen as follows:
1. ide_issue_trim is called, and increments the in_flight counter.
2. ide_issue_trim_cb calls blk_aio_pdiscard.
3. Somebody else starts draining (e.g. backup to insert the cbw node).
4. ide_issue_trim_cb is called as the completion callback for
blk_aio_pdiscard.
5. ide_issue_trim_cb issues yet another blk_aio_pdiscard request.
6. The request is added to the wait queue via blk_wait_while_drained,
because draining has been started.
7. Nobody ever decrements the in_flight counter and draining can't
finish. This would be done by ide_trim_bh_cb, which is called after
ide_issue_trim_cb has issued its last request, but
ide_issue_trim_cb is not called anymore, because it's the
completion callback of blk_aio_pdiscard, which waits on draining.
Quoting Hanna Czenczek:
> The point of 7e5cdb345f was that we need any in-flight count to
> accompany a set s->bus->dma->aiocb. While blk_aio_pdiscard() is
> happening, we dont necessarily need another count. But we do need
> it while there is no blk_aio_pdiscard().
> ide_issue_trim_cb() returns in two cases (and, recursively through
> its callers, leaves s->bus->dma->aiocb set):
> 1. After calling blk_aio_pdiscard(), which will keep an in-flight
> count,
> 2. After calling replay_bh_schedule_event() (i.e.
> qemu_bh_schedule()), which does not keep an in-flight count.
Thus, even after moving the blk_inc_in_flight to above the
replay_bh_schedule_event call, the invariant "ide_issue_trim_cb
returns with an accompanying in-flight count" is still satisfied.
However, the issue 7e5cdb345f fixed for canceling resurfaces, because
ide_cancel_dma_sync assumes that it just needs to drain once. But now
the in_flight count is not consistently > 0 during the trim operation.
So, change it to drain until !s->bus->dma->aiocb, which means that the
operation finished (s->bus->dma->aiocb is cleared by ide_set_inactive
via the ide_dma_cb when the end of the transfer is reached).
Discussion here:
https://lists.nongnu.org/archive/html/qemu-devel/2023-03/msg02506.html
Fixes: 7e5cdb345f ("ide: Increment BB in-flight counter for TRIM BH")
Suggested-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/ide/core.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/ide/core.c b/hw/ide/core.c
index c3508acbb1..289347af58 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -444,7 +444,7 @@ static void ide_trim_bh_cb(void *opaque)
iocb->bh = NULL;
qemu_aio_unref(iocb);
- /* Paired with an increment in ide_issue_trim() */
+ /* Paired with an increment in ide_issue_trim_cb() */
blk_dec_in_flight(blk);
}
@@ -504,6 +504,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
done:
iocb->aiocb = NULL;
if (iocb->bh) {
+ /* Paired with a decrement in ide_trim_bh_cb() */
+ blk_inc_in_flight(s->blk);
replay_bh_schedule_event(iocb->bh);
}
}
@@ -516,9 +518,6 @@ BlockAIOCB *ide_issue_trim(
IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
TrimAIOCB *iocb;
- /* Paired with a decrement in ide_trim_bh_cb() */
- blk_inc_in_flight(s->blk);
-
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s;
iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
@@ -742,8 +741,9 @@ void ide_cancel_dma_sync(IDEState *s)
*/
if (s->bus->dma->aiocb) {
trace_ide_cancel_dma_sync_remaining();
- blk_drain(s->blk);
- assert(s->bus->dma->aiocb == NULL);
+ while (s->bus->dma->aiocb) {
+ blk_drain(s->blk);
+ }
}
}

View File

@@ -0,0 +1,479 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 25 Aug 2021 11:14:13 +0200
Subject: [PATCH] monitor: refactor set/expire_password and allow VNC display
id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It is possible to specify more than one VNC server on the command line,
either with an explicit ID or the auto-generated ones à la "default",
"vnc2", "vnc3", ...
It is not possible to change the password on one of these extra VNC
displays though. Fix this by adding a "display" parameter to the
"set_password" and "expire_password" QMP and HMP commands.
For HMP, the display is specified using the "-d" value flag.
For QMP, the schema is updated to explicitly express the supported
variants of the commands with protocol-discriminated unions.
Suggested-by: Eric Blake <eblake@redhat.com>
Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hmp-commands.hx | 29 ++++----
monitor/hmp-cmds.c | 57 +++++++++++++++-
monitor/qmp-cmds.c | 62 ++++++-----------
qapi/ui.json | 165 ++++++++++++++++++++++++++++++++++++++-------
4 files changed, 233 insertions(+), 80 deletions(-)
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 8e45bce2cd..d78e4cfc47 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1514,34 +1514,35 @@ ERST
{
.name = "set_password",
- .args_type = "protocol:s,password:s,connected:s?",
- .params = "protocol password action-if-connected",
+ .args_type = "protocol:s,password:s,display:-dS,connected:s?",
+ .params = "protocol password [-d display] [action-if-connected]",
.help = "set spice/vnc password",
.cmd = hmp_set_password,
},
SRST
-``set_password [ vnc | spice ] password [ action-if-connected ]``
- Change spice/vnc password. Use zero to make the password stay valid
- forever. *action-if-connected* specifies what should happen in
- case a connection is established: *fail* makes the password change
- fail. *disconnect* changes the password and disconnects the
- client. *keep* changes the password and keeps the connection up.
- *keep* is the default.
+``set_password [ vnc | spice ] password [ -d display ] [ action-if-connected ]``
+ Change spice/vnc password. *display* can be used with 'vnc' to specify
+ which display to set the password on. *action-if-connected* specifies
+ what should happen in case a connection is established: *fail* makes
+ the password change fail. *disconnect* changes the password and
+ disconnects the client. *keep* changes the password and keeps the
+ connection up. *keep* is the default.
ERST
{
.name = "expire_password",
- .args_type = "protocol:s,time:s",
- .params = "protocol time",
+ .args_type = "protocol:s,time:s,display:-dS",
+ .params = "protocol time [-d display]",
.help = "set spice/vnc password expire-time",
.cmd = hmp_expire_password,
},
SRST
-``expire_password [ vnc | spice ]`` *expire-time*
- Specify when a password for spice/vnc becomes
- invalid. *expire-time* accepts:
+``expire_password [ vnc | spice ] expire-time [ -d display ]``
+ Specify when a password for spice/vnc becomes invalid.
+ *display* behaves the same as in ``set_password``.
+ *expire-time* accepts:
``now``
Invalidate password instantly.
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index a7e197a90b..f4ef58d257 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1451,10 +1451,41 @@ void hmp_set_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");
const char *password = qdict_get_str(qdict, "password");
+ const char *display = qdict_get_try_str(qdict, "display");
const char *connected = qdict_get_try_str(qdict, "connected");
Error *err = NULL;
+ DisplayProtocol proto;
- qmp_set_password(protocol, password, !!connected, connected, &err);
+ SetPasswordOptions opts = {
+ .password = g_strdup(password),
+ .u.vnc.display = NULL,
+ };
+
+ proto = qapi_enum_parse(&DisplayProtocol_lookup, protocol,
+ DISPLAY_PROTOCOL_VNC, &err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+ opts.protocol = proto;
+
+ if (proto == DISPLAY_PROTOCOL_VNC) {
+ opts.u.vnc.has_display = !!display;
+ opts.u.vnc.display = g_strdup(display);
+ } else if (proto == DISPLAY_PROTOCOL_SPICE) {
+ opts.u.spice.has_connected = !!connected;
+ opts.u.spice.connected =
+ qapi_enum_parse(&SetPasswordAction_lookup, connected,
+ SET_PASSWORD_ACTION_KEEP, &err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+ }
+
+ qmp_set_password(&opts, &err);
+ g_free(opts.password);
+ g_free(opts.u.vnc.display);
hmp_handle_error(mon, err);
}
@@ -1462,9 +1493,31 @@ void hmp_expire_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");
const char *whenstr = qdict_get_str(qdict, "time");
+ const char *display = qdict_get_try_str(qdict, "display");
Error *err = NULL;
+ DisplayProtocol proto;
- qmp_expire_password(protocol, whenstr, &err);
+ ExpirePasswordOptions opts = {
+ .time = g_strdup(whenstr),
+ .u.vnc.display = NULL,
+ };
+
+ proto = qapi_enum_parse(&DisplayProtocol_lookup, protocol,
+ DISPLAY_PROTOCOL_VNC, &err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+ opts.protocol = proto;
+
+ if (proto == DISPLAY_PROTOCOL_VNC) {
+ opts.u.vnc.has_display = !!display;
+ opts.u.vnc.display = g_strdup(display);
+ }
+
+ qmp_expire_password(&opts, &err);
+ g_free(opts.time);
+ g_free(opts.u.vnc.display);
hmp_handle_error(mon, err);
}
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index f7d64a6457..65882b5997 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -164,45 +164,30 @@ void qmp_system_wakeup(Error **errp)
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, errp);
}
-void qmp_set_password(const char *protocol, const char *password,
- bool has_connected, const char *connected, Error **errp)
+void qmp_set_password(SetPasswordOptions *opts, Error **errp)
{
- int disconnect_if_connected = 0;
- int fail_if_connected = 0;
- int rc;
+ bool disconnect_if_connected = false;
+ bool fail_if_connected = false;
+ int rc = 0;
- if (has_connected) {
- if (strcmp(connected, "fail") == 0) {
- fail_if_connected = 1;
- } else if (strcmp(connected, "disconnect") == 0) {
- disconnect_if_connected = 1;
- } else if (strcmp(connected, "keep") == 0) {
- /* nothing */
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER, "connected");
- return;
- }
- }
-
- if (strcmp(protocol, "spice") == 0) {
+ if (opts->protocol == DISPLAY_PROTOCOL_SPICE) {
if (!qemu_using_spice(errp)) {
return;
}
- rc = qemu_spice.set_passwd(password, fail_if_connected,
+ if (opts->u.spice.has_connected) {
+ fail_if_connected =
+ opts->u.spice.connected == SET_PASSWORD_ACTION_FAIL;
+ disconnect_if_connected =
+ opts->u.spice.connected == SET_PASSWORD_ACTION_DISCONNECT;
+ }
+ rc = qemu_spice.set_passwd(opts->password, fail_if_connected,
disconnect_if_connected);
- } else if (strcmp(protocol, "vnc") == 0) {
- if (fail_if_connected || disconnect_if_connected) {
- /* vnc supports "connected=keep" only */
- error_setg(errp, QERR_INVALID_PARAMETER, "connected");
- return;
- }
+ } else if (opts->protocol == DISPLAY_PROTOCOL_VNC) {
/* Note that setting an empty password will not disable login through
* this interface. */
- rc = vnc_display_password(NULL, password);
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol",
- "'vnc' or 'spice'");
- return;
+ rc = vnc_display_password(
+ opts->u.vnc.has_display ? opts->u.vnc.display : NULL,
+ opts->password);
}
if (rc != 0) {
@@ -210,11 +195,11 @@ void qmp_set_password(const char *protocol, const char *password,
}
}
-void qmp_expire_password(const char *protocol, const char *whenstr,
- Error **errp)
+void qmp_expire_password(ExpirePasswordOptions *opts, Error **errp)
{
time_t when;
int rc;
+ const char* whenstr = opts->time;
if (strcmp(whenstr, "now") == 0) {
when = 0;
@@ -226,17 +211,14 @@ void qmp_expire_password(const char *protocol, const char *whenstr,
when = strtoull(whenstr, NULL, 10);
}
- if (strcmp(protocol, "spice") == 0) {
+ if (opts->protocol == DISPLAY_PROTOCOL_SPICE) {
if (!qemu_using_spice(errp)) {
return;
}
rc = qemu_spice.set_pw_expire(when);
- } else if (strcmp(protocol, "vnc") == 0) {
- rc = vnc_display_pw_expire(NULL, when);
- } else {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol",
- "'vnc' or 'spice'");
- return;
+ } else if (opts->protocol == DISPLAY_PROTOCOL_VNC) {
+ rc = vnc_display_pw_expire(
+ opts->u.vnc.has_display ? opts->u.vnc.display : NULL, when);
}
if (rc != 0) {
diff --git a/qapi/ui.json b/qapi/ui.json
index fd9677d48e..cba8665b73 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -9,22 +9,23 @@
{ 'include': 'common.json' }
{ 'include': 'sockets.json' }
+##
+# @DisplayProtocol:
+#
+# Display protocols which support changing password options.
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'DisplayProtocol',
+ 'data': [ { 'name': 'vnc', 'if': 'defined(CONFIG_VNC)' },
+ { 'name': 'spice', 'if': 'defined(CONFIG_SPICE)' } ] }
+
##
# @set_password:
#
# Sets the password of a remote display session.
#
-# @protocol: - 'vnc' to modify the VNC server password
-# - 'spice' to modify the Spice server password
-#
-# @password: the new password
-#
-# @connected: how to handle existing clients when changing the
-# password. If nothing is specified, defaults to 'keep'
-# 'fail' to fail the command if clients are connected
-# 'disconnect' to disconnect existing clients
-# 'keep' to maintain existing clients
-#
# Returns: - Nothing on success
# - If Spice is not enabled, DeviceNotFound
#
@@ -37,16 +38,123 @@
# <- { "return": {} }
#
##
-{ 'command': 'set_password',
- 'data': {'protocol': 'str', 'password': 'str', '*connected': 'str'} }
+{ 'command': 'set_password', 'boxed': true, 'data': 'SetPasswordOptions' }
+
+##
+# @SetPasswordOptions:
+#
+# Data required to set a new password on a display server protocol.
+#
+# @protocol: - 'vnc' to modify the VNC server password
+# - 'spice' to modify the Spice server password
+#
+# @password: the new password
+#
+# Since: 6.2
+#
+##
+{ 'union': 'SetPasswordOptions',
+ 'base': { 'protocol': 'DisplayProtocol',
+ 'password': 'str' },
+ 'discriminator': 'protocol',
+ 'data': { 'vnc': 'SetPasswordOptionsVnc',
+ 'spice': 'SetPasswordOptionsSpice' } }
+
+##
+# @SetPasswordAction:
+#
+# An action to take on changing a password on a connection with active clients.
+#
+# @fail: fail the command if clients are connected
+#
+# @disconnect: disconnect existing clients
+#
+# @keep: maintain existing clients
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'SetPasswordAction',
+ 'data': [ 'fail', 'disconnect', 'keep' ] }
+
+##
+# @SetPasswordActionVnc:
+#
+# See @SetPasswordAction. VNC only supports the keep action. 'connected'
+# should just be omitted for VNC; this is kept for backwards compatibility.
+#
+# @keep: maintain existing clients
+#
+# Since: 6.2
+#
+##
+{ 'enum': 'SetPasswordActionVnc',
+ 'data': [ 'keep' ] }
+
+##
+# @SetPasswordOptionsSpice:
+#
+# Options for set_password specific to the Spice protocol.
+#
+# @connected: How to handle existing clients when changing the
+# password. If nothing is specified, defaults to 'keep'.
+#
+# Since: 6.2
+#
+##
+{ 'struct': 'SetPasswordOptionsSpice',
+ 'data': { '*connected': 'SetPasswordAction' } }
+
+##
+# @SetPasswordOptionsVnc:
+#
+# Options for set_password specific to the VNC protocol.
+#
+# @display: The id of the display where the password should be changed.
+# Defaults to the first.
+#
+# @connected: How to handle existing clients when changing the
+# password.
+#
+# Features:
+# @deprecated: For VNC, @connected will always be 'keep', parameter should be
+# omitted.
+#
+# Since: 6.2
+#
+##
+{ 'struct': 'SetPasswordOptionsVnc',
+ 'data': { '*display': 'str',
+ '*connected': { 'type': 'SetPasswordActionVnc',
+ 'features': ['deprecated'] } } }
##
# @expire_password:
#
# Expire the password of a remote display server.
#
-# @protocol: the name of the remote display protocol 'vnc' or 'spice'
+# Returns: - Nothing on success
+# - If @protocol is 'spice' and Spice is not active, DeviceNotFound
#
+# Since: 0.14
+#
+# Example:
+#
+# -> { "execute": "expire_password", "arguments": { "protocol": "vnc",
+# "time": "+60" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'expire_password', 'boxed': true, 'data': 'ExpirePasswordOptions' }
+
+##
+# @ExpirePasswordOptions:
+#
+# Data required to set password expiration on a display server protocol.
+#
+# @protocol: - 'vnc' to modify the VNC server expiration
+# - 'spice' to modify the Spice server expiration
+#
# @time: when to expire the password.
#
# - 'now' to expire the password immediately
@@ -54,24 +162,33 @@
# - '+INT' where INT is the number of seconds from now (integer)
# - 'INT' where INT is the absolute time in seconds
#
-# Returns: - Nothing on success
-# - If @protocol is 'spice' and Spice is not active, DeviceNotFound
-#
-# Since: 0.14
-#
# Notes: Time is relative to the server and currently there is no way to
# coordinate server time with client time. It is not recommended to
# use the absolute time version of the @time parameter unless you're
# sure you are on the same machine as the QEMU instance.
#
-# Example:
+# Since: 6.2
#
-# -> { "execute": "expire_password", "arguments": { "protocol": "vnc",
-# "time": "+60" } }
-# <- { "return": {} }
+##
+{ 'union': 'ExpirePasswordOptions',
+ 'base': { 'protocol': 'DisplayProtocol',
+ 'time': 'str' },
+ 'discriminator': 'protocol',
+ 'data': { 'vnc': 'ExpirePasswordOptionsVnc' } }
+
+##
+# @ExpirePasswordOptionsVnc:
+#
+# Options for expire_password specific to the VNC protocol.
+#
+# @display: The id of the display where the expiration should be changed.
+# Defaults to the first.
+#
+# Since: 6.2
#
##
-{ 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} }
+{ 'struct': 'ExpirePasswordOptionsVnc',
+ 'data': { '*display': 'str' } }
##
# @screendump:

View File

@@ -0,0 +1,83 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 10 Sep 2021 14:45:33 +0200
Subject: [PATCH] block/mirror: fix NULL pointer dereference in
mirror_wait_on_conflicts()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In mirror_iteration() we call mirror_wait_on_conflicts() with
`self` parameter set to NULL.
Starting from commit d44dae1a7c we dereference the `self` pointer in
mirror_wait_on_conflicts() without checking whether it is NULL.
Backtrace:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
at ../block/mirror.c:172
172 self->waiting_for_op = op;
[Current thread is 1 (Thread 0x7f0908931ec0 (LWP 380249))]
(gdb) bt
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
at ../block/mirror.c:172
#1 0x00005610c5d9d631 in mirror_run (job=0x5610c76a2c00, errp=<optimized out>) at ../block/mirror.c:491
#2 0x00005610c5d58726 in job_co_entry (opaque=0x5610c76a2c00) at ../job.c:917
#3 0x00005610c5f046c6 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>)
at ../util/coroutine-ucontext.c:173
#4 0x00007f0909975820 in ?? () at ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91
from /usr/lib64/libc.so.6
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2001404
Fixes: d44dae1a7c ("block/mirror: fix active mirror dead-lock in mirror_wait_on_conflicts")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-Id: <20210910124533.288318-1-sgarzare@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
(cherry picked from commit 66fed30c9cd11854fc878a4eceb507e915d7c9cd)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/mirror.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 98fc66eabf..85b781bc21 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -160,18 +160,25 @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
if (ranges_overlap(self_start_chunk, self_nb_chunks,
op_start_chunk, op_nb_chunks))
{
- /*
- * If the operation is already (indirectly) waiting for us, or
- * will wait for us as soon as it wakes up, then just go on
- * (instead of producing a deadlock in the former case).
- */
- if (op->waiting_for_op) {
- continue;
+ if (self) {
+ /*
+ * If the operation is already (indirectly) waiting for us,
+ * or will wait for us as soon as it wakes up, then just go
+ * on (instead of producing a deadlock in the former case).
+ */
+ if (op->waiting_for_op) {
+ continue;
+ }
+
+ self->waiting_for_op = op;
}
- self->waiting_for_op = op;
qemu_co_queue_wait(&op->waiting_requests, NULL);
- self->waiting_for_op = NULL;
+
+ if (self) {
+ self->waiting_for_op = NULL;
+ }
+
break;
}
}

View File

@@ -1,48 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 28 Jul 2023 10:47:48 +0200
Subject: [PATCH] migration/block-dirty-bitmap: fix loading bitmap when there
is an iothread
The bdrv_create_dirty_bitmap() function (which is also called by
bdrv_dirty_bitmap_create_successor()) uses bdrv_getlength(bs). This is
a wrapper around a coroutine, and thus uses bdrv_poll_co(). Polling
tries to release the AioContext which will trigger an assert() if it
hasn't been acquired before.
The issue does not happen for migration, because there we are in a
coroutine already, so the wrapper will just call bdrv_co_getlength()
directly without polling.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/block-dirty-bitmap.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 032fc5f405..e1ae3b7316 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -805,8 +805,11 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
"destination", bdrv_dirty_bitmap_name(s->bitmap));
return -EINVAL;
} else {
+ AioContext *ctx = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(ctx);
s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
s->bitmap_name, &local_err);
+ aio_context_release(ctx);
if (!s->bitmap) {
error_report_err(local_err);
return -EINVAL;
@@ -833,7 +836,10 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
bdrv_disable_dirty_bitmap(s->bitmap);
if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) {
+ AioContext *ctx = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(ctx);
bdrv_dirty_bitmap_create_successor(s->bitmap, &local_err);
+ aio_context_release(ctx);
if (local_err) {
error_report_err(local_err);
return -EINVAL;

View File

@@ -1,140 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 28 Sep 2023 10:07:03 +0200
Subject: [PATCH] Revert "Revert "graph-lock: Disable locking for now""
This reverts commit 3cce22defb4b0e47cf135444e30cc673cff5ebad.
There are still some issues with graph locking, e.g. deadlocks during
backup canceling [0]. Because the AioContext locks still exist, it
should be safe to disable locking again.
From the original 80fc5d2600 ("graph-lock: Disable locking for now"):
> We don't currently rely on graph locking yet. It is supposed to replace
> the AioContext lock eventually to enable multiqueue support, but as long
> as we still have the AioContext lock, it is sufficient without the graph
> lock. Once the AioContext lock goes away, the deadlock doesn't exist any
> more either and this commit can be reverted. (Of course, it can also be
> reverted while the AioContext lock still exists if the callers have been
> fixed.)
[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/graph-lock.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 5e66f01ae8..5c2873262a 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
+#if 0
/* Written and read with atomic operations. */
static int has_writer;
+#endif
/*
* A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
g_free(ctx->bdrv_graph);
}
+#if 0
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
@@ -105,12 +108,19 @@ static uint32_t reader_count(void)
assert((int32_t)rd >= 0);
return rd;
}
+#endif
void bdrv_graph_wrlock(BlockDriverState *bs)
{
+#if 0
AioContext *ctx = NULL;
GLOBAL_STATE_CODE();
+ /*
+ * TODO Some callers hold an AioContext lock when this is called, which
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+ * AioContext locks are gone).
+ */
assert(!qatomic_read(&has_writer));
/*
@@ -158,11 +168,13 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
if (ctx) {
aio_context_acquire(bdrv_get_aio_context(bs));
}
+#endif
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
+#if 0
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
@@ -174,10 +186,13 @@ void bdrv_graph_wrunlock(void)
/* Wake up all coroutine that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -237,10 +252,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
+#endif
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -258,6 +275,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
+#endif
}
void bdrv_graph_rdlock_main_loop(void)
@@ -275,13 +293,19 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
/* reader_count() is slow due to aio_context_list_lock lock contention */
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
#endif
+#endif
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
assert(qatomic_read(&has_writer));
+#endif
}

View File

@@ -1,57 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 28 Sep 2023 11:19:14 +0200
Subject: [PATCH] migration states: workaround snapshot performance regression
Commit 813cd616 ("migration: Use migration_transferred_bytes() to
calculate rate_limit") introduced a prohibitive performance regression
when taking a snapshot [0]. The reason turns out to be the flushing
done by migration_transferred_bytes().
Just use a _noflush version of the relevant function as a workaround
until upstream fixes the issue. This is inspired by a not-applied
upstream series [1], but doing the very minimum to avoid the
regression.
[0]: https://gitlab.com/qemu-project/qemu/-/issues/1821
[1]: https://lists.nongnu.org/archive/html/qemu-devel/2023-05/msg07708.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/migration-stats.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/migration/migration-stats.c b/migration/migration-stats.c
index 095d6d75bb..8073c8ebaa 100644
--- a/migration/migration-stats.c
+++ b/migration/migration-stats.c
@@ -18,6 +18,20 @@
MigrationAtomicStats mig_stats;
+/*
+ * Same as migration_transferred_bytes below, but using the _noflush
+ * variant of qemu_file_transferred() to avoid a performance
+ * regression in migration_rate_exceeded().
+ */
+static uint64_t migration_transferred_bytes_noflush(QEMUFile *f)
+{
+ uint64_t multifd = stat64_get(&mig_stats.multifd_bytes);
+ uint64_t qemu_file = qemu_file_transferred_noflush(f);
+
+ trace_migration_transferred_bytes(qemu_file, multifd);
+ return qemu_file + multifd;
+}
+
bool migration_rate_exceeded(QEMUFile *f)
{
if (qemu_file_get_error(f)) {
@@ -25,7 +39,7 @@ bool migration_rate_exceeded(QEMUFile *f)
}
uint64_t rate_limit_start = stat64_get(&mig_stats.rate_limit_start);
- uint64_t rate_limit_current = migration_transferred_bytes(f);
+ uint64_t rate_limit_current = migration_transferred_bytes_noflush(f);
uint64_t rate_limit_used = rate_limit_current - rate_limit_start;
uint64_t rate_limit_max = stat64_get(&mig_stats.rate_limit_max);

View File

@@ -1,45 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 17 Nov 2023 11:18:06 +0100
Subject: [PATCH] Revert "x86: acpi: workaround Windows not handling name
references in Package properly"
This reverts commit 44d975ef340e2f21f236f9520c53e1b30d2213a4.
As reported in the community forum [0] and reproduced locally this
breaks VirtIO network adapters in (at least) the German ISO of Windows
Server 2022. The fix itself was for
> Issue is not fatal but as result acpi-index/"PCI Label ID" property
> is either not shown in device details page or shows incorrect value.
so revert and tolerate that as a stop-gap, rather than have the
devices not working at all.
[0]: https://forum.proxmox.com/threads/92094/post-605684
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/i386/acpi-build.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index bb12b0ad43..de14d3c3da 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -362,13 +362,9 @@ Aml *aml_pci_device_dsm(void)
{
Aml *params = aml_local(0);
Aml *pkg = aml_package(2);
- aml_append(pkg, aml_int(0));
- aml_append(pkg, aml_int(0));
+ aml_append(pkg, aml_name("BSEL"));
+ aml_append(pkg, aml_name("ASUN"));
aml_append(method, aml_store(pkg, params));
- aml_append(method,
- aml_store(aml_name("BSEL"), aml_index(params, aml_int(0))));
- aml_append(method,
- aml_store(aml_name("ASUN"), aml_index(params, aml_int(1))));
aml_append(method,
aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1),
aml_arg(2), aml_arg(3), params))

View File

@@ -1,34 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 1 Feb 2022 20:09:41 +0100
Subject: [PATCH] target/i386: the sgx_epc_get_section stub is reachable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The sgx_epc_get_section stub is reachable from cpu_x86_cpuid. It
should not assert, instead it should just return true just like
the "real" sgx_epc_get_section does when SGX is disabled.
Reported-by: Vladimír Beneš <vbenes@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20220201190941.106001-1-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry-picked from commit 219615740425d9683588207b40a365e6741691a6)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/i386/sgx-stub.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c
index 26833eb233..16b1dfd90b 100644
--- a/hw/i386/sgx-stub.c
+++ b/hw/i386/sgx-stub.c
@@ -34,5 +34,5 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size)
{
- g_assert_not_reached();
+ return true;
}

View File

@@ -1,86 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 24 Jan 2024 11:57:48 +0100
Subject: [PATCH] ui/clipboard: mark type as not available when there is no
data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT
message with len=0. In qemu_clipboard_set_data(), the clipboard info
will be updated setting data to NULL (because g_memdup(data, size)
returns NULL when size is 0). If the client does not set the
VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then
the 'request' callback for the clipboard peer is not initialized.
Later, because data is NULL, qemu_clipboard_request() can be reached
via vdagent_chr_write() and vdagent_clipboard_recv_request() and
there, the clipboard owner's 'request' callback will be attempted to
be called, but that is a NULL pointer.
In particular, this can happen when using the KRDC (22.12.3) VNC
client.
Another scenario leading to the same issue is with two clients (say
noVNC and KRDC):
The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and
initializes its cbpeer.
The KRDC client does not, but triggers a vnc_client_cut_text() (note
it's not the _ext variant). There, a new clipboard info with it as
the 'owner' is created and qemu_clipboard_set_data() is called,
which in turn calls qemu_clipboard_update() with that info.
In qemu_clipboard_update(), the notifier for the noVNC client will be
called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the
noVNC client. The 'owner' in that clipboard info is the clipboard peer
for the KRDC client, which did not initialize the 'request' function.
That sounds correct to me, it is the owner of that clipboard info.
Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set
the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it
passes), that clipboard info is passed to qemu_clipboard_request() and
the original segfault still happens.
Fix the issue by handling updates with size 0 differently. In
particular, mark in the clipboard info that the type is not available.
While at it, switch to g_memdup2(), because g_memdup() is deprecated.
Cc: qemu-stable@nongnu.org
Fixes: CVE-2023-6683
Reported-by: Markus Frank <m.frank@proxmox.com>
Suggested-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Tested-by: Markus Frank <m.frank@proxmox.com>
(picked from https://lists.nongnu.org/archive/html/qemu-stable/2024-01/msg00228.html)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
ui/clipboard.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/ui/clipboard.c b/ui/clipboard.c
index 3d14bffaf8..b3f6fa3c9e 100644
--- a/ui/clipboard.c
+++ b/ui/clipboard.c
@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer,
}
g_free(info->types[type].data);
- info->types[type].data = g_memdup(data, size);
- info->types[type].size = size;
- info->types[type].available = true;
+ if (size) {
+ info->types[type].data = g_memdup2(data, size);
+ info->types[type].size = size;
+ info->types[type].available = true;
+ } else {
+ info->types[type].data = NULL;
+ info->types[type].size = 0;
+ info->types[type].available = false;
+ }
if (update) {
qemu_clipboard_update(info);

View File

@@ -1,65 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 2 Feb 2024 16:31:56 +0100
Subject: [PATCH] virtio-scsi: Attach event vq notifier with no_poll
As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi:
don't waste CPU polling the event virtqueue"), we only attach an io_read
notifier for the virtio-scsi event virtqueue instead, and no polling
notifiers. During operation, the event virtqueue is typically
non-empty, but none of the buffers are intended to be used immediately.
Instead, they only get used when certain events occur. Therefore, it
makes no sense to continuously poll it when non-empty, because it is
supposed to be and stay non-empty.
We do this by using virtio_queue_aio_attach_host_notifier_no_poll()
instead of virtio_queue_aio_attach_host_notifier() for the event
virtqueue.
Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement
BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use
virtio_queue_aio_attach_host_notifier() for all virtqueues, including
the event virtqueue. This can lead to it being polled again, undoing
the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552.
Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the
event virtqueue.
("virtio-scsi: implement BlockDevOps->drained_begin()")
Reported-by: Fiona Ebner <f.ebner@proxmox.com>
Fixes: 766aa2de0f29b657148e04599320d771c36fd126
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/scsi/virtio-scsi.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 45b95ea070..ad24a882fd 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -1148,6 +1148,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
static void virtio_scsi_drained_end(SCSIBus *bus)
{
VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
VirtIODevice *vdev = VIRTIO_DEVICE(s);
uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
s->parent_obj.conf.num_queues;
@@ -1165,7 +1166,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus)
for (uint32_t i = 0; i < total_queues; i++) {
VirtQueue *vq = virtio_get_queue(vdev, i);
- virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+ if (vq == vs->event_vq) {
+ virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+ } else {
+ virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+ }
}
}

View File

@@ -1,125 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 2 Feb 2024 16:31:57 +0100
Subject: [PATCH] virtio: Re-enable notifications after drain
During drain, we do not care about virtqueue notifications, which is why
we remove the handlers on it. When removing those handlers, whether vq
notifications are enabled or not depends on whether we were in polling
mode or not; if not, they are enabled (by default); if so, they have
been disabled by the io_poll_start callback.
Because we do not care about those notifications after removing the
handlers, this is fine. However, we have to explicitly ensure they are
enabled when re-attaching the handlers, so we will resume receiving
notifications. We do this in virtio_queue_aio_attach_host_notifier*().
If such a function is called while we are in a polling section,
attaching the notifiers will then invoke the io_poll_start callback,
re-disabling notifications.
Because we will always miss virtqueue updates in the drained section, we
also need to poll the virtqueue once after attaching the notifiers.
Buglink: https://issues.redhat.com/browse/RHEL-3934
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/virtio/virtio.c | 42 ++++++++++++++++++++++++++++++++++++++++++
include/block/aio.h | 7 ++++++-
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 969c25f4cf..02cce83111 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3526,6 +3526,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
+ /*
+ * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
+ * Re-enable them. (And if detach has not been used before, notifications
+ * being enabled is still the default state while a notifier is attached;
+ * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
+ * notifications enabled once the polling section is left.)
+ */
+ if (!virtio_queue_get_notification(vq)) {
+ virtio_queue_set_notification(vq, 1);
+ }
+
aio_set_event_notifier(ctx, &vq->host_notifier,
virtio_queue_host_notifier_read,
virtio_queue_host_notifier_aio_poll,
@@ -3533,6 +3544,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
aio_set_event_notifier_poll(ctx, &vq->host_notifier,
virtio_queue_host_notifier_aio_poll_begin,
virtio_queue_host_notifier_aio_poll_end);
+
+ /*
+ * We will have ignored notifications about new requests from the guest
+ * while no notifiers were attached, so "kick" the virt queue to process
+ * those requests now.
+ */
+ event_notifier_set(&vq->host_notifier);
}
/*
@@ -3543,14 +3561,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
*/
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
{
+ /* See virtio_queue_aio_attach_host_notifier() */
+ if (!virtio_queue_get_notification(vq)) {
+ virtio_queue_set_notification(vq, 1);
+ }
+
aio_set_event_notifier(ctx, &vq->host_notifier,
virtio_queue_host_notifier_read,
NULL, NULL);
+
+ /*
+ * See virtio_queue_aio_attach_host_notifier().
+ * Note that this may be unnecessary for the type of virtqueues this
+ * function is used for. Still, it will not hurt to have a quick look into
+ * whether we can/should process any of the virtqueue elements.
+ */
+ event_notifier_set(&vq->host_notifier);
}
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
+
+ /*
+ * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
+ * will run after io_poll_begin(), so by removing the notifier, we do not
+ * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
+ * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
+ * notifications are enabled or disabled. It does not really matter anyway;
+ * we just removed the notifier, so we do not care about notifications until
+ * we potentially re-attach it. The attach_host_notifier functions will
+ * ensure that notifications are enabled again when they are needed.
+ */
}
void virtio_queue_host_notifier_read(EventNotifier *n)
diff --git a/include/block/aio.h b/include/block/aio.h
index 32042e8905..79efadfa48 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -498,9 +498,14 @@ void aio_set_event_notifier(AioContext *ctx,
AioPollFn *io_poll,
EventNotifierHandler *io_poll_ready);
-/* Set polling begin/end callbacks for an event notifier that has already been
+/*
+ * Set polling begin/end callbacks for an event notifier that has already been
* registered with aio_set_event_notifier. Do nothing if the event notifier is
* not registered.
+ *
+ * Note that if the io_poll_end() callback (or the entire notifier) is removed
+ * during polling, it will not be called, so an io_poll_begin() is not
+ * necessarily always followed by an io_poll_end().
*/
void aio_set_event_notifier_poll(AioContext *ctx,
EventNotifier *notifier,

View File

@@ -1,119 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Mon, 18 Dec 2023 11:13:40 +0100
Subject: [PATCH] qemu_init: increase NOFILE soft limit on POSIX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In many configurations, e.g. multiple vNICs with multiple queues or
with many Ceph OSDs, the default soft limit of 1024 is not enough.
QEMU is supposed to work fine with file descriptors >= 1024 and does
not use select() on POSIX. Bump the soft limit to the allowed hard
limit to avoid issues with the aforementioned configurations.
Of course the limit could be raised from the outside, but the man page
of systemd.exec states about 'LimitNOFILE=':
> Don't use.
> [...]
> Typically applications should increase their soft limit to the hard
> limit on their own, if they are OK with working with file
> descriptors above 1023,
If the soft limit is already the same as the hard limit, avoid the
superfluous setrlimit call. This can avoid a warning with a strict
seccomp filter blocking setrlimit if NOFILE was already raised before
executing QEMU.
Buglink: https://bugzilla.proxmox.com/show_bug.cgi?id=4507
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
---
include/sysemu/os-posix.h | 1 +
include/sysemu/os-win32.h | 5 +++++
os-posix.c | 22 ++++++++++++++++++++++
softmmu/vl.c | 2 ++
4 files changed, 30 insertions(+)
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 1030d39904..edc415aff5 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -48,6 +48,7 @@ void os_setup_early_signal_handling(void);
void os_set_proc_name(const char *s);
void os_setup_signal_handling(void);
void os_daemonize(void);
+void os_setup_limits(void);
void os_setup_post(void);
int os_mlock(void);
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 91aa0d7ec0..f6e23fe01e 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -129,6 +129,11 @@ static inline int os_mlock(void)
return -ENOSYS;
}
+void os_setup_limits(void)
+{
+ return;
+}
+
#define fsync _commit
#if !defined(lseek)
diff --git a/os-posix.c b/os-posix.c
index cfcb96533c..0cc1d991b1 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -24,6 +24,7 @@
*/
#include "qemu/osdep.h"
+#include <sys/resource.h>
#include <sys/wait.h>
#include <pwd.h>
#include <grp.h>
@@ -286,6 +287,27 @@ void os_daemonize(void)
}
}
+void os_setup_limits(void)
+{
+    struct rlimit fd_limit;
+
+    /* Fetch the current soft (rlim_cur) and hard (rlim_max) NOFILE limits. */
+    if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
+        warn_report("unable to query NOFILE limit: %s", strerror(errno));
+        return;
+    }
+
+    /* Raise the soft limit to the hard limit; skip setrlimit() if equal. */
+    if (fd_limit.rlim_cur != fd_limit.rlim_max) {
+        fd_limit.rlim_cur = fd_limit.rlim_max;
+
+        /* Failure is not fatal here: it is only reported as a warning. */
+        if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
+            warn_report("unable to set NOFILE limit: %s", strerror(errno));
+        }
+    }
+}
+
void os_setup_post(void)
{
int fd = 0;
diff --git a/softmmu/vl.c b/softmmu/vl.c
index c9e9ede237..ba6ad8a8df 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2713,6 +2713,8 @@ void qemu_init(int argc, char **argv)
error_init(argv[0]);
qemu_init_exec_dir(argv[0]);
+ os_setup_limits();
+
qemu_init_arch_modules();
qemu_init_subsystems();

View File

@@ -1,61 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 22 Jan 2024 12:26:25 -0500
Subject: [PATCH] virtio-blk: avoid using ioeventfd state in irqfd conditional
Requests that complete in an IOThread use irqfd to notify the guest
while requests that complete in the main loop thread use the traditional
qdev irq code path. The reason for this conditional is that the irq code
path requires the BQL:
if (s->ioeventfd_started && !s->ioeventfd_disabled) {
virtio_notify_irqfd(vdev, req->vq);
} else {
virtio_notify(vdev, req->vq);
}
There is a corner case where the conditional invokes the irq code path
instead of the irqfd code path:
static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev)
{
...
/*
* Set ->ioeventfd_started to false before draining so that host notifiers
* are not detached/attached anymore.
*/
s->ioeventfd_started = false;
/* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
blk_drain(s->conf.conf.blk);
During blk_drain() the conditional produces the wrong result because
ioeventfd_started is false.
Use qemu_in_iothread() instead of checking the ioeventfd state.
Cc: qemu-stable@nongnu.org
Buglink: https://issues.redhat.com/browse/RHEL-15394
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20240122172625.415386-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
[FE: backport: dataplane -> ioeventfd rework didn't happen yet]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/block/virtio-blk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 39e7f23fab..61bd1f6859 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -64,7 +64,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
iov_discard_undo(&req->inhdr_undo);
iov_discard_undo(&req->outhdr_undo);
virtqueue_push(req->vq, &req->elem, req->in_len);
- if (s->dataplane_started && !s->dataplane_disabled) {
+ if (qemu_in_iothread()) {
virtio_blk_data_plane_notify(s->dataplane, req->vq);
} else {
virtio_notify(vdev, req->vq);

View File

@@ -1,89 +1,17 @@
Index: pve-qemu-kvm-8.1.2/block/meson.build
Index: pve-qemu-kvm-6.1.0/qapi/block-core.json
===================================================================
--- pve-qemu-kvm-8.1.2.orig/block/meson.build
+++ pve-qemu-kvm-8.1.2/block/meson.build
@@ -123,6 +123,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: pve-qemu-kvm-8.1.2/meson.build
===================================================================
--- pve-qemu-kvm-8.1.2.orig/meson.build
+++ pve-qemu-kvm-8.1.2/meson.build
@@ -1303,6 +1303,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'))
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -2123,6 +2143,7 @@ if numa.found()
endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
config_host_data.set('CONFIG_SDL', sdl.found())
@@ -4298,6 +4319,7 @@ summary_info += {'fdt support': fd
summary_info += {'libcap-ng support': libcap_ng}
summary_info += {'bpf support': libbpf}
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
Index: pve-qemu-kvm-8.1.2/meson_options.txt
===================================================================
--- pve-qemu-kvm-8.1.2.orig/meson_options.txt
+++ pve-qemu-kvm-8.1.2/meson_options.txt
@@ -186,6 +186,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
Index: pve-qemu-kvm-8.1.2/qapi/block-core.json
===================================================================
--- pve-qemu-kvm-8.1.2.orig/qapi/block-core.json
+++ pve-qemu-kvm-8.1.2/qapi/block-core.json
@@ -3403,7 +3403,7 @@
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
--- pve-qemu-kvm-6.1.0.orig/qapi/block-core.json
+++ pve-qemu-kvm-6.1.0/qapi/block-core.json
@@ -3084,7 +3084,7 @@
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4465,6 +4465,28 @@
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -4020,6 +4020,28 @@
'*server': ['InetSocketAddressBase'] } }
##
@@ -112,15 +40,15 @@ Index: pve-qemu-kvm-8.1.2/qapi/block-core.json
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4923,6 +4945,7 @@
@@ -4392,6 +4414,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5360,6 +5383,17 @@
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4782,6 +4805,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
@@ -138,7 +66,7 @@ Index: pve-qemu-kvm-8.1.2/qapi/block-core.json
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -5581,6 +5615,7 @@
@@ -4977,6 +5011,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
@@ -146,53 +74,123 @@ Index: pve-qemu-kvm-8.1.2/qapi/block-core.json
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: pve-qemu-kvm-8.1.2/scripts/ci/org.centos/stream/8/x86_64/configure
Index: pve-qemu-kvm-6.1.0/block/meson.build
===================================================================
--- pve-qemu-kvm-8.1.2.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ pve-qemu-kvm-8.1.2/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -30,7 +30,7 @@
--with-suffix="qemu-kvm" \
--firmwarepath=/usr/share/qemu-firmware \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -176,6 +176,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
Index: pve-qemu-kvm-8.1.2/scripts/meson-buildoptions.sh
--- pve-qemu-kvm-6.1.0.orig/block/meson.build
+++ pve-qemu-kvm-6.1.0/block/meson.build
@@ -91,6 +91,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: pve-qemu-kvm-6.1.0/configure
===================================================================
--- pve-qemu-kvm-8.1.2.orig/scripts/meson-buildoptions.sh
+++ pve-qemu-kvm-8.1.2/scripts/meson-buildoptions.sh
@@ -153,6 +153,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -416,6 +417,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;
--- pve-qemu-kvm-6.1.0.orig/configure
+++ pve-qemu-kvm-6.1.0/configure
@@ -375,6 +375,7 @@ trace_file="trace"
spice="$default_feature"
spice_protocol="auto"
rbd="auto"
+vitastor="auto"
smartcard="auto"
u2f="auto"
libusb="auto"
@@ -1293,6 +1294,10 @@ for opt do
;;
--enable-rbd) rbd="enabled"
;;
+ --disable-vitastor) vitastor="disabled"
+ ;;
+ --enable-vitastor) vitastor="enabled"
+ ;;
--disable-xfsctl) xfs="no"
;;
--enable-xfsctl) xfs="yes"
@@ -1921,6 +1926,7 @@ disabled with --disable-FEATURE, default
spice spice
spice-protocol spice-protocol
rbd rados block device (rbd)
+ vitastor vitastor block device
libiscsi iscsi support
libnfs nfs support
smartcard smartcard support (libcacard)
@@ -5211,7 +5217,7 @@ if test "$skip_meson" = no; then
-Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt -Dbrlapi=$brlapi \
-Dcurl=$curl -Dglusterfs=$glusterfs -Dbzip2=$bzip2 -Dlibiscsi=$libiscsi \
-Dlibnfs=$libnfs -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\
- -Drbd=$rbd -Dlzo=$lzo -Dsnappy=$snappy -Dlzfse=$lzfse -Dlibxml2=$libxml2 \
+ -Drbd=$rbd -Dvitastor=$vitastor -Dlzo=$lzo -Dsnappy=$snappy -Dlzfse=$lzfse -Dlibxml2=$libxml2 \
-Dlibdaxctl=$libdaxctl -Dlibpmem=$libpmem -Dlinux_io_uring=$linux_io_uring \
-Dgnutls=$gnutls -Dnettle=$nettle -Dgcrypt=$gcrypt -Dauth_pam=$auth_pam \
-Dzstd=$zstd -Dseccomp=$seccomp -Dvirtfs=$virtfs -Dcap_ng=$cap_ng \
Index: pve-qemu-kvm-6.1.0/meson.build
===================================================================
--- pve-qemu-kvm-6.1.0.orig/meson.build
+++ pve-qemu-kvm-6.1.0/meson.build
@@ -729,6 +729,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1268,6 +1288,7 @@ config_host_data.set('CONFIG_LIBNFS', li
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
@@ -3087,6 +3108,7 @@ summary_info += {'bpf support': libbpf.f
# TODO: add back protocol and server version
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
summary_info += {'rbd support': rbd.found()}
+summary_info += {'vitastor support': vitastor.found()}
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
summary_info += {'smartcard support': cacard.found()}
summary_info += {'U2F support': u2f.found()}
Index: pve-qemu-kvm-6.1.0/meson_options.txt
===================================================================
--- pve-qemu-kvm-6.1.0.orig/meson_options.txt
+++ pve-qemu-kvm-6.1.0/meson_options.txt
@@ -102,6 +102,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('gtk', type : 'feature', value : 'auto',
description: 'GTK+ user interface')
option('sdl', type : 'feature', value : 'auto',
Index: a/block/vitastor.c
===================================================================
--- /dev/null
+++ a/block/vitastor.c
@@ -0,0 +1,1076 @@
@@ -0,0 +1,797 @@
+// Copyright (c) Vitaliy Filippov, 2019+
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
+
@@ -204,9 +202,6 @@ Index: a/block/vitastor.c
+#endif
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#if QEMU_VERSION_MAJOR >= 8
+#include "block/block-io.h"
+#endif
+#include "block/block_int.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
@@ -230,11 +225,6 @@ Index: a/block/vitastor.c
+#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
+#define qobject_unref QDECREF
+#endif
+#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
+#include "sysemu/replay.h"
+#else
+#include "sysemu/sysemu.h"
+#endif
+
+#include "vitastor_c.h"
+
@@ -248,13 +238,9 @@ Index: a/block/vitastor.c
+}
+#endif
+
+typedef struct VitastorFdData VitastorFdData;
+
+typedef struct VitastorClient
+{
+ void *proxy;
+ int uring_eventfd;
+
+ void *watch;
+ char *config_path;
+ char *etcd_host;
@@ -271,24 +257,12 @@ Index: a/block/vitastor.c
+ int rdma_gid_index;
+ int rdma_mtu;
+ QemuMutex mutex;
+ AioContext *ctx;
+ VitastorFdData **fds;
+ int fd_count, fd_alloc;
+ int bh_uring_scheduled;
+
+ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
+ uint32_t last_bitmap_granularity;
+ uint8_t *last_bitmap;
+} VitastorClient;
+
+typedef struct VitastorFdData
+{
+ VitastorClient *cli;
+ int fd;
+ IOHandler *fd_read, *fd_write;
+ void *opaque;
+} VitastorFdData;
+
+typedef struct VitastorRPC
+{
+ BlockDriverState *bs;
@@ -299,21 +273,10 @@ Index: a/block/vitastor.c
+ uint64_t inode, offset, len;
+ uint32_t bitmap_granularity;
+ uint8_t *bitmap;
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+ QEMUBH *bh;
+#endif
+} VitastorRPC;
+
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+typedef struct VitastorBH
+{
+ VitastorClient *cli;
+ QEMUBH *bh;
+} VitastorBH;
+#endif
+
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
+static void vitastor_co_generic_cb(void *opaque, long retval);
+static void vitastor_co_generic_bh_cb(void *opaque, long retval);
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
+static void vitastor_close(BlockDriverState *bs);
+
@@ -391,13 +354,8 @@ Index: a/block/vitastor.c
+ !strcmp(name, "rdma-gid-index") ||
+ !strcmp(name, "rdma-mtu"))
+ {
+#if QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1
+ unsigned long long num_val;
+ if (parse_uint_full(value, &num_val, 0))
+#else
+ uint64_t num_val;
+ if (parse_uint_full(value, 0, &num_val))
+#endif
+ {
+ error_setg(errp, "Illegal %s: %s", name, value);
+ goto out;
@@ -434,54 +392,6 @@ Index: a/block/vitastor.c
+ return;
+}
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+static void vitastor_uring_handler(void *opaque)
+{
+    // opaque is the VitastorClient that was passed when this handler was registered
+    VitastorClient *cli = opaque;
+    qemu_mutex_lock(&cli->mutex);
+    // clear the flag first so that vitastor_schedule_uring_handler() can schedule again
+    cli->bh_uring_scheduled = 0;
+    vitastor_c_uring_handle_events(cli->proxy);
+    qemu_mutex_unlock(&cli->mutex);
+}
+
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+// Bottom-half callback used on the QEMU < 2.8 code path: runs the handler for
+// the client stored in the VitastorBH wrapper, then deletes the bottom half
+// and frees the wrapper that vitastor_schedule_uring_handler() allocated.
+static void vitastor_bh_uring_handler(void *opaque)
+{
+ VitastorBH *vbh = opaque;
+ // NOTE(review): vitastor_bh_handler() is not defined in the visible part of
+ // this file - presumably vitastor_uring_handler() was meant; confirm.
+ vitastor_bh_handler(vbh->cli);
+ qemu_bh_delete(vbh->bh);
+ free(vbh);
+}
+#endif
+
+// Schedule a one-shot run of vitastor_uring_handler() in the client's
+// AioContext. Does nothing when io_uring is not in use (uring_eventfd < 0)
+// or when a run is already pending (bh_uring_scheduled is set).
+// The visible callers invoke this with client->mutex held.
+static void vitastor_schedule_uring_handler(VitastorClient *client)
+{
+    void *opaque = client;
+    if (client->uring_eventfd >= 0 && !client->bh_uring_scheduled)
+    {
+        client->bh_uring_scheduled = 1;
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+        replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+        aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque);
+#else
+        // Legacy path: no one-shot BH API, so wrap the client in a VitastorBH
+        // that vitastor_bh_uring_handler() deletes and frees after running.
+        VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH));
+        if (!vbh)
+        {
+            fprintf(stderr, "bad alloc\n");
+            abort();
+        }
+        vbh->cli = client;
+#if QEMU_VERSION_MAJOR >= 2
+        // There is no 'task' in this scope; use the AioContext stored in the client
+        vbh->bh = aio_bh_new(client->ctx, vitastor_bh_uring_handler, vbh);
+#else
+        vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh);
+#endif
+        qemu_bh_schedule(vbh->bh);
+#endif
+    }
+}
+#else
+// Fallback for builds where VITASTOR_C_API_VERSION is undefined or < 2:
+// there is no io_uring handler to schedule, so this is intentionally empty.
+static void vitastor_schedule_uring_handler(VitastorClient *client)
+{
+}
+#endif
+
+static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
+{
+ BlockDriverState *bs = task->bs;
@@ -489,8 +399,7 @@ Index: a/block/vitastor.c
+ task->co = qemu_coroutine_self();
+
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
+ vitastor_schedule_uring_handler(client);
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_bh_cb, task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task->complete)
@@ -499,32 +408,13 @@ Index: a/block/vitastor.c
+ }
+}
+
+static void vitastor_aio_fd_read(void *fddv)
+{
+    // fddv is the VitastorFdData registered for this descriptor
+    VitastorFdData *entry = fddv;
+    VitastorClient *cli = entry->cli;
+    // run the wrapped read callback under the client mutex
+    qemu_mutex_lock(&cli->mutex);
+    entry->fd_read(entry->opaque);
+    vitastor_schedule_uring_handler(cli);
+    qemu_mutex_unlock(&cli->mutex);
+}
+
+static void vitastor_aio_fd_write(void *fddv)
+{
+    // fddv is the VitastorFdData registered for this descriptor
+    VitastorFdData *entry = fddv;
+    VitastorClient *cli = entry->cli;
+    // run the wrapped write callback under the client mutex
+    qemu_mutex_lock(&cli->mutex);
+    entry->fd_write(entry->opaque);
+    vitastor_schedule_uring_handler(cli);
+    qemu_mutex_unlock(&cli->mutex);
+}
+
+static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
+static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+ aio_set_fd_handler(ctx, fd,
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 && (QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1)
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
+ 0 /*is_external*/,
+#endif
+ fd_read,
+ fd_write,
+ fd_read, fd_write,
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
+ NULL /*io_flush*/,
+#endif
@@ -537,92 +427,6 @@ Index: a/block/vitastor.c
+ opaque);
+}
+
+// fd-handler registration callback handed to vitastor_c_create_qemu().
+// Keeps a per-client table (client->fds) of VitastorFdData entries so that the
+// real callbacks can be wrapped by vitastor_aio_fd_read/vitastor_aio_fd_write,
+// then (un)registers those wrappers for fd in the client's AioContext.
+// vcli is the VitastorClient; unused1 and unused2 are ignored.
+// Passing both fd_read and fd_write as NULL removes the entry for fd.
+static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+ VitastorClient *client = (VitastorClient*)vcli;
+ VitastorFdData *fdd = NULL;
+ int i;
+ // look for an existing entry for this fd
+ for (i = 0; i < client->fd_count; i++)
+ {
+ if (client->fds[i]->fd == fd)
+ {
+ if (fd_read || fd_write)
+ {
+ // update the existing entry in place
+ fdd = client->fds[i];
+ fdd->opaque = opaque;
+ fdd->fd_read = fd_read;
+ fdd->fd_write = fd_write;
+ }
+ else
+ {
+ // removal: close the gap in the table; fdd stays NULL
+ // NOTE(review): the removed VitastorFdData is not freed in this
+ // function - confirm whether that is intentional (a wrapper that is
+ // still running may dereference it) or a leak.
+ for (int j = i+1; j < client->fd_count; j++)
+ client->fds[j-1] = client->fds[j];
+ client->fd_count--;
+ }
+ break;
+ }
+ }
+ // no entry yet and at least one handler requested: create and append one
+ if ((fd_read || fd_write) && !fdd)
+ {
+ // NOTE(review): the malloc()/realloc() results below are used unchecked
+ fdd = (VitastorFdData*)malloc(sizeof(VitastorFdData));
+ fdd->cli = client;
+ fdd->fd = fd;
+ fdd->fd_read = fd_read;
+ fdd->fd_write = fd_write;
+ fdd->opaque = opaque;
+ if (client->fd_count >= client->fd_alloc)
+ {
+ // double the table capacity, with a minimum of 16 slots
+ client->fd_alloc = client->fd_alloc*2;
+ if (client->fd_alloc < 16)
+ client->fd_alloc = 16;
+ client->fds = (VitastorFdData**)realloc(client->fds, sizeof(VitastorFdData*) * client->fd_alloc);
+ }
+ client->fds[client->fd_count++] = fdd;
+ }
+ // register the wrappers (or NULL to unregister) with fdd as their opaque
+ universal_aio_set_fd_handler(
+ client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd
+ );
+}
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+// Growable array of borrowed C string pointers (the strings are not copied).
+typedef struct str_array
+{
+ // heap buffer managed by strarray_push()/strarray_free()
+ const char **items;
+ // len = number of used slots, alloc = capacity of items in slots
+ int len, alloc;
+} str_array;
+
+// Append str (pointer only, no copy) to the array, growing the buffer when
+// it is full. Aborts the process if the buffer cannot be grown.
+static void strarray_push(str_array *a, const char *str)
+{
+    if (a->len >= a->alloc)
+    {
+        // start with 4 slots, then double on every growth
+        a->alloc = a->alloc ? 2*a->alloc : 4;
+        a->items = (const char**)realloc(a->items, a->alloc*sizeof(char*));
+        if (a->items == NULL)
+        {
+            fprintf(stderr, "bad alloc\n");
+            abort();
+        }
+    }
+    a->items[a->len] = str;
+    a->len++;
+}
+
+// Append key followed by value; the pair is skipped entirely when either
+// pointer is NULL.
+static void strarray_push_kv(str_array *a, const char *key, const char *value)
+{
+    if (!key || !value)
+    {
+        return;
+    }
+    strarray_push(a, key);
+    strarray_push(a, value);
+}
+
+// Release the pointer buffer (not the strings it points to) and reset the
+// array to its empty state.
+static void strarray_free(str_array *a)
+{
+    free(a->items);
+    a->len = 0;
+    a->alloc = 0;
+    a->items = NULL;
+}
+#endif
+
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+ VitastorRPC task;
@@ -640,46 +444,10 @@ Index: a/block/vitastor.c
+ client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
+ client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
+ client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
+ client->ctx = bdrv_get_aio_context(bs);
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+ str_array opt = {};
+ strarray_push_kv(&opt, "config_path", qdict_get_try_str(options, "config-path"));
+ strarray_push_kv(&opt, "etcd_address", qdict_get_try_str(options, "etcd-host"));
+ strarray_push_kv(&opt, "etcd_prefix", qdict_get_try_str(options, "etcd-prefix"));
+ strarray_push_kv(&opt, "use_rdma", qdict_get_try_str(options, "use-rdma"));
+ strarray_push_kv(&opt, "rdma_device", qdict_get_try_str(options, "rdma-device"));
+ strarray_push_kv(&opt, "rdma_port_num", qdict_get_try_str(options, "rdma-port-num"));
+ strarray_push_kv(&opt, "rdma_gid_index", qdict_get_try_str(options, "rdma-gid-index"));
+ strarray_push_kv(&opt, "rdma_mtu", qdict_get_try_str(options, "rdma-mtu"));
+ strarray_push_kv(&opt, "client_writeback_allowed", (flags & BDRV_O_NOCACHE) ? "0" : "1");
+ client->proxy = vitastor_c_create_uring_json(opt.items, opt.len);
+ strarray_free(&opt);
+ if (client->proxy)
+ {
+ client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy);
+ if (client->uring_eventfd < 0)
+ {
+ fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno));
+ error_setg(errp, "failed to create io_uring eventfd");
+ vitastor_close(bs);
+ return -1;
+ }
+ universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
+ }
+ else
+ {
+ // Writeback cache is unusable without io_uring because the client can't correctly flush on exit
+ fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower%s\n",
+ strerror(errno), (flags & BDRV_O_NOCACHE ? "" : " and writeback cache will be disabled"));
+#endif
+ client->uring_eventfd = -1;
+ client->proxy = vitastor_c_create_qemu(
+ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
+ );
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+ }
+#endif
+ client->proxy = vitastor_c_create_qemu(
+ vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
+ );
+ image = client->image = g_strdup(qdict_get_try_str(options, "image"));
+ client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
+ // Get image metadata (size and readonly flag) or just wait until the client is ready
@@ -693,13 +461,7 @@ Index: a/block/vitastor.c
+ }
+ else
+ {
+#if QEMU_VERSION_MAJOR >= 8
+ aio_co_enter(bdrv_get_aio_context(bs), qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
+ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#else
+ qemu_coroutine_enter(qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#endif
+ BDRV_POLL_WHILE(bs, !task.complete);
+ }
+ client->image = image;
@@ -738,10 +500,6 @@ Index: a/block/vitastor.c
+ return -1;
+ }
+ bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
+#if QEMU_VERSION_MAJOR > 5 || QEMU_VERSION_MAJOR == 5 && QEMU_VERSION_MINOR >= 1
+ /* When extending regular files, we get zeros from the OS */
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+#endif
+ //client->aio_context = bdrv_get_aio_context(bs);
+ qdict_del(options, "use-rdma");
+ qdict_del(options, "rdma-mtu");
@@ -763,12 +521,6 @@ Index: a/block/vitastor.c
+{
+ VitastorClient *client = bs->opaque;
+ vitastor_c_destroy(client->proxy);
+ if (client->fds)
+ {
+ free(client->fds);
+ client->fds = NULL;
+ client->fd_alloc = client->fd_count = 0;
+ }
+ qemu_mutex_destroy(&client->mutex);
+ if (client->config_path)
+ g_free(client->config_path);
@@ -838,11 +590,7 @@ Index: a/block/vitastor.c
+ }
+
+ // TODO: Resize inode to <offset> bytes
+#if QEMU_VERSION_MAJOR >= 4
+ client->size = exact || client->size < offset ? offset : client->size;
+#else
+ client->size = offset;
+#endif
+ client->size = offset / BDRV_SECTOR_SIZE;
+
+ return 0;
+}
@@ -889,44 +637,25 @@ Index: a/block/vitastor.c
+ };
+}
+
+static void vitastor_co_generic_bh_cb(void *opaque)
+static void vitastor_co_generic_bh_cb(void *opaque, long retval)
+{
+ VitastorRPC *task = opaque;
+ task->ret = retval;
+ task->complete = 1;
+ if (qemu_coroutine_self() != task->co)
+ {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+ aio_co_wake(task->co);
+#else
+#if QEMU_VERSION_MAJOR == 2
+ qemu_bh_delete(task->bh);
+#endif
+ qemu_coroutine_enter(task->co, NULL);
+ qemu_aio_release(task);
+#endif
+ }
+}
+
+// Completion callback passed to the vitastor_c_* calls: stores the result in
+// the VitastorRPC task and defers vitastor_co_generic_bh_cb() to a bottom half
+// in the AioContext of task->bs. Which BH API is used depends on the QEMU
+// version being built against.
+static void vitastor_co_generic_cb(void *opaque, long retval)
+{
+ VitastorRPC *task = opaque;
+ task->ret = retval;
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+ replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 2
+ // no one-shot API here: the BH handle is kept in task->bh
+ task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+ qemu_bh_schedule(task->bh);
+#else
+ task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
+ qemu_bh_schedule(task->bh);
+#endif
+}
+
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
+{
+ vitastor_co_generic_cb(opaque, retval);
+ vitastor_co_generic_bh_cb(opaque, retval);
+}
+
+static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
@@ -945,7 +674,6 @@ Index: a/block/vitastor.c
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
+ vitastor_schedule_uring_handler(client);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
@@ -978,8 +706,7 @@ Index: a/block/vitastor.c
+
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
+ vitastor_schedule_uring_handler(client);
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
@@ -997,6 +724,7 @@ Index: a/block/vitastor.c
+ VitastorRPC *task = opaque;
+ VitastorClient *client = task->bs->opaque;
+ task->ret = retval;
+ task->complete = 1;
+ if (retval >= 0)
+ {
+ task->bitmap = bitmap;
@@ -1008,17 +736,15 @@ Index: a/block/vitastor.c
+ client->last_bitmap = bitmap;
+ }
+ }
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+ replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 2
+ task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+ qemu_bh_schedule(task->bh);
+ if (qemu_coroutine_self() != task->co)
+ {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+ aio_co_wake(task->co);
+#else
+ task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
+ qemu_bh_schedule(task->bh);
+ qemu_coroutine_enter(task->co, NULL);
+ qemu_aio_release(task);
+#endif
+ }
+}
+
+static int coroutine_fn vitastor_co_block_status(
@@ -1059,7 +785,6 @@ Index: a/block/vitastor.c
+ task.bitmap = client->last_bitmap = NULL;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
+ vitastor_schedule_uring_handler(client);
+ qemu_mutex_unlock(&client->mutex);
+ while (!task.complete)
+ {
@@ -1145,8 +870,7 @@ Index: a/block/vitastor.c
+ vitastor_co_init_task(bs, &task);
+
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
+ vitastor_schedule_uring_handler(client);
+ vitastor_c_sync(client->proxy, vitastor_co_generic_bh_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
@@ -1201,13 +925,8 @@ Index: a/block/vitastor.c
+ .bdrv_parse_filename = vitastor_parse_filename,
+
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+#if QEMU_VERSION_MAJOR >= 8
+ .bdrv_co_get_info = vitastor_get_info,
+ .bdrv_co_getlength = vitastor_getlength,
+#else
+ .bdrv_get_info = vitastor_get_info,
+ .bdrv_getlength = vitastor_getlength,
+#endif
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
+ .bdrv_probe_blocksizes = vitastor_probe_blocksizes,
+#endif

View File

@@ -14,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index 7f540b03ed..ca551baa42 100644
index dd295cfc6d..3ac5177cbb 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -563,7 +563,7 @@ static QemuOptsList raw_runtime_opts = {
@@ -533,7 +533,7 @@ static QemuOptsList raw_runtime_opts = {
{
.name = "locking",
.type = QEMU_OPT_STRING,
@@ -26,7 +26,7 @@ index 7f540b03ed..ca551baa42 100644
},
{
.name = "pr-manager",
@@ -663,7 +663,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
@@ -631,7 +631,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->use_lock = false;
break;
case ON_OFF_AUTO_AUTO:

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/net/net.h b/include/net/net.h
index 685ec58318..22edf4ee96 100644
index 5d1508081f..f665924193 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -260,8 +260,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
@@ -219,8 +219,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
int net_hub_id_for_client(NetClientState *nc, int *id);
NetClientState *net_hub_port_find(int hub_id);

View File

@@ -10,10 +10,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 0893b794e9..6d650a58b9 100644
index 21b33fbe2e..32514193a9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2243,9 +2243,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
@@ -2007,9 +2007,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
#define CPU_RESOLVING_TYPE TYPE_X86_CPU
#ifdef TARGET_X86_64
@@ -24,4 +24,4 @@ index 0893b794e9..6d650a58b9 100644
+#define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("kvm32")
#endif
#define cpu_list x86_cpu_list
#define cpu_signal_handler cpu_x86_signal_handler

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/ui/spice-core.c b/ui/spice-core.c
index 52a59386d7..b20c25aee0 100644
index 0371055e6c..840cf56923 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -691,32 +691,35 @@ static void qemu_spice_init(void)
@@ -694,32 +694,35 @@ static void qemu_spice_init(void)
if (tls_port) {
x509_dir = qemu_opt_get(opts, "x509-dir");

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/block/gluster.c b/block/gluster.c
index ad5fadbe79..d0011085c4 100644
index e8ee14c8e9..3eb6a05500 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -43,7 +43,7 @@
@@ -42,7 +42,7 @@
#define GLUSTER_DEBUG_DEFAULT 4
#define GLUSTER_DEBUG_MAX 9
#define GLUSTER_OPT_LOGFILE "logfile"
@@ -21,15 +21,15 @@ index ad5fadbe79..d0011085c4 100644
/*
* Several versions of GlusterFS (3.12? -> 6.0.1) fail when the transfer size
* is greater or equal to 1024 MiB, so we are limiting the transfer size to 512
@@ -425,6 +425,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
@@ -424,6 +424,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
int old_errno;
SocketAddressList *server;
uint64_t port;
unsigned long long port;
+ const char *logfile;
glfs = glfs_find_preopened(gconf->volume);
if (glfs) {
@@ -467,9 +468,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
@@ -466,9 +467,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
}
}

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c
index 978671411e..a4749f3b1b 100644
index dcf82b15b8..feeec452f0 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -963,6 +963,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@@ -814,6 +814,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
rados_conf_set(*cluster, "rbd_cache", "false");
}

View File

@@ -0,0 +1,88 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:37 +0200
Subject: [PATCH] PVE: [Up] qmp: add get_link_status
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
net/net.c | 27 +++++++++++++++++++++++++++
qapi/net.json | 15 +++++++++++++++
qapi/pragma.json | 1 +
3 files changed, 43 insertions(+)
diff --git a/net/net.c b/net/net.c
index 76bbb7c31b..82e0a768b4 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1314,6 +1314,33 @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
}
}
+int64_t qmp_get_link_status(const char *name, Error **errp)
+{
+ NetClientState *ncs[MAX_QUEUE_NUM];
+ NetClientState *nc;
+ int queues;
+ bool ret;
+
+ queues = qemu_find_net_clients_except(name, ncs,
+ NET_CLIENT_DRIVER__MAX,
+ MAX_QUEUE_NUM);
+
+ if (queues == 0) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", name);
+ return (int64_t) -1;
+ }
+
+ nc = ncs[0];
+ ret = ncs[0]->link_down;
+
+ if (nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+ ret = ncs[0]->peer->link_down;
+ }
+
+ return (int64_t) ret ? 0 : 1;
+}
+
void colo_notify_filters_event(int event, Error **errp)
{
NetClientState *nc;
diff --git a/qapi/net.json b/qapi/net.json
index 7fab2e7cd8..74c9a6109e 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -35,6 +35,21 @@
##
{ 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
+##
+# @get_link_status:
+#
+# Get the current link state of the nics or nic.
+#
+# @name: name of the nic you get the state of
+#
+# Return: If link is up 1
+# If link is down 0
+# If an error occurs, an empty string.
+#
+# Notes: this is a Proxmox VE extension and not an official part of QEMU.
+##
+{ 'command': 'get_link_status', 'data': {'name': 'str'} , 'returns': 'int' }
+
##
# @netdev_add:
#
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 3bc0335d1f..7c91ea3685 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -22,6 +22,7 @@
'system_reset',
'system_wakeup' ],
'command-returns-exceptions': [
+ 'get_link_status',
'human-monitor-command',
'qom-get',
'query-tpm-models',

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/block/gluster.c b/block/gluster.c
index d0011085c4..2df3d6e35d 100644
index 3eb6a05500..b612918ee8 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -58,6 +58,7 @@ typedef struct GlusterAIOCB {
@@ -57,6 +57,7 @@ typedef struct GlusterAIOCB {
int ret;
Coroutine *coroutine;
AioContext *aio_context;
@@ -27,7 +27,7 @@ index d0011085c4..2df3d6e35d 100644
} GlusterAIOCB;
typedef struct BDRVGlusterState {
@@ -753,8 +754,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
@@ -752,8 +753,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
acb->ret = 0; /* Success */
} else if (ret < 0) {
acb->ret = -errno; /* Read/Write failed */
@@ -45,9 +45,9 @@ index d0011085c4..2df3d6e35d 100644
acb.aio_context = bdrv_get_aio_context(bs);
+ acb.is_write = true;
ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1201,9 +1205,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
@@ -1202,9 +1206,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
acb.aio_context = bdrv_get_aio_context(bs);
if (write) {
@@ -59,7 +59,7 @@ index d0011085c4..2df3d6e35d 100644
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
}
@@ -1266,6 +1272,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
@@ -1268,6 +1274,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
@@ -73,5 +73,5 @@ index d0011085c4..2df3d6e35d 100644
acb.aio_context = bdrv_get_aio_context(bs);
+ acb.is_write = true;
ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/qemu-img.c b/qemu-img.c
index 78433f3746..25d427edd1 100644
index 908fd0cce5..5dc1d0a2ca 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3062,7 +3062,8 @@ static int img_info(int argc, char **argv)
@@ -2977,7 +2977,8 @@ static int img_info(int argc, char **argv)
list = collect_image_info_list(image_opts, filename, fmt, chain,
force_share);
if (!list) {

View File

@@ -31,14 +31,13 @@ override the output file's size.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qemu-img-cmds.hx | 4 +-
qemu-img.c | 202 ++++++++++++++++++++++++++++++-----------------
2 files changed, 133 insertions(+), 73 deletions(-)
qemu-img.c | 187 +++++++++++++++++++++++++++++------------------
2 files changed, 119 insertions(+), 72 deletions(-)
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 1b1dab5b17..d1616c045a 100644
index b3620f29e5..e70ef3dc91 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -58,9 +58,9 @@ SRST
@@ -54,10 +53,10 @@ index 1b1dab5b17..d1616c045a 100644
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 25d427edd1..220e6ec577 100644
index 5dc1d0a2ca..f773182bd0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4899,10 +4899,12 @@ static int img_bitmap(int argc, char **argv)
@@ -4793,10 +4793,12 @@ static int img_bitmap(int argc, char **argv)
#define C_IF 04
#define C_OF 010
#define C_SKIP 020
@@ -70,7 +69,7 @@ index 25d427edd1..220e6ec577 100644
};
struct DdIo {
@@ -4978,6 +4980,19 @@ static int img_dd_skip(const char *arg,
@@ -4872,6 +4874,19 @@ static int img_dd_skip(const char *arg,
return 0;
}
@@ -90,7 +89,7 @@ index 25d427edd1..220e6ec577 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -5018,6 +5033,7 @@ static int img_dd(int argc, char **argv)
@@ -4912,6 +4927,7 @@ static int img_dd(int argc, char **argv)
{ "if", img_dd_if, C_IF },
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
@@ -98,7 +97,7 @@ index 25d427edd1..220e6ec577 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5093,91 +5109,112 @@ static int img_dd(int argc, char **argv)
@@ -4987,91 +5003,112 @@ static int img_dd(int argc, char **argv)
arg = NULL;
}
@@ -135,7 +134,11 @@ index 25d427edd1..220e6ec577 100644
- error_report_err(local_err);
- ret = -1;
- goto out;
- }
+ if (!blk1) {
+ ret = -1;
+ goto out;
+ }
}
- if (!drv->create_opts) {
- error_report("Format driver '%s' does not support image creation",
- drv->format_name);
@@ -147,11 +150,7 @@ index 25d427edd1..220e6ec577 100644
- proto_drv->format_name);
- ret = -1;
- goto out;
+ if (!blk1) {
+ ret = -1;
+ goto out;
+ }
}
- }
- create_opts = qemu_opts_append(create_opts, drv->create_opts);
- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
-
@@ -275,54 +274,41 @@ index 25d427edd1..220e6ec577 100644
}
if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
@@ -5194,20 +5231,43 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
@@ -5089,11 +5126,17 @@ static int img_dd(int argc, char **argv)
for (out_pos = 0; in_pos < size; ) {
+ int in_ret, out_ret;
int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
for (out_pos = 0; in_pos < size; block_count++) {
int in_ret, out_ret;
-
- ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
- if (ret < 0) {
- if (in_pos + in.bsz > size) {
- in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
+ size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+ if (blk1) {
+ in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
+ if (in_ret == 0) {
+ in_ret = bytes;
+ }
+ } else {
+ in_ret = read(STDIN_FILENO, in.buf, bytes);
+ in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
} else {
- in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
+ in_ret = read(STDIN_FILENO, in.buf, in_bsz);
+ if (in_ret == 0) {
+ /* early EOF is considered an error */
+ error_report("Input ended unexpectedly");
+ ret = -1;
+ goto out;
+ }
+ }
+ if (in_ret < 0) {
error_report("error while reading from input image file: %s",
- strerror(-ret));
+ strerror(-in_ret));
+ ret = -1;
goto out;
}
in_pos += bytes;
if (in_ret < 0) {
error_report("error while reading from input image file: %s",
@@ -5103,9 +5146,13 @@ static int img_dd(int argc, char **argv)
}
in_pos += in_ret;
- ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
- if (ret < 0) {
- out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+ if (blk2) {
+ out_ret = blk_pwrite(blk2, out_pos, in_ret, in.buf, 0);
+ if (out_ret == 0) {
+ out_ret = in_ret;
+ }
+ out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+ } else {
+ out_ret = write(STDOUT_FILENO, in.buf, in_ret);
+ }
+
- if (out_ret < 0) {
+ if (out_ret != in_ret) {
error_report("error while writing to output image file: %s",
- strerror(-ret));
+ strerror(-out_ret));
+ ret = -1;
goto out;
}
out_pos += bytes;
strerror(-out_ret));
ret = -1;

View File

@@ -10,16 +10,15 @@ an expected end of input.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qemu-img.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 220e6ec577..58bf9b43d1 100644
index f773182bd0..98a6562364 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4900,11 +4900,13 @@ static int img_bitmap(int argc, char **argv)
@@ -4794,11 +4794,13 @@ static int img_bitmap(int argc, char **argv)
#define C_OF 010
#define C_SKIP 020
#define C_OSIZE 040
@@ -33,7 +32,7 @@ index 220e6ec577..58bf9b43d1 100644
};
struct DdIo {
@@ -4993,6 +4995,19 @@ static int img_dd_osize(const char *arg,
@@ -4887,6 +4889,19 @@ static int img_dd_osize(const char *arg,
return 0;
}
@@ -53,13 +52,13 @@ index 220e6ec577..58bf9b43d1 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -5007,12 +5022,14 @@ static int img_dd(int argc, char **argv)
@@ -4901,12 +4916,14 @@ static int img_dd(int argc, char **argv)
int c, i;
const char *out_fmt = "raw";
const char *fmt = NULL;
- int64_t size = 0;
+ int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
int64_t block_count = 0, out_pos, in_pos;
bool force_share = false;
struct DdInfo dd = {
.flags = 0,
@@ -69,7 +68,7 @@ index 220e6ec577..58bf9b43d1 100644
};
struct DdIo in = {
.bsz = 512, /* Block size is by default 512 bytes */
@@ -5034,6 +5051,7 @@ static int img_dd(int argc, char **argv)
@@ -4928,6 +4945,7 @@ static int img_dd(int argc, char **argv)
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
{ "osize", img_dd_osize, C_OSIZE },
@@ -77,22 +76,20 @@ index 220e6ec577..58bf9b43d1 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5230,9 +5248,10 @@ static int img_dd(int argc, char **argv)
@@ -5124,14 +5142,18 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
- for (out_pos = 0; in_pos < size; ) {
- for (out_pos = 0; in_pos < size; block_count++) {
+ readsize = (dd.isize > 0) ? dd.isize : size;
+ for (out_pos = 0; in_pos < readsize; ) {
+ for (out_pos = 0; in_pos < readsize; block_count++) {
int in_ret, out_ret;
- int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
+ int bytes = (in_pos + in.bsz > readsize) ? readsize - in_pos : in.bsz;
- size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+ size_t in_bsz = in_pos + in.bsz > readsize ? readsize - in_pos : in.bsz;
if (blk1) {
in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
if (in_ret == 0) {
@@ -5241,6 +5260,9 @@ static int img_dd(int argc, char **argv)
in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
} else {
in_ret = read(STDIN_FILENO, in.buf, bytes);
in_ret = read(STDIN_FILENO, in.buf, in_bsz);
if (in_ret == 0) {
+ if (dd.isize == 0) {
+ goto out;

View File

@@ -1,121 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:42 +0200
Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: fix getopt-string + add documentation]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
docs/tools/qemu-img.rst | 11 ++++++++++-
qemu-img-cmds.hx | 4 ++--
qemu-img.c | 23 ++++++++++++++---------
3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 15aeddc6d8..5e713e231d 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -208,6 +208,10 @@ Parameters to convert subcommand:
Parameters to dd subcommand:
+.. option:: -n
+
+ Skip the creation of the target volume
+
.. program:: qemu-img-dd
.. option:: bs=BLOCK_SIZE
@@ -488,7 +492,7 @@ Command description:
it doesn't need to be specified separately in this case.
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
dd copies from *INPUT* file to *OUTPUT* file converting it from
*FMT* format to *OUTPUT_FMT* format.
@@ -499,6 +503,11 @@ Command description:
The size syntax is similar to :manpage:`dd(1)`'s size syntax.
+ If the ``-n`` option is specified, the target volume creation will be
+ skipped. This is useful for formats such as ``rbd`` if the target
+ volume has already been created with site specific options that cannot
+ be supplied through ``qemu-img``.
+
.. option:: info [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [--backing-chain] [-U] FILENAME
Give information about the disk image *FILENAME*. Use it in
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index d1616c045a..b5b0bb4467 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -58,9 +58,9 @@ SRST
ERST
DEF("dd", img_dd,
- "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
SRST
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
ERST
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 58bf9b43d1..9d414d639b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5024,7 +5024,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -5062,7 +5062,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5082,6 +5082,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
+ case 'n':
+ skip_create = true;
+ break;
case 'U':
force_share = true;
break;
@@ -5212,13 +5215,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
+ if (!skip_create) {
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "%s: error while creating output image: ",
+ out.filename);
+ ret = -1;
+ goto out;
+ }
}
/* TODO, we can't honour --image-opts for the target,

View File

@@ -0,0 +1,65 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:42 +0200
Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qemu-img.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 98a6562364..355b3b82f4 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4918,7 +4918,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t block_count = 0, out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -4956,7 +4956,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:U:n", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -4976,6 +4976,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
+ case 'n':
+ skip_create = true;
+ break;
case 'U':
force_share = true;
break;
@@ -5106,13 +5109,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
+ if (!skip_create) {
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "%s: error while creating output image: ",
+ out.filename);
+ ret = -1;
+ goto out;
+ }
}
/* TODO, we can't honour --image-opts for the target,

View File

@@ -1,130 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Mon, 7 Feb 2022 14:21:01 +0100
Subject: [PATCH] qemu-img dd: add -l option for loading a snapshot
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
docs/tools/qemu-img.rst | 6 +++---
qemu-img-cmds.hx | 4 ++--
qemu-img.c | 33 +++++++++++++++++++++++++++++++--
3 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 5e713e231d..9390d5e5cf 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -492,10 +492,10 @@ Command description:
it doesn't need to be specified separately in this case.
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
- dd copies from *INPUT* file to *OUTPUT* file converting it from
- *FMT* format to *OUTPUT_FMT* format.
+ dd copies from *INPUT* file or snapshot *SNAPSHOT_PARAM* to *OUTPUT* file
+ converting it from *FMT* format to *OUTPUT_FMT* format.
The data is by default read and written using blocks of 512 bytes but can be
modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index b5b0bb4467..36f97e1f19 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -58,9 +58,9 @@ SRST
ERST
DEF("dd", img_dd,
- "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [-l snapshot_param] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
SRST
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
ERST
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 9d414d639b..e13a12137b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5016,6 +5016,7 @@ static int img_dd(int argc, char **argv)
BlockDriver *drv = NULL, *proto_drv = NULL;
BlockBackend *blk1 = NULL, *blk2 = NULL;
QemuOpts *opts = NULL;
+ QemuOpts *sn_opts = NULL;
QemuOptsList *create_opts = NULL;
Error *local_err = NULL;
bool image_opts = false;
@@ -5025,6 +5026,7 @@ static int img_dd(int argc, char **argv)
int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
bool force_share = false, skip_create = false;
+ const char *snapshot_name = NULL;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -5062,7 +5064,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:l:Un", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5085,6 +5087,19 @@ static int img_dd(int argc, char **argv)
case 'n':
skip_create = true;
break;
+ case 'l':
+ if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+ sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
+ optarg, false);
+ if (!sn_opts) {
+ error_report("Failed in parsing snapshot param '%s'",
+ optarg);
+ goto out;
+ }
+ } else {
+ snapshot_name = optarg;
+ }
+ break;
case 'U':
force_share = true;
break;
@@ -5144,11 +5159,24 @@ static int img_dd(int argc, char **argv)
if (dd.flags & C_IF) {
blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
force_share);
-
if (!blk1) {
ret = -1;
goto out;
}
+ if (sn_opts) {
+ bdrv_snapshot_load_tmp(blk_bs(blk1),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+ &local_err);
+ } else if (snapshot_name != NULL) {
+ bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(blk1), snapshot_name,
+ &local_err);
+ }
+ if (local_err) {
+ error_reportf_err(local_err, "Failed to load snapshot: ");
+ ret = -1;
+ goto out;
+ }
}
if (dd.flags & C_OSIZE) {
@@ -5303,6 +5331,7 @@ static int img_dd(int argc, char **argv)
out:
g_free(arg);
qemu_opts_del(opts);
+ qemu_opts_del(sn_opts);
qemu_opts_free(create_opts);
blk_unref(blk1);
blk_unref(blk2);

View File

@@ -7,62 +7,17 @@ Actually provide memory information via the query-balloon
command.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: add BalloonInfo to member name exceptions list
rebase for 8.0 - moved to hw/core/machine-hmp-cmds.c]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/core/machine-hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
hw/virtio/virtio-balloon.c | 33 +++++++++++++++++++++++++++++++--
monitor/hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
qapi/machine.json | 22 +++++++++++++++++++++-
qapi/pragma.json | 1 +
4 files changed, 82 insertions(+), 4 deletions(-)
3 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index c3e55ef9e9..0e32e6201f 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -169,7 +169,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+ monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+ if (info->has_total_mem) {
+ monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+ }
+ if (info->has_free_mem) {
+ monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+ }
+
+ if (info->has_mem_swapped_in) {
+ monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+ }
+ if (info->has_mem_swapped_out) {
+ monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+ }
+ if (info->has_major_page_faults) {
+ monitor_printf(mon, " major_page_faults=%" PRId64,
+ info->major_page_faults);
+ }
+ if (info->has_minor_page_faults) {
+ monitor_printf(mon, " minor_page_faults=%" PRId64,
+ info->minor_page_faults);
+ }
+ if (info->has_last_update) {
+ monitor_printf(mon, " last_update=%" PRId64,
+ info->last_update);
+ }
+
+ monitor_printf(mon, "\n");
qapi_free_BalloonInfo(info);
}
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d004cf29d2..2660ed520b 100644
index ae7867a8db..956e3f4e46 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -782,8 +782,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
@@ -820,8 +820,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
VirtIOBalloon *dev = opaque;
@@ -102,13 +57,54 @@ index d004cf29d2..2660ed520b 100644
}
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index f4ef58d257..c8b97909e7 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -698,7 +698,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+ monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+ if (info->has_total_mem) {
+ monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+ }
+ if (info->has_free_mem) {
+ monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+ }
+
+ if (info->has_mem_swapped_in) {
+ monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+ }
+ if (info->has_mem_swapped_out) {
+ monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+ }
+ if (info->has_major_page_faults) {
+ monitor_printf(mon, " major_page_faults=%" PRId64,
+ info->major_page_faults);
+ }
+ if (info->has_minor_page_faults) {
+ monitor_printf(mon, " minor_page_faults=%" PRId64,
+ info->minor_page_faults);
+ }
+ if (info->has_last_update) {
+ monitor_printf(mon, " last_update=%" PRId64,
+ info->last_update);
+ }
+
+ monitor_printf(mon, "\n");
qapi_free_BalloonInfo(info);
}
diff --git a/qapi/machine.json b/qapi/machine.json
index a08b6576ca..5c9a4d55f4 100644
index 157712f006..34035c25d1 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1063,9 +1063,29 @@
# @actual: the logical size of the VM in bytes Formula used:
# logical_vm_size = vm_ram_size - balloon_size
@@ -1018,10 +1018,30 @@
# @actual: the logical size of the VM in bytes
# Formula used: logical_vm_size = vm_ram_size - balloon_size
#
+# @last_update: time when stats got updated from guest
+#
@@ -127,6 +123,7 @@ index a08b6576ca..5c9a4d55f4 100644
+# @max_mem: amount of memory (in bytes) assigned to the guest
+#
# Since: 0.14
#
##
-{ 'struct': 'BalloonInfo', 'data': {'actual': 'int' } }
+{ 'struct': 'BalloonInfo',
@@ -137,15 +134,3 @@ index a08b6576ca..5c9a4d55f4 100644
##
# @query-balloon:
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 7f810b0e97..325e684411 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -35,6 +35,7 @@
'member-name-exceptions': [ # visible in:
'ACPISlotType', # query-acpi-ospm-status
'AcpiTableOptions', # -acpitable
+ 'BalloonInfo', # query-balloon
'BlkdebugEvent', # blockdev-add, -blockdev
'BlkdebugSetStateOptions', # blockdev-add, -blockdev
'BlockDeviceInfo', # query-block

View File

@@ -13,13 +13,13 @@ Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 3860a50c3b..40821e2317 100644
index 216fdfaf3a..8f8d5d5276 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -91,6 +91,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
@@ -98,6 +98,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
info->hotpluggable_cpus = mc->has_hotpluggable_cpus;
info->numa_mem_supported = mc->numa_mem_supported;
info->deprecated = !!mc->deprecation_reason;
info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi");
+
+ if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
+ info->has_is_current = true;
@@ -28,21 +28,21 @@ index 3860a50c3b..40821e2317 100644
+
if (mc->default_cpu_type) {
info->default_cpu_type = g_strdup(mc->default_cpu_type);
}
info->has_default_cpu_type = true;
diff --git a/qapi/machine.json b/qapi/machine.json
index 5c9a4d55f4..fbb61f18e4 100644
index 34035c25d1..cf120ac343 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -139,6 +139,8 @@
@@ -141,6 +141,8 @@
#
# @is-default: whether the machine is default
#
+# @is-current: whether this machine is currently used
+#
# @cpu-max: maximum number of CPUs supported by the machine type
# (since 1.5)
# (since 1.5)
#
@@ -163,7 +165,7 @@
@@ -162,7 +164,7 @@
##
{ 'struct': 'MachineInfo',
'data': { 'name': 'str', '*alias': 'str',
@@ -50,4 +50,4 @@ index 5c9a4d55f4..fbb61f18e4 100644
+ '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
'*default-ram-id': 'str', 'acpi': 'bool' } }
'*default-ram-id': 'str' } }

View File

@@ -6,18 +6,16 @@ Subject: [PATCH] PVE: qapi: modify spice query
Provide the last ticket in the SpiceInfo struct optionally.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to QAPI change]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qapi/ui.json | 3 +++
ui/spice-core.c | 4 ++++
2 files changed, 7 insertions(+)
ui/spice-core.c | 5 +++++
2 files changed, 8 insertions(+)
diff --git a/qapi/ui.json b/qapi/ui.json
index 006616aa77..dfd1d3e36b 100644
index cba8665b73..081115ea8a 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -317,11 +317,14 @@
@@ -333,11 +333,14 @@
#
# @channels: a list of @SpiceChannel for each active spice channel
#
@@ -30,17 +28,18 @@ index 006616aa77..dfd1d3e36b 100644
'*tls-port': 'int', '*auth': 'str', '*compiled-version': 'str',
+ '*ticket': 'str',
'mouse-mode': 'SpiceQueryMouseMode', '*channels': ['SpiceChannel']},
'if': 'CONFIG_SPICE' }
'if': 'defined(CONFIG_SPICE)' }
diff --git a/ui/spice-core.c b/ui/spice-core.c
index b20c25aee0..26baeb7846 100644
index 840cf56923..96be349635 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -548,6 +548,10 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
@@ -534,6 +534,11 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
micro = SPICE_SERVER_VERSION & 0xff;
info->compiled_version = g_strdup_printf("%d.%d.%d", major, minor, micro);
+ if (auth_passwd) {
+ info->has_ticket = true;
+ info->ticket = g_strdup(auth_passwd);
+ }
+

View File

@@ -1,282 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 13 Oct 2022 11:33:50 +0200
Subject: [PATCH] PVE: add IOChannel implementation for savevm-async
based on migration/channel-block.c and the implementation that was
present in migration/savevm-async.c before QEMU 7.1.
Passes along read/write requests to the given BlockBackend, while
ensuring that a read request going beyond the end results in a
graceful short read.
Additionally, allows tracking the current position from the outside
(intended to be used for progress tracking).
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/channel-savevm-async.c | 183 +++++++++++++++++++++++++++++++
migration/channel-savevm-async.h | 51 +++++++++
migration/meson.build | 1 +
3 files changed, 235 insertions(+)
create mode 100644 migration/channel-savevm-async.c
create mode 100644 migration/channel-savevm-async.h
diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
new file mode 100644
index 0000000000..aab081ce07
--- /dev/null
+++ b/migration/channel-savevm-async.c
@@ -0,0 +1,183 @@
+/*
+ * QIO Channel implementation to be used by savevm-async QMP calls
+ */
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "qapi/error.h"
+#include "sysemu/block-backend.h"
+#include "trace.h"
+
+/*
+ * Build a savevm-async channel on top of @be.
+ *
+ * The channel takes its own reference on the BlockDriverState behind @be and
+ * stores @bs_pos as given, so the I/O callbacks advance the caller's position
+ * variable directly.
+ */
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos)
+{
+    Object *obj = object_new(TYPE_QIO_CHANNEL_SAVEVM_ASYNC);
+    QIOChannelSavevmAsync *chan = QIO_CHANNEL_SAVEVM_ASYNC(obj);
+
+    bdrv_ref(blk_bs(be));
+    chan->bs_pos = bs_pos;
+    chan->be = be;
+
+    return chan;
+}
+
+
+/*
+ * Instance finalizer: forget the position pointer and, if the channel still
+ * holds a backend (i.e. it was not released by a successful close), drop the
+ * BlockDriverState reference taken at creation time.
+ */
+static void
+qio_channel_savevm_async_finalize(Object *obj)
+{
+    QIOChannelSavevmAsync *chan = QIO_CHANNEL_SAVEVM_ASYNC(obj);
+    BlockBackend *backend = chan->be;
+
+    chan->bs_pos = NULL;
+    if (!backend) {
+        return;
+    }
+
+    bdrv_unref(blk_bs(backend));
+    chan->be = NULL;
+}
+
+
+/*
+ * Read from the backend at the tracked position into @iov.
+ *
+ * A request that would run past the end of the backend is truncated to the
+ * remaining bytes (short read); a request that starts at or beyond the end
+ * fails. On success the tracked position is advanced by the number of bytes
+ * read, which is also the return value. Returns -1 with @errp set on failure.
+ * @fds, @nfds and @flags are not used.
+ */
+static ssize_t
+qio_channel_savevm_async_readv(QIOChannel *ioc,
+                               const struct iovec *iov,
+                               size_t niov,
+                               int **fds,
+                               size_t *nfds,
+                               int flags,
+                               Error **errp)
+{
+    QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+    BlockBackend *be = saioc->be;
+    int64_t maxlen = blk_getlength(be);
+    QEMUIOVector qiov;
+    size_t size;
+    int ret;
+
+    /*
+     * A negative length is an error code, not a size. Without this check it
+     * would be compared against the unsigned position below and then used
+     * to compute the read size.
+     */
+    if (maxlen < 0) {
+        error_setg_errno(errp, -maxlen, "blk_getlength failed");
+        return -1;
+    }
+
+    qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+    if (*saioc->bs_pos >= maxlen) {
+        error_setg(errp, "cannot read beyond maxlen");
+        return -1;
+    }
+
+    /* Clamp to what is left so the tail of the backend yields a short read. */
+    if (maxlen - *saioc->bs_pos < qiov.size) {
+        size = maxlen - *saioc->bs_pos;
+    } else {
+        size = qiov.size;
+    }
+
+    // returns 0 on success
+    ret = blk_preadv(be, *saioc->bs_pos, size, &qiov, 0);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "blk_preadv failed");
+        return -1;
+    }
+
+    *saioc->bs_pos += size;
+    return size;
+}
+
+
+/*
+ * Write all of @iov to the backend at the tracked position.
+ *
+ * Uses the coroutine write variant (followed by aio_wait_kick()) when called
+ * from coroutine context and the plain variant otherwise. On success the
+ * tracked position is advanced by the full vector size, which is also the
+ * return value. Returns -1 with @errp set on failure. @fds, @nfds and @flags
+ * are not used.
+ */
+static ssize_t
+qio_channel_savevm_async_writev(QIOChannel *ioc,
+                                const struct iovec *iov,
+                                size_t niov,
+                                int *fds,
+                                size_t nfds,
+                                int flags,
+                                Error **errp)
+{
+    QIOChannelSavevmAsync *chan = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+    BlockBackend *backend = chan->be;
+    QEMUIOVector qiov;
+    int rc;
+
+    qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+    if (!qemu_in_coroutine()) {
+        rc = blk_pwritev(backend, *chan->bs_pos, qiov.size, &qiov, 0);
+    } else {
+        rc = blk_co_pwritev(backend, *chan->bs_pos, qiov.size, &qiov, 0);
+        aio_wait_kick();
+    }
+
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "blk(_co)_pwritev failed");
+        return -1;
+    }
+
+    *chan->bs_pos += qiov.size;
+    return qiov.size;
+}
+
+
+/*
+ * Only blocking mode is accepted: enabling it is a no-op that returns 0,
+ * while a request for non-blocking mode sets @errp and returns -1.
+ */
+static int
+qio_channel_savevm_async_set_blocking(QIOChannel *ioc,
+                                      bool enabled,
+                                      Error **errp)
+{
+    if (enabled) {
+        return 0;
+    }
+
+    error_setg(errp, "Non-blocking mode not supported for savevm-async");
+    return -1;
+}
+
+
+/*
+ * Flush the backend's BlockDriverState and, if that succeeds, drop the
+ * reference held by the channel and clear both stored pointers.
+ *
+ * If the flush fails, @errp is set, -1 is returned and the channel keeps its
+ * backend reference untouched.
+ */
+static int
+qio_channel_savevm_async_close(QIOChannel *ioc,
+                               Error **errp)
+{
+    QIOChannelSavevmAsync *chan = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+    int flush_rc;
+
+    flush_rc = bdrv_flush(blk_bs(chan->be));
+    if (flush_rc >= 0) {
+        bdrv_unref(blk_bs(chan->be));
+        chan->be = NULL;
+        chan->bs_pos = NULL;
+        return 0;
+    }
+
+    error_setg_errno(errp, -flush_rc, "Unable to flush VMState");
+    return -1;
+}
+
+
+/*
+ * AIO fd-handler hook, installed in the class vtable by class_init.
+ * Intentionally empty: none of the arguments are used and no handler is
+ * registered.
+ */
+static void
+qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc,
+ AioContext *ctx,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ void *opaque)
+{
+ // if channel-block starts doing something, check if this needs adaptation
+}
+
+
+/*
+ * Class initializer: install this file's callbacks into the QIOChannel
+ * class vtable. @class_data is unused.
+ */
+static void
+qio_channel_savevm_async_class_init(ObjectClass *klass,
+                                    void *class_data G_GNUC_UNUSED)
+{
+    QIOChannelClass *vtable = QIO_CHANNEL_CLASS(klass);
+
+    /* data path */
+    vtable->io_readv = qio_channel_savevm_async_readv;
+    vtable->io_writev = qio_channel_savevm_async_writev;
+
+    /* control path */
+    vtable->io_close = qio_channel_savevm_async_close;
+    vtable->io_set_blocking = qio_channel_savevm_async_set_blocking;
+    vtable->io_set_aio_fd_handler = qio_channel_savevm_async_set_aio_fd_handler;
+}
+
+/* QOM type description: a QIOChannel subclass with the hooks defined above. */
+static const TypeInfo qio_channel_savevm_async_info = {
+    .name = TYPE_QIO_CHANNEL_SAVEVM_ASYNC,
+    .parent = TYPE_QIO_CHANNEL,
+    .instance_size = sizeof(QIOChannelSavevmAsync),
+    .class_init = qio_channel_savevm_async_class_init,
+    .instance_finalize = qio_channel_savevm_async_finalize,
+};
+
+/* Register the savevm-async channel type described by the TypeInfo above. */
+static void
+qio_channel_savevm_async_register_types(void)
+{
+ type_register_static(&qio_channel_savevm_async_info);
+}
+
+/* Hook the registration function into QEMU's type initialization. */
+type_init(qio_channel_savevm_async_register_types);
diff --git a/migration/channel-savevm-async.h b/migration/channel-savevm-async.h
new file mode 100644
index 0000000000..17ae2cb261
--- /dev/null
+++ b/migration/channel-savevm-async.h
@@ -0,0 +1,51 @@
+/*
+ * QEMU I/O channels driver for savevm-async.c
+ *
+ * Copyright (c) 2022 Proxmox Server Solutions
+ *
+ * Authors:
+ * Fiona Ebner (f.ebner@proxmox.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QIO_CHANNEL_SAVEVM_ASYNC_H
+#define QIO_CHANNEL_SAVEVM_ASYNC_H
+
+#include "io/channel.h"
+#include "qom/object.h"
+
+#define TYPE_QIO_CHANNEL_SAVEVM_ASYNC "qio-channel-savevm-async"
+OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelSavevmAsync, QIO_CHANNEL_SAVEVM_ASYNC)
+
+
+/**
+ * QIOChannelSavevmAsync:
+ *
+ * The QIOChannelSavevmAsync object provides a channel implementation that is
+ * able to perform I/O on any BlockBackend whose BlockDriverState directly
+ * contains a VMState (as opposed to indirectly, like qcow2). It allows
+ * tracking the current position from the outside.
+ */
+struct QIOChannelSavevmAsync {
+ QIOChannel parent;
+ /* Backend all reads and writes go to; NULL once released by close/finalize. */
+ BlockBackend *be;
+ /* Position supplied at creation; advanced by each successful read/write. */
+ size_t *bs_pos;
+};
+
+
+/**
+ * qio_channel_savevm_async_new:
+ * @be: the block backend
+ * @bs_pos: used to keep track of the IOChannel's current position
+ *
+ * Create a new IO channel object that can perform I/O on a BlockBackend object
+ * whose BlockDriverState directly contains a VMState.
+ *
+ * Returns: the new channel object
+ */
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos);
+
+#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
diff --git a/migration/meson.build b/migration/meson.build
index 1ae28523a1..37ddcb5d60 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -13,6 +13,7 @@ system_ss.add(files(
'block-dirty-bitmap.c',
'channel.c',
'channel-block.c',
+ 'channel-savevm-async.c',
'dirtyrate.c',
'exec.c',
'fd.c',

View File

@@ -21,34 +21,28 @@ still opened by QEMU.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[SR: improve aborting
register yank before migration_incoming_state_destroy]
[improve aborting]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[FE: further improve aborting
adapt to removal of QEMUFileOps
improve condition for entering final stage
adapt to QAPI and other changes for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hmp-commands-info.hx | 13 +
hmp-commands.hx | 17 ++
hmp-commands.hx | 33 ++
include/migration/snapshot.h | 2 +
include/monitor/hmp.h | 3 +
include/monitor/hmp.h | 5 +
migration/meson.build | 1 +
migration/savevm-async.c | 531 +++++++++++++++++++++++++++++++++++
monitor/hmp-cmds.c | 38 +++
qapi/migration.json | 34 +++
qapi/misc.json | 16 ++
migration/savevm-async.c | 598 +++++++++++++++++++++++++++++++++++
monitor/hmp-cmds.c | 57 ++++
qapi/migration.json | 34 ++
qapi/misc.json | 32 ++
qemu-options.hx | 12 +
softmmu/vl.c | 10 +
11 files changed, 677 insertions(+)
11 files changed, 797 insertions(+)
create mode 100644 migration/savevm-async.c
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index f5b37eb74a..10fdd822e0 100644
index 27206ac049..e6dd3be07a 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -525,6 +525,19 @@ SRST
@@ -551,6 +551,19 @@ SRST
Show current migration parameters.
ERST
@@ -69,13 +63,13 @@ index f5b37eb74a..10fdd822e0 100644
.name = "balloon",
.args_type = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 2cbd0f77a0..e352f86872 100644
index d78e4cfc47..42203dbe92 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1865,3 +1865,20 @@ SRST
List event channels in the guest
ERST
#endif
@@ -1744,3 +1744,36 @@ ERST
.help = "start a round of guest dirty rate measurement",
.cmd = hmp_calc_dirty_rate,
},
+
+ {
+ .name = "savevm-start",
@@ -86,6 +80,22 @@ index 2cbd0f77a0..e352f86872 100644
+ },
+
+ {
+ .name = "snapshot-drive",
+ .args_type = "device:s,name:s",
+ .params = "device name",
+ .help = "Create internal snapshot.",
+ .cmd = hmp_snapshot_drive,
+ },
+
+ {
+ .name = "delete-drive-snapshot",
+ .args_type = "device:s,name:s",
+ .params = "device name",
+ .help = "Delete internal snapshot.",
+ .cmd = hmp_delete_drive_snapshot,
+ },
+
+ {
+ .name = "savevm-end",
+ .args_type = "",
+ .params = "",
@@ -105,10 +115,10 @@ index e72083b117..c846d37806 100644
+
#endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 13f9a2dedb..7a7def7530 100644
index 3baa1058e2..1247d7362a 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
@@ -25,6 +25,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
void hmp_info_uuid(Monitor *mon, const QDict *qdict);
void hmp_info_chardev(Monitor *mon, const QDict *qdict);
void hmp_info_mice(Monitor *mon, const QDict *qdict);
@@ -116,38 +126,37 @@ index 13f9a2dedb..7a7def7530 100644
void hmp_info_migrate(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -94,6 +95,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
void hmp_mouse_move(Monitor *mon, const QDict *qdict);
void hmp_mouse_button(Monitor *mon, const QDict *qdict);
void hmp_mouse_set(Monitor *mon, const QDict *qdict);
@@ -79,6 +80,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
void hmp_netdev_del(Monitor *mon, const QDict *qdict);
void hmp_getfd(Monitor *mon, const QDict *qdict);
void hmp_closefd(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
void hmp_sendkey(Monitor *mon, const QDict *qdict);
void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build
index 37ddcb5d60..07f6057acc 100644
index f8714dcb15..ea9aedeefc 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -26,6 +26,7 @@ system_ss.add(files(
'options.c',
@@ -23,6 +23,7 @@ softmmu_ss.add(files(
'multifd-zlib.c',
'postcopy-ram.c',
'savevm.c',
+ 'savevm-async.c',
'socket.c',
'tls.c',
'threadinfo.c',
), gnutls)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
new file mode 100644
index 0000000000..e9fc18fb10
index 0000000000..79a0cda906
--- /dev/null
+++ b/migration/savevm-async.c
@@ -0,0 +1,531 @@
@@ -0,0 +1,598 @@
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "migration/migration.h"
+#include "migration/migration-stats.h"
+#include "migration/options.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
@@ -167,10 +176,12 @@ index 0000000000..e9fc18fb10
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "qemu/yank.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+/* used while emulated sync operation in progress */
+#define NOT_DONE -EINPROGRESS
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
@@ -199,7 +210,7 @@ index 0000000000..e9fc18fb10
+ int64_t total_time;
+ QEMUBH *finalize_bh;
+ Coroutine *co;
+ QemuCoSleep target_close_wait;
+ QemuCoSleep *target_close_wait;
+} snap_state;
+
+static bool savevm_aborted(void)
@@ -218,20 +229,24 @@ index 0000000000..e9fc18fb10
+ info->bytes = s->bs_pos;
+ switch (s->state) {
+ case SAVE_STATE_ERROR:
+ info->has_status = true;
+ info->status = g_strdup("failed");
+ info->has_total_time = true;
+ info->total_time = s->total_time;
+ if (s->error) {
+ info->has_error = true;
+ info->error = g_strdup(error_get_pretty(s->error));
+ }
+ break;
+ case SAVE_STATE_ACTIVE:
+ info->has_status = true;
+ info->status = g_strdup("active");
+ info->has_total_time = true;
+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+ - s->total_time;
+ break;
+ case SAVE_STATE_COMPLETED:
+ info->has_status = true;
+ info->status = g_strdup("completed");
+ info->has_total_time = true;
+ info->total_time = s->total_time;
@@ -253,7 +268,6 @@ index 0000000000..e9fc18fb10
+
+ if (snap_state.file) {
+ ret = qemu_fclose(snap_state.file);
+ snap_state.file = NULL;
+ }
+
+ if (snap_state.target) {
@@ -271,13 +285,15 @@ index 0000000000..e9fc18fb10
+ blk_unref(snap_state.target);
+ snap_state.target = NULL;
+
+ qemu_co_sleep_wake(&snap_state.target_close_wait);
+ if (snap_state.target_close_wait) {
+ qemu_co_sleep_wake(snap_state.target_close_wait);
+ }
+ }
+
+ return ret;
+}
+
+static void G_GNUC_PRINTF(1, 2) save_snapshot_error(const char *fmt, ...)
+static void save_snapshot_error(const char *fmt, ...)
+{
+ va_list ap;
+ char *msg;
@@ -297,6 +313,60 @@ index 0000000000..e9fc18fb10
+ snap_state.state = SAVE_STATE_ERROR;
+}
+
+static int block_state_close(void *opaque, Error **errp)
+{
+ snap_state.file = NULL;
+ return blk_flush(snap_state.target);
+}
+
+typedef struct BlkRwCo {
+ int64_t offset;
+ QEMUIOVector *qiov;
+ ssize_t ret;
+} BlkRwCo;
+
+static void coroutine_fn block_state_write_entry(void *opaque) {
+ BlkRwCo *rwco = opaque;
+ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
+ rwco->qiov, 0);
+ aio_wait_kick();
+}
+
+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
+ int iovcnt, int64_t pos, Error **errp)
+{
+ QEMUIOVector qiov;
+ BlkRwCo rwco;
+
+ assert(pos == snap_state.bs_pos);
+ rwco = (BlkRwCo) {
+ .offset = pos,
+ .qiov = &qiov,
+ .ret = NOT_DONE,
+ };
+
+ qemu_iovec_init_external(&qiov, iov, iovcnt);
+
+ if (qemu_in_coroutine()) {
+ block_state_write_entry(&rwco);
+ } else {
+ Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
+ bdrv_coroutine_enter(blk_bs(snap_state.target), co);
+ BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
+ }
+ if (rwco.ret < 0) {
+ return rwco.ret;
+ }
+
+ snap_state.bs_pos += qiov.size;
+ return qiov.size;
+}
+
+static const QEMUFileOps block_file_ops = {
+ .writev_buffer = block_state_writev_buffer,
+ .close = block_state_close,
+};
+
+static void process_savevm_finalize(void *opaque)
+{
+ int ret;
@@ -331,7 +401,7 @@ index 0000000000..e9fc18fb10
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
+ }
+
@@ -352,11 +422,8 @@ index 0000000000..e9fc18fb10
+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_COMPLETED;
+ } else if (aborted) {
+ /*
+ * If there was an error, there's no need to set a new one here.
+ * If the snapshot was canceled, leave setting the state to
+ * qmp_savevm_end(), which is waked by save_snapshot_cleanup().
+ */
+ save_snapshot_error("process_savevm_cleanup: found aborted state: %d",
+ snap_state.state);
+ } else {
+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
+ snap_state.state);
@@ -388,32 +455,18 @@ index 0000000000..e9fc18fb10
+ }
+
+ while (snap_state.state == SAVE_STATE_ACTIVE) {
+ uint64_t pending_size, pend_precopy, pend_postcopy;
+ uint64_t threshold = 400 * 1000;
+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
+
+ /*
+ * pending_{estimate,exact} are expected to be called without iothread
+ * lock. Similar to what is done in migration.c, call the exact variant
+ * only once pend_precopy in the estimate is below the threshold.
+ */
+ /* pending is expected to be called without iothread lock */
+ qemu_mutex_unlock_iothread();
+ qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
+ if (pend_precopy <= threshold) {
+ qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
+ }
+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
+ qemu_mutex_lock_iothread();
+ pending_size = pend_precopy + pend_postcopy;
+
+ /*
+ * A guest reaching this cutoff is dirtying lots of RAM. It should be
+ * large enough so that the guest can't dirty this much between the
+ * check and the guest actually being stopped, but it should be small
+ * enough to avoid long downtimes for non-hibernation snapshots.
+ */
+ maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
+
+ /* Note that there is no progress for pend_postcopy when iterating */
+ if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
+
+ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
+ ret = qemu_savevm_state_iterate(snap_state.file, false);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
@@ -422,7 +475,11 @@ index 0000000000..e9fc18fb10
+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+ } else {
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+ global_state_store();
+ ret = global_state_store();
+ if (ret) {
+ save_snapshot_error("global_state_store error %d", ret);
+ break;
+ }
+
+ DPRINTF("savevm iterate complete\n");
+ break;
@@ -451,9 +508,7 @@ index 0000000000..e9fc18fb10
+ if (bs_ctx != qemu_get_aio_context()) {
+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+ aio_co_reschedule_self(bs_ctx);
+ bdrv_graph_co_rdlock();
+ bdrv_flush(bs);
+ bdrv_graph_co_rdunlock();
+ aio_co_reschedule_self(qemu_get_aio_context());
+ }
+ }
@@ -464,7 +519,7 @@ index 0000000000..e9fc18fb10
+ qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+void qmp_savevm_start(const char *statefile, Error **errp)
+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+{
+ Error *local_err = NULL;
+ MigrationState *ms = migrate_get_current();
@@ -483,7 +538,7 @@ index 0000000000..e9fc18fb10
+ return;
+ }
+
+ if (migrate_block()) {
+ if (migrate_use_block()) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "Block migration and snapshots are incompatible");
+ return;
@@ -494,14 +549,13 @@ index 0000000000..e9fc18fb10
+ snap_state.bs_pos = 0;
+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ snap_state.blocker = NULL;
+ snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
+
+ if (snap_state.error) {
+ error_free(snap_state.error);
+ snap_state.error = NULL;
+ }
+
+ if (!statefile) {
+ if (!has_statefile) {
+ vm_stop(RUN_STATE_SAVE_VM);
+ snap_state.state = SAVE_STATE_COMPLETED;
+ return;
@@ -521,9 +575,7 @@ index 0000000000..e9fc18fb10
+ goto restart;
+ }
+
+ QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
+ &snap_state.bs_pos));
+ snap_state.file = qemu_file_new_output(ioc);
+ snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+
+ if (!snap_state.file) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -536,8 +588,7 @@ index 0000000000..e9fc18fb10
+ * here (blocking main thread, from QMP) to avoid race conditions.
+ */
+ migrate_init(ms);
+ memset(&mig_stats, 0, sizeof(mig_stats));
+ memset(&compression_counters, 0, sizeof(compression_counters));
+ memset(&ram_counters, 0, sizeof(ram_counters));
+ ms->to_dst_file = snap_state.file;
+
+ error_setg(&snap_state.blocker, "block device is in use by savevm");
@@ -602,8 +653,9 @@ index 0000000000..e9fc18fb10
+ * call exits the statefile will be closed and can be removed immediately */
+ DPRINTF("savevm-end: waiting for cleanup\n");
+ timeout = 30L * 1000 * 1000 * 1000;
+ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
+ qemu_co_sleep_ns_wakeable(snap_state.target_close_wait,
+ QEMU_CLOCK_REALTIME, timeout);
+ snap_state.target_close_wait = NULL;
+ if (snap_state.target) {
+ save_snapshot_error("timeout waiting for target file close in "
+ "qmp_savevm_end");
@@ -612,14 +664,46 @@ index 0000000000..e9fc18fb10
+ return;
+ }
+
+ // File closed and no other error, so ensure next snapshot can be started.
+ if (snap_state.state != SAVE_STATE_ERROR) {
+ snap_state.state = SAVE_STATE_DONE;
+ }
+
+ DPRINTF("savevm-end: cleanup done\n");
+}
+
+// FIXME: Deprecated
+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
+{
+ // Compatibility to older qemu-server.
+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
+}
+
+// FIXME: Deprecated
+void qmp_delete_drive_snapshot(const char *device, const char *name,
+ Error **errp)
+{
+ // Compatibility to older qemu-server.
+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
+ true, name, errp);
+}
+
+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
+ size_t size, Error **errp)
+{
+ BlockBackend *be = opaque;
+ int64_t maxlen = blk_getlength(be);
+ if (pos > maxlen) {
+ return -EIO;
+ }
+ if ((pos + size) > maxlen) {
+ size = maxlen - pos - 1;
+ }
+ if (size == 0) {
+ return 0;
+ }
+ return blk_pread(be, pos, buf, size);
+}
+
+static const QEMUFileOps loadstate_file_ops = {
+ .get_buffer = loadstate_get_buffer,
+};
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+ BlockBackend *be;
@@ -627,7 +711,6 @@ index 0000000000..e9fc18fb10
+ Error *blocker = NULL;
+
+ QEMUFile *f;
+ size_t bs_pos = 0;
+ int ret = -EINVAL;
+
+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
@@ -641,7 +724,7 @@ index 0000000000..e9fc18fb10
+ blk_op_block_all(be, blocker);
+
+ /* restore the VM state */
+ f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+ f = qemu_fopen_ops(be, &loadstate_file_ops);
+ if (!f) {
+ error_setg(errp, "Could not open VM state file");
+ goto the_end;
@@ -654,10 +737,6 @@ index 0000000000..e9fc18fb10
+ dirty_bitmap_mig_before_vm_start();
+
+ qemu_fclose(f);
+
+ /* state_destroy assumes a real migration which would have added a yank */
+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
+ migration_incoming_state_destroy();
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Error while loading VM state");
@@ -675,28 +754,39 @@ index 0000000000..e9fc18fb10
+ return ret;
+}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 6c559b48c8..91be698308 100644
index c8b97909e7..64a84cf4ee 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -22,6 +22,7 @@
#include "monitor/monitor-internal.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qerror.h"
@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
mtree_info(flatview, dispatch_tree, owner, disabled);
@@ -1961,6 +1961,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, err);
}
+
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+ qmp_savevm_start(statefile, &errp);
+ qmp_savevm_start(statefile != NULL, statefile, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_snapshot_drive(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_delete_drive_snapshot(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
@@ -713,7 +803,7 @@ index 6c559b48c8..91be698308 100644
+ SaveVMInfo *info;
+ info = qmp_query_savevm(NULL);
+
+ if (info->status) {
+ if (info->has_status) {
+ monitor_printf(mon, "savevm status: %s\n", info->status);
+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+ info->total_time);
@@ -723,17 +813,21 @@ index 6c559b48c8..91be698308 100644
+ if (info->has_bytes) {
+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+ }
+ if (info->error) {
+ if (info->has_error) {
+ monitor_printf(mon, "Error: %s\n", info->error);
+ }
+}
+
void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index 8843e74b59..aca0ca1ac1 100644
index 1124a2dda8..3d72b3e3f3 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -291,6 +291,40 @@
'*dirty-limit-throttle-time-per-round': 'uint64',
'*dirty-limit-ring-full-time': 'uint64'} }
@@ -247,6 +247,40 @@
'*compression': 'CompressionStats',
'*socket-address': ['SocketAddress'] } }
+##
+# @SaveVMInfo:
@@ -773,10 +867,10 @@ index 8843e74b59..aca0ca1ac1 100644
# @query-migrate:
#
diff --git a/qapi/misc.json b/qapi/misc.json
index cda2effa81..94a58bb0bf 100644
index 5c2ca3b556..9bc14e1032 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -456,6 +456,22 @@
@@ -431,6 +431,38 @@
##
{ 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
@@ -789,6 +883,22 @@ index cda2effa81..94a58bb0bf 100644
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @snapshot-drive:
+#
+# Create an internal drive snapshot.
+#
+##
+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @delete-drive-snapshot:
+#
+# Delete a drive snapshot.
+#
+##
+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
@@ -800,10 +910,10 @@ index cda2effa81..94a58bb0bf 100644
# @CommandLineParameterType:
#
diff --git a/qemu-options.hx b/qemu-options.hx
index 8073f5edf5..dc1ececc9c 100644
index 83aa59a920..002ba697e9 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4483,6 +4483,18 @@ SRST
@@ -4131,6 +4131,18 @@ SRST
Start right away with a saved state (``loadvm`` in monitor)
ERST
@@ -823,21 +933,21 @@ index 8073f5edf5..dc1ececc9c 100644
DEF("daemonize", 0, QEMU_OPTION_daemonize, \
"-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index ba6ad8a8df..ddeace306e 100644
index 5ca11e7469..220c67cd32 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -164,6 +164,7 @@ static const char *accelerators;
static bool have_custom_ram_size;
static const char *ram_memdev_id;
@@ -150,6 +150,7 @@ static const char *incoming;
static const char *loadvm;
static const char *accelerators;
static QDict *machine_opts_dict;
+static const char *loadstate;
static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
static int display_remote;
@@ -2647,6 +2648,12 @@ void qmp_x_exit_preconfig(Error **errp)
if (loadvm) {
load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
static ram_addr_t maxram_size;
static uint64_t ram_slots;
@@ -2700,6 +2701,12 @@ void qmp_x_exit_preconfig(Error **errp)
autostart = 0;
exit(1);
}
+ } else if (loadstate) {
+ Error *local_err = NULL;
+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
@@ -847,7 +957,7 @@ index ba6ad8a8df..ddeace306e 100644
}
if (replay_mode != REPLAY_MODE_NONE) {
replay_vmstate_init();
@@ -3196,6 +3203,9 @@ void qemu_init(int argc, char **argv)
@@ -3238,6 +3245,9 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_loadvm:
loadvm = optarg;
break;

View File

@@ -0,0 +1,188 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 4 May 2020 11:05:08 +0200
Subject: [PATCH] PVE: add optional buffer size to QEMUFile
So we can use a 4M buffer for savevm-async which should
increase performance storing the state onto ceph.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[increase max IOV count in QEMUFile to actually write more data]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/qemu-file.c | 38 +++++++++++++++++++++++++-------------
migration/qemu-file.h | 1 +
migration/savevm-async.c | 4 ++--
3 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 6338d8e2ff..6697a93a7e 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -30,8 +30,8 @@
#include "trace.h"
#include "qapi/error.h"
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+#define DEFAULT_IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
struct QEMUFile {
const QEMUFileOps *ops;
@@ -45,7 +45,8 @@ struct QEMUFile {
when reading */
int buf_index;
int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
+ size_t buf_allocated_size;
+ uint8_t *buf;
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -103,7 +104,7 @@ bool qemu_file_mode_is_not_valid(const char *mode)
return false;
}
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, bool has_ioc, size_t buffer_size)
{
QEMUFile *f;
@@ -112,9 +113,17 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
f->opaque = opaque;
f->ops = ops;
f->has_ioc = has_ioc;
+ f->buf_allocated_size = buffer_size;
+ f->buf = malloc(buffer_size);
+
return f;
}
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
+{
+ return qemu_fopen_ops_sized(opaque, ops, has_ioc, DEFAULT_IO_BUF_SIZE);
+}
+
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
{
@@ -349,7 +358,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
}
len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
- IO_BUF_SIZE - pending, &local_error);
+ f->buf_allocated_size - pending, &local_error);
if (len > 0) {
f->buf_size += len;
f->pos += len;
@@ -389,6 +398,9 @@ int qemu_fclose(QEMUFile *f)
ret = ret2;
}
}
+
+ free(f->buf);
+
/* If any error was spotted before closing, we should report it
* instead of the close() return value.
*/
@@ -443,7 +455,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
- if (f->buf_index == IO_BUF_SIZE) {
+ if (f->buf_index == f->buf_allocated_size) {
qemu_fflush(f);
}
}
@@ -469,7 +481,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
- l = IO_BUF_SIZE - f->buf_index;
+ l = f->buf_allocated_size - f->buf_index;
if (l > size) {
l = size;
}
@@ -516,8 +528,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
size_t index;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
- assert(size <= IO_BUF_SIZE - offset);
+ assert(offset < f->buf_allocated_size);
+ assert(size <= f->buf_allocated_size - offset);
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -567,7 +579,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
size_t res;
uint8_t *src;
- res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
if (res == 0) {
return done;
}
@@ -601,7 +613,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
*/
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src = NULL;
@@ -626,7 +638,7 @@ int qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
+ assert(offset < f->buf_allocated_size);
if (index >= f->buf_size) {
qemu_fill_buffer(f);
@@ -778,7 +790,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
const uint8_t *p, size_t size)
{
- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
+ ssize_t blen = f->buf_allocated_size - f->buf_index - sizeof(int32_t);
if (blen < compressBound(size)) {
return -1;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 3f36d4dc8c..67501fd9cf 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -121,6 +121,7 @@ typedef struct QEMUFileHooks {
} QEMUFileHooks;
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc);
+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, bool has_ioc, size_t buffer_size);
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
int qemu_get_fd(QEMUFile *f);
int qemu_fclose(QEMUFile *f);
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 79a0cda906..970ee3b3fc 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -418,7 +418,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
goto restart;
}
- snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+ snap_state.file = qemu_fopen_ops_sized(&snap_state, &block_file_ops, false, 4 * 1024 * 1024);
if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -567,7 +567,7 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker);
/* restore the VM state */
- f = qemu_fopen_ops(be, &loadstate_file_ops);
+ f = qemu_fopen_ops_sized(be, &loadstate_file_ops, false, 4 * 1024 * 1024);
if (!f) {
error_setg(errp, "Could not open VM state file");
goto the_end;

View File

@@ -1,216 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 4 May 2020 11:05:08 +0200
Subject: [PATCH] PVE: add optional buffer size to QEMUFile
So we can use a 4M buffer for savevm-async which should
increase performance storing the state onto ceph.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[increase max IOV count in QEMUFile to actually write more data]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to removal of QEMUFileOps]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/qemu-file.c | 49 +++++++++++++++++++++++++++-------------
migration/qemu-file.h | 2 ++
migration/savevm-async.c | 5 ++--
3 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 19c33c9985..e9ffff0f0a 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -33,8 +33,8 @@
#include "options.h"
#include "qapi/error.h"
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+#define DEFAULT_IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
struct QEMUFile {
const QEMUFileHooks *hooks;
@@ -46,7 +46,8 @@ struct QEMUFile {
int buf_index;
int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
+ size_t buf_allocated_size;
+ uint8_t *buf;
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -100,7 +101,9 @@ int qemu_file_shutdown(QEMUFile *f)
return 0;
}
-static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+static QEMUFile *qemu_file_new_impl(QIOChannel *ioc,
+ bool is_writable,
+ size_t buffer_size)
{
QEMUFile *f;
@@ -109,6 +112,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
object_ref(ioc);
f->ioc = ioc;
f->is_writable = is_writable;
+ f->buf_allocated_size = buffer_size;
+ f->buf = malloc(buffer_size);
return f;
}
@@ -119,17 +124,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
*/
QEMUFile *qemu_file_get_return_path(QEMUFile *f)
{
- return qemu_file_new_impl(f->ioc, !f->is_writable);
+ return qemu_file_new_impl(f->ioc, !f->is_writable, DEFAULT_IO_BUF_SIZE);
}
QEMUFile *qemu_file_new_output(QIOChannel *ioc)
{
- return qemu_file_new_impl(ioc, true);
+ return qemu_file_new_impl(ioc, true, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size)
+{
+ return qemu_file_new_impl(ioc, true, buffer_size);
}
QEMUFile *qemu_file_new_input(QIOChannel *ioc)
{
- return qemu_file_new_impl(ioc, false);
+ return qemu_file_new_impl(ioc, false, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size)
+{
+ return qemu_file_new_impl(ioc, false, buffer_size);
}
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
@@ -375,7 +390,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
do {
len = qio_channel_read(f->ioc,
(char *)f->buf + pending,
- IO_BUF_SIZE - pending,
+ f->buf_allocated_size - pending,
&local_error);
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
@@ -425,6 +440,8 @@ int qemu_fclose(QEMUFile *f)
}
g_clear_pointer(&f->ioc, object_unref);
+ free(f->buf);
+
/* If any error was spotted before closing, we should report it
* instead of the close() return value.
*/
@@ -479,7 +496,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
- if (f->buf_index == IO_BUF_SIZE) {
+ if (f->buf_index == f->buf_allocated_size) {
qemu_fflush(f);
}
}
@@ -504,7 +521,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
- l = IO_BUF_SIZE - f->buf_index;
+ l = f->buf_allocated_size - f->buf_index;
if (l > size) {
l = size;
}
@@ -549,8 +566,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si
size_t index;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
- assert(size <= IO_BUF_SIZE - offset);
+ assert(offset < f->buf_allocated_size);
+ assert(size <= f->buf_allocated_size - offset);
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -600,7 +617,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
size_t res;
uint8_t *src;
- res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
if (res == 0) {
return done;
}
@@ -634,7 +651,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
*/
size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src = NULL;
@@ -659,7 +676,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
+ assert(offset < f->buf_allocated_size);
if (index >= f->buf_size) {
qemu_fill_buffer(f);
@@ -777,7 +794,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
const uint8_t *p, size_t size)
{
- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
+ ssize_t blen = f->buf_allocated_size - f->buf_index - sizeof(int32_t);
if (blen < compressBound(size)) {
return -1;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 47015f5201..1312b7c903 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -63,7 +63,9 @@ typedef struct QEMUFileHooks {
} QEMUFileHooks;
QEMUFile *qemu_file_new_input(QIOChannel *ioc);
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size);
QEMUFile *qemu_file_new_output(QIOChannel *ioc);
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size);
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
int qemu_fclose(QEMUFile *f);
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index e9fc18fb10..80624fada8 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -378,7 +378,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
&snap_state.bs_pos));
- snap_state.file = qemu_file_new_output(ioc);
+ snap_state.file = qemu_file_new_output_sized(ioc, 4 * 1024 * 1024);
if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -496,7 +496,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker);
/* restore the VM state */
- f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+ f = qemu_file_new_input_sized(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)),
+ 4 * 1024 * 1024);
if (!f) {
error_setg(errp, "Could not open VM state file");
goto the_end;

View File

@@ -4,32 +4,30 @@ Date: Mon, 6 Apr 2020 12:16:47 +0200
Subject: [PATCH] PVE: block: add the zeroinit block driver filter
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/meson.build | 1 +
block/zeroinit.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 201 insertions(+)
block/zeroinit.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 197 insertions(+)
create mode 100644 block/zeroinit.c
diff --git a/block/meson.build b/block/meson.build
index 529fc172c6..1833c71ce9 100644
index 0450914c7a..7a0bc3df09 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -40,6 +40,7 @@ block_ss.add(files(
'throttle-groups.c',
'throttle.c',
@@ -41,6 +41,7 @@ block_ss.add(files(
'vmdk.c',
'vpc.c',
'write-threshold.c',
+ 'zeroinit.c',
), zstd, zlib, gnutls)
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/zeroinit.c b/block/zeroinit.c
new file mode 100644
index 0000000000..1257342724
index 0000000000..5529627f7e
--- /dev/null
+++ b/block/zeroinit.c
@@ -0,0 +1,200 @@
@@ -0,0 +1,196 @@
+/*
+ * Filter to fake a zero-initialized block device.
+ *
@@ -43,7 +41,6 @@ index 0000000000..1257342724
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
@@ -110,9 +107,7 @@ index 0000000000..1257342724
+
+ /* Open the raw file */
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options, "next",
+ bs, &child_of_bds,
+ BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+ false, &local_err);
+ bs, &child_of_bds, BDRV_CHILD_FILTERED, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
@@ -137,28 +132,28 @@ index 0000000000..1257342724
+ (void)s;
+}
+
+static coroutine_fn int64_t zeroinit_co_getlength(BlockDriverState *bs)
+static int64_t zeroinit_getlength(BlockDriverState *bs)
+{
+ return bdrv_co_getlength(bs->file->bs);
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn zeroinit_co_preadv(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags)
+ int count, BdrvRequestFlags flags)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ if (offset >= s->extents)
+ return 0;
+ return bdrv_pwrite_zeroes(bs->file, offset, bytes, flags);
+ return bdrv_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn zeroinit_co_pwritev(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ int64_t extents = offset + bytes;
@@ -179,9 +174,9 @@ index 0000000000..1257342724
+}
+
+static int coroutine_fn zeroinit_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int64_t bytes)
+ int64_t offset, int count)
+{
+ return bdrv_co_pdiscard(bs->file, offset, bytes);
+ return bdrv_co_pdiscard(bs->file, offset, count);
+}
+
+static int zeroinit_co_truncate(BlockDriverState *bs, int64_t offset,
@@ -191,10 +186,9 @@ index 0000000000..1257342724
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, req_flags, errp);
+}
+
+static coroutine_fn int zeroinit_co_get_info(BlockDriverState *bs,
+ BlockDriverInfo *bdi)
+static int zeroinit_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ return bdrv_co_get_info(bs->file->bs, bdi);
+ return bdrv_get_info(bs->file->bs, bdi);
+}
+
+static BlockDriver bdrv_zeroinit = {
@@ -205,7 +199,7 @@ index 0000000000..1257342724
+ .bdrv_parse_filename = zeroinit_parse_filename,
+ .bdrv_file_open = zeroinit_open,
+ .bdrv_close = zeroinit_close,
+ .bdrv_co_getlength = zeroinit_co_getlength,
+ .bdrv_getlength = zeroinit_getlength,
+ .bdrv_child_perm = bdrv_default_perms,
+ .bdrv_co_flush_to_disk = zeroinit_co_flush,
+
@@ -221,7 +215,7 @@ index 0000000000..1257342724
+ .bdrv_co_pdiscard = zeroinit_co_pdiscard,
+
+ .bdrv_co_truncate = zeroinit_co_truncate,
+ .bdrv_co_get_info = zeroinit_co_get_info,
+ .bdrv_get_info = zeroinit_get_info,
+};
+
+static void bdrv_zeroinit_init(void)

View File

@@ -14,12 +14,12 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 11 insertions(+)
diff --git a/qemu-options.hx b/qemu-options.hx
index dc1ececc9c..848d2dfdd1 100644
index 002ba697e9..a05959b9f1 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1197,6 +1197,9 @@ legacy PC, they are not recommended for modern configurations.
@@ -1005,6 +1005,9 @@ DEFHEADING()
ERST
DEFHEADING(Block device options:)
+DEF("id", HAS_ARG, QEMU_OPTION_id,
+ "-id n set the VMID", QEMU_ARCH_ALL)
@@ -28,10 +28,10 @@ index dc1ececc9c..848d2dfdd1 100644
"-fda/-fdb file use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index ddeace306e..3ee90b3b94 100644
index 220c67cd32..d87cf6e103 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2683,6 +2683,7 @@ void qemu_init(int argc, char **argv)
@@ -2736,6 +2736,7 @@ void qemu_init(int argc, char **argv, char **envp)
MachineClass *machine_class;
bool userconfig = true;
FILE *vmstate_dump_file = NULL;
@@ -39,9 +39,9 @@ index ddeace306e..3ee90b3b94 100644
qemu_add_opts(&qemu_drive_opts);
qemu_add_drive_opts(&qemu_legacy_drive_opts);
@@ -3308,6 +3309,13 @@ void qemu_init(int argc, char **argv)
machine_parse_property_opt(qemu_find_opts("smp-opts"),
"smp", optarg);
@@ -3360,6 +3361,13 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_smp:
machine_parse_property_opt(qemu_find_opts("smp-opts"), "smp", optarg, &error_fatal);
break;
+ case QEMU_OPTION_id:
+ vm_id = strtol(optarg, (char **)&optarg, 10);

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 4a34f03047..59b917e50c 100644
index 2a20982066..7968ad5a93 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -252,6 +252,15 @@ static void apic_reset_common(DeviceState *dev)
@@ -278,6 +278,15 @@ static void apic_reset_common(DeviceState *dev)
info->vapic_base_update(s);
apic_init_reset(dev);

View File

@@ -13,10 +13,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index ca551baa42..8b3b83e9d4 100644
index 3ac5177cbb..907aa3f22e 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2873,6 +2873,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2443,6 +2443,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
int fd;
uint64_t perm, shared;
int result = 0;
@@ -24,7 +24,7 @@ index ca551baa42..8b3b83e9d4 100644
/* Validate options and set default values */
assert(options->driver == BLOCKDEV_DRIVER_FILE);
@@ -2913,19 +2914,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2483,19 +2484,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
@@ -59,7 +59,7 @@ index ca551baa42..8b3b83e9d4 100644
}
/* Clear the file by truncating it to 0 */
@@ -2979,13 +2983,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2549,13 +2553,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
}
out_unlock:
@@ -82,7 +82,7 @@ index ca551baa42..8b3b83e9d4 100644
}
out_close:
@@ -3009,6 +3015,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2580,6 +2586,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
PreallocMode prealloc;
char *buf = NULL;
Error *local_err = NULL;
@@ -90,7 +90,7 @@ index ca551baa42..8b3b83e9d4 100644
/* Skip file: protocol prefix */
strstart(filename, "file:", &filename);
@@ -3031,6 +3038,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2602,6 +2609,18 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
return -EINVAL;
}
@@ -109,7 +109,7 @@ index ca551baa42..8b3b83e9d4 100644
options = (BlockdevCreateOptions) {
.driver = BLOCKDEV_DRIVER_FILE,
.u.file = {
@@ -3042,6 +3061,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2613,6 +2632,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
.nocow = nocow,
.has_extent_size_hint = has_extent_size_hint,
.extent_size_hint = extent_size_hint,
@@ -119,10 +119,10 @@ index ca551baa42..8b3b83e9d4 100644
};
return raw_co_create(&options, errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index a5cea82139..bb471c078d 100644
index 6356a63695..fdfa579d00 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4880,7 +4880,8 @@
@@ -4341,7 +4341,8 @@
'size': 'size',
'*preallocation': 'PreallocMode',
'*nocow': 'bool',

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/monitor/qmp.c b/monitor/qmp.c
index 589c9524f8..2505dd658a 100644
index 6b8cfcf6d8..3ec67e32d3 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -536,8 +536,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
@@ -519,8 +519,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
qemu_chr_fe_set_echo(&mon->common.chr, true);
/* Note: we run QMP monitor in I/O thread when @chr supports that */

View File

@@ -26,10 +26,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index f0d35c6401..1427983543 100644
index 2cf2f321f9..e0f857820d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -148,7 +148,8 @@ GlobalProperty hw_compat_4_0[] = {
@@ -107,7 +107,8 @@ GlobalProperty hw_compat_4_0[] = {
{ "virtio-vga", "edid", "false" },
{ "virtio-gpu-device", "edid", "false" },
{ "virtio-device", "use-started", "false" },

View File

@@ -11,71 +11,70 @@ and only if 'is-current').
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to QAPI changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/core/machine-qmp-cmds.c | 5 +++++
hw/core/machine-qmp-cmds.c | 6 ++++++
include/hw/boards.h | 2 ++
qapi/machine.json | 4 +++-
softmmu/vl.c | 25 +++++++++++++++++++++++++
4 files changed, 35 insertions(+), 1 deletion(-)
4 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 40821e2317..ee93ddd69a 100644
index 8f8d5d5276..370e66d9cc 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -95,6 +95,11 @@ MachineInfoList *qmp_query_machines(Error **errp)
@@ -102,6 +102,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
info->has_is_current = true;
info->is_current = true;
+
+ // PVE version string only exists for current machine
+ if (mc->pve_version) {
+ info->has_pve_version = true;
+ info->pve_version = g_strdup(mc->pve_version);
+ }
}
if (mc->default_cpu_type) {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index ed83360198..f8b88cd86a 100644
index accd6eff35..1b16728389 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -235,6 +235,8 @@ struct MachineClass {
@@ -205,6 +205,8 @@ struct MachineClass {
const char *desc;
const char *deprecation_reason;
+ const char *pve_version;
+
void (*init)(MachineState *state);
void (*reset)(MachineState *state, ShutdownCause reason);
void (*reset)(MachineState *state);
void (*wakeup)(MachineState *state);
diff --git a/qapi/machine.json b/qapi/machine.json
index fbb61f18e4..7da3c519ba 100644
index cf120ac343..a6f483af4f 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -161,6 +161,8 @@
@@ -160,6 +160,8 @@
#
# @acpi: machine type supports ACPI (since 8.0)
# @default-ram-id: the default ID of initial RAM memory backend (since 5.2)
#
+# @pve-version: custom PVE version suffix specified as 'machine+pveN'
+#
# Since: 1.2
##
{ 'struct': 'MachineInfo',
@@ -168,7 +170,7 @@
@@ -167,7 +169,7 @@
'*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
- '*default-ram-id': 'str', 'acpi': 'bool' } }
+ '*default-ram-id': 'str', 'acpi': 'bool', '*pve-version': 'str' } }
- '*default-ram-id': 'str' } }
+ '*default-ram-id': 'str', '*pve-version': 'str' } }
##
# @query-machines:
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 3ee90b3b94..4b6d0b82fd 100644
index d87cf6e103..e9d40065bc 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1597,6 +1597,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
@@ -1621,6 +1621,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
static MachineClass *select_machine(QDict *qdict, Error **errp)
{
const char *optarg = qdict_get_try_str(qdict, "type");
@@ -83,7 +82,7 @@ index 3ee90b3b94..4b6d0b82fd 100644
GSList *machines = object_class_get_list(TYPE_MACHINE, false);
MachineClass *machine_class;
Error *local_err = NULL;
@@ -1614,6 +1615,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
@@ -1638,6 +1639,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
}
}
@@ -95,7 +94,7 @@ index 3ee90b3b94..4b6d0b82fd 100644
g_slist_free(machines);
if (local_err) {
error_append_hint(&local_err, "Use -machine help to list supported machines\n");
@@ -3250,12 +3256,31 @@ void qemu_init(int argc, char **argv)
@@ -3312,12 +3318,31 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_machine:
{
bool help;

View File

@@ -3,57 +3,51 @@ From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:57 +0200
Subject: [PATCH] PVE-Backup: add vma backup format code
Notes about partial restoring: skipping a certain drive is done via a
map line of the form skip=drive-scsi0. Since in PVE, most archives are
compressed and piped to vma for restore, it's not easily possible to
skip reads.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: improvements during create
allow partial restore]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
[FE: create: register all streams before entering coroutines]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/meson.build | 2 +
meson.build | 5 +
vma-reader.c | 867 ++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 818 +++++++++++++++++++++++++++++++++++++++++
vma.c | 900 ++++++++++++++++++++++++++++++++++++++++++++++
vma-reader.c | 857 ++++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 790 ++++++++++++++++++++++++++++++++++++++++++
vma.c | 851 +++++++++++++++++++++++++++++++++++++++++++++
vma.h | 150 ++++++++
6 files changed, 2742 insertions(+)
6 files changed, 2655 insertions(+)
create mode 100644 vma-reader.c
create mode 100644 vma-writer.c
create mode 100644 vma.c
create mode 100644 vma.h
diff --git a/block/meson.build b/block/meson.build
index 1833c71ce9..59b71ba9f3 100644
index 7a0bc3df09..9ce9246194 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -43,6 +43,8 @@ block_ss.add(files(
@@ -44,6 +44,8 @@ block_ss.add(files(
'zeroinit.c',
), zstd, zlib, gnutls)
+block_ss.add(files('../vma-writer.c'), libuuid)
+
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
block_ss.add(when: 'CONFIG_QCOW1', if_true: files('qcow.c'))
diff --git a/meson.build b/meson.build
index a9c4f28247..cd95530d3b 100644
index b3e7ec0e92..cc46eabb42 100644
--- a/meson.build
+++ b/meson.build
@@ -1778,6 +1778,8 @@ endif
@@ -1064,6 +1064,8 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
+libuuid = cc.find_library('uuid', required: true)
+
# libselinux
selinux = dependency('libselinux',
required: get_option('selinux'),
@@ -3908,6 +3910,9 @@ if have_tools
dependencies: [blockdev, qemuutil, gnutls, selinux],
install: true)
# Malloc tests
malloc = []
@@ -2743,6 +2745,9 @@ if have_tools
qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
dependencies: [blockdev, qemuutil, gnutls], install: true)
+ vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
+ dependencies: [authz, block, crypto, io, qom], install: true)
@@ -63,10 +57,10 @@ index a9c4f28247..cd95530d3b 100644
subdir('contrib/elf2dmp')
diff --git a/vma-reader.c b/vma-reader.c
new file mode 100644
index 0000000000..81a891c6b1
index 0000000000..2b1d1cdab3
--- /dev/null
+++ b/vma-reader.c
@@ -0,0 +1,867 @@
@@ -0,0 +1,857 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -84,6 +78,7 @@ index 0000000000..81a891c6b1
+#include <glib.h>
+#include <uuid/uuid.h>
+
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/ratelimit.h"
+#include "vma.h"
@@ -97,7 +92,6 @@ index 0000000000..81a891c6b1
+ bool write_zeroes;
+ unsigned long *bitmap;
+ int bitmap_size;
+ bool skip;
+} VmaRestoreState;
+
+struct VmaReader {
@@ -261,9 +255,6 @@ index 0000000000..81a891c6b1
+ if (vmar->rstate[i].bitmap) {
+ g_free(vmar->rstate[i].bitmap);
+ }
+ if (vmar->rstate[i].target) {
+ blk_unref(vmar->rstate[i].target);
+ }
+ }
+
+ if (vmar->md5csum) {
@@ -495,14 +486,13 @@ index 0000000000..81a891c6b1
+}
+
+static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
+ BlockBackend *target, bool write_zeroes, bool skip)
+ BlockBackend *target, bool write_zeroes)
+{
+ assert(vmar);
+ assert(dev_id);
+
+ vmar->rstate[dev_id].target = target;
+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
+ vmar->rstate[dev_id].skip = skip;
+
+ int64_t size = vmar->devinfo[dev_id].size;
+
@@ -517,30 +507,28 @@ index 0000000000..81a891c6b1
+}
+
+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
+ bool write_zeroes, bool skip, Error **errp)
+ bool write_zeroes, Error **errp)
+{
+ assert(vmar);
+ assert(target != NULL || skip);
+ assert(target != NULL);
+ assert(dev_id);
+ assert(vmar->rstate[dev_id].target == NULL && !vmar->rstate[dev_id].skip);
+ assert(vmar->rstate[dev_id].target == NULL);
+
+ if (target != NULL) {
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
+
+ allocate_rstate(vmar, dev_id, target, write_zeroes, skip);
+ allocate_rstate(vmar, dev_id, target, write_zeroes);
+
+ return 0;
+}
@@ -598,7 +586,7 @@ index 0000000000..81a891c6b1
+ }
+ }
+ } else {
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, buf, nb_sectors * BDRV_SECTOR_SIZE, 0);
+ if (res < 0) {
+ error_setg(errp, "blk_pwrite to %s failed (%d)",
+ bdrv_get_device_name(blk_bs(target)), res);
@@ -633,23 +621,19 @@ index 0000000000..81a891c6b1
+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
+ BlockBackend *target = NULL;
+
+ bool skip = rstate->skip;
+
+ if (dev_id != vmar->vmstate_stream) {
+ target = rstate->target;
+ if (!verify && !target && !skip) {
+ if (!verify && !target) {
+ error_setg(errp, "got wrong dev id %d", dev_id);
+ return -1;
+ }
+
+ if (!skip) {
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
+
+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
+ } else {
@@ -695,7 +679,7 @@ index 0000000000..81a891c6b1
+ return -1;
+ }
+
+ if (!verify && !skip) {
+ if (!verify) {
+ int nb_sectors = end_sector - sector_num;
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ buf + start, sector_num, nb_sectors,
@@ -731,7 +715,7 @@ index 0000000000..81a891c6b1
+ return -1;
+ }
+
+ if (!verify && !skip) {
+ if (!verify) {
+ int nb_sectors = end_sector - sector_num;
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ buf + start, sector_num,
@@ -756,7 +740,7 @@ index 0000000000..81a891c6b1
+ vmar->partial_zero_cluster_data += zero_size;
+ }
+
+ if (rstate->write_zeroes && !verify && !skip) {
+ if (rstate->write_zeroes && !verify) {
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ zero_vma_block, sector_num,
+ nb_sectors, errp) < 0) {
@@ -927,7 +911,7 @@ index 0000000000..81a891c6b1
+
+ for (dev_id = 1; dev_id < 255; dev_id++) {
+ if (vma_reader_get_device_info(vmar, dev_id)) {
+ allocate_rstate(vmar, dev_id, NULL, false, false);
+ allocate_rstate(vmar, dev_id, NULL, false);
+ }
+ }
+
@@ -936,10 +920,10 @@ index 0000000000..81a891c6b1
+
diff --git a/vma-writer.c b/vma-writer.c
new file mode 100644
index 0000000000..126b296647
index 0000000000..11d8321ffd
--- /dev/null
+++ b/vma-writer.c
@@ -0,0 +1,818 @@
@@ -0,0 +1,790 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -955,8 +939,6 @@ index 0000000000..126b296647
+
+#include "qemu/osdep.h"
+#include <glib.h>
+#include <linux/magic.h>
+#include <sys/vfs.h>
+#include <uuid/uuid.h>
+
+#include "vma.h"
@@ -965,8 +947,6 @@ index 0000000000..126b296647
+#include "qemu/main-loop.h"
+#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/memalign.h"
+
+#define DEBUG_VMA 0
+
@@ -1149,10 +1129,10 @@ index 0000000000..126b296647
+{
+ assert(qemu_in_coroutine());
+ AioContext *ctx = qemu_get_current_aio_context();
+ aio_set_fd_handler(ctx, fd, NULL, (IOHandler *)qemu_coroutine_enter, NULL,
+ aio_set_fd_handler(ctx, fd, false, NULL, (IOHandler *)qemu_coroutine_enter,
+ NULL, qemu_coroutine_self());
+ qemu_coroutine_yield();
+ aio_set_fd_handler(ctx, fd, NULL, NULL, NULL, NULL, NULL);
+ aio_set_fd_handler(ctx, fd, false, NULL, NULL, NULL, NULL);
+}
+
+static ssize_t coroutine_fn
@@ -1201,23 +1181,6 @@ index 0000000000..126b296647
+ return (done == bytes) ? bytes : -1;
+}
+
+static bool is_path_tmpfs(const char *path) {
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = statfs(path, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret != 0) {
+ warn_report("statfs call for %s failed, assuming not tmpfs - %s\n",
+ path, strerror(errno));
+ return false;
+ }
+
+ return fs.f_type == TMPFS_MAGIC;
+}
+
+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
+{
+ const char *p;
@@ -1267,19 +1230,12 @@ index 0000000000..126b296647
+ }
+ /* try to use O_NONBLOCK */
+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
+ } else {
+ gchar *dirname = g_path_get_dirname(filename);
+ oflags = O_NONBLOCK|O_WRONLY|O_EXCL;
+ if (!is_path_tmpfs(dirname)) {
+ oflags |= O_DIRECT;
+ }
+ g_free(dirname);
+ } else {
+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_EXCL;
+ vmaw->fd = qemu_create(filename, oflags, 0644, errp);
+ }
+
+ if (vmaw->fd < 0) {
+ error_free(*errp);
+ *errp = NULL;
+ error_setg(errp, "can't open file %s - %s\n", filename,
+ g_strerror(errno));
+ goto err;
@@ -1760,10 +1716,10 @@ index 0000000000..126b296647
+}
diff --git a/vma.c b/vma.c
new file mode 100644
index 0000000000..347f6283ca
index 0000000000..df542b7732
--- /dev/null
+++ b/vma.c
@@ -0,0 +1,900 @@
@@ -0,0 +1,851 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -1781,11 +1737,11 @@ index 0000000000..347f6283ca
+#include <glib.h>
+
+#include "vma.h"
+#include "qemu-common.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include "qemu/memalign.h"
+#include "qapi/qmp/qdict.h"
+#include "sysemu/block-backend.h"
+
@@ -1797,7 +1753,7 @@ index 0000000000..347f6283ca
+ "vma list <filename>\n"
+ "vma config <filename> [-c config]\n"
+ "vma create <filename> [-c config] pathname ...\n"
+ "vma extract <filename> [-d <drive-list>] [-r <fifo>] <targetdir>\n"
+ "vma extract <filename> [-r <fifo>] <targetdir>\n"
+ "vma verify <filename> [-v]\n"
+ ;
+
@@ -1904,7 +1860,6 @@ index 0000000000..347f6283ca
+ char *throttling_group;
+ char *cache;
+ bool write_zero;
+ bool skip;
+} RestoreMap;
+
+static bool try_parse_option(char **line, const char *optname, char **out, const char *inbuf) {
@@ -1942,10 +1897,9 @@ index 0000000000..347f6283ca
+ const char *filename;
+ const char *dirname;
+ const char *readmap = NULL;
+ gchar **drive_list = NULL;
+
+ for (;;) {
+ c = getopt(argc, argv, "hvd:r:");
+ c = getopt(argc, argv, "hvr:");
+ if (c == -1) {
+ break;
+ }
@@ -1954,9 +1908,6 @@ index 0000000000..347f6283ca
+ case 'h':
+ help();
+ break;
+ case 'd':
+ drive_list = g_strsplit(optarg, ",", 254);
+ break;
+ case 'r':
+ readmap = optarg;
+ break;
@@ -2016,61 +1967,47 @@ index 0000000000..347f6283ca
+ char *bps = NULL;
+ char *group = NULL;
+ char *cache = NULL;
+ char *devname = NULL;
+ bool skip = false;
+ uint64_t bps_value = 0;
+ const char *path = NULL;
+ bool write_zero = true;
+
+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
+ break;
+ }
+ int len = strlen(line);
+ if (line[len - 1] == '\n') {
+ line[len - 1] = '\0';
+ len = len - 1;
+ if (len == 0) {
+ if (len == 1) {
+ break;
+ }
+ }
+
+ if (strncmp(line, "skip", 4) == 0) {
+ if (len < 6 || line[4] != '=') {
+ g_error("read map failed - option 'skip' has no value ('%s')",
+ inbuf);
+ } else {
+ devname = line + 5;
+ skip = true;
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
+ } else {
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
+ }
+
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
+
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ path = extract_devname(path, &devname, -1);
+ }
+
+ uint64_t bps_value = 0;
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
+
+ const char *path;
+ bool write_zero;
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ char *devname = NULL;
+ path = extract_devname(path, &devname, -1);
+ if (!devname) {
+ g_error("read map failed - no dev name specified ('%s')",
+ inbuf);
@@ -2084,7 +2021,6 @@ index 0000000000..347f6283ca
+ map->throttling_group = group;
+ map->cache = cache;
+ map->write_zero = write_zero;
+ map->skip = skip;
+
+ g_hash_table_insert(devmap, map->devname, map);
+
@@ -2093,12 +2029,14 @@ index 0000000000..347f6283ca
+
+ int i;
+ int vmstate_fd = -1;
+ bool drive_rename_bitmap[255];
+ memset(drive_rename_bitmap, 0, sizeof(drive_rename_bitmap));
+ guint8 vmstate_stream = 0;
+
+ BlockBackend *blk = NULL;
+
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
+ vmstate_stream = i;
+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
+ if (vmstate_fd < 0) {
@@ -2114,25 +2052,8 @@ index 0000000000..347f6283ca
+ const char *cache = NULL;
+ int flags = BDRV_O_RDWR;
+ bool write_zero = true;
+ bool skip = false;
+
+ BlockBackend *blk = NULL;
+
+ if (drive_list) {
+ skip = true;
+ int j;
+ for (j = 0; drive_list[j]; j++) {
+ if (strcmp(drive_list[j], di->devname) == 0) {
+ skip = false;
+ drive_rename_bitmap[i] = true;
+ break;
+ }
+ }
+ } else {
+ drive_rename_bitmap[i] = true;
+ }
+
+ if (!skip && readmap) {
+ if (readmap) {
+ RestoreMap *map;
+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
+ if (map == NULL) {
@@ -2144,8 +2065,7 @@ index 0000000000..347f6283ca
+ throttling_group = map->throttling_group;
+ cache = map->cache;
+ write_zero = map->write_zero;
+ skip = map->skip;
+ } else if (!skip) {
+ } else {
+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
+ dirname, di->devname);
+ printf("DEVINFO %s %zd\n", devfn, di->size);
@@ -2163,60 +2083,57 @@ index 0000000000..347f6283ca
+ write_zero = false;
+ }
+
+ if (!skip) {
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too.
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
+
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
+
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
+
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
+
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
+ }
+
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
+ }
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too.
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
+
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, skip, &errp) < 0) {
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
+
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
+
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
+ }
+
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
+ }
+
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
+ g_error("%s", error_get_pretty(errp));
+ }
+
@@ -2226,10 +2143,6 @@ index 0000000000..347f6283ca
+ }
+ }
+
+ if (drive_list) {
+ g_strfreev(drive_list);
+ }
+
+ if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
+ g_error("restore failed - %s", error_get_pretty(errp));
+ }
@@ -2237,7 +2150,7 @@ index 0000000000..347f6283ca
+ if (!readmap) {
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && drive_rename_bitmap[i]) {
+ if (di && (i != vmstate_stream)) {
+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
+ dirname, di->devname);
+ char *fn = g_strdup_printf("%s/disk-%s.raw",
@@ -2252,6 +2165,8 @@ index 0000000000..347f6283ca
+
+ vma_reader_destroy(vmar);
+
+ blk_unref(blk);
+
+ bdrv_close_all();
+
+ return ret;
@@ -2336,7 +2251,7 @@ index 0000000000..347f6283ca
+ struct iovec iov;
+ QEMUIOVector qiov;
+
+ int64_t start, end, readlen;
+ int64_t start, end;
+ int ret = 0;
+
+ unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
@@ -2350,24 +2265,16 @@ index 0000000000..347f6283ca
+ iov.iov_len = VMA_CLUSTER_SIZE;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ if (start + 1 == end) {
+ memset(buf, 0, VMA_CLUSTER_SIZE);
+ readlen = job->len - start * VMA_CLUSTER_SIZE;
+ assert(readlen > 0 && readlen <= VMA_CLUSTER_SIZE);
+ } else {
+ readlen = VMA_CLUSTER_SIZE;
+ }
+
+ ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
+ readlen, &qiov, 0);
+ VMA_CLUSTER_SIZE, &qiov, 0);
+ if (ret < 0) {
+ vma_writer_set_error(job->vmaw, "read error");
+ vma_writer_set_error(job->vmaw, "read error", -1);
+ goto out;
+ }
+
+ size_t zb = 0;
+ if (vma_writer_write(job->vmaw, job->dev_id, start, buf, &zb) < 0) {
+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed");
+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed", -1);
+ goto out;
+ }
+ }
@@ -2666,7 +2573,7 @@ index 0000000000..347f6283ca
+}
diff --git a/vma.h b/vma.h
new file mode 100644
index 0000000000..86d2873aa5
index 0000000000..c895c97f6d
--- /dev/null
+++ b/vma.h
@@ -0,0 +1,150 @@
@@ -2804,7 +2711,7 @@ index 0000000000..86d2873aa5
+int coroutine_fn vma_writer_flush_output(VmaWriter *vmaw);
+
+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
+
+
+VmaReader *vma_reader_create(const char *filename, Error **errp);
@@ -2814,7 +2721,7 @@ index 0000000000..86d2873aa5
+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
+ BlockBackend *target, bool write_zeroes,
+ bool skip, Error **errp);
+ Error **errp);
+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
+ Error **errp);
+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);

View File

@@ -9,20 +9,18 @@ Subject: [PATCH] PVE-Backup: add backup-dump block driver
- job.c: make job_should_pause non-static
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to coroutine changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/backup-dump.c | 168 +++++++++++++++++++++++++++++++
block/backup.c | 30 ++----
block/meson.build | 1 +
include/block/block_int-common.h | 35 +++++++
job.c | 3 +-
5 files changed, 214 insertions(+), 23 deletions(-)
block/backup-dump.c | 168 ++++++++++++++++++++++++++++++++++++++
block/backup.c | 32 +++-----
block/meson.build | 1 +
include/block/block_int.h | 35 ++++++++
job.c | 3 +-
5 files changed, 216 insertions(+), 23 deletions(-)
create mode 100644 block/backup-dump.c
diff --git a/block/backup-dump.c b/block/backup-dump.c
new file mode 100644
index 0000000000..232a094426
index 0000000000..93d7f46950
--- /dev/null
+++ b/block/backup-dump.c
@@ -0,0 +1,168 @@
@@ -37,6 +35,7 @@ index 0000000000..232a094426
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qom/object_interfaces.h"
+#include "block/block_int.h"
+
@@ -47,8 +46,7 @@ index 0000000000..232a094426
+ void *dump_cb_data;
+} BDRVBackupDumpState;
+
+static coroutine_fn int qemu_backup_dump_co_get_info(BlockDriverState *bs,
+ BlockDriverInfo *bdi)
+static int qemu_backup_dump_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVBackupDumpState *s = bs->opaque;
+
@@ -89,7 +87,7 @@ index 0000000000..232a094426
+ /* Nothing to do. */
+}
+
+static coroutine_fn int64_t qemu_backup_dump_co_getlength(BlockDriverState *bs)
+static int64_t qemu_backup_dump_getlength(BlockDriverState *bs)
+{
+ BDRVBackupDumpState *s = bs->opaque;
+
@@ -149,8 +147,8 @@ index 0000000000..232a094426
+
+ .bdrv_close = qemu_backup_dump_close,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_getlength = qemu_backup_dump_co_getlength,
+ .bdrv_co_get_info = qemu_backup_dump_co_get_info,
+ .bdrv_getlength = qemu_backup_dump_getlength,
+ .bdrv_get_info = qemu_backup_dump_get_info,
+
+ .bdrv_co_writev = qemu_backup_dump_co_writev,
+
@@ -195,16 +193,16 @@ index 0000000000..232a094426
+ return bs;
+}
diff --git a/block/backup.c b/block/backup.c
index 39410dcf8d..af87fa6aa9 100644
index bd3614ce70..8bae9b060e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -29,28 +29,6 @@
@@ -31,28 +31,6 @@
#include "block/copy-before-write.h"
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
-typedef struct BackupBlockJob {
- BlockJob common;
- BlockDriverState *cbw;
- BlockDriverState *backup_top;
- BlockDriverState *source_bs;
- BlockDriverState *target_bs;
-
@@ -227,10 +225,11 @@ index 39410dcf8d..af87fa6aa9 100644
static const BlockJobDriver backup_job_driver;
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -457,6 +435,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
@@ -504,6 +482,16 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
cluster_size = block_copy_cluster_size(bcs);
+ cluster_size = backup_calculate_cluster_size(target, errp);
+ if (cluster_size < 0) {
+ goto error;
+ }
@@ -239,11 +238,12 @@ index 39410dcf8d..af87fa6aa9 100644
+ if (bdrv_get_info(bs, &bdi) == 0) {
+ cluster_size = MAX(cluster_size, bdi.cluster_size);
+ }
if (perf->max_chunk && perf->max_chunk < cluster_size) {
error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
+
/*
* If source is in backing chain of target assume that target is going to be
* used for "image fleecing", i.e. it should represent a kind of snapshot of
diff --git a/block/meson.build b/block/meson.build
index 59b71ba9f3..6fde9f7dcd 100644
index 9ce9246194..19bc2b7cbb 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,6 +4,7 @@ block_ss.add(files(
@@ -251,28 +251,28 @@ index 59b71ba9f3..6fde9f7dcd 100644
'amend.c',
'backup.c',
+ 'backup-dump.c',
'copy-before-write.c',
'backup-top.c',
'blkdebug.c',
'blklogwrites.c',
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 74195c3004..0f2e1817ad 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 11442893d0..8f6135e6a5 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
#include "block/aio.h"
#include "block/block-common.h"
#include "block/accounting.h"
#include "block/block.h"
+#include "block/block-copy.h"
#include "block/block-global-state.h"
#include "block/snapshot.h"
#include "qemu/iov.h"
@@ -60,6 +61,40 @@
#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
@@ -63,6 +64,40 @@
#define BLOCK_PROBE_BUF_SIZE 512
+typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
+
+BlockDriverState *bdrv_backup_dump_create(
+BlockDriverState *bdrv_backuo_dump_create(
+ int dump_cb_block_size,
+ uint64_t byte_size,
+ BackupDumpFunc *dump_cb,
@@ -284,7 +284,7 @@ index 74195c3004..0f2e1817ad 100644
+typedef struct BlockCopyState BlockCopyState;
+typedef struct BackupBlockJob {
+ BlockJob common;
+ BlockDriverState *cbw;
+ BlockDriverState *backup_top;
+ BlockDriverState *source_bs;
+ BlockDriverState *target_bs;
+
@@ -308,16 +308,16 @@ index 74195c3004..0f2e1817ad 100644
BDRV_TRACKED_READ,
BDRV_TRACKED_WRITE,
diff --git a/job.c b/job.c
index 72d57f0934..93e22d180b 100644
index e7a5d28854..44eec9a441 100644
--- a/job.c
+++ b/job.c
@@ -330,7 +330,8 @@ static bool job_started_locked(Job *job)
@@ -269,7 +269,8 @@ static bool job_started(Job *job)
return job->co;
}
/* Called with job_mutex held. */
-static bool job_should_pause_locked(Job *job)
+bool job_should_pause_locked(Job *job);
+bool job_should_pause_locked(Job *job)
-static bool job_should_pause(Job *job)
+bool job_should_pause(Job *job);
+bool job_should_pause(Job *job)
{
return job->pause_count > 0;
}

View File

@@ -1,59 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 2 Mar 2022 08:35:05 +0100
Subject: [PATCH] block/backup: move bcs bitmap initialization to job creation
For backing up the state of multiple disks from the same point in time, a job
for each disk has to be created. It's convenient if the jobs don't
have to be started at the same time and if operation of the VM can be
resumed after job creation. This would lead to a window between job
creation and running the job, where writes can happen. But no writes
should happen between setting up the copy-before-write filter and
setting up the block copy state bitmap, because then new writes would
just pass through.
Commit 06e0a9c16405c0a4c1eca33cf286cc04c42066a2 moved initialization of
the bitmap to setting up the copy-before-write filter when sync_mode
is not MIRROR_SYNC_MODE_BITMAP. Ensure that the bitmap is initialized
upon job creation for the remaining case too, by moving the
backup_init_bcs_bitmap call to backup_job_create.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/backup.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index db3791f4d1..39410dcf8d 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
true);
} else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
/*
- * We can't hog the coroutine to initialize this thoroughly.
- * Set a flag and resume work when we are able to yield safely.
+ * Initialization is costly here. Simply set a flag and let the
+ * backup_run coroutine resume work once it can yield safely.
*/
block_copy_set_skip_unallocated(job->bcs, true);
}
@@ -252,8 +252,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
int ret;
- backup_init_bcs_bitmap(s);
-
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0;
int64_t count;
@@ -495,6 +493,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
+ backup_init_bcs_bitmap(job);
+
return &job->common;
error:

File diff suppressed because it is too large Load Diff

View File

@@ -5,19 +5,17 @@ Subject: [PATCH] PVE-Backup: pbs-restore - new command to restore from proxmox
backup server
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[WB: add namespace support]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
meson.build | 4 +
pbs-restore.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 240 insertions(+)
pbs-restore.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 228 insertions(+)
create mode 100644 pbs-restore.c
diff --git a/meson.build b/meson.build
index d53976d621..c3330310d9 100644
index 7d7e474313..dd1c5bdb4e 100644
--- a/meson.build
+++ b/meson.build
@@ -3914,6 +3914,10 @@ if have_tools
@@ -2749,6 +2749,10 @@ if have_tools
vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
dependencies: [authz, block, crypto, io, qom], install: true)
@@ -30,10 +28,10 @@ index d53976d621..c3330310d9 100644
subdir('contrib/elf2dmp')
diff --git a/pbs-restore.c b/pbs-restore.c
new file mode 100644
index 0000000000..f03d9bab8d
index 0000000000..4d3f925a1b
--- /dev/null
+++ b/pbs-restore.c
@@ -0,0 +1,236 @@
@@ -0,0 +1,224 @@
+/*
+ * Qemu image restore helper for Proxmox Backup
+ *
@@ -52,6 +50,7 @@ index 0000000000..f03d9bab8d
+#include <getopt.h>
+#include <string.h>
+
+#include "qemu-common.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
@@ -65,7 +64,7 @@ index 0000000000..f03d9bab8d
+static void help(void)
+{
+ const char *help_msg =
+ "usage: pbs-restore [--repository <repo>] [--ns namespace] snapshot archive-name target [command options]\n"
+ "usage: pbs-restore [--repository <repo>] snapshot archive-name target [command options]\n"
+ ;
+
+ printf("%s", help_msg);
@@ -97,7 +96,7 @@ index 0000000000..f03d9bab8d
+ }
+ res = blk_pwrite_zeroes(callback_data->target, offset, data_len, 0);
+ } else {
+ res = blk_pwrite(callback_data->target, offset, data_len, data, 0);
+ res = blk_pwrite(callback_data->target, offset, data, data_len, 0);
+ }
+
+ if (res < 0) {
@@ -113,7 +112,6 @@ index 0000000000..f03d9bab8d
+ Error *main_loop_err = NULL;
+ const char *format = "raw";
+ const char *repository = NULL;
+ const char *backup_ns = NULL;
+ const char *keyfile = NULL;
+ int verbose = false;
+ bool skip_zero = false;
@@ -127,7 +125,6 @@ index 0000000000..f03d9bab8d
+ {"verbose", no_argument, 0, 'v'},
+ {"format", required_argument, 0, 'f'},
+ {"repository", required_argument, 0, 'r'},
+ {"ns", required_argument, 0, 'n'},
+ {"keyfile", required_argument, 0, 'k'},
+ {0, 0, 0, 0}
+ };
@@ -148,9 +145,6 @@ index 0000000000..f03d9bab8d
+ case 'r':
+ repository = g_strdup(argv[optind - 1]);
+ break;
+ case 'n':
+ backup_ns = g_strdup(argv[optind - 1]);
+ break;
+ case 'k':
+ keyfile = g_strdup(argv[optind - 1]);
+ break;
@@ -201,16 +195,8 @@ index 0000000000..f03d9bab8d
+ fprintf(stderr, "connecting to repository '%s'\n", repository);
+ }
+ char *pbs_error = NULL;
+ ProxmoxRestoreHandle *conn = proxmox_restore_new_ns(
+ repository,
+ snapshot,
+ backup_ns,
+ password,
+ keyfile,
+ key_password,
+ fingerprint,
+ &pbs_error
+ );
+ ProxmoxRestoreHandle *conn = proxmox_restore_new(
+ repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
+ if (conn == NULL) {
+ fprintf(stderr, "restore failed: %s\n", pbs_error);
+ return -1;

View File

@@ -0,0 +1,452 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 29 Jun 2020 11:06:03 +0200
Subject: [PATCH] PVE-Backup: Add dirty-bitmap tracking for incremental backups
Uses QEMU's existing MIRROR_SYNC_MODE_BITMAP and a dirty-bitmap on top
of all backed-up drives. This will only execute the data-write callback
for any changed chunks; the PBS Rust code will reuse chunks from the
previous index for everything it doesn't receive if reuse_index is true.
On error or cancellation, remove all dirty bitmaps to ensure
consistency.
Add PBS/incremental specific information to query backup info QMP and
HMP commands.
Only supported for PBS backups.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
monitor/hmp-cmds.c | 45 ++++++++++----
proxmox-backup-client.c | 3 +-
proxmox-backup-client.h | 1 +
pve-backup.c | 103 ++++++++++++++++++++++++++++++---
qapi/block-core.json | 12 +++-
6 files changed, 142 insertions(+), 23 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 1e29681d30..3fca3ce3e9 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,6 +1042,7 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
+ false, false, // PBS incremental
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 7efcd2d641..b2b5f1298b 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -221,19 +221,42 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "End time: %s", ctime(&info->end_time));
}
- int per = (info->has_total && info->total &&
- info->has_transferred && info->transferred) ?
- (info->transferred * 100)/info->total : 0;
- int zero_per = (info->has_total && info->total &&
- info->has_zero_bytes && info->zero_bytes) ?
- (info->zero_bytes * 100)/info->total : 0;
monitor_printf(mon, "Backup file: %s\n", info->backup_file);
monitor_printf(mon, "Backup uuid: %s\n", info->uuid);
- monitor_printf(mon, "Total size: %zd\n", info->total);
- monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
- info->transferred, per);
- monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
- info->zero_bytes, zero_per);
+
+ if (!(info->has_total && info->total)) {
+ // this should not happen normally
+ monitor_printf(mon, "Total size: %d\n", 0);
+ } else {
+ bool incremental = false;
+ size_t total_or_dirty = info->total;
+ if (info->has_transferred) {
+ if (info->has_dirty && info->dirty) {
+ if (info->dirty < info->total) {
+ total_or_dirty = info->dirty;
+ incremental = true;
+ }
+ }
+ }
+
+ int per = (info->transferred * 100)/total_or_dirty;
+
+ monitor_printf(mon, "Backup mode: %s\n", incremental ? "incremental" : "full");
+
+ int zero_per = (info->has_zero_bytes && info->zero_bytes) ?
+ (info->zero_bytes * 100)/info->total : 0;
+ monitor_printf(mon, "Total size: %zd\n", info->total);
+ monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
+ info->transferred, per);
+ monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
+ info->zero_bytes, zero_per);
+
+ if (info->has_reused) {
+ int reused_per = (info->reused * 100)/total_or_dirty;
+ monitor_printf(mon, "Reused bytes: %zd (%d%%)\n",
+ info->reused, reused_per);
+ }
+ }
}
qapi_free_BackupStatus(info);
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index a8f6653a81..4ce7bc0b5e 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -89,6 +89,7 @@ proxmox_backup_co_register_image(
ProxmoxBackupHandle *pbs,
const char *device_name,
uint64_t size,
+ bool incremental,
Error **errp)
{
Coroutine *co = qemu_coroutine_self();
@@ -98,7 +99,7 @@ proxmox_backup_co_register_image(
int pbs_res = -1;
proxmox_backup_register_image_async(
- pbs, device_name, size ,proxmox_backup_schedule_wake, &waker, &pbs_res, &pbs_err);
+ pbs, device_name, size, incremental, proxmox_backup_schedule_wake, &waker, &pbs_res, &pbs_err);
qemu_coroutine_yield();
if (pbs_res < 0) {
if (errp) error_setg(errp, "backup register image failed: %s", pbs_err ? pbs_err : "unknown error");
diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h
index 1dda8b7d8f..8cbf645b2c 100644
--- a/proxmox-backup-client.h
+++ b/proxmox-backup-client.h
@@ -32,6 +32,7 @@ proxmox_backup_co_register_image(
ProxmoxBackupHandle *pbs,
const char *device_name,
uint64_t size,
+ bool incremental,
Error **errp);
diff --git a/pve-backup.c b/pve-backup.c
index 66868dec14..6cdbd40529 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -28,6 +28,8 @@
*
*/
+const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
+
static struct PVEBackupState {
struct {
// Everithing accessed from qmp_backup_query command is protected using lock
@@ -39,7 +41,9 @@ static struct PVEBackupState {
uuid_t uuid;
char uuid_str[37];
size_t total;
+ size_t dirty;
size_t transferred;
+ size_t reused;
size_t zero_bytes;
} stat;
int64_t speed;
@@ -66,6 +70,7 @@ typedef struct PVEBackupDevInfo {
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
+ BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
} PVEBackupDevInfo;
@@ -105,11 +110,12 @@ static bool pvebackup_error_or_canceled(void)
return error_or_canceled;
}
-static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
+static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes, size_t reused)
{
qemu_mutex_lock(&backup_state.stat.lock);
backup_state.stat.zero_bytes += zero_bytes;
backup_state.stat.transferred += transferred;
+ backup_state.stat.reused += reused;
qemu_mutex_unlock(&backup_state.stat.lock);
}
@@ -148,7 +154,8 @@ pvebackup_co_dump_pbs_cb(
pvebackup_propagate_error(local_err);
return pbs_res;
} else {
- pvebackup_add_transfered_bytes(size, !buf ? size : 0);
+ size_t reused = (pbs_res == 0) ? size : 0;
+ pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
}
return size;
@@ -208,11 +215,11 @@ pvebackup_co_dump_vma_cb(
} else {
if (remaining >= VMA_CLUSTER_SIZE) {
assert(ret == VMA_CLUSTER_SIZE);
- pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes);
+ pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes, 0);
remaining -= VMA_CLUSTER_SIZE;
} else {
assert(ret == remaining);
- pvebackup_add_transfered_bytes(remaining, zero_bytes);
+ pvebackup_add_transfered_bytes(remaining, zero_bytes, 0);
remaining = 0;
}
}
@@ -248,6 +255,18 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
if (local_err != NULL) {
pvebackup_propagate_error(local_err);
}
+ } else {
+ // on error or cancel we cannot ensure synchronization of dirty
+ // bitmaps with backup server, so remove all and do full backup next
+ GList *l = backup_state.di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ if (di->bitmap) {
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+ }
}
proxmox_backup_disconnect(backup_state.pbs);
@@ -303,6 +322,12 @@ static void pvebackup_complete_cb(void *opaque, int ret)
// remove self from job queue
backup_state.di_list = g_list_remove(backup_state.di_list, di);
+ if (di->bitmap && ret < 0) {
+ // on error or cancel we cannot ensure synchronization of this dirty
+ // bitmap with the backup server, so remove it and do a full backup next
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+
g_free(di);
qemu_mutex_unlock(&backup_state.backup_mutex);
@@ -472,12 +497,18 @@ static bool create_backup_jobs(void) {
assert(di->target != NULL);
+ MirrorSyncMode sync_mode = MIRROR_SYNC_MODE_FULL;
+ BitmapSyncMode bitmap_mode = BITMAP_SYNC_MODE_NEVER;
+ if (di->bitmap) {
+ sync_mode = MIRROR_SYNC_MODE_BITMAP;
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ }
AioContext *aio_context = bdrv_get_aio_context(di->bs);
aio_context_acquire(aio_context);
BlockJob *job = backup_job_create(
- NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
- BITMAP_SYNC_MODE_NEVER, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+ NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+ bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
aio_context_release(aio_context);
@@ -528,6 +559,8 @@ typedef struct QmpBackupTask {
const char *fingerprint;
bool has_fingerprint;
int64_t backup_time;
+ bool has_use_dirty_bitmap;
+ bool use_dirty_bitmap;
bool has_format;
BackupFormat format;
bool has_config_file;
@@ -619,6 +652,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
+ size_t dirty = 0;
l = di_list;
while (l) {
@@ -656,6 +690,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
+ bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -675,7 +711,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
goto err;
}
- if (proxmox_backup_co_connect(pbs, task->errp) < 0)
+ int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ if (connect_result < 0)
goto err;
/* register all devices */
@@ -686,9 +723,40 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *devname = bdrv_get_device_name(di->bs);
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, task->errp);
- if (dev_id < 0)
+ BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
+ bool expect_only_dirty = false;
+
+ if (use_dirty_bitmap) {
+ if (bitmap == NULL) {
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ if (!bitmap) {
+ goto err;
+ }
+ } else {
+ expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
+ }
+
+ if (expect_only_dirty) {
+ dirty += bdrv_get_dirty_count(bitmap);
+ } else {
+ /* mark entire bitmap as dirty to make full backup */
+ bdrv_set_dirty_bitmap(bitmap, 0, di->size);
+ dirty += di->size;
+ }
+ di->bitmap = bitmap;
+ } else {
+ dirty += di->size;
+
+ /* after a full backup the old dirty bitmap is invalid anyway */
+ if (bitmap != NULL) {
+ bdrv_release_dirty_bitmap(bitmap);
+ }
+ }
+
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ if (dev_id < 0) {
goto err;
+ }
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
goto err;
@@ -697,6 +765,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->dev_id = dev_id;
}
} else if (format == BACKUP_FORMAT_VMA) {
+ dirty = total;
+
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
@@ -724,6 +794,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
} else if (format == BACKUP_FORMAT_DIR) {
+ dirty = total;
+
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
@@ -796,8 +868,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
+ backup_state.stat.dirty = dirty;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -821,6 +895,10 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ if (di->bitmap) {
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+
if (di->target) {
bdrv_unref(di->target);
}
@@ -862,6 +940,7 @@ UuidInfo *qmp_backup(
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -880,6 +959,8 @@ UuidInfo *qmp_backup(
.backup_id = backup_id,
.has_backup_time = has_backup_time,
.backup_time = backup_time,
+ .has_use_dirty_bitmap = has_use_dirty_bitmap,
+ .use_dirty_bitmap = use_dirty_bitmap,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
@@ -948,10 +1029,14 @@ BackupStatus *qmp_query_backup(Error **errp)
info->has_total = true;
info->total = backup_state.stat.total;
+ info->has_dirty = true;
+ info->dirty = backup_state.stat.dirty;
info->has_zero_bytes = true;
info->zero_bytes = backup_state.stat.zero_bytes;
info->has_transferred = true;
info->transferred = backup_state.stat.transferred;
+ info->has_reused = true;
+ info->reused = backup_state.stat.reused;
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c5d604693f..a138ad08d4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -712,8 +712,13 @@
#
# @total: total amount of bytes involved in the backup process
#
+# @dirty: with incremental mode (PBS) this is the amount of bytes involved
+# in the backup process which are marked dirty.
+#
# @transferred: amount of bytes already backed up.
#
+# @reused: amount of bytes reused due to deduplication.
+#
# @zero-bytes: amount of 'zero' bytes detected.
#
# @start-time: time (epoch) when backup job started.
@@ -726,8 +731,8 @@
#
##
{ 'struct': 'BackupStatus',
- 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int',
- '*transferred': 'int', '*zero-bytes': 'int',
+ 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
+ '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
'*backup-file': 'str', '*uuid': 'str' } }
@@ -770,6 +775,8 @@
#
# @backup-time: backup timestamp (Unix epoch, required for format 'pbs')
#
+# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
+#
# Returns: the uuid of the backup job
#
##
@@ -780,6 +787,7 @@
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
+ '*use-dirty-bitmap': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,219 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Thu, 9 Jul 2020 12:53:08 +0200
Subject: [PATCH] PVE: various PBS fixes
pbs: fix crypt and compress parameters
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
PVE: handle PBS write callback with big blocks correctly
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
PVE: add zero block handling to PBS dump callback
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 ++-
pve-backup.c | 59 ++++++++++++++++++++++++++--------
qapi/block-core.json | 6 ++++
3 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 3fca3ce3e9..69254396d5 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,7 +1042,9 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
- false, false, // PBS incremental
+ false, false, // PBS use-dirty-bitmap
+ false, false, // PBS compress
+ false, false, // PBS encrypt
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/pve-backup.c b/pve-backup.c
index 6cdbd40529..7527885251 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -8,6 +8,7 @@
#include "block/blockjob.h"
#include "qapi/qapi-commands-block.h"
#include "qapi/qmp/qerror.h"
+#include "qemu/cutils.h"
/* PVE backup state and related function */
@@ -67,6 +68,7 @@ opts_init(pvebackup_init);
typedef struct PVEBackupDevInfo {
BlockDriverState *bs;
size_t size;
+ uint64_t block_size;
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
@@ -135,10 +137,13 @@ pvebackup_co_dump_pbs_cb(
PVEBackupDevInfo *di = opaque;
assert(backup_state.pbs);
+ assert(buf);
Error *local_err = NULL;
int pbs_res = -1;
+ bool is_zero_block = size == di->block_size && buffer_is_zero(buf, size);
+
qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
@@ -147,16 +152,28 @@ pvebackup_co_dump_pbs_cb(
return -1;
}
- pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+ uint64_t transferred = 0;
+ uint64_t reused = 0;
+ while (transferred < size) {
+ uint64_t left = size - transferred;
+ uint64_t to_transfer = left < di->block_size ? left : di->block_size;
+
+ pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id,
+ is_zero_block ? NULL : buf + transferred, start + transferred,
+ to_transfer, &local_err);
+ transferred += to_transfer;
+
+ if (pbs_res < 0) {
+ pvebackup_propagate_error(local_err);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return pbs_res;
+ }
+
+ reused += pbs_res == 0 ? to_transfer : 0;
+ }
+
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
-
- if (pbs_res < 0) {
- pvebackup_propagate_error(local_err);
- return pbs_res;
- } else {
- size_t reused = (pbs_res == 0) ? size : 0;
- pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
- }
+ pvebackup_add_transfered_bytes(size, is_zero_block ? size : 0, reused);
return size;
}
@@ -178,6 +195,7 @@ pvebackup_co_dump_vma_cb(
int ret = -1;
assert(backup_state.vmaw);
+ assert(buf);
uint64_t remaining = size;
@@ -204,9 +222,7 @@ pvebackup_co_dump_vma_cb(
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
++cluster_num;
- if (buf) {
- buf += VMA_CLUSTER_SIZE;
- }
+ buf += VMA_CLUSTER_SIZE;
if (ret < 0) {
Error *local_err = NULL;
vma_writer_error_propagate(backup_state.vmaw, &local_err);
@@ -569,6 +585,10 @@ typedef struct QmpBackupTask {
const char *firewall_file;
bool has_devlist;
const char *devlist;
+ bool has_compress;
+ bool compress;
+ bool has_encrypt;
+ bool encrypt;
bool has_speed;
int64_t speed;
Error **errp;
@@ -692,6 +712,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -701,8 +722,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->has_password ? task->password : NULL,
task->has_keyfile ? task->keyfile : NULL,
task->has_key_password ? task->key_password : NULL,
+ task->has_compress ? task->compress : true,
+ task->has_encrypt ? task->encrypt : task->has_keyfile,
task->has_fingerprint ? task->fingerprint : NULL,
- &pbs_err);
+ &pbs_err);
if (!pbs) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
@@ -721,6 +744,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ di->block_size = dump_cb_block_size;
+
const char *devname = bdrv_get_device_name(di->bs);
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
@@ -941,6 +966,8 @@ UuidInfo *qmp_backup(
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -951,6 +978,8 @@ UuidInfo *qmp_backup(
.backup_file = backup_file,
.has_password = has_password,
.password = password,
+ .has_keyfile = has_keyfile,
+ .keyfile = keyfile,
.has_key_password = has_key_password,
.key_password = key_password,
.has_fingerprint = has_fingerprint,
@@ -961,6 +990,10 @@ UuidInfo *qmp_backup(
.backup_time = backup_time,
.has_use_dirty_bitmap = has_use_dirty_bitmap,
.use_dirty_bitmap = use_dirty_bitmap,
+ .has_compress = has_compress,
+ .compress = compress,
+ .has_encrypt = has_encrypt,
+ .encrypt = encrypt,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index a138ad08d4..a75f1b4687 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -777,6 +777,10 @@
#
# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
#
+# @compress: use compression (optional for format 'pbs', defaults to true)
+#
+# @encrypt: use encryption (optional for format 'pbs', defaults to true if there is a keyfile)
+#
# Returns: the uuid of the backup job
#
##
@@ -788,6 +792,8 @@
'*backup-id': 'str',
'*backup-time': 'int',
'*use-dirty-bitmap': 'bool',
+ '*compress': 'bool',
+ '*encrypt': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -7,27 +7,20 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[error cleanups, file_open implementation]
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[WB: add namespace support]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[FE: adapt to changed function signatures
make pbs_co_preadv return values consistent with QEMU
getlength is now a coroutine function]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/meson.build | 3 +
block/pbs.c | 305 +++++++++++++++++++++++++++++++++++++++++++
block/pbs.c | 271 +++++++++++++++++++++++++++++++++++++++++++
configure | 9 ++
meson.build | 2 +-
qapi/block-core.json | 13 ++
qapi/pragma.json | 1 +
6 files changed, 332 insertions(+), 1 deletion(-)
meson.build | 1 +
qapi/block-core.json | 13 +++
5 files changed, 297 insertions(+)
create mode 100644 block/pbs.c
diff --git a/block/meson.build b/block/meson.build
index 6d468f89e5..becc99ac4e 100644
index 9e433daf2e..e3ed5ac97c 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -50,6 +50,9 @@ block_ss.add(files(
@@ -51,6 +51,9 @@ block_ss.add(files(
'../pve-backup.c',
), libproxmox_backup_qemu)
@@ -35,14 +28,14 @@ index 6d468f89e5..becc99ac4e 100644
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: libproxmox_backup_qemu)
+
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/pbs.c b/block/pbs.c
new file mode 100644
index 0000000000..a2211e0f3b
index 0000000000..78dad0dcc4
--- /dev/null
+++ b/block/pbs.c
@@ -0,0 +1,305 @@
@@ -0,0 +1,271 @@
+/*
+ * Proxmox Backup Server read-only block driver
+ */
@@ -55,12 +48,10 @@ index 0000000000..a2211e0f3b
+#include "qemu/option.h"
+#include "qemu/cutils.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+
+#include <proxmox-backup-qemu.h>
+
+#define PBS_OPT_REPOSITORY "repository"
+#define PBS_OPT_NAMESPACE "namespace"
+#define PBS_OPT_SNAPSHOT "snapshot"
+#define PBS_OPT_ARCHIVE "archive"
+#define PBS_OPT_KEYFILE "keyfile"
@@ -74,7 +65,6 @@ index 0000000000..a2211e0f3b
+ int64_t length;
+
+ char *repository;
+ char *namespace;
+ char *snapshot;
+ char *archive;
+} BDRVPBSState;
@@ -89,11 +79,6 @@ index 0000000000..a2211e0f3b
+ .help = "The server address and repository to connect to.",
+ },
+ {
+ .name = PBS_OPT_NAMESPACE,
+ .type = QEMU_OPT_STRING,
+ .help = "Optional: The snapshot's namespace.",
+ },
+ {
+ .name = PBS_OPT_SNAPSHOT,
+ .type = QEMU_OPT_STRING,
+ .help = "The snapshot to read.",
@@ -129,7 +114,7 @@ index 0000000000..a2211e0f3b
+
+
+// filename format:
+// pbs:repository=<repo>,namespace=<ns>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+// pbs:repository=<repo>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+static void pbs_parse_filename(const char *filename, QDict *options,
+ Error **errp)
+{
@@ -165,7 +150,6 @@ index 0000000000..a2211e0f3b
+ s->archive = g_strdup(qemu_opt_get(opts, PBS_OPT_ARCHIVE));
+ const char *keyfile = qemu_opt_get(opts, PBS_OPT_KEYFILE);
+ const char *password = qemu_opt_get(opts, PBS_OPT_PASSWORD);
+ const char *namespace = qemu_opt_get(opts, PBS_OPT_NAMESPACE);
+ const char *fingerprint = qemu_opt_get(opts, PBS_OPT_FINGERPRINT);
+ const char *key_password = qemu_opt_get(opts, PBS_OPT_ENCRYPTION_PASSWORD);
+
@@ -178,12 +162,9 @@ index 0000000000..a2211e0f3b
+ if (!key_password) {
+ key_password = getenv("PBS_ENCRYPTION_PASSWORD");
+ }
+ if (namespace) {
+ s->namespace = g_strdup(namespace);
+ }
+
+ /* connect to PBS server in read mode */
+ s->conn = proxmox_restore_new_ns(s->repository, s->snapshot, s->namespace, password,
+ s->conn = proxmox_restore_new(s->repository, s->snapshot, password,
+ keyfile, key_password, fingerprint, &pbs_error);
+
+ /* invalidates qemu_opt_get char pointers from above */
@@ -228,13 +209,12 @@ index 0000000000..a2211e0f3b
+static void pbs_close(BlockDriverState *bs) {
+ BDRVPBSState *s = bs->opaque;
+ g_free(s->repository);
+ g_free(s->namespace);
+ g_free(s->snapshot);
+ g_free(s->archive);
+ proxmox_restore_disconnect(s->conn);
+}
+
+static coroutine_fn int64_t pbs_co_getlength(BlockDriverState *bs)
+static int64_t pbs_getlength(BlockDriverState *bs)
+{
+ BDRVPBSState *s = bs->opaque;
+ return s->length;
@@ -252,34 +232,20 @@ index 0000000000..a2211e0f3b
+}
+
+static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BDRVPBSState *s = bs->opaque;
+ int ret;
+ char *pbs_error = NULL;
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+ return -EIO;
+ }
+ uint8_t *buf = malloc(bytes);
+
+ ReadCallbackData rcb = {
+ .co = qemu_coroutine_self(),
+ .ctx = bdrv_get_aio_context(bs),
+ };
+
+ proxmox_restore_read_image_at_async(s->conn, s->aid, buf, (uint64_t)offset, (uint64_t)bytes,
+ proxmox_restore_read_image_at_async(s->conn, s->aid, buf, offset, bytes,
+ read_callback, (void *) &rcb, &ret, &pbs_error);
+
+ qemu_coroutine_yield();
@@ -290,17 +256,15 @@ index 0000000000..a2211e0f3b
+ return -EIO;
+ }
+
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ free(buf);
+
+ return 0;
+ return ret;
+}
+
+static coroutine_fn int pbs_co_pwritev(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ fprintf(stderr, "pbs-bdrv: cannot write to backup file, make sure "
+ "any attached disk devices are set to read-only!\n");
@@ -310,13 +274,8 @@ index 0000000000..a2211e0f3b
+static void pbs_refresh_filename(BlockDriverState *bs)
+{
+ BDRVPBSState *s = bs->opaque;
+ if (s->namespace) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s:%s(%s)",
+ s->repository, s->namespace, s->snapshot, s->archive);
+ } else {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+ }
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+}
+
+static const char *const pbs_strong_runtime_opts[] = {
@@ -333,7 +292,7 @@ index 0000000000..a2211e0f3b
+ .bdrv_file_open = pbs_file_open,
+ .bdrv_open = pbs_open,
+ .bdrv_close = pbs_close,
+ .bdrv_co_getlength = pbs_co_getlength,
+ .bdrv_getlength = pbs_getlength,
+
+ .bdrv_co_preadv = pbs_co_preadv,
+ .bdrv_co_pwritev = pbs_co_pwritev,
@@ -349,72 +308,71 @@ index 0000000000..a2211e0f3b
+
+block_init(bdrv_pbs_init);
diff --git a/configure b/configure
index 133f4e3235..f5a830c1f3 100755
index 6e308ed77f..869e97c72f 100755
--- a/configure
+++ b/configure
@@ -256,6 +256,7 @@ qemu_suffix="qemu"
softmmu="yes"
linux_user=""
bsd_user=""
@@ -428,6 +428,7 @@ vdi=${default_feature:-yes}
vvfat=${default_feature:-yes}
qed=${default_feature:-yes}
parallels=${default_feature:-yes}
+pbs_bdrv="yes"
plugins="$default_feature"
ninja=""
python=
@@ -809,6 +810,10 @@ for opt do
libxml2="auto"
debug_mutex="no"
libpmem="auto"
@@ -1486,6 +1487,10 @@ for opt do
;;
--enable-download) download="enabled"; git_submodules_action=update;
--enable-parallels) parallels="yes"
;;
+ --disable-pbs-bdrv) pbs_bdrv="no"
+ ;;
+ --enable-pbs-bdrv) pbs_bdrv="yes"
+ ;;
--enable-plugins) if test "$mingw32" = "yes"; then
error_exit "TCG plugins not currently supported on Windows platforms"
else
@@ -959,6 +964,7 @@ cat << EOF
bsd-user all BSD usermode emulation targets
pie Position Independent Executables
debug-tcg TCG debugging (default is disabled)
--disable-vhost-user) vhost_user="no"
;;
--enable-vhost-user) vhost_user="yes"
@@ -1956,6 +1961,7 @@ disabled with --disable-FEATURE, default is enabled if available
vvfat vvfat image format support
qed qed image format support
parallels parallels image format support
+ pbs-bdrv Proxmox backup server read-only block driver support
NOTE: The object files are built at the place where configure is launched
EOF
@@ -1744,6 +1750,9 @@ if test "$solaris" = "yes" ; then
crypto-afalg Linux AF_ALG crypto backend driver
capstone capstone disassembler support
debug-mutex mutex debugging support
@@ -4624,6 +4630,9 @@ fi
if test "$linux_aio" = "yes" ; then
echo "CONFIG_LINUX_AIO=y" >> $config_host_mak
fi
echo "SRC_PATH=$source_path" >> $config_host_mak
echo "TARGET_DIRS=$target_list" >> $config_host_mak
+if test "$pbs_bdrv" = "yes" ; then
+ echo "CONFIG_PBS_BDRV=y" >> $config_host_mak
+fi
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
if test "$vhost_scsi" = "yes" ; then
echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
fi
diff --git a/meson.build b/meson.build
index c3330310d9..cbfc9a43fb 100644
index dd1c5bdb4e..45c1f2de73 100644
--- a/meson.build
+++ b/meson.build
@@ -4319,7 +4319,7 @@ summary_info += {'bzip2 support': libbzip2}
summary_info += {'lzfse support': liblzfse}
summary_info += {'zstd support': zstd}
summary_info += {'NUMA host support': numa}
-summary_info += {'capstone': capstone}
@@ -3111,6 +3111,7 @@ summary_info += {'lzfse support': liblzfse.found()}
summary_info += {'zstd support': zstd.found()}
summary_info += {'NUMA host support': config_host.has_key('CONFIG_NUMA')}
summary_info += {'libxml2': libxml2.found()}
+summary_info += {'PBS bdrv support': config_host.has_key('CONFIG_PBS_BDRV')}
summary_info += {'libpmem support': libpmem}
summary_info += {'libdaxctl support': libdaxctl}
summary_info += {'libudev': libudev}
summary_info += {'capstone': capstone_opt == 'disabled' ? false : capstone_opt}
summary_info += {'libpmem support': libpmem.found()}
summary_info += {'libdaxctl support': libdaxctl.found()}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1b8462a51b..d67a6d448a 100644
index a75f1b4687..e4d0c923a4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3396,6 +3396,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
@@ -2982,6 +2982,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
+ 'pbs',
'ssh', 'throttle', 'vdi', 'vhdx',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
@@ -3482,6 +3483,17 @@
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
##
@@ -3045,6 +3046,17 @@
{ 'struct': 'BlockdevOptionsNull',
'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
@@ -427,28 +385,16 @@ index 1b8462a51b..d67a6d448a 100644
+{ 'struct': 'BlockdevOptionsPbs',
+ 'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
+ '*keyfile': 'str', '*password': 'str', '*fingerprint': 'str',
+ '*key_password': 'str', '*namespace': 'str' } }
+ '*key_password': 'str' } }
+
##
# @BlockdevOptionsNVMe:
#
@@ -4890,6 +4902,7 @@
@@ -4263,6 +4275,7 @@
'nfs': 'BlockdevOptionsNfs',
'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull',
+ 'pbs': 'BlockdevOptionsPbs',
'nvme': 'BlockdevOptionsNVMe',
'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
'if': 'CONFIG_BLKIO' },
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 325e684411..b6079f6a0e 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -45,6 +45,7 @@
'BlockInfo', # query-block
'BlockdevAioOptions', # blockdev-add, -blockdev
'BlockdevDriver', # blockdev-add, query-blockstats, ...
+ 'BlockdevOptionsPbs', # for PBS backwards compat
'BlockdevVmdkAdapterType', # blockdev-create (to match VMDK spec)
'BlockdevVmdkSubformat', # blockdev-create (to match VMDK spec)
'ColoCompareProperties', # object_add, -object
'parallels': 'BlockdevOptionsGenericFormat',
'preallocate':'BlockdevOptionsPreallocate',

View File

@@ -0,0 +1,74 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 8 Jul 2020 11:57:53 +0200
Subject: [PATCH] PVE: add query_proxmox_support QMP command
Generic interface for future use, currently used for PBS dirty-bitmap
backup support.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[PVE: query-proxmox-support: include library version]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 9 +++++++++
qapi/block-core.json | 29 +++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/pve-backup.c b/pve-backup.c
index 7527885251..8cba8e97d3 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1075,3 +1075,12 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
+
+ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+{
+ ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
+ ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
+ ret->pbs_dirty_bitmap = true;
+ ret->pbs_dirty_bitmap_savevm = true;
+ return ret;
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e4d0c923a4..3eebe7ff71 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -822,6 +822,35 @@
##
{ 'command': 'backup-cancel' }
+##
+# @ProxmoxSupportStatus:
+#
+# Contains info about supported features added by Proxmox.
+#
+# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
+# supported.
+#
+# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
+# safely be set for savevm-async.
+#
+# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+#
+##
+{ 'struct': 'ProxmoxSupportStatus',
+ 'data': { 'pbs-dirty-bitmap': 'bool',
+ 'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-library-version': 'str' } }
+
+##
+# @query-proxmox-support:
+#
+# Returns information about supported features added by Proxmox.
+#
+# Returns: @ProxmoxSupportStatus
+#
+##
+{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }
+
##
# @BlockDeviceTimedStats:
#

View File

@@ -0,0 +1,441 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 19 Aug 2020 17:02:00 +0200
Subject: [PATCH] PVE: add query-pbs-bitmap-info QMP call
Returns advanced information about dirty bitmaps used (or not used) for
the latest PBS backup.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
monitor/hmp-cmds.c | 28 ++++++-----
pve-backup.c | 117 ++++++++++++++++++++++++++++++++-----------
qapi/block-core.json | 56 +++++++++++++++++++++
3 files changed, 159 insertions(+), 42 deletions(-)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index b2b5f1298b..7a449edafa 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -198,6 +198,7 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict)
void hmp_info_backup(Monitor *mon, const QDict *qdict)
{
BackupStatus *info;
+ PBSBitmapInfoList *bitmap_info;
info = qmp_query_backup(NULL);
@@ -228,26 +229,29 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
// this should not happen normally
monitor_printf(mon, "Total size: %d\n", 0);
} else {
- bool incremental = false;
size_t total_or_dirty = info->total;
- if (info->has_transferred) {
- if (info->has_dirty && info->dirty) {
- if (info->dirty < info->total) {
- total_or_dirty = info->dirty;
- incremental = true;
- }
- }
+ bitmap_info = qmp_query_pbs_bitmap_info(NULL);
+
+ while (bitmap_info) {
+ monitor_printf(mon, "Drive %s:\n",
+ bitmap_info->value->drive);
+ monitor_printf(mon, " bitmap action: %s\n",
+ PBSBitmapAction_str(bitmap_info->value->action));
+ monitor_printf(mon, " size: %zd\n",
+ bitmap_info->value->size);
+ monitor_printf(mon, " dirty: %zd\n",
+ bitmap_info->value->dirty);
+ bitmap_info = bitmap_info->next;
}
- int per = (info->transferred * 100)/total_or_dirty;
-
- monitor_printf(mon, "Backup mode: %s\n", incremental ? "incremental" : "full");
+ qapi_free_PBSBitmapInfoList(bitmap_info);
int zero_per = (info->has_zero_bytes && info->zero_bytes) ?
(info->zero_bytes * 100)/info->total : 0;
monitor_printf(mon, "Total size: %zd\n", info->total);
+ int trans_per = (info->transferred * 100)/total_or_dirty;
monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
- info->transferred, per);
+ info->transferred, trans_per);
monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
info->zero_bytes, zero_per);
diff --git a/pve-backup.c b/pve-backup.c
index 8cba8e97d3..22420db26a 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -46,6 +46,7 @@ static struct PVEBackupState {
size_t transferred;
size_t reused;
size_t zero_bytes;
+ GList *bitmap_list;
} stat;
int64_t speed;
VmaWriter *vmaw;
@@ -672,7 +673,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
- size_t dirty = 0;
l = di_list;
while (l) {
@@ -693,18 +693,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_generate(uuid);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.reused = 0;
+
+ /* clear previous backup's bitmap_list */
+ if (backup_state.stat.bitmap_list) {
+ GList *bl = backup_state.stat.bitmap_list;
+ while (bl) {
+ g_free(((PBSBitmapInfo *)bl->data)->drive);
+ g_free(bl->data);
+ bl = g_list_next(bl);
+ }
+ g_list_free(backup_state.stat.bitmap_list);
+ backup_state.stat.bitmap_list = NULL;
+ }
+
if (format == BACKUP_FORMAT_PBS) {
if (!task->has_password) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
- goto err;
+ goto err_mutex;
}
if (!task->has_backup_id) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
- goto err;
+ goto err_mutex;
}
if (!task->has_backup_time) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
- goto err;
+ goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
@@ -731,12 +746,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
- goto err;
+ goto err_mutex;
}
int connect_result = proxmox_backup_co_connect(pbs, task->errp);
if (connect_result < 0)
- goto err;
+ goto err_mutex;
/* register all devices */
l = di_list;
@@ -747,6 +762,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->block_size = dump_cb_block_size;
const char *devname = bdrv_get_device_name(di->bs);
+ PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
+ size_t dirty = di->size;
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
@@ -755,49 +772,59 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (bitmap == NULL) {
bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
if (!bitmap) {
- goto err;
+ goto err_mutex;
}
+ action = PBS_BITMAP_ACTION_NEW;
} else {
expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
}
if (expect_only_dirty) {
- dirty += bdrv_get_dirty_count(bitmap);
+ /* track clean chunks as reused */
+ dirty = MIN(bdrv_get_dirty_count(bitmap), di->size);
+ backup_state.stat.reused += di->size - dirty;
+ action = PBS_BITMAP_ACTION_USED;
} else {
/* mark entire bitmap as dirty to make full backup */
bdrv_set_dirty_bitmap(bitmap, 0, di->size);
- dirty += di->size;
+ if (action != PBS_BITMAP_ACTION_NEW) {
+ action = PBS_BITMAP_ACTION_INVALID;
+ }
}
di->bitmap = bitmap;
} else {
- dirty += di->size;
-
/* after a full backup the old dirty bitmap is invalid anyway */
if (bitmap != NULL) {
bdrv_release_dirty_bitmap(bitmap);
+ action = PBS_BITMAP_ACTION_NOT_USED_REMOVED;
}
}
int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
if (dev_id < 0) {
- goto err;
+ goto err_mutex;
}
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
- goto err;
+ goto err_mutex;
}
di->dev_id = dev_id;
+
+ PBSBitmapInfo *info = g_malloc(sizeof(*info));
+ info->drive = g_strdup(devname);
+ info->action = action;
+ info->size = di->size;
+ info->dirty = dirty;
+ backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- dirty = total;
-
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
error_propagate(task->errp, local_err);
}
- goto err;
+ goto err_mutex;
}
/* register all devices for vma writer */
@@ -807,7 +834,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
l = g_list_next(l);
if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
- goto err;
+ goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
@@ -815,16 +842,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (di->dev_id <= 0) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
- goto err;
+ goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- dirty = total;
-
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
- goto err;
+ goto err_mutex;
}
backup_dir = task->backup_file;
@@ -841,18 +866,18 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->size, flags, false, &local_err);
if (local_err) {
error_propagate(task->errp, local_err);
- goto err;
+ goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
error_propagate(task->errp, local_err);
- goto err;
+ goto err_mutex;
}
}
} else {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
- goto err;
+ goto err_mutex;
}
@@ -860,7 +885,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_config_file) {
if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
- goto err;
+ goto err_mutex;
}
}
@@ -868,12 +893,11 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_firewall_file) {
if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
- goto err;
+ goto err_mutex;
}
}
/* initialize global backup_state now */
-
- qemu_mutex_lock(&backup_state.stat.lock);
+ /* note: 'reused' and 'bitmap_list' are initialized earlier */
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
@@ -893,10 +917,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
- backup_state.stat.dirty = dirty;
+ backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
- backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -913,6 +936,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->result = uuid_info;
return;
+err_mutex:
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
err:
l = di_list;
@@ -1076,11 +1102,42 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
+PBSBitmapInfoList *qmp_query_pbs_bitmap_info(Error **errp)
+{
+ PBSBitmapInfoList *head = NULL, **p_next = &head;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+
+ GList *l = backup_state.stat.bitmap_list;
+ while (l) {
+ PBSBitmapInfo *info = (PBSBitmapInfo *)l->data;
+ l = g_list_next(l);
+
+ /* clone bitmap info to avoid auto free after QMP marshalling */
+ PBSBitmapInfo *info_ret = g_malloc0(sizeof(*info_ret));
+ info_ret->drive = g_strdup(info->drive);
+ info_ret->action = info->action;
+ info_ret->size = info->size;
+ info_ret->dirty = info->dirty;
+
+ PBSBitmapInfoList *info_list = g_malloc0(sizeof(*info_list));
+ info_list->value = info_ret;
+
+ *p_next = info_list;
+ p_next = &info_list->next;
+ }
+
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ return head;
+}
+
ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
{
ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->query_bitmap_info = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3eebe7ff71..170c13984d 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -830,6 +830,8 @@
# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
# supported.
#
+# @query-bitmap-info: True if the 'query-pbs-bitmap-info' QMP call is supported.
+#
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
#
@@ -838,6 +840,7 @@
##
{ 'struct': 'ProxmoxSupportStatus',
'data': { 'pbs-dirty-bitmap': 'bool',
+ 'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-library-version': 'str' } }
@@ -851,6 +854,59 @@
##
{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }
+##
+# @PBSBitmapAction:
+#
+# An action taken on a dirty-bitmap when a backup job was started.
+#
+# @not-used: Bitmap mode was not enabled.
+#
+# @not-used-removed: Bitmap mode was not enabled, but a bitmap from a
+# previous backup still existed and was removed.
+#
+# @new: A new bitmap was attached to the drive for this backup.
+#
+# @used: An existing bitmap will be used to only backup changed data.
+#
+# @invalid: A bitmap existed, but had to be cleared since its associated
+# base snapshot did not match the base given for the current job or
+# the crypt mode has changed.
+#
+##
+{ 'enum': 'PBSBitmapAction',
+ 'data': ['not-used', 'not-used-removed', 'new', 'used', 'invalid'] }
+
+##
+# @PBSBitmapInfo:
+#
+# Contains information about dirty bitmaps used for each drive in a PBS backup.
+#
+# @drive: The underlying drive.
+#
+# @action: The action that was taken when the backup started.
+#
+# @size: The total size of the drive.
+#
+# @dirty: How much of the drive is considered dirty and will be backed up,
+# or 'size' if everything will be.
+#
+##
+{ 'struct': 'PBSBitmapInfo',
+ 'data': { 'drive': 'str', 'action': 'PBSBitmapAction', 'size': 'int',
+ 'dirty': 'int' } }
+
+##
+# @query-pbs-bitmap-info:
+#
+# Returns information about dirty bitmaps used on the most recently started
+# backup. Returns nothing when the last backup was not using PBS or if no
+# backup occurred in this session.
+#
+# Returns: @PBSBitmapInfo
+#
+##
+{ 'command': 'query-pbs-bitmap-info', 'returns': ['PBSBitmapInfo'] }
+
##
# @BlockDeviceTimedStats:
#

View File

@@ -14,45 +14,45 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/meson.build b/meson.build
index cbfc9a43fb..8206270272 100644
index 45c1f2de73..44071acbb7 100644
--- a/meson.build
+++ b/meson.build
@@ -1779,6 +1779,7 @@ endif
@@ -1065,6 +1065,7 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
libuuid = cc.find_library('uuid', required: true)
+libsystemd = cc.find_library('systemd', required: true)
libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true)
# libselinux
@@ -3406,6 +3407,7 @@ if have_block
# Malloc tests
@@ -2246,6 +2247,7 @@ if have_block
# os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
# os-win32.c does not
blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
+ blockdev_ss.add(when: 'CONFIG_POSIX', if_true: libsystemd)
system_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
endif
diff --git a/os-posix.c b/os-posix.c
index 0cc1d991b1..f33d9901cf 100644
index ae6c9f2a5e..36807806bf 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -29,6 +29,8 @@
@@ -28,6 +28,8 @@
#include <pwd.h>
#include <grp.h>
#include <libgen.h>
+#include <systemd/sd-journal.h>
+#include <syslog.h>
#include "qemu-common.h"
/* Needed early for CONFIG_BSD etc. */
#include "net/slirp.h"
@@ -332,9 +334,10 @@ void os_setup_post(void)
@@ -291,9 +293,10 @@ void os_setup_post(void)
dup2(fd, 0);
dup2(fd, 1);
- /* In case -D is given do not redirect stderr to /dev/null */
+ /* In case -D is given do not redirect stderr to journal */
if (!qemu_log_enabled()) {
if (!qemu_logfile) {
- dup2(fd, 2);
+ int journal_fd = sd_journal_stream_fd("QEMU", LOG_ERR, 0);
+ dup2(journal_fd, 2);

View File

@@ -7,14 +7,14 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/qemu/job.h | 12 ++++++++++++
job.c | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
job.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/include/qemu/job.h b/include/qemu/job.h
index e502787dd8..963cf2bef5 100644
index 41162ed494..6662c63519 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -381,6 +381,18 @@ void job_unlock(void);
@@ -285,6 +285,18 @@ typedef enum JobCreateFlags {
*/
JobTxn *job_txn_new(void);
@@ -34,10 +34,10 @@ index e502787dd8..963cf2bef5 100644
* Release a reference that was previously acquired with job_txn_add_job or
* job_txn_new. If it's the last reference to the object, it will be freed.
diff --git a/job.c b/job.c
index 93e22d180b..2b31f1e14f 100644
index 44eec9a441..a0753ff2f1 100644
--- a/job.c
+++ b/job.c
@@ -93,6 +93,8 @@ struct JobTxn {
@@ -72,6 +72,8 @@ struct JobTxn {
/* Reference count */
int refcnt;
@@ -45,8 +45,8 @@ index 93e22d180b..2b31f1e14f 100644
+ bool sequential;
};
void job_lock(void)
@@ -118,6 +120,25 @@ JobTxn *job_txn_new(void)
/* Right now, this mutex is only needed to synchronize accesses to job->busy
@@ -102,6 +104,25 @@ JobTxn *job_txn_new(void)
return txn;
}
@@ -69,23 +69,20 @@ index 93e22d180b..2b31f1e14f 100644
+ job_start(first);
+}
+
/* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn)
static void job_txn_ref(JobTxn *txn)
{
@@ -1057,6 +1078,12 @@ static void job_completed_txn_success_locked(Job *job)
txn->refcnt++;
@@ -850,6 +871,9 @@ static void job_completed_txn_success(Job *job)
*/
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (!job_is_completed_locked(other_job)) {
if (!job_is_completed(other_job)) {
+ if (txn->sequential) {
+ job_unlock();
+ /* Needs to be called without holding the job lock */
+ job_start(other_job);
+ job_lock();
+ }
return;
}
assert(other_job->ret == 0);
@@ -1268,6 +1295,13 @@ int job_finish_sync_locked(Job *job,
@@ -1020,6 +1044,13 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
return -EBUSY;
}
@@ -93,9 +90,9 @@ index 93e22d180b..2b31f1e14f 100644
+ * of cancelling, these have not begun work so job_enter won't do anything,
+ * let's ensure they are marked as ABORTING if required */
+ if (job->status == JOB_STATUS_CREATED && job->txn->sequential) {
+ job_update_rc_locked(job);
+ job_update_rc(job);
+ }
+
job_unlock();
AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
(job_enter(job), !job_is_completed(job)));
AIO_WAIT_WHILE(job->aio_context,
(job_enter(job), !job_is_completed(job)));

View File

@@ -0,0 +1,294 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 20 Aug 2020 14:25:00 +0200
Subject: [PATCH] PVE-Backup: Use a transaction to synchronize job states
By using a JobTxn, we can sync dirty bitmaps only when *all* jobs were
successful - meaning we don't need to remove them when the backup fails,
since QEMU's BITMAP_SYNC_MODE_ON_SUCCESS will now handle that for us.
To keep the rate-limiting and IO impact from before, we use a sequential
transaction, so drives will still be backed up one after the other.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
pve-backup.c | 169 +++++++++++++++------------------------------------
1 file changed, 50 insertions(+), 119 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 22420db26a..2e628d68e4 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -52,6 +52,7 @@ static struct PVEBackupState {
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
+ JobTxn *txn;
QemuMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
@@ -71,32 +72,12 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
- bool completed;
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
+ BlockJob *job;
} PVEBackupDevInfo;
-static void pvebackup_run_next_job(void);
-
-static BlockJob *
-lookup_active_block_job(PVEBackupDevInfo *di)
-{
- if (!di->completed && di->bs) {
- for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
- if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
- continue;
- }
-
- BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
- if (bjob && bjob->source_bs == di->bs) {
- return job;
- }
- }
- }
- return NULL;
-}
-
static void pvebackup_propagate_error(Error *err)
{
qemu_mutex_lock(&backup_state.stat.lock);
@@ -272,18 +253,6 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
if (local_err != NULL) {
pvebackup_propagate_error(local_err);
}
- } else {
- // on error or cancel we cannot ensure synchronization of dirty
- // bitmaps with backup server, so remove all and do full backup next
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- if (di->bitmap) {
- bdrv_release_dirty_bitmap(di->bitmap);
- }
- }
}
proxmox_backup_disconnect(backup_state.pbs);
@@ -322,8 +291,6 @@ static void pvebackup_complete_cb(void *opaque, int ret)
qemu_mutex_lock(&backup_state.backup_mutex);
- di->completed = true;
-
if (ret < 0) {
Error *local_err = NULL;
error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
@@ -336,20 +303,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
block_on_coroutine_fn(pvebackup_complete_stream, di);
- // remove self from job queue
+ // remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
- if (di->bitmap && ret < 0) {
- // on error or cancel we cannot ensure synchronization of dirty
- // bitmaps with backup server, so remove all and do full backup next
- bdrv_release_dirty_bitmap(di->bitmap);
- }
-
g_free(di);
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ /* call cleanup if we're the last job */
+ if (!g_list_first(backup_state.di_list)) {
+ block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ }
- pvebackup_run_next_job();
+ qemu_mutex_unlock(&backup_state.backup_mutex);
}
static void pvebackup_cancel(void)
@@ -371,36 +335,28 @@ static void pvebackup_cancel(void)
proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
+ /* it's enough to cancel one job in the transaction, the rest will follow
+ * automatically */
+ GList *bdi = g_list_first(backup_state.di_list);
+ BlockJob *cancel_job = bdi && bdi->data ?
+ ((PVEBackupDevInfo *)bdi->data)->job :
+ NULL;
+
+ /* ref the job before releasing the mutex, just to be safe */
+ if (cancel_job) {
+ job_ref(&cancel_job->job);
+ }
+
+ /* job_cancel_sync may enter the job, so we need to release the
+ * backup_mutex to avoid deadlock */
qemu_mutex_unlock(&backup_state.backup_mutex);
- for(;;) {
-
- BlockJob *next_job = NULL;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
-
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- BlockJob *job = lookup_active_block_job(di);
- if (job != NULL) {
- next_job = job;
- break;
- }
- }
-
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (next_job) {
- AioContext *aio_context = next_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&next_job->job);
- aio_context_release(aio_context);
- } else {
- break;
- }
+ if (cancel_job) {
+ AioContext *aio_context = cancel_job->job.aio_context;
+ aio_context_acquire(aio_context);
+ job_cancel_sync(&cancel_job->job);
+ job_unref(&cancel_job->job);
+ aio_context_release(aio_context);
}
}
@@ -459,51 +415,19 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-bool job_should_pause(Job *job);
-
-static void pvebackup_run_next_job(void)
-{
- assert(!qemu_in_coroutine());
-
- qemu_mutex_lock(&backup_state.backup_mutex);
-
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- BlockJob *job = lookup_active_block_job(di);
-
- if (job) {
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- AioContext *aio_context = job->job.aio_context;
- aio_context_acquire(aio_context);
-
- if (job_should_pause(&job->job)) {
- bool error_or_canceled = pvebackup_error_or_canceled();
- if (error_or_canceled) {
- job_cancel_sync(&job->job);
- } else {
- job_resume(&job->job);
- }
- }
- aio_context_release(aio_context);
- return;
- }
- }
-
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
-
- qemu_mutex_unlock(&backup_state.backup_mutex);
-}
-
static bool create_backup_jobs(void) {
assert(!qemu_in_coroutine());
Error *local_err = NULL;
+ /* create job transaction to synchronize bitmap commit and cancel all
+ * jobs in case one errors */
+ if (backup_state.txn) {
+ job_txn_unref(backup_state.txn);
+ }
+ backup_state.txn = job_txn_new_seq();
+
BackupPerf perf = { .max_workers = 16 };
/* create and start all jobs (paused state) */
@@ -526,7 +450,7 @@ static bool create_backup_jobs(void) {
BlockJob *job = backup_job_create(
NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
+ JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
aio_context_release(aio_context);
@@ -538,7 +462,8 @@ static bool create_backup_jobs(void) {
pvebackup_propagate_error(create_job_err);
break;
}
- job_start(&job->job);
+
+ di->job = job;
bdrv_unref(di->target);
di->target = NULL;
@@ -556,6 +481,10 @@ static bool create_backup_jobs(void) {
bdrv_unref(di->target);
di->target = NULL;
}
+
+ if (di->job) {
+ job_unref(&di->job->job);
+ }
}
}
@@ -946,10 +875,6 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (di->bitmap) {
- bdrv_release_dirty_bitmap(di->bitmap);
- }
-
if (di->target) {
bdrv_unref(di->target);
}
@@ -1038,9 +963,15 @@ UuidInfo *qmp_backup(
block_on_coroutine_fn(pvebackup_co_prepare, &task);
if (*errp == NULL) {
- create_backup_jobs();
+ bool errors = create_backup_jobs();
qemu_mutex_unlock(&backup_state.backup_mutex);
- pvebackup_run_next_job();
+
+ if (!errors) {
+ /* start the first job in the transaction
+ * note: this might directly enter the job, so we need to do this
+ * after unlocking the backup_mutex */
+ job_txn_start_seq(backup_state.txn);
+ }
} else {
qemu_mutex_unlock(&backup_state.backup_mutex);
}

View File

@@ -0,0 +1,501 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 28 Sep 2020 13:40:51 +0200
Subject: [PATCH] PVE-Backup: Don't block on finishing and cleanup
create_backup_jobs
proxmox_backup_co_finish is already async, but previously we would wait
for the coroutine using block_on_coroutine_fn(). Avoid this by
scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
as a real coroutine when calling from pvebackup_complete_cb. This is ok,
since complete_stream uses the backup_mutex internally to synchronize,
and other streams can happily continue writing in the meantime anyway.
To accommodate this, backup_mutex is converted to a CoMutex. This means
converting every user to a coroutine. This is not just useful here, but
will come in handy once this series[0] is merged, and QMP calls can be
yield-able coroutines too. Then we can also finally get rid of
block_on_coroutine_fn.
Cases of aio_context_acquire/release from within what is now a coroutine
are changed to aio_co_reschedule_self, which works since a running
coroutine always holds the aio lock for the context it is running in.
job_cancel_sync is called from a BH since it can't be run from a
coroutine (uses AIO_WAIT_WHILE internally).
Same thing for create_backup_jobs, which is converted to a BH too.
To communicate the finishing state, a new property is introduced to
query-backup: 'finishing'. A new state is explicitly not used, since
that would break compatibility with older qemu-server versions.
Also fix create_backup_jobs:
No more weird bool returns, just the standard "errp" format used
everywhere else too. With this, if backup_job_create fails, the error
message is actually returned over QMP and can be shown to the user.
To facilitate correct cleanup on such an error, we call
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
This additionally allows us to actually hold the backup_mutex during
operation.
Also add a job_cancel_sync before job_unref, since a job must be in
STATUS_NULL to be deleted by unref, which could trigger an assert
before.
[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
pve-backup.c | 217 ++++++++++++++++++++++++++++---------------
qapi/block-core.json | 5 +-
2 files changed, 144 insertions(+), 78 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 2e628d68e4..9c20ef3a5e 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
static struct PVEBackupState {
struct {
- // Everithing accessed from qmp_backup_query command is protected using lock
+ // Everything accessed from qmp_backup_query command is protected using
+ // this lock. Do NOT hold this lock for long times, as it is sometimes
+ // acquired from coroutines, and thus any wait time may block the guest.
QemuMutex lock;
Error *error;
time_t start_time;
@@ -47,20 +49,22 @@ static struct PVEBackupState {
size_t reused;
size_t zero_bytes;
GList *bitmap_list;
+ bool finishing;
+ bool starting;
} stat;
int64_t speed;
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
JobTxn *txn;
- QemuMutex backup_mutex;
+ CoMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
static void pvebackup_init(void)
{
qemu_mutex_init(&backup_state.stat.lock);
- qemu_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.backup_mutex);
qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
@@ -72,6 +76,7 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
+ int completed_ret; // INT_MAX if not completed
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
@@ -227,12 +232,12 @@ pvebackup_co_dump_vma_cb(
}
// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_co_cleanup(void *unused)
+static void coroutine_fn pvebackup_co_cleanup(void)
{
assert(qemu_in_coroutine());
qemu_mutex_lock(&backup_state.stat.lock);
- backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = true;
qemu_mutex_unlock(&backup_state.stat.lock);
if (backup_state.vmaw) {
@@ -261,35 +266,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
}
-// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_complete_stream(void *opaque)
+static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
{
PVEBackupDevInfo *di = opaque;
+ int ret = di->completed_ret;
- bool error_or_canceled = pvebackup_error_or_canceled();
-
- if (backup_state.vmaw) {
- vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ bool starting = backup_state.stat.starting;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ if (starting) {
+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
+ * must) skip all cleanup, as we don't know what has and hasn't been
+ * initialized yet. */
+ return;
}
- if (backup_state.pbs && !error_or_canceled) {
- Error *local_err = NULL;
- proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
- if (local_err != NULL) {
- pvebackup_propagate_error(local_err);
- }
- }
-}
-
-static void pvebackup_complete_cb(void *opaque, int ret)
-{
- assert(!qemu_in_coroutine());
-
- PVEBackupDevInfo *di = opaque;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (ret < 0) {
Error *local_err = NULL;
@@ -301,7 +300,19 @@ static void pvebackup_complete_cb(void *opaque, int ret)
assert(di->target == NULL);
- block_on_coroutine_fn(pvebackup_complete_stream, di);
+ bool error_or_canceled = pvebackup_error_or_canceled();
+
+ if (backup_state.vmaw) {
+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ }
+
+ if (backup_state.pbs && !error_or_canceled) {
+ Error *local_err = NULL;
+ proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
+ if (local_err != NULL) {
+ pvebackup_propagate_error(local_err);
+ }
+ }
// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -310,21 +321,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
/* call cleanup if we're the last job */
if (!g_list_first(backup_state.di_list)) {
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ pvebackup_co_cleanup();
}
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-static void pvebackup_cancel(void)
+static void pvebackup_complete_cb(void *opaque, int ret)
{
- assert(!qemu_in_coroutine());
+ PVEBackupDevInfo *di = opaque;
+ di->completed_ret = ret;
+ /*
+ * Schedule stream cleanup in async coroutine. close_image and finish might
+ * take a while, so we can't block on them here. This way it also doesn't
+ * matter if we're already running in a coroutine or not.
+ * Note: di is a pointer to an entry in the global backup_state struct, so
+ * it stays valid.
+ */
+ Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
+ aio_co_enter(qemu_get_aio_context(), co);
+}
+
+/*
+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
+ * main loop processing via a bottom half.
+ */
+static void job_cancel_bh(void *opaque) {
+ CoCtxData *data = (CoCtxData*)opaque;
+ Job *job = (Job*)data->data;
+ AioContext *job_ctx = job->aio_context;
+ aio_context_acquire(job_ctx);
+ job_cancel_sync(job);
+ aio_context_release(job_ctx);
+ aio_co_enter(data->ctx, data->co);
+}
+
+static void coroutine_fn pvebackup_co_cancel(void *opaque)
+{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
pvebackup_propagate_error(cancel_err);
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
@@ -342,27 +381,22 @@ static void pvebackup_cancel(void)
((PVEBackupDevInfo *)bdi->data)->job :
NULL;
- /* ref the job before releasing the mutex, just to be safe */
if (cancel_job) {
- job_ref(&cancel_job->job);
+ CoCtxData data = {
+ .ctx = qemu_get_current_aio_context(),
+ .co = qemu_coroutine_self(),
+ .data = &cancel_job->job,
+ };
+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+ qemu_coroutine_yield();
}
- /* job_cancel_sync may enter the job, so we need to release the
- * backup_mutex to avoid deadlock */
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (cancel_job) {
- AioContext *aio_context = cancel_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&cancel_job->job);
- job_unref(&cancel_job->job);
- aio_context_release(aio_context);
- }
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
void qmp_backup_cancel(Error **errp)
{
- pvebackup_cancel();
+ block_on_coroutine_fn(pvebackup_co_cancel, NULL);
}
// assumes the caller holds backup_mutex
@@ -415,10 +449,18 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-static bool create_backup_jobs(void) {
+/*
+ * backup_job_create can *not* be run from a coroutine (and requires an
+ * acquired AioContext), so this can't either.
+ * The caller must ensure that backup_mutex is held nonetheless.
+ */
+static void create_backup_jobs_bh(void *opaque) {
assert(!qemu_in_coroutine());
+ CoCtxData *data = (CoCtxData*)opaque;
+ Error **errp = (Error**)data->data;
+
Error *local_err = NULL;
/* create job transaction to synchronize bitmap commit and cancel all
@@ -454,24 +496,19 @@ static bool create_backup_jobs(void) {
aio_context_release(aio_context);
- if (!job || local_err != NULL) {
- Error *create_job_err = NULL;
- error_setg(&create_job_err, "backup_job_create failed: %s",
+ di->job = job;
+
+ if (!job || local_err) {
+ error_setg(errp, "backup_job_create failed: %s",
local_err ? error_get_pretty(local_err) : "null");
-
- pvebackup_propagate_error(create_job_err);
break;
}
- di->job = job;
-
bdrv_unref(di->target);
di->target = NULL;
}
- bool errors = pvebackup_error_or_canceled();
-
- if (errors) {
+ if (*errp) {
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -483,12 +520,17 @@ static bool create_backup_jobs(void) {
}
if (di->job) {
+ AioContext *ctx = di->job->job.aio_context;
+ aio_context_acquire(ctx);
+ job_cancel_sync(&di->job->job);
job_unref(&di->job->job);
+ aio_context_release(ctx);
}
}
}
- return errors;
+ /* return */
+ aio_co_enter(data->ctx, data->co);
}
typedef struct QmpBackupTask {
@@ -525,11 +567,12 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;
-// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_prepare(void *opaque)
{
assert(qemu_in_coroutine());
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
QmpBackupTask *task = opaque;
task->result = NULL; // just to be sure
@@ -550,8 +593,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -618,6 +662,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
di->size = size;
total += size;
+
+ di->completed_ret = INT_MAX;
}
uuid_generate(uuid);
@@ -849,6 +895,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.finishing = false;
+ backup_state.stat.starting = true;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -863,6 +911,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info->UUID = uuid_str;
task->result = uuid_info;
+
+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
+ * points, and we'll release it as soon as the BH reschedules us.
+ */
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &local_err,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
+ qemu_coroutine_yield();
+
+ if (local_err) {
+ error_propagate(task->errp, local_err);
+ goto err;
+ }
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.starting = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ /* start the first job in the transaction */
+ job_txn_start_seq(backup_state.txn);
+
return;
err_mutex:
@@ -885,6 +960,7 @@ err:
g_free(di);
}
g_list_free(di_list);
+ backup_state.di_list = NULL;
if (devs) {
g_strfreev(devs);
@@ -905,6 +981,8 @@ err:
}
task->result = NULL;
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -958,24 +1036,8 @@ UuidInfo *qmp_backup(
.errp = errp,
};
- qemu_mutex_lock(&backup_state.backup_mutex);
-
block_on_coroutine_fn(pvebackup_co_prepare, &task);
- if (*errp == NULL) {
- bool errors = create_backup_jobs();
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (!errors) {
- /* start the first job in the transaction
- * note: this might directly enter the job, so we need to do this
- * after unlocking the backup_mutex */
- job_txn_start_seq(backup_state.txn);
- }
- } else {
- qemu_mutex_unlock(&backup_state.backup_mutex);
- }
-
return task.result;
}
@@ -1027,6 +1089,7 @@ BackupStatus *qmp_query_backup(Error **errp)
info->transferred = backup_state.stat.transferred;
info->has_reused = true;
info->reused = backup_state.stat.reused;
+ info->finishing = backup_state.stat.finishing;
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 170c13984d..a0d1d278e9 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -729,12 +729,15 @@
#
# @uuid: uuid for this backup job
#
+# @finishing: if status='active' and finishing=true, then the backup process is
+# waiting for the target to finish.
+#
##
{ 'struct': 'BackupStatus',
'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
'*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
- '*backup-file': 'str', '*uuid': 'str' } }
+ '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }
##
# @BackupFormat:

View File

@@ -13,23 +13,21 @@ safe migration is possible and makes sense.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: split up state_pending for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/migration/misc.h | 3 ++
migration/meson.build | 2 +
migration/migration.c | 1 +
migration/pbs-state.c | 104 +++++++++++++++++++++++++++++++++++++++
migration/pbs-state.c | 106 +++++++++++++++++++++++++++++++++++++++
pve-backup.c | 1 +
qapi/block-core.json | 6 +++
6 files changed, 117 insertions(+)
6 files changed, 119 insertions(+)
create mode 100644 migration/pbs-state.c
diff --git a/include/migration/misc.h b/include/migration/misc.h
index 7dcc0b5c2c..4c940b2475 100644
index 465906710d..4f0aeceb6f 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -77,4 +77,7 @@ bool migration_in_bg_snapshot(void);
@@ -75,4 +75,7 @@ bool migration_in_bg_snapshot(void);
/* migration/block-dirty-bitmap.c */
void dirty_bitmap_mig_init(void);
@@ -38,37 +36,38 @@ index 7dcc0b5c2c..4c940b2475 100644
+
#endif
diff --git a/migration/meson.build b/migration/meson.build
index 07f6057acc..343994d891 100644
index ea9aedeefc..c27dc9bd97 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -7,7 +7,9 @@ migration_files = files(
'vmstate.c',
@@ -7,8 +7,10 @@ migration_files = files(
'qemu-file-channel.c',
'qemu-file.c',
'yank_functions.c',
+ 'pbs-state.c',
)
+system_ss.add(libproxmox_backup_qemu)
softmmu_ss.add(migration_files)
+softmmu_ss.add(libproxmox_backup_qemu)
system_ss.add(files(
softmmu_ss.add(files(
'block-dirty-bitmap.c',
diff --git a/migration/migration.c b/migration/migration.c
index 7a4c8beb5d..0a955a2a18 100644
index 041b8451a6..9df2eed75e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -162,6 +162,7 @@ void migration_object_init(void)
@@ -218,6 +218,7 @@ void migration_object_init(void)
blk_mig_init();
ram_mig_init();
dirty_bitmap_mig_init();
+ pbs_state_mig_init();
}
void migration_cancel(const Error *error)
void migration_cancel(void)
diff --git a/migration/pbs-state.c b/migration/pbs-state.c
new file mode 100644
index 0000000000..887e998b9e
index 0000000000..29f2b3860d
--- /dev/null
+++ b/migration/pbs-state.c
@@ -0,0 +1,104 @@
@@ -0,0 +1,106 @@
+/*
+ * PBS (dirty-bitmap) state migration
+ */
@@ -87,8 +86,11 @@ index 0000000000..887e998b9e
+/* state is accessed via this static variable directly, 'opaque' is NULL */
+static PBSState pbs_state;
+
+static void pbs_state_pending(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
+static void pbs_state_save_pending(QEMUFile *f, void *opaque,
+ uint64_t max_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ /* we send everything in save_setup, so nothing is ever pending */
+}
@@ -158,8 +160,7 @@ index 0000000000..887e998b9e
+static SaveVMHandlers savevm_pbs_state_handlers = {
+ .save_setup = pbs_state_save_setup,
+ .has_postcopy = pbs_state_has_postcopy,
+ .state_pending_exact = pbs_state_pending,
+ .state_pending_estimate = pbs_state_pending,
+ .save_live_pending = pbs_state_save_pending,
+ .is_active_iterate = pbs_state_is_active_iterate,
+ .load_state = pbs_state_load,
+ .is_active = pbs_state_is_active,
@@ -174,22 +175,22 @@ index 0000000000..887e998b9e
+ NULL);
+}
diff --git a/pve-backup.c b/pve-backup.c
index d84d807654..9c8b88d075 100644
index 9c20ef3a5e..59ccb38ceb 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1060,6 +1060,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
@@ -1132,6 +1132,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index d67a6d448a..09de550c95 100644
index a0d1d278e9..e5de769dc1 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -991,6 +991,11 @@
@@ -838,6 +838,11 @@
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
#
@@ -198,14 +199,14 @@ index d67a6d448a..09de550c95 100644
+# migration cap if this is false/unset may lead
+# to crashes on migration!
+#
# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
# parameter.
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
@@ -1001,6 +1006,7 @@
##
@@ -845,6 +850,7 @@
'data': { 'pbs-dirty-bitmap': 'bool',
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
'pbs-library-version': 'str',
'backup-max-workers': 'bool' } }
'pbs-library-version': 'str' } }
##

View File

@@ -19,10 +19,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index e1ae3b7316..285dd1d148 100644
index 35f5ef688d..c4640925e7 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -540,7 +540,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
@@ -538,7 +538,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
error_report_err(local_err);

View File

@@ -21,10 +21,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 30 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index 34f97ab646..398782963d 100644
index 4d2a416ce7..c345d30812 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1391,12 +1391,42 @@ static char *get_initiator_name(QemuOpts *opts)
@@ -1372,12 +1372,42 @@ static char *get_initiator_name(QemuOpts *opts)
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;

View File

@@ -1,80 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 23 Jun 2022 14:00:05 +0200
Subject: [PATCH] Revert "block/rbd: workaround for ceph issue #53784"
This reverts commit fc176116cdea816ceb8dd969080b2b95f58edbc0 in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/rbd.c | 42 ++----------------------------------------
1 file changed, 2 insertions(+), 40 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index a4749f3b1b..53e0396b51 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1511,7 +1511,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r;
RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags;
- uint64_t head = 0;
assert(offset + bytes <= s->image_size);
@@ -1539,43 +1538,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status;
}
-#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
- /*
- * librbd had a bug until early 2022 that affected all versions of ceph that
- * supported fast-diff. This bug results in reporting of incorrect offsets
- * if the offset parameter to rbd_diff_iterate2 is not object aligned.
- * Work around this bug by rounding down the offset to object boundaries.
- * This is OK because we call rbd_diff_iterate2 with whole_object = true.
- * However, this workaround only works for non cloned images with default
- * striping.
- *
- * See: https://tracker.ceph.com/issues/53784
- */
-
- /* check if RBD image has non-default striping enabled */
- if (features & RBD_FEATURE_STRIPINGV2) {
- return status;
- }
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
- /*
- * check if RBD image is a clone (= has a parent).
- *
- * rbd_get_parent_info is deprecated from Nautilus onwards, but the
- * replacement rbd_get_parent is not present in Luminous and Mimic.
- */
- if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
- return status;
- }
-#pragma GCC diagnostic pop
-
- head = req.offs & (s->object_size - 1);
- req.offs -= head;
- bytes += head;
-#endif
-
- r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
+ r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status;
@@ -1594,8 +1557,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
}
- assert(req.bytes > head);
- *pnum = req.bytes - head;
+ *pnum = req.bytes;
return status;
}

View File

@@ -0,0 +1,598 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 26 Jan 2021 15:45:30 +0100
Subject: [PATCH] PVE: Use coroutine QMP for backup/cancel_backup
Finally turn backup QMP calls into coroutines, now that it's possible.
This has the benefit that calls are asynchronous to the main loop, i.e.
long running operations like connecting to a PBS server will no longer
hang the VM.
Additionally, it allows us to get rid of block_on_coroutine_fn, which
was always a hacky workaround.
While we're already spring cleaning, also remove the QmpBackupTask
struct, since we can now put the 'prepare' function directly into
qmp_backup and thus no longer need those giant walls of text.
(Note that for our patches to work with 5.2.0 this change is actually
required: otherwise monitor_get_fd() fails, because we are not in a QMP
coroutine but in one we started ourselves. We could of course set the
monitor for that coroutine ourselves, but let's just fix it the right
way instead.)
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 +-
hmp-commands.hx | 2 +
proxmox-backup-client.c | 31 -----
pve-backup.c | 232 ++++++++++-----------------------
qapi/block-core.json | 4 +-
5 files changed, 77 insertions(+), 196 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 69254396d5..b838586fc0 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1016,7 +1016,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
g_free(global_snapshots);
}
-void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup_cancel(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
@@ -1025,7 +1025,7 @@ void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, error);
}
-void hmp_backup(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 7faba36b39..dca4e58858 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -109,6 +109,7 @@ ERST
"\n\t\t\t Use -d to dump data into a directory instead"
"\n\t\t\t of using VMA format.",
.cmd = hmp_backup,
+ .coroutine = true,
},
SRST
@@ -122,6 +123,7 @@ ERST
.params = "",
.help = "cancel the current VM backup",
.cmd = hmp_backup_cancel,
+ .coroutine = true,
},
SRST
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index 4ce7bc0b5e..0923037dec 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -5,37 +5,6 @@
/* Proxmox Backup Server client bindings using coroutines */
-typedef struct BlockOnCoroutineWrapper {
- AioContext *ctx;
- CoroutineEntry *entry;
- void *entry_arg;
- bool finished;
-} BlockOnCoroutineWrapper;
-
-static void coroutine_fn block_on_coroutine_wrapper(void *opaque)
-{
- BlockOnCoroutineWrapper *wrapper = opaque;
- wrapper->entry(wrapper->entry_arg);
- wrapper->finished = true;
- aio_wait_kick();
-}
-
-void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg)
-{
- assert(!qemu_in_coroutine());
-
- AioContext *ctx = qemu_get_current_aio_context();
- BlockOnCoroutineWrapper wrapper = {
- .finished = false,
- .entry = entry,
- .entry_arg = entry_arg,
- .ctx = ctx,
- };
- Coroutine *wrapper_co = qemu_coroutine_create(block_on_coroutine_wrapper, &wrapper);
- aio_co_enter(ctx, wrapper_co);
- AIO_WAIT_WHILE(ctx, !wrapper.finished);
-}
-
// This is called from another thread, so we use aio_co_schedule()
static void proxmox_backup_schedule_wake(void *data) {
CoCtxData *waker = (CoCtxData *)data;
diff --git a/pve-backup.c b/pve-backup.c
index 59ccb38ceb..f858003a06 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -357,7 +357,7 @@ static void job_cancel_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-static void coroutine_fn pvebackup_co_cancel(void *opaque)
+void coroutine_fn qmp_backup_cancel(Error **errp)
{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
@@ -394,11 +394,6 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-void qmp_backup_cancel(Error **errp)
-{
- block_on_coroutine_fn(pvebackup_co_cancel, NULL);
-}
-
// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config(
const char *file,
@@ -533,50 +528,27 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-typedef struct QmpBackupTask {
- const char *backup_file;
- bool has_password;
- const char *password;
- bool has_keyfile;
- const char *keyfile;
- bool has_key_password;
- const char *key_password;
- bool has_backup_id;
- const char *backup_id;
- bool has_backup_time;
- const char *fingerprint;
- bool has_fingerprint;
- int64_t backup_time;
- bool has_use_dirty_bitmap;
- bool use_dirty_bitmap;
- bool has_format;
- BackupFormat format;
- bool has_config_file;
- const char *config_file;
- bool has_firewall_file;
- const char *firewall_file;
- bool has_devlist;
- const char *devlist;
- bool has_compress;
- bool compress;
- bool has_encrypt;
- bool encrypt;
- bool has_speed;
- int64_t speed;
- Error **errp;
- UuidInfo *result;
-} QmpBackupTask;
-
-static void coroutine_fn pvebackup_co_prepare(void *opaque)
+UuidInfo coroutine_fn *qmp_backup(
+ const char *backup_file,
+ bool has_password, const char *password,
+ bool has_keyfile, const char *keyfile,
+ bool has_key_password, const char *key_password,
+ bool has_fingerprint, const char *fingerprint,
+ bool has_backup_id, const char *backup_id,
+ bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
+ bool has_format, BackupFormat format,
+ bool has_config_file, const char *config_file,
+ bool has_firewall_file, const char *firewall_file,
+ bool has_devlist, const char *devlist,
+ bool has_speed, int64_t speed, Error **errp)
{
assert(qemu_in_coroutine());
qemu_co_mutex_lock(&backup_state.backup_mutex);
- QmpBackupTask *task = opaque;
-
- task->result = NULL; // just to be sure
-
BlockBackend *blk;
BlockDriverState *bs = NULL;
const char *backup_dir = NULL;
@@ -593,17 +565,17 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
+ return NULL;
}
/* Todo: try to auto-detect format based on file name */
- BackupFormat format = task->has_format ? task->format : BACKUP_FORMAT_VMA;
+ format = has_format ? format : BACKUP_FORMAT_VMA;
- if (task->has_devlist) {
- devs = g_strsplit_set(task->devlist, ",;:", -1);
+ if (has_devlist) {
+ devs = g_strsplit_set(devlist, ",;:", -1);
gchar **d = devs;
while (d && *d) {
@@ -611,14 +583,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (blk) {
bs = blk_bs(blk);
if (!bdrv_is_inserted(bs)) {
- error_setg(task->errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
goto err;
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
di_list = g_list_append(di_list, di);
} else {
- error_set(task->errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", *d);
goto err;
}
@@ -641,7 +613,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (!di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
goto err;
}
@@ -651,13 +623,13 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, task->errp)) {
+ if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
goto err;
}
ssize_t size = bdrv_getlength(di->bs);
if (size < 0) {
- error_setg_errno(task->errp, -di->size, "bdrv_getlength failed");
+ error_setg_errno(errp, -di->size, "bdrv_getlength failed");
goto err;
}
di->size = size;
@@ -684,47 +656,44 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (format == BACKUP_FORMAT_PBS) {
- if (!task->has_password) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
+ if (!has_password) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
goto err_mutex;
}
- if (!task->has_backup_id) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
+ if (!has_backup_id) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
goto err_mutex;
}
- if (!task->has_backup_time) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
+ if (!has_backup_time) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
- bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
-
-
char *pbs_err = NULL;
pbs = proxmox_backup_new(
- task->backup_file,
- task->backup_id,
- task->backup_time,
+ backup_file,
+ backup_id,
+ backup_time,
dump_cb_block_size,
- task->has_password ? task->password : NULL,
- task->has_keyfile ? task->keyfile : NULL,
- task->has_key_password ? task->key_password : NULL,
- task->has_compress ? task->compress : true,
- task->has_encrypt ? task->encrypt : task->has_keyfile,
- task->has_fingerprint ? task->fingerprint : NULL,
+ has_password ? password : NULL,
+ has_keyfile ? keyfile : NULL,
+ has_key_password ? key_password : NULL,
+ has_compress ? compress : true,
+ has_encrypt ? encrypt : has_keyfile,
+ has_fingerprint ? fingerprint : NULL,
&pbs_err);
if (!pbs) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
goto err_mutex;
}
- int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ int connect_result = proxmox_backup_co_connect(pbs, errp);
if (connect_result < 0)
goto err_mutex;
@@ -743,9 +712,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
- if (use_dirty_bitmap) {
+ if (has_use_dirty_bitmap && use_dirty_bitmap) {
if (bitmap == NULL) {
- bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, errp);
if (!bitmap) {
goto err_mutex;
}
@@ -775,12 +744,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp);
if (dev_id < 0) {
goto err_mutex;
}
- if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, errp))) {
goto err_mutex;
}
@@ -794,10 +763,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
+ vmaw = vma_writer_create(backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
}
goto err_mutex;
}
@@ -808,25 +777,25 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, errp))) {
goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
if (di->dev_id <= 0) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- if (mkdir(task->backup_file, 0640) != 0) {
- error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
- task->backup_file);
+ if (mkdir(backup_file, 0640) != 0) {
+ error_setg_errno(errp, errno, "can't create directory '%s'\n",
+ backup_file);
goto err_mutex;
}
- backup_dir = task->backup_file;
+ backup_dir = backup_file;
l = di_list;
while (l) {
@@ -840,34 +809,34 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bdrv_img_create(di->targetfile, "raw", NULL, NULL, NULL,
di->size, flags, false, &local_err);
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
}
} else {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
goto err_mutex;
}
/* add configuration file to archive */
- if (task->has_config_file) {
- if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_config_file) {
+ if (pvebackup_co_add_config(config_file, config_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
/* add firewall file to archive */
- if (task->has_firewall_file) {
- if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_firewall_file) {
+ if (pvebackup_co_add_config(firewall_file, firewall_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
@@ -885,7 +854,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (backup_state.stat.backup_file) {
g_free(backup_state.stat.backup_file);
}
- backup_state.stat.backup_file = g_strdup(task->backup_file);
+ backup_state.stat.backup_file = g_strdup(backup_file);
uuid_copy(backup_state.stat.uuid, uuid);
uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
@@ -900,7 +869,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_mutex_unlock(&backup_state.stat.lock);
- backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
+ backup_state.speed = (has_speed && speed > 0) ? speed : 0;
backup_state.vmaw = vmaw;
backup_state.pbs = pbs;
@@ -910,8 +879,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str;
- task->result = uuid_info;
-
/* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
* backup_mutex locked. This is fine, a CoMutex can be held across yield
* points, and we'll release it as soon as the BH reschedules us.
@@ -925,7 +892,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_coroutine_yield();
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err;
}
@@ -938,7 +905,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
/* start the first job in the transaction */
job_txn_start_seq(backup_state.txn);
- return;
+ return uuid_info;
err_mutex:
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -969,7 +936,7 @@ err:
if (vmaw) {
Error *err = NULL;
vma_writer_close(vmaw, &err);
- unlink(task->backup_file);
+ unlink(backup_file);
}
if (pbs) {
@@ -980,65 +947,8 @@ err:
rmdir(backup_dir);
}
- task->result = NULL;
-
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
-}
-
-UuidInfo *qmp_backup(
- const char *backup_file,
- bool has_password, const char *password,
- bool has_keyfile, const char *keyfile,
- bool has_key_password, const char *key_password,
- bool has_fingerprint, const char *fingerprint,
- bool has_backup_id, const char *backup_id,
- bool has_backup_time, int64_t backup_time,
- bool has_use_dirty_bitmap, bool use_dirty_bitmap,
- bool has_compress, bool compress,
- bool has_encrypt, bool encrypt,
- bool has_format, BackupFormat format,
- bool has_config_file, const char *config_file,
- bool has_firewall_file, const char *firewall_file,
- bool has_devlist, const char *devlist,
- bool has_speed, int64_t speed, Error **errp)
-{
- QmpBackupTask task = {
- .backup_file = backup_file,
- .has_password = has_password,
- .password = password,
- .has_keyfile = has_keyfile,
- .keyfile = keyfile,
- .has_key_password = has_key_password,
- .key_password = key_password,
- .has_fingerprint = has_fingerprint,
- .fingerprint = fingerprint,
- .has_backup_id = has_backup_id,
- .backup_id = backup_id,
- .has_backup_time = has_backup_time,
- .backup_time = backup_time,
- .has_use_dirty_bitmap = has_use_dirty_bitmap,
- .use_dirty_bitmap = use_dirty_bitmap,
- .has_compress = has_compress,
- .compress = compress,
- .has_encrypt = has_encrypt,
- .encrypt = encrypt,
- .has_format = has_format,
- .format = format,
- .has_config_file = has_config_file,
- .config_file = config_file,
- .has_firewall_file = has_firewall_file,
- .firewall_file = firewall_file,
- .has_devlist = has_devlist,
- .devlist = devlist,
- .has_speed = has_speed,
- .speed = speed,
- .errp = errp,
- };
-
- block_on_coroutine_fn(pvebackup_co_prepare, &task);
-
- return task.result;
+ return NULL;
}
BackupStatus *qmp_query_backup(Error **errp)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e5de769dc1..afa67c28d2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -801,7 +801,7 @@
'*config-file': 'str',
'*firewall-file': 'str',
'*devlist': 'str', '*speed': 'int' },
- 'returns': 'UuidInfo' }
+ 'returns': 'UuidInfo', 'coroutine': true }
##
# @query-backup:
@@ -823,7 +823,7 @@
# Notes: This command succeeds even if there is no backup process running.
#
##
-{ 'command': 'backup-cancel' }
+{ 'command': 'backup-cancel', 'coroutine': true }
##
# @ProxmoxSupportStatus:

View File

@@ -1,35 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 23 Jun 2022 14:00:07 +0200
Subject: [PATCH] Revert "block/rbd: fix handling of holes in
.bdrv_co_block_status"
This reverts commit 9e302f64bb407a9bb097b626da97228c2654cfee in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/rbd.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 53e0396b51..0913a0af39 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1470,11 +1470,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs);
-
- /* treat a hole like an unallocated area and bail out */
- if (!exists) {
- return 0;
- }
+ /*
+ * we do not diff against a snapshot so we should never receive a callback
+ * for a hole.
+ */
+ assert(exists);
if (!req->exists && offs > req->offs) {
/*

View File

@@ -0,0 +1,98 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 10 Feb 2021 11:07:06 +0100
Subject: [PATCH] PBS: add master key support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
this requires a new enough libproxmox-backup-qemu0, and allows querying
from the PVE side to avoid QMP calls with unsupported parameters.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
pve-backup.c | 3 +++
qapi/block-core.json | 7 +++++++
3 files changed, 11 insertions(+)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index b838586fc0..5b52b93232 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1039,6 +1039,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS password
false, NULL, // PBS keyfile
false, NULL, // PBS key_password
+ false, NULL, // PBS master_keyfile
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
diff --git a/pve-backup.c b/pve-backup.c
index f858003a06..04ebfc1e33 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -533,6 +533,7 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_password, const char *password,
bool has_keyfile, const char *keyfile,
bool has_key_password, const char *key_password,
+ bool has_master_keyfile, const char *master_keyfile,
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
@@ -681,6 +682,7 @@ UuidInfo coroutine_fn *qmp_backup(
has_password ? password : NULL,
has_keyfile ? keyfile : NULL,
has_key_password ? key_password : NULL,
+ has_master_keyfile ? master_keyfile : NULL,
has_compress ? compress : true,
has_encrypt ? encrypt : has_keyfile,
has_fingerprint ? fingerprint : NULL,
@@ -1044,5 +1046,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_dirty_bitmap_savevm = true;
ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
+ ret->pbs_masterkey = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index afa67c28d2..84e4406d21 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -772,6 +772,8 @@
#
# @key-password: password for keyfile (optional for format 'pbs')
#
+# @master-keyfile: PEM-formatted master public keyfile (optional for format 'pbs')
+#
# @fingerprint: server cert fingerprint (optional for format 'pbs')
#
# @backup-id: backup ID (required for format 'pbs')
@@ -791,6 +793,7 @@
'*password': 'str',
'*keyfile': 'str',
'*key-password': 'str',
+ '*master-keyfile': 'str',
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
@@ -843,6 +846,9 @@
# migration cap if this is false/unset may lead
# to crashes on migration!
#
+# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
+# parameter.
+#
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
##
@@ -851,6 +857,7 @@
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-dirty-bitmap-migration': 'bool',
+ 'pbs-masterkey': 'bool',
'pbs-library-version': 'str' } }
##

View File

@@ -1,161 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Tue, 17 May 2022 09:46:02 +0200
Subject: [PATCH] Revert "block/rbd: implement bdrv_co_block_status"
During backup, bdrv_co_block_status is called for each block copy
chunk. When RBD is used, the current implementation with
rbd_diff_iterate2() using whole_object=true takes about linearly more
time, depending on the image size. Since there are linearly more
chunks, the slowdown is quadratic, becoming unacceptable for large
images (starting somewhere between 500-1000 GiB in my testing).
This reverts commit 0347a8fd4c3faaedf119be04c197804be40a384b as a
stop-gap measure, until it's clear how to make the implementation
more efficient.
Upstream bug report:
https://gitlab.com/qemu-project/qemu/-/issues/1026
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/rbd.c | 112 ----------------------------------------------------
1 file changed, 112 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 0913a0af39..1dab254517 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -108,12 +108,6 @@ typedef struct RBDTask {
int64_t ret;
} RBDTask;
-typedef struct RBDDiffIterateReq {
- uint64_t offs;
- uint64_t bytes;
- bool exists;
-} RBDDiffIterateReq;
-
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
BlockdevOptionsRbd *opts, bool cache,
const char *keypairs, const char *secretid,
@@ -1456,111 +1450,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
return spec_info;
}
-/*
- * rbd_diff_iterate2 allows to interrupt the exection by returning a negative
- * value in the callback routine. Choose a value that does not conflict with
- * an existing exitcode and return it if we want to prematurely stop the
- * execution because we detected a change in the allocation status.
- */
-#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
-
-static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
- int exists, void *opaque)
-{
- RBDDiffIterateReq *req = opaque;
-
- assert(req->offs + req->bytes <= offs);
- /*
- * we do not diff against a snapshot so we should never receive a callback
- * for a hole.
- */
- assert(exists);
-
- if (!req->exists && offs > req->offs) {
- /*
- * we started in an unallocated area and hit the first allocated
- * block. req->bytes must be set to the length of the unallocated area
- * before the allocated area. stop further processing.
- */
- req->bytes = offs - req->offs;
- return QEMU_RBD_EXIT_DIFF_ITERATE2;
- }
-
- if (req->exists && offs > req->offs + req->bytes) {
- /*
- * we started in an allocated area and jumped over an unallocated area,
- * req->bytes contains the length of the allocated area before the
- * unallocated area. stop further processing.
- */
- return QEMU_RBD_EXIT_DIFF_ITERATE2;
- }
-
- req->bytes += len;
- req->exists = true;
-
- return 0;
-}
-
-static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
-{
- BDRVRBDState *s = bs->opaque;
- int status, r;
- RBDDiffIterateReq req = { .offs = offset };
- uint64_t features, flags;
-
- assert(offset + bytes <= s->image_size);
-
- /* default to all sectors allocated */
- status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
- *map = offset;
- *file = bs;
- *pnum = bytes;
-
- /* check if RBD image supports fast-diff */
- r = rbd_get_features(s->image, &features);
- if (r < 0) {
- return status;
- }
- if (!(features & RBD_FEATURE_FAST_DIFF)) {
- return status;
- }
-
- /* check if RBD fast-diff result is valid */
- r = rbd_get_flags(s->image, &flags);
- if (r < 0) {
- return status;
- }
- if (flags & RBD_FLAG_FAST_DIFF_INVALID) {
- return status;
- }
-
- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
- qemu_rbd_diff_iterate_cb, &req);
- if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
- return status;
- }
- assert(req.bytes <= bytes);
- if (!req.exists) {
- if (r == 0) {
- /*
- * rbd_diff_iterate2 does not invoke callbacks for unallocated
- * areas. This here catches the case where no callback was
- * invoked at all (req.bytes == 0).
- */
- assert(req.bytes == 0);
- req.bytes = bytes;
- }
- status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
- }
-
- *pnum = req.bytes;
- return status;
-}
-
static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs)
{
BDRVRBDState *s = bs->opaque;
@@ -1796,7 +1685,6 @@ static BlockDriver bdrv_rbd = {
#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
.bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
#endif
- .bdrv_co_block_status = qemu_rbd_co_block_status,
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,

View File

@@ -0,0 +1,53 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 9 Dec 2020 11:46:57 +0100
Subject: [PATCH] PVE: block/pbs: fast-path reads without allocation if
possible
...and switch over to g_malloc/g_free while at it to align with other
QEMU code.
Tracing shows the fast-path is taken almost all the time, though not
100% so the slow one is still necessary.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/pbs.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/pbs.c b/block/pbs.c
index 78dad0dcc4..ac54e816c0 100644
--- a/block/pbs.c
+++ b/block/pbs.c
@@ -200,7 +200,16 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
BDRVPBSState *s = bs->opaque;
int ret;
char *pbs_error = NULL;
- uint8_t *buf = malloc(bytes);
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
ReadCallbackData rcb = {
.co = qemu_coroutine_self(),
@@ -218,8 +227,10 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
return -EIO;
}
- qemu_iovec_from_buf(qiov, 0, buf, bytes);
- free(buf);
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
return ret;
}

View File

@@ -1,153 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 6 Apr 2023 14:59:31 +0200
Subject: [PATCH] alloc-track: fix deadlock during drop
by replacing the block node directly after changing the backing file
instead of rescheduling it.
With changes in QEMU 8.0, calling bdrv_get_info (and bdrv_unref)
during drop can lead to a deadlock when using iothread (only triggered
with multiple disks, except during debugging where it also triggered
with one disk sometimes):
1. job_unref_locked acquires the AioContext and calls job->driver->free
2. track_drop gets scheduled
3. bdrv_graph_wrlock is called and polls which leads to track_drop being
called
4. track_drop acquires the AioContext recursively
5. bdrv_get_info is a wrapped coroutine (since 8.0) and thus polls for
bdrv_co_get_info. This releases the AioContext, but only once! The
documentation for the AIO_WAIT_WHILE macro states that the
AioContext lock needs to be acquired exactly once, but there does
not seem to be a way for track_drop to know if it acquired the lock
recursively or not (without adding further hacks).
6. Because the AioContext is still held by the main thread once, it can't
be acquired before entering bdrv_co_get_info in co_schedule_bh_cb
which happens in the iothread
When doing the operation in change_backing_file, the AioContext has
already been acquired by the caller, so the issue with the recursive
lock goes away.
The comment explaining why delaying the replace is necessary is
> we need to schedule this for later however, since when this function
> is called, the blockjob modifying us is probably not done yet and
> has a blocker on 'bs'
However, there is no check for blockers in bdrv_replace_node. It would
need to be done by us, the caller, with check_to_replace_node.
Furthermore, the mirror job also does its call to bdrv_replace_node
while there is an active blocker (inserted by mirror itself) and they
use a specialized version to check for blockers instead of
check_to_replace_node there. Alloc-track could also do something
similar to check for other blockers, but it should be fine to rely on
Proxmox VE that no other operation with the blockdev is going on.
Mirror also drains the target before replacing the node, but the
target can have other users. In case of alloc-track the file child
should not be accessible by anybody else and so there can't be an
in-flight operation for the file child when alloc-track is drained.
The rescheduling based on refcounting is a hack and it doesn't seem to
be necessary anymore. It's not clear what the original issue from the
comment was. Testing with older builds with track_drop done directly
without rescheduling also didn't lead to any noticeable issue for me.
One issue it might have been is the one fixed by b1e1af394d
("block/stream: Drain subtree around graph change"), where
block-stream had a use-after-free if the base node changed at an
inconvenient time (which alloc-track's auto-drop does).
It's also not possible to just not auto-replace the alloc-track. Not
replacing it at all leads to other operations like block resize
hanging, and there is no good way to replace it manually via QMP
(there is x-blockdev-change, but it is experimental and doesn't
implement the required operation yet). Also, it's just cleaner in
general to not leave unnecessary block nodes lying around.
Suggested-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 54 ++++++++++++++-------------------------------
1 file changed, 16 insertions(+), 38 deletions(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index b75d7c6460..76da140a68 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -25,7 +25,6 @@
typedef enum DropState {
DropNone,
- DropRequested,
DropInProgress,
} DropState;
@@ -268,37 +267,6 @@ static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
}
}
-static void track_drop(void *opaque)
-{
- BlockDriverState *bs = (BlockDriverState*)opaque;
- BlockDriverState *file = bs->file->bs;
- BDRVAllocTrackState *s = bs->opaque;
-
- assert(file);
-
- /* we rely on the fact that we're not used anywhere else, so let's wait
- * until we're only used once - in the drive connected to the guest (and one
- * ref is held by bdrv_ref in track_change_backing_file) */
- if (bs->refcnt > 2) {
- aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
- return;
- }
- AioContext *aio_context = bdrv_get_aio_context(bs);
- aio_context_acquire(aio_context);
-
- bdrv_drained_begin(bs);
-
- /* now that we're drained, we can safely set 'DropInProgress' */
- s->drop_state = DropInProgress;
- bdrv_child_refresh_perms(bs, bs->file, &error_abort);
-
- bdrv_replace_node(bs, file, &error_abort);
- bdrv_set_backing_hd(bs, NULL, &error_abort);
- bdrv_drained_end(bs);
- bdrv_unref(bs);
- aio_context_release(aio_context);
-}
-
static int track_change_backing_file(BlockDriverState *bs,
const char *backing_file,
const char *backing_fmt)
@@ -308,13 +276,23 @@ static int track_change_backing_file(BlockDriverState *bs,
backing_file == NULL && backing_fmt == NULL)
{
/* backing file has been disconnected, there's no longer any use for
- * this node, so let's remove ourselves from the block graph - we need
- * to schedule this for later however, since when this function is
- * called, the blockjob modifying us is probably not done yet and has a
- * blocker on 'bs' */
- s->drop_state = DropRequested;
+ * this node, so let's remove ourselves from the block graph */
+ BlockDriverState *file = bs->file->bs;
+
+ /* Just to be sure, because bdrv_replace_node unrefs it */
bdrv_ref(bs);
- aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
+ bdrv_drained_begin(bs);
+
+ /* now that we're drained, we can safely set 'DropInProgress' */
+ s->drop_state = DropInProgress;
+
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+
+ bdrv_replace_node(bs, file, &error_abort);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
+
+ bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
return 0;

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/stream.c b/block/stream.c
index e522bbdec5..afed72db55 100644
index 97bee482dc..50093c9f57 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -27,7 +27,7 @@ enum {
@@ -28,7 +28,7 @@ enum {
* large enough to process multiple clusters in a single call, so
* that populating contiguous regions of the image is efficient.
*/

View File

@@ -1,190 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 5 May 2023 13:39:53 +0200
Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks
In spirit, this is a partial revert of commit 9b09503752 ("migration:
run setup callbacks out of big lock"), but only for the snapshot case.
For snapshots, the bdrv_writev_vmstate() function is used during setup
(in QIOChannelBlock backing the QEMUFile), but not holding the BQL
while calling it could lead to an assertion failure. To understand
how, first note the following:
1. Generated coroutine wrappers for block layer functions spawn the
coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it.
2. If the host OS switches threads at an inconvenient time, it can
happen that a bottom half scheduled for the main thread's AioContext
is executed as part of a vCPU thread's aio_poll().
An example leading to the assertion failure is as follows:
main thread:
1. A snapshot-save QMP command gets issued.
2. snapshot_save_job_bh() is scheduled.
vCPU thread:
3. aio_poll() for the main thread's AioContext is called (e.g. when
the guest writes to a pflash device, as part of blk_pwrite which is a
generated coroutine wrapper).
4. snapshot_save_job_bh() is executed as part of aio_poll().
5. qemu_savevm_state() is called.
6. qemu_mutex_unlock_iothread() is called. Now
qemu_get_current_aio_context() returns 0x0.
7. bdrv_writev_vmstate() is executed during the usual savevm setup.
But this function is a generated coroutine wrapper, so it uses
AIO_WAIT_WHILE. There, the assertion
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
will fail.
To fix it, ensure that the BQL is held during setup. To avoid changing
the behavior for migration too, introduce conditionals for the setup
callbacks that need the BQL and only take the lock if it's not already
held.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/migration/register.h | 2 +-
migration/block-dirty-bitmap.c | 15 ++++++++++++---
migration/block.c | 15 ++++++++++++---
migration/ram.c | 16 +++++++++++++---
migration/savevm.c | 2 --
5 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/include/migration/register.h b/include/migration/register.h
index 90914f32f5..c728fd9120 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
* by other locks.
*/
int (*save_live_iterate)(QEMUFile *f, void *opaque);
+ int (*save_setup)(QEMUFile *f, void *opaque);
/* This runs outside the iothread lock! */
- int (*save_setup)(QEMUFile *f, void *opaque);
/* Note for save_live_pending:
* must_precopy:
* - must be migrated in precopy or in stopped state
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 285dd1d148..f7ee5a74d9 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1219,10 +1219,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
{
DBMSaveState *s = &((DBMState *)opaque)->save;
SaveBitmapState *dbms = NULL;
+ bool release_lock = false;
- qemu_mutex_lock_iothread();
+ /* For snapshots, the BQL is held during setup. */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
if (init_dirty_bitmap_migration(s) < 0) {
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return -1;
}
@@ -1230,7 +1237,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
send_bitmap_start(f, s, dbms);
}
qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return 0;
}
diff --git a/migration/block.c b/migration/block.c
index 86c2256a2b..8423e0c9f9 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -725,21 +725,30 @@ static void block_migration_cleanup(void *opaque)
static int block_save_setup(QEMUFile *f, void *opaque)
{
int ret;
+ bool release_lock = false;
trace_migration_block_save("setup", block_mig_state.submitted,
block_mig_state.transferred);
- qemu_mutex_lock_iothread();
+ /* For snapshots, the BQL is held during setup. */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
ret = init_blk_migration(f);
if (ret < 0) {
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return ret;
}
/* start track dirty blocks */
ret = set_dirty_tracking();
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
if (ret) {
return ret;
diff --git a/migration/ram.c b/migration/ram.c
index 6e1514f69f..6a1aec7031 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2896,8 +2896,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
static void ram_init_bitmaps(RAMState *rs)
{
- /* For memory_global_dirty_log_start below. */
- qemu_mutex_lock_iothread();
+ bool release_lock = false;
+
+ /*
+ * For memory_global_dirty_log_start below.
+ * For snapshots, the BQL is held during setup.
+ */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
qemu_mutex_lock_ramlist();
WITH_RCU_READ_LOCK_GUARD() {
@@ -2909,7 +2917,9 @@ static void ram_init_bitmaps(RAMState *rs)
}
}
qemu_mutex_unlock_ramlist();
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
/*
* After an eventual first bitmap sync, fixup the initial bitmap
diff --git a/migration/savevm.c b/migration/savevm.c
index d60c4f487a..3c015722f7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1625,10 +1625,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
reset_vfio_bytes_transferred();
ms->to_dst_file = f;
- qemu_mutex_unlock_iothread();
qemu_savevm_state_header(f);
qemu_savevm_state_setup(f);
- qemu_mutex_lock_iothread();
while (qemu_file_get_error(f) == 0) {
if (qemu_savevm_state_iterate(f, false) > 0) {

View File

@@ -17,17 +17,17 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+)
diff --git a/block/io.c b/block/io.c
index 83d1b1dfdc..24a3c84c93 100644
index f38e7f81d8..28c3a712b6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1710,6 +1710,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
int sliced_niov;
size_t sliced_head, sliced_tail;
@@ -1764,6 +1764,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
{
int ret;
+ if (!qiov) {
+ return 0;
+ }
+
/* Should have been checked by the caller already */
ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
if (ret < 0) {
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {

View File

@@ -1,29 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 5 May 2023 15:30:16 +0200
Subject: [PATCH] savevm-async: don't hold BQL during setup
See commit "migration: for snapshots, hold the BQL during setup
callbacks" for why. This is separate, because a version of that one
will hopefully land upstream.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/savevm-async.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 80624fada8..b1d85a4b41 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -401,10 +401,8 @@ void qmp_savevm_start(const char *statefile, Error **errp)
snap_state.state = SAVE_STATE_ACTIVE;
snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
- qemu_mutex_unlock_iothread();
qemu_savevm_state_header(snap_state.file);
qemu_savevm_state_setup(snap_state.file);
- qemu_mutex_lock_iothread();
/* Async processing from here on out happens in iohandler context, so let
* the target bdrv have its home there.

View File

@@ -24,22 +24,18 @@ once the backing image is removed. It will be replaced by 'file'.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures
make error return value consistent with QEMU
avoid premature break during read]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
block/alloc-track.c | 345 ++++++++++++++++++++++++++++++++++++++++++++
block/meson.build | 1 +
2 files changed, 353 insertions(+)
2 files changed, 346 insertions(+)
create mode 100644 block/alloc-track.c
diff --git a/block/alloc-track.c b/block/alloc-track.c
new file mode 100644
index 0000000000..b75d7c6460
index 0000000000..35f2737c89
--- /dev/null
+++ b/block/alloc-track.c
@@ -0,0 +1,352 @@
@@ -0,0 +1,345 @@
+/*
+ * Node to allow backing images to be applied to any node. Assumes a blank
+ * image to begin with, only new writes are tracked as allocated, thus this
@@ -55,7 +51,6 @@ index 0000000000..b75d7c6460
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/dirty-bitmap.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
@@ -165,13 +160,13 @@ index 0000000000..b75d7c6460
+ }
+}
+
+static coroutine_fn int64_t track_co_getlength(BlockDriverState *bs)
+static int64_t track_getlength(BlockDriverState *bs)
+{
+ return bdrv_co_getlength(bs->file->bs);
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn track_co_preadv(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ QEMUIOVector local_qiov;
@@ -182,11 +177,6 @@ index 0000000000..b75d7c6460
+ int64_t local_bytes;
+ bool alloc;
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+ return -EIO;
+ }
+
+ /* a read request can span multiple granularity-sized chunks, and can thus
+ * contain blocks with different allocation status - we could just iterate
+ * granularity-wise, but for better performance use bdrv_dirty_bitmap_next_X
@@ -217,8 +207,7 @@ index 0000000000..b75d7c6460
+ ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
+ &local_qiov, flags);
+ } else {
+ qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ ret = 0;
+ ret = qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ }
+
+ if (ret != 0) {
@@ -230,21 +219,21 @@ index 0000000000..b75d7c6460
+}
+
+static int coroutine_fn track_co_pwritev(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn track_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+ int64_t offset, int count, BdrvRequestFlags flags)
+{
+ return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+ return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn track_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int64_t bytes)
+ int64_t offset, int count)
+{
+ return bdrv_co_pdiscard(bs->file, offset, bytes);
+ return bdrv_co_pdiscard(bs->file, offset, count);
+}
+
+static coroutine_fn int track_co_flush(BlockDriverState *bs)
@@ -368,7 +357,7 @@ index 0000000000..b75d7c6460
+
+ .bdrv_file_open = track_open,
+ .bdrv_close = track_close,
+ .bdrv_co_getlength = track_co_getlength,
+ .bdrv_getlength = track_getlength,
+ .bdrv_child_perm = track_child_perm,
+ .bdrv_refresh_limits = track_refresh_limits,
+
@@ -393,7 +382,7 @@ index 0000000000..b75d7c6460
+
+block_init(bdrv_alloc_track_init);
diff --git a/block/meson.build b/block/meson.build
index becc99ac4e..0a69836593 100644
index e3ed5ac97c..d1ee260048 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -2,6 +2,7 @@ block_ss.add(genh)

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 15:26:30 +0200
Subject: [PATCH] PVE: whitelist 'invalid' QAPI names for backwards compat
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qapi/pragma.json | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 7c91ea3685..c3888d654c 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -12,6 +12,7 @@
'device_add',
'device_del',
'expire_password',
+ 'get_link_status',
'migrate_cancel',
'netdev_add',
'netdev_del',
@@ -60,6 +61,8 @@
'SysEmuTarget', # query-cpu-fast, query-target
'UuidInfo', # query-uuid
'VncClientInfo', # query-vnc, query-vnc-servers, ...
- 'X86CPURegister32' # qom-get of x86 CPU properties
+ 'X86CPURegister32', # qom-get of x86 CPU properties
# feature-words, filtered-features
+ 'BlockdevOptionsPbs', # for PBS backwards compat
+ 'BalloonInfo'
] } }

View File

@@ -0,0 +1,35 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 17:36:55 +0200
Subject: [PATCH] PVE: savevm-async: register yank before
migration_incoming_state_destroy
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/savevm-async.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 970ee3b3fc..b3ccc069f1 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -19,6 +19,7 @@
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
+#include "qemu/yank.h"
/* #define DEBUG_SAVEVM_STATE */
@@ -580,6 +581,10 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
dirty_bitmap_mig_before_vm_start();
qemu_fclose(f);
+
+ /* state_destroy assumes a real migration which would have added a yank */
+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
migration_incoming_state_destroy();
if (ret < 0) {
error_setg_errno(errp, -ret, "Error while loading VM state");

92
debian/patches/series vendored
View File

@@ -1,16 +1,7 @@
extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
extra/0004-migration-block-dirty-bitmap-fix-loading-bitmap-when.patch
extra/0005-Revert-Revert-graph-lock-Disable-locking-for-now.patch
extra/0006-migration-states-workaround-snapshot-performance-reg.patch
extra/0007-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
extra/0008-target-i386-the-sgx_epc_get_section-stub-is-reachabl.patch
extra/0009-ui-clipboard-mark-type-as-not-available-when-there-i.patch
extra/0010-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch
extra/0011-virtio-Re-enable-notifications-after-drain.patch
extra/0012-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch
extra/0013-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch
extra/0002-monitor-hmp-add-support-for-flag-argument-with-value.patch
extra/0003-monitor-refactor-set-expire_password-and-allow-VNC-d.patch
extra/0004-block-mirror-fix-NULL-pointer-dereference-in-mirror_.patch
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
@@ -23,43 +14,46 @@ pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
pve/0007-PVE-Up-qmp-add-get_link_status.patch
pve/0008-PVE-Up-glusterfs-allow-partial-reads.patch
pve/0009-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
pve/0010-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
pve/0011-PVE-Up-qemu-img-dd-add-isize-parameter.patch
pve/0012-PVE-Up-qemu-img-dd-add-n-skip_create.patch
pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
pve/0014-PVE-qapi-modify-query-machines.patch
pve/0015-PVE-qapi-modify-spice-query.patch
pve/0016-PVE-add-IOChannel-implementation-for-savevm-async.patch
pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
pve/0018-PVE-add-optional-buffer-size-to-QEMUFile.patch
pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
pve/0023-PVE-monitor-disable-oob-capability.patch
pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
pve/0025-PVE-Allow-version-code-in-machine-type.patch
pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
pve/0027-PVE-Backup-add-vma-backup-format-code.patch
pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
pve/0029-PVE-Add-sequential-job-transaction-support.patch
pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
pve/0037-PVE-block-stream-increase-chunk-size.patch
pve/0038-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
pve/0039-block-add-alloc-track-driver.patch
pve/0040-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
pve/0041-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
pve/0042-Revert-block-rbd-implement-bdrv_co_block_status.patch
pve/0043-alloc-track-fix-deadlock-during-drop.patch
pve/0044-migration-for-snapshots-hold-the-BQL-during-setup-ca.patch
pve/0045-savevm-async-don-t-hold-BQL-during-setup.patch
pve-qemu-8.1-vitastor.patch
pve/0016-PVE-add-savevm-async-for-background-state-snapshots.patch
pve/0017-PVE-add-optional-buffer-size-to-QEMUFile.patch
pve/0018-PVE-block-add-the-zeroinit-block-driver-filter.patch
pve/0019-PVE-Add-dummy-id-command-line-parameter.patch
pve/0020-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
pve/0021-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
pve/0022-PVE-monitor-disable-oob-capability.patch
pve/0023-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
pve/0024-PVE-Allow-version-code-in-machine-type.patch
pve/0025-PVE-Backup-add-vma-backup-format-code.patch
pve/0026-PVE-Backup-add-backup-dump-block-driver.patch
pve/0027-PVE-Backup-proxmox-backup-patches-for-qemu.patch
pve/0028-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0029-PVE-Backup-Add-dirty-bitmap-tracking-for-incremental.patch
pve/0030-PVE-various-PBS-fixes.patch
pve/0031-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
pve/0032-PVE-add-query_proxmox_support-QMP-command.patch
pve/0033-PVE-add-query-pbs-bitmap-info-QMP-call.patch
pve/0034-PVE-redirect-stderr-to-journal-when-daemonized.patch
pve/0035-PVE-Add-sequential-job-transaction-support.patch
pve/0036-PVE-Backup-Use-a-transaction-to-synchronize-job-stat.patch
pve/0037-PVE-Backup-Don-t-block-on-finishing-and-cleanup-crea.patch
pve/0038-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
pve/0039-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
pve/0040-PVE-fall-back-to-open-iscsi-initiatorname.patch
pve/0041-PVE-Use-coroutine-QMP-for-backup-cancel_backup.patch
pve/0042-PBS-add-master-key-support.patch
pve/0043-PVE-block-pbs-fast-path-reads-without-allocation-if-.patch
pve/0044-PVE-block-stream-increase-chunk-size.patch
pve/0045-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
pve/0046-block-add-alloc-track-driver.patch
pve/0047-PVE-whitelist-invalid-QAPI-names-for-backwards-compa.patch
pve/0048-PVE-savevm-async-register-yank-before-migration_inco.patch
pve-qemu-6.1-vitastor.patch

View File

@@ -1,6 +1,7 @@
# install the userspace utilities
debian/kvm-ifup etc/kvm/
debian/kvm-ifdown etc/kvm/
# install the OVMF UEFI ROM
debian/OVMF_CODE-pure-efi.fd usr/share/kvm/
debian/OVMF_VARS-pure-efi.fd usr/share/kvm/
debian/kvm-ifdown etc/kvm/
# install the userspace utilities
debian/kvm-ifup etc/kvm/

View File

@@ -1,13 +1,16 @@
# also use aarch64 for 32 bit arm
usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
usr/bin/qemu-system-x86_64 usr/bin/kvm
# qemu-system-i386 and qemu-system-x86_64 provide the same hardware emulation
usr/bin/qemu-system-x86_64 usr/bin/qemu-system-i386
# also use aarch64 for 32 bit arm
usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
# upstream provides a qemu man page,
# we symlink to kvm for backward compatibility
# and to qemu-system-{i386,x86_64} to fulfill our 'Provides: qemu-system-x86'
usr/share/man/man1/qemu.1 usr/share/man/man1/kvm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-aarch64.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-arm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-i386.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-x86_64.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-arm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-aarch64.1

View File

@@ -1,7 +1,4 @@
pve-qemu-kvm: arch-dependent-file-in-usr-share [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: binary-from-other-architecture [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: embedded-javascript-library please use * [usr/share/doc/pve-qemu-kvm/kvm/_static/*]
pve-qemu-kvm: groff-message *: warning [*]: can't break line [usr/share/man/*]
pve-qemu-kvm: groff-message *: warning [*]: cannot adjust line [usr/share/man/*]
pve-qemu-kvm: statically-linked-binary [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: unstripped-binary-or-object [usr/share/kvm/hppa-firmware.img]
pve-qemu-kvm: arch-dependent-file-in-usr-share usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: binary-from-other-architecture usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: unstripped-binary-or-object usr/share/kvm/hppa-firmware.img
pve-qemu-kvm: statically-linked-binary usr/share/kvm/hppa-firmware.img

138
debian/rules vendored
View File

@@ -1,12 +1,22 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Sample debian/rules that uses debhelper.
# This file was originally written by Joey Hess and Craig Small.
# As a special exception, when this file is copied by dh-make into a
# dh-make output file, you may use that output file without restriction.
# This special exception was added by Craig Small in version 0.37 of dh-make.
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
include /usr/share/dpkg/default.mk
include /usr/share/dpkg/pkg-info.mk
HOST_CPU ?= $(DEB_HOST_GNU_CPU)
# These are used for cross-compiling and for saving the configure script
# from having to guess our platform (since we know it already)
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
ARCH ?= $(shell dpkg-architecture -qDEB_HOST_GNU_CPU)
PACKAGE=pve-qemu-kvm
destdir := $(CURDIR)/debian/$(PACKAGE)
@@ -17,70 +27,58 @@ machinefile := $(destdir)/usr/share/kvm/machine-versions-x86_64.json
# default QEMU out-of-tree build directory is ./build
BUILDDIR=build
# FIXME: pass to configure as --extra-cflags=CFLAGS ?! also LDFLAGS?
CFLAGS += -Wall
CFLAGS = -Wall
export CFLAGS
# DEB_BUILD_OPTIONS=parallel=N
MAKEFLAGS += $(subst parallel=,-j,$(filter parallel=%,${DEB_BUILD_OPTIONS}))
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
CFLAGS += -O0
else
CFLAGS += -O2
endif
${BUILDDIR}/config.status: configure
dh_testdir
# Add here commands to configure the package.
ifneq "$(wildcard /usr/share/misc/config.sub)" ""
cp -f /usr/share/misc/config.sub config.sub
endif
ifneq "$(wildcard /usr/share/misc/config.guess)" ""
cp -f /usr/share/misc/config.guess config.guess
endif
# guest-agent is only required for guest systems
./configure \
--disable-download \
--docdir=/usr/share/doc/pve-qemu-kvm \
--localstatedir=/var \
--prefix=/usr \
--sysconfdir=/etc \
--target-list=$(HOST_CPU)-softmmu,aarch64-softmmu \
--with-suffix="kvm" \
--with-pkgversion="${DEB_SOURCE}_${DEB_VERSION_UPSTREAM_REVISION}" \
--audio-drv-list="alsa" \
--datadir=/usr/share \
--libexecdir=/usr/lib/kvm \
--disable-capstone \
--disable-gtk \
--disable-guest-agent \
--disable-guest-agent-msi \
--disable-libnfs \
--disable-libssh \
--disable-sdl \
--disable-smartcard \
--disable-strip \
--disable-xen \
--enable-curl \
--enable-docs \
--enable-glusterfs \
--enable-gnutls \
--enable-libiscsi \
--enable-libusb \
--enable-linux-aio \
--enable-linux-io-uring \
--enable-numa \
--enable-opengl \
--enable-rbd \
--enable-seccomp \
--enable-slirp \
--enable-spice \
--enable-usb-redir \
--enable-virglrenderer \
--enable-virtfs \
--enable-zstd
--with-git-submodules=ignore \
--docdir=/usr/share/doc/pve-qemu-kvm \
--localstatedir=/var \
--prefix=/usr \
--sysconfdir=/etc \
--target-list=$(ARCH)-softmmu,aarch64-softmmu \
--with-suffix="kvm" \
--with-pkgversion="${DEB_SOURCE}_${DEB_VERSION_UPSTREAM}" \
--audio-drv-list="alsa" \
--datadir=/usr/share \
--libexecdir=/usr/lib/kvm \
--disable-capstone \
--disable-gtk \
--disable-guest-agent \
--disable-guest-agent-msi \
--disable-libnfs \
--disable-libxml2 \
--disable-sdl \
--disable-smartcard \
--disable-strip \
--disable-xen \
--enable-curl \
--enable-docs \
--enable-glusterfs \
--enable-gnutls \
--enable-libiscsi \
--enable-libusb \
--enable-linux-aio \
--enable-linux-io-uring \
--enable-numa \
--enable-rbd \
--enable-seccomp \
--enable-spice \
--enable-usb-redir \
--enable-virtfs \
--enable-virtiofsd \
--enable-xfsctl
build: build-arch build-indep
build-arch: build-stamp
build-indep: build-stamp
build: build-stamp
build-stamp: ${BUILDDIR}/config.status
@@ -98,8 +96,15 @@ clean:
dh_testroot
rm -f build-stamp
# Add here commands to clean up before the build process.
# Add here commands to clean up after the build process.
-$(MAKE) distclean
ifneq "$(wildcard /usr/share/misc/config.sub)" ""
cp -f /usr/share/misc/config.sub config.sub
endif
ifneq "$(wildcard /usr/share/misc/config.guess)" ""
cp -f /usr/share/misc/config.guess config.guess
endif
dh_clean
@@ -113,6 +118,21 @@ install: build
# Add here commands to install the package into debian/pve-qemu-kvm.
$(MAKE) DESTDIR=$(destdir) install
# we do not need openbios files (sparc/ppc)
rm -rf $(destdir)/usr/share/kvm/openbios-*
# remove ppc files
rm $(destdir)/usr/share/kvm/*.dtb
rm $(destdir)/usr/share/kvm/s390-ccw.img
rm $(destdir)/usr/share/kvm/s390-netboot.img
rm $(destdir)/usr/share/kvm/qemu_vga.ndrv
rm $(destdir)/usr/share/kvm/slof.bin
rm $(destdir)/usr/share/kvm/u-boot.e500
# remove Alpha files
rm $(destdir)/usr/share/kvm/palcode-clipper
# remove RISC-V files
rm $(destdir)/usr/share/kvm/opensbi-riscv32-generic-fw_dynamic.elf
rm $(destdir)/usr/share/kvm/opensbi-riscv64-generic-fw_dynamic.elf
# Remove things we don't package at all, would be a "kvm-dev" package
rm -Rf $(destdir)/usr/include/linux/
rm -Rf $(destdir)/usr/include
@@ -148,7 +168,7 @@ binary-arch: build install
# dh_installinfo
dh_installman
dh_link
dh_strip --dbgsym-migration='pve-qemu-kvm-dbg (<<8.0.0-1~)'
dh_strip --dbg-package=pve-qemu-kvm-dbg
dh_compress
dh_fixperms
# dh_perl

View File

@@ -1,3 +0,0 @@
debian/OVMF_CODE-pure-efi.fd
debian/OVMF_VARS-pure-efi.fd
debian/Logo.bmp

View File

@@ -1 +0,0 @@
source-is-missing [roms/SLOF/*.oco]

27
keycodemapdb/LICENSE.BSD Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) Individual contributors.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of PyCA Cryptography nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

339
keycodemapdb/LICENSE.GPL2 Normal file
View File

@@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

114
keycodemapdb/README Normal file
View File

@@ -0,0 +1,114 @@
Key code / scan code / key symbol mapping database
==================================================
This module provides a database that maps between different
key code / scan code / key symbol sets:
- Linux evdev
- OS-X
- AT Set 1
- AT Set 2
- AT Set 3
- XT
- Linux XT KBD driver
- USB HID
- Win32
- XWin XT
- XKBD XT
- Xorg Evdev
- Xorg KBD
- Xorg OS-X
- XOrg Cygwin
- RFB
Licensing
---------
The contents of this package are dual licensed under the terms of:
- GNU General Public License (version 2 or later)
- 3-clause BSD License
The output files generated by keymap-gen may be distributed & used under
the terms of either of the above licenses.
Data formats
------------
The following output formats are possible
- Code map
An array mapping between key code set values
Indexes in the array are values from the source code set.
Entries in the array are values from the target code set
- Code table
An array listing all values in a key code set
Indexes in the array are simply a numeric counter
Entries in the array are values from the key code set
The size of the array matches the total number of entries in
the keycode database.
- Name map
An array mapping between key code set values and names
Indexes in the array are values from the source code set
Entries in the array are names from the target code set
- Name table
An array listing all names in a key code set
Indexes in the array are simply a numeric counter
Entries in the array are names from the key code set
The size of the array matches the total number of entries in
the keycode database.
Output languages
----------------
The tool is capable of generating data tables for the following
programming languages / environments
- Standard C
- GLib2 (standard C, but with GLib2 data types)
- Python
- Perl
Usage
-----
Map values from AT Set 1 to USB HID, generating tables for the
C programming language
$ keymap-gen --lang stdc code-map data/keymaps.csv atset1 usb
Generate tables of names for Linux key codes and OS-X key codes,
in python - equivalent array indexes map between the two sets.
A variable name override is used
$ keymap-gen --varname linux_keycodes --lang stdc \
code-table data/keymaps.csv linux
$ keymap-gen --varname osx_keycodes --lang stdc \
code-table data/keymaps.csv os-x
Generate a mapping from XOrg XWin values to Win32 names
$ keymap-gen --lang perl name-map data/keymaps.csv xorgxwin win32
Generate a table of names for Linux key codes in Perl
$ keymap-gen --lang perl name-table data/keymaps.csv linux

89
keycodemapdb/data/README Normal file
View File

@@ -0,0 +1,89 @@
This directory contains the raw data for mapping between different
keyboard codes. Naming is often based on the US keyboard layout, but
does not indicate the symbol actually generated by the key.
The columns currently in this data set are:
Linux
-----
Name and value of the hardware independent keycodes used by the linux
kernel and exposed through the input subsystem.
References: linux/input.h
macOS
-----
Low level key codes as exposed by Mac OS X/macOS.
References: Carbon/HIToolbox/Events.h
PC scan code sets
-----------------
Scan codes for the three original PC keyboard generations:
Set 1: XT
Set 2: AT
Set 3: PS/2
The sets include codes for modern keys as well and not just the keys
present on those original keyboards.
References: linux/drivers/input/keyboard/atkbd.c
USB HID
-------
Codes as specified by the HID profile in USB.
References: linux/drivers/hid/usbhid/usbkbd.c
Windows Virtual-key codes
-------------------------
The low level, hardware independent "VKEYs" exposed by Windows.
References: mingw32/winuser.h
XWin XT
-------
X11 keycodes generated by the XWin server. Based on the XT scan code
set.
References: xorg-server/hw/xwin/{winkeybd.c,winkeynames.h}
Xfree86 KBD XT
--------------
X11 keycodes generated by the Xfree86 keyboard drivers. Based on the XT
scan code set.
References: xf86-input-keyboard/src/at_scancode.c
X11 keysyms
-----------
Corresponding X11 keysym value(s) for a US keyboard layout.
WARNING: These columns represent symbols, not physical keys, and should
be used with extreme care.
References: http://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h
HTML KeyboardEvent.code
-----------------------
Key codes seen in the KeyboardEvent.code attribute as part of the
UI Events specification.
References: https://www.w3.org/TR/uievents-code/
XKEYBOARD key names
-------------------
Hardware independent key names as used in the XKEYBOARD extension.
References: /usr/share/X11/xkb/keycodes/

Some files were not shown because too many files have changed in this diff Show More