Compare commits

..

2 Commits

Author SHA1 Message Date
0fa004b912 Add bdrv_co_block_status 2023-07-18 18:26:09 +03:00
78a75914b3 Add Vitastor support 2022-12-15 19:32:46 +03:00
122 changed files with 12057 additions and 5032 deletions

7
.gitignore vendored
View File

@@ -1,7 +0,0 @@
/*.build
/*.buildinfo
/*.changes
/*.deb
/*.dsc
/*.tar*
/pve-qemu-kvm-*.*/

View File

@@ -1,88 +1,60 @@
include /usr/share/dpkg/default.mk
include /usr/share/dpkg/pkg-info.mk
include /usr/share/dpkg/architecture.mk
PACKAGE = pve-qemu-kvm
SRCDIR := qemu
BUILDDIR ?= $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
ORIG_SRC_TAR=$(PACKAGE)_$(DEB_VERSION_UPSTREAM).orig.tar.gz
BUILDDIR ?= ${PACKAGE}-${DEB_VERSION_UPSTREAM}
GITVERSION := $(shell git rev-parse HEAD)
DSC=$(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION).dsc
DEB = $(PACKAGE)_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
DEB_DBG = $(PACKAGE)-dbgsym_$(DEB_VERSION_UPSTREAM_REVISION)_$(DEB_BUILD_ARCH).deb
DEB = ${PACKAGE}_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
DEB_DBG = ${PACKAGE}-dbg_${DEB_VERSION_UPSTREAM_REVISION}_${DEB_BUILD_ARCH}.deb
DEBS = $(DEB) $(DEB_DBG)
all: $(DEBS)
.PHONY: submodule
submodule:
ifeq ($(shell test -f "$(SRCDIR)/configure" && echo 1 || echo 0), 0)
git submodule update --init --recursive
cd $(SRCDIR); meson subprojects download
endif
test -f "${SRCDIR}/configure" || git submodule update --init --recursive
PC_BIOS_FW_PURGE_LIST_IN = \
hppa-firmware.img \
openbios-ppc \
openbios-sparc32 \
openbios-sparc64 \
palcode-clipper \
s390-ccw.img \
s390-netboot.img \
u-boot.e500 \
.*\.dtb \
qemu_vga.ndrv \
slof.bin \
opensbi-riscv.*-generic-fw_dynamic.bin \
BLOB_PURGE_SED_CMDS = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "/$(FILE)/d")
BLOB_PURGE_FILTER = $(foreach FILE,$(PC_BIOS_FW_PURGE_LIST_IN),-e "$(FILE)")
$(BUILDDIR): submodule
$(BUILDDIR): keycodemapdb | submodule
# check if qemu/ was used for a build
# if so, please run 'make distclean' in the submodule and try again
test ! -f $(SRCDIR)/build/config.status
rm -rf $@.tmp $@
cp -a $(SRCDIR) $@.tmp
cp -a debian $@.tmp/debian
rm -rf $@.tmp/roms/edk2 # packaged separately
find $@.tmp/pc-bios -type f | grep $(BLOB_PURGE_FILTER) | xargs rm -f
sed -i $(BLOB_PURGE_SED_CMDS) $@.tmp/pc-bios/meson.build
echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $@.tmp/debian/SOURCE
mv $@.tmp $@
rm -rf $(BUILDDIR)
cp -a $(SRCDIR) $(BUILDDIR)
cp -a debian $(BUILDDIR)/debian
rm -rf $(BUILDDIR)/ui/keycodemapdb
cp -a keycodemapdb $(BUILDDIR)/ui/
echo "git clone git://git.proxmox.com/git/pve-qemu.git\\ngit checkout $(GITVERSION)" > $(BUILDDIR)/debian/SOURCE
.PHONY: deb kvm
deb kvm: $(DEBS)
$(DEB_DBG): $(DEB)
$(DEB): $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j32
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j8
lintian $(DEBS)
sbuild: $(DSC)
sbuild $(DSC)
$(ORIG_SRC_TAR): $(BUILDDIR)
tar czf $(ORIG_SRC_TAR) --exclude="$(BUILDDIR)/debian" $(BUILDDIR)
.PHONY: dsc
dsc:
rm -rf *.dsc $(BUILDDIR)
$(MAKE) $(DSC)
lintian $(DSC)
$(DSC): $(ORIG_SRC_TAR) $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -S -us -uc -d
.PHONY: update
update:
cd $(SRCDIR) && git submodule deinit ui/keycodemapdb || true
rm -rf $(SRCDIR)/ui/keycodemapdb
mkdir $(SRCDIR)/ui/keycodemapdb
cd $(SRCDIR) && git submodule update --init ui/keycodemapdb
rm -rf keycodemapdb
mkdir keycodemapdb
cp -R $(SRCDIR)/ui/keycodemapdb/* keycodemapdb/
git add keycodemapdb
.PHONY: upload
upload: UPLOAD_DIST ?= $(DEB_DISTRIBUTION)
upload: $(DEBS)
tar cf - $(DEBS) | ssh repoman@repo.proxmox.com upload --product pve --dist $(UPLOAD_DIST)
tar cf - ${DEBS} | ssh repoman@repo.proxmox.com upload --product pve --dist bullseye
.PHONY: distclean clean
distclean: clean
clean:
rm -rf $(PACKAGE)-[0-9]*/ $(PACKAGE)*.tar* *.deb *.dsc *.build *.buildinfo *.changes
rm -rf $(BUILDDIR) $(PACKAGE)*.deb *.buildinfo *.changes
.PHONY: dinstall
dinstall: $(DEBS)

248
debian/changelog vendored
View File

@@ -1,246 +1,14 @@
pve-qemu-kvm (8.1.5-3+vitastor1) bookworm; urgency=medium
pve-qemu-kvm (7.1.0-4+vitastor3) bullseye; urgency=medium
* Add bdrv_co_block_status implementation for QCOW2 export support
-- Vitaliy Filippov <vitalif@yourcmc.ru> Thu, 12 Jan 2023 02:31:18 +0300
pve-qemu-kvm (7.1.0-4+vitastor2) bullseye; urgency=medium
* Add Vitastor support
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 23 Feb 2024 12:19:05 +0300
pve-qemu-kvm (8.1.5-3) bookworm; urgency=medium
* backport fix for potential deadlock during QMP stop command if the VM has
disks attached through VirtIO-Block and IO-Thread enabled
* fix #4507: add patch to automatically increase NOFILE soft limit
-- Proxmox Support Team <support@proxmox.com> Wed, 21 Feb 2024 20:11:23 +0100
pve-qemu-kvm (8.1.5-2) bookworm; urgency=medium
* work around for a situation where guest IO might get stuck, if the VM is
configure with iothread and VirtIO block/SCSI
-- Proxmox Support Team <support@proxmox.com> Fri, 02 Feb 2024 19:41:27 +0100
pve-qemu-kvm (8.1.5-1) bookworm; urgency=medium
* update to 8.1.5 stable release, including more relevant fixes like:
- virtio-net: correctly copy vnet header when flushing TX
- hw/pflash: implement update buffer for block writes
- Fixes to i386 emulation and ARM emulation.
-- Proxmox Support Team <support@proxmox.com> Fri, 02 Feb 2024 19:08:13 +0100
pve-qemu-kvm (8.1.2-6) bookworm; urgency=medium
* revert attempted fix to avoid rare issue with stuck guest IO when using
iothread, because it caused a much more common issue with iothreads
consuming too much CPU
-- Proxmox Support Team <support@proxmox.com> Fri, 15 Dec 2023 14:22:06 +0100
pve-qemu-kvm (8.1.2-5) bookworm; urgency=medium
* backport workaround for stuck guest IO with iothread and VirtIO block/SCSI
in some rare edge cases
* backport fix for potential deadlock when issuing the "resize" QMP command
for a disk that is using iothread
-- Proxmox Support Team <support@proxmox.com> Mon, 11 Dec 2023 16:58:27 +0100
pve-qemu-kvm (8.1.2-4) bookworm; urgency=medium
* fix vnc clipboard in the host to guest direction
-- Proxmox Support Team <support@proxmox.com> Wed, 22 Nov 2023 14:28:21 +0100
pve-qemu-kvm (8.1.2-3) bookworm; urgency=medium
* fix #5054: backport fix for software reset with SATA, avoiding breakage
with, e.g., some FreeBSD VMs
-- Proxmox Support Team <support@proxmox.com> Mon, 20 Nov 2023 10:24:50 +0100
pve-qemu-kvm (8.1.2-2) bookworm; urgency=medium
* revert "x86: acpi: workaround Windows not handling name references in
Package properly" as that seems to have broken networking (and possibly
other things) one some localized variants of Windows (e.g., the German
versions).
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Nov 2023 11:55:23 +0100
pve-qemu-kvm (8.1.2-1) bookworm; urgency=medium
* update submodule and patches to QEMU 8.1.2
* use QEMU's keycode-map-db again instead of our static copy from QEMU 6.0
* disable graph locking, newly introduced in the 8.1 release, as it has
still various deadlock issuess, e.g., during canceling backup jobs.
-- Proxmox Support Team <support@proxmox.com> Tue, 24 Oct 2023 13:42:45 +0200
pve-qemu-kvm (8.0.2-7) bookworm; urgency=medium
* fix #2874: SATA: avoid unsolicited write to sector 0 during reset
-- Proxmox Support Team <support@proxmox.com> Wed, 04 Oct 2023 08:33:35 +0200
pve-qemu-kvm (8.0.2-6) bookworm; urgency=medium
* fix #1534: vma: add extract-filter for disk images allowing users to pass
a comma separated list of the disks they want to extract from an archive.
* backup: create jobs in a drained section to avoid subtle bugs where
something interferes with the block-copy-state bitmap on initialization
* backup: drop experimental, and since a while also fully broken, directory
backup format (BACKUP_FORMAT_DIR). This format was never exposed via the
Proxmox VE API, but only available via QMP, as its broken since QEMU 8 and
we got zero reports about that, it's safe to assume that there are no
public users, so just remove it completely.
-- Proxmox Support Team <support@proxmox.com> Wed, 06 Sep 2023 17:03:59 +0200
pve-qemu-kvm (8.0.2-5) bookworm; urgency=medium
* improve memory footprint after backup by not keeping as much memory
resident.
* fix file descriptor leak for vhost (used by default by vNICs).
-- Proxmox Support Team <support@proxmox.com> Wed, 16 Aug 2023 11:52:24 +0200
pve-qemu-kvm (8.0.2-4) bookworm; urgency=medium
* fix resume for snapshot and hibernate in combination with iothread and
dirty bitmap
-- Proxmox Support Team <support@proxmox.com> Fri, 28 Jul 2023 12:58:22 +0200
pve-qemu-kvm (8.0.2-3) bookworm; urgency=medium
* fix regression in QEMU 8.0 for drive mirror with bitmap
-- Proxmox Support Team <support@proxmox.com> Thu, 15 Jun 2023 13:57:46 +0200
pve-qemu-kvm (8.0.2-2) bookworm; urgency=medium
* drop custom get_link_status QMP command, was never really used.
* drop custom & deprecated drive snapshot QMP commands, we use a better
alternative since a while.
-- Proxmox Support Team <support@proxmox.com> Fri, 09 Jun 2023 07:57:56 +0200
pve-qemu-kvm (8.0.2-1) bookworm; urgency=medium
* update to QEMU stable release 8.0.2
* update patches for avoiding issues with DMA reentrancy to current,
slightly optimized version.
-- Proxmox Support Team <support@proxmox.com> Tue, 06 Jun 2023 16:34:50 +0200
pve-qemu-kvm (8.0.0-1) bookworm; urgency=medium
* update to QEMU stable release 8.0.0
* re-build for Proxmox VE 8 / Debian 12 Bookworm
* adapt to the local virtiofsd C variant being dropped, it has been
rewritten in Rust and is now hosted in a separate source repository.
-- Proxmox Support Team <support@proxmox.com> Mon, 22 May 2023 13:45:49 +0200
pve-qemu-kvm (7.2.0-8) bullseye; urgency=medium
* backport fix for ACPI CPU hotplug issue with TCG
* cherry-pick TCG-related stable fixes for 7.2 for users that turned off KVM
HW acceleration
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Mar 2023 15:47:08 +0100
pve-qemu-kvm (7.2.0-7) bullseye; urgency=medium
* improve fix for potential deadlock with trim for IDE/SATA and draining
* backport stable fixes:
- hw/nvme: fix missing endian conversions for doorbell buffers
- hw/smbios: fix field corruption in type 4 table
- virtio-rng-pci: fix transitional migration compat for vectors
- hw/timer/hpet: Fix expiration time overflow
- vhost/vdpa: stop all svq on device deletion
- vhost: avoid a potential use of an uninitialized variable in the call to
vhost_svq_poll
- chardev/char-socket: set s->listener = NULL in char_socket_finalize to
fix a potential crash after live-migration
- intel-iommu: fail MAP notifier without caching mode
- intel-iommu: fail DEVIOTLB_UNMAP without dt mode
* fix a regression for when the LSI SCSI controller is used
-- Proxmox Support Team <support@proxmox.com> Mon, 13 Mar 2023 17:42:49 +0100
pve-qemu-kvm (7.2.0-6) bullseye; urgency=medium
* fix 7.2 regression for Linux boot failures with megasas SCSI
* fix 7.0 regression for a potential deadlock with trim for IDE/SATA and
draining
-- Proxmox Support Team <support@proxmox.com> Wed, 08 Mar 2023 14:32:17 +0100
pve-qemu-kvm (7.2.0-5) bullseye; urgency=medium
* fix #4476: savevm-async: avoid looping without progress
* savevm-async: decrease the boundary for free space for (memory) state left
on target from 30 MiB to 100 MiB, improving the heuristic for when to
enter the final "pause and sync" stage.
* QMP backup: use correct error number when getting blockdrive length fails
* backport fix for some DMA reentrancy issues, better protecting against
malicious guests
* backport fix for iSCSI double free issue leading to crashes
-- Proxmox Support Team <support@proxmox.com> Tue, 21 Feb 2023 13:49:43 +0100
pve-qemu-kvm (7.2.0-4) bullseye; urgency=medium
* backport fix for a 7.2 regression when using VirtIO disk with
detect-zeroes=unmap
-- Proxmox Support Team <support@proxmox.com> Fri, 27 Jan 2023 09:37:49 +0100
pve-qemu-kvm (7.2.0-3) bullseye; urgency=medium
* add fix for live-migration with virtio-rng devices, which regressed in
QEMU 7.2.0.
-- Proxmox Support Team <support@proxmox.com> Thu, 12 Jan 2023 13:13:14 +0100
pve-qemu-kvm (7.2.0-2) bullseye; urgency=medium
* enable slirp again for now, as in qemu-server, user networking is
supported (via CLI/API) when no bridge is set on a virtual NIC
* cherry-pick stable fixes for 7.2. Two for virtio-mem and one for vIOMMU.
Both features are not yet exposed in PVE's qemu-server, but there's work
going on to change that.
-- Proxmox Support Team <support@proxmox.com> Tue, 10 Jan 2023 15:47:48 +0100
pve-qemu-kvm (7.2.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.2.0
* drop 'slirp' networking
-- Proxmox Support Team <support@proxmox.com> Fri, 16 Dec 2022 13:18:21 +0100
-- Vitaliy Filippov <vitalif@yourcmc.ru> Thu, 15 Dec 2022 19:32:28 +0300
pve-qemu-kvm (7.1.0-4) bullseye; urgency=medium

1
debian/compat vendored Normal file
View File

@@ -0,0 +1 @@
10

29
debian/control vendored
View File

@@ -2,8 +2,9 @@ Source: pve-qemu-kvm
Section: admin
Priority: optional
Maintainer: Proxmox Support Team <support@proxmox.com>
Build-Depends: debhelper-compat (= 13),
Build-Depends: autotools-dev,
check,
debhelper (>= 9),
libacl1-dev,
libaio-dev,
libattr1-dev,
@@ -15,21 +16,21 @@ Build-Depends: debhelper-compat (= 13),
libglusterfs-dev (>= 5.2-2),
libgnutls28-dev,
libiscsi-dev (>= 1.12.0),
libjemalloc-dev,
libjpeg-dev,
libjson-perl,
libnuma-dev,
libpci-dev,
libpixman-1-dev,
libproxmox-backup-qemu0-dev (>= 1.3.0),
libproxmox-backup-qemu0-dev (>= 1.3.0-1),
librbd-dev (>= 0.48),
libsdl1.2-dev,
libseccomp-dev,
libslirp-dev,
libspice-protocol-dev (>= 0.12.14~),
libspice-server-dev (>= 0.14.0~),
libsystemd-dev,
liburing-dev,
libusb-1.0-0-dev (>= 1.0.17),
libusb-1.0-0-dev (>= 1.0.17-1),
libusbredirparser-dev (>= 0.6-2),
libvirglrenderer-dev,
libzstd-dev,
@@ -37,8 +38,9 @@ Build-Depends: debhelper-compat (= 13),
python3-minimal,
python3-sphinx,
python3-sphinx-rtd-theme,
python3-venv,
quilt,
texi2html,
texinfo,
uuid-dev,
xfslibs-dev,
Standards-Version: 3.7.2
@@ -55,16 +57,16 @@ Depends: ceph-common (>= 0.48),
libglusterfs-dev | glusterfs-common (>= 5.6),
libglusterfs0 | glusterfs-common (>= 5.6),
libiscsi4 (>= 1.12.0) | libiscsi7,
libjemalloc2,
libjpeg62-turbo,
libspice-server1 (>= 0.14.0~),
libusb-1.0-0 (>= 1.0.17-1),
libusbredirparser1 (>= 0.6-2),
vitastor-client (>= 0.9.4),
libuuid1,
${misc:Depends},
${shlibs:Depends},
Recommends: numactl,
Suggests: libgl1,
Recommends: numactl
Suggests: libgl1
Conflicts: kvm,
pve-kvm,
pve-qemu-kvm-2.6.18,
@@ -72,17 +74,22 @@ Conflicts: kvm,
qemu-kvm,
qemu-system-arm,
qemu-system-common,
qemu-system-data,
qemu-system-x86,
qemu-utils,
Provides: qemu-system-arm, qemu-system-x86, qemu-utils,
Provides: qemu-system-arm, qemu-system-x86, qemu-utils
Replaces: pve-kvm,
pve-qemu-kvm-2.6.18,
qemu-system-arm,
qemu-system-x86,
qemu-utils,
Breaks: qemu-server (<= 8.0.6)
Description: Full virtualization on x86 hardware
Using KVM, one can run multiple virtual PCs, each running unmodified Linux or
Windows images. Each virtual machine has private virtualized hardware: a
network card, disk, graphics adapter, etc.
Package: pve-qemu-kvm-dbg
Architecture: any
Section: debug
Depends: pve-qemu-kvm (= ${binary:Version})
Description: pve qemu debugging symbols
This package contains the debugging symbols for pve-qemu-kvm.

View File

@@ -24,5 +24,4 @@ while (<STDIN>) {
die "no QEMU machine types detected from STDIN input" if scalar (@$machines) <= 0;
print to_json($machines, { utf8 => 1, canonical => 1 })
or die "failed to encode detected machines as JSON - $!\n";
print to_json($machines, { utf8 => 1 }) or die "$!\n";

View File

@@ -27,18 +27,16 @@ Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebased for 8.1.1]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/mirror.c | 98 +++++++++++++++++++++-----
blockdev.c | 38 +++++++++-
blockdev.c | 39 +++++++++-
include/block/block_int-global-state.h | 4 +-
qapi/block-core.json | 25 ++++++-
qapi/block-core.json | 29 ++++++--
tests/unit/test-block-iothread.c | 4 +-
5 files changed, 142 insertions(+), 27 deletions(-)
5 files changed, 145 insertions(+), 29 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index d3cacd1708..1ff42c8af1 100644
index 3c4ab1159d..f2eca983f1 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob {
@@ -59,7 +57,7 @@ index d3cacd1708..1ff42c8af1 100644
BdrvDirtyBitmap *dirty_bitmap;
BdrvDirtyBitmapIter *dbi;
uint8_t *buf;
@@ -705,7 +707,8 @@ static int mirror_exit_common(Job *job)
@@ -696,7 +698,8 @@ static int mirror_exit_common(Job *job)
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
@@ -69,7 +67,7 @@ index d3cacd1708..1ff42c8af1 100644
BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
if (bdrv_cow_bs(unfiltered_target) != backing) {
@@ -809,6 +812,16 @@ static void mirror_abort(Job *job)
@@ -794,6 +797,16 @@ static void mirror_abort(Job *job)
assert(ret == 0);
}
@@ -86,7 +84,7 @@ index d3cacd1708..1ff42c8af1 100644
static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -997,7 +1010,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
@@ -973,7 +986,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_free_init(s);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -96,7 +94,7 @@ index d3cacd1708..1ff42c8af1 100644
ret = mirror_dirty_init(s);
if (ret < 0 || job_is_cancelled(&s->common.job)) {
goto immediate_exit;
@@ -1251,6 +1265,7 @@ static const BlockJobDriver mirror_job_driver = {
@@ -1212,6 +1226,7 @@ static const BlockJobDriver mirror_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
@@ -104,7 +102,7 @@ index d3cacd1708..1ff42c8af1 100644
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = mirror_cancel,
@@ -1267,6 +1282,7 @@ static const BlockJobDriver commit_active_job_driver = {
@@ -1228,6 +1243,7 @@ static const BlockJobDriver commit_active_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
@@ -112,7 +110,7 @@ index d3cacd1708..1ff42c8af1 100644
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = commit_active_cancel,
@@ -1658,7 +1674,10 @@ static BlockJob *mirror_start_job(
@@ -1593,7 +1609,10 @@ static BlockJob *mirror_start_job(
BlockCompletionFunc *cb,
void *opaque,
const BlockJobDriver *driver,
@@ -124,7 +122,7 @@ index d3cacd1708..1ff42c8af1 100644
bool auto_complete, const char *filter_node_name,
bool is_mirror, MirrorCopyMode copy_mode,
Error **errp)
@@ -1670,10 +1689,39 @@ static BlockJob *mirror_start_job(
@@ -1605,10 +1624,39 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
@@ -166,7 +164,7 @@ index d3cacd1708..1ff42c8af1 100644
assert(is_power_of_2(granularity));
if (buf_size < 0) {
@@ -1804,7 +1852,9 @@ static BlockJob *mirror_start_job(
@@ -1740,7 +1788,9 @@ static BlockJob *mirror_start_job(
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
@@ -177,7 +175,7 @@ index d3cacd1708..1ff42c8af1 100644
s->backing_mode = backing_mode;
s->zero_target = zero_target;
s->copy_mode = copy_mode;
@@ -1825,6 +1875,18 @@ static BlockJob *mirror_start_job(
@@ -1761,6 +1811,18 @@ static BlockJob *mirror_start_job(
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
}
@@ -196,7 +194,7 @@ index d3cacd1708..1ff42c8af1 100644
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
BLK_PERM_CONSISTENT_READ,
@@ -1902,6 +1964,9 @@ fail:
@@ -1838,6 +1900,9 @@ fail:
if (s->dirty_bitmap) {
bdrv_release_dirty_bitmap(s->dirty_bitmap);
}
@@ -206,7 +204,7 @@ index d3cacd1708..1ff42c8af1 100644
job_early_fail(&s->common.job);
}
@@ -1919,31 +1984,25 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1855,31 +1920,25 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -243,7 +241,7 @@ index d3cacd1708..1ff42c8af1 100644
}
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1970,7 +2029,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1906,7 +1965,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN, false,
on_error, on_error, true, cb, opaque,
@@ -254,32 +252,33 @@ index d3cacd1708..1ff42c8af1 100644
errp);
if (!job) {
diff --git a/blockdev.c b/blockdev.c
index c28462a633..a402fa4bf7 100644
index 9230888e34..9a1a3118ed 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2849,6 +2849,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2951,6 +2951,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
BlockDriverState *target,
const char *replaces,
bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
+ bool has_bitmap,
+ const char *bitmap_name,
+ bool has_bitmap_mode,
+ BitmapSyncMode bitmap_mode,
BlockMirrorBackingMode backing_mode,
bool zero_target,
bool has_speed, int64_t speed,
@@ -2867,6 +2870,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2970,6 +2974,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
{
BlockDriverState *unfiltered_bs;
int job_flags = JOB_DEFAULT;
+ BdrvDirtyBitmap *bitmap = NULL;
GLOBAL_STATE_CODE();
GRAPH_RDLOCK_GUARD_MAINLOOP();
@@ -2921,6 +2925,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_speed) {
speed = 0;
@@ -3024,6 +3029,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
+ if (bitmap_name) {
+ if (has_bitmap) {
+ if (granularity) {
+ error_setg(errp, "Granularity and bitmap cannot both be set");
+ return;
@@ -302,53 +301,53 @@ index c28462a633..a402fa4bf7 100644
+ }
+ }
+
if (!replaces) {
if (!has_replaces) {
/* We want to mirror from @bs, but keep implicit filters on top */
unfiltered_bs = bdrv_skip_implicit_filters(bs);
@@ -2966,8 +2993,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3070,8 +3098,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
* and will allow to check whether the node still exist at mirror completion
*/
mirror_start(job_id, bs, target,
- replaces, job_flags,
- has_replaces ? replaces : NULL, job_flags,
- speed, granularity, buf_size, sync, backing_mode, zero_target,
+ replaces, job_flags, speed, granularity, buf_size, sync,
+ bitmap, bitmap_mode, backing_mode, zero_target,
+ has_replaces ? replaces : NULL, job_flags, speed, granularity,
+ buf_size, sync, bitmap, bitmap_mode, backing_mode, zero_target,
on_source_error, on_target_error, unmap, filter_node_name,
copy_mode, errp);
}
@@ -3115,6 +3142,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3216,6 +3244,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
blockdev_mirror_common(arg->job_id, bs, target_bs,
arg->replaces, arg->sync,
+ arg->bitmap,
blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
arg->has_replaces, arg->replaces, arg->sync,
+ arg->has_bitmap, arg->bitmap,
+ arg->has_bitmap_mode, arg->bitmap_mode,
backing_mode, zero_target,
arg->has_speed, arg->speed,
arg->has_granularity, arg->granularity,
@@ -3136,6 +3165,8 @@ void qmp_blockdev_mirror(const char *job_id,
@@ -3237,6 +3267,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
const char *device, const char *target,
const char *replaces,
bool has_replaces, const char *replaces,
MirrorSyncMode sync,
+ const char *bitmap,
+ bool has_bitmap, const char *bitmap,
+ bool has_bitmap_mode, BitmapSyncMode bitmap_mode,
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size,
@@ -3184,7 +3215,8 @@ void qmp_blockdev_mirror(const char *job_id,
@@ -3286,7 +3318,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
}
blockdev_mirror_common(job_id, bs, target_bs,
- replaces, sync, backing_mode,
+ replaces, sync,
blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
- has_replaces, replaces, sync, backing_mode,
+ has_replaces, replaces, sync, has_bitmap,
+ bitmap, has_bitmap_mode, bitmap_mode, backing_mode,
zero_target, has_speed, speed,
has_granularity, granularity,
has_buf_size, buf_size,
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index da5fb31089..32f0f9858a 100644
index b49f4eb35b..9d744db618 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -152,7 +152,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -149,7 +149,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -360,26 +359,31 @@ index da5fb31089..32f0f9858a 100644
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bca1a0c372..a5cea82139 100644
index 2173e7734a..e1857e7094 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2145,6 +2145,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
@@ -2000,10 +2000,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
+# argument must be present for bitmap mode and absent otherwise.
+# The bitmap's granularity is used instead of @granularity (Since
+# 4.1).
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
+# be present for bitmap mode and absent otherwise. The bitmap's
+# granularity is used instead of @granularity (since 4.1).
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+# after the operation concludes. Must be present if sync is
+# "bitmap". Must NOT be present otherwise. (Since 4.1)
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+# the operation concludes. Must be present if sync is "bitmap".
+# Must NOT be present otherwise. (Since 4.1)
+#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
@@ -2187,7 +2196,9 @@
# @granularity: granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M (since 1.4).
+# power of 2 between 512 and 64M. Must not be specified if
+# @bitmap is present (since 1.4).
#
# @buf-size: maximum amount of data in flight from source to
# target (since 1.4).
@@ -2043,7 +2052,9 @@
{ 'struct': 'DriveMirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*format': 'str', '*node-name': 'str', '*replaces': 'str',
@@ -390,23 +394,28 @@ index bca1a0c372..a5cea82139 100644
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
@@ -2471,6 +2482,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
@@ -2322,10 +2333,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This
+# argument must be present for bitmap mode and absent otherwise.
+# The bitmap's granularity is used instead of @granularity (since
+# 4.1).
+# @bitmap: The name of a bitmap to use for sync=bitmap mode. This argument must
+# be present for bitmap mode and absent otherwise. The bitmap's
+# granularity is used instead of @granularity (since 4.1).
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+# after the operation concludes. Must be present if sync is
+# "bitmap". Must NOT be present otherwise. (Since 4.1)
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+# the operation concludes. Must be present if sync is "bitmap".
+# Must NOT be present otherwise. (Since 4.1)
+#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
@@ -2521,7 +2541,8 @@
# @granularity: granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M
+# power of 2 between 512 and 64M . Must not be specified if
+# @bitmap is present.
#
# @buf-size: maximum amount of data in flight from source to
# target
@@ -2375,7 +2395,8 @@
{ 'command': 'blockdev-mirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*replaces': 'str',
@@ -417,10 +426,10 @@ index bca1a0c372..a5cea82139 100644
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index d727a5fee8..8a34aa2328 100644
index 8b55eccc89..f4650be8e5 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -757,8 +757,8 @@ static void test_propagate_mirror(void)
@@ -753,8 +753,8 @@ static void test_propagate_mirror(void)
/* Start a mirror job */
mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,
@@ -430,4 +439,4 @@ index d727a5fee8..8a34aa2328 100644
+ false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
&error_abort);
WITH_JOB_LOCK_GUARD() {
job = job_get("job0");

View File

@@ -24,10 +24,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 1ff42c8af1..11b8a8e959 100644
index f2eca983f1..b6475d50ad 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -682,8 +682,6 @@ static int mirror_exit_common(Job *job)
@@ -673,8 +673,6 @@ static int mirror_exit_common(Job *job)
bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
}
@@ -36,7 +36,7 @@ index 1ff42c8af1..11b8a8e959 100644
/* Make sure that the source BDS doesn't go away during bdrv_replace_node,
* before we can call bdrv_drained_end */
bdrv_ref(src);
@@ -788,6 +786,18 @@ static int mirror_exit_common(Job *job)
@@ -775,6 +773,18 @@ static int mirror_exit_common(Job *job)
block_job_remove_all_bdrv(bjob);
bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
@@ -55,7 +55,7 @@ index 1ff42c8af1..11b8a8e959 100644
bs_opaque->job = NULL;
bdrv_drained_end(src);
@@ -1699,10 +1709,6 @@ static BlockJob *mirror_start_job(
@@ -1634,10 +1644,6 @@ static BlockJob *mirror_start_job(
" sync mode",
MirrorSyncMode_str(sync_mode));
return NULL;
@@ -66,7 +66,7 @@ index 1ff42c8af1..11b8a8e959 100644
}
} else if (bitmap) {
error_setg(errp,
@@ -1719,6 +1725,12 @@ static BlockJob *mirror_start_job(
@@ -1654,6 +1660,12 @@ static BlockJob *mirror_start_job(
return NULL;
}
granularity = bdrv_dirty_bitmap_granularity(bitmap);

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 3 insertions(+)
diff --git a/blockdev.c b/blockdev.c
index a402fa4bf7..01b0ab0549 100644
index 9a1a3118ed..a57b0af2e7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2946,6 +2946,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3050,6 +3050,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -28,4 +28,4 @@ index a402fa4bf7..01b0ab0549 100644
+ return;
}
if (!replaces) {
if (!has_replaces) {

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 11b8a8e959..00f2665ca4 100644
index b6475d50ad..8b3342f9ec 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -792,8 +792,8 @@ static int mirror_exit_common(Job *job)
@@ -779,8 +779,8 @@ static int mirror_exit_common(Job *job)
job->ret == 0 && ret == 0)) {
/* Success; synchronize copy back to sync. */
bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
@@ -30,7 +30,7 @@ index 11b8a8e959..00f2665ca4 100644
}
}
bdrv_release_dirty_bitmap(s->dirty_bitmap);
@@ -1892,11 +1892,8 @@ static BlockJob *mirror_start_job(
@@ -1828,11 +1828,8 @@ static BlockJob *mirror_start_job(
}
if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {

View File

@@ -12,8 +12,6 @@ uniform w.r.t. backup block jobs.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: rebase for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/mirror.c | 28 +++------------
blockdev.c | 29 +++++++++++++++
@@ -21,10 +19,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
3 files changed, 70 insertions(+), 59 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 00f2665ca4..60cf574de5 100644
index 8b3342f9ec..1d4ff0efad 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1699,31 +1699,13 @@ static BlockJob *mirror_start_job(
@@ -1634,31 +1634,13 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
@@ -62,17 +60,17 @@ index 00f2665ca4..60cf574de5 100644
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
diff --git a/blockdev.c b/blockdev.c
index 01b0ab0549..cd5f205ad1 100644
index a57b0af2e7..ce62a9b439 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2925,7 +2925,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -3029,7 +3029,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
+ if ((sync == MIRROR_SYNC_MODE_BITMAP) ||
+ (sync == MIRROR_SYNC_MODE_INCREMENTAL)) {
+ /* done before desugaring 'incremental' to print the right message */
+ if (!bitmap_name) {
+ if (!has_bitmap) {
+ error_setg(errp, "Must provide a valid bitmap name for "
+ "'%s' sync mode", MirrorSyncMode_str(sync));
+ return;
@@ -93,7 +91,7 @@ index 01b0ab0549..cd5f205ad1 100644
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ }
+
if (bitmap_name) {
if (has_bitmap) {
+ if (sync != MIRROR_SYNC_MODE_BITMAP) {
+ error_setg(errp, "Sync mode '%s' not supported with bitmap.",
+ MirrorSyncMode_str(sync));

View File

@@ -48,7 +48,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6 files changed, 59 insertions(+), 5 deletions(-)
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 965f5d5450..e04bd059b6 100644
index a4b40e8391..d64ae8f34e 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts;
@@ -60,10 +60,10 @@ index 965f5d5450..e04bd059b6 100644
void monitor_init_globals(void);
void monitor_init_globals_core(void);
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
index 252de85681..8db28f9272 100644
index caa2e90ef2..e1596f79ab 100644
--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
@@ -151,6 +151,13 @@ typedef struct {
@@ -152,6 +152,13 @@ typedef struct {
QemuMutex qmp_queue_lock;
/* Input queue that holds all the parsed QMP requests */
GQueue *qmp_requests;
@@ -78,10 +78,10 @@ index 252de85681..8db28f9272 100644
/**
diff --git a/monitor/monitor.c b/monitor/monitor.c
index dc352f9e9d..56e1307014 100644
index 86949024f6..c306cadcf4 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -117,6 +117,21 @@ bool monitor_cur_is_qmp(void)
@@ -135,6 +135,21 @@ bool monitor_cur_is_qmp(void)
return cur_mon && monitor_is_qmp(cur_mon);
}
@@ -104,10 +104,10 @@ index dc352f9e9d..56e1307014 100644
* Is @mon is using readline?
* Note: not all HMP monitors use readline, e.g., gdbserver has a
diff --git a/monitor/qmp.c b/monitor/qmp.c
index a239945e8d..589c9524f8 100644
index 092c527b6f..6b8cfcf6d8 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -165,6 +165,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
@@ -141,6 +141,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
QDict *rsp;
QDict *error;
@@ -116,7 +116,7 @@ index a239945e8d..589c9524f8 100644
rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
&mon->common);
@@ -180,7 +182,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
@@ -156,7 +158,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
}
}
@@ -135,7 +135,7 @@ index a239945e8d..589c9524f8 100644
qobject_unref(rsp);
}
@@ -461,6 +473,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
@@ -444,6 +456,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
switch (event) {
case CHR_EVENT_OPENED:
@@ -144,7 +144,7 @@ index a239945e8d..589c9524f8 100644
monitor_qmp_caps_reset(mon);
data = qmp_greeting(mon);
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 176b549473..790bb7d1da 100644
index 0990873ec8..e605003771 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -117,16 +117,28 @@ typedef struct QmpDispatchBH {
@@ -180,13 +180,13 @@ index 176b549473..790bb7d1da 100644
aio_co_wake(data->co);
}
@@ -253,6 +265,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
@@ -231,6 +243,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
.ret = &ret,
.errp = &err,
.co = qemu_coroutine_self(),
+ .conn_nr = monitor_get_connection_nr(cur_mon),
};
aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh,
aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
&data);
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
index afa477aae6..d3ff124bf3 100644

View File

@@ -0,0 +1,76 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sam Li <faithilikerun@gmail.com>
Date: Sat, 24 Sep 2022 22:48:15 +0800
Subject: [PATCH] block/io_uring: revert "Use io_uring_register_ring_fd() to
skip fd operations"
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1193
The commit "Use io_uring_register_ring_fd() to skip fd operations" broke
when booting a guest with iothread and io_uring. That is because the
io_uring_register_ring_fd() call is made from the main thread instead of
IOThread where io_uring_submit() is called. It can not be guaranteed
to register the ring fd in the correct thread or unregister the same ring
fd if the IOThread is disabled. This optimization is not critical so we
will revert previous commit.
This reverts commit e2848bc574fe2715c694bf8fe9a1ba7f78a1125a
and 77e3f038af1764983087e3551a0fde9951952c4d.
Signed-off-by: Sam Li <faithilikerun@gmail.com>
---
block/io_uring.c | 13 +------------
meson.build | 1 -
2 files changed, 1 insertion(+), 13 deletions(-)
diff --git a/block/io_uring.c b/block/io_uring.c
index a1760152e0..973e15d876 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -11,7 +11,6 @@
#include "qemu/osdep.h"
#include <liburing.h>
#include "block/aio.h"
-#include "qemu/error-report.h"
#include "qemu/queue.h"
#include "block/block.h"
#include "block/raw-aio.h"
@@ -19,7 +18,6 @@
#include "qapi/error.h"
#include "trace.h"
-
/* io_uring ring size */
#define MAX_ENTRIES 128
@@ -432,17 +430,8 @@ LuringState *luring_init(Error **errp)
}
ioq_init(&s->io_q);
-#ifdef CONFIG_LIBURING_REGISTER_RING_FD
- if (io_uring_register_ring_fd(&s->ring) < 0) {
- /*
- * Only warn about this error: we will fallback to the non-optimized
- * io_uring operations.
- */
- warn_report("failed to register linux io_uring ring file descriptor");
- }
-#endif
-
return s;
+
}
void luring_cleanup(LuringState *s)
diff --git a/meson.build b/meson.build
index 20fddbd707..d5230eadd6 100644
--- a/meson.build
+++ b/meson.build
@@ -1793,7 +1793,6 @@ config_host_data.set('CONFIG_LIBNFS', libnfs.found())
config_host_data.set('CONFIG_LIBSSH', libssh.found())
config_host_data.set('CONFIG_LINUX_AIO', libaio.found())
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
-config_host_data.set('CONFIG_LIBURING_REGISTER_RING_FD', cc.has_function('io_uring_register_ring_fd', prefix: '#include <liburing.h>', dependencies:linux_io_uring))
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())

View File

@@ -1,69 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 28 Feb 2023 09:11:29 -0800
Subject: [PATCH] scsi: megasas: Internal cdbs have 16-byte length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Host drivers do not necessarily set cdb_len in megasas io commands.
With commits 6d1511cea0 ("scsi: Reject commands if the CDB length
exceeds buf_len") and fe9d8927e2 ("scsi: Add buf_len parameter to
scsi_req_new()"), this results in failures to boot Linux from affected
SCSI drives because cdb_len is set to 0 by the host driver.
Set the cdb length to its actual size to solve the problem.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(picked-up from https://lists.nongnu.org/archive/html/qemu-devel/2023-02/msg08653.html)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/scsi/megasas.c | 14 ++------------
1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 32c70c9e99..984b6a3145 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -1781,7 +1781,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
uint8_t cdb[16];
int len;
struct SCSIDevice *sdev = NULL;
- int target_id, lun_id, cdb_len;
+ int target_id, lun_id;
lba_count = le32_to_cpu(cmd->frame->io.header.data_len);
lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo);
@@ -1790,7 +1790,6 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
target_id = cmd->frame->header.target_id;
lun_id = cmd->frame->header.lun_id;
- cdb_len = cmd->frame->header.cdb_len;
if (target_id < MFI_MAX_LD && lun_id == 0) {
sdev = scsi_device_find(&s->bus, 0, target_id, lun_id);
@@ -1805,15 +1804,6 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
return MFI_STAT_DEVICE_NOT_FOUND;
}
- if (cdb_len > 16) {
- trace_megasas_scsi_invalid_cdb_len(
- mfi_frame_desc(frame_cmd), 1, target_id, lun_id, cdb_len);
- megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
- cmd->frame->header.scsi_status = CHECK_CONDITION;
- s->event_count++;
- return MFI_STAT_SCSI_DONE_WITH_ERROR;
- }
-
cmd->iov_size = lba_count * sdev->blocksize;
if (megasas_map_sgl(s, cmd, &cmd->frame->io.sgl)) {
megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE));
@@ -1824,7 +1814,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
megasas_encode_lba(cdb, lba_start, lba_count, is_write);
cmd->req = scsi_req_new(sdev, cmd->index,
- lun_id, cdb, cdb_len, cmd);
+ lun_id, cdb, sizeof(cdb), cmd);
if (!cmd->req) {
trace_megasas_scsi_req_alloc_failed(
mfi_frame_desc(frame_cmd), target_id, lun_id);

View File

@@ -1,100 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Tue, 7 Mar 2023 15:03:02 +0100
Subject: [PATCH] ide: avoid potential deadlock when draining during trim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The deadlock can happen as follows:
1. ide_issue_trim is called, and increments the in_flight counter.
2. ide_issue_trim_cb calls blk_aio_pdiscard.
3. Somebody else starts draining (e.g. backup to insert the cbw node).
4. ide_issue_trim_cb is called as the completion callback for
blk_aio_pdiscard.
5. ide_issue_trim_cb issues yet another blk_aio_pdiscard request.
6. The request is added to the wait queue via blk_wait_while_drained,
because draining has been started.
7. Nobody ever decrements the in_flight counter and draining can't
finish. This would be done by ide_trim_bh_cb, which is called after
ide_issue_trim_cb has issued its last request, but
ide_issue_trim_cb is not called anymore, because it's the
completion callback of blk_aio_pdiscard, which waits on draining.
Quoting Hanna Czenczek:
> The point of 7e5cdb345f was that we need any in-flight count to
> accompany a set s->bus->dma->aiocb. While blk_aio_pdiscard() is
> happening, we dont necessarily need another count. But we do need
> it while there is no blk_aio_pdiscard().
> ide_issue_trim_cb() returns in two cases (and, recursively through
> its callers, leaves s->bus->dma->aiocb set):
> 1. After calling blk_aio_pdiscard(), which will keep an in-flight
> count,
> 2. After calling replay_bh_schedule_event() (i.e.
> qemu_bh_schedule()), which does not keep an in-flight count.
Thus, even after moving the blk_inc_in_flight to above the
replay_bh_schedule_event call, the invariant "ide_issue_trim_cb
returns with an accompanying in-flight count" is still satisfied.
However, the issue 7e5cdb345f fixed for canceling resurfaces, because
ide_cancel_dma_sync assumes that it just needs to drain once. But now
the in_flight count is not consistently > 0 during the trim operation.
So, change it to drain until !s->bus->dma->aiocb, which means that the
operation finished (s->bus->dma->aiocb is cleared by ide_set_inactive
via the ide_dma_cb when the end of the transfer is reached).
Discussion here:
https://lists.nongnu.org/archive/html/qemu-devel/2023-03/msg02506.html
Fixes: 7e5cdb345f ("ide: Increment BB in-flight counter for TRIM BH")
Suggested-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/ide/core.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/ide/core.c b/hw/ide/core.c
index c3508acbb1..289347af58 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -444,7 +444,7 @@ static void ide_trim_bh_cb(void *opaque)
iocb->bh = NULL;
qemu_aio_unref(iocb);
- /* Paired with an increment in ide_issue_trim() */
+ /* Paired with an increment in ide_issue_trim_cb() */
blk_dec_in_flight(blk);
}
@@ -504,6 +504,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
done:
iocb->aiocb = NULL;
if (iocb->bh) {
+ /* Paired with a decrement in ide_trim_bh_cb() */
+ blk_inc_in_flight(s->blk);
replay_bh_schedule_event(iocb->bh);
}
}
@@ -516,9 +518,6 @@ BlockAIOCB *ide_issue_trim(
IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
TrimAIOCB *iocb;
- /* Paired with a decrement in ide_trim_bh_cb() */
- blk_inc_in_flight(s->blk);
-
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s;
iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
@@ -742,8 +741,9 @@ void ide_cancel_dma_sync(IDEState *s)
*/
if (s->bus->dma->aiocb) {
trace_ide_cancel_dma_sync_remaining();
- blk_drain(s->blk);
- assert(s->bus->dma->aiocb == NULL);
+ while (s->bus->dma->aiocb) {
+ blk_drain(s->blk);
+ }
}
}

View File

@@ -0,0 +1,51 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Yusuke Okada <okada.yusuke@jp.fujitsu.com>
Date: Thu, 18 Aug 2022 14:46:19 -0400
Subject: [PATCH] virtiofsd: use g_date_time_get_microsecond to get subsecond
The "%f" specifier in g_date_time_format() is only available in glib
2.65.2 or later. If combined with older glib, the function returns null
and the timestamp displayed as "(null)".
For backward compatibility, g_date_time_get_microsecond should be used
to retrieve subsecond.
In this patch the g_date_time_format() leaves subsecond field as "%06d"
and let next snprintf to format with g_date_time_get_microsecond.
Signed-off-by: Yusuke Okada <okada.yusuke@jp.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: 20220818184618.2205172-1-yokada.996@gmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry-picked from commit f16d15c9276bd8f501f861c39cbd4adc812d0c1d)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
tools/virtiofsd/passthrough_ll.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 371a7bead6..20f0f41f99 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -4185,6 +4185,7 @@ static void setup_nofile_rlimit(unsigned long rlimit_nofile)
static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
{
g_autofree char *localfmt = NULL;
+ char buf[64];
if (current_log_level < level) {
return;
@@ -4197,9 +4198,11 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
fmt);
} else {
g_autoptr(GDateTime) now = g_date_time_new_now_utc();
- g_autofree char *nowstr = g_date_time_format(now, "%Y-%m-%d %H:%M:%S.%f%z");
+ g_autofree char *nowstr = g_date_time_format(now,
+ "%Y-%m-%d %H:%M:%S.%%06d%z");
+ snprintf(buf, 64, nowstr, g_date_time_get_microsecond(now));
localfmt = g_strdup_printf("[%s] [ID: %08ld] %s",
- nowstr, syscall(__NR_gettid), fmt);
+ buf, syscall(__NR_gettid), fmt);
}
fmt = localfmt;
}

View File

@@ -0,0 +1,56 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maksim Davydov <davydov-max@yandex-team.ru>
Date: Thu, 25 Aug 2022 19:52:47 +0300
Subject: [PATCH] chardev: fix segfault in finalize
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If finalize chardev-msmouse or chardev-wctable is called immediately after
init it cases QEMU to crash with segfault. This happens because of
QTAILQ_REMOVE in qemu_input_handler_unregister tries to dereference
NULL pointer.
For instance, this error can be reproduced via `qom-list-properties`
command.
Signed-off-by: Maksim Davydov <davydov-max@yandex-team.ru>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20220825165247.33704-1-davydov-max@yandex-team.ru>
(trivial backport from fc0c128531ed55f058bfbad4f1348ebd9a0187f2)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
chardev/msmouse.c | 4 +++-
chardev/wctablet.c | 4 +++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/chardev/msmouse.c b/chardev/msmouse.c
index eb9231dcdb..2cc1b16561 100644
--- a/chardev/msmouse.c
+++ b/chardev/msmouse.c
@@ -146,7 +146,9 @@ static void char_msmouse_finalize(Object *obj)
{
MouseChardev *mouse = MOUSE_CHARDEV(obj);
- qemu_input_handler_unregister(mouse->hs);
+ if (mouse->hs) {
+ qemu_input_handler_unregister(mouse->hs);
+ }
}
static QemuInputHandler msmouse_handler = {
diff --git a/chardev/wctablet.c b/chardev/wctablet.c
index e8b292c43c..43bdf6b608 100644
--- a/chardev/wctablet.c
+++ b/chardev/wctablet.c
@@ -319,7 +319,9 @@ static void wctablet_chr_finalize(Object *obj)
{
TabletChardev *tablet = WCTABLET_CHARDEV(obj);
- qemu_input_handler_unregister(tablet->hs);
+ if (tablet->hs) {
+ qemu_input_handler_unregister(tablet->hs);
+ }
}
static void wctablet_chr_open(Chardev *chr,

View File

@@ -1,48 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 28 Jul 2023 10:47:48 +0200
Subject: [PATCH] migration/block-dirty-bitmap: fix loading bitmap when there
is an iothread
The bdrv_create_dirty_bitmap() function (which is also called by
bdrv_dirty_bitmap_create_successor()) uses bdrv_getlength(bs). This is
a wrapper around a coroutine, and thus uses bdrv_poll_co(). Polling
tries to release the AioContext which will trigger an assert() if it
hasn't been acquired before.
The issue does not happen for migration, because there we are in a
coroutine already, so the wrapper will just call bdrv_co_getlength()
directly without polling.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/block-dirty-bitmap.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 032fc5f405..e1ae3b7316 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -805,8 +805,11 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
"destination", bdrv_dirty_bitmap_name(s->bitmap));
return -EINVAL;
} else {
+ AioContext *ctx = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(ctx);
s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
s->bitmap_name, &local_err);
+ aio_context_release(ctx);
if (!s->bitmap) {
error_report_err(local_err);
return -EINVAL;
@@ -833,7 +836,10 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
bdrv_disable_dirty_bitmap(s->bitmap);
if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) {
+ AioContext *ctx = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(ctx);
bdrv_dirty_bitmap_create_successor(s->bitmap, &local_err);
+ aio_context_release(ctx);
if (local_err) {
error_report_err(local_err);
return -EINVAL;

View File

@@ -1,140 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 28 Sep 2023 10:07:03 +0200
Subject: [PATCH] Revert "Revert "graph-lock: Disable locking for now""
This reverts commit 3cce22defb4b0e47cf135444e30cc673cff5ebad.
There are still some issues with graph locking, e.g. deadlocks during
backup canceling [0]. Because the AioContext locks still exist, it
should be safe to disable locking again.
From the original 80fc5d2600 ("graph-lock: Disable locking for now"):
> We don't currently rely on graph locking yet. It is supposed to replace
> the AioContext lock eventually to enable multiqueue support, but as long
> as we still have the AioContext lock, it is sufficient without the graph
> lock. Once the AioContext lock goes away, the deadlock doesn't exist any
> more either and this commit can be reverted. (Of course, it can also be
> reverted while the AioContext lock still exists if the callers have been
> fixed.)
[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/graph-lock.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 5e66f01ae8..5c2873262a 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
+#if 0
/* Written and read with atomic operations. */
static int has_writer;
+#endif
/*
* A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
g_free(ctx->bdrv_graph);
}
+#if 0
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
@@ -105,12 +108,19 @@ static uint32_t reader_count(void)
assert((int32_t)rd >= 0);
return rd;
}
+#endif
void bdrv_graph_wrlock(BlockDriverState *bs)
{
+#if 0
AioContext *ctx = NULL;
GLOBAL_STATE_CODE();
+ /*
+ * TODO Some callers hold an AioContext lock when this is called, which
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+ * AioContext locks are gone).
+ */
assert(!qatomic_read(&has_writer));
/*
@@ -158,11 +168,13 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
if (ctx) {
aio_context_acquire(bdrv_get_aio_context(bs));
}
+#endif
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
+#if 0
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
@@ -174,10 +186,13 @@ void bdrv_graph_wrunlock(void)
/* Wake up all coroutine that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -237,10 +252,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
+#endif
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -258,6 +275,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
+#endif
}
void bdrv_graph_rdlock_main_loop(void)
@@ -275,13 +293,19 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
/* reader_count() is slow due to aio_context_list_lock lock contention */
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
#endif
+#endif
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
assert(qatomic_read(&has_writer));
+#endif
}

View File

@@ -0,0 +1,42 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 28 Oct 2022 10:09:46 +0200
Subject: [PATCH] init: daemonize: defuse PID file resolve error
When proxmox-file-restore invokes QEMU, the PID file is a (temporary)
file that's already unlinked, so resolving the absolute path here
failed.
It should not be a critical error when the PID file unlink handler
can't be registered, because the path can't be resolved for whatever
reason. If the file is already gone from QEMU's perspective (i.e.
errno is ENOENT), silently ignore the error. Otherwise, print a
warning.
Reported-by: Dominik Csapak <d.csapak@proxmox.com>
Suggested-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
softmmu/vl.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 706bd7cff7..3381c56af7 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2438,10 +2438,11 @@ static void qemu_maybe_daemonize(const char *pid_file)
pid_file_realpath = g_malloc0(PATH_MAX);
if (!realpath(pid_file, pid_file_realpath)) {
- error_report("cannot resolve PID file path: %s: %s",
- pid_file, strerror(errno));
- unlink(pid_file);
- exit(1);
+ if (errno != ENOENT) {
+ warn_report("not removing PID file on exit: cannot resolve PID "
+ "file path: %s: %s", pid_file, strerror(errno));
+ }
+ return;
}
qemu_unlink_pidfile_notifier = (struct UnlinkPidfileNotifier) {

View File

@@ -0,0 +1,77 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 27 Oct 2022 03:27:26 -0400
Subject: [PATCH] block/block-backend: blk_set_enable_write_cache is IO_CODE
blk_set_enable_write_cache() is defined as GLOBAL_STATE_CODE
but can be invoked from iothreads when handling scsi requests.
This triggers an assertion failure:
0x00007fd6c3515ce1 in raise () from /lib/x86_64-linux-gnu/libc.so.6
0x00007fd6c34ff537 in abort () from /lib/x86_64-linux-gnu/libc.so.6
0x00007fd6c34ff40f in ?? () from /lib/x86_64-linux-gnu/libc.so.6
0x00007fd6c350e662 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
0x000056149e2cea03 in blk_set_enable_write_cache (wce=true, blk=0x5614a01c27f0)
at ../src/block/block-backend.c:1949
0x000056149e2d0a67 in blk_set_enable_write_cache (blk=0x5614a01c27f0,
wce=<optimized out>) at ../src/block/block-backend.c:1951
0x000056149dfe9c59 in scsi_disk_apply_mode_select (p=0x7fd6b400c00e "\004",
page=<optimized out>, s=<optimized out>) at ../src/hw/scsi/scsi-disk.c:1520
mode_select_pages (change=true, len=18, p=0x7fd6b400c00e "\004", r=0x7fd6b4001ff0)
at ../src/hw/scsi/scsi-disk.c:1570
scsi_disk_emulate_mode_select (inbuf=<optimized out>, r=0x7fd6b4001ff0) at
../src/hw/scsi/scsi-disk.c:1640
scsi_disk_emulate_write_data (req=0x7fd6b4001ff0) at ../src/hw/scsi/scsi-disk.c:1934
0x000056149e18ff16 in virtio_scsi_handle_cmd_req_submit (req=<optimized out>,
req=<optimized out>, s=0x5614a12f16b0) at ../src/hw/scsi/virtio-scsi.c:719
virtio_scsi_handle_cmd_vq (vq=0x7fd6bab92140, s=0x5614a12f16b0) at
../src/hw/scsi/virtio-scsi.c:761
virtio_scsi_handle_cmd (vq=<optimized out>, vdev=<optimized out>) at
../src/hw/scsi/virtio-scsi.c:775
virtio_scsi_handle_cmd (vdev=0x5614a12f16b0, vq=0x7fd6bab92140) at
../src/hw/scsi/virtio-scsi.c:765
0x000056149e1a8aa6 in virtio_queue_notify_vq (vq=0x7fd6bab92140) at
../src/hw/virtio/virtio.c:2365
0x000056149e3ccea5 in aio_dispatch_handler (ctx=ctx@entry=0x5614a01babe0,
node=<optimized out>) at ../src/util/aio-posix.c:369
0x000056149e3cd868 in aio_dispatch_ready_handlers (ready_list=0x7fd6c09b2680,
ctx=0x5614a01babe0) at ../src/util/aio-posix.c:399
aio_poll (ctx=0x5614a01babe0, blocking=blocking@entry=true) at
../src/util/aio-posix.c:713
0x000056149e2a7796 in iothread_run (opaque=opaque@entry=0x56149ffde500) at
../src/iothread.c:67
0x000056149e3d0859 in qemu_thread_start (args=0x7fd6c09b26f0) at
../src/util/qemu-thread-posix.c:504
0x00007fd6c36b9ea7 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
0x00007fd6c35d9aef in clone () from /lib/x86_64-linux-gnu/libc.so.6
Changing GLOBAL_STATE_CODE in IO_CODE is allowed, since GSC callers are
allowed to call IO_CODE.
Resolves: #1272
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20221027072726.2681500-1-eesposit@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Tested-by: Antoine Damhet <antoine.damhet@shadow.tech>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit be8da05b5ed8fb546731b9edb997f303f272bad8)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/block-backend.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/block-backend.c b/block/block-backend.c
index d4a5df2ac2..1b563e628b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1946,7 +1946,7 @@ bool blk_enable_write_cache(BlockBackend *blk)
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
- GLOBAL_STATE_CODE();
+ IO_CODE();
blk->enable_write_cache = wce;
}

View File

@@ -1,57 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 28 Sep 2023 11:19:14 +0200
Subject: [PATCH] migration states: workaround snapshot performance regression
Commit 813cd616 ("migration: Use migration_transferred_bytes() to
calculate rate_limit") introduced a prohibitive performance regression
when taking a snapshot [0]. The reason turns out to be the flushing
done by migration_transferred_bytes()
Just use a _noflush version of the relevant function as a workaround
until upstream fixes the issue. This is inspired by a not-applied
upstream series [1], but doing the very minimum to avoid the
regression.
[0]: https://gitlab.com/qemu-project/qemu/-/issues/1821
[1]: https://lists.nongnu.org/archive/html/qemu-devel/2023-05/msg07708.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/migration-stats.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/migration/migration-stats.c b/migration/migration-stats.c
index 095d6d75bb..8073c8ebaa 100644
--- a/migration/migration-stats.c
+++ b/migration/migration-stats.c
@@ -18,6 +18,20 @@
MigrationAtomicStats mig_stats;
+/*
+ * Same as migration_transferred_bytes below, but using the _noflush
+ * variant of qemu_file_transferred() to avoid a performance
+ * regression in migration_rate_exceeded().
+ */
+static uint64_t migration_transferred_bytes_noflush(QEMUFile *f)
+{
+ uint64_t multifd = stat64_get(&mig_stats.multifd_bytes);
+ uint64_t qemu_file = qemu_file_transferred_noflush(f);
+
+ trace_migration_transferred_bytes(qemu_file, multifd);
+ return qemu_file + multifd;
+}
+
bool migration_rate_exceeded(QEMUFile *f)
{
if (qemu_file_get_error(f)) {
@@ -25,7 +39,7 @@ bool migration_rate_exceeded(QEMUFile *f)
}
uint64_t rate_limit_start = stat64_get(&mig_stats.rate_limit_start);
- uint64_t rate_limit_current = migration_transferred_bytes(f);
+ uint64_t rate_limit_current = migration_transferred_bytes_noflush(f);
uint64_t rate_limit_used = rate_limit_current - rate_limit_start;
uint64_t rate_limit_max = stat64_get(&mig_stats.rate_limit_max);

View File

@@ -1,45 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 17 Nov 2023 11:18:06 +0100
Subject: [PATCH] Revert "x86: acpi: workaround Windows not handling name
references in Package properly"
This reverts commit 44d975ef340e2f21f236f9520c53e1b30d2213a4.
As reported in the community forum [0] and reproduced locally this
breaks VirtIO network adapters in (at least) the German ISO of Windows
Server 2022. The fix itself was for
> Issue is not fatal but as result acpi-index/"PCI Label ID" property
> is either not shown in device details page or shows incorrect value.
so revert and tolerate that as a stop-gap, rather than have the
devices not working at all.
[0]: https://forum.proxmox.com/threads/92094/post-605684
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/i386/acpi-build.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index bb12b0ad43..de14d3c3da 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -362,13 +362,9 @@ Aml *aml_pci_device_dsm(void)
{
Aml *params = aml_local(0);
Aml *pkg = aml_package(2);
- aml_append(pkg, aml_int(0));
- aml_append(pkg, aml_int(0));
+ aml_append(pkg, aml_name("BSEL"));
+ aml_append(pkg, aml_name("ASUN"));
aml_append(method, aml_store(pkg, params));
- aml_append(method,
- aml_store(aml_name("BSEL"), aml_index(params, aml_int(0))));
- aml_append(method,
- aml_store(aml_name("ASUN"), aml_index(params, aml_int(1))));
aml_append(method,
aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1),
aml_arg(2), aml_arg(3), params))

View File

@@ -1,34 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 1 Feb 2022 20:09:41 +0100
Subject: [PATCH] target/i386: the sgx_epc_get_section stub is reachable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The sgx_epc_get_section stub is reachable from cpu_x86_cpuid. It
should not assert, instead it should just return true just like
the "real" sgx_epc_get_section does when SGX is disabled.
Reported-by: Vladimír Beneš <vbenes@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20220201190941.106001-1-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry-picked from commit 219615740425d9683588207b40a365e6741691a6)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/i386/sgx-stub.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c
index 26833eb233..16b1dfd90b 100644
--- a/hw/i386/sgx-stub.c
+++ b/hw/i386/sgx-stub.c
@@ -34,5 +34,5 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size)
{
- g_assert_not_reached();
+ return true;
}

View File

@@ -1,86 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed, 24 Jan 2024 11:57:48 +0100
Subject: [PATCH] ui/clipboard: mark type as not available when there is no
data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT
message with len=0. In qemu_clipboard_set_data(), the clipboard info
will be updated setting data to NULL (because g_memdup(data, size)
returns NULL when size is 0). If the client does not set the
VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then
the 'request' callback for the clipboard peer is not initialized.
Later, because data is NULL, qemu_clipboard_request() can be reached
via vdagent_chr_write() and vdagent_clipboard_recv_request() and
there, the clipboard owner's 'request' callback will be attempted to
be called, but that is a NULL pointer.
In particular, this can happen when using the KRDC (22.12.3) VNC
client.
Another scenario leading to the same issue is with two clients (say
noVNC and KRDC):
The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and
initializes its cbpeer.
The KRDC client does not, but triggers a vnc_client_cut_text() (note
it's not the _ext variant)). There, a new clipboard info with it as
the 'owner' is created and via qemu_clipboard_set_data() is called,
which in turn calls qemu_clipboard_update() with that info.
In qemu_clipboard_update(), the notifier for the noVNC client will be
called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the
noVNC client. The 'owner' in that clipboard info is the clipboard peer
for the KRDC client, which did not initialize the 'request' function.
That sounds correct to me, it is the owner of that clipboard info.
Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set
the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it
passes), that clipboard info is passed to qemu_clipboard_request() and
the original segfault still happens.
Fix the issue by handling updates with size 0 differently. In
particular, mark in the clipboard info that the type is not available.
While at it, switch to g_memdup2(), because g_memdup() is deprecated.
Cc: qemu-stable@nongnu.org
Fixes: CVE-2023-6683
Reported-by: Markus Frank <m.frank@proxmox.com>
Suggested-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Tested-by: Markus Frank <m.frank@proxmox.com>
(picked from https://lists.nongnu.org/archive/html/qemu-stable/2024-01/msg00228.html)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
ui/clipboard.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/ui/clipboard.c b/ui/clipboard.c
index 3d14bffaf8..b3f6fa3c9e 100644
--- a/ui/clipboard.c
+++ b/ui/clipboard.c
@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer,
}
g_free(info->types[type].data);
- info->types[type].data = g_memdup(data, size);
- info->types[type].size = size;
- info->types[type].available = true;
+ if (size) {
+ info->types[type].data = g_memdup2(data, size);
+ info->types[type].size = size;
+ info->types[type].available = true;
+ } else {
+ info->types[type].data = NULL;
+ info->types[type].size = 0;
+ info->types[type].available = false;
+ }
if (update) {
qemu_clipboard_update(info);

View File

@@ -1,65 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 2 Feb 2024 16:31:56 +0100
Subject: [PATCH] virtio-scsi: Attach event vq notifier with no_poll
As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi:
don't waste CPU polling the event virtqueue"), we only attach an io_read
notifier for the virtio-scsi event virtqueue instead, and no polling
notifiers. During operation, the event virtqueue is typically
non-empty, but none of the buffers are intended to be used immediately.
Instead, they only get used when certain events occur. Therefore, it
makes no sense to continuously poll it when non-empty, because it is
supposed to be and stay non-empty.
We do this by using virtio_queue_aio_attach_host_notifier_no_poll()
instead of virtio_queue_aio_attach_host_notifier() for the event
virtqueue.
Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement
BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use
virtio_queue_aio_attach_host_notifier() for all virtqueues, including
the event virtqueue. This can lead to it being polled again, undoing
the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552.
Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the
event virtqueue.
("virtio-scsi: implement BlockDevOps->drained_begin()")
Reported-by: Fiona Ebner <f.ebner@proxmox.com>
Fixes: 766aa2de0f29b657148e04599320d771c36fd126
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/scsi/virtio-scsi.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 45b95ea070..ad24a882fd 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -1148,6 +1148,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
static void virtio_scsi_drained_end(SCSIBus *bus)
{
VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
VirtIODevice *vdev = VIRTIO_DEVICE(s);
uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
s->parent_obj.conf.num_queues;
@@ -1165,7 +1166,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus)
for (uint32_t i = 0; i < total_queues; i++) {
VirtQueue *vq = virtio_get_queue(vdev, i);
- virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+ if (vq == vs->event_vq) {
+ virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+ } else {
+ virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+ }
}
}

View File

@@ -1,125 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 2 Feb 2024 16:31:57 +0100
Subject: [PATCH] virtio: Re-enable notifications after drain
During drain, we do not care about virtqueue notifications, which is why
we remove the handlers on it. When removing those handlers, whether vq
notifications are enabled or not depends on whether we were in polling
mode or not; if not, they are enabled (by default); if so, they have
been disabled by the io_poll_start callback.
Because we do not care about those notifications after removing the
handlers, this is fine. However, we have to explicitly ensure they are
enabled when re-attaching the handlers, so we will resume receiving
notifications. We do this in virtio_queue_aio_attach_host_notifier*().
If such a function is called while we are in a polling section,
attaching the notifiers will then invoke the io_poll_start callback,
re-disabling notifications.
Because we will always miss virtqueue updates in the drained section, we
also need to poll the virtqueue once after attaching the notifiers.
Buglink: https://issues.redhat.com/browse/RHEL-3934
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/virtio/virtio.c | 42 ++++++++++++++++++++++++++++++++++++++++++
include/block/aio.h | 7 ++++++-
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 969c25f4cf..02cce83111 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3526,6 +3526,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
+ /*
+ * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
+ * Re-enable them. (And if detach has not been used before, notifications
+ * being enabled is still the default state while a notifier is attached;
+ * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
+ * notifications enabled once the polling section is left.)
+ */
+ if (!virtio_queue_get_notification(vq)) {
+ virtio_queue_set_notification(vq, 1);
+ }
+
aio_set_event_notifier(ctx, &vq->host_notifier,
virtio_queue_host_notifier_read,
virtio_queue_host_notifier_aio_poll,
@@ -3533,6 +3544,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
aio_set_event_notifier_poll(ctx, &vq->host_notifier,
virtio_queue_host_notifier_aio_poll_begin,
virtio_queue_host_notifier_aio_poll_end);
+
+ /*
+ * We will have ignored notifications about new requests from the guest
+ * while no notifiers were attached, so "kick" the virt queue to process
+ * those requests now.
+ */
+ event_notifier_set(&vq->host_notifier);
}
/*
@@ -3543,14 +3561,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
*/
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
{
+ /* See virtio_queue_aio_attach_host_notifier() */
+ if (!virtio_queue_get_notification(vq)) {
+ virtio_queue_set_notification(vq, 1);
+ }
+
aio_set_event_notifier(ctx, &vq->host_notifier,
virtio_queue_host_notifier_read,
NULL, NULL);
+
+ /*
+ * See virtio_queue_aio_attach_host_notifier().
+ * Note that this may be unnecessary for the type of virtqueues this
+ * function is used for. Still, it will not hurt to have a quick look into
+ * whether we can/should process any of the virtqueue elements.
+ */
+ event_notifier_set(&vq->host_notifier);
}
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
+
+ /*
+ * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
+ * will run after io_poll_begin(), so by removing the notifier, we do not
+ * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
+ * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
+ * notifications are enabled or disabled. It does not really matter anyway;
+ * we just removed the notifier, so we do not care about notifications until
+ * we potentially re-attach it. The attach_host_notifier functions will
+ * ensure that notifications are enabled again when they are needed.
+ */
}
void virtio_queue_host_notifier_read(EventNotifier *n)
diff --git a/include/block/aio.h b/include/block/aio.h
index 32042e8905..79efadfa48 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -498,9 +498,14 @@ void aio_set_event_notifier(AioContext *ctx,
AioPollFn *io_poll,
EventNotifierHandler *io_poll_ready);
-/* Set polling begin/end callbacks for an event notifier that has already been
+/*
+ * Set polling begin/end callbacks for an event notifier that has already been
* registered with aio_set_event_notifier. Do nothing if the event notifier is
* not registered.
+ *
+ * Note that if the io_poll_end() callback (or the entire notifier) is removed
+ * during polling, it will not be called, so an io_poll_begin() is not
+ * necessarily always followed by an io_poll_end().
*/
void aio_set_event_notifier_poll(AioContext *ctx,
EventNotifier *notifier,

View File

@@ -1,119 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Mon, 18 Dec 2023 11:13:40 +0100
Subject: [PATCH] qemu_init: increase NOFILE soft limit on POSIX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In many configurations, e.g. multiple vNICs with multiple queues or
with many Ceph OSDs, the default soft limit of 1024 is not enough.
QEMU is supposed to work fine with file descriptors >= 1024 and does
not use select() on POSIX. Bump the soft limit to the allowed hard
limit to avoid issues with the aforementioned configurations.
Of course the limit could be raised from the outside, but the man page
of systemd.exec states about 'LimitNOFILE=':
> Don't use.
> [...]
> Typically applications should increase their soft limit to the hard
> limit on their own, if they are OK with working with file
> descriptors above 1023,
If the soft limit is already the same as the hard limit, avoid the
superfluous setrlimit call. This can avoid a warning with a strict
seccomp filter blocking setrlimit if NOFILE was already raised before
executing QEMU.
Buglink: https://bugzilla.proxmox.com/show_bug.cgi?id=4507
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
---
include/sysemu/os-posix.h | 1 +
include/sysemu/os-win32.h | 5 +++++
os-posix.c | 22 ++++++++++++++++++++++
softmmu/vl.c | 2 ++
4 files changed, 30 insertions(+)
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 1030d39904..edc415aff5 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -48,6 +48,7 @@ void os_setup_early_signal_handling(void);
void os_set_proc_name(const char *s);
void os_setup_signal_handling(void);
void os_daemonize(void);
+void os_setup_limits(void);
void os_setup_post(void);
int os_mlock(void);
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 91aa0d7ec0..f6e23fe01e 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -129,6 +129,11 @@ static inline int os_mlock(void)
return -ENOSYS;
}
+void os_setup_limits(void)
+{
+ return;
+}
+
#define fsync _commit
#if !defined(lseek)
diff --git a/os-posix.c b/os-posix.c
index cfcb96533c..0cc1d991b1 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -24,6 +24,7 @@
*/
#include "qemu/osdep.h"
+#include <sys/resource.h>
#include <sys/wait.h>
#include <pwd.h>
#include <grp.h>
@@ -286,6 +287,27 @@ void os_daemonize(void)
}
}
+void os_setup_limits(void)
+{
+ struct rlimit nofile;
+
+ if (getrlimit(RLIMIT_NOFILE, &nofile) < 0) {
+ warn_report("unable to query NOFILE limit: %s", strerror(errno));
+ return;
+ }
+
+ if (nofile.rlim_cur == nofile.rlim_max) {
+ return;
+ }
+
+ nofile.rlim_cur = nofile.rlim_max;
+
+ if (setrlimit(RLIMIT_NOFILE, &nofile) < 0) {
+ warn_report("unable to set NOFILE limit: %s", strerror(errno));
+ return;
+ }
+}
+
void os_setup_post(void)
{
int fd = 0;
diff --git a/softmmu/vl.c b/softmmu/vl.c
index c9e9ede237..ba6ad8a8df 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2713,6 +2713,8 @@ void qemu_init(int argc, char **argv)
error_init(argv[0]);
qemu_init_exec_dir(argv[0]);
+ os_setup_limits();
+
qemu_init_arch_modules();
qemu_init_subsystems();

View File

@@ -1,61 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 22 Jan 2024 12:26:25 -0500
Subject: [PATCH] virtio-blk: avoid using ioeventfd state in irqfd conditional
Requests that complete in an IOThread use irqfd to notify the guest
while requests that complete in the main loop thread use the traditional
qdev irq code path. The reason for this conditional is that the irq code
path requires the BQL:
if (s->ioeventfd_started && !s->ioeventfd_disabled) {
virtio_notify_irqfd(vdev, req->vq);
} else {
virtio_notify(vdev, req->vq);
}
There is a corner case where the conditional invokes the irq code path
instead of the irqfd code path:
static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev)
{
...
/*
* Set ->ioeventfd_started to false before draining so that host notifiers
* are not detached/attached anymore.
*/
s->ioeventfd_started = false;
/* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
blk_drain(s->conf.conf.blk);
During blk_drain() the conditional produces the wrong result because
ioeventfd_started is false.
Use qemu_in_iothread() instead of checking the ioeventfd state.
Cc: qemu-stable@nongnu.org
Buglink: https://issues.redhat.com/browse/RHEL-15394
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20240122172625.415386-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
[FE: backport: dataplane -> ioeventfd rework didn't happen yet]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/block/virtio-blk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 39e7f23fab..61bd1f6859 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -64,7 +64,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
iov_discard_undo(&req->inhdr_undo);
iov_discard_undo(&req->outhdr_undo);
virtqueue_push(req->vq, &req->elem, req->in_len);
- if (s->dataplane_started && !s->dataplane_disabled) {
+ if (qemu_in_iothread()) {
virtio_blk_data_plane_notify(s->dataplane, req->vq);
} else {
virtio_notify(vdev, req->vq);

View File

@@ -1,8 +1,8 @@
Index: pve-qemu-kvm-8.1.2/block/meson.build
Index: qemu/block/meson.build
===================================================================
--- pve-qemu-kvm-8.1.2.orig/block/meson.build
+++ pve-qemu-kvm-8.1.2/block/meson.build
@@ -123,6 +123,7 @@ foreach m : [
--- qemu.orig/block/meson.build
+++ qemu/block/meson.build
@@ -111,6 +111,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
@@ -10,18 +10,18 @@ Index: pve-qemu-kvm-8.1.2/block/meson.build
]
if m[0].found()
module_ss = ss.source_set()
Index: pve-qemu-kvm-8.1.2/meson.build
Index: qemu/meson.build
===================================================================
--- pve-qemu-kvm-8.1.2.orig/meson.build
+++ pve-qemu-kvm-8.1.2/meson.build
@@ -1303,6 +1303,26 @@ if not get_option('rbd').auto() or have_
--- qemu.orig/meson.build
+++ qemu/meson.build
@@ -967,6 +967,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'))
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
@@ -41,27 +41,27 @@ Index: pve-qemu-kvm-8.1.2/meson.build
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -2123,6 +2143,7 @@ if numa.found()
endif
@@ -1802,6 +1822,7 @@ config_host_data.set('CONFIG_NUMA', numa
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
config_host_data.set('CONFIG_SDL', sdl.found())
@@ -4298,6 +4319,7 @@ summary_info += {'fdt support': fd
summary_info += {'libcap-ng support': libcap_ng}
summary_info += {'bpf support': libbpf}
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3965,6 +3986,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
Index: pve-qemu-kvm-8.1.2/meson_options.txt
Index: qemu/meson_options.txt
===================================================================
--- pve-qemu-kvm-8.1.2.orig/meson_options.txt
+++ pve-qemu-kvm-8.1.2/meson_options.txt
@@ -186,6 +186,8 @@ option('lzo', type : 'feature', value :
--- qemu.orig/meson_options.txt
+++ qemu/meson_options.txt
@@ -167,6 +167,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
@@ -70,20 +70,20 @@ Index: pve-qemu-kvm-8.1.2/meson_options.txt
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
Index: pve-qemu-kvm-8.1.2/qapi/block-core.json
Index: qemu/qapi/block-core.json
===================================================================
--- pve-qemu-kvm-8.1.2.orig/qapi/block-core.json
+++ pve-qemu-kvm-8.1.2/qapi/block-core.json
@@ -3403,7 +3403,7 @@
'raw', 'rbd',
--- qemu.orig/qapi/block-core.json
+++ qemu/qapi/block-core.json
@@ -3209,7 +3209,7 @@
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4465,6 +4465,28 @@
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -4149,6 +4149,28 @@
'*server': ['InetSocketAddressBase'] } }
##
@@ -112,15 +112,15 @@ Index: pve-qemu-kvm-8.1.2/qapi/block-core.json
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4923,6 +4945,7 @@
@@ -4593,6 +4615,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5360,6 +5383,17 @@
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4985,6 +5008,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
@@ -138,7 +138,7 @@ Index: pve-qemu-kvm-8.1.2/qapi/block-core.json
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -5581,6 +5615,7 @@
@@ -5182,6 +5216,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
@@ -146,20 +146,20 @@ Index: pve-qemu-kvm-8.1.2/qapi/block-core.json
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: pve-qemu-kvm-8.1.2/scripts/ci/org.centos/stream/8/x86_64/configure
Index: qemu/scripts/ci/org.centos/stream/8/x86_64/configure
===================================================================
--- pve-qemu-kvm-8.1.2.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ pve-qemu-kvm-8.1.2/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -30,7 +30,7 @@
--with-suffix="qemu-kvm" \
--firmwarepath=/usr/share/qemu-firmware \
--- qemu.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ qemu/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -176,6 +176,7 @@
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
@@ -167,32 +167,11 @@ Index: pve-qemu-kvm-8.1.2/scripts/ci/org.centos/stream/8/x86_64/configure
--enable-rdma \
--enable-seccomp \
--enable-snappy \
Index: pve-qemu-kvm-8.1.2/scripts/meson-buildoptions.sh
===================================================================
--- pve-qemu-kvm-8.1.2.orig/scripts/meson-buildoptions.sh
+++ pve-qemu-kvm-8.1.2/scripts/meson-buildoptions.sh
@@ -153,6 +153,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -416,6 +417,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;
Index: a/block/vitastor.c
===================================================================
--- /dev/null
+++ a/block/vitastor.c
@@ -0,0 +1,1076 @@
@@ -0,0 +1,797 @@
+// Copyright (c) Vitaliy Filippov, 2019+
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
+
@@ -204,9 +183,6 @@ Index: a/block/vitastor.c
+#endif
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#if QEMU_VERSION_MAJOR >= 8
+#include "block/block-io.h"
+#endif
+#include "block/block_int.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
@@ -230,11 +206,6 @@ Index: a/block/vitastor.c
+#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
+#define qobject_unref QDECREF
+#endif
+#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
+#include "sysemu/replay.h"
+#else
+#include "sysemu/sysemu.h"
+#endif
+
+#include "vitastor_c.h"
+
@@ -248,13 +219,9 @@ Index: a/block/vitastor.c
+}
+#endif
+
+typedef struct VitastorFdData VitastorFdData;
+
+typedef struct VitastorClient
+{
+ void *proxy;
+ int uring_eventfd;
+
+ void *watch;
+ char *config_path;
+ char *etcd_host;
@@ -271,24 +238,12 @@ Index: a/block/vitastor.c
+ int rdma_gid_index;
+ int rdma_mtu;
+ QemuMutex mutex;
+ AioContext *ctx;
+ VitastorFdData **fds;
+ int fd_count, fd_alloc;
+ int bh_uring_scheduled;
+
+ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
+ uint32_t last_bitmap_granularity;
+ uint8_t *last_bitmap;
+} VitastorClient;
+
+typedef struct VitastorFdData
+{
+ VitastorClient *cli;
+ int fd;
+ IOHandler *fd_read, *fd_write;
+ void *opaque;
+} VitastorFdData;
+
+typedef struct VitastorRPC
+{
+ BlockDriverState *bs;
@@ -299,21 +254,10 @@ Index: a/block/vitastor.c
+ uint64_t inode, offset, len;
+ uint32_t bitmap_granularity;
+ uint8_t *bitmap;
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+ QEMUBH *bh;
+#endif
+} VitastorRPC;
+
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+typedef struct VitastorBH
+{
+ VitastorClient *cli;
+ QEMUBH *bh;
+} VitastorBH;
+#endif
+
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
+static void vitastor_co_generic_cb(void *opaque, long retval);
+static void vitastor_co_generic_bh_cb(void *opaque, long retval);
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
+static void vitastor_close(BlockDriverState *bs);
+
@@ -391,13 +335,8 @@ Index: a/block/vitastor.c
+ !strcmp(name, "rdma-gid-index") ||
+ !strcmp(name, "rdma-mtu"))
+ {
+#if QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1
+ unsigned long long num_val;
+ if (parse_uint_full(value, &num_val, 0))
+#else
+ uint64_t num_val;
+ if (parse_uint_full(value, 0, &num_val))
+#endif
+ {
+ error_setg(errp, "Illegal %s: %s", name, value);
+ goto out;
@@ -434,54 +373,6 @@ Index: a/block/vitastor.c
+ return;
+}
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+static void vitastor_uring_handler(void *opaque)
+{
+ VitastorClient *client = (VitastorClient*)opaque;
+ qemu_mutex_lock(&client->mutex);
+ client->bh_uring_scheduled = 0;
+ vitastor_c_uring_handle_events(client->proxy);
+ qemu_mutex_unlock(&client->mutex);
+}
+
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
+static void vitastor_bh_uring_handler(void *opaque)
+{
+ VitastorBH *vbh = opaque;
+ vitastor_bh_handler(vbh->cli);
+ qemu_bh_delete(vbh->bh);
+ free(vbh);
+}
+#endif
+
+static void vitastor_schedule_uring_handler(VitastorClient *client)
+{
+ void *opaque = client;
+ if (client->uring_eventfd >= 0 && !client->bh_uring_scheduled)
+ {
+ client->bh_uring_scheduled = 1;
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+ replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+ aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque);
+#else
+ VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH));
+ vbh->cli = client;
+#if QEMU_VERSION_MAJOR >= 2
+ vbh->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_bh_uring_handler, vbh);
+#else
+ vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh);
+#endif
+ qemu_bh_schedule(vbh->bh);
+#endif
+ }
+}
+#else
+static void vitastor_schedule_uring_handler(VitastorClient *client)
+{
+}
+#endif
+
+static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
+{
+ BlockDriverState *bs = task->bs;
@@ -489,8 +380,7 @@ Index: a/block/vitastor.c
+ task->co = qemu_coroutine_self();
+
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
+ vitastor_schedule_uring_handler(client);
+ vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_bh_cb, task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task->complete)
@@ -499,32 +389,13 @@ Index: a/block/vitastor.c
+ }
+}
+
+static void vitastor_aio_fd_read(void *fddv)
+{
+ VitastorFdData *fdd = (VitastorFdData*)fddv;
+ qemu_mutex_lock(&fdd->cli->mutex);
+ fdd->fd_read(fdd->opaque);
+ vitastor_schedule_uring_handler(fdd->cli);
+ qemu_mutex_unlock(&fdd->cli->mutex);
+}
+
+static void vitastor_aio_fd_write(void *fddv)
+{
+ VitastorFdData *fdd = (VitastorFdData*)fddv;
+ qemu_mutex_lock(&fdd->cli->mutex);
+ fdd->fd_write(fdd->opaque);
+ vitastor_schedule_uring_handler(fdd->cli);
+ qemu_mutex_unlock(&fdd->cli->mutex);
+}
+
+static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
+static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+ aio_set_fd_handler(ctx, fd,
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 && (QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1)
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
+ 0 /*is_external*/,
+#endif
+ fd_read,
+ fd_write,
+ fd_read, fd_write,
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
+ NULL /*io_flush*/,
+#endif
@@ -537,92 +408,6 @@ Index: a/block/vitastor.c
+ opaque);
+}
+
+static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+ VitastorClient *client = (VitastorClient*)vcli;
+ VitastorFdData *fdd = NULL;
+ int i;
+ for (i = 0; i < client->fd_count; i++)
+ {
+ if (client->fds[i]->fd == fd)
+ {
+ if (fd_read || fd_write)
+ {
+ fdd = client->fds[i];
+ fdd->opaque = opaque;
+ fdd->fd_read = fd_read;
+ fdd->fd_write = fd_write;
+ }
+ else
+ {
+ for (int j = i+1; j < client->fd_count; j++)
+ client->fds[j-1] = client->fds[j];
+ client->fd_count--;
+ }
+ break;
+ }
+ }
+ if ((fd_read || fd_write) && !fdd)
+ {
+ fdd = (VitastorFdData*)malloc(sizeof(VitastorFdData));
+ fdd->cli = client;
+ fdd->fd = fd;
+ fdd->fd_read = fd_read;
+ fdd->fd_write = fd_write;
+ fdd->opaque = opaque;
+ if (client->fd_count >= client->fd_alloc)
+ {
+ client->fd_alloc = client->fd_alloc*2;
+ if (client->fd_alloc < 16)
+ client->fd_alloc = 16;
+ client->fds = (VitastorFdData**)realloc(client->fds, sizeof(VitastorFdData*) * client->fd_alloc);
+ }
+ client->fds[client->fd_count++] = fdd;
+ }
+ universal_aio_set_fd_handler(
+ client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd
+ );
+}
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+typedef struct str_array
+{
+ const char **items;
+ int len, alloc;
+} str_array;
+
+static void strarray_push(str_array *a, const char *str)
+{
+ if (a->len >= a->alloc)
+ {
+ a->alloc = !a->alloc ? 4 : 2*a->alloc;
+ a->items = (const char**)realloc(a->items, a->alloc*sizeof(char*));
+ if (!a->items)
+ {
+ fprintf(stderr, "bad alloc\n");
+ abort();
+ }
+ }
+ a->items[a->len++] = str;
+}
+
+static void strarray_push_kv(str_array *a, const char *key, const char *value)
+{
+ if (key && value)
+ {
+ strarray_push(a, key);
+ strarray_push(a, value);
+ }
+}
+
+static void strarray_free(str_array *a)
+{
+ free(a->items);
+ a->items = NULL;
+ a->len = a->alloc = 0;
+}
+#endif
+
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+ VitastorRPC task;
@@ -640,46 +425,10 @@ Index: a/block/vitastor.c
+ client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
+ client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
+ client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
+ client->ctx = bdrv_get_aio_context(bs);
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+ str_array opt = {};
+ strarray_push_kv(&opt, "config_path", qdict_get_try_str(options, "config-path"));
+ strarray_push_kv(&opt, "etcd_address", qdict_get_try_str(options, "etcd-host"));
+ strarray_push_kv(&opt, "etcd_prefix", qdict_get_try_str(options, "etcd-prefix"));
+ strarray_push_kv(&opt, "use_rdma", qdict_get_try_str(options, "use-rdma"));
+ strarray_push_kv(&opt, "rdma_device", qdict_get_try_str(options, "rdma-device"));
+ strarray_push_kv(&opt, "rdma_port_num", qdict_get_try_str(options, "rdma-port-num"));
+ strarray_push_kv(&opt, "rdma_gid_index", qdict_get_try_str(options, "rdma-gid-index"));
+ strarray_push_kv(&opt, "rdma_mtu", qdict_get_try_str(options, "rdma-mtu"));
+ strarray_push_kv(&opt, "client_writeback_allowed", (flags & BDRV_O_NOCACHE) ? "0" : "1");
+ client->proxy = vitastor_c_create_uring_json(opt.items, opt.len);
+ strarray_free(&opt);
+ if (client->proxy)
+ {
+ client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy);
+ if (client->uring_eventfd < 0)
+ {
+ fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno));
+ error_setg(errp, "failed to create io_uring eventfd");
+ vitastor_close(bs);
+ return -1;
+ }
+ universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
+ }
+ else
+ {
+ // Writeback cache is unusable without io_uring because the client can't correctly flush on exit
+ fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower%s\n",
+ strerror(errno), (flags & BDRV_O_NOCACHE ? "" : " and writeback cache will be disabled"));
+#endif
+ client->uring_eventfd = -1;
+ client->proxy = vitastor_c_create_qemu(
+ vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
+ );
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
+ }
+#endif
+ client->proxy = vitastor_c_create_qemu(
+ vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
+ );
+ image = client->image = g_strdup(qdict_get_try_str(options, "image"));
+ client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
+ // Get image metadata (size and readonly flag) or just wait until the client is ready
@@ -693,13 +442,7 @@ Index: a/block/vitastor.c
+ }
+ else
+ {
+#if QEMU_VERSION_MAJOR >= 8
+ aio_co_enter(bdrv_get_aio_context(bs), qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
+ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#else
+ qemu_coroutine_enter(qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+#endif
+ BDRV_POLL_WHILE(bs, !task.complete);
+ }
+ client->image = image;
@@ -738,10 +481,6 @@ Index: a/block/vitastor.c
+ return -1;
+ }
+ bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
+#if QEMU_VERSION_MAJOR > 5 || QEMU_VERSION_MAJOR == 5 && QEMU_VERSION_MINOR >= 1
+ /* When extending regular files, we get zeros from the OS */
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+#endif
+ //client->aio_context = bdrv_get_aio_context(bs);
+ qdict_del(options, "use-rdma");
+ qdict_del(options, "rdma-mtu");
@@ -763,12 +502,6 @@ Index: a/block/vitastor.c
+{
+ VitastorClient *client = bs->opaque;
+ vitastor_c_destroy(client->proxy);
+ if (client->fds)
+ {
+ free(client->fds);
+ client->fds = NULL;
+ client->fd_alloc = client->fd_count = 0;
+ }
+ qemu_mutex_destroy(&client->mutex);
+ if (client->config_path)
+ g_free(client->config_path);
@@ -838,11 +571,7 @@ Index: a/block/vitastor.c
+ }
+
+ // TODO: Resize inode to <offset> bytes
+#if QEMU_VERSION_MAJOR >= 4
+ client->size = exact || client->size < offset ? offset : client->size;
+#else
+ client->size = offset;
+#endif
+ client->size = offset / BDRV_SECTOR_SIZE;
+
+ return 0;
+}
@@ -889,44 +618,25 @@ Index: a/block/vitastor.c
+ };
+}
+
+static void vitastor_co_generic_bh_cb(void *opaque)
+static void vitastor_co_generic_bh_cb(void *opaque, long retval)
+{
+ VitastorRPC *task = opaque;
+ task->ret = retval;
+ task->complete = 1;
+ if (qemu_coroutine_self() != task->co)
+ {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+ aio_co_wake(task->co);
+#else
+#if QEMU_VERSION_MAJOR == 2
+ qemu_bh_delete(task->bh);
+#endif
+ qemu_coroutine_enter(task->co, NULL);
+ qemu_aio_release(task);
+#endif
+ }
+}
+
+static void vitastor_co_generic_cb(void *opaque, long retval)
+{
+ VitastorRPC *task = opaque;
+ task->ret = retval;
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+ replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 2
+ task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+ qemu_bh_schedule(task->bh);
+#else
+ task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
+ qemu_bh_schedule(task->bh);
+#endif
+}
+
+static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
+{
+ vitastor_co_generic_cb(opaque, retval);
+ vitastor_co_generic_bh_cb(opaque, retval);
+}
+
+static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
@@ -945,7 +655,6 @@ Index: a/block/vitastor.c
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
+ vitastor_schedule_uring_handler(client);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
@@ -978,8 +687,7 @@ Index: a/block/vitastor.c
+
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
+ vitastor_schedule_uring_handler(client);
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
@@ -997,6 +705,7 @@ Index: a/block/vitastor.c
+ VitastorRPC *task = opaque;
+ VitastorClient *client = task->bs->opaque;
+ task->ret = retval;
+ task->complete = 1;
+ if (retval >= 0)
+ {
+ task->bitmap = bitmap;
@@ -1008,17 +717,15 @@ Index: a/block/vitastor.c
+ client->last_bitmap = bitmap;
+ }
+ }
+#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
+ replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+#elif QEMU_VERSION_MAJOR >= 2
+ task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
+ qemu_bh_schedule(task->bh);
+ if (qemu_coroutine_self() != task->co)
+ {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+ aio_co_wake(task->co);
+#else
+ task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
+ qemu_bh_schedule(task->bh);
+ qemu_coroutine_enter(task->co, NULL);
+ qemu_aio_release(task);
+#endif
+ }
+}
+
+static int coroutine_fn vitastor_co_block_status(
@@ -1059,7 +766,6 @@ Index: a/block/vitastor.c
+ task.bitmap = client->last_bitmap = NULL;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
+ vitastor_schedule_uring_handler(client);
+ qemu_mutex_unlock(&client->mutex);
+ while (!task.complete)
+ {
@@ -1145,8 +851,7 @@ Index: a/block/vitastor.c
+ vitastor_co_init_task(bs, &task);
+
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
+ vitastor_schedule_uring_handler(client);
+ vitastor_c_sync(client->proxy, vitastor_co_generic_bh_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+
+ while (!task.complete)
@@ -1201,13 +906,8 @@ Index: a/block/vitastor.c
+ .bdrv_parse_filename = vitastor_parse_filename,
+
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+#if QEMU_VERSION_MAJOR >= 8
+ .bdrv_co_get_info = vitastor_get_info,
+ .bdrv_co_getlength = vitastor_getlength,
+#else
+ .bdrv_get_info = vitastor_get_info,
+ .bdrv_getlength = vitastor_getlength,
+#endif
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
+ .bdrv_probe_blocksizes = vitastor_probe_blocksizes,
+#endif

View File

@@ -14,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index 7f540b03ed..ca551baa42 100644
index 48cd096624..3d60b80286 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -563,7 +563,7 @@ static QemuOptsList raw_runtime_opts = {
@@ -553,7 +553,7 @@ static QemuOptsList raw_runtime_opts = {
{
.name = "locking",
.type = QEMU_OPT_STRING,
@@ -26,7 +26,7 @@ index 7f540b03ed..ca551baa42 100644
},
{
.name = "pr-manager",
@@ -663,7 +663,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
@@ -653,7 +653,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->use_lock = false;
break;
case ON_OFF_AUTO_AUTO:

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/net/net.h b/include/net/net.h
index 685ec58318..22edf4ee96 100644
index 523136c7ac..c27859b4f6 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -260,8 +260,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
@@ -226,8 +226,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
int net_hub_id_for_client(NetClientState *nc, int *id);
NetClientState *net_hub_port_find(int hub_id);

View File

@@ -10,10 +10,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 0893b794e9..6d650a58b9 100644
index 82004b65b9..4868db8f94 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2243,9 +2243,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
@@ -2133,9 +2133,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
#define CPU_RESOLVING_TYPE TYPE_X86_CPU
#ifdef TARGET_X86_64

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/ui/spice-core.c b/ui/spice-core.c
index 52a59386d7..b20c25aee0 100644
index c3ac20ad43..37774f1c0a 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -691,32 +691,35 @@ static void qemu_spice_init(void)
@@ -689,32 +689,35 @@ static void qemu_spice_init(void)
if (tls_port) {
x509_dir = qemu_opt_get(opts, "x509-dir");

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/block/gluster.c b/block/gluster.c
index ad5fadbe79..d0011085c4 100644
index b60213ab80..93da76bc31 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -43,7 +43,7 @@
@@ -42,7 +42,7 @@
#define GLUSTER_DEBUG_DEFAULT 4
#define GLUSTER_DEBUG_MAX 9
#define GLUSTER_OPT_LOGFILE "logfile"
@@ -21,15 +21,15 @@ index ad5fadbe79..d0011085c4 100644
/*
* Several versions of GlusterFS (3.12? -> 6.0.1) fail when the transfer size
* is greater or equal to 1024 MiB, so we are limiting the transfer size to 512
@@ -425,6 +425,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
@@ -424,6 +424,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
int old_errno;
SocketAddressList *server;
uint64_t port;
unsigned long long port;
+ const char *logfile;
glfs = glfs_find_preopened(gconf->volume);
if (glfs) {
@@ -467,9 +468,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
@@ -466,9 +467,15 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
}
}

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c
index 978671411e..a4749f3b1b 100644
index f826410f40..64a8d7d48b 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -963,6 +963,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@@ -820,6 +820,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
rados_conf_set(*cluster, "rbd_cache", "false");
}

View File

@@ -0,0 +1,88 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:37 +0200
Subject: [PATCH] PVE: [Up] qmp: add get_link_status
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
net/net.c | 27 +++++++++++++++++++++++++++
qapi/net.json | 15 +++++++++++++++
qapi/pragma.json | 1 +
3 files changed, 43 insertions(+)
diff --git a/net/net.c b/net/net.c
index 2db160e063..8329347891 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1343,6 +1343,33 @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
}
}
+int64_t qmp_get_link_status(const char *name, Error **errp)
+{
+ NetClientState *ncs[MAX_QUEUE_NUM];
+ NetClientState *nc;
+ int queues;
+ bool ret;
+
+ queues = qemu_find_net_clients_except(name, ncs,
+ NET_CLIENT_DRIVER__MAX,
+ MAX_QUEUE_NUM);
+
+ if (queues == 0) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", name);
+ return (int64_t) -1;
+ }
+
+ nc = ncs[0];
+ ret = ncs[0]->link_down;
+
+ if (nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+ ret = ncs[0]->peer->link_down;
+ }
+
+ return (int64_t) ret ? 0 : 1;
+}
+
void colo_notify_filters_event(int event, Error **errp)
{
NetClientState *nc;
diff --git a/qapi/net.json b/qapi/net.json
index 75ba2cb989..a3c93ab88f 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -35,6 +35,21 @@
##
{ 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
+##
+# @get_link_status:
+#
+# Get the current link state of the nics or nic.
+#
+# @name: name of the nic you get the state of
+#
+# Return: If link is up 1
+# If link is down 0
+# If an error occure an empty string.
+#
+# Notes: this is an Proxmox VE extension and not offical part of Qemu.
+##
+{ 'command': 'get_link_status', 'data': {'name': 'str'} , 'returns': 'int' }
+
##
# @netdev_add:
#
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 7f810b0e97..a2358e303a 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -26,6 +26,7 @@
'system_wakeup' ],
# Commands allowed to return a non-dictionary
'command-returns-exceptions': [
+ 'get_link_status',
'human-monitor-command',
'qom-get',
'query-tpm-models',

View File

@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/block/gluster.c b/block/gluster.c
index d0011085c4..2df3d6e35d 100644
index 93da76bc31..1079b6186b 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -58,6 +58,7 @@ typedef struct GlusterAIOCB {
@@ -57,6 +57,7 @@ typedef struct GlusterAIOCB {
int ret;
Coroutine *coroutine;
AioContext *aio_context;
@@ -27,7 +27,7 @@ index d0011085c4..2df3d6e35d 100644
} GlusterAIOCB;
typedef struct BDRVGlusterState {
@@ -753,8 +754,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
@@ -752,8 +753,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
acb->ret = 0; /* Success */
} else if (ret < 0) {
acb->ret = -errno; /* Read/Write failed */
@@ -39,7 +39,7 @@ index d0011085c4..2df3d6e35d 100644
}
aio_co_schedule(acb->aio_context, acb->coroutine);
@@ -1021,6 +1024,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
@@ -1022,6 +1025,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
@@ -47,7 +47,7 @@ index d0011085c4..2df3d6e35d 100644
ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1201,9 +1205,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
@@ -1203,9 +1207,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
acb.aio_context = bdrv_get_aio_context(bs);
if (write) {
@@ -59,7 +59,7 @@ index d0011085c4..2df3d6e35d 100644
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
}
@@ -1266,6 +1272,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
@@ -1269,6 +1275,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
@@ -67,7 +67,7 @@ index d0011085c4..2df3d6e35d 100644
ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1314,6 +1321,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
@@ -1317,6 +1324,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/qemu-img.c b/qemu-img.c
index 78433f3746..25d427edd1 100644
index 7d4b33b3da..bb36f42dd2 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3062,7 +3062,8 @@ static int img_info(int argc, char **argv)
@@ -3010,7 +3010,8 @@ static int img_info(int argc, char **argv)
list = collect_image_info_list(image_opts, filename, fmt, chain,
force_share);
if (!list) {

View File

@@ -54,10 +54,10 @@ index 1b1dab5b17..d1616c045a 100644
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 25d427edd1..220e6ec577 100644
index bb36f42dd2..74afcb79ef 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4899,10 +4899,12 @@ static int img_bitmap(int argc, char **argv)
@@ -4826,10 +4826,12 @@ static int img_bitmap(int argc, char **argv)
#define C_IF 04
#define C_OF 010
#define C_SKIP 020
@@ -70,7 +70,7 @@ index 25d427edd1..220e6ec577 100644
};
struct DdIo {
@@ -4978,6 +4980,19 @@ static int img_dd_skip(const char *arg,
@@ -4905,6 +4907,19 @@ static int img_dd_skip(const char *arg,
return 0;
}
@@ -90,7 +90,7 @@ index 25d427edd1..220e6ec577 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -5018,6 +5033,7 @@ static int img_dd(int argc, char **argv)
@@ -4945,6 +4960,7 @@ static int img_dd(int argc, char **argv)
{ "if", img_dd_if, C_IF },
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
@@ -98,7 +98,7 @@ index 25d427edd1..220e6ec577 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5093,91 +5109,112 @@ static int img_dd(int argc, char **argv)
@@ -5020,91 +5036,112 @@ static int img_dd(int argc, char **argv)
arg = NULL;
}
@@ -275,10 +275,10 @@ index 25d427edd1..220e6ec577 100644
}
if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
@@ -5194,20 +5231,43 @@ static int img_dd(int argc, char **argv)
@@ -5121,20 +5158,43 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
for (out_pos = 0; in_pos < size; ) {
for (out_pos = 0; in_pos < size; block_count++) {
+ int in_ret, out_ret;
int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
-

View File

@@ -16,10 +16,10 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 220e6ec577..58bf9b43d1 100644
index 74afcb79ef..14594d44b6 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4900,11 +4900,13 @@ static int img_bitmap(int argc, char **argv)
@@ -4827,11 +4827,13 @@ static int img_bitmap(int argc, char **argv)
#define C_OF 010
#define C_SKIP 020
#define C_OSIZE 040
@@ -33,7 +33,7 @@ index 220e6ec577..58bf9b43d1 100644
};
struct DdIo {
@@ -4993,6 +4995,19 @@ static int img_dd_osize(const char *arg,
@@ -4920,6 +4922,19 @@ static int img_dd_osize(const char *arg,
return 0;
}
@@ -53,13 +53,13 @@ index 220e6ec577..58bf9b43d1 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -5007,12 +5022,14 @@ static int img_dd(int argc, char **argv)
@@ -4934,12 +4949,14 @@ static int img_dd(int argc, char **argv)
int c, i;
const char *out_fmt = "raw";
const char *fmt = NULL;
- int64_t size = 0;
+ int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
int64_t block_count = 0, out_pos, in_pos;
bool force_share = false;
struct DdInfo dd = {
.flags = 0,
@@ -69,7 +69,7 @@ index 220e6ec577..58bf9b43d1 100644
};
struct DdIo in = {
.bsz = 512, /* Block size is by default 512 bytes */
@@ -5034,6 +5051,7 @@ static int img_dd(int argc, char **argv)
@@ -4961,6 +4978,7 @@ static int img_dd(int argc, char **argv)
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
{ "osize", img_dd_osize, C_OSIZE },
@@ -77,20 +77,20 @@ index 220e6ec577..58bf9b43d1 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5230,9 +5248,10 @@ static int img_dd(int argc, char **argv)
@@ -5157,9 +5175,10 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
- for (out_pos = 0; in_pos < size; ) {
- for (out_pos = 0; in_pos < size; block_count++) {
+ readsize = (dd.isize > 0) ? dd.isize : size;
+ for (out_pos = 0; in_pos < readsize; ) {
+ for (out_pos = 0; in_pos < readsize; block_count++) {
int in_ret, out_ret;
- int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
+ int bytes = (in_pos + in.bsz > readsize) ? readsize - in_pos : in.bsz;
if (blk1) {
in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
if (in_ret == 0) {
@@ -5241,6 +5260,9 @@ static int img_dd(int argc, char **argv)
@@ -5168,6 +5187,9 @@ static int img_dd(int argc, char **argv)
} else {
in_ret = read(STDIN_FILENO, in.buf, bytes);
if (in_ret == 0) {

View File

@@ -5,7 +5,7 @@ Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: fix getopt-string + add documentation]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
docs/tools/qemu-img.rst | 11 ++++++++++-
qemu-img-cmds.hx | 4 ++--
@@ -13,7 +13,7 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 15aeddc6d8..5e713e231d 100644
index 85a6e05b35..699229eef6 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -208,6 +208,10 @@ Parameters to convert subcommand:
@@ -65,19 +65,19 @@ index d1616c045a..b5b0bb4467 100644
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 58bf9b43d1..9d414d639b 100644
index 14594d44b6..c6b4a5567d 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5024,7 +5024,7 @@ static int img_dd(int argc, char **argv)
@@ -4951,7 +4951,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
int64_t block_count = 0, out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -5062,7 +5062,7 @@ static int img_dd(int argc, char **argv)
@@ -4989,7 +4989,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
@@ -86,7 +86,7 @@ index 58bf9b43d1..9d414d639b 100644
if (c == EOF) {
break;
}
@@ -5082,6 +5082,9 @@ static int img_dd(int argc, char **argv)
@@ -5009,6 +5009,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
@@ -96,7 +96,7 @@ index 58bf9b43d1..9d414d639b 100644
case 'U':
force_share = true;
break;
@@ -5212,13 +5215,15 @@ static int img_dd(int argc, char **argv)
@@ -5139,13 +5142,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}

View File

@@ -7,62 +7,17 @@ Actually provide memory information via the query-balloon
command.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: add BalloonInfo to member name exceptions list
rebase for 8.0 - moved to hw/core/machine-hmp-cmds.c]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/core/machine-hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
hw/virtio/virtio-balloon.c | 33 +++++++++++++++++++++++++++++++--
monitor/hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
qapi/machine.json | 22 +++++++++++++++++++++-
qapi/pragma.json | 1 +
4 files changed, 82 insertions(+), 4 deletions(-)
3 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index c3e55ef9e9..0e32e6201f 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -169,7 +169,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+ monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+ if (info->has_total_mem) {
+ monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+ }
+ if (info->has_free_mem) {
+ monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+ }
+
+ if (info->has_mem_swapped_in) {
+ monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+ }
+ if (info->has_mem_swapped_out) {
+ monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+ }
+ if (info->has_major_page_faults) {
+ monitor_printf(mon, " major_page_faults=%" PRId64,
+ info->major_page_faults);
+ }
+ if (info->has_minor_page_faults) {
+ monitor_printf(mon, " minor_page_faults=%" PRId64,
+ info->minor_page_faults);
+ }
+ if (info->has_last_update) {
+ monitor_printf(mon, " last_update=%" PRId64,
+ info->last_update);
+ }
+
+ monitor_printf(mon, "\n");
qapi_free_BalloonInfo(info);
}
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d004cf29d2..2660ed520b 100644
index 73ac5eb675..bbfe7eca62 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -782,8 +782,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
@@ -806,8 +806,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
VirtIOBalloon *dev = opaque;
@@ -102,13 +57,54 @@ index d004cf29d2..2660ed520b 100644
}
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index c6cd6f91dd..15572befb1 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -715,7 +715,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
+ monitor_printf(mon, " max_mem=%" PRId64, info->max_mem >> 20);
+ if (info->has_total_mem) {
+ monitor_printf(mon, " total_mem=%" PRId64, info->total_mem >> 20);
+ }
+ if (info->has_free_mem) {
+ monitor_printf(mon, " free_mem=%" PRId64, info->free_mem >> 20);
+ }
+
+ if (info->has_mem_swapped_in) {
+ monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
+ }
+ if (info->has_mem_swapped_out) {
+ monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
+ }
+ if (info->has_major_page_faults) {
+ monitor_printf(mon, " major_page_faults=%" PRId64,
+ info->major_page_faults);
+ }
+ if (info->has_minor_page_faults) {
+ monitor_printf(mon, " minor_page_faults=%" PRId64,
+ info->minor_page_faults);
+ }
+ if (info->has_last_update) {
+ monitor_printf(mon, " last_update=%" PRId64,
+ info->last_update);
+ }
+
+ monitor_printf(mon, "\n");
qapi_free_BalloonInfo(info);
}
diff --git a/qapi/machine.json b/qapi/machine.json
index a08b6576ca..5c9a4d55f4 100644
index 6afd1936b0..8b4be9b718 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1063,9 +1063,29 @@
# @actual: the logical size of the VM in bytes Formula used:
# logical_vm_size = vm_ram_size - balloon_size
@@ -1054,9 +1054,29 @@
# @actual: the logical size of the VM in bytes
# Formula used: logical_vm_size = vm_ram_size - balloon_size
#
+# @last_update: time when stats got updated from guest
+#
@@ -137,15 +133,3 @@ index a08b6576ca..5c9a4d55f4 100644
##
# @query-balloon:
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 7f810b0e97..325e684411 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -35,6 +35,7 @@
'member-name-exceptions': [ # visible in:
'ACPISlotType', # query-acpi-ospm-status
'AcpiTableOptions', # -acpitable
+ 'BalloonInfo', # query-balloon
'BlkdebugEvent', # blockdev-add, -blockdev
'BlkdebugSetStateOptions', # blockdev-add, -blockdev
'BlockDeviceInfo', # query-block

View File

@@ -13,13 +13,13 @@ Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 3860a50c3b..40821e2317 100644
index 4f4ab30f8c..76fff60a6b 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -91,6 +91,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
@@ -99,6 +99,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
info->hotpluggable_cpus = mc->has_hotpluggable_cpus;
info->numa_mem_supported = mc->numa_mem_supported;
info->deprecated = !!mc->deprecation_reason;
info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi");
+
+ if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
+ info->has_is_current = true;
@@ -28,21 +28,21 @@ index 3860a50c3b..40821e2317 100644
+
if (mc->default_cpu_type) {
info->default_cpu_type = g_strdup(mc->default_cpu_type);
}
info->has_default_cpu_type = true;
diff --git a/qapi/machine.json b/qapi/machine.json
index 5c9a4d55f4..fbb61f18e4 100644
index 8b4be9b718..555458f785 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -139,6 +139,8 @@
@@ -138,6 +138,8 @@
#
# @is-default: whether the machine is default
#
+# @is-current: whether this machine is currently used
+#
# @cpu-max: maximum number of CPUs supported by the machine type
# (since 1.5)
# (since 1.5)
#
@@ -163,7 +165,7 @@
@@ -159,7 +161,7 @@
##
{ 'struct': 'MachineInfo',
'data': { 'name': 'str', '*alias': 'str',
@@ -50,4 +50,4 @@ index 5c9a4d55f4..fbb61f18e4 100644
+ '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
'*default-ram-id': 'str', 'acpi': 'bool' } }
'*default-ram-id': 'str' } }

View File

@@ -6,18 +6,16 @@ Subject: [PATCH] PVE: qapi: modify spice query
Provide the last ticket in the SpiceInfo struct optionally.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to QAPI change]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qapi/ui.json | 3 +++
ui/spice-core.c | 4 ++++
2 files changed, 7 insertions(+)
ui/spice-core.c | 5 +++++
2 files changed, 8 insertions(+)
diff --git a/qapi/ui.json b/qapi/ui.json
index 006616aa77..dfd1d3e36b 100644
index cf58ab4283..0be2388941 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -317,11 +317,14 @@
@@ -310,11 +310,14 @@
#
# @channels: a list of @SpiceChannel for each active spice channel
#
@@ -33,14 +31,15 @@ index 006616aa77..dfd1d3e36b 100644
'if': 'CONFIG_SPICE' }
diff --git a/ui/spice-core.c b/ui/spice-core.c
index b20c25aee0..26baeb7846 100644
index 37774f1c0a..367f77f2b4 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -548,6 +548,10 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
@@ -534,6 +534,11 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
micro = SPICE_SERVER_VERSION & 0xff;
info->compiled_version = g_strdup_printf("%d.%d.%d", major, minor, micro);
+ if (auth_passwd) {
+ info->has_ticket = true;
+ info->ticket = g_strdup(auth_passwd);
+ }
+

View File

@@ -15,19 +15,19 @@ Additionally, allows tracking the current position from the outside
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/channel-savevm-async.c | 183 +++++++++++++++++++++++++++++++
migration/channel-savevm-async.c | 182 +++++++++++++++++++++++++++++++
migration/channel-savevm-async.h | 51 +++++++++
migration/meson.build | 1 +
3 files changed, 235 insertions(+)
3 files changed, 234 insertions(+)
create mode 100644 migration/channel-savevm-async.c
create mode 100644 migration/channel-savevm-async.h
diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
new file mode 100644
index 0000000000..aab081ce07
index 0000000000..06d5484778
--- /dev/null
+++ b/migration/channel-savevm-async.c
@@ -0,0 +1,183 @@
@@ -0,0 +1,182 @@
+/*
+ * QIO Channel implementation to be used by savevm-async QMP calls
+ */
@@ -71,7 +71,6 @@ index 0000000000..aab081ce07
+ size_t niov,
+ int **fds,
+ size_t *nfds,
+ int flags,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
@@ -269,14 +268,14 @@ index 0000000000..17ae2cb261
+
+#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
diff --git a/migration/meson.build b/migration/meson.build
index 1ae28523a1..37ddcb5d60 100644
index 690487cf1a..8cac83c06c 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -13,6 +13,7 @@ system_ss.add(files(
@@ -13,6 +13,7 @@ softmmu_ss.add(files(
'block-dirty-bitmap.c',
'channel.c',
'channel-block.c',
+ 'channel-savevm-async.c',
'dirtyrate.c',
'colo-failover.c',
'colo.c',
'exec.c',
'fd.c',

View File

@@ -21,34 +21,31 @@ still opened by QEMU.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[SR: improve aborting
register yank before migration_incoming_state_destroy]
[improve aborting]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[FE: further improve aborting
adapt to removal of QEMUFileOps
improve condition for entering final stage
adapt to QAPI and other changes for 8.0]
adapt to removal of QEMUFileOps]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hmp-commands-info.hx | 13 +
hmp-commands.hx | 17 ++
hmp-commands.hx | 33 +++
include/migration/snapshot.h | 2 +
include/monitor/hmp.h | 3 +
include/monitor/hmp.h | 5 +
migration/meson.build | 1 +
migration/savevm-async.c | 531 +++++++++++++++++++++++++++++++++++
monitor/hmp-cmds.c | 38 +++
monitor/hmp-cmds.c | 57 ++++
qapi/migration.json | 34 +++
qapi/misc.json | 16 ++
qapi/misc.json | 32 +++
qemu-options.hx | 12 +
softmmu/vl.c | 10 +
11 files changed, 677 insertions(+)
11 files changed, 730 insertions(+)
create mode 100644 migration/savevm-async.c
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index f5b37eb74a..10fdd822e0 100644
index 188d9ece3b..97b88eaaad 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -525,6 +525,19 @@ SRST
@@ -538,6 +538,19 @@ SRST
Show current migration parameters.
ERST
@@ -69,13 +66,13 @@ index f5b37eb74a..10fdd822e0 100644
.name = "balloon",
.args_type = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 2cbd0f77a0..e352f86872 100644
index 182e639d14..bbcc73e942 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1865,3 +1865,20 @@ SRST
List event channels in the guest
ERST
#endif
@@ -1800,3 +1800,36 @@ ERST
"\n\t\t\t\t\t limit on a specified virtual cpu",
.cmd = hmp_cancel_vcpu_dirty_limit,
},
+
+ {
+ .name = "savevm-start",
@@ -86,6 +83,22 @@ index 2cbd0f77a0..e352f86872 100644
+ },
+
+ {
+ .name = "snapshot-drive",
+ .args_type = "device:s,name:s",
+ .params = "device name",
+ .help = "Create internal snapshot.",
+ .cmd = hmp_snapshot_drive,
+ },
+
+ {
+ .name = "delete-drive-snapshot",
+ .args_type = "device:s,name:s",
+ .params = "device name",
+ .help = "Delete internal snapshot.",
+ .cmd = hmp_delete_drive_snapshot,
+ },
+
+ {
+ .name = "savevm-end",
+ .args_type = "",
+ .params = "",
@@ -105,10 +118,10 @@ index e72083b117..c846d37806 100644
+
#endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 13f9a2dedb..7a7def7530 100644
index a618eb1e4e..55067beff1 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
@@ -26,6 +26,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
void hmp_info_uuid(Monitor *mon, const QDict *qdict);
void hmp_info_chardev(Monitor *mon, const QDict *qdict);
void hmp_info_mice(Monitor *mon, const QDict *qdict);
@@ -116,38 +129,38 @@ index 13f9a2dedb..7a7def7530 100644
void hmp_info_migrate(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -94,6 +95,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
void hmp_mouse_move(Monitor *mon, const QDict *qdict);
void hmp_mouse_button(Monitor *mon, const QDict *qdict);
void hmp_mouse_set(Monitor *mon, const QDict *qdict);
@@ -80,6 +81,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
void hmp_netdev_del(Monitor *mon, const QDict *qdict);
void hmp_getfd(Monitor *mon, const QDict *qdict);
void hmp_closefd(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
void hmp_sendkey(Monitor *mon, const QDict *qdict);
void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build
index 37ddcb5d60..07f6057acc 100644
index 8cac83c06c..0842d00cd2 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -26,6 +26,7 @@ system_ss.add(files(
'options.c',
@@ -24,6 +24,7 @@ softmmu_ss.add(files(
'multifd-zlib.c',
'postcopy-ram.c',
'savevm.c',
+ 'savevm-async.c',
'socket.c',
'tls.c',
'threadinfo.c',
), gnutls)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
new file mode 100644
index 0000000000..e9fc18fb10
index 0000000000..05d394c0e2
--- /dev/null
+++ b/migration/savevm-async.c
@@ -0,0 +1,531 @@
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "migration/migration.h"
+#include "migration/migration-stats.h"
+#include "migration/options.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
@@ -167,7 +180,6 @@ index 0000000000..e9fc18fb10
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "qemu/yank.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
@@ -218,20 +230,24 @@ index 0000000000..e9fc18fb10
+ info->bytes = s->bs_pos;
+ switch (s->state) {
+ case SAVE_STATE_ERROR:
+ info->has_status = true;
+ info->status = g_strdup("failed");
+ info->has_total_time = true;
+ info->total_time = s->total_time;
+ if (s->error) {
+ info->has_error = true;
+ info->error = g_strdup(error_get_pretty(s->error));
+ }
+ break;
+ case SAVE_STATE_ACTIVE:
+ info->has_status = true;
+ info->status = g_strdup("active");
+ info->has_total_time = true;
+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+ - s->total_time;
+ break;
+ case SAVE_STATE_COMPLETED:
+ info->has_status = true;
+ info->status = g_strdup("completed");
+ info->has_total_time = true;
+ info->total_time = s->total_time;
@@ -277,7 +293,7 @@ index 0000000000..e9fc18fb10
+ return ret;
+}
+
+static void G_GNUC_PRINTF(1, 2) save_snapshot_error(const char *fmt, ...)
+static void save_snapshot_error(const char *fmt, ...)
+{
+ va_list ap;
+ char *msg;
@@ -331,7 +347,7 @@ index 0000000000..e9fc18fb10
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
+ }
+
@@ -388,32 +404,18 @@ index 0000000000..e9fc18fb10
+ }
+
+ while (snap_state.state == SAVE_STATE_ACTIVE) {
+ uint64_t pending_size, pend_precopy, pend_postcopy;
+ uint64_t threshold = 400 * 1000;
+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
+
+ /*
+ * pending_{estimate,exact} are expected to be called without iothread
+ * lock. Similar to what is done in migration.c, call the exact variant
+ * only once pend_precopy in the estimate is below the threshold.
+ */
+ /* pending is expected to be called without iothread lock */
+ qemu_mutex_unlock_iothread();
+ qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
+ if (pend_precopy <= threshold) {
+ qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
+ }
+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
+ qemu_mutex_lock_iothread();
+ pending_size = pend_precopy + pend_postcopy;
+
+ /*
+ * A guest reaching this cutoff is dirtying lots of RAM. It should be
+ * large enough so that the guest can't dirty this much between the
+ * check and the guest actually being stopped, but it should be small
+ * enough to avoid long downtimes for non-hibernation snapshots.
+ */
+ maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
+
+ /* Note that there is no progress for pend_postcopy when iterating */
+ if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
+
+ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
+ ret = qemu_savevm_state_iterate(snap_state.file, false);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
@@ -422,7 +424,11 @@ index 0000000000..e9fc18fb10
+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+ } else {
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+ global_state_store();
+ ret = global_state_store();
+ if (ret) {
+ save_snapshot_error("global_state_store error %d", ret);
+ break;
+ }
+
+ DPRINTF("savevm iterate complete\n");
+ break;
@@ -451,9 +457,7 @@ index 0000000000..e9fc18fb10
+ if (bs_ctx != qemu_get_aio_context()) {
+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+ aio_co_reschedule_self(bs_ctx);
+ bdrv_graph_co_rdlock();
+ bdrv_flush(bs);
+ bdrv_graph_co_rdunlock();
+ aio_co_reschedule_self(qemu_get_aio_context());
+ }
+ }
@@ -464,7 +468,7 @@ index 0000000000..e9fc18fb10
+ qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+void qmp_savevm_start(const char *statefile, Error **errp)
+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+{
+ Error *local_err = NULL;
+ MigrationState *ms = migrate_get_current();
@@ -483,7 +487,7 @@ index 0000000000..e9fc18fb10
+ return;
+ }
+
+ if (migrate_block()) {
+ if (migrate_use_block()) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "Block migration and snapshots are incompatible");
+ return;
@@ -501,7 +505,7 @@ index 0000000000..e9fc18fb10
+ snap_state.error = NULL;
+ }
+
+ if (!statefile) {
+ if (!has_statefile) {
+ vm_stop(RUN_STATE_SAVE_VM);
+ snap_state.state = SAVE_STATE_COMPLETED;
+ return;
@@ -536,8 +540,7 @@ index 0000000000..e9fc18fb10
+ * here (blocking main thread, from QMP) to avoid race conditions.
+ */
+ migrate_init(ms);
+ memset(&mig_stats, 0, sizeof(mig_stats));
+ memset(&compression_counters, 0, sizeof(compression_counters));
+ memset(&ram_counters, 0, sizeof(ram_counters));
+ ms->to_dst_file = snap_state.file;
+
+ error_setg(&snap_state.blocker, "block device is in use by savevm");
@@ -620,6 +623,22 @@ index 0000000000..e9fc18fb10
+ DPRINTF("savevm-end: cleanup done\n");
+}
+
+// FIXME: Deprecated
+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
+{
+ // Compatibility to older qemu-server.
+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
+}
+
+// FIXME: Deprecated
+void qmp_delete_drive_snapshot(const char *device, const char *name,
+ Error **errp)
+{
+ // Compatibility to older qemu-server.
+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
+ true, name, errp);
+}
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+ BlockBackend *be;
@@ -654,10 +673,6 @@ index 0000000000..e9fc18fb10
+ dirty_bitmap_mig_before_vm_start();
+
+ qemu_fclose(f);
+
+ /* state_destroy assumes a real migration which would have added a yank */
+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
+ migration_incoming_state_destroy();
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Error while loading VM state");
@@ -675,28 +690,39 @@ index 0000000000..e9fc18fb10
+ return ret;
+}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 6c559b48c8..91be698308 100644
index 15572befb1..1507180990 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -22,6 +22,7 @@
#include "monitor/monitor-internal.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qerror.h"
@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
mtree_info(flatview, dispatch_tree, owner, disabled);
@@ -1925,6 +1925,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, err);
}
+
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+ qmp_savevm_start(statefile, &errp);
+ qmp_savevm_start(statefile != NULL, statefile, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_snapshot_drive(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_delete_drive_snapshot(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
@@ -713,7 +739,7 @@ index 6c559b48c8..91be698308 100644
+ SaveVMInfo *info;
+ info = qmp_query_savevm(NULL);
+
+ if (info->status) {
+ if (info->has_status) {
+ monitor_printf(mon, "savevm status: %s\n", info->status);
+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+ info->total_time);
@@ -723,17 +749,21 @@ index 6c559b48c8..91be698308 100644
+ if (info->has_bytes) {
+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+ }
+ if (info->error) {
+ if (info->has_error) {
+ monitor_printf(mon, "Error: %s\n", info->error);
+ }
+}
+
void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index 8843e74b59..aca0ca1ac1 100644
index 81185d4311..3129f71fa8 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -291,6 +291,40 @@
'*dirty-limit-throttle-time-per-round': 'uint64',
'*dirty-limit-ring-full-time': 'uint64'} }
@@ -261,6 +261,40 @@
'*compression': 'CompressionStats',
'*socket-address': ['SocketAddress'] } }
+##
+# @SaveVMInfo:
@@ -773,10 +803,10 @@ index 8843e74b59..aca0ca1ac1 100644
# @query-migrate:
#
diff --git a/qapi/misc.json b/qapi/misc.json
index cda2effa81..94a58bb0bf 100644
index 27ef5a2b20..b3ce75dcae 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -456,6 +456,22 @@
@@ -435,6 +435,38 @@
##
{ 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
@@ -789,6 +819,22 @@ index cda2effa81..94a58bb0bf 100644
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @snapshot-drive:
+#
+# Create an internal drive snapshot.
+#
+##
+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @delete-drive-snapshot:
+#
+# Delete a drive snapshot.
+#
+##
+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
@@ -800,10 +846,10 @@ index cda2effa81..94a58bb0bf 100644
# @CommandLineParameterType:
#
diff --git a/qemu-options.hx b/qemu-options.hx
index 8073f5edf5..dc1ececc9c 100644
index 31c04f7eea..c2ca6e91b5 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4483,6 +4483,18 @@ SRST
@@ -4341,6 +4341,18 @@ SRST
Start right away with a saved state (``loadvm`` in monitor)
ERST
@@ -823,10 +869,10 @@ index 8073f5edf5..dc1ececc9c 100644
DEF("daemonize", 0, QEMU_OPTION_daemonize, \
"-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index ba6ad8a8df..ddeace306e 100644
index 706bd7cff7..b8637c4262 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -164,6 +164,7 @@ static const char *accelerators;
@@ -165,6 +165,7 @@ static const char *accelerators;
static bool have_custom_ram_size;
static const char *ram_memdev_id;
static QDict *machine_opts_dict;
@@ -834,7 +880,7 @@ index ba6ad8a8df..ddeace306e 100644
static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
static int display_remote;
@@ -2647,6 +2648,12 @@ void qmp_x_exit_preconfig(Error **errp)
@@ -2584,6 +2585,12 @@ void qmp_x_exit_preconfig(Error **errp)
if (loadvm) {
load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
@@ -847,7 +893,7 @@ index ba6ad8a8df..ddeace306e 100644
}
if (replay_mode != REPLAY_MODE_NONE) {
replay_vmstate_init();
@@ -3196,6 +3203,9 @@ void qemu_init(int argc, char **argv)
@@ -3133,6 +3140,9 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_loadvm:
loadvm = optarg;
break;

View File

@@ -19,11 +19,11 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
3 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 19c33c9985..e9ffff0f0a 100644
index 4f400c2e52..21e8998867 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -33,8 +33,8 @@
#include "options.h"
@@ -31,8 +31,8 @@
#include "trace.h"
#include "qapi/error.h"
-#define IO_BUF_SIZE 32768
@@ -33,7 +33,7 @@ index 19c33c9985..e9ffff0f0a 100644
struct QEMUFile {
const QEMUFileHooks *hooks;
@@ -46,7 +46,8 @@ struct QEMUFile {
@@ -55,7 +55,8 @@ struct QEMUFile {
int buf_index;
int buf_size; /* 0 when writing */
@@ -43,8 +43,8 @@ index 19c33c9985..e9ffff0f0a 100644
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -100,7 +101,9 @@ int qemu_file_shutdown(QEMUFile *f)
return 0;
@@ -106,7 +107,9 @@ bool qemu_file_mode_is_not_valid(const char *mode)
return false;
}
-static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
@@ -54,7 +54,7 @@ index 19c33c9985..e9ffff0f0a 100644
{
QEMUFile *f;
@@ -109,6 +112,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
@@ -115,6 +118,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
object_ref(ioc);
f->ioc = ioc;
f->is_writable = is_writable;
@@ -63,7 +63,7 @@ index 19c33c9985..e9ffff0f0a 100644
return f;
}
@@ -119,17 +124,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
@@ -125,17 +130,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
*/
QEMUFile *qemu_file_get_return_path(QEMUFile *f)
{
@@ -94,7 +94,7 @@ index 19c33c9985..e9ffff0f0a 100644
}
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
@@ -375,7 +390,7 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
@@ -393,7 +408,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
do {
len = qio_channel_read(f->ioc,
(char *)f->buf + pending,
@@ -103,7 +103,7 @@ index 19c33c9985..e9ffff0f0a 100644
&local_error);
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
@@ -425,6 +440,8 @@ int qemu_fclose(QEMUFile *f)
@@ -443,6 +458,8 @@ int qemu_fclose(QEMUFile *f)
}
g_clear_pointer(&f->ioc, object_unref);
@@ -112,7 +112,7 @@ index 19c33c9985..e9ffff0f0a 100644
/* If any error was spotted before closing, we should report it
* instead of the close() return value.
*/
@@ -479,7 +496,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
@@ -497,7 +514,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
@@ -121,7 +121,7 @@ index 19c33c9985..e9ffff0f0a 100644
qemu_fflush(f);
}
}
@@ -504,7 +521,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
@@ -523,7 +540,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
@@ -130,7 +130,7 @@ index 19c33c9985..e9ffff0f0a 100644
if (l > size) {
l = size;
}
@@ -549,8 +566,8 @@ size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t si
@@ -570,8 +587,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
size_t index;
assert(!qemu_file_is_writable(f));
@@ -141,7 +141,7 @@ index 19c33c9985..e9ffff0f0a 100644
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -600,7 +617,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
@@ -621,7 +638,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
size_t res;
uint8_t *src;
@@ -150,16 +150,16 @@ index 19c33c9985..e9ffff0f0a 100644
if (res == 0) {
return done;
}
@@ -634,7 +651,7 @@ size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size
@@ -655,7 +672,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
*/
size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src = NULL;
@@ -659,7 +676,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
@@ -680,7 +697,7 @@ int qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
@@ -168,7 +168,7 @@ index 19c33c9985..e9ffff0f0a 100644
if (index >= f->buf_size) {
qemu_fill_buffer(f);
@@ -777,7 +794,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
@@ -832,7 +849,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
const uint8_t *p, size_t size)
{
@@ -178,7 +178,7 @@ index 19c33c9985..e9ffff0f0a 100644
if (blen < compressBound(size)) {
return -1;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 47015f5201..1312b7c903 100644
index fa13d04d78..914f1a63a8 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -63,7 +63,9 @@ typedef struct QEMUFileHooks {
@@ -192,10 +192,10 @@ index 47015f5201..1312b7c903 100644
int qemu_fclose(QEMUFile *f);
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index e9fc18fb10..80624fada8 100644
index b3692739a0..e65a5e3482 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -378,7 +378,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
@@ -367,7 +367,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
&snap_state.bs_pos));
@@ -204,7 +204,7 @@ index e9fc18fb10..80624fada8 100644
if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -496,7 +496,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
@@ -500,7 +500,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker);
/* restore the VM state */

View File

@@ -4,32 +4,32 @@ Date: Mon, 6 Apr 2020 12:16:47 +0200
Subject: [PATCH] PVE: block: add the zeroinit block driver filter
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
[adapt to changed function signatures]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/meson.build | 1 +
block/zeroinit.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 201 insertions(+)
block/zeroinit.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 197 insertions(+)
create mode 100644 block/zeroinit.c
diff --git a/block/meson.build b/block/meson.build
index 529fc172c6..1833c71ce9 100644
index 60bc305597..ad40c10b6a 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -40,6 +40,7 @@ block_ss.add(files(
'throttle-groups.c',
'throttle.c',
@@ -43,6 +43,7 @@ block_ss.add(files(
'vmdk.c',
'vpc.c',
'write-threshold.c',
+ 'zeroinit.c',
), zstd, zlib, gnutls)
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/zeroinit.c b/block/zeroinit.c
new file mode 100644
index 0000000000..1257342724
index 0000000000..20ee611f22
--- /dev/null
+++ b/block/zeroinit.c
@@ -0,0 +1,200 @@
@@ -0,0 +1,196 @@
+/*
+ * Filter to fake a zero-initialized block device.
+ *
@@ -43,7 +43,6 @@ index 0000000000..1257342724
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
@@ -110,9 +109,7 @@ index 0000000000..1257342724
+
+ /* Open the raw file */
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options, "next",
+ bs, &child_of_bds,
+ BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+ false, &local_err);
+ bs, &child_of_bds, BDRV_CHILD_FILTERED, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
@@ -137,9 +134,9 @@ index 0000000000..1257342724
+ (void)s;
+}
+
+static coroutine_fn int64_t zeroinit_co_getlength(BlockDriverState *bs)
+static int64_t zeroinit_getlength(BlockDriverState *bs)
+{
+ return bdrv_co_getlength(bs->file->bs);
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn zeroinit_co_preadv(BlockDriverState *bs,
@@ -191,10 +188,9 @@ index 0000000000..1257342724
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, req_flags, errp);
+}
+
+static coroutine_fn int zeroinit_co_get_info(BlockDriverState *bs,
+ BlockDriverInfo *bdi)
+static int zeroinit_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ return bdrv_co_get_info(bs->file->bs, bdi);
+ return bdrv_get_info(bs->file->bs, bdi);
+}
+
+static BlockDriver bdrv_zeroinit = {
@@ -205,7 +201,7 @@ index 0000000000..1257342724
+ .bdrv_parse_filename = zeroinit_parse_filename,
+ .bdrv_file_open = zeroinit_open,
+ .bdrv_close = zeroinit_close,
+ .bdrv_co_getlength = zeroinit_co_getlength,
+ .bdrv_getlength = zeroinit_getlength,
+ .bdrv_child_perm = bdrv_default_perms,
+ .bdrv_co_flush_to_disk = zeroinit_co_flush,
+
@@ -221,7 +217,7 @@ index 0000000000..1257342724
+ .bdrv_co_pdiscard = zeroinit_co_pdiscard,
+
+ .bdrv_co_truncate = zeroinit_co_truncate,
+ .bdrv_co_get_info = zeroinit_co_get_info,
+ .bdrv_get_info = zeroinit_get_info,
+};
+
+static void bdrv_zeroinit_init(void)

View File

@@ -14,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 11 insertions(+)
diff --git a/qemu-options.hx b/qemu-options.hx
index dc1ececc9c..848d2dfdd1 100644
index c2ca6e91b5..ab4734ef32 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1197,6 +1197,9 @@ legacy PC, they are not recommended for modern configurations.
@@ -1118,6 +1118,9 @@ backend describes how QEMU handles the data.
ERST
@@ -28,10 +28,10 @@ index dc1ececc9c..848d2dfdd1 100644
"-fda/-fdb file use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index ddeace306e..3ee90b3b94 100644
index b8637c4262..39f149924e 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2683,6 +2683,7 @@ void qemu_init(int argc, char **argv)
@@ -2620,6 +2620,7 @@ void qemu_init(int argc, char **argv, char **envp)
MachineClass *machine_class;
bool userconfig = true;
FILE *vmstate_dump_file = NULL;
@@ -39,7 +39,7 @@ index ddeace306e..3ee90b3b94 100644
qemu_add_opts(&qemu_drive_opts);
qemu_add_drive_opts(&qemu_legacy_drive_opts);
@@ -3308,6 +3309,13 @@ void qemu_init(int argc, char **argv)
@@ -3245,6 +3246,13 @@ void qemu_init(int argc, char **argv, char **envp)
machine_parse_property_opt(qemu_find_opts("smp-opts"),
"smp", optarg);
break;

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 4a34f03047..59b917e50c 100644
index 2a20982066..7968ad5a93 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -252,6 +252,15 @@ static void apic_reset_common(DeviceState *dev)
@@ -278,6 +278,15 @@ static void apic_reset_common(DeviceState *dev)
info->vapic_base_update(s);
apic_init_reset(dev);

View File

@@ -13,10 +13,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index ca551baa42..8b3b83e9d4 100644
index 3d60b80286..49ee1db5f9 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2873,6 +2873,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2475,6 +2475,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
int fd;
uint64_t perm, shared;
int result = 0;
@@ -24,7 +24,7 @@ index ca551baa42..8b3b83e9d4 100644
/* Validate options and set default values */
assert(options->driver == BLOCKDEV_DRIVER_FILE);
@@ -2913,19 +2914,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2515,19 +2516,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
@@ -59,7 +59,7 @@ index ca551baa42..8b3b83e9d4 100644
}
/* Clear the file by truncating it to 0 */
@@ -2979,13 +2983,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2581,13 +2585,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
}
out_unlock:
@@ -82,7 +82,7 @@ index ca551baa42..8b3b83e9d4 100644
}
out_close:
@@ -3009,6 +3015,7 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2612,6 +2618,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
PreallocMode prealloc;
char *buf = NULL;
Error *local_err = NULL;
@@ -90,7 +90,7 @@ index ca551baa42..8b3b83e9d4 100644
/* Skip file: protocol prefix */
strstart(filename, "file:", &filename);
@@ -3031,6 +3038,18 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2634,6 +2641,18 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
return -EINVAL;
}
@@ -109,7 +109,7 @@ index ca551baa42..8b3b83e9d4 100644
options = (BlockdevCreateOptions) {
.driver = BLOCKDEV_DRIVER_FILE,
.u.file = {
@@ -3042,6 +3061,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename,
@@ -2645,6 +2664,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
.nocow = nocow,
.has_extent_size_hint = has_extent_size_hint,
.extent_size_hint = extent_size_hint,
@@ -119,10 +119,10 @@ index ca551baa42..8b3b83e9d4 100644
};
return raw_co_create(&options, errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index a5cea82139..bb471c078d 100644
index e1857e7094..ddac91e8f6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4880,7 +4880,8 @@
@@ -4537,7 +4537,8 @@
'size': 'size',
'*preallocation': 'PreallocMode',
'*nocow': 'bool',

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/monitor/qmp.c b/monitor/qmp.c
index 589c9524f8..2505dd658a 100644
index 6b8cfcf6d8..3ec67e32d3 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -536,8 +536,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
@@ -519,8 +519,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
qemu_chr_fe_set_echo(&mon->common.chr, true);
/* Note: we run QMP monitor in I/O thread when @chr supports that */

View File

@@ -26,10 +26,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index f0d35c6401..1427983543 100644
index a673302cce..fa424440bd 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -148,7 +148,8 @@ GlobalProperty hw_compat_4_0[] = {
@@ -127,7 +127,8 @@ GlobalProperty hw_compat_4_0[] = {
{ "virtio-vga", "edid", "false" },
{ "virtio-gpu-device", "edid", "false" },
{ "virtio-device", "use-started", "false" },

View File

@@ -11,71 +11,70 @@ and only if 'is-current').
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to QAPI changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
hw/core/machine-qmp-cmds.c | 5 +++++
hw/core/machine-qmp-cmds.c | 6 ++++++
include/hw/boards.h | 2 ++
qapi/machine.json | 4 +++-
softmmu/vl.c | 25 +++++++++++++++++++++++++
4 files changed, 35 insertions(+), 1 deletion(-)
4 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 40821e2317..ee93ddd69a 100644
index 76fff60a6b..ec9201fb9a 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -95,6 +95,11 @@ MachineInfoList *qmp_query_machines(Error **errp)
@@ -103,6 +103,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
info->has_is_current = true;
info->is_current = true;
+
+ // PVE version string only exists for current machine
+ if (mc->pve_version) {
+ info->has_pve_version = true;
+ info->pve_version = g_strdup(mc->pve_version);
+ }
}
if (mc->default_cpu_type) {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index ed83360198..f8b88cd86a 100644
index 7b416c9787..8ae15c51aa 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -235,6 +235,8 @@ struct MachineClass {
@@ -230,6 +230,8 @@ struct MachineClass {
const char *desc;
const char *deprecation_reason;
+ const char *pve_version;
+
void (*init)(MachineState *state);
void (*reset)(MachineState *state, ShutdownCause reason);
void (*reset)(MachineState *state);
void (*wakeup)(MachineState *state);
diff --git a/qapi/machine.json b/qapi/machine.json
index fbb61f18e4..7da3c519ba 100644
index 555458f785..d868e4d31d 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -161,6 +161,8 @@
@@ -157,6 +157,8 @@
#
# @acpi: machine type supports ACPI (since 8.0)
# @default-ram-id: the default ID of initial RAM memory backend (since 5.2)
#
+# @pve-version: custom PVE version suffix specified as 'machine+pveN'
+#
# Since: 1.2
##
{ 'struct': 'MachineInfo',
@@ -168,7 +170,7 @@
@@ -164,7 +166,7 @@
'*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
- '*default-ram-id': 'str', 'acpi': 'bool' } }
+ '*default-ram-id': 'str', 'acpi': 'bool', '*pve-version': 'str' } }
- '*default-ram-id': 'str' } }
+ '*default-ram-id': 'str', '*pve-version': 'str' } }
##
# @query-machines:
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 3ee90b3b94..4b6d0b82fd 100644
index 39f149924e..0d233d55f3 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1597,6 +1597,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
@@ -1580,6 +1580,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
static MachineClass *select_machine(QDict *qdict, Error **errp)
{
const char *optarg = qdict_get_try_str(qdict, "type");
@@ -83,7 +82,7 @@ index 3ee90b3b94..4b6d0b82fd 100644
GSList *machines = object_class_get_list(TYPE_MACHINE, false);
MachineClass *machine_class;
Error *local_err = NULL;
@@ -1614,6 +1615,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
@@ -1597,6 +1598,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
}
}
@@ -95,7 +94,7 @@ index 3ee90b3b94..4b6d0b82fd 100644
g_slist_free(machines);
if (local_err) {
error_append_hint(&local_err, "Use -machine help to list supported machines\n");
@@ -3250,12 +3256,31 @@ void qemu_init(int argc, char **argv)
@@ -3187,12 +3193,31 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_machine:
{
bool help;

View File

@@ -25,7 +25,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index db3791f4d1..39410dcf8d 100644
index b2b649e305..b6fa9e8a69 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
@@ -48,7 +48,7 @@ index db3791f4d1..39410dcf8d 100644
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0;
int64_t count;
@@ -495,6 +493,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
@@ -492,6 +490,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);

View File

@@ -3,46 +3,40 @@ From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:57 +0200
Subject: [PATCH] PVE-Backup: add vma backup format code
Notes about partial restoring: skipping a certain drive is done via a
map line of the form skip=drive-scsi0. Since in PVE, most archives are
compressed and piped to vma for restore, it's not easily possible to
skip reads.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: improvements during create
allow partial restore]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
[FE: create: register all streams before entering coroutines]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/meson.build | 2 +
meson.build | 5 +
vma-reader.c | 867 ++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 818 +++++++++++++++++++++++++++++++++++++++++
vma.c | 900 ++++++++++++++++++++++++++++++++++++++++++++++
vma-reader.c | 859 ++++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 791 ++++++++++++++++++++++++++++++++++++++++++
vma.c | 849 +++++++++++++++++++++++++++++++++++++++++++++
vma.h | 150 ++++++++
6 files changed, 2742 insertions(+)
6 files changed, 2656 insertions(+)
create mode 100644 vma-reader.c
create mode 100644 vma-writer.c
create mode 100644 vma.c
create mode 100644 vma.h
diff --git a/block/meson.build b/block/meson.build
index 1833c71ce9..59b71ba9f3 100644
index ad40c10b6a..3a0b84bc11 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -43,6 +43,8 @@ block_ss.add(files(
@@ -46,6 +46,8 @@ block_ss.add(files(
'zeroinit.c',
), zstd, zlib, gnutls)
+block_ss.add(files('../vma-writer.c'), libuuid)
+
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
if get_option('qcow1').allowed()
diff --git a/meson.build b/meson.build
index a9c4f28247..cd95530d3b 100644
index d5230eadd6..ffff66c0cc 100644
--- a/meson.build
+++ b/meson.build
@@ -1778,6 +1778,8 @@ endif
@@ -1462,6 +1462,8 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
@@ -51,7 +45,7 @@ index a9c4f28247..cd95530d3b 100644
# libselinux
selinux = dependency('libselinux',
required: get_option('selinux'),
@@ -3908,6 +3910,9 @@ if have_tools
@@ -3607,6 +3609,9 @@ if have_tools
dependencies: [blockdev, qemuutil, gnutls, selinux],
install: true)
@@ -63,10 +57,10 @@ index a9c4f28247..cd95530d3b 100644
subdir('contrib/elf2dmp')
diff --git a/vma-reader.c b/vma-reader.c
new file mode 100644
index 0000000000..81a891c6b1
index 0000000000..e65f1e8415
--- /dev/null
+++ b/vma-reader.c
@@ -0,0 +1,867 @@
@@ -0,0 +1,859 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -97,7 +91,6 @@ index 0000000000..81a891c6b1
+ bool write_zeroes;
+ unsigned long *bitmap;
+ int bitmap_size;
+ bool skip;
+} VmaRestoreState;
+
+struct VmaReader {
@@ -495,14 +488,13 @@ index 0000000000..81a891c6b1
+}
+
+static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
+ BlockBackend *target, bool write_zeroes, bool skip)
+ BlockBackend *target, bool write_zeroes)
+{
+ assert(vmar);
+ assert(dev_id);
+
+ vmar->rstate[dev_id].target = target;
+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
+ vmar->rstate[dev_id].skip = skip;
+
+ int64_t size = vmar->devinfo[dev_id].size;
+
@@ -517,30 +509,28 @@ index 0000000000..81a891c6b1
+}
+
+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
+ bool write_zeroes, bool skip, Error **errp)
+ bool write_zeroes, Error **errp)
+{
+ assert(vmar);
+ assert(target != NULL || skip);
+ assert(target != NULL);
+ assert(dev_id);
+ assert(vmar->rstate[dev_id].target == NULL && !vmar->rstate[dev_id].skip);
+ assert(vmar->rstate[dev_id].target == NULL);
+
+ if (target != NULL) {
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
+
+ allocate_rstate(vmar, dev_id, target, write_zeroes, skip);
+ allocate_rstate(vmar, dev_id, target, write_zeroes);
+
+ return 0;
+}
@@ -633,23 +623,19 @@ index 0000000000..81a891c6b1
+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
+ BlockBackend *target = NULL;
+
+ bool skip = rstate->skip;
+
+ if (dev_id != vmar->vmstate_stream) {
+ target = rstate->target;
+ if (!verify && !target && !skip) {
+ if (!verify && !target) {
+ error_setg(errp, "got wrong dev id %d", dev_id);
+ return -1;
+ }
+
+ if (!skip) {
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
+
+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
+ } else {
@@ -695,7 +681,7 @@ index 0000000000..81a891c6b1
+ return -1;
+ }
+
+ if (!verify && !skip) {
+ if (!verify) {
+ int nb_sectors = end_sector - sector_num;
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ buf + start, sector_num, nb_sectors,
@@ -731,7 +717,7 @@ index 0000000000..81a891c6b1
+ return -1;
+ }
+
+ if (!verify && !skip) {
+ if (!verify) {
+ int nb_sectors = end_sector - sector_num;
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ buf + start, sector_num,
@@ -756,7 +742,7 @@ index 0000000000..81a891c6b1
+ vmar->partial_zero_cluster_data += zero_size;
+ }
+
+ if (rstate->write_zeroes && !verify && !skip) {
+ if (rstate->write_zeroes && !verify) {
+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
+ zero_vma_block, sector_num,
+ nb_sectors, errp) < 0) {
@@ -927,7 +913,7 @@ index 0000000000..81a891c6b1
+
+ for (dev_id = 1; dev_id < 255; dev_id++) {
+ if (vma_reader_get_device_info(vmar, dev_id)) {
+ allocate_rstate(vmar, dev_id, NULL, false, false);
+ allocate_rstate(vmar, dev_id, NULL, false);
+ }
+ }
+
@@ -936,10 +922,10 @@ index 0000000000..81a891c6b1
+
diff --git a/vma-writer.c b/vma-writer.c
new file mode 100644
index 0000000000..126b296647
index 0000000000..df4b20793d
--- /dev/null
+++ b/vma-writer.c
@@ -0,0 +1,818 @@
@@ -0,0 +1,791 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -955,8 +941,6 @@ index 0000000000..126b296647
+
+#include "qemu/osdep.h"
+#include <glib.h>
+#include <linux/magic.h>
+#include <sys/vfs.h>
+#include <uuid/uuid.h>
+
+#include "vma.h"
@@ -965,7 +949,6 @@ index 0000000000..126b296647
+#include "qemu/main-loop.h"
+#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/memalign.h"
+
+#define DEBUG_VMA 0
@@ -1149,10 +1132,10 @@ index 0000000000..126b296647
+{
+ assert(qemu_in_coroutine());
+ AioContext *ctx = qemu_get_current_aio_context();
+ aio_set_fd_handler(ctx, fd, NULL, (IOHandler *)qemu_coroutine_enter, NULL,
+ NULL, qemu_coroutine_self());
+ aio_set_fd_handler(ctx, fd, false, NULL, (IOHandler *)qemu_coroutine_enter,
+ NULL, NULL, qemu_coroutine_self());
+ qemu_coroutine_yield();
+ aio_set_fd_handler(ctx, fd, NULL, NULL, NULL, NULL, NULL);
+ aio_set_fd_handler(ctx, fd, false, NULL, NULL, NULL, NULL, NULL);
+}
+
+static ssize_t coroutine_fn
@@ -1201,23 +1184,6 @@ index 0000000000..126b296647
+ return (done == bytes) ? bytes : -1;
+}
+
+static bool is_path_tmpfs(const char *path) {
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = statfs(path, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret != 0) {
+ warn_report("statfs call for %s failed, assuming not tmpfs - %s\n",
+ path, strerror(errno));
+ return false;
+ }
+
+ return fs.f_type == TMPFS_MAGIC;
+}
+
+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
+{
+ const char *p;
@@ -1267,19 +1233,12 @@ index 0000000000..126b296647
+ }
+ /* try to use O_NONBLOCK */
+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
+ } else {
+ gchar *dirname = g_path_get_dirname(filename);
+ oflags = O_NONBLOCK|O_WRONLY|O_EXCL;
+ if (!is_path_tmpfs(dirname)) {
+ oflags |= O_DIRECT;
+ }
+ g_free(dirname);
+ } else {
+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_EXCL;
+ vmaw->fd = qemu_create(filename, oflags, 0644, errp);
+ }
+
+ if (vmaw->fd < 0) {
+ error_free(*errp);
+ *errp = NULL;
+ error_setg(errp, "can't open file %s - %s\n", filename,
+ g_strerror(errno));
+ goto err;
@@ -1760,10 +1719,10 @@ index 0000000000..126b296647
+}
diff --git a/vma.c b/vma.c
new file mode 100644
index 0000000000..347f6283ca
index 0000000000..e8dffb43e0
--- /dev/null
+++ b/vma.c
@@ -0,0 +1,900 @@
@@ -0,0 +1,849 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -1797,7 +1756,7 @@ index 0000000000..347f6283ca
+ "vma list <filename>\n"
+ "vma config <filename> [-c config]\n"
+ "vma create <filename> [-c config] pathname ...\n"
+ "vma extract <filename> [-d <drive-list>] [-r <fifo>] <targetdir>\n"
+ "vma extract <filename> [-r <fifo>] <targetdir>\n"
+ "vma verify <filename> [-v]\n"
+ ;
+
@@ -1904,7 +1863,6 @@ index 0000000000..347f6283ca
+ char *throttling_group;
+ char *cache;
+ bool write_zero;
+ bool skip;
+} RestoreMap;
+
+static bool try_parse_option(char **line, const char *optname, char **out, const char *inbuf) {
@@ -1942,10 +1900,9 @@ index 0000000000..347f6283ca
+ const char *filename;
+ const char *dirname;
+ const char *readmap = NULL;
+ gchar **drive_list = NULL;
+
+ for (;;) {
+ c = getopt(argc, argv, "hvd:r:");
+ c = getopt(argc, argv, "hvr:");
+ if (c == -1) {
+ break;
+ }
@@ -1954,9 +1911,6 @@ index 0000000000..347f6283ca
+ case 'h':
+ help();
+ break;
+ case 'd':
+ drive_list = g_strsplit(optarg, ",", 254);
+ break;
+ case 'r':
+ readmap = optarg;
+ break;
@@ -2016,61 +1970,47 @@ index 0000000000..347f6283ca
+ char *bps = NULL;
+ char *group = NULL;
+ char *cache = NULL;
+ char *devname = NULL;
+ bool skip = false;
+ uint64_t bps_value = 0;
+ const char *path = NULL;
+ bool write_zero = true;
+
+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
+ break;
+ }
+ int len = strlen(line);
+ if (line[len - 1] == '\n') {
+ line[len - 1] = '\0';
+ len = len - 1;
+ if (len == 0) {
+ if (len == 1) {
+ break;
+ }
+ }
+
+ if (strncmp(line, "skip", 4) == 0) {
+ if (len < 6 || line[4] != '=') {
+ g_error("read map failed - option 'skip' has no value ('%s')",
+ inbuf);
+ } else {
+ devname = line + 5;
+ skip = true;
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
+ } else {
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
+ }
+
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
+
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ path = extract_devname(path, &devname, -1);
+ }
+
+ uint64_t bps_value = 0;
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
+
+ const char *path;
+ bool write_zero;
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ char *devname = NULL;
+ path = extract_devname(path, &devname, -1);
+ if (!devname) {
+ g_error("read map failed - no dev name specified ('%s')",
+ inbuf);
@@ -2084,7 +2024,6 @@ index 0000000000..347f6283ca
+ map->throttling_group = group;
+ map->cache = cache;
+ map->write_zero = write_zero;
+ map->skip = skip;
+
+ g_hash_table_insert(devmap, map->devname, map);
+
@@ -2093,12 +2032,12 @@ index 0000000000..347f6283ca
+
+ int i;
+ int vmstate_fd = -1;
+ bool drive_rename_bitmap[255];
+ memset(drive_rename_bitmap, 0, sizeof(drive_rename_bitmap));
+ guint8 vmstate_stream = 0;
+
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
+ vmstate_stream = i;
+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
+ if (vmstate_fd < 0) {
@@ -2114,25 +2053,10 @@ index 0000000000..347f6283ca
+ const char *cache = NULL;
+ int flags = BDRV_O_RDWR;
+ bool write_zero = true;
+ bool skip = false;
+
+ BlockBackend *blk = NULL;
+
+ if (drive_list) {
+ skip = true;
+ int j;
+ for (j = 0; drive_list[j]; j++) {
+ if (strcmp(drive_list[j], di->devname) == 0) {
+ skip = false;
+ drive_rename_bitmap[i] = true;
+ break;
+ }
+ }
+ } else {
+ drive_rename_bitmap[i] = true;
+ }
+
+ if (!skip && readmap) {
+ if (readmap) {
+ RestoreMap *map;
+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
+ if (map == NULL) {
@@ -2144,8 +2068,7 @@ index 0000000000..347f6283ca
+ throttling_group = map->throttling_group;
+ cache = map->cache;
+ write_zero = map->write_zero;
+ skip = map->skip;
+ } else if (!skip) {
+ } else {
+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
+ dirname, di->devname);
+ printf("DEVINFO %s %zd\n", devfn, di->size);
@@ -2163,60 +2086,57 @@ index 0000000000..347f6283ca
+ write_zero = false;
+ }
+
+ if (!skip) {
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too.
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
+
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
+
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
+
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
+
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
+ }
+
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
+ }
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too.
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
+
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, skip, &errp) < 0) {
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
+
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
+
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
+ }
+
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
+ }
+
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
+ g_error("%s", error_get_pretty(errp));
+ }
+
@@ -2226,10 +2146,6 @@ index 0000000000..347f6283ca
+ }
+ }
+
+ if (drive_list) {
+ g_strfreev(drive_list);
+ }
+
+ if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
+ g_error("restore failed - %s", error_get_pretty(errp));
+ }
@@ -2237,7 +2153,7 @@ index 0000000000..347f6283ca
+ if (!readmap) {
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && drive_rename_bitmap[i]) {
+ if (di && (i != vmstate_stream)) {
+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
+ dirname, di->devname);
+ char *fn = g_strdup_printf("%s/disk-%s.raw",
@@ -2336,7 +2252,7 @@ index 0000000000..347f6283ca
+ struct iovec iov;
+ QEMUIOVector qiov;
+
+ int64_t start, end, readlen;
+ int64_t start, end;
+ int ret = 0;
+
+ unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
@@ -2350,24 +2266,16 @@ index 0000000000..347f6283ca
+ iov.iov_len = VMA_CLUSTER_SIZE;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ if (start + 1 == end) {
+ memset(buf, 0, VMA_CLUSTER_SIZE);
+ readlen = job->len - start * VMA_CLUSTER_SIZE;
+ assert(readlen > 0 && readlen <= VMA_CLUSTER_SIZE);
+ } else {
+ readlen = VMA_CLUSTER_SIZE;
+ }
+
+ ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
+ readlen, &qiov, 0);
+ VMA_CLUSTER_SIZE, &qiov, 0);
+ if (ret < 0) {
+ vma_writer_set_error(job->vmaw, "read error");
+ vma_writer_set_error(job->vmaw, "read error", -1);
+ goto out;
+ }
+
+ size_t zb = 0;
+ if (vma_writer_write(job->vmaw, job->dev_id, start, buf, &zb) < 0) {
+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed");
+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed", -1);
+ goto out;
+ }
+ }
@@ -2666,7 +2574,7 @@ index 0000000000..347f6283ca
+}
diff --git a/vma.h b/vma.h
new file mode 100644
index 0000000000..86d2873aa5
index 0000000000..c895c97f6d
--- /dev/null
+++ b/vma.h
@@ -0,0 +1,150 @@
@@ -2804,7 +2712,7 @@ index 0000000000..86d2873aa5
+int coroutine_fn vma_writer_flush_output(VmaWriter *vmaw);
+
+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
+
+
+VmaReader *vma_reader_create(const char *filename, Error **errp);
@@ -2814,7 +2722,7 @@ index 0000000000..86d2873aa5
+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
+ BlockBackend *target, bool write_zeroes,
+ bool skip, Error **errp);
+ Error **errp);
+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
+ Error **errp);
+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);

View File

@@ -9,23 +9,21 @@ Subject: [PATCH] PVE-Backup: add backup-dump block driver
- job.c: make job_should_pause non-static
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to coroutine changes]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/backup-dump.c | 168 +++++++++++++++++++++++++++++++
block/backup-dump.c | 167 +++++++++++++++++++++++++++++++
block/backup.c | 30 ++----
block/meson.build | 1 +
include/block/block_int-common.h | 35 +++++++
job.c | 3 +-
5 files changed, 214 insertions(+), 23 deletions(-)
5 files changed, 213 insertions(+), 23 deletions(-)
create mode 100644 block/backup-dump.c
diff --git a/block/backup-dump.c b/block/backup-dump.c
new file mode 100644
index 0000000000..232a094426
index 0000000000..04718a94e2
--- /dev/null
+++ b/block/backup-dump.c
@@ -0,0 +1,168 @@
@@ -0,0 +1,167 @@
+/*
+ * BlockDriver to send backup data stream to a callback function
+ *
@@ -47,8 +45,7 @@ index 0000000000..232a094426
+ void *dump_cb_data;
+} BDRVBackupDumpState;
+
+static coroutine_fn int qemu_backup_dump_co_get_info(BlockDriverState *bs,
+ BlockDriverInfo *bdi)
+static int qemu_backup_dump_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVBackupDumpState *s = bs->opaque;
+
@@ -89,7 +86,7 @@ index 0000000000..232a094426
+ /* Nothing to do. */
+}
+
+static coroutine_fn int64_t qemu_backup_dump_co_getlength(BlockDriverState *bs)
+static int64_t qemu_backup_dump_getlength(BlockDriverState *bs)
+{
+ BDRVBackupDumpState *s = bs->opaque;
+
@@ -149,8 +146,8 @@ index 0000000000..232a094426
+
+ .bdrv_close = qemu_backup_dump_close,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_getlength = qemu_backup_dump_co_getlength,
+ .bdrv_co_get_info = qemu_backup_dump_co_get_info,
+ .bdrv_getlength = qemu_backup_dump_getlength,
+ .bdrv_get_info = qemu_backup_dump_get_info,
+
+ .bdrv_co_writev = qemu_backup_dump_co_writev,
+
@@ -195,7 +192,7 @@ index 0000000000..232a094426
+ return bs;
+}
diff --git a/block/backup.c b/block/backup.c
index 39410dcf8d..af87fa6aa9 100644
index b6fa9e8a69..789f8b7799 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -29,28 +29,6 @@
@@ -227,7 +224,7 @@ index 39410dcf8d..af87fa6aa9 100644
static const BlockJobDriver backup_job_driver;
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -457,6 +435,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
@@ -454,6 +432,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
}
cluster_size = block_copy_cluster_size(bcs);
@@ -243,7 +240,7 @@ index 39410dcf8d..af87fa6aa9 100644
if (perf->max_chunk && perf->max_chunk < cluster_size) {
error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
diff --git a/block/meson.build b/block/meson.build
index 59b71ba9f3..6fde9f7dcd 100644
index 3a0b84bc11..7f22e7f177 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,6 +4,7 @@ block_ss.add(files(
@@ -255,18 +252,18 @@ index 59b71ba9f3..6fde9f7dcd 100644
'blkdebug.c',
'blklogwrites.c',
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 74195c3004..0f2e1817ad 100644
index 8947abab76..f272d0d8dc 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -26,6 +26,7 @@
#include "block/aio.h"
#include "block/block-common.h"
#include "block/accounting.h"
#include "block/block.h"
+#include "block/block-copy.h"
#include "block/block-global-state.h"
#include "block/snapshot.h"
#include "qemu/iov.h"
@@ -60,6 +61,40 @@
#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
@@ -64,6 +65,40 @@
#define BLOCK_PROBE_BUF_SIZE 512
@@ -308,16 +305,16 @@ index 74195c3004..0f2e1817ad 100644
BDRV_TRACKED_READ,
BDRV_TRACKED_WRITE,
diff --git a/job.c b/job.c
index 72d57f0934..93e22d180b 100644
index 075c6f3a20..e5699ad200 100644
--- a/job.c
+++ b/job.c
@@ -330,7 +330,8 @@ static bool job_started_locked(Job *job)
@@ -276,7 +276,8 @@ static bool job_started(Job *job)
return job->co;
}
/* Called with job_mutex held. */
-static bool job_should_pause_locked(Job *job)
+bool job_should_pause_locked(Job *job);
+bool job_should_pause_locked(Job *job)
-static bool job_should_pause(Job *job)
+bool job_should_pause(Job *job);
+bool job_should_pause(Job *job)
{
return job->pause_count > 0;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -5,19 +5,17 @@ Subject: [PATCH] PVE-Backup: pbs-restore - new command to restore from proxmox
backup server
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[WB: add namespace support]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
meson.build | 4 +
pbs-restore.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 240 insertions(+)
pbs-restore.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 227 insertions(+)
create mode 100644 pbs-restore.c
diff --git a/meson.build b/meson.build
index d53976d621..c3330310d9 100644
index 0bc2fb5b10..f48d2e0457 100644
--- a/meson.build
+++ b/meson.build
@@ -3914,6 +3914,10 @@ if have_tools
@@ -3613,6 +3613,10 @@ if have_tools
vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
dependencies: [authz, block, crypto, io, qom], install: true)
@@ -30,10 +28,10 @@ index d53976d621..c3330310d9 100644
subdir('contrib/elf2dmp')
diff --git a/pbs-restore.c b/pbs-restore.c
new file mode 100644
index 0000000000..f03d9bab8d
index 0000000000..2f834cf42e
--- /dev/null
+++ b/pbs-restore.c
@@ -0,0 +1,236 @@
@@ -0,0 +1,223 @@
+/*
+ * Qemu image restore helper for Proxmox Backup
+ *
@@ -65,7 +63,7 @@ index 0000000000..f03d9bab8d
+static void help(void)
+{
+ const char *help_msg =
+ "usage: pbs-restore [--repository <repo>] [--ns namespace] snapshot archive-name target [command options]\n"
+ "usage: pbs-restore [--repository <repo>] snapshot archive-name target [command options]\n"
+ ;
+
+ printf("%s", help_msg);
@@ -113,7 +111,6 @@ index 0000000000..f03d9bab8d
+ Error *main_loop_err = NULL;
+ const char *format = "raw";
+ const char *repository = NULL;
+ const char *backup_ns = NULL;
+ const char *keyfile = NULL;
+ int verbose = false;
+ bool skip_zero = false;
@@ -127,7 +124,6 @@ index 0000000000..f03d9bab8d
+ {"verbose", no_argument, 0, 'v'},
+ {"format", required_argument, 0, 'f'},
+ {"repository", required_argument, 0, 'r'},
+ {"ns", required_argument, 0, 'n'},
+ {"keyfile", required_argument, 0, 'k'},
+ {0, 0, 0, 0}
+ };
@@ -148,9 +144,6 @@ index 0000000000..f03d9bab8d
+ case 'r':
+ repository = g_strdup(argv[optind - 1]);
+ break;
+ case 'n':
+ backup_ns = g_strdup(argv[optind - 1]);
+ break;
+ case 'k':
+ keyfile = g_strdup(argv[optind - 1]);
+ break;
@@ -201,16 +194,8 @@ index 0000000000..f03d9bab8d
+ fprintf(stderr, "connecting to repository '%s'\n", repository);
+ }
+ char *pbs_error = NULL;
+ ProxmoxRestoreHandle *conn = proxmox_restore_new_ns(
+ repository,
+ snapshot,
+ backup_ns,
+ password,
+ keyfile,
+ key_password,
+ fingerprint,
+ &pbs_error
+ );
+ ProxmoxRestoreHandle *conn = proxmox_restore_new(
+ repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
+ if (conn == NULL) {
+ fprintf(stderr, "restore failed: %s\n", pbs_error);
+ return -1;

View File

@@ -0,0 +1,452 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 29 Jun 2020 11:06:03 +0200
Subject: [PATCH] PVE-Backup: Add dirty-bitmap tracking for incremental backups
Uses QEMU's existing MIRROR_SYNC_MODE_BITMAP and a dirty-bitmap on top
of all backed-up drives. This will only execute the data-write callback
for any changed chunks, the PBS rust code will reuse chunks from the
previous index for everything it doesn't receive if reuse_index is true.
On error or cancellation, remove all dirty bitmaps to ensure
consistency.
Add PBS/incremental specific information to query backup info QMP and
HMP commands.
Only supported for PBS backups.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
monitor/hmp-cmds.c | 45 ++++++++++----
proxmox-backup-client.c | 3 +-
proxmox-backup-client.h | 1 +
pve-backup.c | 103 ++++++++++++++++++++++++++++++---
qapi/block-core.json | 12 +++-
6 files changed, 142 insertions(+), 23 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 89ca64444d..45da74d7a0 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,6 +1042,7 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
+ false, false, // PBS incremental
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 1168773da7..4c1671e289 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -223,19 +223,42 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "End time: %s", ctime(&info->end_time));
}
- int per = (info->has_total && info->total &&
- info->has_transferred && info->transferred) ?
- (info->transferred * 100)/info->total : 0;
- int zero_per = (info->has_total && info->total &&
- info->has_zero_bytes && info->zero_bytes) ?
- (info->zero_bytes * 100)/info->total : 0;
monitor_printf(mon, "Backup file: %s\n", info->backup_file);
monitor_printf(mon, "Backup uuid: %s\n", info->uuid);
- monitor_printf(mon, "Total size: %zd\n", info->total);
- monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
- info->transferred, per);
- monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
- info->zero_bytes, zero_per);
+
+ if (!(info->has_total && info->total)) {
+ // this should not happen normally
+ monitor_printf(mon, "Total size: %d\n", 0);
+ } else {
+ bool incremental = false;
+ size_t total_or_dirty = info->total;
+ if (info->has_transferred) {
+ if (info->has_dirty && info->dirty) {
+ if (info->dirty < info->total) {
+ total_or_dirty = info->dirty;
+ incremental = true;
+ }
+ }
+ }
+
+ int per = (info->transferred * 100)/total_or_dirty;
+
+ monitor_printf(mon, "Backup mode: %s\n", incremental ? "incremental" : "full");
+
+ int zero_per = (info->has_zero_bytes && info->zero_bytes) ?
+ (info->zero_bytes * 100)/info->total : 0;
+ monitor_printf(mon, "Total size: %zd\n", info->total);
+ monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
+ info->transferred, per);
+ monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
+ info->zero_bytes, zero_per);
+
+ if (info->has_reused) {
+ int reused_per = (info->reused * 100)/total_or_dirty;
+ monitor_printf(mon, "Reused bytes: %zd (%d%%)\n",
+ info->reused, reused_per);
+ }
+ }
}
qapi_free_BackupStatus(info);
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index a8f6653a81..4ce7bc0b5e 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -89,6 +89,7 @@ proxmox_backup_co_register_image(
ProxmoxBackupHandle *pbs,
const char *device_name,
uint64_t size,
+ bool incremental,
Error **errp)
{
Coroutine *co = qemu_coroutine_self();
@@ -98,7 +99,7 @@ proxmox_backup_co_register_image(
int pbs_res = -1;
proxmox_backup_register_image_async(
- pbs, device_name, size ,proxmox_backup_schedule_wake, &waker, &pbs_res, &pbs_err);
+ pbs, device_name, size, incremental, proxmox_backup_schedule_wake, &waker, &pbs_res, &pbs_err);
qemu_coroutine_yield();
if (pbs_res < 0) {
if (errp) error_setg(errp, "backup register image failed: %s", pbs_err ? pbs_err : "unknown error");
diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h
index 1dda8b7d8f..8cbf645b2c 100644
--- a/proxmox-backup-client.h
+++ b/proxmox-backup-client.h
@@ -32,6 +32,7 @@ proxmox_backup_co_register_image(
ProxmoxBackupHandle *pbs,
const char *device_name,
uint64_t size,
+ bool incremental,
Error **errp);
diff --git a/pve-backup.c b/pve-backup.c
index 88f5ee133f..1c49cd178d 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -28,6 +28,8 @@
*
*/
+const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
+
static struct PVEBackupState {
struct {
// Everithing accessed from qmp_backup_query command is protected using lock
@@ -39,7 +41,9 @@ static struct PVEBackupState {
uuid_t uuid;
char uuid_str[37];
size_t total;
+ size_t dirty;
size_t transferred;
+ size_t reused;
size_t zero_bytes;
} stat;
int64_t speed;
@@ -66,6 +70,7 @@ typedef struct PVEBackupDevInfo {
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
+ BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
} PVEBackupDevInfo;
@@ -105,11 +110,12 @@ static bool pvebackup_error_or_canceled(void)
return error_or_canceled;
}
-static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
+static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes, size_t reused)
{
qemu_mutex_lock(&backup_state.stat.lock);
backup_state.stat.zero_bytes += zero_bytes;
backup_state.stat.transferred += transferred;
+ backup_state.stat.reused += reused;
qemu_mutex_unlock(&backup_state.stat.lock);
}
@@ -148,7 +154,8 @@ pvebackup_co_dump_pbs_cb(
pvebackup_propagate_error(local_err);
return pbs_res;
} else {
- pvebackup_add_transfered_bytes(size, !buf ? size : 0);
+ size_t reused = (pbs_res == 0) ? size : 0;
+ pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
}
return size;
@@ -208,11 +215,11 @@ pvebackup_co_dump_vma_cb(
} else {
if (remaining >= VMA_CLUSTER_SIZE) {
assert(ret == VMA_CLUSTER_SIZE);
- pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes);
+ pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes, 0);
remaining -= VMA_CLUSTER_SIZE;
} else {
assert(ret == remaining);
- pvebackup_add_transfered_bytes(remaining, zero_bytes);
+ pvebackup_add_transfered_bytes(remaining, zero_bytes, 0);
remaining = 0;
}
}
@@ -248,6 +255,18 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
if (local_err != NULL) {
pvebackup_propagate_error(local_err);
}
+ } else {
+ // on error or cancel we cannot ensure synchronization of dirty
+ // bitmaps with backup server, so remove all and do full backup next
+ GList *l = backup_state.di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ if (di->bitmap) {
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+ }
}
proxmox_backup_disconnect(backup_state.pbs);
@@ -303,6 +322,12 @@ static void pvebackup_complete_cb(void *opaque, int ret)
// remove self from job queue
backup_state.di_list = g_list_remove(backup_state.di_list, di);
+ if (di->bitmap && ret < 0) {
+ // on error or cancel we cannot ensure synchronization of dirty
+ // bitmaps with backup server, so remove all and do full backup next
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+
g_free(di);
qemu_mutex_unlock(&backup_state.backup_mutex);
@@ -472,12 +497,18 @@ static bool create_backup_jobs(void) {
assert(di->target != NULL);
+ MirrorSyncMode sync_mode = MIRROR_SYNC_MODE_FULL;
+ BitmapSyncMode bitmap_mode = BITMAP_SYNC_MODE_NEVER;
+ if (di->bitmap) {
+ sync_mode = MIRROR_SYNC_MODE_BITMAP;
+ bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
+ }
AioContext *aio_context = bdrv_get_aio_context(di->bs);
aio_context_acquire(aio_context);
BlockJob *job = backup_job_create(
- NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
- BITMAP_SYNC_MODE_NEVER, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+ NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+ bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
aio_context_release(aio_context);
@@ -528,6 +559,8 @@ typedef struct QmpBackupTask {
const char *fingerprint;
bool has_fingerprint;
int64_t backup_time;
+ bool has_use_dirty_bitmap;
+ bool use_dirty_bitmap;
bool has_format;
BackupFormat format;
bool has_config_file;
@@ -619,6 +652,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
+ size_t dirty = 0;
l = di_list;
while (l) {
@@ -656,6 +690,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
+ bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -675,7 +711,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
goto err;
}
- if (proxmox_backup_co_connect(pbs, task->errp) < 0)
+ int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ if (connect_result < 0)
goto err;
/* register all devices */
@@ -686,9 +723,40 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *devname = bdrv_get_device_name(di->bs);
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, task->errp);
- if (dev_id < 0)
+ BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
+ bool expect_only_dirty = false;
+
+ if (use_dirty_bitmap) {
+ if (bitmap == NULL) {
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ if (!bitmap) {
+ goto err;
+ }
+ } else {
+ expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
+ }
+
+ if (expect_only_dirty) {
+ dirty += bdrv_get_dirty_count(bitmap);
+ } else {
+ /* mark entire bitmap as dirty to make full backup */
+ bdrv_set_dirty_bitmap(bitmap, 0, di->size);
+ dirty += di->size;
+ }
+ di->bitmap = bitmap;
+ } else {
+ dirty += di->size;
+
+ /* after a full backup the old dirty bitmap is invalid anyway */
+ if (bitmap != NULL) {
+ bdrv_release_dirty_bitmap(bitmap);
+ }
+ }
+
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ if (dev_id < 0) {
goto err;
+ }
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
goto err;
@@ -697,6 +765,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->dev_id = dev_id;
}
} else if (format == BACKUP_FORMAT_VMA) {
+ dirty = total;
+
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
@@ -724,6 +794,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
} else if (format == BACKUP_FORMAT_DIR) {
+ dirty = total;
+
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
@@ -796,8 +868,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
+ backup_state.stat.dirty = dirty;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -821,6 +895,10 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ if (di->bitmap) {
+ bdrv_release_dirty_bitmap(di->bitmap);
+ }
+
if (di->target) {
bdrv_unref(di->target);
}
@@ -862,6 +940,7 @@ UuidInfo *qmp_backup(
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -880,6 +959,8 @@ UuidInfo *qmp_backup(
.backup_id = backup_id,
.has_backup_time = has_backup_time,
.backup_time = backup_time,
+ .has_use_dirty_bitmap = has_use_dirty_bitmap,
+ .use_dirty_bitmap = use_dirty_bitmap,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
@@ -948,10 +1029,14 @@ BackupStatus *qmp_query_backup(Error **errp)
info->has_total = true;
info->total = backup_state.stat.total;
+ info->has_dirty = true;
+ info->dirty = backup_state.stat.dirty;
info->has_zero_bytes = true;
info->zero_bytes = backup_state.stat.zero_bytes;
info->has_transferred = true;
info->transferred = backup_state.stat.transferred;
+ info->has_reused = true;
+ info->reused = backup_state.stat.reused;
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 90ad07b7ee..3ad9eb5d1a 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -753,8 +753,13 @@
#
# @total: total amount of bytes involved in the backup process
#
+# @dirty: with incremental mode (PBS) this is the amount of bytes involved
+# in the backup process which are marked dirty.
+#
# @transferred: amount of bytes already backed up.
#
+# @reused: amount of bytes reused due to deduplication.
+#
# @zero-bytes: amount of 'zero' bytes detected.
#
# @start-time: time (epoch) when backup job started.
@@ -767,8 +772,8 @@
#
##
{ 'struct': 'BackupStatus',
- 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int',
- '*transferred': 'int', '*zero-bytes': 'int',
+ 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
+ '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
'*backup-file': 'str', '*uuid': 'str' } }
@@ -811,6 +816,8 @@
#
# @backup-time: backup timestamp (Unix epoch, required for format 'pbs')
#
+# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
+#
# Returns: the uuid of the backup job
#
##
@@ -821,6 +828,7 @@
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
+ '*use-dirty-bitmap': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -0,0 +1,219 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Thu, 9 Jul 2020 12:53:08 +0200
Subject: [PATCH] PVE: various PBS fixes
pbs: fix crypt and compress parameters
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
PVE: handle PBS write callback with big blocks correctly
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
PVE: add zero block handling to PBS dump callback
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 ++-
pve-backup.c | 57 +++++++++++++++++++++++++++-------
qapi/block-core.json | 6 ++++
3 files changed, 54 insertions(+), 13 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 45da74d7a0..ea7b665aa2 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,7 +1042,9 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
- false, false, // PBS incremental
+ false, false, // PBS use-dirty-bitmap
+ false, false, // PBS compress
+ false, false, // PBS encrypt
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/pve-backup.c b/pve-backup.c
index 1c49cd178d..c15abefdda 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -8,6 +8,7 @@
#include "block/blockjob.h"
#include "qapi/qapi-commands-block.h"
#include "qapi/qmp/qerror.h"
+#include "qemu/cutils.h"
/* PVE backup state and related function */
@@ -67,6 +68,7 @@ opts_init(pvebackup_init);
typedef struct PVEBackupDevInfo {
BlockDriverState *bs;
size_t size;
+ uint64_t block_size;
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
@@ -135,10 +137,13 @@ pvebackup_co_dump_pbs_cb(
PVEBackupDevInfo *di = opaque;
assert(backup_state.pbs);
+ assert(buf);
Error *local_err = NULL;
int pbs_res = -1;
+ bool is_zero_block = size == di->block_size && buffer_is_zero(buf, size);
+
qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
@@ -147,17 +152,29 @@ pvebackup_co_dump_pbs_cb(
return -1;
}
- pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
- qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ uint64_t transferred = 0;
+ uint64_t reused = 0;
+ while (transferred < size) {
+ uint64_t left = size - transferred;
+ uint64_t to_transfer = left < di->block_size ? left : di->block_size;
- if (pbs_res < 0) {
- pvebackup_propagate_error(local_err);
- return pbs_res;
- } else {
- size_t reused = (pbs_res == 0) ? size : 0;
- pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
+ pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id,
+ is_zero_block ? NULL : buf + transferred, start + transferred,
+ to_transfer, &local_err);
+ transferred += to_transfer;
+
+ if (pbs_res < 0) {
+ pvebackup_propagate_error(local_err);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return pbs_res;
+ }
+
+ reused += pbs_res == 0 ? to_transfer : 0;
}
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ pvebackup_add_transfered_bytes(size, is_zero_block ? size : 0, reused);
+
return size;
}
@@ -178,6 +195,7 @@ pvebackup_co_dump_vma_cb(
int ret = -1;
assert(backup_state.vmaw);
+ assert(buf);
uint64_t remaining = size;
@@ -204,9 +222,7 @@ pvebackup_co_dump_vma_cb(
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
++cluster_num;
- if (buf) {
- buf += VMA_CLUSTER_SIZE;
- }
+ buf += VMA_CLUSTER_SIZE;
if (ret < 0) {
Error *local_err = NULL;
vma_writer_error_propagate(backup_state.vmaw, &local_err);
@@ -569,6 +585,10 @@ typedef struct QmpBackupTask {
const char *firewall_file;
bool has_devlist;
const char *devlist;
+ bool has_compress;
+ bool compress;
+ bool has_encrypt;
+ bool encrypt;
bool has_speed;
int64_t speed;
Error **errp;
@@ -692,6 +712,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -701,8 +722,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->has_password ? task->password : NULL,
task->has_keyfile ? task->keyfile : NULL,
task->has_key_password ? task->key_password : NULL,
+ task->has_compress ? task->compress : true,
+ task->has_encrypt ? task->encrypt : task->has_keyfile,
task->has_fingerprint ? task->fingerprint : NULL,
- &pbs_err);
+ &pbs_err);
if (!pbs) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
@@ -721,6 +744,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ di->block_size = dump_cb_block_size;
+
const char *devname = bdrv_get_device_name(di->bs);
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
@@ -941,6 +966,8 @@ UuidInfo *qmp_backup(
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -951,6 +978,8 @@ UuidInfo *qmp_backup(
.backup_file = backup_file,
.has_password = has_password,
.password = password,
+ .has_keyfile = has_keyfile,
+ .keyfile = keyfile,
.has_key_password = has_key_password,
.key_password = key_password,
.has_fingerprint = has_fingerprint,
@@ -961,6 +990,10 @@ UuidInfo *qmp_backup(
.backup_time = backup_time,
.has_use_dirty_bitmap = has_use_dirty_bitmap,
.use_dirty_bitmap = use_dirty_bitmap,
+ .has_compress = has_compress,
+ .compress = compress,
+ .has_encrypt = has_encrypt,
+ .encrypt = encrypt,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3ad9eb5d1a..4120052690 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -818,6 +818,10 @@
#
# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
#
+# @compress: use compression (optional for format 'pbs', defaults to true)
+#
+# @encrypt: use encryption ((optional for format 'pbs', defaults to true if there is a keyfile)
+#
# Returns: the uuid of the backup job
#
##
@@ -829,6 +833,8 @@
'*backup-id': 'str',
'*backup-time': 'int',
'*use-dirty-bitmap': 'bool',
+ '*compress': 'bool',
+ '*encrypt': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -7,27 +7,23 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[error cleanups, file_open implementation]
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[WB: add namespace support]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[FE: adapt to changed function signatures
make pbs_co_preadv return values consistent with QEMU
getlength is now a coroutine function]
make pbs_co_preadv return values consistent with QEMU]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/meson.build | 3 +
block/pbs.c | 305 +++++++++++++++++++++++++++++++++++++++++++
block/pbs.c | 276 +++++++++++++++++++++++++++++++++++++++++++
configure | 9 ++
meson.build | 2 +-
qapi/block-core.json | 13 ++
qapi/pragma.json | 1 +
6 files changed, 332 insertions(+), 1 deletion(-)
5 files changed, 302 insertions(+), 1 deletion(-)
create mode 100644 block/pbs.c
diff --git a/block/meson.build b/block/meson.build
index 6d468f89e5..becc99ac4e 100644
index 2783b77e9c..a26a69434e 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -50,6 +50,9 @@ block_ss.add(files(
@@ -53,6 +53,9 @@ block_ss.add(files(
'../pve-backup.c',
), libproxmox_backup_qemu)
@@ -35,14 +31,14 @@ index 6d468f89e5..becc99ac4e 100644
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: libproxmox_backup_qemu)
+
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
system_ss.add(files('block-ram-registrar.c'))
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/pbs.c b/block/pbs.c
new file mode 100644
index 0000000000..a2211e0f3b
index 0000000000..9d1f1f39d4
--- /dev/null
+++ b/block/pbs.c
@@ -0,0 +1,305 @@
@@ -0,0 +1,276 @@
+/*
+ * Proxmox Backup Server read-only block driver
+ */
@@ -55,12 +51,10 @@ index 0000000000..a2211e0f3b
+#include "qemu/option.h"
+#include "qemu/cutils.h"
+#include "block/block_int.h"
+#include "block/block-io.h"
+
+#include <proxmox-backup-qemu.h>
+
+#define PBS_OPT_REPOSITORY "repository"
+#define PBS_OPT_NAMESPACE "namespace"
+#define PBS_OPT_SNAPSHOT "snapshot"
+#define PBS_OPT_ARCHIVE "archive"
+#define PBS_OPT_KEYFILE "keyfile"
@@ -74,7 +68,6 @@ index 0000000000..a2211e0f3b
+ int64_t length;
+
+ char *repository;
+ char *namespace;
+ char *snapshot;
+ char *archive;
+} BDRVPBSState;
@@ -89,11 +82,6 @@ index 0000000000..a2211e0f3b
+ .help = "The server address and repository to connect to.",
+ },
+ {
+ .name = PBS_OPT_NAMESPACE,
+ .type = QEMU_OPT_STRING,
+ .help = "Optional: The snapshot's namespace.",
+ },
+ {
+ .name = PBS_OPT_SNAPSHOT,
+ .type = QEMU_OPT_STRING,
+ .help = "The snapshot to read.",
@@ -129,7 +117,7 @@ index 0000000000..a2211e0f3b
+
+
+// filename format:
+// pbs:repository=<repo>,namespace=<ns>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+// pbs:repository=<repo>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+static void pbs_parse_filename(const char *filename, QDict *options,
+ Error **errp)
+{
@@ -165,7 +153,6 @@ index 0000000000..a2211e0f3b
+ s->archive = g_strdup(qemu_opt_get(opts, PBS_OPT_ARCHIVE));
+ const char *keyfile = qemu_opt_get(opts, PBS_OPT_KEYFILE);
+ const char *password = qemu_opt_get(opts, PBS_OPT_PASSWORD);
+ const char *namespace = qemu_opt_get(opts, PBS_OPT_NAMESPACE);
+ const char *fingerprint = qemu_opt_get(opts, PBS_OPT_FINGERPRINT);
+ const char *key_password = qemu_opt_get(opts, PBS_OPT_ENCRYPTION_PASSWORD);
+
@@ -178,12 +165,9 @@ index 0000000000..a2211e0f3b
+ if (!key_password) {
+ key_password = getenv("PBS_ENCRYPTION_PASSWORD");
+ }
+ if (namespace) {
+ s->namespace = g_strdup(namespace);
+ }
+
+ /* connect to PBS server in read mode */
+ s->conn = proxmox_restore_new_ns(s->repository, s->snapshot, s->namespace, password,
+ s->conn = proxmox_restore_new(s->repository, s->snapshot, password,
+ keyfile, key_password, fingerprint, &pbs_error);
+
+ /* invalidates qemu_opt_get char pointers from above */
@@ -228,13 +212,12 @@ index 0000000000..a2211e0f3b
+static void pbs_close(BlockDriverState *bs) {
+ BDRVPBSState *s = bs->opaque;
+ g_free(s->repository);
+ g_free(s->namespace);
+ g_free(s->snapshot);
+ g_free(s->archive);
+ proxmox_restore_disconnect(s->conn);
+}
+
+static coroutine_fn int64_t pbs_co_getlength(BlockDriverState *bs)
+static int64_t pbs_getlength(BlockDriverState *bs)
+{
+ BDRVPBSState *s = bs->opaque;
+ return s->length;
@@ -258,16 +241,7 @@ index 0000000000..a2211e0f3b
+ BDRVPBSState *s = bs->opaque;
+ int ret;
+ char *pbs_error = NULL;
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
+ uint8_t *buf = malloc(bytes);
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
@@ -290,10 +264,8 @@ index 0000000000..a2211e0f3b
+ return -EIO;
+ }
+
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ free(buf);
+
+ return 0;
+}
@@ -310,13 +282,8 @@ index 0000000000..a2211e0f3b
+static void pbs_refresh_filename(BlockDriverState *bs)
+{
+ BDRVPBSState *s = bs->opaque;
+ if (s->namespace) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s:%s(%s)",
+ s->repository, s->namespace, s->snapshot, s->archive);
+ } else {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+ }
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+}
+
+static const char *const pbs_strong_runtime_opts[] = {
@@ -333,7 +300,7 @@ index 0000000000..a2211e0f3b
+ .bdrv_file_open = pbs_file_open,
+ .bdrv_open = pbs_open,
+ .bdrv_close = pbs_close,
+ .bdrv_co_getlength = pbs_co_getlength,
+ .bdrv_getlength = pbs_getlength,
+
+ .bdrv_co_preadv = pbs_co_preadv,
+ .bdrv_co_pwritev = pbs_co_pwritev,
@@ -349,40 +316,40 @@ index 0000000000..a2211e0f3b
+
+block_init(bdrv_pbs_init);
diff --git a/configure b/configure
index 133f4e3235..f5a830c1f3 100755
index 72ab03f11a..7203c270ec 100755
--- a/configure
+++ b/configure
@@ -256,6 +256,7 @@ qemu_suffix="qemu"
softmmu="yes"
linux_user=""
@@ -309,6 +309,7 @@ linux_user=""
bsd_user=""
pie=""
coroutine=""
+pbs_bdrv="yes"
plugins="$default_feature"
ninja=""
python=
@@ -809,6 +810,10 @@ for opt do
;;
--enable-download) download="enabled"; git_submodules_action=update;
meson=""
meson_args=""
@@ -902,6 +903,10 @@ for opt do
--enable-uuid|--disable-uuid)
echo "$0: $opt is obsolete, UUID support is always built" >&2
;;
+ --disable-pbs-bdrv) pbs_bdrv="no"
+ ;;
+ --enable-pbs-bdrv) pbs_bdrv="yes"
+ ;;
--enable-plugins) if test "$mingw32" = "yes"; then
error_exit "TCG plugins not currently supported on Windows platforms"
else
@@ -959,6 +964,7 @@ cat << EOF
bsd-user all BSD usermode emulation targets
pie Position Independent Executables
debug-tcg TCG debugging (default is disabled)
--with-git=*) git="$optarg"
;;
--with-git-submodules=*)
@@ -1087,6 +1092,7 @@ cat << EOF
debug-info debugging information
safe-stack SafeStack Stack Smash Protection. Depends on
clang/llvm >= 3.7 and requires coroutine backend ucontext.
+ pbs-bdrv Proxmox backup server read-only block driver support
NOTE: The object files are built at the place where configure is launched
EOF
@@ -1744,6 +1750,9 @@ if test "$solaris" = "yes" ; then
@@ -2463,6 +2469,9 @@ echo "TARGET_DIRS=$target_list" >> $config_host_mak
if test "$modules" = "yes"; then
echo "CONFIG_MODULES=y" >> $config_host_mak
fi
echo "SRC_PATH=$source_path" >> $config_host_mak
echo "TARGET_DIRS=$target_list" >> $config_host_mak
+if test "$pbs_bdrv" = "yes" ; then
+ echo "CONFIG_PBS_BDRV=y" >> $config_host_mak
+fi
@@ -390,10 +357,10 @@ index 133f4e3235..f5a830c1f3 100755
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
diff --git a/meson.build b/meson.build
index c3330310d9..cbfc9a43fb 100644
index f48d2e0457..be4785e2f6 100644
--- a/meson.build
+++ b/meson.build
@@ -4319,7 +4319,7 @@ summary_info += {'bzip2 support': libbzip2}
@@ -3986,7 +3986,7 @@ summary_info += {'bzip2 support': libbzip2}
summary_info += {'lzfse support': liblzfse}
summary_info += {'zstd support': zstd}
summary_info += {'NUMA host support': numa}
@@ -403,18 +370,18 @@ index c3330310d9..cbfc9a43fb 100644
summary_info += {'libdaxctl support': libdaxctl}
summary_info += {'libudev': libudev}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1b8462a51b..d67a6d448a 100644
index 4120052690..96bc696aaa 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3396,6 +3396,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
@@ -3099,6 +3099,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
+ 'pbs',
'ssh', 'throttle', 'vdi', 'vhdx',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
@@ -3482,6 +3483,17 @@
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
##
@@ -3171,6 +3172,17 @@
{ 'struct': 'BlockdevOptionsNull',
'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
@@ -427,28 +394,16 @@ index 1b8462a51b..d67a6d448a 100644
+{ 'struct': 'BlockdevOptionsPbs',
+ 'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
+ '*keyfile': 'str', '*password': 'str', '*fingerprint': 'str',
+ '*key_password': 'str', '*namespace': 'str' } }
+ '*key_password': 'str' } }
+
##
# @BlockdevOptionsNVMe:
#
@@ -4890,6 +4902,7 @@
@@ -4455,6 +4467,7 @@
'nfs': 'BlockdevOptionsNfs',
'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull',
+ 'pbs': 'BlockdevOptionsPbs',
'nvme': 'BlockdevOptionsNVMe',
'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
'if': 'CONFIG_BLKIO' },
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 325e684411..b6079f6a0e 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -45,6 +45,7 @@
'BlockInfo', # query-block
'BlockdevAioOptions', # blockdev-add, -blockdev
'BlockdevDriver', # blockdev-add, query-blockstats, ...
+ 'BlockdevOptionsPbs', # for PBS backwards compat
'BlockdevVmdkAdapterType', # blockdev-create (to match VMDK spec)
'BlockdevVmdkSubformat', # blockdev-create (to match VMDK spec)
'ColoCompareProperties', # object_add, -object
'parallels': 'BlockdevOptionsGenericFormat',
'preallocate':'BlockdevOptionsPreallocate',

View File

@@ -0,0 +1,74 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 8 Jul 2020 11:57:53 +0200
Subject: [PATCH] PVE: add query_proxmox_support QMP command
Generic interface for future use, currently used for PBS dirty-bitmap
backup support.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[PVE: query-proxmox-support: include library version]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 9 +++++++++
qapi/block-core.json | 29 +++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/pve-backup.c b/pve-backup.c
index c15abefdda..4684789813 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1075,3 +1075,12 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
+
+ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+{
+ ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
+ ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
+ ret->pbs_dirty_bitmap = true;
+ ret->pbs_dirty_bitmap_savevm = true;
+ return ret;
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 96bc696aaa..0b453c61d4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -863,6 +863,35 @@
##
{ 'command': 'backup-cancel' }
+##
+# @ProxmoxSupportStatus:
+#
+# Contains info about supported features added by Proxmox.
+#
+# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
+# supported.
+#
+# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
+# safely be set for savevm-async.
+#
+# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+#
+##
+{ 'struct': 'ProxmoxSupportStatus',
+ 'data': { 'pbs-dirty-bitmap': 'bool',
+ 'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-library-version': 'str' } }
+
+##
+# @query-proxmox-support:
+#
+# Returns information about supported features added by Proxmox.
+#
+# Returns: @ProxmoxSupportStatus
+#
+##
+{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }
+
##
# @BlockDeviceTimedStats:
#

View File

@@ -0,0 +1,441 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 19 Aug 2020 17:02:00 +0200
Subject: [PATCH] PVE: add query-pbs-bitmap-info QMP call
Returns advanced information about dirty bitmaps used (or not used) for
the latest PBS backup.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
monitor/hmp-cmds.c | 28 ++++++-----
pve-backup.c | 117 ++++++++++++++++++++++++++++++++-----------
qapi/block-core.json | 56 +++++++++++++++++++++
3 files changed, 159 insertions(+), 42 deletions(-)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 4c1671e289..c1152f55a7 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -200,6 +200,7 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict)
void hmp_info_backup(Monitor *mon, const QDict *qdict)
{
BackupStatus *info;
+ PBSBitmapInfoList *bitmap_info;
info = qmp_query_backup(NULL);
@@ -230,26 +231,29 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
// this should not happen normally
monitor_printf(mon, "Total size: %d\n", 0);
} else {
- bool incremental = false;
size_t total_or_dirty = info->total;
- if (info->has_transferred) {
- if (info->has_dirty && info->dirty) {
- if (info->dirty < info->total) {
- total_or_dirty = info->dirty;
- incremental = true;
- }
- }
+ bitmap_info = qmp_query_pbs_bitmap_info(NULL);
+
+ while (bitmap_info) {
+ monitor_printf(mon, "Drive %s:\n",
+ bitmap_info->value->drive);
+ monitor_printf(mon, " bitmap action: %s\n",
+ PBSBitmapAction_str(bitmap_info->value->action));
+ monitor_printf(mon, " size: %zd\n",
+ bitmap_info->value->size);
+ monitor_printf(mon, " dirty: %zd\n",
+ bitmap_info->value->dirty);
+ bitmap_info = bitmap_info->next;
}
- int per = (info->transferred * 100)/total_or_dirty;
-
- monitor_printf(mon, "Backup mode: %s\n", incremental ? "incremental" : "full");
+ qapi_free_PBSBitmapInfoList(bitmap_info);
int zero_per = (info->has_zero_bytes && info->zero_bytes) ?
(info->zero_bytes * 100)/info->total : 0;
monitor_printf(mon, "Total size: %zd\n", info->total);
+ int trans_per = (info->transferred * 100)/total_or_dirty;
monitor_printf(mon, "Transferred bytes: %zd (%d%%)\n",
- info->transferred, per);
+ info->transferred, trans_per);
monitor_printf(mon, "Zero bytes: %zd (%d%%)\n",
info->zero_bytes, zero_per);
diff --git a/pve-backup.c b/pve-backup.c
index 4684789813..f90abaa50a 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -46,6 +46,7 @@ static struct PVEBackupState {
size_t transferred;
size_t reused;
size_t zero_bytes;
+ GList *bitmap_list;
} stat;
int64_t speed;
VmaWriter *vmaw;
@@ -672,7 +673,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
- size_t dirty = 0;
l = di_list;
while (l) {
@@ -693,18 +693,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_generate(uuid);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.reused = 0;
+
+ /* clear previous backup's bitmap_list */
+ if (backup_state.stat.bitmap_list) {
+ GList *bl = backup_state.stat.bitmap_list;
+ while (bl) {
+ g_free(((PBSBitmapInfo *)bl->data)->drive);
+ g_free(bl->data);
+ bl = g_list_next(bl);
+ }
+ g_list_free(backup_state.stat.bitmap_list);
+ backup_state.stat.bitmap_list = NULL;
+ }
+
if (format == BACKUP_FORMAT_PBS) {
if (!task->has_password) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
- goto err;
+ goto err_mutex;
}
if (!task->has_backup_id) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
- goto err;
+ goto err_mutex;
}
if (!task->has_backup_time) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
- goto err;
+ goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
@@ -731,12 +746,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
- goto err;
+ goto err_mutex;
}
int connect_result = proxmox_backup_co_connect(pbs, task->errp);
if (connect_result < 0)
- goto err;
+ goto err_mutex;
/* register all devices */
l = di_list;
@@ -747,6 +762,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->block_size = dump_cb_block_size;
const char *devname = bdrv_get_device_name(di->bs);
+ PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
+ size_t dirty = di->size;
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
@@ -755,49 +772,59 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (bitmap == NULL) {
bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
if (!bitmap) {
- goto err;
+ goto err_mutex;
}
+ action = PBS_BITMAP_ACTION_NEW;
} else {
expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
}
if (expect_only_dirty) {
- dirty += bdrv_get_dirty_count(bitmap);
+ /* track clean chunks as reused */
+ dirty = MIN(bdrv_get_dirty_count(bitmap), di->size);
+ backup_state.stat.reused += di->size - dirty;
+ action = PBS_BITMAP_ACTION_USED;
} else {
/* mark entire bitmap as dirty to make full backup */
bdrv_set_dirty_bitmap(bitmap, 0, di->size);
- dirty += di->size;
+ if (action != PBS_BITMAP_ACTION_NEW) {
+ action = PBS_BITMAP_ACTION_INVALID;
+ }
}
di->bitmap = bitmap;
} else {
- dirty += di->size;
-
/* after a full backup the old dirty bitmap is invalid anyway */
if (bitmap != NULL) {
bdrv_release_dirty_bitmap(bitmap);
+ action = PBS_BITMAP_ACTION_NOT_USED_REMOVED;
}
}
int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
if (dev_id < 0) {
- goto err;
+ goto err_mutex;
}
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
- goto err;
+ goto err_mutex;
}
di->dev_id = dev_id;
+
+ PBSBitmapInfo *info = g_malloc(sizeof(*info));
+ info->drive = g_strdup(devname);
+ info->action = action;
+ info->size = di->size;
+ info->dirty = dirty;
+ backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- dirty = total;
-
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
error_propagate(task->errp, local_err);
}
- goto err;
+ goto err_mutex;
}
/* register all devices for vma writer */
@@ -807,7 +834,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
l = g_list_next(l);
if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
- goto err;
+ goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
@@ -815,16 +842,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (di->dev_id <= 0) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
- goto err;
+ goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- dirty = total;
-
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
- goto err;
+ goto err_mutex;
}
backup_dir = task->backup_file;
@@ -841,18 +866,18 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->size, flags, false, &local_err);
if (local_err) {
error_propagate(task->errp, local_err);
- goto err;
+ goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
error_propagate(task->errp, local_err);
- goto err;
+ goto err_mutex;
}
}
} else {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
- goto err;
+ goto err_mutex;
}
@@ -860,7 +885,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_config_file) {
if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
- goto err;
+ goto err_mutex;
}
}
@@ -868,12 +893,11 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_firewall_file) {
if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
- goto err;
+ goto err_mutex;
}
}
/* initialize global backup_state now */
-
- qemu_mutex_lock(&backup_state.stat.lock);
+ /* note: 'reused' and 'bitmap_list' are initialized earlier */
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
@@ -893,10 +917,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
- backup_state.stat.dirty = dirty;
+ backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
- backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -913,6 +936,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->result = uuid_info;
return;
+err_mutex:
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
err:
l = di_list;
@@ -1076,11 +1102,42 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
+PBSBitmapInfoList *qmp_query_pbs_bitmap_info(Error **errp)
+{
+ PBSBitmapInfoList *head = NULL, **p_next = &head;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+
+ GList *l = backup_state.stat.bitmap_list;
+ while (l) {
+ PBSBitmapInfo *info = (PBSBitmapInfo *)l->data;
+ l = g_list_next(l);
+
+ /* clone bitmap info to avoid auto free after QMP marshalling */
+ PBSBitmapInfo *info_ret = g_malloc0(sizeof(*info_ret));
+ info_ret->drive = g_strdup(info->drive);
+ info_ret->action = info->action;
+ info_ret->size = info->size;
+ info_ret->dirty = info->dirty;
+
+ PBSBitmapInfoList *info_list = g_malloc0(sizeof(*info_list));
+ info_list->value = info_ret;
+
+ *p_next = info_list;
+ p_next = &info_list->next;
+ }
+
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ return head;
+}
+
ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
{
ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->query_bitmap_info = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0b453c61d4..16e184dd28 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -871,6 +871,8 @@
# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
# supported.
#
+# @query-bitmap-info: True if the 'query-pbs-bitmap-info' QMP call is supported.
+#
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
#
@@ -879,6 +881,7 @@
##
{ 'struct': 'ProxmoxSupportStatus',
'data': { 'pbs-dirty-bitmap': 'bool',
+ 'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-library-version': 'str' } }
@@ -892,6 +895,59 @@
##
{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }
+##
+# @PBSBitmapAction:
+#
+# An action taken on a dirty-bitmap when a backup job was started.
+#
+# @not-used: Bitmap mode was not enabled.
+#
+# @not-used-removed: Bitmap mode was not enabled, but a bitmap from a
+# previous backup still existed and was removed.
+#
+# @new: A new bitmap was attached to the drive for this backup.
+#
+# @used: An existing bitmap will be used to only backup changed data.
+#
+# @invalid: A bitmap existed, but had to be cleared since it's associated
+# base snapshot did not match the base given for the current job or
+# the crypt mode has changed.
+#
+##
+{ 'enum': 'PBSBitmapAction',
+ 'data': ['not-used', 'not-used-removed', 'new', 'used', 'invalid'] }
+
+##
+# @PBSBitmapInfo:
+#
+# Contains information about dirty bitmaps used for each drive in a PBS backup.
+#
+# @drive: The underlying drive.
+#
+# @action: The action that was taken when the backup started.
+#
+# @size: The total size of the drive.
+#
+# @dirty: How much of the drive is considered dirty and will be backed up,
+# or 'size' if everything will be.
+#
+##
+{ 'struct': 'PBSBitmapInfo',
+ 'data': { 'drive': 'str', 'action': 'PBSBitmapAction', 'size': 'int',
+ 'dirty': 'int' } }
+
+##
+# @query-pbs-bitmap-info:
+#
+# Returns information about dirty bitmaps used on the most recently started
+# backup. Returns nothing when the last backup was not using PBS or if no
+# backup occured in this session.
+#
+# Returns: @PBSBitmapInfo
+#
+##
+{ 'command': 'query-pbs-bitmap-info', 'returns': ['PBSBitmapInfo'] }
+
##
# @BlockDeviceTimedStats:
#

View File

@@ -14,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/meson.build b/meson.build
index cbfc9a43fb..8206270272 100644
index be4785e2f6..3fc7c8d435 100644
--- a/meson.build
+++ b/meson.build
@@ -1779,6 +1779,7 @@ endif
@@ -1463,6 +1463,7 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
libuuid = cc.find_library('uuid', required: true)
@@ -25,19 +25,19 @@ index cbfc9a43fb..8206270272 100644
libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true)
# libselinux
@@ -3406,6 +3407,7 @@ if have_block
@@ -3105,6 +3106,7 @@ if have_block
# os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
# os-win32.c does not
blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
+ blockdev_ss.add(when: 'CONFIG_POSIX', if_true: libsystemd)
system_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
endif
diff --git a/os-posix.c b/os-posix.c
index 0cc1d991b1..f33d9901cf 100644
index 321fc4bd13..b1870d2690 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -29,6 +29,8 @@
@@ -28,6 +28,8 @@
#include <pwd.h>
#include <grp.h>
#include <libgen.h>
@@ -46,7 +46,7 @@ index 0cc1d991b1..f33d9901cf 100644
/* Needed early for CONFIG_BSD etc. */
#include "net/slirp.h"
@@ -332,9 +334,10 @@ void os_setup_post(void)
@@ -281,9 +283,10 @@ void os_setup_post(void)
dup2(fd, 0);
dup2(fd, 1);

View File

@@ -7,14 +7,14 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/qemu/job.h | 12 ++++++++++++
job.c | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
job.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/include/qemu/job.h b/include/qemu/job.h
index e502787dd8..963cf2bef5 100644
index c105b31076..5096679571 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -381,6 +381,18 @@ void job_unlock(void);
@@ -316,6 +316,18 @@ typedef enum JobCreateFlags {
*/
JobTxn *job_txn_new(void);
@@ -34,10 +34,10 @@ index e502787dd8..963cf2bef5 100644
* Release a reference that was previously acquired with job_txn_add_job or
* job_txn_new. If it's the last reference to the object, it will be freed.
diff --git a/job.c b/job.c
index 93e22d180b..2b31f1e14f 100644
index e5699ad200..34c9758349 100644
--- a/job.c
+++ b/job.c
@@ -93,6 +93,8 @@ struct JobTxn {
@@ -72,6 +72,8 @@ struct JobTxn {
/* Reference count */
int refcnt;
@@ -45,8 +45,8 @@ index 93e22d180b..2b31f1e14f 100644
+ bool sequential;
};
void job_lock(void)
@@ -118,6 +120,25 @@ JobTxn *job_txn_new(void)
/* Right now, this mutex is only needed to synchronize accesses to job->busy
@@ -102,6 +104,25 @@ JobTxn *job_txn_new(void)
return txn;
}
@@ -69,23 +69,20 @@ index 93e22d180b..2b31f1e14f 100644
+ job_start(first);
+}
+
/* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn)
static void job_txn_ref(JobTxn *txn)
{
@@ -1057,6 +1078,12 @@ static void job_completed_txn_success_locked(Job *job)
txn->refcnt++;
@@ -897,6 +918,9 @@ static void job_completed_txn_success(Job *job)
*/
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (!job_is_completed_locked(other_job)) {
if (!job_is_completed(other_job)) {
+ if (txn->sequential) {
+ job_unlock();
+ /* Needs to be called without holding the job lock */
+ job_start(other_job);
+ job_lock();
+ }
return;
}
assert(other_job->ret == 0);
@@ -1268,6 +1295,13 @@ int job_finish_sync_locked(Job *job,
@@ -1093,6 +1117,13 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
return -EBUSY;
}
@@ -93,9 +90,9 @@ index 93e22d180b..2b31f1e14f 100644
+ * of cancelling, these have not begun work so job_enter won't do anything,
+ * let's ensure they are marked as ABORTING if required */
+ if (job->status == JOB_STATUS_CREATED && job->txn->sequential) {
+ job_update_rc_locked(job);
+ job_update_rc(job);
+ }
+
job_unlock();
AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
(job_enter(job), !job_is_completed(job)));
AIO_WAIT_WHILE(job->aio_context,
(job_enter(job), !job_is_completed(job)));

View File

@@ -0,0 +1,295 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 20 Aug 2020 14:25:00 +0200
Subject: [PATCH] PVE-Backup: Use a transaction to synchronize job states
By using a JobTxn, we can sync dirty bitmaps only when *all* jobs were
successful - meaning we don't need to remove them when the backup fails,
since QEMU's BITMAP_SYNC_MODE_ON_SUCCESS will now handle that for us.
To keep the rate-limiting and IO impact from before, we use a sequential
transaction, so drives will still be backed up one after the other.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[add new force parameter to job_cancel_sync calls]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 167 +++++++++++++++------------------------------------
1 file changed, 49 insertions(+), 118 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index f90abaa50a..63c686463f 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -52,6 +52,7 @@ static struct PVEBackupState {
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
+ JobTxn *txn;
QemuMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
@@ -71,32 +72,12 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
- bool completed;
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
+ BlockJob *job;
} PVEBackupDevInfo;
-static void pvebackup_run_next_job(void);
-
-static BlockJob *
-lookup_active_block_job(PVEBackupDevInfo *di)
-{
- if (!di->completed && di->bs) {
- for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
- if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
- continue;
- }
-
- BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
- if (bjob && bjob->source_bs == di->bs) {
- return job;
- }
- }
- }
- return NULL;
-}
-
static void pvebackup_propagate_error(Error *err)
{
qemu_mutex_lock(&backup_state.stat.lock);
@@ -272,18 +253,6 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
if (local_err != NULL) {
pvebackup_propagate_error(local_err);
}
- } else {
- // on error or cancel we cannot ensure synchronization of dirty
- // bitmaps with backup server, so remove all and do full backup next
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- if (di->bitmap) {
- bdrv_release_dirty_bitmap(di->bitmap);
- }
- }
}
proxmox_backup_disconnect(backup_state.pbs);
@@ -322,8 +291,6 @@ static void pvebackup_complete_cb(void *opaque, int ret)
qemu_mutex_lock(&backup_state.backup_mutex);
- di->completed = true;
-
if (ret < 0) {
Error *local_err = NULL;
error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
@@ -336,20 +303,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
block_on_coroutine_fn(pvebackup_complete_stream, di);
- // remove self from job queue
+ // remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
- if (di->bitmap && ret < 0) {
- // on error or cancel we cannot ensure synchronization of dirty
- // bitmaps with backup server, so remove all and do full backup next
- bdrv_release_dirty_bitmap(di->bitmap);
- }
-
g_free(di);
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ /* call cleanup if we're the last job */
+ if (!g_list_first(backup_state.di_list)) {
+ block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ }
- pvebackup_run_next_job();
+ qemu_mutex_unlock(&backup_state.backup_mutex);
}
static void pvebackup_cancel(void)
@@ -371,36 +335,28 @@ static void pvebackup_cancel(void)
proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- for(;;) {
-
- BlockJob *next_job = NULL;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
-
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
+ /* it's enough to cancel one job in the transaction, the rest will follow
+ * automatically */
+ GList *bdi = g_list_first(backup_state.di_list);
+ BlockJob *cancel_job = bdi && bdi->data ?
+ ((PVEBackupDevInfo *)bdi->data)->job :
+ NULL;
- BlockJob *job = lookup_active_block_job(di);
- if (job != NULL) {
- next_job = job;
- break;
- }
- }
+ /* ref the job before releasing the mutex, just to be safe */
+ if (cancel_job) {
+ job_ref(&cancel_job->job);
+ }
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ /* job_cancel_sync may enter the job, so we need to release the
+ * backup_mutex to avoid deadlock */
+ qemu_mutex_unlock(&backup_state.backup_mutex);
- if (next_job) {
- AioContext *aio_context = next_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&next_job->job, true);
- aio_context_release(aio_context);
- } else {
- break;
- }
+ if (cancel_job) {
+ AioContext *aio_context = cancel_job->job.aio_context;
+ aio_context_acquire(aio_context);
+ job_cancel_sync(&cancel_job->job, true);
+ job_unref(&cancel_job->job);
+ aio_context_release(aio_context);
}
}
@@ -459,51 +415,19 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-bool job_should_pause(Job *job);
-
-static void pvebackup_run_next_job(void)
-{
- assert(!qemu_in_coroutine());
-
- qemu_mutex_lock(&backup_state.backup_mutex);
-
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- BlockJob *job = lookup_active_block_job(di);
-
- if (job) {
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- AioContext *aio_context = job->job.aio_context;
- aio_context_acquire(aio_context);
-
- if (job_should_pause(&job->job)) {
- bool error_or_canceled = pvebackup_error_or_canceled();
- if (error_or_canceled) {
- job_cancel_sync(&job->job, true);
- } else {
- job_resume(&job->job);
- }
- }
- aio_context_release(aio_context);
- return;
- }
- }
-
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
-
- qemu_mutex_unlock(&backup_state.backup_mutex);
-}
-
static bool create_backup_jobs(void) {
assert(!qemu_in_coroutine());
Error *local_err = NULL;
+ /* create job transaction to synchronize bitmap commit and cancel all
+ * jobs in case one errors */
+ if (backup_state.txn) {
+ job_txn_unref(backup_state.txn);
+ }
+ backup_state.txn = job_txn_new_seq();
+
BackupPerf perf = { .max_workers = 16 };
/* create and start all jobs (paused state) */
@@ -526,7 +450,7 @@ static bool create_backup_jobs(void) {
BlockJob *job = backup_job_create(
NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
+ JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
aio_context_release(aio_context);
@@ -538,7 +462,8 @@ static bool create_backup_jobs(void) {
pvebackup_propagate_error(create_job_err);
break;
}
- job_start(&job->job);
+
+ di->job = job;
bdrv_unref(di->target);
di->target = NULL;
@@ -556,6 +481,10 @@ static bool create_backup_jobs(void) {
bdrv_unref(di->target);
di->target = NULL;
}
+
+ if (di->job) {
+ job_unref(&di->job->job);
+ }
}
}
@@ -946,10 +875,6 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (di->bitmap) {
- bdrv_release_dirty_bitmap(di->bitmap);
- }
-
if (di->target) {
bdrv_unref(di->target);
}
@@ -1038,9 +963,15 @@ UuidInfo *qmp_backup(
block_on_coroutine_fn(pvebackup_co_prepare, &task);
if (*errp == NULL) {
- create_backup_jobs();
+ bool errors = create_backup_jobs();
qemu_mutex_unlock(&backup_state.backup_mutex);
- pvebackup_run_next_job();
+
+ if (!errors) {
+ /* start the first job in the transaction
+ * note: this might directly enter the job, so we need to do this
+ * after unlocking the backup_mutex */
+ job_txn_start_seq(backup_state.txn);
+ }
} else {
qemu_mutex_unlock(&backup_state.backup_mutex);
}

View File

@@ -0,0 +1,503 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 28 Sep 2020 13:40:51 +0200
Subject: [PATCH] PVE-Backup: Don't block on finishing and cleanup
create_backup_jobs
proxmox_backup_co_finish is already async, but previously we would wait
for the coroutine using block_on_coroutine_fn(). Avoid this by
scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
as a real coroutine when calling from pvebackup_complete_cb. This is ok,
since complete_stream uses the backup_mutex internally to synchronize,
and other streams can happily continue writing in the meantime anyway.
To accomodate, backup_mutex is converted to a CoMutex. This means
converting every user to a coroutine. This is not just useful here, but
will come in handy once this series[0] is merged, and QMP calls can be
yield-able coroutines too. Then we can also finally get rid of
block_on_coroutine_fn.
Cases of aio_context_acquire/release from within what is now a coroutine
are changed to aio_co_reschedule_self, which works since a running
coroutine always holds the aio lock for the context it is running in.
job_cancel_sync is called from a BH since it can't be run from a
coroutine (uses AIO_WAIT_WHILE internally).
Same thing for create_backup_jobs, which is converted to a BH too.
To communicate the finishing state, a new property is introduced to
query-backup: 'finishing'. A new state is explicitly not used, since
that would break compatibility with older qemu-server versions.
Also fix create_backup_jobs:
No more weird bool returns, just the standard "errp" format used
everywhere else too. With this, if backup_job_create fails, the error
message is actually returned over QMP and can be shown to the user.
To facilitate correct cleanup on such an error, we call
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
This additionally allows us to actually hold the backup_mutex during
operation.
Also add a job_cancel_sync before job_unref, since a job must be in
STATUS_NULL to be deleted by unref, which could trigger an assert
before.
[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[add new force parameter to job_cancel_sync calls]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 217 ++++++++++++++++++++++++++++---------------
qapi/block-core.json | 5 +-
2 files changed, 144 insertions(+), 78 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 63c686463f..6f05796fad 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
static struct PVEBackupState {
struct {
- // Everithing accessed from qmp_backup_query command is protected using lock
+ // Everything accessed from qmp_backup_query command is protected using
+ // this lock. Do NOT hold this lock for long times, as it is sometimes
+ // acquired from coroutines, and thus any wait time may block the guest.
QemuMutex lock;
Error *error;
time_t start_time;
@@ -47,20 +49,22 @@ static struct PVEBackupState {
size_t reused;
size_t zero_bytes;
GList *bitmap_list;
+ bool finishing;
+ bool starting;
} stat;
int64_t speed;
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
JobTxn *txn;
- QemuMutex backup_mutex;
+ CoMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
static void pvebackup_init(void)
{
qemu_mutex_init(&backup_state.stat.lock);
- qemu_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.backup_mutex);
qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
@@ -72,6 +76,7 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
+ int completed_ret; // INT_MAX if not completed
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
@@ -227,12 +232,12 @@ pvebackup_co_dump_vma_cb(
}
// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_co_cleanup(void *unused)
+static void coroutine_fn pvebackup_co_cleanup(void)
{
assert(qemu_in_coroutine());
qemu_mutex_lock(&backup_state.stat.lock);
- backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = true;
qemu_mutex_unlock(&backup_state.stat.lock);
if (backup_state.vmaw) {
@@ -261,35 +266,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
}
-// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_complete_stream(void *opaque)
+static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
{
PVEBackupDevInfo *di = opaque;
+ int ret = di->completed_ret;
- bool error_or_canceled = pvebackup_error_or_canceled();
-
- if (backup_state.vmaw) {
- vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ bool starting = backup_state.stat.starting;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ if (starting) {
+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
+ * must) skip all cleanup, as we don't know what has and hasn't been
+ * initialized yet. */
+ return;
}
- if (backup_state.pbs && !error_or_canceled) {
- Error *local_err = NULL;
- proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
- if (local_err != NULL) {
- pvebackup_propagate_error(local_err);
- }
- }
-}
-
-static void pvebackup_complete_cb(void *opaque, int ret)
-{
- assert(!qemu_in_coroutine());
-
- PVEBackupDevInfo *di = opaque;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (ret < 0) {
Error *local_err = NULL;
@@ -301,7 +300,19 @@ static void pvebackup_complete_cb(void *opaque, int ret)
assert(di->target == NULL);
- block_on_coroutine_fn(pvebackup_complete_stream, di);
+ bool error_or_canceled = pvebackup_error_or_canceled();
+
+ if (backup_state.vmaw) {
+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ }
+
+ if (backup_state.pbs && !error_or_canceled) {
+ Error *local_err = NULL;
+ proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
+ if (local_err != NULL) {
+ pvebackup_propagate_error(local_err);
+ }
+ }
// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -310,21 +321,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
/* call cleanup if we're the last job */
if (!g_list_first(backup_state.di_list)) {
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ pvebackup_co_cleanup();
}
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-static void pvebackup_cancel(void)
+static void pvebackup_complete_cb(void *opaque, int ret)
{
- assert(!qemu_in_coroutine());
+ PVEBackupDevInfo *di = opaque;
+ di->completed_ret = ret;
+
+ /*
+ * Schedule stream cleanup in async coroutine. close_image and finish might
+ * take a while, so we can't block on them here. This way it also doesn't
+ * matter if we're already running in a coroutine or not.
+ * Note: di is a pointer to an entry in the global backup_state struct, so
+ * it stays valid.
+ */
+ Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
+ aio_co_enter(qemu_get_aio_context(), co);
+}
+
+/*
+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
+ * main loop processing via a bottom half.
+ */
+static void job_cancel_bh(void *opaque) {
+ CoCtxData *data = (CoCtxData*)opaque;
+ Job *job = (Job*)data->data;
+ AioContext *job_ctx = job->aio_context;
+ aio_context_acquire(job_ctx);
+ job_cancel_sync(job, true);
+ aio_context_release(job_ctx);
+ aio_co_enter(data->ctx, data->co);
+}
+static void coroutine_fn pvebackup_co_cancel(void *opaque)
+{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
pvebackup_propagate_error(cancel_err);
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
@@ -342,27 +381,22 @@ static void pvebackup_cancel(void)
((PVEBackupDevInfo *)bdi->data)->job :
NULL;
- /* ref the job before releasing the mutex, just to be safe */
if (cancel_job) {
- job_ref(&cancel_job->job);
+ CoCtxData data = {
+ .ctx = qemu_get_current_aio_context(),
+ .co = qemu_coroutine_self(),
+ .data = &cancel_job->job,
+ };
+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+ qemu_coroutine_yield();
}
- /* job_cancel_sync may enter the job, so we need to release the
- * backup_mutex to avoid deadlock */
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (cancel_job) {
- AioContext *aio_context = cancel_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&cancel_job->job, true);
- job_unref(&cancel_job->job);
- aio_context_release(aio_context);
- }
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
void qmp_backup_cancel(Error **errp)
{
- pvebackup_cancel();
+ block_on_coroutine_fn(pvebackup_co_cancel, NULL);
}
// assumes the caller holds backup_mutex
@@ -415,10 +449,18 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-static bool create_backup_jobs(void) {
+/*
+ * backup_job_create can *not* be run from a coroutine (and requires an
+ * acquired AioContext), so this can't either.
+ * The caller is responsible that backup_mutex is held nonetheless.
+ */
+static void create_backup_jobs_bh(void *opaque) {
assert(!qemu_in_coroutine());
+ CoCtxData *data = (CoCtxData*)opaque;
+ Error **errp = (Error**)data->data;
+
Error *local_err = NULL;
/* create job transaction to synchronize bitmap commit and cancel all
@@ -454,24 +496,19 @@ static bool create_backup_jobs(void) {
aio_context_release(aio_context);
- if (!job || local_err != NULL) {
- Error *create_job_err = NULL;
- error_setg(&create_job_err, "backup_job_create failed: %s",
- local_err ? error_get_pretty(local_err) : "null");
+ di->job = job;
- pvebackup_propagate_error(create_job_err);
+ if (!job || local_err) {
+ error_setg(errp, "backup_job_create failed: %s",
+ local_err ? error_get_pretty(local_err) : "null");
break;
}
- di->job = job;
-
bdrv_unref(di->target);
di->target = NULL;
}
- bool errors = pvebackup_error_or_canceled();
-
- if (errors) {
+ if (*errp) {
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -483,12 +520,17 @@ static bool create_backup_jobs(void) {
}
if (di->job) {
+ AioContext *ctx = di->job->job.aio_context;
+ aio_context_acquire(ctx);
+ job_cancel_sync(&di->job->job, true);
job_unref(&di->job->job);
+ aio_context_release(ctx);
}
}
}
- return errors;
+ /* return */
+ aio_co_enter(data->ctx, data->co);
}
typedef struct QmpBackupTask {
@@ -525,11 +567,12 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;
-// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_prepare(void *opaque)
{
assert(qemu_in_coroutine());
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
QmpBackupTask *task = opaque;
task->result = NULL; // just to be sure
@@ -550,8 +593,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -618,6 +662,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
di->size = size;
total += size;
+
+ di->completed_ret = INT_MAX;
}
uuid_generate(uuid);
@@ -849,6 +895,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.finishing = false;
+ backup_state.stat.starting = true;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -863,6 +911,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info->UUID = uuid_str;
task->result = uuid_info;
+
+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
+ * points, and we'll release it as soon as the BH reschedules us.
+ */
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &local_err,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
+ qemu_coroutine_yield();
+
+ if (local_err) {
+ error_propagate(task->errp, local_err);
+ goto err;
+ }
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.starting = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ /* start the first job in the transaction */
+ job_txn_start_seq(backup_state.txn);
+
return;
err_mutex:
@@ -885,6 +960,7 @@ err:
g_free(di);
}
g_list_free(di_list);
+ backup_state.di_list = NULL;
if (devs) {
g_strfreev(devs);
@@ -905,6 +981,8 @@ err:
}
task->result = NULL;
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -958,24 +1036,8 @@ UuidInfo *qmp_backup(
.errp = errp,
};
- qemu_mutex_lock(&backup_state.backup_mutex);
-
block_on_coroutine_fn(pvebackup_co_prepare, &task);
- if (*errp == NULL) {
- bool errors = create_backup_jobs();
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (!errors) {
- /* start the first job in the transaction
- * note: this might directly enter the job, so we need to do this
- * after unlocking the backup_mutex */
- job_txn_start_seq(backup_state.txn);
- }
- } else {
- qemu_mutex_unlock(&backup_state.backup_mutex);
- }
-
return task.result;
}
@@ -1027,6 +1089,7 @@ BackupStatus *qmp_query_backup(Error **errp)
info->transferred = backup_state.stat.transferred;
info->has_reused = true;
info->reused = backup_state.stat.reused;
+ info->finishing = backup_state.stat.finishing;
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 16e184dd28..cb17d00fe0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -770,12 +770,15 @@
#
# @uuid: uuid for this backup job
#
+# @finishing: if status='active' and finishing=true, then the backup process is
+# waiting for the target to finish.
+#
##
{ 'struct': 'BackupStatus',
'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
'*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
- '*backup-file': 'str', '*uuid': 'str' } }
+ '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }
##
# @BackupFormat:

View File

@@ -13,23 +13,21 @@ safe migration is possible and makes sense.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: split up state_pending for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/migration/misc.h | 3 ++
migration/meson.build | 2 +
migration/migration.c | 1 +
migration/pbs-state.c | 104 +++++++++++++++++++++++++++++++++++++++
migration/pbs-state.c | 106 +++++++++++++++++++++++++++++++++++++++
pve-backup.c | 1 +
qapi/block-core.json | 6 +++
6 files changed, 117 insertions(+)
6 files changed, 119 insertions(+)
create mode 100644 migration/pbs-state.c
diff --git a/include/migration/misc.h b/include/migration/misc.h
index 7dcc0b5c2c..4c940b2475 100644
index 465906710d..4f0aeceb6f 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -77,4 +77,7 @@ bool migration_in_bg_snapshot(void);
@@ -75,4 +75,7 @@ bool migration_in_bg_snapshot(void);
/* migration/block-dirty-bitmap.c */
void dirty_bitmap_mig_init(void);
@@ -38,24 +36,25 @@ index 7dcc0b5c2c..4c940b2475 100644
+
#endif
diff --git a/migration/meson.build b/migration/meson.build
index 07f6057acc..343994d891 100644
index 0842d00cd2..d012f4d8d3 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -7,7 +7,9 @@ migration_files = files(
@@ -6,8 +6,10 @@ migration_files = files(
'vmstate.c',
'qemu-file.c',
'yank_functions.c',
+ 'pbs-state.c',
)
+system_ss.add(libproxmox_backup_qemu)
softmmu_ss.add(migration_files)
+softmmu_ss.add(libproxmox_backup_qemu)
system_ss.add(files(
softmmu_ss.add(files(
'block-dirty-bitmap.c',
diff --git a/migration/migration.c b/migration/migration.c
index 7a4c8beb5d..0a955a2a18 100644
index bb8bbddfe4..8109e468eb 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -162,6 +162,7 @@ void migration_object_init(void)
@@ -229,6 +229,7 @@ void migration_object_init(void)
blk_mig_init();
ram_mig_init();
dirty_bitmap_mig_init();
@@ -65,10 +64,10 @@ index 7a4c8beb5d..0a955a2a18 100644
void migration_cancel(const Error *error)
diff --git a/migration/pbs-state.c b/migration/pbs-state.c
new file mode 100644
index 0000000000..887e998b9e
index 0000000000..29f2b3860d
--- /dev/null
+++ b/migration/pbs-state.c
@@ -0,0 +1,104 @@
@@ -0,0 +1,106 @@
+/*
+ * PBS (dirty-bitmap) state migration
+ */
@@ -87,8 +86,11 @@ index 0000000000..887e998b9e
+/* state is accessed via this static variable directly, 'opaque' is NULL */
+static PBSState pbs_state;
+
+static void pbs_state_pending(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
+static void pbs_state_save_pending(QEMUFile *f, void *opaque,
+ uint64_t max_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ /* we send everything in save_setup, so nothing is ever pending */
+}
@@ -158,8 +160,7 @@ index 0000000000..887e998b9e
+static SaveVMHandlers savevm_pbs_state_handlers = {
+ .save_setup = pbs_state_save_setup,
+ .has_postcopy = pbs_state_has_postcopy,
+ .state_pending_exact = pbs_state_pending,
+ .state_pending_estimate = pbs_state_pending,
+ .save_live_pending = pbs_state_save_pending,
+ .is_active_iterate = pbs_state_is_active_iterate,
+ .load_state = pbs_state_load,
+ .is_active = pbs_state_is_active,
@@ -174,22 +175,22 @@ index 0000000000..887e998b9e
+ NULL);
+}
diff --git a/pve-backup.c b/pve-backup.c
index d84d807654..9c8b88d075 100644
index 6f05796fad..5fa3cc1352 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1060,6 +1060,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
@@ -1132,6 +1132,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index d67a6d448a..09de550c95 100644
index cb17d00fe0..bd978ea562 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -991,6 +991,11 @@
@@ -879,6 +879,11 @@
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
#
@@ -198,14 +199,14 @@ index d67a6d448a..09de550c95 100644
+# migration cap if this is false/unset may lead
+# to crashes on migration!
+#
# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
# parameter.
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
@@ -1001,6 +1006,7 @@
##
@@ -886,6 +891,7 @@
'data': { 'pbs-dirty-bitmap': 'bool',
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
'pbs-library-version': 'str',
'backup-max-workers': 'bool' } }
'pbs-library-version': 'str' } }
##

View File

@@ -19,10 +19,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index e1ae3b7316..285dd1d148 100644
index 9aba7d9c22..f4ecf9c9f9 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -540,7 +540,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
@@ -538,7 +538,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
error_report_err(local_err);

View File

@@ -21,10 +21,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 30 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index 34f97ab646..398782963d 100644
index d707d0b354..da6ed52323 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1391,12 +1391,42 @@ static char *get_initiator_name(QemuOpts *opts)
@@ -1386,12 +1386,42 @@ static char *get_initiator_name(QemuOpts *opts)
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;

View File

@@ -0,0 +1,598 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 26 Jan 2021 15:45:30 +0100
Subject: [PATCH] PVE: Use coroutine QMP for backup/cancel_backup
Finally turn backup QMP calls into coroutines, now that it's possible.
This has the benefit that calls are asynchronous to the main loop, i.e.
long running operations like connecting to a PBS server will no longer
hang the VM.
Additionally, it allows us to get rid of block_on_coroutine_fn, which
was always a hacky workaround.
While we're already spring cleaning, also remove the QmpBackupTask
struct, since we can now put the 'prepare' function directly into
qmp_backup and thus no longer need those giant walls of text.
(Note that for our patches to work with 5.2.0 this change is actually
required, otherwise monitor_get_fd() fails as we're not in a QMP
coroutine, but one we start ourselves - we could of course set the
monitor for that coroutine ourselves, but let's just fix it the right
way instead)
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 +-
hmp-commands.hx | 2 +
proxmox-backup-client.c | 31 -----
pve-backup.c | 232 ++++++++++-----------------------
qapi/block-core.json | 4 +-
5 files changed, 77 insertions(+), 196 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index ea7b665aa2..ef45552e3b 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1016,7 +1016,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
g_free(global_snapshots);
}
-void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup_cancel(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
@@ -1025,7 +1025,7 @@ void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, error);
}
-void hmp_backup(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 97f24942b3..7a2be816da 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -111,6 +111,7 @@ ERST
"\n\t\t\t Use -d to dump data into a directory instead"
"\n\t\t\t of using VMA format.",
.cmd = hmp_backup,
+ .coroutine = true,
},
SRST
@@ -124,6 +125,7 @@ ERST
.params = "",
.help = "cancel the current VM backup",
.cmd = hmp_backup_cancel,
+ .coroutine = true,
},
SRST
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index 4ce7bc0b5e..0923037dec 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -5,37 +5,6 @@
/* Proxmox Backup Server client bindings using coroutines */
-typedef struct BlockOnCoroutineWrapper {
- AioContext *ctx;
- CoroutineEntry *entry;
- void *entry_arg;
- bool finished;
-} BlockOnCoroutineWrapper;
-
-static void coroutine_fn block_on_coroutine_wrapper(void *opaque)
-{
- BlockOnCoroutineWrapper *wrapper = opaque;
- wrapper->entry(wrapper->entry_arg);
- wrapper->finished = true;
- aio_wait_kick();
-}
-
-void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg)
-{
- assert(!qemu_in_coroutine());
-
- AioContext *ctx = qemu_get_current_aio_context();
- BlockOnCoroutineWrapper wrapper = {
- .finished = false,
- .entry = entry,
- .entry_arg = entry_arg,
- .ctx = ctx,
- };
- Coroutine *wrapper_co = qemu_coroutine_create(block_on_coroutine_wrapper, &wrapper);
- aio_co_enter(ctx, wrapper_co);
- AIO_WAIT_WHILE(ctx, !wrapper.finished);
-}
-
// This is called from another thread, so we use aio_co_schedule()
static void proxmox_backup_schedule_wake(void *data) {
CoCtxData *waker = (CoCtxData *)data;
diff --git a/pve-backup.c b/pve-backup.c
index 5fa3cc1352..323014744c 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -357,7 +357,7 @@ static void job_cancel_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-static void coroutine_fn pvebackup_co_cancel(void *opaque)
+void coroutine_fn qmp_backup_cancel(Error **errp)
{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
@@ -394,11 +394,6 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-void qmp_backup_cancel(Error **errp)
-{
- block_on_coroutine_fn(pvebackup_co_cancel, NULL);
-}
-
// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config(
const char *file,
@@ -533,50 +528,27 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-typedef struct QmpBackupTask {
- const char *backup_file;
- bool has_password;
- const char *password;
- bool has_keyfile;
- const char *keyfile;
- bool has_key_password;
- const char *key_password;
- bool has_backup_id;
- const char *backup_id;
- bool has_backup_time;
- const char *fingerprint;
- bool has_fingerprint;
- int64_t backup_time;
- bool has_use_dirty_bitmap;
- bool use_dirty_bitmap;
- bool has_format;
- BackupFormat format;
- bool has_config_file;
- const char *config_file;
- bool has_firewall_file;
- const char *firewall_file;
- bool has_devlist;
- const char *devlist;
- bool has_compress;
- bool compress;
- bool has_encrypt;
- bool encrypt;
- bool has_speed;
- int64_t speed;
- Error **errp;
- UuidInfo *result;
-} QmpBackupTask;
-
-static void coroutine_fn pvebackup_co_prepare(void *opaque)
+UuidInfo coroutine_fn *qmp_backup(
+ const char *backup_file,
+ bool has_password, const char *password,
+ bool has_keyfile, const char *keyfile,
+ bool has_key_password, const char *key_password,
+ bool has_fingerprint, const char *fingerprint,
+ bool has_backup_id, const char *backup_id,
+ bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
+ bool has_format, BackupFormat format,
+ bool has_config_file, const char *config_file,
+ bool has_firewall_file, const char *firewall_file,
+ bool has_devlist, const char *devlist,
+ bool has_speed, int64_t speed, Error **errp)
{
assert(qemu_in_coroutine());
qemu_co_mutex_lock(&backup_state.backup_mutex);
- QmpBackupTask *task = opaque;
-
- task->result = NULL; // just to be sure
-
BlockBackend *blk;
BlockDriverState *bs = NULL;
const char *backup_dir = NULL;
@@ -593,17 +565,17 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
+ return NULL;
}
/* Todo: try to auto-detect format based on file name */
- BackupFormat format = task->has_format ? task->format : BACKUP_FORMAT_VMA;
+ format = has_format ? format : BACKUP_FORMAT_VMA;
- if (task->has_devlist) {
- devs = g_strsplit_set(task->devlist, ",;:", -1);
+ if (has_devlist) {
+ devs = g_strsplit_set(devlist, ",;:", -1);
gchar **d = devs;
while (d && *d) {
@@ -611,14 +583,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (blk) {
bs = blk_bs(blk);
if (!bdrv_is_inserted(bs)) {
- error_setg(task->errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
goto err;
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
di_list = g_list_append(di_list, di);
} else {
- error_set(task->errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", *d);
goto err;
}
@@ -641,7 +613,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (!di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
goto err;
}
@@ -651,13 +623,13 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, task->errp)) {
+ if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
goto err;
}
ssize_t size = bdrv_getlength(di->bs);
if (size < 0) {
- error_setg_errno(task->errp, -di->size, "bdrv_getlength failed");
+ error_setg_errno(errp, -di->size, "bdrv_getlength failed");
goto err;
}
di->size = size;
@@ -684,47 +656,44 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (format == BACKUP_FORMAT_PBS) {
- if (!task->has_password) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
+ if (!has_password) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
goto err_mutex;
}
- if (!task->has_backup_id) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
+ if (!has_backup_id) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
goto err_mutex;
}
- if (!task->has_backup_time) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
+ if (!has_backup_time) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
- bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
-
-
char *pbs_err = NULL;
pbs = proxmox_backup_new(
- task->backup_file,
- task->backup_id,
- task->backup_time,
+ backup_file,
+ backup_id,
+ backup_time,
dump_cb_block_size,
- task->has_password ? task->password : NULL,
- task->has_keyfile ? task->keyfile : NULL,
- task->has_key_password ? task->key_password : NULL,
- task->has_compress ? task->compress : true,
- task->has_encrypt ? task->encrypt : task->has_keyfile,
- task->has_fingerprint ? task->fingerprint : NULL,
+ has_password ? password : NULL,
+ has_keyfile ? keyfile : NULL,
+ has_key_password ? key_password : NULL,
+ has_compress ? compress : true,
+ has_encrypt ? encrypt : has_keyfile,
+ has_fingerprint ? fingerprint : NULL,
&pbs_err);
if (!pbs) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
goto err_mutex;
}
- int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ int connect_result = proxmox_backup_co_connect(pbs, errp);
if (connect_result < 0)
goto err_mutex;
@@ -743,9 +712,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
- if (use_dirty_bitmap) {
+ if (has_use_dirty_bitmap && use_dirty_bitmap) {
if (bitmap == NULL) {
- bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, errp);
if (!bitmap) {
goto err_mutex;
}
@@ -775,12 +744,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp);
if (dev_id < 0) {
goto err_mutex;
}
- if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, errp))) {
goto err_mutex;
}
@@ -794,10 +763,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
+ vmaw = vma_writer_create(backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
}
goto err_mutex;
}
@@ -808,25 +777,25 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, errp))) {
goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
if (di->dev_id <= 0) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- if (mkdir(task->backup_file, 0640) != 0) {
- error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
- task->backup_file);
+ if (mkdir(backup_file, 0640) != 0) {
+ error_setg_errno(errp, errno, "can't create directory '%s'\n",
+ backup_file);
goto err_mutex;
}
- backup_dir = task->backup_file;
+ backup_dir = backup_file;
l = di_list;
while (l) {
@@ -840,34 +809,34 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bdrv_img_create(di->targetfile, "raw", NULL, NULL, NULL,
di->size, flags, false, &local_err);
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
}
} else {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
goto err_mutex;
}
/* add configuration file to archive */
- if (task->has_config_file) {
- if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_config_file) {
+ if (pvebackup_co_add_config(config_file, config_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
/* add firewall file to archive */
- if (task->has_firewall_file) {
- if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_firewall_file) {
+ if (pvebackup_co_add_config(firewall_file, firewall_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
@@ -885,7 +854,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (backup_state.stat.backup_file) {
g_free(backup_state.stat.backup_file);
}
- backup_state.stat.backup_file = g_strdup(task->backup_file);
+ backup_state.stat.backup_file = g_strdup(backup_file);
uuid_copy(backup_state.stat.uuid, uuid);
uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
@@ -900,7 +869,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_mutex_unlock(&backup_state.stat.lock);
- backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
+ backup_state.speed = (has_speed && speed > 0) ? speed : 0;
backup_state.vmaw = vmaw;
backup_state.pbs = pbs;
@@ -910,8 +879,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str;
- task->result = uuid_info;
-
/* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
* backup_mutex locked. This is fine, a CoMutex can be held across yield
* points, and we'll release it as soon as the BH reschedules us.
@@ -925,7 +892,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_coroutine_yield();
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err;
}
@@ -938,7 +905,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
/* start the first job in the transaction */
job_txn_start_seq(backup_state.txn);
- return;
+ return uuid_info;
err_mutex:
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -969,7 +936,7 @@ err:
if (vmaw) {
Error *err = NULL;
vma_writer_close(vmaw, &err);
- unlink(task->backup_file);
+ unlink(backup_file);
}
if (pbs) {
@@ -980,65 +947,8 @@ err:
rmdir(backup_dir);
}
- task->result = NULL;
-
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
-}
-
-UuidInfo *qmp_backup(
- const char *backup_file,
- bool has_password, const char *password,
- bool has_keyfile, const char *keyfile,
- bool has_key_password, const char *key_password,
- bool has_fingerprint, const char *fingerprint,
- bool has_backup_id, const char *backup_id,
- bool has_backup_time, int64_t backup_time,
- bool has_use_dirty_bitmap, bool use_dirty_bitmap,
- bool has_compress, bool compress,
- bool has_encrypt, bool encrypt,
- bool has_format, BackupFormat format,
- bool has_config_file, const char *config_file,
- bool has_firewall_file, const char *firewall_file,
- bool has_devlist, const char *devlist,
- bool has_speed, int64_t speed, Error **errp)
-{
- QmpBackupTask task = {
- .backup_file = backup_file,
- .has_password = has_password,
- .password = password,
- .has_keyfile = has_keyfile,
- .keyfile = keyfile,
- .has_key_password = has_key_password,
- .key_password = key_password,
- .has_fingerprint = has_fingerprint,
- .fingerprint = fingerprint,
- .has_backup_id = has_backup_id,
- .backup_id = backup_id,
- .has_backup_time = has_backup_time,
- .backup_time = backup_time,
- .has_use_dirty_bitmap = has_use_dirty_bitmap,
- .use_dirty_bitmap = use_dirty_bitmap,
- .has_compress = has_compress,
- .compress = compress,
- .has_encrypt = has_encrypt,
- .encrypt = encrypt,
- .has_format = has_format,
- .format = format,
- .has_config_file = has_config_file,
- .config_file = config_file,
- .has_firewall_file = has_firewall_file,
- .firewall_file = firewall_file,
- .has_devlist = has_devlist,
- .devlist = devlist,
- .has_speed = has_speed,
- .speed = speed,
- .errp = errp,
- };
-
- block_on_coroutine_fn(pvebackup_co_prepare, &task);
-
- return task.result;
+ return NULL;
}
BackupStatus *qmp_query_backup(Error **errp)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bd978ea562..ca1966f54b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -842,7 +842,7 @@
'*config-file': 'str',
'*firewall-file': 'str',
'*devlist': 'str', '*speed': 'int' },
- 'returns': 'UuidInfo' }
+ 'returns': 'UuidInfo', 'coroutine': true }
##
# @query-backup:
@@ -864,7 +864,7 @@
# Notes: This command succeeds even if there is no backup process running.
#
##
-{ 'command': 'backup-cancel' }
+{ 'command': 'backup-cancel', 'coroutine': true }
##
# @ProxmoxSupportStatus:

View File

@@ -1,153 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 6 Apr 2023 14:59:31 +0200
Subject: [PATCH] alloc-track: fix deadlock during drop
by replacing the block node directly after changing the backing file
instead of rescheduling it.
With changes in QEMU 8.0, calling bdrv_get_info (and bdrv_unref)
during drop can lead to a deadlock when using iothread (only triggered
with multiple disks, except during debugging where it also triggered
with one disk sometimes):
1. job_unref_locked acquires the AioContext and calls job->driver->free
2. track_drop gets scheduled
3. bdrv_graph_wrlock is called and polls which leads to track_drop being
called
4. track_drop acquires the AioContext recursively
5. bdrv_get_info is a wrapped coroutine (since 8.0) and thus polls for
bdrv_co_get_info. This releases the AioContext, but only once! The
documentation for the AIO_WAIT_WHILE macro states that the
AioContext lock needs to be acquired exactly once, but there does
not seem to be a way for track_drop to know if it acquired the lock
recursively or not (without adding further hacks).
6. Because the AioContext is still held by the main thread once, it can't
be acquired before entering bdrv_co_get_info in co_schedule_bh_cb
which happens in the iothread
When doing the operation in change_backing_file, the AioContext has
already been acquired by the caller, so the issue with the recursive
lock goes away.
The comment explaining why delaying the replace is necessary is
> we need to schedule this for later however, since when this function
> is called, the blockjob modifying us is probably not done yet and
> has a blocker on 'bs'
However, there is no check for blockers in bdrv_replace_node. It would
need to be done by us, the caller, with check_to_replace_node.
Furthermore, the mirror job also does its call to bdrv_replace_node
while there is an active blocker (inserted by mirror itself) and they
use a specialized version to check for blockers instead of
check_to_replace_node there. Alloc-track could also do something
similar to check for other blockers, but it should be fine to rely on
Proxmox VE that no other operation with the blockdev is going on.
Mirror also drains the target before replacing the node, but the
target can have other users. In case of alloc-track the file child
should not be accessible by anybody else and so there can't be an
in-flight operation for the file child when alloc-track is drained.
The rescheduling based on refcounting is a hack and it doesn't seem to
be necessary anymore. It's not clear what the original issue from the
comment was. Testing with older builds with track_drop done directly
without rescheduling also didn't lead to any noticable issue for me.
One issue it might have been is the one fixed by b1e1af394d
("block/stream: Drain subtree around graph change"), where
block-stream had a use-after-free if the base node changed at an
inconvenient time (which alloc-track's auto-drop does).
It's also not possible to just not auto-replace the alloc-track. Not
replacing it at all leads to other operations like block resize
hanging, and there is no good way to replace it manually via QMP
(there is x-blockdev-change, but it is experimental and doesn't
implement the required operation yet). Also, it's just cleaner in
general to not leave unnecessary block nodes lying around.
Suggested-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 54 ++++++++++++++-------------------------------
1 file changed, 16 insertions(+), 38 deletions(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index b75d7c6460..76da140a68 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -25,7 +25,6 @@
typedef enum DropState {
DropNone,
- DropRequested,
DropInProgress,
} DropState;
@@ -268,37 +267,6 @@ static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
}
}
-static void track_drop(void *opaque)
-{
- BlockDriverState *bs = (BlockDriverState*)opaque;
- BlockDriverState *file = bs->file->bs;
- BDRVAllocTrackState *s = bs->opaque;
-
- assert(file);
-
- /* we rely on the fact that we're not used anywhere else, so let's wait
- * until we're only used once - in the drive connected to the guest (and one
- * ref is held by bdrv_ref in track_change_backing_file) */
- if (bs->refcnt > 2) {
- aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
- return;
- }
- AioContext *aio_context = bdrv_get_aio_context(bs);
- aio_context_acquire(aio_context);
-
- bdrv_drained_begin(bs);
-
- /* now that we're drained, we can safely set 'DropInProgress' */
- s->drop_state = DropInProgress;
- bdrv_child_refresh_perms(bs, bs->file, &error_abort);
-
- bdrv_replace_node(bs, file, &error_abort);
- bdrv_set_backing_hd(bs, NULL, &error_abort);
- bdrv_drained_end(bs);
- bdrv_unref(bs);
- aio_context_release(aio_context);
-}
-
static int track_change_backing_file(BlockDriverState *bs,
const char *backing_file,
const char *backing_fmt)
@@ -308,13 +276,23 @@ static int track_change_backing_file(BlockDriverState *bs,
backing_file == NULL && backing_fmt == NULL)
{
/* backing file has been disconnected, there's no longer any use for
- * this node, so let's remove ourselves from the block graph - we need
- * to schedule this for later however, since when this function is
- * called, the blockjob modifying us is probably not done yet and has a
- * blocker on 'bs' */
- s->drop_state = DropRequested;
+ * this node, so let's remove ourselves from the block graph */
+ BlockDriverState *file = bs->file->bs;
+
+ /* Just to be sure, because bdrv_replace_node unrefs it */
bdrv_ref(bs);
- aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
+ bdrv_drained_begin(bs);
+
+ /* now that we're drained, we can safely set 'DropInProgress' */
+ s->drop_state = DropInProgress;
+
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+
+ bdrv_replace_node(bs, file, &error_abort);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
+
+ bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
return 0;

View File

@@ -0,0 +1,98 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 10 Feb 2021 11:07:06 +0100
Subject: [PATCH] PBS: add master key support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
this requires a new enough libproxmox-backup-qemu0, and allows querying
from the PVE side to avoid QMP calls with unsupported parameters.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
pve-backup.c | 3 +++
qapi/block-core.json | 7 +++++++
3 files changed, 11 insertions(+)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index ef45552e3b..4c799f00d9 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1039,6 +1039,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS password
false, NULL, // PBS keyfile
false, NULL, // PBS key_password
+ false, NULL, // PBS master_keyfile
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
diff --git a/pve-backup.c b/pve-backup.c
index 323014744c..9f6c04a512 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -533,6 +533,7 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_password, const char *password,
bool has_keyfile, const char *keyfile,
bool has_key_password, const char *key_password,
+ bool has_master_keyfile, const char *master_keyfile,
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
@@ -681,6 +682,7 @@ UuidInfo coroutine_fn *qmp_backup(
has_password ? password : NULL,
has_keyfile ? keyfile : NULL,
has_key_password ? key_password : NULL,
+ has_master_keyfile ? master_keyfile : NULL,
has_compress ? compress : true,
has_encrypt ? encrypt : has_keyfile,
has_fingerprint ? fingerprint : NULL,
@@ -1044,5 +1046,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_dirty_bitmap_savevm = true;
ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
+ ret->pbs_masterkey = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ca1966f54b..fc8a125451 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -813,6 +813,8 @@
#
# @key-password: password for keyfile (optional for format 'pbs')
#
+# @master-keyfile: PEM-formatted master public keyfile (optional for format 'pbs')
+#
# @fingerprint: server cert fingerprint (optional for format 'pbs')
#
# @backup-id: backup ID (required for format 'pbs')
@@ -832,6 +834,7 @@
'*password': 'str',
'*keyfile': 'str',
'*key-password': 'str',
+ '*master-keyfile': 'str',
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
@@ -884,6 +887,9 @@
# migration cap if this is false/unset may lead
# to crashes on migration!
#
+# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
+# parameter.
+#
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
##
@@ -892,6 +898,7 @@
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-dirty-bitmap-migration': 'bool',
+ 'pbs-masterkey': 'bool',
'pbs-library-version': 'str' } }
##

View File

@@ -1,190 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 5 May 2023 13:39:53 +0200
Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks
In spirit, this is a partial revert of commit 9b09503752 ("migration:
run setup callbacks out of big lock"), but only for the snapshot case.
For snapshots, the bdrv_writev_vmstate() function is used during setup
(in QIOChannelBlock backing the QEMUFile), but not holding the BQL
while calling it could lead to an assertion failure. To understand
how, first note the following:
1. Generated coroutine wrappers for block layer functions spawn the
coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it.
2. If the host OS switches threads at an inconvenient time, it can
happen that a bottom half scheduled for the main thread's AioContext
is executed as part of a vCPU thread's aio_poll().
An example leading to the assertion failure is as follows:
main thread:
1. A snapshot-save QMP command gets issued.
2. snapshot_save_job_bh() is scheduled.
vCPU thread:
3. aio_poll() for the main thread's AioContext is called (e.g. when
the guest writes to a pflash device, as part of blk_pwrite which is a
generated coroutine wrapper).
4. snapshot_save_job_bh() is executed as part of aio_poll().
3. qemu_savevm_state() is called.
4. qemu_mutex_unlock_iothread() is called. Now
qemu_get_current_aio_context() returns 0x0.
5. bdrv_writev_vmstate() is executed during the usual savevm setup.
But this function is a generated coroutine wrapper, so it uses
AIO_WAIT_WHILE. There, the assertion
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
will fail.
To fix it, ensure that the BQL is held during setup. To avoid changing
the behavior for migration too, introduce conditionals for the setup
callbacks that need the BQL and only take the lock if it's not already
held.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
include/migration/register.h | 2 +-
migration/block-dirty-bitmap.c | 15 ++++++++++++---
migration/block.c | 15 ++++++++++++---
migration/ram.c | 16 +++++++++++++---
migration/savevm.c | 2 --
5 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/include/migration/register.h b/include/migration/register.h
index 90914f32f5..c728fd9120 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
* by other locks.
*/
int (*save_live_iterate)(QEMUFile *f, void *opaque);
+ int (*save_setup)(QEMUFile *f, void *opaque);
/* This runs outside the iothread lock! */
- int (*save_setup)(QEMUFile *f, void *opaque);
/* Note for save_live_pending:
* must_precopy:
* - must be migrated in precopy or in stopped state
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 285dd1d148..f7ee5a74d9 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1219,10 +1219,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
{
DBMSaveState *s = &((DBMState *)opaque)->save;
SaveBitmapState *dbms = NULL;
+ bool release_lock = false;
- qemu_mutex_lock_iothread();
+ /* For snapshots, the BQL is held during setup. */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
if (init_dirty_bitmap_migration(s) < 0) {
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return -1;
}
@@ -1230,7 +1237,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
send_bitmap_start(f, s, dbms);
}
qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return 0;
}
diff --git a/migration/block.c b/migration/block.c
index 86c2256a2b..8423e0c9f9 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -725,21 +725,30 @@ static void block_migration_cleanup(void *opaque)
static int block_save_setup(QEMUFile *f, void *opaque)
{
int ret;
+ bool release_lock = false;
trace_migration_block_save("setup", block_mig_state.submitted,
block_mig_state.transferred);
- qemu_mutex_lock_iothread();
+ /* For snapshots, the BQL is held during setup. */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
ret = init_blk_migration(f);
if (ret < 0) {
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
return ret;
}
/* start track dirty blocks */
ret = set_dirty_tracking();
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
if (ret) {
return ret;
diff --git a/migration/ram.c b/migration/ram.c
index 6e1514f69f..6a1aec7031 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2896,8 +2896,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
static void ram_init_bitmaps(RAMState *rs)
{
- /* For memory_global_dirty_log_start below. */
- qemu_mutex_lock_iothread();
+ bool release_lock = false;
+
+ /*
+ * For memory_global_dirty_log_start below.
+ * For snapshots, the BQL is held during setup.
+ */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
qemu_mutex_lock_ramlist();
WITH_RCU_READ_LOCK_GUARD() {
@@ -2909,7 +2917,9 @@ static void ram_init_bitmaps(RAMState *rs)
}
}
qemu_mutex_unlock_ramlist();
- qemu_mutex_unlock_iothread();
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
/*
* After an eventual first bitmap sync, fixup the initial bitmap
diff --git a/migration/savevm.c b/migration/savevm.c
index d60c4f487a..3c015722f7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1625,10 +1625,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
reset_vfio_bytes_transferred();
ms->to_dst_file = f;
- qemu_mutex_unlock_iothread();
qemu_savevm_state_header(f);
qemu_savevm_state_setup(f);
- qemu_mutex_lock_iothread();
while (qemu_file_get_error(f) == 0) {
if (qemu_savevm_state_iterate(f, false) > 0) {

View File

@@ -0,0 +1,53 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 9 Dec 2020 11:46:57 +0100
Subject: [PATCH] PVE: block/pbs: fast-path reads without allocation if
possible
...and switch over to g_malloc/g_free while at it to align with other
QEMU code.
Tracing shows the fast-path is taken almost all the time, though not
100% so the slow one is still necessary.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/pbs.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/pbs.c b/block/pbs.c
index 0b05ea9080..c5eb4d5bad 100644
--- a/block/pbs.c
+++ b/block/pbs.c
@@ -200,7 +200,16 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
BDRVPBSState *s = bs->opaque;
int ret;
char *pbs_error = NULL;
- uint8_t *buf = malloc(bytes);
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
if (offset < 0 || bytes < 0) {
fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
@@ -223,8 +232,10 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
return -EIO;
}
- qemu_iovec_from_buf(qiov, 0, buf, bytes);
- free(buf);
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
return 0;
}

View File

@@ -1,29 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 5 May 2023 15:30:16 +0200
Subject: [PATCH] savevm-async: don't hold BQL during setup
See commit "migration: for snapshots, hold the BQL during setup
callbacks" for why. This is separate, because a version of that one
will hopefully land upstream.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/savevm-async.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 80624fada8..b1d85a4b41 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -401,10 +401,8 @@ void qmp_savevm_start(const char *statefile, Error **errp)
snap_state.state = SAVE_STATE_ACTIVE;
snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
- qemu_mutex_unlock_iothread();
qemu_savevm_state_header(snap_state.file);
qemu_savevm_state_setup(snap_state.file);
- qemu_mutex_lock_iothread();
/* Async processing from here on out happens in iohandler context, so let
* the target bdrv have its home there.

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/stream.c b/block/stream.c
index e522bbdec5..afed72db55 100644
index 694709bd25..e09bd5c4ef 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -27,7 +27,7 @@ enum {
@@ -28,7 +28,7 @@ enum {
* large enough to process multiple clusters in a single call, so
* that populating contiguous regions of the image is efficient.
*/

View File

@@ -17,17 +17,17 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 4 insertions(+)
diff --git a/block/io.c b/block/io.c
index 83d1b1dfdc..24a3c84c93 100644
index 0a8cbefe86..531b3b7a2d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1710,6 +1710,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
int sliced_niov;
size_t sliced_head, sliced_tail;
@@ -1734,6 +1734,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
{
int ret;
+ if (!qiov) {
+ return 0;
+ }
+
/* Should have been checked by the caller already */
ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
if (ret < 0) {
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {

View File

@@ -25,21 +25,20 @@ once the backing image is removed. It will be replaced by 'file'.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures
make error return value consistent with QEMU
avoid premature break during read]
make error return value consistent with QEMU]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
block/alloc-track.c | 350 ++++++++++++++++++++++++++++++++++++++++++++
block/meson.build | 1 +
2 files changed, 353 insertions(+)
2 files changed, 351 insertions(+)
create mode 100644 block/alloc-track.c
diff --git a/block/alloc-track.c b/block/alloc-track.c
new file mode 100644
index 0000000000..b75d7c6460
index 0000000000..43d40d11af
--- /dev/null
+++ b/block/alloc-track.c
@@ -0,0 +1,352 @@
@@ -0,0 +1,350 @@
+/*
+ * Node to allow backing images to be applied to any node. Assumes a blank
+ * image to begin with, only new writes are tracked as allocated, thus this
@@ -55,7 +54,6 @@ index 0000000000..b75d7c6460
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/dirty-bitmap.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
@@ -165,9 +163,9 @@ index 0000000000..b75d7c6460
+ }
+}
+
+static coroutine_fn int64_t track_co_getlength(BlockDriverState *bs)
+static int64_t track_getlength(BlockDriverState *bs)
+{
+ return bdrv_co_getlength(bs->file->bs);
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn track_co_preadv(BlockDriverState *bs,
@@ -217,8 +215,7 @@ index 0000000000..b75d7c6460
+ ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
+ &local_qiov, flags);
+ } else {
+ qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ ret = 0;
+ ret = qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ }
+
+ if (ret != 0) {
@@ -368,7 +365,7 @@ index 0000000000..b75d7c6460
+
+ .bdrv_file_open = track_open,
+ .bdrv_close = track_close,
+ .bdrv_co_getlength = track_co_getlength,
+ .bdrv_getlength = track_getlength,
+ .bdrv_child_perm = track_child_perm,
+ .bdrv_refresh_limits = track_refresh_limits,
+
@@ -393,7 +390,7 @@ index 0000000000..b75d7c6460
+
+block_init(bdrv_alloc_track_init);
diff --git a/block/meson.build b/block/meson.build
index becc99ac4e..0a69836593 100644
index a26a69434e..74e5f49758 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -2,6 +2,7 @@ block_ss.add(genh)

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 15:26:30 +0200
Subject: [PATCH] PVE: whitelist 'invalid' QAPI names for backwards compat
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qapi/pragma.json | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/qapi/pragma.json b/qapi/pragma.json
index a2358e303a..9ff5c84ffd 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -15,6 +15,7 @@
'device_add',
'device_del',
'expire_password',
+ 'get_link_status',
'migrate_cancel',
'netdev_add',
'netdev_del',
@@ -64,6 +65,8 @@
'SysEmuTarget', # query-cpu-fast, query-target
'UuidInfo', # query-uuid
'VncClientInfo', # query-vnc, query-vnc-servers, ...
- 'X86CPURegister32' # qom-get of x86 CPU properties
+ 'X86CPURegister32', # qom-get of x86 CPU properties
# feature-words, filtered-features
+ 'BlockdevOptionsPbs', # for PBS backwards compat
+ 'BalloonInfo'
] } }

View File

@@ -0,0 +1,35 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 17:36:55 +0200
Subject: [PATCH] PVE: savevm-async: register yank before
migration_incoming_state_destroy
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/savevm-async.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index e65a5e3482..2ed2536816 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -20,6 +20,7 @@
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
+#include "qemu/yank.h"
/* #define DEBUG_SAVEVM_STATE */
@@ -514,6 +515,10 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
dirty_bitmap_mig_before_vm_start();
qemu_fclose(f);
+
+ /* state_destroy assumes a real migration which would have added a yank */
+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
migration_incoming_state_destroy();
if (ret < 0) {
error_setg_errno(errp, -ret, "Error while loading VM state");

View File

@@ -1,9 +1,9 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Mon, 7 Feb 2022 14:21:01 +0100
Subject: [PATCH] qemu-img dd: add -l option for loading a snapshot
Subject: [PATCH] qemu-img: dd: add -l option for loading a snapshot
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
docs/tools/qemu-img.rst | 6 +++---
@@ -12,7 +12,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
3 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 5e713e231d..9390d5e5cf 100644
index 699229eef6..4189ced8bc 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -492,10 +492,10 @@ Command description:
@@ -46,10 +46,10 @@ index b5b0bb4467..36f97e1f19 100644
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 9d414d639b..e13a12137b 100644
index c6b4a5567d..041c203fc3 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5016,6 +5016,7 @@ static int img_dd(int argc, char **argv)
@@ -4943,6 +4943,7 @@ static int img_dd(int argc, char **argv)
BlockDriver *drv = NULL, *proto_drv = NULL;
BlockBackend *blk1 = NULL, *blk2 = NULL;
QemuOpts *opts = NULL;
@@ -57,15 +57,15 @@ index 9d414d639b..e13a12137b 100644
QemuOptsList *create_opts = NULL;
Error *local_err = NULL;
bool image_opts = false;
@@ -5025,6 +5026,7 @@ static int img_dd(int argc, char **argv)
@@ -4952,6 +4953,7 @@ static int img_dd(int argc, char **argv)
int64_t size = 0, readsize = 0;
int64_t out_pos, in_pos;
int64_t block_count = 0, out_pos, in_pos;
bool force_share = false, skip_create = false;
+ const char *snapshot_name = NULL;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -5062,7 +5064,7 @@ static int img_dd(int argc, char **argv)
@@ -4989,7 +4991,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
@@ -74,7 +74,7 @@ index 9d414d639b..e13a12137b 100644
if (c == EOF) {
break;
}
@@ -5085,6 +5087,19 @@ static int img_dd(int argc, char **argv)
@@ -5012,6 +5014,19 @@ static int img_dd(int argc, char **argv)
case 'n':
skip_create = true;
break;
@@ -94,7 +94,7 @@ index 9d414d639b..e13a12137b 100644
case 'U':
force_share = true;
break;
@@ -5144,11 +5159,24 @@ static int img_dd(int argc, char **argv)
@@ -5071,11 +5086,24 @@ static int img_dd(int argc, char **argv)
if (dd.flags & C_IF) {
blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
force_share);
@@ -120,7 +120,7 @@ index 9d414d639b..e13a12137b 100644
}
if (dd.flags & C_OSIZE) {
@@ -5303,6 +5331,7 @@ static int img_dd(int argc, char **argv)
@@ -5230,6 +5258,7 @@ static int img_dd(int argc, char **argv)
out:
g_free(arg);
qemu_opts_del(opts);

View File

@@ -0,0 +1,407 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 21 Apr 2022 13:26:48 +0200
Subject: [PATCH] vma: allow partial restore
Introduce a new map line for skipping a certain drive, of the form
skip=drive-scsi0
Since in PVE, most archives are compressed and piped to vma for
restore, it's not easily possible to skip reads.
For the reader, a new skip flag for VmaRestoreState is added and the
target is allowed to be NULL if skip is specified when registering. If
the skip flag is set, no writes will be made as well as no check for
duplicate clusters. Therefore, the flag is not set for verify.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Acked-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
vma-reader.c | 64 ++++++++++++---------
vma.c | 157 +++++++++++++++++++++++++++++----------------------
vma.h | 2 +-
3 files changed, 126 insertions(+), 97 deletions(-)
diff --git a/vma-reader.c b/vma-reader.c
index e65f1e8415..81a891c6b1 100644
--- a/vma-reader.c
+++ b/vma-reader.c
@@ -28,6 +28,7 @@ typedef struct VmaRestoreState {
bool write_zeroes;
unsigned long *bitmap;
int bitmap_size;
+ bool skip;
} VmaRestoreState;
struct VmaReader {
@@ -425,13 +426,14 @@ VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
}
static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
- BlockBackend *target, bool write_zeroes)
+ BlockBackend *target, bool write_zeroes, bool skip)
{
assert(vmar);
assert(dev_id);
vmar->rstate[dev_id].target = target;
vmar->rstate[dev_id].write_zeroes = write_zeroes;
+ vmar->rstate[dev_id].skip = skip;
int64_t size = vmar->devinfo[dev_id].size;
@@ -446,28 +448,30 @@ static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
}
int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
- bool write_zeroes, Error **errp)
+ bool write_zeroes, bool skip, Error **errp)
{
assert(vmar);
- assert(target != NULL);
+ assert(target != NULL || skip);
assert(dev_id);
- assert(vmar->rstate[dev_id].target == NULL);
-
- int64_t size = blk_getlength(target);
- int64_t size_diff = size - vmar->devinfo[dev_id].size;
-
- /* storage types can have different size restrictions, so it
- * is not always possible to create an image with exact size.
- * So we tolerate a size difference up to 4MB.
- */
- if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
- error_setg(errp, "vma_reader_register_bs for stream %s failed - "
- "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
- size, vmar->devinfo[dev_id].size);
- return -1;
+ assert(vmar->rstate[dev_id].target == NULL && !vmar->rstate[dev_id].skip);
+
+ if (target != NULL) {
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
}
- allocate_rstate(vmar, dev_id, target, write_zeroes);
+ allocate_rstate(vmar, dev_id, target, write_zeroes, skip);
return 0;
}
@@ -560,19 +564,23 @@ static int restore_extent(VmaReader *vmar, unsigned char *buf,
VmaRestoreState *rstate = &vmar->rstate[dev_id];
BlockBackend *target = NULL;
+ bool skip = rstate->skip;
+
if (dev_id != vmar->vmstate_stream) {
target = rstate->target;
- if (!verify && !target) {
+ if (!verify && !target && !skip) {
error_setg(errp, "got wrong dev id %d", dev_id);
return -1;
}
- if (vma_reader_get_bitmap(rstate, cluster_num)) {
- error_setg(errp, "found duplicated cluster %zd for stream %s",
- cluster_num, vmar->devinfo[dev_id].devname);
- return -1;
+ if (!skip) {
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
}
- vma_reader_set_bitmap(rstate, cluster_num, 1);
max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
} else {
@@ -618,7 +626,7 @@ static int restore_extent(VmaReader *vmar, unsigned char *buf,
return -1;
}
- if (!verify) {
+ if (!verify && !skip) {
int nb_sectors = end_sector - sector_num;
if (restore_write_data(vmar, dev_id, target, vmstate_fd,
buf + start, sector_num, nb_sectors,
@@ -654,7 +662,7 @@ static int restore_extent(VmaReader *vmar, unsigned char *buf,
return -1;
}
- if (!verify) {
+ if (!verify && !skip) {
int nb_sectors = end_sector - sector_num;
if (restore_write_data(vmar, dev_id, target, vmstate_fd,
buf + start, sector_num,
@@ -679,7 +687,7 @@ static int restore_extent(VmaReader *vmar, unsigned char *buf,
vmar->partial_zero_cluster_data += zero_size;
}
- if (rstate->write_zeroes && !verify) {
+ if (rstate->write_zeroes && !verify && !skip) {
if (restore_write_data(vmar, dev_id, target, vmstate_fd,
zero_vma_block, sector_num,
nb_sectors, errp) < 0) {
@@ -850,7 +858,7 @@ int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp)
for (dev_id = 1; dev_id < 255; dev_id++) {
if (vma_reader_get_device_info(vmar, dev_id)) {
- allocate_rstate(vmar, dev_id, NULL, false);
+ allocate_rstate(vmar, dev_id, NULL, false, false);
}
}
diff --git a/vma.c b/vma.c
index e8dffb43e0..e6e9ffc7fe 100644
--- a/vma.c
+++ b/vma.c
@@ -138,6 +138,7 @@ typedef struct RestoreMap {
char *throttling_group;
char *cache;
bool write_zero;
+ bool skip;
} RestoreMap;
static bool try_parse_option(char **line, const char *optname, char **out, const char *inbuf) {
@@ -245,47 +246,61 @@ static int extract_content(int argc, char **argv)
char *bps = NULL;
char *group = NULL;
char *cache = NULL;
+ char *devname = NULL;
+ bool skip = false;
+ uint64_t bps_value = 0;
+ const char *path = NULL;
+ bool write_zero = true;
+
if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
break;
}
int len = strlen(line);
if (line[len - 1] == '\n') {
line[len - 1] = '\0';
- if (len == 1) {
+ len = len - 1;
+ if (len == 0) {
break;
}
}
- while (1) {
- if (!try_parse_option(&line, "format", &format, inbuf) &&
- !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
- !try_parse_option(&line, "throttling.group", &group, inbuf) &&
- !try_parse_option(&line, "cache", &cache, inbuf))
- {
- break;
+ if (strncmp(line, "skip", 4) == 0) {
+ if (len < 6 || line[4] != '=') {
+ g_error("read map failed - option 'skip' has no value ('%s')",
+ inbuf);
+ } else {
+ devname = line + 5;
+ skip = true;
+ }
+ } else {
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
}
- }
- uint64_t bps_value = 0;
- if (bps) {
- bps_value = verify_u64(bps);
- g_free(bps);
- }
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
- const char *path;
- bool write_zero;
- if (line[0] == '0' && line[1] == ':') {
- path = line + 2;
- write_zero = false;
- } else if (line[0] == '1' && line[1] == ':') {
- path = line + 2;
- write_zero = true;
- } else {
- g_error("read map failed - parse error ('%s')", inbuf);
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ path = extract_devname(path, &devname, -1);
}
- char *devname = NULL;
- path = extract_devname(path, &devname, -1);
if (!devname) {
g_error("read map failed - no dev name specified ('%s')",
inbuf);
@@ -299,6 +314,7 @@ static int extract_content(int argc, char **argv)
map->throttling_group = group;
map->cache = cache;
map->write_zero = write_zero;
+ map->skip = skip;
g_hash_table_insert(devmap, map->devname, map);
@@ -328,6 +344,7 @@ static int extract_content(int argc, char **argv)
const char *cache = NULL;
int flags = BDRV_O_RDWR;
bool write_zero = true;
+ bool skip = false;
BlockBackend *blk = NULL;
@@ -343,6 +360,7 @@ static int extract_content(int argc, char **argv)
throttling_group = map->throttling_group;
cache = map->cache;
write_zero = map->write_zero;
+ skip = map->skip;
} else {
devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
dirname, di->devname);
@@ -361,57 +379,60 @@ static int extract_content(int argc, char **argv)
write_zero = false;
}
- size_t devlen = strlen(devfn);
- QDict *options = NULL;
- bool writethrough;
- if (format) {
- /* explicit format from commandline */
- options = qdict_new();
- qdict_put_str(options, "driver", format);
- } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
- strncmp(devfn, "/dev/", 5) == 0)
- {
- /* This part is now deprecated for PVE as well (just as qemu
- * deprecated not specifying an explicit raw format, too.
- */
- /* explicit raw format */
- options = qdict_new();
- qdict_put_str(options, "driver", "raw");
- }
- if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
- g_error("invalid cache option: %s\n", cache);
- }
+ if (!skip) {
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too.
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
- if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
- g_error("can't open file %s - %s", devfn,
- error_get_pretty(errp));
- }
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
- if (cache) {
- blk_set_enable_write_cache(blk, !writethrough);
- }
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
- if (throttling_group) {
- blk_io_limits_enable(blk, throttling_group);
- }
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
- if (throttling_bps) {
- if (!throttling_group) {
- blk_io_limits_enable(blk, devfn);
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
}
- ThrottleConfig cfg;
- throttle_config_init(&cfg);
- cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
- Error *err = NULL;
- if (!throttle_is_valid(&cfg, &err)) {
- error_report_err(err);
- g_error("failed to apply throttling");
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
}
- blk_set_io_limits(blk, &cfg);
}
- if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, skip, &errp) < 0) {
g_error("%s", error_get_pretty(errp));
}
diff --git a/vma.h b/vma.h
index c895c97f6d..1b62859165 100644
--- a/vma.h
+++ b/vma.h
@@ -142,7 +142,7 @@ GList *vma_reader_get_config_data(VmaReader *vmar);
VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
BlockBackend *target, bool write_zeroes,
- Error **errp);
+ bool skip, Error **errp);
int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
Error **errp);
int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);

View File

@@ -0,0 +1,233 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Tue, 26 Apr 2022 16:06:28 +0200
Subject: [PATCH] pbs: namespace support
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
block/pbs.c | 25 +++++++++++++++++++++----
pbs-restore.c | 19 ++++++++++++++++---
pve-backup.c | 6 +++++-
qapi/block-core.json | 5 ++++-
5 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4c799f00d9..0502f42be6 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1041,6 +1041,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS key_password
false, NULL, // PBS master_keyfile
false, NULL, // PBS fingerprint
+ false, NULL, // PBS backup-ns
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
false, false, // PBS use-dirty-bitmap
diff --git a/block/pbs.c b/block/pbs.c
index c5eb4d5bad..7471e2ef9d 100644
--- a/block/pbs.c
+++ b/block/pbs.c
@@ -14,6 +14,7 @@
#include <proxmox-backup-qemu.h>
#define PBS_OPT_REPOSITORY "repository"
+#define PBS_OPT_NAMESPACE "namespace"
#define PBS_OPT_SNAPSHOT "snapshot"
#define PBS_OPT_ARCHIVE "archive"
#define PBS_OPT_KEYFILE "keyfile"
@@ -27,6 +28,7 @@ typedef struct {
int64_t length;
char *repository;
+ char *namespace;
char *snapshot;
char *archive;
} BDRVPBSState;
@@ -40,6 +42,11 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "The server address and repository to connect to.",
},
+ {
+ .name = PBS_OPT_NAMESPACE,
+ .type = QEMU_OPT_STRING,
+ .help = "Optional: The snapshot's namespace.",
+ },
{
.name = PBS_OPT_SNAPSHOT,
.type = QEMU_OPT_STRING,
@@ -76,7 +83,7 @@ static QemuOptsList runtime_opts = {
// filename format:
-// pbs:repository=<repo>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+// pbs:repository=<repo>,namespace=<ns>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
static void pbs_parse_filename(const char *filename, QDict *options,
Error **errp)
{
@@ -112,6 +119,7 @@ static int pbs_open(BlockDriverState *bs, QDict *options, int flags,
s->archive = g_strdup(qemu_opt_get(opts, PBS_OPT_ARCHIVE));
const char *keyfile = qemu_opt_get(opts, PBS_OPT_KEYFILE);
const char *password = qemu_opt_get(opts, PBS_OPT_PASSWORD);
+ const char *namespace = qemu_opt_get(opts, PBS_OPT_NAMESPACE);
const char *fingerprint = qemu_opt_get(opts, PBS_OPT_FINGERPRINT);
const char *key_password = qemu_opt_get(opts, PBS_OPT_ENCRYPTION_PASSWORD);
@@ -124,9 +132,12 @@ static int pbs_open(BlockDriverState *bs, QDict *options, int flags,
if (!key_password) {
key_password = getenv("PBS_ENCRYPTION_PASSWORD");
}
+ if (namespace) {
+ s->namespace = g_strdup(namespace);
+ }
/* connect to PBS server in read mode */
- s->conn = proxmox_restore_new(s->repository, s->snapshot, password,
+ s->conn = proxmox_restore_new_ns(s->repository, s->snapshot, s->namespace, password,
keyfile, key_password, fingerprint, &pbs_error);
/* invalidates qemu_opt_get char pointers from above */
@@ -171,6 +182,7 @@ static int pbs_file_open(BlockDriverState *bs, QDict *options, int flags,
static void pbs_close(BlockDriverState *bs) {
BDRVPBSState *s = bs->opaque;
g_free(s->repository);
+ g_free(s->namespace);
g_free(s->snapshot);
g_free(s->archive);
proxmox_restore_disconnect(s->conn);
@@ -252,8 +264,13 @@ static coroutine_fn int pbs_co_pwritev(BlockDriverState *bs,
static void pbs_refresh_filename(BlockDriverState *bs)
{
BDRVPBSState *s = bs->opaque;
- snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
- s->repository, s->snapshot, s->archive);
+ if (s->namespace) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s:%s(%s)",
+ s->repository, s->namespace, s->snapshot, s->archive);
+ } else {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+ }
}
static const char *const pbs_strong_runtime_opts[] = {
diff --git a/pbs-restore.c b/pbs-restore.c
index 2f834cf42e..f03d9bab8d 100644
--- a/pbs-restore.c
+++ b/pbs-restore.c
@@ -29,7 +29,7 @@
static void help(void)
{
const char *help_msg =
- "usage: pbs-restore [--repository <repo>] snapshot archive-name target [command options]\n"
+ "usage: pbs-restore [--repository <repo>] [--ns namespace] snapshot archive-name target [command options]\n"
;
printf("%s", help_msg);
@@ -77,6 +77,7 @@ int main(int argc, char **argv)
Error *main_loop_err = NULL;
const char *format = "raw";
const char *repository = NULL;
+ const char *backup_ns = NULL;
const char *keyfile = NULL;
int verbose = false;
bool skip_zero = false;
@@ -90,6 +91,7 @@ int main(int argc, char **argv)
{"verbose", no_argument, 0, 'v'},
{"format", required_argument, 0, 'f'},
{"repository", required_argument, 0, 'r'},
+ {"ns", required_argument, 0, 'n'},
{"keyfile", required_argument, 0, 'k'},
{0, 0, 0, 0}
};
@@ -110,6 +112,9 @@ int main(int argc, char **argv)
case 'r':
repository = g_strdup(argv[optind - 1]);
break;
+ case 'n':
+ backup_ns = g_strdup(argv[optind - 1]);
+ break;
case 'k':
keyfile = g_strdup(argv[optind - 1]);
break;
@@ -160,8 +165,16 @@ int main(int argc, char **argv)
fprintf(stderr, "connecting to repository '%s'\n", repository);
}
char *pbs_error = NULL;
- ProxmoxRestoreHandle *conn = proxmox_restore_new(
- repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
+ ProxmoxRestoreHandle *conn = proxmox_restore_new_ns(
+ repository,
+ snapshot,
+ backup_ns,
+ password,
+ keyfile,
+ key_password,
+ fingerprint,
+ &pbs_error
+ );
if (conn == NULL) {
fprintf(stderr, "restore failed: %s\n", pbs_error);
return -1;
diff --git a/pve-backup.c b/pve-backup.c
index 9f6c04a512..f6a5f8c785 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -10,6 +10,8 @@
#include "qapi/qmp/qerror.h"
#include "qemu/cutils.h"
+#include <proxmox-backup-qemu.h>
+
/* PVE backup state and related function */
/*
@@ -535,6 +537,7 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_key_password, const char *key_password,
bool has_master_keyfile, const char *master_keyfile,
bool has_fingerprint, const char *fingerprint,
+ bool has_backup_ns, const char *backup_ns,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
bool has_use_dirty_bitmap, bool use_dirty_bitmap,
@@ -674,8 +677,9 @@ UuidInfo coroutine_fn *qmp_backup(
firewall_name = "fw.conf";
char *pbs_err = NULL;
- pbs = proxmox_backup_new(
+ pbs = proxmox_backup_new_ns(
backup_file,
+ has_backup_ns ? backup_ns : NULL,
backup_id,
backup_time,
dump_cb_block_size,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index fc8a125451..cc2ead0b75 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -817,6 +817,8 @@
#
# @fingerprint: server cert fingerprint (optional for format 'pbs')
#
+# @backup-ns: backup namespace (required for format 'pbs')
+#
# @backup-id: backup ID (required for format 'pbs')
#
# @backup-time: backup timestamp (Unix epoch, required for format 'pbs')
@@ -836,6 +838,7 @@
'*key-password': 'str',
'*master-keyfile': 'str',
'*fingerprint': 'str',
+ '*backup-ns': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
'*use-dirty-bitmap': 'bool',
@@ -3282,7 +3285,7 @@
{ 'struct': 'BlockdevOptionsPbs',
'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
'*keyfile': 'str', '*password': 'str', '*fingerprint': 'str',
- '*key_password': 'str' } }
+ '*key_password': 'str', '*namespace': 'str' } }
##
# @BlockdevOptionsNVMe:

View File

@@ -12,10 +12,10 @@ Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
1 file changed, 2 insertions(+), 40 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index a4749f3b1b..53e0396b51 100644
index 64a8d7d48b..9fc6dcb957 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1511,7 +1511,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
@@ -1348,7 +1348,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r;
RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags;
@@ -23,7 +23,7 @@ index a4749f3b1b..53e0396b51 100644
assert(offset + bytes <= s->image_size);
@@ -1539,43 +1538,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
@@ -1376,43 +1375,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status;
}
@@ -68,7 +68,7 @@ index a4749f3b1b..53e0396b51 100644
qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status;
@@ -1594,8 +1557,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
@@ -1431,8 +1394,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
}

View File

@@ -13,10 +13,10 @@ Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 53e0396b51..0913a0af39 100644
index 9fc6dcb957..98f4ba2620 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1470,11 +1470,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
@@ -1307,11 +1307,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs);

View File

@@ -23,10 +23,10 @@ Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
1 file changed, 112 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 0913a0af39..1dab254517 100644
index 98f4ba2620..efcbbe5949 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -108,12 +108,6 @@ typedef struct RBDTask {
@@ -97,12 +97,6 @@ typedef struct RBDTask {
int64_t ret;
} RBDTask;
@@ -39,7 +39,7 @@ index 0913a0af39..1dab254517 100644
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
BlockdevOptionsRbd *opts, bool cache,
const char *keypairs, const char *secretid,
@@ -1456,111 +1450,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
@@ -1293,111 +1287,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
return spec_info;
}
@@ -148,10 +148,10 @@ index 0913a0af39..1dab254517 100644
- return status;
-}
-
static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs)
static int64_t qemu_rbd_getlength(BlockDriverState *bs)
{
BDRVRBDState *s = bs->opaque;
@@ -1796,7 +1685,6 @@ static BlockDriver bdrv_rbd = {
@@ -1633,7 +1522,6 @@ static BlockDriver bdrv_rbd = {
#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
.bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
#endif

View File

@@ -0,0 +1,59 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 25 May 2022 13:59:37 +0200
Subject: [PATCH] PVE-Backup: create jobs: correctly cancel in error scenario
The first call to job_cancel_sync() will cancel and free all jobs in
the transaction, so ensure that it's called only once and get rid of
the job_unref() that would operate on freed memory.
It's also necessary to NULL backup_state.pbs in the error scenario,
because a subsequent backup_cancel QMP call (as happens in PVE when
the backup QMP command fails) would try to call proxmox_backup_abort()
and run into a segfault.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index f6a5f8c785..5bed6f4014 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -506,6 +506,11 @@ static void create_backup_jobs_bh(void *opaque) {
}
if (*errp) {
+ /*
+ * It's enough to cancel one job in the transaction, the rest will
+ * follow automatically.
+ */
+ bool canceled = false;
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -516,12 +521,12 @@ static void create_backup_jobs_bh(void *opaque) {
di->target = NULL;
}
- if (di->job) {
+ if (!canceled && di->job) {
AioContext *ctx = di->job->job.aio_context;
aio_context_acquire(ctx);
job_cancel_sync(&di->job->job, true);
- job_unref(&di->job->job);
aio_context_release(ctx);
+ canceled = true;
}
}
}
@@ -947,6 +952,7 @@ err:
if (pbs) {
proxmox_backup_disconnect(pbs);
+ backup_state.pbs = NULL;
}
if (backup_dir) {

View File

@@ -0,0 +1,76 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 25 May 2022 13:59:38 +0200
Subject: [PATCH] PVE-Backup: ensure jobs in di_list are referenced
Ensures that qmp_backup_cancel doesn't pick a job that's already been
freed. With unlucky timings it seems possible that:
1. job_exit -> job_completed -> job_finalize_single starts
2. pvebackup_co_complete_stream gets spawned in completion callback
3. job finalize_single finishes -> job's refcount hits zero -> job is
freed
4. qmp_backup_cancel comes in and locks backup_state.backup_mutex
before pvebackup_co_complete_stream can remove the job from the
di_list
5. qmp_backup_cancel will pick a job that's already been freed
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 5bed6f4014..0c34428713 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -316,6 +316,14 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
}
}
+ if (di->job) {
+ AioContext *ctx = di->job->job.aio_context;
+ aio_context_acquire(ctx);
+ job_unref(&di->job->job);
+ di->job = NULL;
+ aio_context_release(ctx);
+ }
+
// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -491,9 +499,12 @@ static void create_backup_jobs_bh(void *opaque) {
bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
- aio_context_release(aio_context);
-
di->job = job;
+ if (job) {
+ job_ref(&job->job);
+ }
+
+ aio_context_release(aio_context);
if (!job || local_err) {
error_setg(errp, "backup_job_create failed: %s",
@@ -521,12 +532,16 @@ static void create_backup_jobs_bh(void *opaque) {
di->target = NULL;
}
- if (!canceled && di->job) {
+ if (di->job) {
AioContext *ctx = di->job->job.aio_context;
aio_context_acquire(ctx);
- job_cancel_sync(&di->job->job, true);
+ if (!canceled) {
+ job_cancel_sync(&di->job->job, true);
+ canceled = true;
+ }
+ job_unref(&di->job->job);
+ di->job = NULL;
aio_context_release(ctx);
- canceled = true;
}
}
}

View File

@@ -0,0 +1,120 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 25 May 2022 13:59:39 +0200
Subject: [PATCH] PVE-Backup: avoid segfault issues upon backup-cancel
When canceling a backup in PVE via a signal it's easy to run into a
situation where the job is already failing when the backup_cancel QMP
command comes in. With a bit of unlucky timing on top, it can happen
that job_exit() runs between schedulung of job_cancel_bh() and
execution of job_cancel_bh(). But job_cancel_sync() does not expect
that the job is already finalized (in fact, the job might've been
freed already, but even if it isn't, job_cancel_sync() would try to
deref job->txn which would be NULL at that point).
It is not possible to simply use the job_cancel() (which is advertised
as being async but isn't in all cases) in qmp_backup_cancel() for the
same reason job_cancel_sync() cannot be used. Namely, because it can
invoke job_finish_sync() (which uses AIO_WAIT_WHILE and thus hangs if
called from a coroutine). This happens when there's multiple jobs in
the transaction and job->deferred_to_main_loop is true (is set before
scheduling job_exit()) or if the job was not started yet.
Fix the issue by selecting the job to cancel in job_cancel_bh() itself
using the first job that's not completed yet. This is not necessarily
the first job in the list, because pvebackup_co_complete_stream()
might not yet have removed a completed job when job_cancel_bh() runs.
An alternative would be to continue using only the first job and
checking against JOB_STATUS_CONCLUDED or JOB_STATUS_NULL to decide if
it's still necessary and possible to cancel, but the approach with
using the first non-completed job seemed more robust.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 61 +++++++++++++++++++++++++++++++++-------------------
1 file changed, 39 insertions(+), 22 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 0c34428713..2e22030eec 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -355,15 +355,42 @@ static void pvebackup_complete_cb(void *opaque, int ret)
/*
* job_cancel(_sync) does not like to be called from coroutines, so defer to
- * main loop processing via a bottom half.
+ * main loop processing via a bottom half. Assumes that caller holds
+ * backup_mutex.
*/
static void job_cancel_bh(void *opaque) {
CoCtxData *data = (CoCtxData*)opaque;
- Job *job = (Job*)data->data;
- AioContext *job_ctx = job->aio_context;
- aio_context_acquire(job_ctx);
- job_cancel_sync(job, true);
- aio_context_release(job_ctx);
+
+ /*
+ * Be careful to pick a valid job to cancel:
+ * 1. job_cancel_sync() does not expect the job to be finalized already.
+ * 2. job_exit() might run between scheduling and running job_cancel_bh()
+ * and pvebackup_co_complete_stream() might not have removed the job from
+ * the list yet (in fact, cannot, because it waits for the backup_mutex).
+ * Requiring !job_is_completed() ensures that no finalized job is picked.
+ */
+ GList *bdi = g_list_first(backup_state.di_list);
+ while (bdi) {
+ if (bdi->data) {
+ BlockJob *bj = ((PVEBackupDevInfo *)bdi->data)->job;
+ if (bj) {
+ Job *job = &bj->job;
+ if (!job_is_completed(job)) {
+ AioContext *job_ctx = job->aio_context;
+ aio_context_acquire(job_ctx);
+ job_cancel_sync(job, true);
+ aio_context_release(job_ctx);
+ /*
+ * It's enough to cancel one job in the transaction, the
+ * rest will follow automatically.
+ */
+ break;
+ }
+ }
+ }
+ bdi = g_list_next(bdi);
+ }
+
aio_co_enter(data->ctx, data->co);
}
@@ -384,22 +411,12 @@ void coroutine_fn qmp_backup_cancel(Error **errp)
proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
- /* it's enough to cancel one job in the transaction, the rest will follow
- * automatically */
- GList *bdi = g_list_first(backup_state.di_list);
- BlockJob *cancel_job = bdi && bdi->data ?
- ((PVEBackupDevInfo *)bdi->data)->job :
- NULL;
-
- if (cancel_job) {
- CoCtxData data = {
- .ctx = qemu_get_current_aio_context(),
- .co = qemu_coroutine_self(),
- .data = &cancel_job->job,
- };
- aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
- qemu_coroutine_yield();
- }
+ CoCtxData data = {
+ .ctx = qemu_get_current_aio_context(),
+ .co = qemu_coroutine_self(),
+ };
+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+ qemu_coroutine_yield();
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}

View File

@@ -0,0 +1,57 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 22 Jun 2022 10:45:11 +0200
Subject: [PATCH] vma: create: support 64KiB-unaligned input images
which fixes backing up templates with such disks in PVE, for example
efitype=4m EFI disks on a file-based storage (size = 540672).
If there is not enough left to read, blk_co_preadv will return -EIO,
so limit the size in the last iteration.
For writing, an unaligned end is already handled correctly.
The call to memset is not strictly necessary, because writing also
checks that it doesn't write data beyond the end of the image. But
there are two reasons to do it:
1. It's cleaner that way.
2. It allows detecting when the final piece is all zeroes, which might
not happen if the buffer still contains data from the previous
iteration.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
vma.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/vma.c b/vma.c
index e6e9ffc7fe..304f02bc84 100644
--- a/vma.c
+++ b/vma.c
@@ -548,7 +548,7 @@ static void coroutine_fn backup_run(void *opaque)
struct iovec iov;
QEMUIOVector qiov;
- int64_t start, end;
+ int64_t start, end, readlen;
int ret = 0;
unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
@@ -562,8 +562,16 @@ static void coroutine_fn backup_run(void *opaque)
iov.iov_len = VMA_CLUSTER_SIZE;
qemu_iovec_init_external(&qiov, &iov, 1);
+ if (start + 1 == end) {
+ memset(buf, 0, VMA_CLUSTER_SIZE);
+ readlen = job->len - start * VMA_CLUSTER_SIZE;
+ assert(readlen > 0 && readlen <= VMA_CLUSTER_SIZE);
+ } else {
+ readlen = VMA_CLUSTER_SIZE;
+ }
+
ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
- VMA_CLUSTER_SIZE, &qiov, 0);
+ readlen, &qiov, 0);
if (ret < 0) {
vma_writer_set_error(job->vmaw, "read error", -1);
goto out;

View File

@@ -0,0 +1,25 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 22 Jun 2022 10:45:12 +0200
Subject: [PATCH] vma: create: avoid triggering assertion in error case
error_setg expects its argument to not be initialized yet.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
vma-writer.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/vma-writer.c b/vma-writer.c
index df4b20793d..ac7da237d0 100644
--- a/vma-writer.c
+++ b/vma-writer.c
@@ -311,6 +311,8 @@ VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
}
if (vmaw->fd < 0) {
+ error_free(*errp);
+ *errp = NULL;
error_setg(errp, "can't open file %s - %s\n", filename,
g_strerror(errno));
goto err;

View File

@@ -0,0 +1,36 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 22 Jun 2022 10:45:13 +0200
Subject: [PATCH] block: alloc-track: avoid premature break
While the bdrv_co_preadv() calls are expected to return 0 on success,
qemu_iovec_memset() will return the number of bytes set (will be
local_bytes, because the slice with that size was just initialized).
Don't break out of the loop after the branch with qemu_iovec_memset(),
because there might still be work to do. Additionally, ret is an int,
which on 64-bit platforms is too small to hold the size_t returned by
qemu_iovec_memset().
The branch seems to be difficult to reach in practice, because the
whole point of alloc-track is to be used with a backing device.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index 6b50fbe537..c1160af04b 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -174,7 +174,8 @@ static int coroutine_fn track_co_preadv(BlockDriverState *bs,
ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
&local_qiov, flags);
} else {
- ret = qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ ret = 0;
}
if (ret != 0) {

View File

@@ -0,0 +1,144 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Mon, 3 Oct 2022 15:52:04 +0200
Subject: [PATCH] PVE Backup: allow passing max-workers performance setting
For query-proxmox-support, add an indication that it's possible to use
the setting.
For now, the other two BackupPerf settings are not exposed:
* use-copy-range: would need to be implemented by the backup-dump
block driver first, and in fact, the default for backup was changed,
because it wasn't as fast for backup in QEMU, see commit
6a30f663d4c0b3c45a544d541e0c4e214b2473a1.
* max-chunk: enforced to be at least the backup cluster size, which is
4 MiB for PBS and otherwise maximum of source and target cluster size.
And block-copy has a maximum buffer size of 1 MiB, so setting a larger
max-chunk doesn't even have an effect. To make the setting sensibly
usable the check would need to be removed and optionally the
block-copy max buffer size would need to be bumped. I tried doing just
that, and tested different source/target combinations with different
max-chunk settings, but there were no noticable improvements over the
default "unlimited" (resulting in 1 MiB for block-copy).
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 +++-
pve-backup.c | 18 +++++++++++++-----
qapi/block-core.json | 9 +++++++--
3 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 0502f42be6..cc231ec3f2 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1049,7 +1049,9 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, false, // PBS encrypt
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
- devlist, qdict_haskey(qdict, "speed"), speed, &error);
+ devlist, qdict_haskey(qdict, "speed"), speed,
+ false, 0, // BackupPerf max-workers
+ &error);
hmp_handle_error(mon, error);
}
diff --git a/pve-backup.c b/pve-backup.c
index 2e22030eec..e9aa7e0f49 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -55,6 +55,7 @@ static struct PVEBackupState {
bool starting;
} stat;
int64_t speed;
+ BackupPerf perf;
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
@@ -492,8 +493,6 @@ static void create_backup_jobs_bh(void *opaque) {
}
backup_state.txn = job_txn_new_seq();
- BackupPerf perf = { .max_workers = 16 };
-
/* create and start all jobs (paused state) */
GList *l = backup_state.di_list;
while (l) {
@@ -513,8 +512,9 @@ static void create_backup_jobs_bh(void *opaque) {
BlockJob *job = backup_job_create(
NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
- bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
+ bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
+ &local_err);
di->job = job;
if (job) {
@@ -584,7 +584,9 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
bool has_devlist, const char *devlist,
- bool has_speed, int64_t speed, Error **errp)
+ bool has_speed, int64_t speed,
+ bool has_max_workers, int64_t max_workers,
+ Error **errp)
{
assert(qemu_in_coroutine());
@@ -914,6 +916,11 @@ UuidInfo coroutine_fn *qmp_backup(
backup_state.speed = (has_speed && speed > 0) ? speed : 0;
+ backup_state.perf = (BackupPerf){ .max_workers = 16 };
+ if (has_max_workers) {
+ backup_state.perf.max_workers = max_workers;
+ }
+
backup_state.vmaw = vmaw;
backup_state.pbs = pbs;
@@ -1089,5 +1096,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
+ ret->backup_max_workers = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index cc2ead0b75..e3f62faa81 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -829,6 +829,8 @@
#
# @encrypt: use encryption ((optional for format 'pbs', defaults to true if there is a keyfile)
#
+# @max-workers: see @BackupPerf for details. Default 16.
+#
# Returns: the uuid of the backup job
#
##
@@ -847,7 +849,9 @@
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',
- '*devlist': 'str', '*speed': 'int' },
+ '*devlist': 'str',
+ '*speed': 'int',
+ '*max-workers': 'int' },
'returns': 'UuidInfo', 'coroutine': true }
##
@@ -902,7 +906,8 @@
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
- 'pbs-library-version': 'str' } }
+ 'pbs-library-version': 'str',
+ 'backup-max-workers': 'bool' } }
##
# @query-proxmox-support:

83
debian/patches/series vendored
View File

@@ -1,16 +1,9 @@
extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
extra/0004-migration-block-dirty-bitmap-fix-loading-bitmap-when.patch
extra/0005-Revert-Revert-graph-lock-Disable-locking-for-now.patch
extra/0006-migration-states-workaround-snapshot-performance-reg.patch
extra/0007-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
extra/0008-target-i386-the-sgx_epc_get_section-stub-is-reachabl.patch
extra/0009-ui-clipboard-mark-type-as-not-available-when-there-i.patch
extra/0010-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch
extra/0011-virtio-Re-enable-notifications-after-drain.patch
extra/0012-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch
extra/0013-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch
extra/0002-block-io_uring-revert-Use-io_uring_register_ring_fd-.patch
extra/0003-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch
extra/0004-chardev-fix-segfault-in-finalize.patch
extra/0005-init-daemonize-defuse-PID-file-resolve-error.patch
extra/0006-block-block-backend-blk_set_enable_write_cache-is-IO.patch
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
@@ -23,12 +16,12 @@ pve/0003-PVE-Config-set-the-CPU-model-to-kvm64-32-instead-of-.patch
pve/0004-PVE-Config-ui-spice-default-to-pve-certificates.patch
pve/0005-PVE-Config-glusterfs-no-default-logfile-if-daemonize.patch
pve/0006-PVE-Config-rbd-block-rbd-disable-rbd_cache_writethro.patch
pve/0007-PVE-Up-glusterfs-allow-partial-reads.patch
pve/0008-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
pve/0009-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
pve/0010-PVE-Up-qemu-img-dd-add-isize-parameter.patch
pve/0011-PVE-Up-qemu-img-dd-add-n-skip_create.patch
pve/0012-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
pve/0007-PVE-Up-qmp-add-get_link_status.patch
pve/0008-PVE-Up-glusterfs-allow-partial-reads.patch
pve/0009-PVE-Up-qemu-img-return-success-on-info-without-snaps.patch
pve/0010-PVE-Up-qemu-img-dd-add-osize-and-read-from-to-stdin-.patch
pve/0011-PVE-Up-qemu-img-dd-add-isize-parameter.patch
pve/0012-PVE-Up-qemu-img-dd-add-n-skip_create.patch
pve/0013-PVE-virtio-balloon-improve-query-balloon.patch
pve/0014-PVE-qapi-modify-query-machines.patch
pve/0015-PVE-qapi-modify-spice-query.patch
@@ -45,21 +38,39 @@ pve/0025-PVE-Allow-version-code-in-machine-type.patch
pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
pve/0027-PVE-Backup-add-vma-backup-format-code.patch
pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
pve/0029-PVE-Add-sequential-job-transaction-support.patch
pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
pve/0031-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
pve/0035-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
pve/0036-PVE-fall-back-to-open-iscsi-initiatorname.patch
pve/0037-PVE-block-stream-increase-chunk-size.patch
pve/0038-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
pve/0039-block-add-alloc-track-driver.patch
pve/0040-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
pve/0041-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
pve/0042-Revert-block-rbd-implement-bdrv_co_block_status.patch
pve/0043-alloc-track-fix-deadlock-during-drop.patch
pve/0044-migration-for-snapshots-hold-the-BQL-during-setup-ca.patch
pve/0045-savevm-async-don-t-hold-BQL-during-setup.patch
pve-qemu-8.1-vitastor.patch
pve/0029-PVE-Backup-proxmox-backup-patches-for-qemu.patch
pve/0030-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0031-PVE-Backup-Add-dirty-bitmap-tracking-for-incremental.patch
pve/0032-PVE-various-PBS-fixes.patch
pve/0033-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
pve/0034-PVE-add-query_proxmox_support-QMP-command.patch
pve/0035-PVE-add-query-pbs-bitmap-info-QMP-call.patch
pve/0036-PVE-redirect-stderr-to-journal-when-daemonized.patch
pve/0037-PVE-Add-sequential-job-transaction-support.patch
pve/0038-PVE-Backup-Use-a-transaction-to-synchronize-job-stat.patch
pve/0039-PVE-Backup-Don-t-block-on-finishing-and-cleanup-crea.patch
pve/0040-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
pve/0041-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch
pve/0042-PVE-fall-back-to-open-iscsi-initiatorname.patch
pve/0043-PVE-Use-coroutine-QMP-for-backup-cancel_backup.patch
pve/0044-PBS-add-master-key-support.patch
pve/0045-PVE-block-pbs-fast-path-reads-without-allocation-if-.patch
pve/0046-PVE-block-stream-increase-chunk-size.patch
pve/0047-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
pve/0048-block-add-alloc-track-driver.patch
pve/0049-PVE-whitelist-invalid-QAPI-names-for-backwards-compa.patch
pve/0050-PVE-savevm-async-register-yank-before-migration_inco.patch
pve/0051-qemu-img-dd-add-l-option-for-loading-a-snapshot.patch
pve/0052-vma-allow-partial-restore.patch
pve/0053-pbs-namespace-support.patch
pve/0054-Revert-block-rbd-workaround-for-ceph-issue-53784.patch
pve/0055-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
pve/0056-Revert-block-rbd-implement-bdrv_co_block_status.patch
pve/0057-PVE-Backup-create-jobs-correctly-cancel-in-error-sce.patch
pve/0058-PVE-Backup-ensure-jobs-in-di_list-are-referenced.patch
pve/0059-PVE-Backup-avoid-segfault-issues-upon-backup-cancel.patch
pve/0060-vma-create-support-64KiB-unaligned-input-images.patch
pve/0061-vma-create-avoid-triggering-assertion-in-error-case.patch
pve/0062-block-alloc-track-avoid-premature-break.patch
pve/0063-PVE-Backup-allow-passing-max-workers-performance-set.patch
pve-qemu-7.1-vitastor.patch

View File

@@ -1,6 +1,7 @@
# install the userspace utilities
debian/kvm-ifup etc/kvm/
debian/kvm-ifdown etc/kvm/
#install ovmf uefi rom
debian/OVMF_CODE-pure-efi.fd usr/share/kvm/
debian/OVMF_VARS-pure-efi.fd usr/share/kvm/
debian/kvm-ifdown etc/kvm/
# install the userspace utilities
debian/kvm-ifup etc/kvm/

View File

@@ -1,13 +1,16 @@
# also use aarch64 for 32 bit arm
usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
usr/bin/qemu-system-x86_64 usr/bin/kvm
# qemu-system-i386 and qemu-system-x86_64 provides the same hardware emulation
usr/bin/qemu-system-x86_64 usr/bin/qemu-system-i386
# also use aarch64 for 32 bit arm
usr/bin/qemu-system-aarch64 usr/bin/qemu-system-arm
# upstream provides a qemu man page,
# we symlink to kvm for backward compatibility
# and to qemu-system-{i386,x86_64} to fullfill our 'Provides: qemu-system-x86'
usr/share/man/man1/qemu.1 usr/share/man/man1/kvm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-aarch64.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-arm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-i386.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-x86_64.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-arm.1
usr/share/man/man1/qemu.1 usr/share/man/man1/qemu-system-aarch64.1

Some files were not shown because too many files have changed in this diff Show More