Compare commits

..

1 Commits

Author SHA1 Message Date
9cd02c02db Add Vitastor support 2021-12-08 20:32:53 +00:00
109 changed files with 4105 additions and 7192 deletions

View File

@@ -19,9 +19,6 @@ submodule:
test -f "${SRCDIR}/configure" || git submodule update --init --recursive
$(BUILDDIR): keycodemapdb | submodule
# check if qemu/ was used for a build
# if so, please run 'make distclean' in the submodule and try again
test ! -f $(SRCDIR)/build/config.status
rm -rf $(BUILDDIR)
cp -a $(SRCDIR) $(BUILDDIR)
cp -a debian $(BUILDDIR)/debian
@@ -33,7 +30,7 @@ $(BUILDDIR): keycodemapdb | submodule
deb kvm: $(DEBS)
$(DEB_DBG): $(DEB)
$(DEB): $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j8
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j
lintian $(DEBS)
.PHONY: update
@@ -49,7 +46,7 @@ update:
.PHONY: upload
upload: $(DEBS)
tar cf - ${DEBS} | ssh repoman@repo.proxmox.com upload --product pve --dist bullseye
tar cf - ${DEBS} | ssh repoman@repo.proxmox.com upload --product pve --dist buster
.PHONY: distclean clean
distclean: clean

285
debian/changelog vendored
View File

@@ -1,289 +1,8 @@
pve-qemu-kvm (7.1.0-4+vitastor3) bullseye; urgency=medium
* Add bdrv_co_block_status implementation for QCOW2 export support
-- Vitaliy Filippov <vitalif@yourcmc.ru> Thu, 12 Jan 2023 02:31:18 +0300
pve-qemu-kvm (7.1.0-4+vitastor2) bullseye; urgency=medium
pve-qemu-kvm (5.1.0-8+vitastor1) buster; urgency=medium
* Add Vitastor support
-- Vitaliy Filippov <vitalif@yourcmc.ru> Thu, 15 Dec 2022 19:32:28 +0300
pve-qemu-kvm (7.1.0-4) bullseye; urgency=medium
* cherry-pick "block/block-backend: blk_set_enable_write_cache is IO_CODE"
-- Proxmox Support Team <support@proxmox.com> Tue, 22 Nov 2022 09:21:06 +0100
pve-qemu-kvm (7.1.0-3) bullseye; urgency=medium
* init: daemonize: defuse PID file resolve error to a warning at max, fixing
some usecases that regressed with 7.1, like tracking start up in our
file-restore VM.
-- Proxmox Support Team <support@proxmox.com> Fri, 28 Oct 2022 10:27:49 +0200
pve-qemu-kvm (7.1.0-2) bullseye; urgency=medium
* fix an issue with error handling in async backup code
-- Proxmox Support Team <support@proxmox.com> Tue, 18 Oct 2022 15:33:44 +0200
pve-qemu-kvm (7.1.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.1.0
* add fix for io_uring_register_ring_fd from upstream
-- Proxmox Support Team <support@proxmox.com> Fri, 14 Oct 2022 14:54:09 +0200
pve-qemu-kvm (7.0.0-4) bullseye; urgency=medium
* add revision to version output
* PVE Backup: allow passing max-workers performance setting
-- Proxmox Support Team <support@proxmox.com> Mon, 10 Oct 2022 11:55:37 +0200
pve-qemu-kvm (7.0.0-3) bullseye; urgency=medium
* savevm-async: avoid segfault when aborting snapshot creation task
* savevm-async: set SAVE_STATE_DONE when closing state file was successful
allowing one to start a new snapshot task after aborting one.
-- Proxmox Support Team <support@proxmox.com> Tue, 30 Aug 2022 12:54:03 +0200
pve-qemu-kvm (7.0.0-2) bullseye; urgency=medium
* backport "io_uring: fix short read slow path"
* backport "e1000: set RX descriptor status in a separate operation"
-- Proxmox Support Team <support@proxmox.com> Wed, 20 Jul 2022 09:17:07 +0200
pve-qemu-kvm (7.0.0-1) bullseye; urgency=medium
* update to QEMU stable release 7.0.0
-- Proxmox Support Team <support@proxmox.com> Thu, 30 Jun 2022 11:07:37 +0200
pve-qemu-kvm (6.2.0-11) bullseye; urgency=medium
* add 'namespace' to BlockdevOptionsPbs for live-restore support
* vma: create: support 64KiB-unaligned input images like to improve backing
up some VM templates
* block: alloc-track: avoid unlikely, but possible premature break
-- Proxmox Support Team <support@proxmox.com> Wed, 22 Jun 2022 15:54:54 +0200
pve-qemu-kvm (6.2.0-10) bullseye; urgency=medium
* fix #4101: fix backup cancellation bug with iothreads
-- Proxmox Support Team <support@proxmox.com> Thu, 9 Jun 2022 16:35:51 +0200
pve-qemu-kvm (6.2.0-9) bullseye; urgency=medium
* fix possible race conditions during cancellation of a PBS backup
-- Proxmox Support Team <support@proxmox.com> Wed, 08 Jun 2022 14:03:22 +0200
pve-qemu-kvm (6.2.0-8) bullseye; urgency=medium
* revert "block/rbd: implement bdrv_co_block_status" to work around
performance regression when backing up large RBD disk
-- Proxmox Support Team <support@proxmox.com> Thu, 19 May 2022 09:24:45 +0200
pve-qemu-kvm (6.2.0-7) bullseye; urgency=medium
* Proxmox Backup Server namespace support
-- Proxmox Support Team <support@proxmox.com> Thu, 12 May 2022 16:05:56 +0200
pve-qemu-kvm (6.2.0-6) bullseye; urgency=medium
* block/gluster: correctly set max_pdiscard which is int64_t to avoid
triggering assertion
* ui/vnc.c: Fixed a deadlock bug
* display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) and
integer overflow in cursor_alloc (CVE-2021-4206)
-- Proxmox Support Team <support@proxmox.com> Wed, 11 May 2022 10:42:53 +0200
pve-qemu-kvm (6.2.0-5) bullseye; urgency=medium
* vma: allow partial restore by skipping some disk
-- Proxmox Support Team <support@proxmox.com> Mon, 25 Apr 2022 10:13:46 +0200
pve-qemu-kvm (6.2.0-4) bullseye; urgency=medium
* d/control: add libgbm to build dependencies
* d/control: add suggest dependency-hint for libgl1
* various stable backports:
+ virtio-net: fix map leaking on error during receive
+ memory: Fix incorrect calls of log_global_start/stop
+ acpi: fix OEM ID/OEM Table ID padding
+ vhost-vsock: detach the virqueue element in case of error
+ vhost-user: remove VirtQ notifier restore
+ vhost-user: fix VirtQ notifier cleanup
+ virtio: fix the condition for iommu_platform not supported
-- Proxmox Support Team <support@proxmox.com> Fri, 22 Apr 2022 11:52:30 +0200
pve-qemu-kvm (6.2.0-3) bullseye; urgency=medium
* cherry-pick fix for some manually added ACPI table SLIC entries via the
custom args flag.
-- Proxmox Support Team <support@proxmox.com> Fri, 15 Apr 2022 09:09:37 +0200
pve-qemu-kvm (6.2.0-2) bullseye; urgency=medium
* compile in virgl support
* enable zstd support
* drop sdl dependency (it was disabled at compile time already)
* recommend 'numactl'
* fix an issue with multi-disk backups where chunks would be written
multiple times
-- Proxmox Support Team <support@proxmox.com> Thu, 03 Mar 2022 12:03:44 +0100
pve-qemu-kvm (6.2.0-1) bullseye; urgency=medium
* update to QEMU stable release 6.2.0
-- Proxmox Support Team <support@proxmox.com> Thu, 17 Feb 2022 06:23:14 +0100
pve-qemu-kvm (6.1.1-2) bullseye; urgency=medium
* vma: create: register all streams before entering coroutines to avoid that
an early stream starts to write already before all got registered.
-- Proxmox Support Team <support@proxmox.com> Mon, 14 Feb 2022 15:53:15 +0100
pve-qemu-kvm (6.1.1-1) bullseye; urgency=medium
* update to 6.1.1 stable release
-- Proxmox Support Team <support@proxmox.com> Thu, 13 Jan 2022 10:57:43 +0100
pve-qemu-kvm (6.1.0-3) bullseye; urgency=medium
* fix #3738: cherry-pick "block: introduce max_hw_iov for use in scsi-
generic
-- Proxmox Support Team <support@proxmox.com> Wed, 01 Dec 2021 15:35:43 +0100
pve-qemu-kvm (6.1.0-2) bullseye; urgency=medium
* avoid a possible segmentation fault during block (disk) mirror
-- Proxmox Support Team <support@proxmox.com> Tue, 16 Nov 2021 09:38:10 +0100
pve-qemu-kvm (6.1.0-1) bullseye; urgency=medium
* update to QEMU stable release 6.1.0
-- Proxmox Support Team <support@proxmox.com> Mon, 11 Oct 2021 15:15:19 +0200
pve-qemu-kvm (6.0.0-4) bullseye; urgency=medium
* drop the ancient workaround that force disabled SMM due to observing VM
hangs on old kernel versions.
* monitor/qmp: fix race with clients disconnecting early resulting in other
clients receiving a message with the (now wrong) ID of the former
-- Proxmox Support Team <support@proxmox.com> Mon, 06 Sep 2021 07:30:00 +0200
pve-qemu-kvm (6.0.0-3) bullseye; urgency=medium
* io_uring: resubmit when result is -EAGAIN
-- Proxmox Support Team <support@proxmox.com> Tue, 3 Aug 2021 15:01:31 +0200
pve-qemu-kvm (6.0.0-2) bullseye; urgency=medium
* enable io-uring support in QEMU builds
-- Proxmox Support Team <support@proxmox.com> Wed, 23 Jun 2021 11:03:54 +0200
pve-qemu-kvm (6.0.0-1) bullseye; urgency=medium
* update to QEMU stable release 6.0.0
-- Proxmox Support Team <support@proxmox.com> Fri, 28 May 2021 11:30:50 +0200
pve-qemu-kvm (5.2.0-11) bullseye; urgency=medium
* re-build for Proxmox VE 7 / Debian Bullseye
-- Proxmox Support Team <support@proxmox.com> Thu, 13 May 2021 14:03:00 +0200
pve-qemu-kvm (5.2.0-6) pve; urgency=medium
* improve the alloc-track and Proxmox Backup Server special block driver when
used with IO-threads for the new live-restore feature
-- Proxmox Support Team <support@proxmox.com> Thu, 15 Apr 2021 16:29:48 +0200
pve-qemu-kvm (5.2.0-5) pve; urgency=medium
* cherry-pick fixes for a possible deadlock when resizing a disk while using
IO-threads
-- Proxmox Support Team <support@proxmox.com> Tue, 30 Mar 2021 18:18:18 +0200
pve-qemu-kvm (5.2.0-4) pve; urgency=medium
* monitor/qmp: fix race on CHR_EVENT_CLOSED without OOB
* improve saving and loading dirty bitmaps in live-snapshots
-- Proxmox Support Team <support@proxmox.com> Tue, 23 Mar 2021 15:41:26 +0100
pve-qemu-kvm (5.2.0-3) pve; urgency=medium
* backport "i386/acpi: restore device paths for pre-5.1 vms" patch
* ship list of 'i440fx' and 'q35' machine versions this QEMU version supports
-- Proxmox Support Team <support@proxmox.com> Fri, 05 Mar 2021 16:23:06 +0100
pve-qemu-kvm (5.2.0-2) pve; urgency=medium
* Proxmox Backup restore: ensure all caches are flushed for userspace
attached storages (for example, Ceph when librbd and not KRBD is used)
* ship VirtIOFSd daemon
-- Proxmox Support Team <support@proxmox.com> Wed, 24 Feb 2021 18:25:14 +0100
pve-qemu-kvm (5.2.0-1) pve; urgency=medium
* update to QEMU stable release 5.2.0
* fix #3084: fall back to open-iscsi initiatorname
* fix snapshot abort and improve performance in some edge cases
* add basis for Proxmox Backup Server master key support
-- Proxmox Support Team <support@proxmox.com> Fri, 12 Feb 2021 12:09:59 +0100
-- Vitaliy Filippov <vitalif@yourcmc.ru> Sun, 05 Dec 2021 13:46:51 +0300
pve-qemu-kvm (5.1.0-8) pve; urgency=medium

17
debian/control vendored
View File

@@ -10,34 +10,26 @@ Build-Depends: autotools-dev,
libattr1-dev,
libcap-ng-dev,
libcurl4-gnutls-dev,
libepoxy-dev,
libfdt-dev,
libgbm-dev,
libglusterfs-dev (>= 5.2-2),
libgnutls28-dev,
libiscsi-dev (>= 1.12.0),
libjemalloc-dev,
libjpeg-dev,
libjson-perl,
libnuma-dev,
libpci-dev,
libpixman-1-dev,
libproxmox-backup-qemu0-dev (>= 1.3.0-1),
libproxmox-backup-qemu0-dev (>= 1.0.2-1),
librbd-dev (>= 0.48),
libsdl1.2-dev,
libseccomp-dev,
libspice-protocol-dev (>= 0.12.14~),
libspice-server-dev (>= 0.14.0~),
libsystemd-dev,
liburing-dev,
libusb-1.0-0-dev (>= 1.0.17-1),
libusbredirparser-dev (>= 0.6-2),
libvirglrenderer-dev,
libzstd-dev,
meson,
python3-minimal,
python3-sphinx,
python3-sphinx-rtd-theme,
quilt,
texi2html,
texinfo,
@@ -49,6 +41,7 @@ Package: pve-qemu-kvm
Architecture: any
Depends: ceph-common (>= 0.48),
iproute2,
libaio1,
libgfapi0 | glusterfs-common (>= 5.6),
libgfchangelog0 | glusterfs-common (>= 5.6),
libgfdb0 | glusterfs-common (>= 5.6),
@@ -59,14 +52,16 @@ Depends: ceph-common (>= 0.48),
libiscsi4 (>= 1.12.0) | libiscsi7,
libjemalloc2,
libjpeg62-turbo,
libproxmox-backup-qemu0 (>= 1.0.2-1),
libsdl1.2debian,
libspice-server1 (>= 0.14.0~),
libusb-1.0-0 (>= 1.0.17-1),
libusbredirparser1 (>= 0.6-2),
libuuid1,
numactl,
python,
${misc:Depends},
${shlibs:Depends},
Recommends: numactl
Suggests: libgl1
Conflicts: kvm,
pve-kvm,
pve-qemu-kvm-2.6.18,

2
debian/copyright vendored
View File

@@ -25,7 +25,7 @@ License:
In particular, the QEMU virtual CPU core library (libqemu.a) is
released under the GNU Lesser General Public License version 2 or later.
On Debian systems, the complete text of the GNU Lesser General Public
On Debian systems, the complete text of the GNU Lesser General Public
License can be found in the file /usr/share/common-licenses/LGPL.
Some hardware device emulation sources and other QEMU functionality are

View File

@@ -1,27 +0,0 @@
#!/usr/bin/perl
use warnings;
use strict;
use JSON;
my $machines = [];
while (<STDIN>) {
if (/^\s*Supported machines are:/) {
next;
}
s/^\s+//;
my @machine = split(/\s+/);
next if $machine[0] !~ m/^pc-(i440fx|q35)-(.+)$/;
push @$machines, {
'id' => $machine[0],
'type' => $1,
'version' => $2,
};
}
die "no QEMU machine types detected from STDIN input" if scalar (@$machines) <= 0;
print to_json($machines, { utf8 => 1 }) or die "$!\n";

View File

@@ -0,0 +1,34 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 6 Aug 2020 15:43:58 +0200
Subject: [PATCH] block/block-copy: always align copied region to cluster size
Since commit 42ac214406e0 (block/block-copy: refactor task creation)
block_copy_task_create calculates the area to be copied via
bdrv_dirty_bitmap_next_dirty_area, but that can return an unaligned byte
count if the image's last cluster end is not aligned to the bitmap's
granularity.
Always ALIGN_UP the resulting bytes value to satisfy block_copy_do_copy,
which requires the 'bytes' parameter to be aligned to cluster size.
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
block/block-copy.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/block/block-copy.c b/block/block-copy.c
index f7428a7c08..a30b9097ef 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -142,6 +142,9 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
return NULL;
}
+ assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
+ bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
+
/* region is dirty, so no existent tasks possible in it */
assert(!find_conflicting_task(s, offset, bytes));

View File

@@ -1,206 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 23 Aug 2021 11:28:32 +0200
Subject: [PATCH] monitor/qmp: fix race with clients disconnecting early
The following sequence can produce a race condition that results in
responses meant for different clients being sent to the wrong one:
(QMP, no OOB)
1) client A connects
2) client A sends 'qmp_capabilities'
3) 'qmp_dispatch' runs in coroutine, schedules out to
'do_qmp_dispatch_bh' and yields
4) client A disconnects (i.e. aborts, crashes, etc...)
5) client B connects
6) 'do_qmp_dispatch_bh' runs 'qmp_capabilities' and wakes calling coroutine
7) capabilities are now set and 'mon->commands' is set to '&qmp_commands'
8) 'qmp_dispatch' returns to 'monitor_qmp_dispatch'
9) success message is sent to client B *without it ever having sent
'qmp_capabilities' itself*
9a) even if client B ignores it, it will now presumably send it's own
greeting, which will error because caps are already set
The fix proposed here uses an atomic, sequential connection number
stored in the MonitorQMP struct, which is incremented everytime a new
client connects. Since it is not changed on CHR_EVENT_CLOSED, the
behaviour of allowing a client to disconnect only one side of the
connection is retained.
The connection_nr needs to be exposed outside of the monitor subsystem,
since qmp_dispatch lives in qapi code. It needs to be checked twice,
once for actually running the command in the BH (fixes 7), and once for
sending back a response (fixes 9).
This satisfies my local reproducer - using multiple clients constantly
looping to open a connection, send the greeting, then exiting no longer
crashes other, normally behaving clients with unrelated responses.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/monitor/monitor.h | 1 +
monitor/monitor-internal.h | 7 +++++++
monitor/monitor.c | 15 +++++++++++++++
monitor/qmp.c | 15 ++++++++++++++-
qapi/qmp-dispatch.c | 21 +++++++++++++++++----
stubs/monitor-core.c | 5 +++++
6 files changed, 59 insertions(+), 5 deletions(-)
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index a4b40e8391..d64ae8f34e 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts;
Monitor *monitor_cur(void);
Monitor *monitor_set_cur(Coroutine *co, Monitor *mon);
bool monitor_cur_is_qmp(void);
+int monitor_get_connection_nr(const Monitor *mon);
void monitor_init_globals(void);
void monitor_init_globals_core(void);
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
index caa2e90ef2..e1596f79ab 100644
--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
@@ -152,6 +152,13 @@ typedef struct {
QemuMutex qmp_queue_lock;
/* Input queue that holds all the parsed QMP requests */
GQueue *qmp_requests;
+
+ /*
+ * A sequential number that gets incremented on every new CHR_EVENT_OPENED.
+ * Used to avoid leftover responses in BHs from being sent to the wrong
+ * client. Access with atomics.
+ */
+ int connection_nr;
} MonitorQMP;
/**
diff --git a/monitor/monitor.c b/monitor/monitor.c
index 86949024f6..c306cadcf4 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -135,6 +135,21 @@ bool monitor_cur_is_qmp(void)
return cur_mon && monitor_is_qmp(cur_mon);
}
+/**
+ * If @mon is a QMP monitor, return the connection_nr, otherwise -1.
+ */
+int monitor_get_connection_nr(const Monitor *mon)
+{
+ MonitorQMP *qmp_mon;
+
+ if (!monitor_is_qmp(mon)) {
+ return -1;
+ }
+
+ qmp_mon = container_of(mon, MonitorQMP, common);
+ return qatomic_read(&qmp_mon->connection_nr);
+}
+
/**
* Is @mon is using readline?
* Note: not all HMP monitors use readline, e.g., gdbserver has a
diff --git a/monitor/qmp.c b/monitor/qmp.c
index 092c527b6f..6b8cfcf6d8 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -141,6 +141,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
QDict *rsp;
QDict *error;
+ int conn_nr_before = qatomic_read(&mon->connection_nr);
+
rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
&mon->common);
@@ -156,7 +158,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
}
}
- monitor_qmp_respond(mon, rsp);
+ /*
+ * qmp_dispatch might have yielded and waited for a BH, in which case there
+ * is a chance a new client connected in the meantime - if this happened,
+ * the command will not have been executed, but we also need to ensure that
+ * we don't send back a corresponding response on a line that no longer
+ * belongs to this request.
+ */
+ if (conn_nr_before == qatomic_read(&mon->connection_nr)) {
+ monitor_qmp_respond(mon, rsp);
+ }
+
qobject_unref(rsp);
}
@@ -444,6 +456,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
switch (event) {
case CHR_EVENT_OPENED:
+ qatomic_inc_fetch(&mon->connection_nr);
mon->commands = &qmp_cap_negotiation_commands;
monitor_qmp_caps_reset(mon);
data = qmp_greeting(mon);
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 0990873ec8..e605003771 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -117,16 +117,28 @@ typedef struct QmpDispatchBH {
QObject **ret;
Error **errp;
Coroutine *co;
+ int conn_nr;
} QmpDispatchBH;
static void do_qmp_dispatch_bh(void *opaque)
{
QmpDispatchBH *data = opaque;
- assert(monitor_cur() == NULL);
- monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
- data->cmd->fn(data->args, data->ret, data->errp);
- monitor_set_cur(qemu_coroutine_self(), NULL);
+ /*
+ * A QMP monitor tracks it's client with a connection number, if this
+ * changes during the scheduling delay of this BH, we must not execute the
+ * command. Otherwise a badly placed 'qmp_capabilities' might affect the
+ * connection state of a client it was never meant for.
+ */
+ if (data->conn_nr == monitor_get_connection_nr(data->cur_mon)) {
+ assert(monitor_cur() == NULL);
+ monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
+ data->cmd->fn(data->args, data->ret, data->errp);
+ monitor_set_cur(qemu_coroutine_self(), NULL);
+ } else {
+ error_setg(data->errp, "active monitor connection changed");
+ }
+
aio_co_wake(data->co);
}
@@ -231,6 +243,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
.ret = &ret,
.errp = &err,
.co = qemu_coroutine_self(),
+ .conn_nr = monitor_get_connection_nr(cur_mon),
};
aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
&data);
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
index afa477aae6..d3ff124bf3 100644
--- a/stubs/monitor-core.c
+++ b/stubs/monitor-core.c
@@ -12,6 +12,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
return NULL;
}
+int monitor_get_connection_nr(const Monitor *mon)
+{
+ return -1;
+}
+
void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
{
}

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Mon, 14 Sep 2020 19:32:21 +0200
Subject: [PATCH] Revert "qemu-img convert: Don't pre-zero images"
This reverts commit edafc70c0c8510862f2f213a3acf7067113bcd08.
As it correlates with causing issues on LVM allocation
https://bugzilla.proxmox.com/show_bug.cgi?id=3002
---
qemu-img.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/qemu-img.c b/qemu-img.c
index 9635e9c001..c7884d248a 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2079,6 +2079,15 @@ static int convert_do_copy(ImgConvertState *s)
s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
}
+ if (!s->has_zero_init && !s->target_has_backing &&
+ bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
+ {
+ ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
+ if (ret == 0) {
+ s->has_zero_init = true;
+ }
+ }
+
/* Allocate buffer for copied data. For compressed images, only one cluster
* can be copied at a time. */
if (s->compressed) {

View File

@@ -1,76 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sam Li <faithilikerun@gmail.com>
Date: Sat, 24 Sep 2022 22:48:15 +0800
Subject: [PATCH] block/io_uring: revert "Use io_uring_register_ring_fd() to
skip fd operations"
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1193
The commit "Use io_uring_register_ring_fd() to skip fd operations" broke
when booting a guest with iothread and io_uring. That is because the
io_uring_register_ring_fd() call is made from the main thread instead of
IOThread where io_uring_submit() is called. It can not be guaranteed
to register the ring fd in the correct thread or unregister the same ring
fd if the IOThread is disabled. This optimization is not critical so we
will revert previous commit.
This reverts commit e2848bc574fe2715c694bf8fe9a1ba7f78a1125a
and 77e3f038af1764983087e3551a0fde9951952c4d.
Signed-off-by: Sam Li <faithilikerun@gmail.com>
---
block/io_uring.c | 13 +------------
meson.build | 1 -
2 files changed, 1 insertion(+), 13 deletions(-)
diff --git a/block/io_uring.c b/block/io_uring.c
index a1760152e0..973e15d876 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -11,7 +11,6 @@
#include "qemu/osdep.h"
#include <liburing.h>
#include "block/aio.h"
-#include "qemu/error-report.h"
#include "qemu/queue.h"
#include "block/block.h"
#include "block/raw-aio.h"
@@ -19,7 +18,6 @@
#include "qapi/error.h"
#include "trace.h"
-
/* io_uring ring size */
#define MAX_ENTRIES 128
@@ -432,17 +430,8 @@ LuringState *luring_init(Error **errp)
}
ioq_init(&s->io_q);
-#ifdef CONFIG_LIBURING_REGISTER_RING_FD
- if (io_uring_register_ring_fd(&s->ring) < 0) {
- /*
- * Only warn about this error: we will fallback to the non-optimized
- * io_uring operations.
- */
- warn_report("failed to register linux io_uring ring file descriptor");
- }
-#endif
-
return s;
+
}
void luring_cleanup(LuringState *s)
diff --git a/meson.build b/meson.build
index 20fddbd707..d5230eadd6 100644
--- a/meson.build
+++ b/meson.build
@@ -1793,7 +1793,6 @@ config_host_data.set('CONFIG_LIBNFS', libnfs.found())
config_host_data.set('CONFIG_LIBSSH', libssh.found())
config_host_data.set('CONFIG_LINUX_AIO', libaio.found())
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
-config_host_data.set('CONFIG_LIBURING_REGISTER_RING_FD', cc.has_function('io_uring_register_ring_fd', prefix: '#include <liburing.h>', dependencies:linux_io_uring))
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())

View File

@@ -0,0 +1,87 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Tue, 25 Aug 2020 07:36:36 +0200
Subject: [PATCH] usb: fix setup_len init (CVE-2020-14364)
Store calculated setup_len in a local variable, verify it, and only
write it to the struct (USBDevice->setup_len) in case it passed the
sanity checks.
This prevents other code (do_token_{in,out} functions specifically)
from working with invalid USBDevice->setup_len values and overrunning
the USBDevice->setup_buf[] buffer.
Fixes: CVE-2020-14364
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Tested-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Li Qiang <liq3ea@gmail.com>
Message-id: 20200825053636.29648-1-kraxel@redhat.com
(cherry picked from commit b946434f2659a182afc17e155be6791ebfb302eb)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/usb/core.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/hw/usb/core.c b/hw/usb/core.c
index 5abd128b6b..5234dcc73f 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream)
static void do_token_setup(USBDevice *s, USBPacket *p)
{
int request, value, index;
+ unsigned int setup_len;
if (p->iov.size != 8) {
p->status = USB_RET_STALL;
@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p)
usb_packet_copy(p, s->setup_buf, p->iov.size);
s->setup_index = 0;
p->actual_length = 0;
- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];
- if (s->setup_len > sizeof(s->data_buf)) {
+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];
+ if (setup_len > sizeof(s->data_buf)) {
fprintf(stderr,
"usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n",
- s->setup_len, sizeof(s->data_buf));
+ setup_len, sizeof(s->data_buf));
p->status = USB_RET_STALL;
return;
}
+ s->setup_len = setup_len;
request = (s->setup_buf[0] << 8) | s->setup_buf[1];
value = (s->setup_buf[3] << 8) | s->setup_buf[2];
@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p)
static void do_parameter(USBDevice *s, USBPacket *p)
{
int i, request, value, index;
+ unsigned int setup_len;
for (i = 0; i < 8; i++) {
s->setup_buf[i] = p->parameter >> (i*8);
}
s->setup_state = SETUP_STATE_PARAM;
- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];
s->setup_index = 0;
request = (s->setup_buf[0] << 8) | s->setup_buf[1];
value = (s->setup_buf[3] << 8) | s->setup_buf[2];
index = (s->setup_buf[5] << 8) | s->setup_buf[4];
- if (s->setup_len > sizeof(s->data_buf)) {
+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];
+ if (setup_len > sizeof(s->data_buf)) {
fprintf(stderr,
"usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n",
- s->setup_len, sizeof(s->data_buf));
+ setup_len, sizeof(s->data_buf));
p->status = USB_RET_STALL;
return;
}
+ s->setup_len = setup_len;
if (p->pid == USB_TOKEN_OUT) {
usb_packet_copy(p, s->data_buf, s->setup_len);

View File

@@ -1,51 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Yusuke Okada <okada.yusuke@jp.fujitsu.com>
Date: Thu, 18 Aug 2022 14:46:19 -0400
Subject: [PATCH] virtiofsd: use g_date_time_get_microsecond to get subsecond
The "%f" specifier in g_date_time_format() is only available in glib
2.65.2 or later. If combined with older glib, the function returns null
and the timestamp displayed as "(null)".
For backward compatibility, g_date_time_get_microsecond should be used
to retrieve subsecond.
In this patch the g_date_time_format() leaves subsecond field as "%06d"
and let next snprintf to format with g_date_time_get_microsecond.
Signed-off-by: Yusuke Okada <okada.yusuke@jp.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: 20220818184618.2205172-1-yokada.996@gmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry-picked from commit f16d15c9276bd8f501f861c39cbd4adc812d0c1d)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
tools/virtiofsd/passthrough_ll.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 371a7bead6..20f0f41f99 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -4185,6 +4185,7 @@ static void setup_nofile_rlimit(unsigned long rlimit_nofile)
static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
{
g_autofree char *localfmt = NULL;
+ char buf[64];
if (current_log_level < level) {
return;
@@ -4197,9 +4198,11 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
fmt);
} else {
g_autoptr(GDateTime) now = g_date_time_new_now_utc();
- g_autofree char *nowstr = g_date_time_format(now, "%Y-%m-%d %H:%M:%S.%f%z");
+ g_autofree char *nowstr = g_date_time_format(now,
+ "%Y-%m-%d %H:%M:%S.%%06d%z");
+ snprintf(buf, 64, nowstr, g_date_time_get_microsecond(now));
localfmt = g_strdup_printf("[%s] [ID: %08ld] %s",
- nowstr, syscall(__NR_gettid), fmt);
+ buf, syscall(__NR_gettid), fmt);
}
fmt = localfmt;
}

View File

@@ -1,56 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maksim Davydov <davydov-max@yandex-team.ru>
Date: Thu, 25 Aug 2022 19:52:47 +0300
Subject: [PATCH] chardev: fix segfault in finalize
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If finalize chardev-msmouse or chardev-wctable is called immediately after
init it cases QEMU to crash with segfault. This happens because of
QTAILQ_REMOVE in qemu_input_handler_unregister tries to dereference
NULL pointer.
For instance, this error can be reproduced via `qom-list-properties`
command.
Signed-off-by: Maksim Davydov <davydov-max@yandex-team.ru>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20220825165247.33704-1-davydov-max@yandex-team.ru>
(trivial backport from fc0c128531ed55f058bfbad4f1348ebd9a0187f2)
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
chardev/msmouse.c | 4 +++-
chardev/wctablet.c | 4 +++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/chardev/msmouse.c b/chardev/msmouse.c
index eb9231dcdb..2cc1b16561 100644
--- a/chardev/msmouse.c
+++ b/chardev/msmouse.c
@@ -146,7 +146,9 @@ static void char_msmouse_finalize(Object *obj)
{
MouseChardev *mouse = MOUSE_CHARDEV(obj);
- qemu_input_handler_unregister(mouse->hs);
+ if (mouse->hs) {
+ qemu_input_handler_unregister(mouse->hs);
+ }
}
static QemuInputHandler msmouse_handler = {
diff --git a/chardev/wctablet.c b/chardev/wctablet.c
index e8b292c43c..43bdf6b608 100644
--- a/chardev/wctablet.c
+++ b/chardev/wctablet.c
@@ -319,7 +319,9 @@ static void wctablet_chr_finalize(Object *obj)
{
TabletChardev *tablet = WCTABLET_CHARDEV(obj);
- qemu_input_handler_unregister(tablet->hs);
+ if (tablet->hs) {
+ qemu_input_handler_unregister(tablet->hs);
+ }
}
static void wctablet_chr_open(Chardev *chr,

View File

@@ -1,42 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 28 Oct 2022 10:09:46 +0200
Subject: [PATCH] init: daemonize: defuse PID file resolve error
When proxmox-file-restore invokes QEMU, the PID file is a (temporary)
file that's already unlinked, so resolving the absolute path here
failed.
It should not be a critical error when the PID file unlink handler
can't be registered, because the path can't be resolved for whatever
reason. If the file is already gone from QEMU's perspective (i.e.
errno is ENOENT), silently ignore the error. Otherwise, print a
warning.
Reported-by: Dominik Csapak <d.csapak@proxmox.com>
Suggested-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
softmmu/vl.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 706bd7cff7..3381c56af7 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2438,10 +2438,11 @@ static void qemu_maybe_daemonize(const char *pid_file)
pid_file_realpath = g_malloc0(PATH_MAX);
if (!realpath(pid_file, pid_file_realpath)) {
- error_report("cannot resolve PID file path: %s: %s",
- pid_file, strerror(errno));
- unlink(pid_file);
- exit(1);
+ if (errno != ENOENT) {
+ warn_report("not removing PID file on exit: cannot resolve PID "
+ "file path: %s: %s", pid_file, strerror(errno));
+ }
+ return;
}
qemu_unlink_pidfile_notifier = (struct UnlinkPidfileNotifier) {

View File

@@ -1,77 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 27 Oct 2022 03:27:26 -0400
Subject: [PATCH] block/block-backend: blk_set_enable_write_cache is IO_CODE
blk_set_enable_write_cache() is defined as GLOBAL_STATE_CODE
but can be invoked from iothreads when handling scsi requests.
This triggers an assertion failure:
0x00007fd6c3515ce1 in raise () from /lib/x86_64-linux-gnu/libc.so.6
0x00007fd6c34ff537 in abort () from /lib/x86_64-linux-gnu/libc.so.6
0x00007fd6c34ff40f in ?? () from /lib/x86_64-linux-gnu/libc.so.6
0x00007fd6c350e662 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
0x000056149e2cea03 in blk_set_enable_write_cache (wce=true, blk=0x5614a01c27f0)
at ../src/block/block-backend.c:1949
0x000056149e2d0a67 in blk_set_enable_write_cache (blk=0x5614a01c27f0,
wce=<optimized out>) at ../src/block/block-backend.c:1951
0x000056149dfe9c59 in scsi_disk_apply_mode_select (p=0x7fd6b400c00e "\004",
page=<optimized out>, s=<optimized out>) at ../src/hw/scsi/scsi-disk.c:1520
mode_select_pages (change=true, len=18, p=0x7fd6b400c00e "\004", r=0x7fd6b4001ff0)
at ../src/hw/scsi/scsi-disk.c:1570
scsi_disk_emulate_mode_select (inbuf=<optimized out>, r=0x7fd6b4001ff0) at
../src/hw/scsi/scsi-disk.c:1640
scsi_disk_emulate_write_data (req=0x7fd6b4001ff0) at ../src/hw/scsi/scsi-disk.c:1934
0x000056149e18ff16 in virtio_scsi_handle_cmd_req_submit (req=<optimized out>,
req=<optimized out>, s=0x5614a12f16b0) at ../src/hw/scsi/virtio-scsi.c:719
virtio_scsi_handle_cmd_vq (vq=0x7fd6bab92140, s=0x5614a12f16b0) at
../src/hw/scsi/virtio-scsi.c:761
virtio_scsi_handle_cmd (vq=<optimized out>, vdev=<optimized out>) at
../src/hw/scsi/virtio-scsi.c:775
virtio_scsi_handle_cmd (vdev=0x5614a12f16b0, vq=0x7fd6bab92140) at
../src/hw/scsi/virtio-scsi.c:765
0x000056149e1a8aa6 in virtio_queue_notify_vq (vq=0x7fd6bab92140) at
../src/hw/virtio/virtio.c:2365
0x000056149e3ccea5 in aio_dispatch_handler (ctx=ctx@entry=0x5614a01babe0,
node=<optimized out>) at ../src/util/aio-posix.c:369
0x000056149e3cd868 in aio_dispatch_ready_handlers (ready_list=0x7fd6c09b2680,
ctx=0x5614a01babe0) at ../src/util/aio-posix.c:399
aio_poll (ctx=0x5614a01babe0, blocking=blocking@entry=true) at
../src/util/aio-posix.c:713
0x000056149e2a7796 in iothread_run (opaque=opaque@entry=0x56149ffde500) at
../src/iothread.c:67
0x000056149e3d0859 in qemu_thread_start (args=0x7fd6c09b26f0) at
../src/util/qemu-thread-posix.c:504
0x00007fd6c36b9ea7 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
0x00007fd6c35d9aef in clone () from /lib/x86_64-linux-gnu/libc.so.6
Changing GLOBAL_STATE_CODE in IO_CODE is allowed, since GSC callers are
allowed to call IO_CODE.
Resolves: #1272
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20221027072726.2681500-1-eesposit@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Tested-by: Antoine Damhet <antoine.damhet@shadow.tech>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit be8da05b5ed8fb546731b9edb997f303f272bad8)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/block-backend.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/block-backend.c b/block/block-backend.c
index d4a5df2ac2..1b563e628b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1946,7 +1946,7 @@ bool blk_enable_write_cache(BlockBackend *blk)
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
- GLOBAL_STATE_CODE();
+ IO_CODE();
blk->enable_write_cache = wce;
}

View File

@@ -1,90 +1,18 @@
Index: qemu/block/meson.build
Index: pve-qemu-kvm-5.1.0/qapi/block-core.json
===================================================================
--- qemu.orig/block/meson.build
+++ qemu/block/meson.build
@@ -111,6 +111,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: qemu/meson.build
===================================================================
--- qemu.orig/meson.build
+++ qemu/meson.build
@@ -967,6 +967,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1802,6 +1822,7 @@ config_host_data.set('CONFIG_NUMA', numa
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3965,6 +3986,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
Index: qemu/meson_options.txt
===================================================================
--- qemu.orig/meson_options.txt
+++ qemu/meson_options.txt
@@ -167,6 +167,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
Index: qemu/qapi/block-core.json
===================================================================
--- qemu.orig/qapi/block-core.json
+++ qemu/qapi/block-core.json
@@ -3209,7 +3209,7 @@
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
--- pve-qemu-kvm-5.1.0.orig/qapi/block-core.json
+++ pve-qemu-kvm-5.1.0/qapi/block-core.json
@@ -3041,7 +3041,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
- 'sheepdog', 'pbs',
+ 'sheepdog', 'pbs', 'vitastor',
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -4149,6 +4149,28 @@
'*server': ['InetSocketAddressBase'] } }
@@ -3889,6 +3889,28 @@
'*tag': 'str' } }
##
+# @BlockdevOptionsVitastor:
@@ -112,16 +40,16 @@ Index: qemu/qapi/block-core.json
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4593,6 +4615,7 @@
@@ -4234,6 +4256,7 @@
'replication': { 'type': 'BlockdevOptionsReplication',
'if': 'defined(CONFIG_REPLICATION)' },
'sheepdog': 'BlockdevOptionsSheepdog',
+ 'vitastor': 'BlockdevOptionsVitastor',
'ssh': 'BlockdevOptionsSsh',
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4985,6 +5008,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
@@ -4623,6 +4646,17 @@
'*cluster-size' : 'size' } }
##
+# @BlockdevCreateOptionsVitastor:
@@ -138,40 +66,97 @@ Index: qemu/qapi/block-core.json
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -5182,6 +5216,7 @@
@@ -4884,6 +4918,7 @@
'qed': 'BlockdevCreateOptionsQed',
'rbd': 'BlockdevCreateOptionsRbd',
'sheepdog': 'BlockdevCreateOptionsSheepdog',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: qemu/scripts/ci/org.centos/stream/8/x86_64/configure
Index: pve-qemu-kvm-5.1.0/configure
===================================================================
--- qemu.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ qemu/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
Index: a/block/vitastor.c
--- pve-qemu-kvm-5.1.0.orig/configure
+++ pve-qemu-kvm-5.1.0/configure
@@ -446,6 +446,7 @@ trace_backends="log"
trace_file="trace"
spice=""
rbd=""
+vitastor=""
smartcard=""
libusb=""
usb_redir=""
@@ -1383,6 +1384,10 @@ for opt do
;;
--enable-rbd) rbd="yes"
;;
+ --disable-vitastor) vitastor="no"
+ ;;
+ --enable-vitastor) vitastor="yes"
+ ;;
--disable-xfsctl) xfs="no"
;;
--enable-xfsctl) xfs="yes"
@@ -1901,6 +1906,7 @@ disabled with --disable-FEATURE, default
vhost-vdpa vhost-vdpa kernel backend support
spice spice
rbd rados block device (rbd)
+ vitastor vitastor block device
libiscsi iscsi support
libnfs nfs support
smartcard smartcard support (libcacard)
@@ -4234,6 +4240,27 @@ EOF
fi
##########################################
+# vitastor probe
+if test "$vitastor" != "no" ; then
+ cat > $TMPC <<EOF
+#include <vitastor_c.h>
+int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+}
+EOF
+ vitastor_libs="-lvitastor_client"
+ if compile_prog "" "$vitastor_libs" ; then
+ vitastor=yes
+ else
+ if test "$vitastor" = "yes" ; then
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
+ fi
+ vitastor=no
+ fi
+fi
+
+##########################################
# libssh probe
if test "$libssh" != "no" ; then
if $pkg_config --exists libssh; then
@@ -6969,6 +6996,7 @@ echo "Trace output file $trace_file-<pid
fi
echo "spice support $spice $(echo_version $spice $spice_protocol_version/$spice_server_version)"
echo "rbd support $rbd"
+echo "vitastor support $vitastor"
echo "xfsctl support $xfs"
echo "smartcard support $smartcard"
echo "libusb $libusb"
@@ -7644,6 +7672,10 @@ if test "$rbd" = "yes" ; then
echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
fi
+if test "$vitastor" = "yes" ; then
+ echo "CONFIG_VITASTOR=y" >> $config_host_mak
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
+fi
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
if test "$coroutine_pool" = "yes" ; then
Index: pve-qemu-kvm-5.1.0/block/vitastor.c
===================================================================
--- /dev/null
+++ a/block/vitastor.c
@@ -0,0 +1,797 @@
+++ pve-qemu-kvm-5.1.0/block/vitastor.c
@@ -0,0 +1,598 @@
+// Copyright (c) Vitaliy Filippov, 2019+
+// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
+
@@ -227,7 +212,6 @@ Index: a/block/vitastor.c
+ char *etcd_host;
+ char *etcd_prefix;
+ char *image;
+ int skip_parents;
+ uint64_t inode;
+ uint64_t pool;
+ uint64_t size;
@@ -238,10 +222,6 @@ Index: a/block/vitastor.c
+ int rdma_gid_index;
+ int rdma_mtu;
+ QemuMutex mutex;
+
+ uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
+ uint32_t last_bitmap_granularity;
+ uint8_t *last_bitmap;
+} VitastorClient;
+
+typedef struct VitastorRPC
@@ -251,9 +231,6 @@ Index: a/block/vitastor.c
+ QEMUIOVector *iov;
+ long ret;
+ int complete;
+ uint64_t inode, offset, len;
+ uint32_t bitmap_granularity;
+ uint8_t *bitmap;
+} VitastorRPC;
+
+static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
@@ -329,7 +306,6 @@ Index: a/block/vitastor.c
+ if (!strcmp(name, "inode") ||
+ !strcmp(name, "pool") ||
+ !strcmp(name, "size") ||
+ !strcmp(name, "skip-parents") ||
+ !strcmp(name, "use-rdma") ||
+ !strcmp(name, "rdma-port_num") ||
+ !strcmp(name, "rdma-gid-index") ||
@@ -389,65 +365,41 @@ Index: a/block/vitastor.c
+ }
+}
+
+static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
+{
+ aio_set_fd_handler(ctx, fd,
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
+ 0 /*is_external*/,
+#endif
+ fd_read, fd_write,
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
+ NULL /*io_flush*/,
+#endif
+#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
+ NULL /*io_poll*/,
+#endif
+#if QEMU_VERSION_MAJOR >= 7
+ NULL /*io_poll_ready*/,
+#endif
+ opaque);
+}
+
+static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+ VitastorRPC task;
+ VitastorClient *client = bs->opaque;
+ void *image = NULL;
+ int64_t ret = 0;
+ qemu_mutex_init(&client->mutex);
+ client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
+ // FIXME: Rename to etcd_address
+ client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
+ client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
+ client->skip_parents = qdict_get_try_int(options, "skip-parents", 0);
+ client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
+ client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
+ client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
+ client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
+ client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
+ client->proxy = vitastor_c_create_qemu(
+ vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
+ (QEMUSetFDHandler*)aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
+ client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
+ );
+ image = client->image = g_strdup(qdict_get_try_str(options, "image"));
+ client->image = g_strdup(qdict_get_try_str(options, "image"));
+ client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
+ // Get image metadata (size and readonly flag) or just wait until the client is ready
+ if (!image)
+ client->image = (char*)"x";
+ task.complete = 0;
+ task.bs = bs;
+ if (qemu_in_coroutine())
+ {
+ vitastor_co_get_metadata(&task);
+ }
+ else
+ {
+ bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+ BDRV_POLL_WHILE(bs, !task.complete);
+ }
+ client->image = image;
+ if (client->image)
+ {
+ // Get image metadata (size and readonly flag)
+ VitastorRPC task;
+ task.complete = 0;
+ task.bs = bs;
+ if (qemu_in_coroutine())
+ {
+ vitastor_co_get_metadata(&task);
+ }
+ else
+ {
+ qemu_coroutine_enter(qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
+ }
+ BDRV_POLL_WHILE(bs, !task.complete);
+ client->watch = (void*)task.ret;
+ client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
+ client->size = vitastor_c_inode_get_size(client->watch);
@@ -469,10 +421,9 @@ Index: a/block/vitastor.c
+ client->pool = qdict_get_try_int(options, "pool", 0);
+ if (client->pool)
+ {
+ client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
+ client->inode = (client->inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
+ }
+ client->size = qdict_get_try_int(options, "size", 0);
+ vitastor_c_close_watch(client->proxy, (void*)task.ret);
+ }
+ if (!client->size)
+ {
@@ -494,7 +445,6 @@ Index: a/block/vitastor.c
+ qdict_del(options, "inode");
+ qdict_del(options, "pool");
+ qdict_del(options, "size");
+ qdict_del(options, "skip-parents");
+ return ret;
+}
+
@@ -511,8 +461,6 @@ Index: a/block/vitastor.c
+ g_free(client->etcd_prefix);
+ if (client->image)
+ g_free(client->image);
+ free(client->last_bitmap);
+ client->last_bitmap = NULL;
+}
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
@@ -639,13 +587,7 @@ Index: a/block/vitastor.c
+ vitastor_co_generic_bh_cb(opaque, retval);
+}
+
+static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
+#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
+ int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
+#else
+ uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
+#endif
+)
+static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags)
+{
+ VitastorClient *client = bs->opaque;
+ VitastorRPC task;
@@ -665,26 +607,13 @@ Index: a/block/vitastor.c
+ return task.ret;
+}
+
+static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
+#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
+ int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
+#else
+ uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
+#endif
+)
+static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags)
+{
+ VitastorClient *client = bs->opaque;
+ VitastorRPC task;
+ vitastor_co_init_task(bs, &task);
+ task.iov = iov;
+
+ if (client->last_bitmap)
+ {
+ // Invalidate last bitmap on write
+ free(client->last_bitmap);
+ client->last_bitmap = NULL;
+ }
+
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
@@ -698,140 +627,6 @@ Index: a/block/vitastor.c
+ return task.ret;
+}
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
+#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
+static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap)
+{
+ VitastorRPC *task = opaque;
+ VitastorClient *client = task->bs->opaque;
+ task->ret = retval;
+ task->complete = 1;
+ if (retval >= 0)
+ {
+ task->bitmap = bitmap;
+ if (client->last_bitmap_inode == task->inode &&
+ client->last_bitmap_offset == task->offset &&
+ client->last_bitmap_len == task->len)
+ {
+ free(client->last_bitmap);
+ client->last_bitmap = bitmap;
+ }
+ }
+ if (qemu_coroutine_self() != task->co)
+ {
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
+ aio_co_wake(task->co);
+#else
+ qemu_coroutine_enter(task->co, NULL);
+ qemu_aio_release(task);
+#endif
+ }
+}
+
+static int coroutine_fn vitastor_co_block_status(
+ BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file)
+{
+ // Allocated => return BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID
+ // Not allocated => return 0
+ // Error => return -errno
+ // Set pnum to length of the extent, `*map` = `offset`, `*file` = `bs`
+ VitastorRPC task;
+ VitastorClient *client = bs->opaque;
+ uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
+ uint8_t bit = 0;
+ if (client->last_bitmap && client->last_bitmap_inode == inode &&
+ client->last_bitmap_offset <= offset &&
+ client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes))
+ {
+ // Use the previously read bitmap
+ task.bitmap_granularity = client->last_bitmap_granularity;
+ task.offset = client->last_bitmap_offset;
+ task.len = client->last_bitmap_len;
+ task.bitmap = client->last_bitmap;
+ }
+ else
+ {
+ // Read bitmap from this position, rounding to full inode PG blocks
+ uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
+ if (!block_size)
+ return -EAGAIN;
+ // Init coroutine
+ vitastor_co_init_task(bs, &task);
+ free(client->last_bitmap);
+ task.inode = client->last_bitmap_inode = inode;
+ task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
+ task.offset = client->last_bitmap_offset = offset / block_size * block_size;
+ task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
+ task.bitmap = client->last_bitmap = NULL;
+ qemu_mutex_lock(&client->mutex);
+ vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
+ qemu_mutex_unlock(&client->mutex);
+ while (!task.complete)
+ {
+ qemu_coroutine_yield();
+ }
+ if (task.ret < 0)
+ {
+ // Error
+ return task.ret;
+ }
+ }
+ if (want_zero)
+ {
+ // Get precise mapping with all holes
+ uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
+ uint64_t bmp_len = task.len / task.bitmap_granularity;
+ uint64_t bmp_end = bmp_pos+1;
+ bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
+ while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit)
+ {
+ bmp_end++;
+ }
+ *pnum = (bmp_end-bmp_pos) * task.bitmap_granularity;
+ }
+ else
+ {
+ // Get larger allocated extents, possibly with false positives
+ uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
+ uint64_t bmp_end = (offset+bytes-task.offset) / task.bitmap_granularity - bmp_pos;
+ while (bmp_pos < bmp_end)
+ {
+ if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8)
+ {
+ bit = bit || task.bitmap[bmp_pos >> 3];
+ bmp_pos += 8;
+ }
+ else
+ {
+ bit = bit || ((task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1);
+ bmp_pos++;
+ }
+ }
+ *pnum = bytes;
+ }
+ if (bit)
+ {
+ *map = offset;
+ *file = bs;
+ }
+ return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
+}
+#endif
+#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
+// QEMU 1.7-2.11
+static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+{
+ int64_t map = 0;
+ int64_t pnumbytes = 0;
+ int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, &pnumbytes, &map, &file);
+ *pnum = pnumbytes/BDRV_SECTOR_SIZE;
+ return r;
+}
+#endif
+#endif
+
+#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
+static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
+{
@@ -939,15 +734,6 @@ Index: a/block/vitastor.c
+ .bdrv_co_truncate = vitastor_co_truncate,
+#endif
+
+#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
+ // For snapshot export
+ .bdrv_co_block_status = vitastor_co_block_status,
+#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
+ .bdrv_co_get_block_status = vitastor_co_get_block_status,
+#endif
+#endif
+
+#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
+ .bdrv_co_preadv = vitastor_co_preadv,
+ .bdrv_co_pwritev = vitastor_co_pwritev,
@@ -969,3 +755,24 @@ Index: a/block/vitastor.c
+}
+
+block_init(vitastor_block_init);
Index: pve-qemu-kvm-5.1.0/block/Makefile.objs
===================================================================
--- pve-qemu-kvm-5.1.0.orig/block/Makefile.objs
+++ pve-qemu-kvm-5.1.0/block/Makefile.objs
@@ -32,6 +32,7 @@ block-obj-$(if $(CONFIG_LIBISCSI),y,n) +
block-obj-$(CONFIG_LIBNFS) += nfs.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_VITASTOR) += vitastor.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_LIBSSH) += ssh.o
block-obj-y += backup-dump.o
@@ -61,6 +62,8 @@ curl.o-cflags := $(CURL_CFLAGS)
curl.o-libs := $(CURL_LIBS)
rbd.o-cflags := $(RBD_CFLAGS)
rbd.o-libs := $(RBD_LIBS)
+vitastor.o-cflags := $(VITASTOR_CFLAGS)
+vitastor.o-libs := $(VITASTOR_LIBS)
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
ssh.o-cflags := $(LIBSSH_CFLAGS)

View File

@@ -14,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index 48cd096624..3d60b80286 100644
index 9a00d4190a..bb72e1e5ca 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -553,7 +553,7 @@ static QemuOptsList raw_runtime_opts = {
@@ -508,7 +508,7 @@ static QemuOptsList raw_runtime_opts = {
{
.name = "locking",
.type = QEMU_OPT_STRING,
@@ -26,7 +26,7 @@ index 48cd096624..3d60b80286 100644
},
{
.name = "pr-manager",
@@ -653,7 +653,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
@@ -606,7 +606,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->use_lock = false;
break;
case ON_OFF_AUTO_AUTO:

View File

@@ -5,21 +5,22 @@ Subject: [PATCH] PVE: [Config] Adjust network script path to /etc/kvm/
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/net/net.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
include/net/net.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/include/net/net.h b/include/net/net.h
index 523136c7ac..c27859b4f6 100644
index e7ef42d62b..4fd1144e58 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -226,8 +226,8 @@ void netdev_add(QemuOpts *opts, Error **errp);
@@ -209,8 +209,9 @@ void netdev_add(QemuOpts *opts, Error **errp);
int net_hub_id_for_client(NetClientState *nc, int *id);
NetClientState *net_hub_port_find(int hub_id);
-#define DEFAULT_NETWORK_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifup"
-#define DEFAULT_NETWORK_DOWN_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifdown"
+#define DEFAULT_NETWORK_SCRIPT CONFIG_SYSCONFDIR "/kvm/kvm-ifup"
+#define DEFAULT_NETWORK_DOWN_SCRIPT CONFIG_SYSCONFDIR "/kvm/kvm-ifdown"
-#define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
-#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
+#define DEFAULT_NETWORK_SCRIPT "/etc/kvm/kvm-ifup"
+#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/kvm/kvm-ifdown"
+
#define DEFAULT_BRIDGE_HELPER CONFIG_QEMU_HELPERDIR "/qemu-bridge-helper"
#define DEFAULT_BRIDGE_INTERFACE "br0"

View File

@@ -10,10 +10,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 82004b65b9..4868db8f94 100644
index e1a5c174dc..8973d3160f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2133,9 +2133,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
@@ -1975,9 +1975,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
#define CPU_RESOLVING_TYPE TYPE_X86_CPU
#ifdef TARGET_X86_64
@@ -24,4 +24,4 @@ index 82004b65b9..4868db8f94 100644
+#define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("kvm32")
#endif
#define cpu_list x86_cpu_list
#define cpu_signal_handler cpu_x86_signal_handler

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/ui/spice-core.c b/ui/spice-core.c
index c3ac20ad43..37774f1c0a 100644
index ecc2ec2c55..ca04965ead 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -689,32 +689,35 @@ static void qemu_spice_init(void)
@@ -668,32 +668,35 @@ void qemu_spice_init(void)
if (tls_port) {
x509_dir = qemu_opt_get(opts, "x509-dir");

View File

@@ -0,0 +1,24 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:34 +0200
Subject: [PATCH] PVE: [Config] smm_available = false
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/i386/x86.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 67bee1bcb8..d954bf77b9 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -856,7 +856,7 @@ bool x86_machine_is_smm_enabled(X86MachineState *x86ms)
if (tcg_enabled() || qtest_enabled()) {
smm_available = true;
} else if (kvm_enabled()) {
- smm_available = kvm_has_smm();
+ smm_available = false;
}
if (smm_available) {

View File

@@ -9,7 +9,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/block/gluster.c b/block/gluster.c
index b60213ab80..93da76bc31 100644
index 4f1448e2bc..715f06394c 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -42,7 +42,7 @@

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c
index f826410f40..64a8d7d48b 100644
index 688074c64b..8ae39abb46 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -820,6 +820,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@@ -651,6 +651,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
rados_conf_set(*cluster, "rbd_cache", "false");
}

View File

@@ -11,10 +11,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
3 files changed, 43 insertions(+)
diff --git a/net/net.c b/net/net.c
index 2db160e063..8329347891 100644
index bbaedb3c7a..9de23ec834 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1343,6 +1343,33 @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
@@ -1276,6 +1276,33 @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
}
}
@@ -49,7 +49,7 @@ index 2db160e063..8329347891 100644
{
NetClientState *nc;
diff --git a/qapi/net.json b/qapi/net.json
index 75ba2cb989..a3c93ab88f 100644
index ddb113e5e5..eb3b785984 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -35,6 +35,21 @@
@@ -75,14 +75,14 @@ index 75ba2cb989..a3c93ab88f 100644
# @netdev_add:
#
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 7f810b0e97..a2358e303a 100644
index cffae27666..5a3e3de95f 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -26,6 +26,7 @@
'system_wakeup' ],
# Commands allowed to return a non-dictionary
'command-returns-exceptions': [
+ 'get_link_status',
@@ -5,6 +5,7 @@
{ 'pragma': {
# Commands allowed to return a non-dictionary:
'returns-whitelist': [
+ 'get_link_status',
'human-monitor-command',
'qom-get',
'query-tpm-models',
'query-migrate-cache-size',

View File

@@ -16,7 +16,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/block/gluster.c b/block/gluster.c
index 93da76bc31..1079b6186b 100644
index 715f06394c..6d02170d9b 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -57,6 +57,7 @@ typedef struct GlusterAIOCB {
@@ -27,7 +27,7 @@ index 93da76bc31..1079b6186b 100644
} GlusterAIOCB;
typedef struct BDRVGlusterState {
@@ -752,8 +753,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
@@ -759,8 +760,10 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
acb->ret = 0; /* Success */
} else if (ret < 0) {
acb->ret = -errno; /* Read/Write failed */
@@ -39,15 +39,15 @@ index 93da76bc31..1079b6186b 100644
}
aio_co_schedule(acb->aio_context, acb->coroutine);
@@ -1022,6 +1025,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
@@ -1028,6 +1031,7 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
+ acb.is_write = true;
ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1203,9 +1207,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
@@ -1209,9 +1213,11 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
acb.aio_context = bdrv_get_aio_context(bs);
if (write) {
@@ -59,7 +59,7 @@ index 93da76bc31..1079b6186b 100644
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
}
@@ -1269,6 +1275,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
@@ -1275,6 +1281,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
@@ -67,11 +67,11 @@ index 93da76bc31..1079b6186b 100644
ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) {
@@ -1317,6 +1324,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
@@ -1321,6 +1328,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
acb.ret = 0;
acb.coroutine = qemu_coroutine_self();
acb.aio_context = bdrv_get_aio_context(bs);
+ acb.is_write = true;
ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {

View File

@@ -9,10 +9,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/qemu-img.c b/qemu-img.c
index 7d4b33b3da..bb36f42dd2 100644
index 5308773811..45aa024acc 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3010,7 +3010,8 @@ static int img_info(int argc, char **argv)
@@ -2955,7 +2955,8 @@ static int img_info(int argc, char **argv)
list = collect_image_info_list(image_opts, filename, fmt, chain,
force_share);
if (!list) {

View File

@@ -31,14 +31,13 @@ override the output file's size.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qemu-img-cmds.hx | 4 +-
qemu-img.c | 202 ++++++++++++++++++++++++++++++-----------------
2 files changed, 133 insertions(+), 73 deletions(-)
qemu-img.c | 191 +++++++++++++++++++++++++++++------------------
2 files changed, 121 insertions(+), 74 deletions(-)
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 1b1dab5b17..d1616c045a 100644
index b89c019b76..91d18a4819 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -58,9 +58,9 @@ SRST
@@ -54,10 +53,10 @@ index 1b1dab5b17..d1616c045a 100644
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index bb36f42dd2..74afcb79ef 100644
index 45aa024acc..af54d0896e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4826,10 +4826,12 @@ static int img_bitmap(int argc, char **argv)
@@ -4819,10 +4819,12 @@ static int img_bitmap(int argc, char **argv)
#define C_IF 04
#define C_OF 010
#define C_SKIP 020
@@ -70,7 +69,7 @@ index bb36f42dd2..74afcb79ef 100644
};
struct DdIo {
@@ -4905,6 +4907,19 @@ static int img_dd_skip(const char *arg,
@@ -4898,6 +4900,19 @@ static int img_dd_skip(const char *arg,
return 0;
}
@@ -90,7 +89,7 @@ index bb36f42dd2..74afcb79ef 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -4945,6 +4960,7 @@ static int img_dd(int argc, char **argv)
@@ -4938,6 +4953,7 @@ static int img_dd(int argc, char **argv)
{ "if", img_dd_if, C_IF },
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
@@ -98,7 +97,7 @@ index bb36f42dd2..74afcb79ef 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5020,91 +5036,112 @@ static int img_dd(int argc, char **argv)
@@ -5016,8 +5032,13 @@ static int img_dd(int argc, char **argv)
arg = NULL;
}
@@ -106,30 +105,53 @@ index bb36f42dd2..74afcb79ef 100644
- error_report("Must specify both input and output files");
+ if (!(dd.flags & C_IF) && (!fmt || strcmp(fmt, "raw") != 0)) {
+ error_report("Input format must be raw when readin from stdin");
ret = -1;
goto out;
}
-
- blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
- force_share);
-
- if (!blk1) {
+ ret = -1;
+ goto out;
+ }
+ if (!(dd.flags & C_OF) && strcmp(out_fmt, "raw") != 0) {
+ error_report("Output format must be raw when writing to stdout");
ret = -1;
goto out;
}
@@ -5029,85 +5050,101 @@ static int img_dd(int argc, char **argv)
goto out;
}
- blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
- force_share);
+ if (dd.flags & C_IF) {
+ blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
+ force_share);
- if (!blk1) {
- ret = -1;
- goto out;
+ if (!blk1) {
+ ret = -1;
+ goto out;
+ }
}
- drv = bdrv_find_format(out_fmt);
- if (!drv) {
- error_report("Unknown file format");
- ret = -1;
- goto out;
- }
+ if (dd.flags & C_OSIZE) {
+ size = dd.osize;
+ } else if (dd.flags & C_IF) {
+ size = blk_getlength(blk1);
+ if (size < 0) {
+ error_report("Failed to get size for '%s'", in.filename);
+ ret = -1;
+ goto out;
+ }
+ } else if (dd.flags & C_COUNT) {
+ size = dd.count * in.bsz;
+ } else {
+ error_report("Output size must be known when reading from stdin");
ret = -1;
goto out;
}
- proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
+ if (dd.flags & C_IF) {
+ blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
+ force_share);
- if (!proto_drv) {
- error_report_err(local_err);
@@ -147,50 +169,14 @@ index bb36f42dd2..74afcb79ef 100644
- proto_drv->format_name);
- ret = -1;
- goto out;
+ if (!blk1) {
+ ret = -1;
+ goto out;
+ }
+ if (!(dd.flags & C_OSIZE) && dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
+ dd.count * in.bsz < size) {
+ size = dd.count * in.bsz;
}
- create_opts = qemu_opts_append(create_opts, drv->create_opts);
- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
-
- opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
- size = blk_getlength(blk1);
- if (size < 0) {
- error_report("Failed to get size for '%s'", in.filename);
+ if (dd.flags & C_OSIZE) {
+ size = dd.osize;
+ } else if (dd.flags & C_IF) {
+ size = blk_getlength(blk1);
+ if (size < 0) {
+ error_report("Failed to get size for '%s'", in.filename);
+ ret = -1;
+ goto out;
+ }
+ } else if (dd.flags & C_COUNT) {
+ size = dd.count * in.bsz;
+ } else {
+ error_report("Output size must be known when reading from stdin");
ret = -1;
goto out;
}
- if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
+ if (!(dd.flags & C_OSIZE) && dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
dd.count * in.bsz < size) {
size = dd.count * in.bsz;
}
- /* Overflow means the specified offset is beyond input image's size */
- if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
- size < in.bsz * in.offset)) {
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
- } else {
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
- size - in.bsz * in.offset, &error_abort);
- }
+ if (dd.flags & C_OF) {
+ drv = bdrv_find_format(out_fmt);
+ if (!drv) {
@@ -200,11 +186,9 @@ index bb36f42dd2..74afcb79ef 100644
+ }
+ proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- size = blk_getlength(blk1);
- if (size < 0) {
- error_report("Failed to get size for '%s'", in.filename);
- ret = -1;
- goto out;
- }
@@ -228,18 +212,20 @@ index bb36f42dd2..74afcb79ef 100644
+ create_opts = qemu_opts_append(create_opts, drv->create_opts);
+ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
- /* TODO, we can't honour --image-opts for the target,
- * since it needs to be given in a format compatible
- * with the bdrv_create() call above which does not
- * support image-opts style.
- */
- blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
- false, false, false);
- if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
- dd.count * in.bsz < size) {
- size = dd.count * in.bsz;
- }
+ opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
- if (!blk2) {
- ret = -1;
- goto out;
- /* Overflow means the specified offset is beyond input image's size */
- if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
- size < in.bsz * in.offset)) {
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
- } else {
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
- size - in.bsz * in.offset, &error_abort);
- }
+ /* Overflow means the specified offset is beyond input image's size */
+ if (dd.flags & C_OSIZE) {
+ qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
@@ -250,7 +236,15 @@ index bb36f42dd2..74afcb79ef 100644
+ qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
+ size - in.bsz * in.offset, &error_abort);
+ }
+
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
- }
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
@@ -259,7 +253,14 @@ index bb36f42dd2..74afcb79ef 100644
+ ret = -1;
+ goto out;
+ }
+
- /* TODO, we can't honour --image-opts for the target,
- * since it needs to be given in a format compatible
- * with the bdrv_create() call above which does not
- * support image-opts style.
- */
- blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
- false, false, false);
+ /* TODO, we can't honour --image-opts for the target,
+ * since it needs to be given in a format compatible
+ * with the bdrv_create() call above which does not
@@ -267,7 +268,10 @@ index bb36f42dd2..74afcb79ef 100644
+ */
+ blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
+ false, false, false);
+
- if (!blk2) {
- ret = -1;
- goto out;
+ if (!blk2) {
+ ret = -1;
+ goto out;
@@ -275,54 +279,41 @@ index bb36f42dd2..74afcb79ef 100644
}
if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
@@ -5121,20 +5158,43 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
@@ -5125,11 +5162,17 @@ static int img_dd(int argc, char **argv)
for (out_pos = 0; in_pos < size; block_count++) {
+ int in_ret, out_ret;
int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
int in_ret, out_ret;
-
- ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
- if (ret < 0) {
- if (in_pos + in.bsz > size) {
- in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
+ size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+ if (blk1) {
+ in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
+ if (in_ret == 0) {
+ in_ret = bytes;
+ }
+ } else {
+ in_ret = read(STDIN_FILENO, in.buf, bytes);
+ in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
} else {
- in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
+ in_ret = read(STDIN_FILENO, in.buf, in_bsz);
+ if (in_ret == 0) {
+ /* early EOF is considered an error */
+ error_report("Input ended unexpectedly");
+ ret = -1;
+ goto out;
+ }
+ }
+ if (in_ret < 0) {
error_report("error while reading from input image file: %s",
- strerror(-ret));
+ strerror(-in_ret));
+ ret = -1;
goto out;
}
in_pos += bytes;
if (in_ret < 0) {
error_report("error while reading from input image file: %s",
@@ -5139,9 +5182,13 @@ static int img_dd(int argc, char **argv)
}
in_pos += in_ret;
- ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
- if (ret < 0) {
- out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+ if (blk2) {
+ out_ret = blk_pwrite(blk2, out_pos, in_ret, in.buf, 0);
+ if (out_ret == 0) {
+ out_ret = in_ret;
+ }
+ out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+ } else {
+ out_ret = write(STDOUT_FILENO, in.buf, in_ret);
+ }
+
- if (out_ret < 0) {
+ if (out_ret != in_ret) {
error_report("error while writing to output image file: %s",
- strerror(-ret));
+ strerror(-out_ret));
+ ret = -1;
goto out;
}
out_pos += bytes;
strerror(-out_ret));
ret = -1;

View File

@@ -10,16 +10,15 @@ an expected end of input.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
qemu-img.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 74afcb79ef..14594d44b6 100644
index af54d0896e..0f1d464392 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4827,11 +4827,13 @@ static int img_bitmap(int argc, char **argv)
@@ -4820,11 +4820,13 @@ static int img_bitmap(int argc, char **argv)
#define C_OF 010
#define C_SKIP 020
#define C_OSIZE 040
@@ -33,7 +32,7 @@ index 74afcb79ef..14594d44b6 100644
};
struct DdIo {
@@ -4920,6 +4922,19 @@ static int img_dd_osize(const char *arg,
@@ -4913,6 +4915,19 @@ static int img_dd_osize(const char *arg,
return 0;
}
@@ -53,7 +52,7 @@ index 74afcb79ef..14594d44b6 100644
static int img_dd(int argc, char **argv)
{
int ret = 0;
@@ -4934,12 +4949,14 @@ static int img_dd(int argc, char **argv)
@@ -4927,12 +4942,14 @@ static int img_dd(int argc, char **argv)
int c, i;
const char *out_fmt = "raw";
const char *fmt = NULL;
@@ -69,7 +68,7 @@ index 74afcb79ef..14594d44b6 100644
};
struct DdIo in = {
.bsz = 512, /* Block size is by default 512 bytes */
@@ -4961,6 +4978,7 @@ static int img_dd(int argc, char **argv)
@@ -4954,6 +4971,7 @@ static int img_dd(int argc, char **argv)
{ "of", img_dd_of, C_OF },
{ "skip", img_dd_skip, C_SKIP },
{ "osize", img_dd_osize, C_OSIZE },
@@ -77,7 +76,7 @@ index 74afcb79ef..14594d44b6 100644
{ NULL, NULL, 0 }
};
const struct option long_options[] = {
@@ -5157,9 +5175,10 @@ static int img_dd(int argc, char **argv)
@@ -5160,14 +5178,18 @@ static int img_dd(int argc, char **argv)
in.buf = g_new(uint8_t, in.bsz);
@@ -85,14 +84,12 @@ index 74afcb79ef..14594d44b6 100644
+ readsize = (dd.isize > 0) ? dd.isize : size;
+ for (out_pos = 0; in_pos < readsize; block_count++) {
int in_ret, out_ret;
- int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
+ int bytes = (in_pos + in.bsz > readsize) ? readsize - in_pos : in.bsz;
- size_t in_bsz = in_pos + in.bsz > size ? size - in_pos : in.bsz;
+ size_t in_bsz = in_pos + in.bsz > readsize ? readsize - in_pos : in.bsz;
if (blk1) {
in_ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
if (in_ret == 0) {
@@ -5168,6 +5187,9 @@ static int img_dd(int argc, char **argv)
in_ret = blk_pread(blk1, in_pos, in.buf, in_bsz);
} else {
in_ret = read(STDIN_FILENO, in.buf, bytes);
in_ret = read(STDIN_FILENO, in.buf, in_bsz);
if (in_ret == 0) {
+ if (dd.isize == 0) {
+ goto out;

View File

@@ -1,121 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:42 +0200
Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: fix getopt-string + add documentation]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
docs/tools/qemu-img.rst | 11 ++++++++++-
qemu-img-cmds.hx | 4 ++--
qemu-img.c | 23 ++++++++++++++---------
3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 85a6e05b35..699229eef6 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -208,6 +208,10 @@ Parameters to convert subcommand:
Parameters to dd subcommand:
+.. option:: -n
+
+ Skip the creation of the target volume
+
.. program:: qemu-img-dd
.. option:: bs=BLOCK_SIZE
@@ -488,7 +492,7 @@ Command description:
it doesn't need to be specified separately in this case.
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
dd copies from *INPUT* file to *OUTPUT* file converting it from
*FMT* format to *OUTPUT_FMT* format.
@@ -499,6 +503,11 @@ Command description:
The size syntax is similar to :manpage:`dd(1)`'s size syntax.
+ If the ``-n`` option is specified, the target volume creation will be
+ skipped. This is useful for formats such as ``rbd`` if the target
+ volume has already been created with site specific options that cannot
+ be supplied through ``qemu-img``.
+
.. option:: info [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [--backing-chain] [-U] FILENAME
Give information about the disk image *FILENAME*. Use it in
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index d1616c045a..b5b0bb4467 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -58,9 +58,9 @@ SRST
ERST
DEF("dd", img_dd,
- "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
SRST
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
ERST
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index 14594d44b6..c6b4a5567d 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4951,7 +4951,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t block_count = 0, out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -4989,7 +4989,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5009,6 +5009,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
+ case 'n':
+ skip_create = true;
+ break;
case 'U':
force_share = true;
break;
@@ -5139,13 +5142,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
+ if (!skip_create) {
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "%s: error while creating output image: ",
+ out.filename);
+ ret = -1;
+ goto out;
+ }
}
/* TODO, we can't honour --image-opts for the target,

View File

@@ -0,0 +1,65 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexandre Derumier <aderumier@odiso.com>
Date: Mon, 6 Apr 2020 12:16:42 +0200
Subject: [PATCH] PVE: [Up] qemu-img dd: add -n skip_create
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qemu-img.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 0f1d464392..9635e9c001 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4944,7 +4944,7 @@ static int img_dd(int argc, char **argv)
const char *fmt = NULL;
int64_t size = 0, readsize = 0;
int64_t block_count = 0, out_pos, in_pos;
- bool force_share = false;
+ bool force_share = false, skip_create = false;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -4982,7 +4982,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:U:n", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5002,6 +5002,9 @@ static int img_dd(int argc, char **argv)
case 'h':
help();
break;
+ case 'n':
+ skip_create = true;
+ break;
case 'U':
force_share = true;
break;
@@ -5142,13 +5145,15 @@ static int img_dd(int argc, char **argv)
size - in.bsz * in.offset, &error_abort);
}
- ret = bdrv_create(drv, out.filename, opts, &local_err);
- if (ret < 0) {
- error_reportf_err(local_err,
- "%s: error while creating output image: ",
- out.filename);
- ret = -1;
- goto out;
+ if (!skip_create) {
+ ret = bdrv_create(drv, out.filename, opts, &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "%s: error while creating output image: ",
+ out.filename);
+ ret = -1;
+ goto out;
+ }
}
/* TODO, we can't honour --image-opts for the target,

View File

@@ -10,14 +10,14 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/virtio/virtio-balloon.c | 33 +++++++++++++++++++++++++++++++--
monitor/hmp-cmds.c | 30 +++++++++++++++++++++++++++++-
qapi/machine.json | 22 +++++++++++++++++++++-
qapi/misc.json | 22 +++++++++++++++++++++-
3 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 73ac5eb675..bbfe7eca62 100644
index 22cb5df717..6513adb0a6 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -806,8 +806,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
@@ -805,8 +805,37 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
VirtIOBalloon *dev = opaque;
@@ -58,10 +58,10 @@ index 73ac5eb675..bbfe7eca62 100644
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index c6cd6f91dd..15572befb1 100644
index ae4b6a4246..6e26ea2cd0 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -715,7 +715,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
@@ -660,7 +660,35 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
@@ -98,13 +98,13 @@ index c6cd6f91dd..15572befb1 100644
qapi_free_BalloonInfo(info);
}
diff --git a/qapi/machine.json b/qapi/machine.json
index 6afd1936b0..8b4be9b718 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1054,9 +1054,29 @@
# @actual: the logical size of the VM in bytes
# Formula used: logical_vm_size = vm_ram_size - balloon_size
diff --git a/qapi/misc.json b/qapi/misc.json
index 9d32820dc1..44b1fb6fa7 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -226,10 +226,30 @@
#
# @actual: the number of bytes the balloon currently contains
#
+# @last_update: time when stats got updated from guest
+#
@@ -122,7 +122,8 @@ index 6afd1936b0..8b4be9b718 100644
+#
+# @max_mem: amount of memory (in bytes) assigned to the guest
+#
# Since: 0.14
# Since: 0.14.0
#
##
-{ 'struct': 'BalloonInfo', 'data': {'actual': 'int' } }
+{ 'struct': 'BalloonInfo',

View File

@@ -13,10 +13,10 @@ Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 4f4ab30f8c..76fff60a6b 100644
index 963088b798..32f630549e 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -99,6 +99,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
@@ -234,6 +234,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
info->hotpluggable_cpus = mc->has_hotpluggable_cpus;
info->numa_mem_supported = mc->numa_mem_supported;
info->deprecated = !!mc->deprecation_reason;
@@ -30,24 +30,24 @@ index 4f4ab30f8c..76fff60a6b 100644
info->default_cpu_type = g_strdup(mc->default_cpu_type);
info->has_default_cpu_type = true;
diff --git a/qapi/machine.json b/qapi/machine.json
index 8b4be9b718..555458f785 100644
index 481b1f07ec..268044a34b 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -138,6 +138,8 @@
@@ -342,6 +342,8 @@
#
# @is-default: whether the machine is default
#
+# @is-current: whether this machine is currently used
+#
# @cpu-max: maximum number of CPUs supported by the machine type
# (since 1.5)
# (since 1.5.0)
#
@@ -159,7 +161,7 @@
@@ -361,7 +363,7 @@
##
{ 'struct': 'MachineInfo',
'data': { 'name': 'str', '*alias': 'str',
- '*is-default': 'bool', 'cpu-max': 'int',
+ '*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
'*default-ram-id': 'str' } }
'deprecated': 'bool', '*default-cpu-type': 'str' } }

View File

@@ -1,281 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 13 Oct 2022 11:33:50 +0200
Subject: [PATCH] PVE: add IOChannel implementation for savevm-async
based on migration/channel-block.c and the implementation that was
present in migration/savevm-async.c before QEMU 7.1.
Passes along read/write requests to the given BlockBackend, while
ensuring that a read request going beyond the end results in a
graceful short read.
Additionally, allows tracking the current position from the outside
(intended to be used for progress tracking).
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/channel-savevm-async.c | 182 +++++++++++++++++++++++++++++++
migration/channel-savevm-async.h | 51 +++++++++
migration/meson.build | 1 +
3 files changed, 234 insertions(+)
create mode 100644 migration/channel-savevm-async.c
create mode 100644 migration/channel-savevm-async.h
diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
new file mode 100644
index 0000000000..06d5484778
--- /dev/null
+++ b/migration/channel-savevm-async.c
@@ -0,0 +1,182 @@
+/*
+ * QIO Channel implementation to be used by savevm-async QMP calls
+ */
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "qapi/error.h"
+#include "sysemu/block-backend.h"
+#include "trace.h"
+
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos)
+{
+ QIOChannelSavevmAsync *ioc;
+
+ ioc = QIO_CHANNEL_SAVEVM_ASYNC(object_new(TYPE_QIO_CHANNEL_SAVEVM_ASYNC));
+
+ bdrv_ref(blk_bs(be));
+ ioc->be = be;
+ ioc->bs_pos = bs_pos;
+
+ return ioc;
+}
+
+
+static void
+qio_channel_savevm_async_finalize(Object *obj)
+{
+ QIOChannelSavevmAsync *ioc = QIO_CHANNEL_SAVEVM_ASYNC(obj);
+
+ if (ioc->be) {
+ bdrv_unref(blk_bs(ioc->be));
+ ioc->be = NULL;
+ }
+ ioc->bs_pos = NULL;
+}
+
+
+static ssize_t
+qio_channel_savevm_async_readv(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ int **fds,
+ size_t *nfds,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ BlockBackend *be = saioc->be;
+ int64_t maxlen = blk_getlength(be);
+ QEMUIOVector qiov;
+ size_t size;
+ int ret;
+
+ qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+ if (*saioc->bs_pos >= maxlen) {
+ error_setg(errp, "cannot read beyond maxlen");
+ return -1;
+ }
+
+ if (maxlen - *saioc->bs_pos < qiov.size) {
+ size = maxlen - *saioc->bs_pos;
+ } else {
+ size = qiov.size;
+ }
+
+ // returns 0 on success
+ ret = blk_preadv(be, *saioc->bs_pos, size, &qiov, 0);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blk_preadv failed");
+ return -1;
+ }
+
+ *saioc->bs_pos += size;
+ return size;
+}
+
+
+static ssize_t
+qio_channel_savevm_async_writev(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ int *fds,
+ size_t nfds,
+ int flags,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ BlockBackend *be = saioc->be;
+ QEMUIOVector qiov;
+ int ret;
+
+ qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
+
+ if (qemu_in_coroutine()) {
+ ret = blk_co_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
+ aio_wait_kick();
+ } else {
+ ret = blk_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
+ }
+
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blk(_co)_pwritev failed");
+ return -1;
+ }
+
+ *saioc->bs_pos += qiov.size;
+ return qiov.size;
+}
+
+
+static int
+qio_channel_savevm_async_set_blocking(QIOChannel *ioc,
+ bool enabled,
+ Error **errp)
+{
+ if (!enabled) {
+ error_setg(errp, "Non-blocking mode not supported for savevm-async");
+ return -1;
+ }
+ return 0;
+}
+
+
+static int
+qio_channel_savevm_async_close(QIOChannel *ioc,
+ Error **errp)
+{
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
+ int rv = bdrv_flush(blk_bs(saioc->be));
+
+ if (rv < 0) {
+ error_setg_errno(errp, -rv, "Unable to flush VMState");
+ return -1;
+ }
+
+ bdrv_unref(blk_bs(saioc->be));
+ saioc->be = NULL;
+ saioc->bs_pos = NULL;
+
+ return 0;
+}
+
+
+static void
+qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc,
+ AioContext *ctx,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ void *opaque)
+{
+ // if channel-block starts doing something, check if this needs adaptation
+}
+
+
+static void
+qio_channel_savevm_async_class_init(ObjectClass *klass,
+ void *class_data G_GNUC_UNUSED)
+{
+ QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+ ioc_klass->io_writev = qio_channel_savevm_async_writev;
+ ioc_klass->io_readv = qio_channel_savevm_async_readv;
+ ioc_klass->io_set_blocking = qio_channel_savevm_async_set_blocking;
+ ioc_klass->io_close = qio_channel_savevm_async_close;
+ ioc_klass->io_set_aio_fd_handler = qio_channel_savevm_async_set_aio_fd_handler;
+}
+
+static const TypeInfo qio_channel_savevm_async_info = {
+ .parent = TYPE_QIO_CHANNEL,
+ .name = TYPE_QIO_CHANNEL_SAVEVM_ASYNC,
+ .instance_size = sizeof(QIOChannelSavevmAsync),
+ .instance_finalize = qio_channel_savevm_async_finalize,
+ .class_init = qio_channel_savevm_async_class_init,
+};
+
+static void
+qio_channel_savevm_async_register_types(void)
+{
+ type_register_static(&qio_channel_savevm_async_info);
+}
+
+type_init(qio_channel_savevm_async_register_types);
diff --git a/migration/channel-savevm-async.h b/migration/channel-savevm-async.h
new file mode 100644
index 0000000000..17ae2cb261
--- /dev/null
+++ b/migration/channel-savevm-async.h
@@ -0,0 +1,51 @@
+/*
+ * QEMU I/O channels driver for savevm-async.c
+ *
+ * Copyright (c) 2022 Proxmox Server Solutions
+ *
+ * Authors:
+ * Fiona Ebner (f.ebner@proxmox.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QIO_CHANNEL_SAVEVM_ASYNC_H
+#define QIO_CHANNEL_SAVEVM_ASYNC_H
+
+#include "io/channel.h"
+#include "qom/object.h"
+
+#define TYPE_QIO_CHANNEL_SAVEVM_ASYNC "qio-channel-savevm-async"
+OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelSavevmAsync, QIO_CHANNEL_SAVEVM_ASYNC)
+
+
+/**
+ * QIOChannelSavevmAsync:
+ *
+ * The QIOChannelBlock object provides a channel implementation that is able to
+ * perform I/O on any BlockBackend whose BlockDriverState directly contains a
+ * VMState (as opposed to indirectly, like qcow2). It allows tracking the
+ * current position from the outside.
+ */
+struct QIOChannelSavevmAsync {
+ QIOChannel parent;
+ BlockBackend *be;
+ size_t *bs_pos;
+};
+
+
+/**
+ * qio_channel_savevm_async_new:
+ * @be: the block backend
+ * @bs_pos: used to keep track of the IOChannels current position
+ *
+ * Create a new IO channel object that can perform I/O on a BlockBackend object
+ * whose BlockDriverState directly contains a VMState.
+ *
+ * Returns: the new channel object
+ */
+QIOChannelSavevmAsync *
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos);
+
+#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
diff --git a/migration/meson.build b/migration/meson.build
index 690487cf1a..8cac83c06c 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -13,6 +13,7 @@ softmmu_ss.add(files(
'block-dirty-bitmap.c',
'channel.c',
'channel-block.c',
+ 'channel-savevm-async.c',
'colo-failover.c',
'colo.c',
'exec.c',

View File

@@ -12,29 +12,29 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 8 insertions(+)
diff --git a/qapi/ui.json b/qapi/ui.json
index cf58ab4283..0be2388941 100644
index 9d6721037f..af87b18db9 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -310,11 +310,14 @@
@@ -214,11 +214,14 @@
#
# @channels: a list of @SpiceChannel for each active spice channel
#
+# @ticket: The last ticket set with set_password
+#
# Since: 0.14
# Since: 0.14.0
##
{ 'struct': 'SpiceInfo',
'data': {'enabled': 'bool', 'migrated': 'bool', '*host': 'str', '*port': 'int',
'*tls-port': 'int', '*auth': 'str', '*compiled-version': 'str',
+ '*ticket': 'str',
'mouse-mode': 'SpiceQueryMouseMode', '*channels': ['SpiceChannel']},
'if': 'CONFIG_SPICE' }
'if': 'defined(CONFIG_SPICE)' }
diff --git a/ui/spice-core.c b/ui/spice-core.c
index 37774f1c0a..367f77f2b4 100644
index ca04965ead..243466c13d 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -534,6 +534,11 @@ static SpiceInfo *qmp_query_spice_real(Error **errp)
@@ -539,6 +539,11 @@ SpiceInfo *qmp_query_spice(Error **errp)
micro = SPICE_SERVER_VERSION & 0xff;
info->compiled_version = g_strdup_printf("%d.%d.%d", major, minor, micro);

View File

@@ -1,7 +1,9 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:46 +0200
Subject: [PATCH] PVE: add savevm-async for background state snapshots
Subject: [PATCH] PVE: internal snapshot async
Truncate at 1024 boundary (Fabian Ebner will send a patch for stable)
Put qemu_savevm_state_{header,setup} into the main loop and the rest
of the iteration into a coroutine. The former need to lock the
@@ -9,44 +11,44 @@ iothread (and we can't unlock it in the coroutine), and the latter
can't deal with being in a separate thread, so a coroutine it must
be.
Truncate output file at 1024 boundary.
Do not block the VM and save the state on aborting a snapshot, as the
snapshot will be invalid anyway.
Also, when aborting, wait for the target file to be closed, otherwise a
client might run into race-conditions when trying to remove the file
still opened by QEMU.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[improve aborting]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[FE: further improve aborting
adapt to removal of QEMUFileOps]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
Makefile.objs | 1 +
hmp-commands-info.hx | 13 +
hmp-commands.hx | 33 +++
include/migration/snapshot.h | 2 +
hmp-commands.hx | 32 +++
include/block/aio.h | 10 +
include/migration/snapshot.h | 1 +
include/monitor/hmp.h | 5 +
migration/meson.build | 1 +
migration/savevm-async.c | 531 +++++++++++++++++++++++++++++++++++
monitor/hmp-cmds.c | 57 ++++
qapi/migration.json | 34 +++
qapi/misc.json | 32 +++
qemu-options.hx | 12 +
savevm-async.c | 542 +++++++++++++++++++++++++++++++++++
softmmu/vl.c | 10 +
11 files changed, 730 insertions(+)
create mode 100644 migration/savevm-async.c
util/async.c | 30 ++
13 files changed, 779 insertions(+)
create mode 100644 savevm-async.c
diff --git a/Makefile.objs b/Makefile.objs
index d22b3b45d7..a1307c12a8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -46,6 +46,7 @@ common-obj-y += bootdevice.o iothread.o
common-obj-y += dump/
common-obj-y += job-qmp.o
common-obj-y += monitor/
+common-obj-y += savevm-async.o
common-obj-y += net/
common-obj-y += qdev-monitor.o
common-obj-$(CONFIG_WIN32) += os-win32.o
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 188d9ece3b..97b88eaaad 100644
index 30209e3903..ae8ff21789 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -538,6 +538,19 @@ SRST
Show current migration parameters.
@@ -580,6 +580,19 @@ SRST
Show current migration xbzrle cache size.
ERST
+ {
@@ -66,13 +68,13 @@ index 188d9ece3b..97b88eaaad 100644
.name = "balloon",
.args_type = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 182e639d14..bbcc73e942 100644
index 60f395c276..2b58ac4a1c 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1800,3 +1800,36 @@ ERST
"\n\t\t\t\t\t limit on a specified virtual cpu",
.cmd = hmp_cancel_vcpu_dirty_limit,
@@ -1829,3 +1829,35 @@ ERST
.flags = "p",
},
+
+ {
+ .name = "savevm-start",
@@ -103,25 +105,52 @@ index 182e639d14..bbcc73e942 100644
+ .args_type = "",
+ .params = "",
+ .help = "Resume VM after snaphot.",
+ .cmd = hmp_savevm_end,
+ .coroutine = true,
+ .cmd = hmp_savevm_end,
+ },
diff --git a/include/block/aio.h b/include/block/aio.h
index b2f703fa3f..c37617b404 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -17,6 +17,7 @@
#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
+#include "qemu/coroutine.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
@@ -654,6 +655,15 @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
*/
void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
+/**
+ * aio_co_reschedule_self:
+ * @new_ctx: the new context
+ *
+ * Move the currently running coroutine to new_ctx. If the coroutine is already
+ * running in new_ctx, do nothing.
+ */
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
+
/**
* aio_co_wake:
* @co: the coroutine
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index e72083b117..c846d37806 100644
index c85b6ec75b..4411b7121d 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
bool has_devices, strList *devices,
Error **errp);
@@ -17,5 +17,6 @@
int save_snapshot(const char *name, Error **errp);
int load_snapshot(const char *name, Error **errp);
+int load_snapshot_from_blockdev(const char *filename, Error **errp);
+
#endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index a618eb1e4e..55067beff1 100644
index c986cfd28b..243952d32f 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -26,6 +26,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
@@ -25,6 +25,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
void hmp_info_uuid(Monitor *mon, const QDict *qdict);
void hmp_info_chardev(Monitor *mon, const QDict *qdict);
void hmp_info_mice(Monitor *mon, const QDict *qdict);
@@ -129,7 +158,7 @@ index a618eb1e4e..55067beff1 100644
void hmp_info_migrate(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -80,6 +81,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
@@ -83,6 +84,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
void hmp_netdev_del(Monitor *mon, const QDict *qdict);
void hmp_getfd(Monitor *mon, const QDict *qdict);
void hmp_closefd(Monitor *mon, const QDict *qdict);
@@ -140,26 +169,192 @@ index a618eb1e4e..55067beff1 100644
void hmp_sendkey(Monitor *mon, const QDict *qdict);
void hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build
index 8cac83c06c..0842d00cd2 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -24,6 +24,7 @@ softmmu_ss.add(files(
'multifd-zlib.c',
'postcopy-ram.c',
'savevm.c',
+ 'savevm-async.c',
'socket.c',
'tls.c',
), gnutls)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 6e26ea2cd0..280bb447a6 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1904,6 +1904,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, err);
}
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+ qmp_savevm_start(statefile != NULL, statefile, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_snapshot_drive(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_delete_drive_snapshot(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+
+ qmp_savevm_end(&errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+ SaveVMInfo *info;
+ info = qmp_query_savevm(NULL);
+
+ if (info->has_status) {
+ monitor_printf(mon, "savevm status: %s\n", info->status);
+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+ info->total_time);
+ } else {
+ monitor_printf(mon, "savevm status: not running\n");
+ }
+ if (info->has_bytes) {
+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+ }
+ if (info->has_error) {
+ monitor_printf(mon, "Error: %s\n", info->error);
+ }
+}
+
void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index ea53b23dca..c556257544 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -225,6 +225,40 @@
'*compression': 'CompressionStats',
'*socket-address': ['SocketAddress'] } }
+##
+# @SaveVMInfo:
+#
+# Information about current migration process.
+#
+# @status: string describing the current savevm status.
+# This can be 'active', 'completed', 'failed'.
+# If this field is not returned, no savevm process
+# has been initiated
+#
+# @error: string containing error message is status is failed.
+#
+# @total-time: total amount of milliseconds since savevm started.
+# If savevm has ended, it returns the total save time
+#
+# @bytes: total amount of data transfered
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+ 'data': {'*status': 'str', '*error': 'str',
+ '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
##
# @query-migrate:
#
diff --git a/qapi/misc.json b/qapi/misc.json
index 44b1fb6fa7..9895899f8b 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -1168,6 +1168,38 @@
##
{ 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @snapshot-drive:
+#
+# Create an internal drive snapshot.
+#
+##
+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @delete-drive-snapshot:
+#
+# Delete a drive snapshot.
+#
+##
+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end' }
+
##
# @AcpiTableOptions:
#
diff --git a/qemu-options.hx b/qemu-options.hx
index 708583b4ce..d32995cc50 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3866,6 +3866,18 @@ SRST
Start right away with a saved state (``loadvm`` in monitor)
ERST
+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+ "-loadstate file\n" \
+ " start right away with a saved state\n",
+ QEMU_ARCH_ALL)
+SRST
+``-loadstate file``
+ Start right away with a saved state. This option does not rollback
+ disk state like @code{loadvm}, so user must make sure that disk
+ have correct state. @var{file} can be any valid device URL. See the section
+ for "Device URL Syntax" for more information.
+ERST
+
#ifndef _WIN32
DEF("daemonize", 0, QEMU_OPTION_daemonize, \
"-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/savevm-async.c b/savevm-async.c
new file mode 100644
index 0000000000..05d394c0e2
index 0000000000..f918e18dce
--- /dev/null
+++ b/migration/savevm-async.c
@@ -0,0 +1,531 @@
+++ b/savevm-async.c
@@ -0,0 +1,542 @@
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "migration/migration.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
@@ -177,12 +372,14 @@ index 0000000000..05d394c0e2
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+/* used while emulated sync operation in progress */
+#define NOT_DONE -EINPROGRESS
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
@@ -211,15 +408,8 @@ index 0000000000..05d394c0e2
+ int64_t total_time;
+ QEMUBH *finalize_bh;
+ Coroutine *co;
+ QemuCoSleep target_close_wait;
+} snap_state;
+
+static bool savevm_aborted(void)
+{
+ return snap_state.state == SAVE_STATE_CANCELLED ||
+ snap_state.state == SAVE_STATE_ERROR;
+}
+
+SaveVMInfo *qmp_query_savevm(Error **errp)
+{
+ SaveVMInfo *info = g_malloc0(sizeof(*info));
@@ -269,25 +459,20 @@ index 0000000000..05d394c0e2
+
+ if (snap_state.file) {
+ ret = qemu_fclose(snap_state.file);
+ snap_state.file = NULL;
+ }
+
+ if (snap_state.target) {
+ if (!savevm_aborted()) {
+ /* try to truncate, but ignore errors (will fail on block devices).
+ * note1: bdrv_read() need whole blocks, so we need to round up
+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+ */
+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+ }
+ /* try to truncate, but ignore errors (will fail on block devices).
+ * note1: bdrv_read() need whole blocks, so we need to round up
+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+ */
+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
+ error_free(snap_state.blocker);
+ snap_state.blocker = NULL;
+ blk_unref(snap_state.target);
+ snap_state.target = NULL;
+
+ qemu_co_sleep_wake(&snap_state.target_close_wait);
+ }
+
+ return ret;
@@ -313,14 +498,66 @@ index 0000000000..05d394c0e2
+ snap_state.state = SAVE_STATE_ERROR;
+}
+
+static int block_state_close(void *opaque, Error **errp)
+{
+ snap_state.file = NULL;
+ return blk_flush(snap_state.target);
+}
+
+typedef struct BlkRwCo {
+ int64_t offset;
+ QEMUIOVector *qiov;
+ ssize_t ret;
+} BlkRwCo;
+
+static void coroutine_fn block_state_write_entry(void *opaque) {
+ BlkRwCo *rwco = opaque;
+ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
+ rwco->qiov, 0);
+ aio_wait_kick();
+}
+
+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
+ int iovcnt, int64_t pos, Error **errp)
+{
+ QEMUIOVector qiov;
+ BlkRwCo rwco;
+
+ assert(pos == snap_state.bs_pos);
+ rwco = (BlkRwCo) {
+ .offset = pos,
+ .qiov = &qiov,
+ .ret = NOT_DONE,
+ };
+
+ qemu_iovec_init_external(&qiov, iov, iovcnt);
+
+ if (qemu_in_coroutine()) {
+ block_state_write_entry(&rwco);
+ } else {
+ Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
+ bdrv_coroutine_enter(blk_bs(snap_state.target), co);
+ BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
+ }
+ if (rwco.ret < 0) {
+ return rwco.ret;
+ }
+
+ snap_state.bs_pos += qiov.size;
+ return qiov.size;
+}
+
+static const QEMUFileOps block_file_ops = {
+ .writev_buffer = block_state_writev_buffer,
+ .close = block_state_close,
+};
+
+static void process_savevm_finalize(void *opaque)
+{
+ int ret;
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
+ MigrationState *ms = migrate_get_current();
+
+ bool aborted = savevm_aborted();
+
+#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
@@ -342,13 +579,10 @@ index 0000000000..05d394c0e2
+ save_snapshot_error("vm_stop_force_state error %d", ret);
+ }
+
+ if (!aborted) {
+ /* skip state saving if we aborted, snapshot will be invalid anyway */
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
+
+ DPRINTF("state saving complete\n");
@@ -357,7 +591,7 @@ index 0000000000..05d394c0e2
+
+ /* clear migration state */
+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ms->to_dst_file = NULL;
+
+ qemu_savevm_state_cleanup();
@@ -367,12 +601,6 @@ index 0000000000..05d394c0e2
+ save_snapshot_error("save_snapshot_cleanup error %d", ret);
+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_COMPLETED;
+ } else if (aborted) {
+ /*
+ * If there was an error, there's no need to set a new one here.
+ * If the snapshot was canceled, leave setting the state to
+ * qmp_savevm_end(), which is waked by save_snapshot_cleanup().
+ */
+ } else {
+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
+ snap_state.state);
@@ -406,11 +634,7 @@ index 0000000000..05d394c0e2
+ while (snap_state.state == SAVE_STATE_ACTIVE) {
+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
+
+ /* pending is expected to be called without iothread lock */
+ qemu_mutex_unlock_iothread();
+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
+ qemu_mutex_lock_iothread();
+
+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
+
+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
@@ -498,7 +722,6 @@ index 0000000000..05d394c0e2
+ snap_state.bs_pos = 0;
+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ snap_state.blocker = NULL;
+ snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
+
+ if (snap_state.error) {
+ error_free(snap_state.error);
@@ -525,9 +748,7 @@ index 0000000000..05d394c0e2
+ goto restart;
+ }
+
+ QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
+ &snap_state.bs_pos));
+ snap_state.file = qemu_file_new_output(ioc);
+ snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+
+ if (!snap_state.file) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -569,14 +790,11 @@ index 0000000000..05d394c0e2
+
+ if (snap_state.saved_vm_running) {
+ vm_start();
+ snap_state.saved_vm_running = false;
+ }
+}
+
+void coroutine_fn qmp_savevm_end(Error **errp)
+void qmp_savevm_end(Error **errp)
+{
+ int64_t timeout;
+
+ if (snap_state.state == SAVE_STATE_DONE) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "VM snapshot not started\n");
@@ -585,42 +803,14 @@ index 0000000000..05d394c0e2
+
+ if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_CANCELLED;
+ goto wait_for_close;
+ return;
+ }
+
+ if (snap_state.saved_vm_running) {
+ vm_start();
+ snap_state.saved_vm_running = false;
+ }
+
+ snap_state.state = SAVE_STATE_DONE;
+
+wait_for_close:
+ if (!snap_state.target) {
+ DPRINTF("savevm-end: no target file open\n");
+ return;
+ }
+
+ /* wait until cleanup is done before returning, this ensures that after this
+ * call exits the statefile will be closed and can be removed immediately */
+ DPRINTF("savevm-end: waiting for cleanup\n");
+ timeout = 30L * 1000 * 1000 * 1000;
+ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
+ QEMU_CLOCK_REALTIME, timeout);
+ if (snap_state.target) {
+ save_snapshot_error("timeout waiting for target file close in "
+ "qmp_savevm_end");
+ /* we cannot assume the snapshot finished in this case, so leave the
+ * state alone - caller has to figure something out */
+ return;
+ }
+
+ // File closed and no other error, so ensure next snapshot can be started.
+ if (snap_state.state != SAVE_STATE_ERROR) {
+ snap_state.state = SAVE_STATE_DONE;
+ }
+
+ DPRINTF("savevm-end: cleanup done\n");
+}
+
+// FIXME: Deprecated
@@ -639,6 +829,27 @@ index 0000000000..05d394c0e2
+ true, name, errp);
+}
+
+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
+ size_t size, Error **errp)
+{
+ BlockBackend *be = opaque;
+ int64_t maxlen = blk_getlength(be);
+ if (pos > maxlen) {
+ return -EIO;
+ }
+ if ((pos + size) > maxlen) {
+ size = maxlen - pos - 1;
+ }
+ if (size == 0) {
+ return 0;
+ }
+ return blk_pread(be, pos, buf, size);
+}
+
+static const QEMUFileOps loadstate_file_ops = {
+ .get_buffer = loadstate_get_buffer,
+};
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+ BlockBackend *be;
@@ -646,7 +857,6 @@ index 0000000000..05d394c0e2
+ Error *blocker = NULL;
+
+ QEMUFile *f;
+ size_t bs_pos = 0;
+ int ret = -EINVAL;
+
+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
@@ -660,7 +870,7 @@ index 0000000000..05d394c0e2
+ blk_op_block_all(be, blocker);
+
+ /* restore the VM state */
+ f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+ f = qemu_fopen_ops(be, &loadstate_file_ops);
+ if (!f) {
+ error_setg(errp, "Could not open VM state file");
+ goto the_end;
@@ -669,9 +879,6 @@ index 0000000000..05d394c0e2
+ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+ ret = qemu_loadvm_state(f);
+
+ /* dirty bitmap migration has a special case we need to trigger manually */
+ dirty_bitmap_mig_before_vm_start();
+
+ qemu_fclose(f);
+ migration_incoming_state_destroy();
+ if (ret < 0) {
@@ -689,211 +896,19 @@ index 0000000000..05d394c0e2
+ }
+ return ret;
+}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 15572befb1..1507180990 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1925,6 +1925,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, err);
}
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+ qmp_savevm_start(statefile != NULL, statefile, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_snapshot_drive(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_delete_drive_snapshot(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+
+ qmp_savevm_end(&errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+ SaveVMInfo *info;
+ info = qmp_query_savevm(NULL);
+
+ if (info->has_status) {
+ monitor_printf(mon, "savevm status: %s\n", info->status);
+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+ info->total_time);
+ } else {
+ monitor_printf(mon, "savevm status: not running\n");
+ }
+ if (info->has_bytes) {
+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+ }
+ if (info->has_error) {
+ monitor_printf(mon, "Error: %s\n", info->error);
+ }
+}
+
void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index 81185d4311..3129f71fa8 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -261,6 +261,40 @@
'*compression': 'CompressionStats',
'*socket-address': ['SocketAddress'] } }
+##
+# @SaveVMInfo:
+#
+# Information about current migration process.
+#
+# @status: string describing the current savevm status.
+# This can be 'active', 'completed', 'failed'.
+# If this field is not returned, no savevm process
+# has been initiated
+#
+# @error: string containing error message is status is failed.
+#
+# @total-time: total amount of milliseconds since savevm started.
+# If savevm has ended, it returns the total save time
+#
+# @bytes: total amount of data transfered
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+ 'data': {'*status': 'str', '*error': 'str',
+ '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
##
# @query-migrate:
#
diff --git a/qapi/misc.json b/qapi/misc.json
index 27ef5a2b20..b3ce75dcae 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -435,6 +435,38 @@
##
{ 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @snapshot-drive:
+#
+# Create an internal drive snapshot.
+#
+##
+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @delete-drive-snapshot:
+#
+# Delete a drive snapshot.
+#
+##
+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end', 'coroutine': true }
+
##
# @CommandLineParameterType:
#
diff --git a/qemu-options.hx b/qemu-options.hx
index 31c04f7eea..c2ca6e91b5 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4341,6 +4341,18 @@ SRST
Start right away with a saved state (``loadvm`` in monitor)
ERST
+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+ "-loadstate file\n" \
+ " start right away with a saved state\n",
+ QEMU_ARCH_ALL)
+SRST
+``-loadstate file``
+ Start right away with a saved state. This option does not rollback
+ disk state like @code{loadvm}, so user must make sure that disk
+ have correct state. @var{file} can be any valid device URL. See the section
+ for "Device URL Syntax" for more information.
+ERST
+
#ifndef _WIN32
DEF("daemonize", 0, QEMU_OPTION_daemonize, \
"-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 706bd7cff7..b8637c4262 100644
index 4eb9d1f7fd..670b7e427c 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -165,6 +165,7 @@ static const char *accelerators;
static bool have_custom_ram_size;
static const char *ram_memdev_id;
static QDict *machine_opts_dict;
+static const char *loadstate;
static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
static int display_remote;
@@ -2584,6 +2585,12 @@ void qmp_x_exit_preconfig(Error **errp)
if (loadvm) {
load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
+ } else if (loadstate) {
+ Error *local_err = NULL;
+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+ error_report_err(local_err);
+ autostart = 0;
+ }
}
if (replay_mode != REPLAY_MODE_NONE) {
replay_vmstate_init();
@@ -3133,6 +3140,9 @@ void qemu_init(int argc, char **argv, char **envp)
@@ -2844,6 +2844,7 @@ void qemu_init(int argc, char **argv, char **envp)
int optind;
const char *optarg;
const char *loadvm = NULL;
+ const char *loadstate = NULL;
MachineClass *machine_class;
const char *cpu_option;
const char *vga_model = NULL;
@@ -3408,6 +3409,9 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_loadvm:
loadvm = optarg;
break;
@@ -903,3 +918,57 @@ index 706bd7cff7..b8637c4262 100644
case QEMU_OPTION_full_screen:
dpy.has_full_screen = true;
dpy.full_screen = true;
@@ -4464,6 +4468,12 @@ void qemu_init(int argc, char **argv, char **envp)
autostart = 0;
exit(1);
}
+ } else if (loadstate) {
+ Error *local_err = NULL;
+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+ error_report_err(local_err);
+ autostart = 0;
+ }
}
if (replay_mode != REPLAY_MODE_NONE) {
replay_vmstate_init();
diff --git a/util/async.c b/util/async.c
index 1319eee3bc..b68e73f488 100644
--- a/util/async.c
+++ b/util/async.c
@@ -559,6 +559,36 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co)
aio_context_unref(ctx);
}
+typedef struct AioCoRescheduleSelf {
+ Coroutine *co;
+ AioContext *new_ctx;
+} AioCoRescheduleSelf;
+
+static void aio_co_reschedule_self_bh(void *opaque)
+{
+ AioCoRescheduleSelf *data = opaque;
+ aio_co_schedule(data->new_ctx, data->co);
+}
+
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
+{
+ AioContext *old_ctx = qemu_get_current_aio_context();
+
+ if (old_ctx != new_ctx) {
+ AioCoRescheduleSelf data = {
+ .co = qemu_coroutine_self(),
+ .new_ctx = new_ctx,
+ };
+ /*
+ * We can't directly schedule the coroutine in the target context
+ * because this would be racy: The other thread could try to enter the
+ * coroutine before it has yielded in this one.
+ */
+ aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
+ qemu_coroutine_yield();
+ }
+}
+
void aio_co_wake(struct Coroutine *co)
{
AioContext *ctx;

View File

@@ -1,216 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 4 May 2020 11:05:08 +0200
Subject: [PATCH] PVE: add optional buffer size to QEMUFile
So we can use a 4M buffer for savevm-async which should
increase performance storing the state onto ceph.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[increase max IOV count in QEMUFile to actually write more data]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to removal of QEMUFileOps]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
migration/qemu-file.c | 49 +++++++++++++++++++++++++++-------------
migration/qemu-file.h | 2 ++
migration/savevm-async.c | 5 ++--
3 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 4f400c2e52..21e8998867 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -31,8 +31,8 @@
#include "trace.h"
#include "qapi/error.h"
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+#define DEFAULT_IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 256)
struct QEMUFile {
const QEMUFileHooks *hooks;
@@ -55,7 +55,8 @@ struct QEMUFile {
int buf_index;
int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
+ size_t buf_allocated_size;
+ uint8_t *buf;
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -106,7 +107,9 @@ bool qemu_file_mode_is_not_valid(const char *mode)
return false;
}
-static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
+static QEMUFile *qemu_file_new_impl(QIOChannel *ioc,
+ bool is_writable,
+ size_t buffer_size)
{
QEMUFile *f;
@@ -115,6 +118,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
object_ref(ioc);
f->ioc = ioc;
f->is_writable = is_writable;
+ f->buf_allocated_size = buffer_size;
+ f->buf = malloc(buffer_size);
return f;
}
@@ -125,17 +130,27 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
*/
QEMUFile *qemu_file_get_return_path(QEMUFile *f)
{
- return qemu_file_new_impl(f->ioc, !f->is_writable);
+ return qemu_file_new_impl(f->ioc, !f->is_writable, DEFAULT_IO_BUF_SIZE);
}
QEMUFile *qemu_file_new_output(QIOChannel *ioc)
{
- return qemu_file_new_impl(ioc, true);
+ return qemu_file_new_impl(ioc, true, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size)
+{
+ return qemu_file_new_impl(ioc, true, buffer_size);
}
QEMUFile *qemu_file_new_input(QIOChannel *ioc)
{
- return qemu_file_new_impl(ioc, false);
+ return qemu_file_new_impl(ioc, false, DEFAULT_IO_BUF_SIZE);
+}
+
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size)
+{
+ return qemu_file_new_impl(ioc, false, buffer_size);
}
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
@@ -393,7 +408,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
do {
len = qio_channel_read(f->ioc,
(char *)f->buf + pending,
- IO_BUF_SIZE - pending,
+ f->buf_allocated_size - pending,
&local_error);
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
@@ -443,6 +458,8 @@ int qemu_fclose(QEMUFile *f)
}
g_clear_pointer(&f->ioc, object_unref);
+ free(f->buf);
+
/* If any error was spotted before closing, we should report it
* instead of the close() return value.
*/
@@ -497,7 +514,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
- if (f->buf_index == IO_BUF_SIZE) {
+ if (f->buf_index == f->buf_allocated_size) {
qemu_fflush(f);
}
}
@@ -523,7 +540,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
- l = IO_BUF_SIZE - f->buf_index;
+ l = f->buf_allocated_size - f->buf_index;
if (l > size) {
l = size;
}
@@ -570,8 +587,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
size_t index;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
- assert(size <= IO_BUF_SIZE - offset);
+ assert(offset < f->buf_allocated_size);
+ assert(size <= f->buf_allocated_size - offset);
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -621,7 +638,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
size_t res;
uint8_t *src;
- res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
if (res == 0) {
return done;
}
@@ -655,7 +672,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
*/
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src = NULL;
@@ -680,7 +697,7 @@ int qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
+ assert(offset < f->buf_allocated_size);
if (index >= f->buf_size) {
qemu_fill_buffer(f);
@@ -832,7 +849,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
const uint8_t *p, size_t size)
{
- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
+ ssize_t blen = f->buf_allocated_size - f->buf_index - sizeof(int32_t);
if (blen < compressBound(size)) {
return -1;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index fa13d04d78..914f1a63a8 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -63,7 +63,9 @@ typedef struct QEMUFileHooks {
} QEMUFileHooks;
QEMUFile *qemu_file_new_input(QIOChannel *ioc);
+QEMUFile *qemu_file_new_input_sized(QIOChannel *ioc, size_t buffer_size);
QEMUFile *qemu_file_new_output(QIOChannel *ioc);
+QEMUFile *qemu_file_new_output_sized(QIOChannel *ioc, size_t buffer_size);
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
int qemu_fclose(QEMUFile *f);
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index b3692739a0..e65a5e3482 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -367,7 +367,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
&snap_state.bs_pos));
- snap_state.file = qemu_file_new_output(ioc);
+ snap_state.file = qemu_file_new_output_sized(ioc, 4 * 1024 * 1024);
if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -500,7 +500,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker);
/* restore the VM state */
- f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+ f = qemu_file_new_input_sized(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)),
+ 4 * 1024 * 1024);
if (!f) {
error_setg(errp, "Could not open VM state file");
goto the_end;

View File

@@ -0,0 +1,183 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 4 May 2020 11:05:08 +0200
Subject: [PATCH] add optional buffer size to QEMUFile
So we can use a 4M buffer for savevm-async which should
increase performance storing the state onto ceph.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
migration/qemu-file.c | 36 ++++++++++++++++++++++++------------
migration/qemu-file.h | 1 +
savevm-async.c | 4 ++--
3 files changed, 27 insertions(+), 14 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index be21518c57..a4d2e2c8ff 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -30,7 +30,7 @@
#include "trace.h"
#include "qapi/error.h"
-#define IO_BUF_SIZE 32768
+#define DEFAULT_IO_BUF_SIZE 32768
#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
struct QEMUFile {
@@ -45,7 +45,8 @@ struct QEMUFile {
when reading */
int buf_index;
int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
+ size_t buf_allocated_size;
+ uint8_t *buf;
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
@@ -101,7 +102,7 @@ bool qemu_file_mode_is_not_valid(const char *mode)
return false;
}
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, size_t buffer_size)
{
QEMUFile *f;
@@ -109,9 +110,17 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
f->opaque = opaque;
f->ops = ops;
+ f->buf_allocated_size = buffer_size;
+ f->buf = malloc(buffer_size);
+
return f;
}
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
+{
+ return qemu_fopen_ops_sized(opaque, ops, DEFAULT_IO_BUF_SIZE);
+}
+
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
{
@@ -346,7 +355,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
}
len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
- IO_BUF_SIZE - pending, &local_error);
+ f->buf_allocated_size - pending, &local_error);
if (len > 0) {
f->buf_size += len;
f->pos += len;
@@ -386,6 +395,9 @@ int qemu_fclose(QEMUFile *f)
ret = ret2;
}
}
+
+ free(f->buf);
+
/* If any error was spotted before closing, we should report it
* instead of the close() return value.
*/
@@ -435,7 +447,7 @@ static void add_buf_to_iovec(QEMUFile *f, size_t len)
{
if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
f->buf_index += len;
- if (f->buf_index == IO_BUF_SIZE) {
+ if (f->buf_index == f->buf_allocated_size) {
qemu_fflush(f);
}
}
@@ -461,7 +473,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
while (size > 0) {
- l = IO_BUF_SIZE - f->buf_index;
+ l = f->buf_allocated_size - f->buf_index;
if (l > size) {
l = size;
}
@@ -508,8 +520,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
size_t index;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
- assert(size <= IO_BUF_SIZE - offset);
+ assert(offset < f->buf_allocated_size);
+ assert(size <= f->buf_allocated_size - offset);
/* The 1st byte to read from */
index = f->buf_index + offset;
@@ -559,7 +571,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
size_t res;
uint8_t *src;
- res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, f->buf_allocated_size), 0);
if (res == 0) {
return done;
}
@@ -593,7 +605,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
*/
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
- if (size < IO_BUF_SIZE) {
+ if (size < f->buf_allocated_size) {
size_t res;
uint8_t *src;
@@ -618,7 +630,7 @@ int qemu_peek_byte(QEMUFile *f, int offset)
int index = f->buf_index + offset;
assert(!qemu_file_is_writable(f));
- assert(offset < IO_BUF_SIZE);
+ assert(offset < f->buf_allocated_size);
if (index >= f->buf_size) {
qemu_fill_buffer(f);
@@ -770,7 +782,7 @@ static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
const uint8_t *p, size_t size)
{
- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
+ ssize_t blen = f->buf_allocated_size - f->buf_index - sizeof(int32_t);
if (blen < compressBound(size)) {
return -1;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index a9b6d6ccb7..8752d27c74 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -120,6 +120,7 @@ typedef struct QEMUFileHooks {
} QEMUFileHooks;
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
+QEMUFile *qemu_fopen_ops_sized(void *opaque, const QEMUFileOps *ops, size_t buffer_size);
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
int qemu_get_fd(QEMUFile *f);
int qemu_fclose(QEMUFile *f);
diff --git a/savevm-async.c b/savevm-async.c
index f918e18dce..156b7a030e 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -392,7 +392,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
goto restart;
}
- snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+ snap_state.file = qemu_fopen_ops_sized(&snap_state, &block_file_ops, 4 * 1024 * 1024);
if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -514,7 +514,7 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker);
/* restore the VM state */
- f = qemu_fopen_ops(be, &loadstate_file_ops);
+ f = qemu_fopen_ops_sized(be, &loadstate_file_ops, 4 * 1024 * 1024);
if (!f) {
error_setg(errp, "Could not open VM state file");
goto the_end;

View File

@@ -4,32 +4,30 @@ Date: Mon, 6 Apr 2020 12:16:47 +0200
Subject: [PATCH] PVE: block: add the zeroinit block driver filter
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[adapt to changed function signatures]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/meson.build | 1 +
block/zeroinit.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 197 insertions(+)
block/Makefile.objs | 1 +
block/zeroinit.c | 198 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 199 insertions(+)
create mode 100644 block/zeroinit.c
diff --git a/block/meson.build b/block/meson.build
index 60bc305597..ad40c10b6a 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -43,6 +43,7 @@ block_ss.add(files(
'vmdk.c',
'vpc.c',
'write-threshold.c',
+ 'zeroinit.c',
), zstd, zlib, gnutls)
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 19c6f371c9..d1a9227b8f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -11,6 +11,7 @@ block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-$(CONFIG_QED) += qed-check.o
block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-y += quorum.o
+block-obj-y += zeroinit.o
block-obj-y += blkdebug.o blkverify.o blkreplay.o
block-obj-$(CONFIG_PARALLELS) += parallels.o
block-obj-y += blklogwrites.o
diff --git a/block/zeroinit.c b/block/zeroinit.c
new file mode 100644
index 0000000000..20ee611f22
index 0000000000..4fbb80eab0
--- /dev/null
+++ b/block/zeroinit.c
@@ -0,0 +1,196 @@
@@ -0,0 +1,198 @@
+/*
+ * Filter to fake a zero-initialized block device.
+ *
@@ -140,22 +138,22 @@ index 0000000000..20ee611f22
+}
+
+static int coroutine_fn zeroinit_co_preadv(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn zeroinit_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags)
+ int count, BdrvRequestFlags flags)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ if (offset >= s->extents)
+ return 0;
+ return bdrv_pwrite_zeroes(bs->file, offset, bytes, flags);
+ return bdrv_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn zeroinit_co_pwritev(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ BDRVZeroinitState *s = bs->opaque;
+ int64_t extents = offset + bytes;
@@ -176,9 +174,9 @@ index 0000000000..20ee611f22
+}
+
+static int coroutine_fn zeroinit_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int64_t bytes)
+ int64_t offset, int count)
+{
+ return bdrv_co_pdiscard(bs->file, offset, bytes);
+ return bdrv_co_pdiscard(bs->file, offset, count);
+}
+
+static int zeroinit_co_truncate(BlockDriverState *bs, int64_t offset,
@@ -214,6 +212,8 @@ index 0000000000..20ee611f22
+
+ .bdrv_has_zero_init = zeroinit_has_zero_init,
+
+ .bdrv_co_block_status = bdrv_co_block_status_from_file,
+
+ .bdrv_co_pdiscard = zeroinit_co_pdiscard,
+
+ .bdrv_co_truncate = zeroinit_co_truncate,

View File

@@ -14,12 +14,12 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 11 insertions(+)
diff --git a/qemu-options.hx b/qemu-options.hx
index c2ca6e91b5..ab4734ef32 100644
index d32995cc50..abfde19ce0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1118,6 +1118,9 @@ backend describes how QEMU handles the data.
@@ -914,6 +914,9 @@ DEFHEADING()
ERST
DEFHEADING(Block device options:)
+DEF("id", HAS_ARG, QEMU_OPTION_id,
+ "-id n set the VMID", QEMU_ARCH_ALL)
@@ -28,20 +28,20 @@ index c2ca6e91b5..ab4734ef32 100644
"-fda/-fdb file use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index b8637c4262..39f149924e 100644
index 670b7e427c..366e30e594 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2620,6 +2620,7 @@ void qemu_init(int argc, char **argv, char **envp)
MachineClass *machine_class;
bool userconfig = true;
FILE *vmstate_dump_file = NULL;
@@ -2832,6 +2832,7 @@ static void create_default_memdev(MachineState *ms, const char *path)
void qemu_init(int argc, char **argv, char **envp)
{
int i;
+ long vm_id;
qemu_add_opts(&qemu_drive_opts);
qemu_add_drive_opts(&qemu_legacy_drive_opts);
@@ -3245,6 +3246,13 @@ void qemu_init(int argc, char **argv, char **envp)
machine_parse_property_opt(qemu_find_opts("smp-opts"),
"smp", optarg);
int snapshot, linux_boot;
const char *initrd_filename;
const char *kernel_filename, *kernel_cmdline;
@@ -3530,6 +3531,13 @@ void qemu_init(int argc, char **argv, char **envp)
exit(1);
}
break;
+ case QEMU_OPTION_id:
+ vm_id = strtol(optarg, (char **)&optarg, 10);
@@ -51,5 +51,5 @@ index b8637c4262..39f149924e 100644
+ }
+ break;
case QEMU_OPTION_vnc:
vnc_parse(optarg);
vnc_parse(optarg, &error_fatal);
break;

View File

@@ -11,7 +11,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 9 insertions(+)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 2a20982066..7968ad5a93 100644
index 81addd6390..c2026b07c5 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -278,6 +278,15 @@ static void apic_reset_common(DeviceState *dev)

View File

@@ -8,15 +8,15 @@ Otherwise creating images on nfs/cifs can be problematic.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/file-posix.c | 59 ++++++++++++++++++++++++++++++--------------
block/file-posix.c | 61 +++++++++++++++++++++++++++++---------------
qapi/block-core.json | 3 ++-
2 files changed, 42 insertions(+), 20 deletions(-)
2 files changed, 43 insertions(+), 21 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index 3d60b80286..49ee1db5f9 100644
index bb72e1e5ca..914bd1f367 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2475,6 +2475,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2390,6 +2390,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
int fd;
uint64_t perm, shared;
int result = 0;
@@ -24,7 +24,7 @@ index 3d60b80286..49ee1db5f9 100644
/* Validate options and set default values */
assert(options->driver == BLOCKDEV_DRIVER_FILE);
@@ -2515,19 +2516,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2431,19 +2432,22 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
@@ -59,7 +59,7 @@ index 3d60b80286..49ee1db5f9 100644
}
/* Clear the file by truncating it to 0 */
@@ -2581,13 +2585,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
@@ -2497,13 +2501,15 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
}
out_unlock:
@@ -82,7 +82,7 @@ index 3d60b80286..49ee1db5f9 100644
}
out_close:
@@ -2612,6 +2618,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
@@ -2528,6 +2534,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
PreallocMode prealloc;
char *buf = NULL;
Error *local_err = NULL;
@@ -90,7 +90,7 @@ index 3d60b80286..49ee1db5f9 100644
/* Skip file: protocol prefix */
strstart(filename, "file:", &filename);
@@ -2634,6 +2641,18 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
@@ -2550,6 +2557,18 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
return -EINVAL;
}
@@ -109,7 +109,7 @@ index 3d60b80286..49ee1db5f9 100644
options = (BlockdevCreateOptions) {
.driver = BLOCKDEV_DRIVER_FILE,
.u.file = {
@@ -2645,6 +2664,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
@@ -2561,6 +2580,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
.nocow = nocow,
.has_extent_size_hint = has_extent_size_hint,
.extent_size_hint = extent_size_hint,
@@ -118,11 +118,20 @@ index 3d60b80286..49ee1db5f9 100644
},
};
return raw_co_create(&options, errp);
@@ -3107,7 +3128,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
}
/* Copy locks to the new fd */
- if (s->perm_change_fd) {
+ if (s->use_lock && s->perm_change_fd) {
ret = raw_apply_lock_bytes(NULL, s->perm_change_fd, perm, ~shared,
false, errp);
if (ret < 0) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e1857e7094..ddac91e8f6 100644
index 197bdc1c36..ea5fae22ae 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4537,7 +4537,8 @@
@@ -4178,7 +4178,8 @@
'size': 'size',
'*preallocation': 'PreallocMode',
'*nocow': 'bool',

View File

@@ -18,10 +18,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/monitor/qmp.c b/monitor/qmp.c
index 6b8cfcf6d8..3ec67e32d3 100644
index d433ceae5b..a16cf3532d 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -519,8 +519,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
@@ -409,8 +409,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
qemu_chr_fe_set_echo(&mon->common.chr, true);
/* Note: we run QMP monitor in I/O thread when @chr supports that */

View File

@@ -26,10 +26,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index a673302cce..fa424440bd 100644
index 8d1a90c6cf..413902777e 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -127,7 +127,8 @@ GlobalProperty hw_compat_4_0[] = {
@@ -66,7 +66,8 @@ GlobalProperty hw_compat_4_0[] = {
{ "virtio-vga", "edid", "false" },
{ "virtio-gpu-device", "edid", "false" },
{ "virtio-device", "use-started", "false" },

View File

@@ -10,19 +10,18 @@ Version is made available as 'pve-version' in query-machines (same as,
and only if 'is-current').
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
hw/core/machine-qmp-cmds.c | 6 ++++++
include/hw/boards.h | 2 ++
qapi/machine.json | 4 +++-
softmmu/vl.c | 25 +++++++++++++++++++++++++
4 files changed, 36 insertions(+), 1 deletion(-)
qapi/machine.json | 3 ++-
softmmu/vl.c | 15 ++++++++++++++-
4 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 76fff60a6b..ec9201fb9a 100644
index 32f630549e..71e19db4e1 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -103,6 +103,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
@@ -238,6 +238,12 @@ MachineInfoList *qmp_query_machines(Error **errp)
if (strcmp(mc->name, MACHINE_GET_CLASS(current_machine)->name) == 0) {
info->has_is_current = true;
info->is_current = true;
@@ -36,10 +35,10 @@ index 76fff60a6b..ec9201fb9a 100644
if (mc->default_cpu_type) {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 7b416c9787..8ae15c51aa 100644
index 426ce5f625..3bce25a25f 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -230,6 +230,8 @@ struct MachineClass {
@@ -170,6 +170,8 @@ struct MachineClass {
const char *desc;
const char *deprecation_reason;
@@ -49,80 +48,54 @@ index 7b416c9787..8ae15c51aa 100644
void (*reset)(MachineState *state);
void (*wakeup)(MachineState *state);
diff --git a/qapi/machine.json b/qapi/machine.json
index 555458f785..d868e4d31d 100644
index 268044a34b..7a811a5860 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -157,6 +157,8 @@
#
# @default-ram-id: the default ID of initial RAM memory backend (since 5.2)
#
+# @pve-version: custom PVE version suffix specified as 'machine+pveN'
+#
# Since: 1.2
##
{ 'struct': 'MachineInfo',
@@ -164,7 +166,7 @@
@@ -365,7 +365,8 @@
'data': { 'name': 'str', '*alias': 'str',
'*is-default': 'bool', '*is-current': 'bool', 'cpu-max': 'int',
'hotpluggable-cpus': 'bool', 'numa-mem-supported': 'bool',
'deprecated': 'bool', '*default-cpu-type': 'str',
- '*default-ram-id': 'str' } }
+ '*default-ram-id': 'str', '*pve-version': 'str' } }
- 'deprecated': 'bool', '*default-cpu-type': 'str' } }
+ 'deprecated': 'bool', '*default-cpu-type': 'str',
+ '*pve-version': 'str' } }
##
# @query-machines:
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 39f149924e..0d233d55f3 100644
index 366e30e594..16aa2186b0 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1580,6 +1580,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
static MachineClass *select_machine(QDict *qdict, Error **errp)
@@ -2322,6 +2322,8 @@ static MachineClass *machine_parse(const char *name, GSList *machines)
{
const char *optarg = qdict_get_try_str(qdict, "type");
+ const char *pvever = qdict_get_try_str(qdict, "pvever");
GSList *machines = object_class_get_list(TYPE_MACHINE, false);
MachineClass *machine_class;
Error *local_err = NULL;
@@ -1597,6 +1598,11 @@ static MachineClass *select_machine(QDict *qdict, Error **errp)
}
MachineClass *mc;
GSList *el;
+ size_t pvever_index = 0;
+ gchar *name_clean;
if (is_help_option(name)) {
printf("Supported machines are:\n");
@@ -2338,12 +2340,23 @@ static MachineClass *machine_parse(const char *name, GSList *machines)
exit(0);
}
+ if (machine_class) {
+ machine_class->pve_version = g_strdup(pvever);
+ qdict_del(qdict, "pvever");
- mc = find_machine(name, machines);
+ // PVE version is specified with '+' as seperator, e.g. pc-i440fx+pvever
+ pvever_index = strcspn(name, "+");
+
+ name_clean = g_strndup(name, pvever_index);
+ mc = find_machine(name_clean, machines);
+ g_free(name_clean);
+
if (!mc) {
error_report("unsupported machine type");
error_printf("Use -machine help to list supported machines\n");
exit(1);
}
+
+ if (pvever_index < strlen(name)) {
+ mc->pve_version = &name[pvever_index+1];
+ }
+
g_slist_free(machines);
if (local_err) {
error_append_hint(&local_err, "Use -machine help to list supported machines\n");
@@ -3187,12 +3193,31 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_machine:
{
bool help;
+ size_t pvever_index, name_len;
+ const gchar *name;
+ gchar *name_clean, *pvever;
return mc;
}
keyval_parse_into(machine_opts_dict, optarg, "type", &help, &error_fatal);
if (help) {
machine_help_func(machine_opts_dict);
exit(EXIT_SUCCESS);
}
+
+ // PVE version is specified with '+' as seperator, e.g. pc-i440fx+pvever
+ name = qdict_get_try_str(machine_opts_dict, "type");
+ if (name != NULL) {
+ name_len = strlen(name);
+ pvever_index = strcspn(name, "+");
+ if (pvever_index < name_len) {
+ name_clean = g_strndup(name, pvever_index);
+ pvever = g_strndup(name + pvever_index + 1, name_len - pvever_index - 1);
+ qdict_put_str(machine_opts_dict, "pvever", pvever);
+ qdict_put_str(machine_opts_dict, "type", name_clean);
+ g_free(name_clean);
+ g_free(pvever);
+ }
+ }
+
break;
}
case QEMU_OPTION_accel:

View File

@@ -3,64 +3,58 @@ From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:57 +0200
Subject: [PATCH] PVE-Backup: add vma backup format code
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: create: register all streams before entering coroutines]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/meson.build | 2 +
meson.build | 5 +
vma-reader.c | 859 ++++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 791 ++++++++++++++++++++++++++++++++++++++++++
vma.c | 849 +++++++++++++++++++++++++++++++++++++++++++++
vma.h | 150 ++++++++
6 files changed, 2656 insertions(+)
Makefile | 3 +-
Makefile.objs | 1 +
vma-reader.c | 857 ++++++++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 790 ++++++++++++++++++++++++++++++++++++++++++++++
vma.c | 839 ++++++++++++++++++++++++++++++++++++++++++++++++
vma.h | 150 +++++++++
6 files changed, 2639 insertions(+), 1 deletion(-)
create mode 100644 vma-reader.c
create mode 100644 vma-writer.c
create mode 100644 vma.c
create mode 100644 vma.h
diff --git a/block/meson.build b/block/meson.build
index ad40c10b6a..3a0b84bc11 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -46,6 +46,8 @@ block_ss.add(files(
'zeroinit.c',
), zstd, zlib, gnutls)
diff --git a/Makefile b/Makefile
index 13dd708c4a..7b8c17ce2d 100644
--- a/Makefile
+++ b/Makefile
@@ -479,7 +479,7 @@ dummy := $(call unnest-vars,, \
+block_ss.add(files('../vma-writer.c'), libuuid)
+
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
include $(SRC_PATH)/tests/Makefile.include
if get_option('qcow1').allowed()
diff --git a/meson.build b/meson.build
index d5230eadd6..ffff66c0cc 100644
--- a/meson.build
+++ b/meson.build
@@ -1462,6 +1462,8 @@ keyutils = dependency('libkeyutils', required: false,
-all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
+all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
has_gettid = cc.has_function('gettid')
qemu-version.h: FORCE
$(call quiet-command, \
@@ -602,6 +602,7 @@ qemu-img$(EXESUF): qemu-img.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io
qemu-nbd$(EXESUF): qemu-nbd.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
qemu-io$(EXESUF): qemu-io.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
qemu-storage-daemon$(EXESUF): qemu-storage-daemon.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(chardev-obj-y) $(io-obj-y) $(qom-obj-y) $(storage-daemon-obj-y) $(COMMON_LDADDS)
+vma$(EXESUF): vma.o vma-reader.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+libuuid = cc.find_library('uuid', required: true)
+
# libselinux
selinux = dependency('libselinux',
required: get_option('selinux'),
@@ -3607,6 +3609,9 @@ if have_tools
dependencies: [blockdev, qemuutil, gnutls, selinux],
install: true)
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
diff --git a/Makefile.objs b/Makefile.objs
index a1307c12a8..ade7b17a69 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -17,6 +17,7 @@ block-obj-y = block/ nbd/ scsi/
block-obj-y += block.o blockjob.o job.o
block-obj-y += qemu-io-cmds.o
block-obj-$(CONFIG_REPLICATION) += replication.o
+block-obj-y += vma-writer.o
block-obj-m = block/
+ vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
+ dependencies: [authz, block, crypto, io, qom], install: true)
+
subdir('storage-daemon')
subdir('contrib/rdmacm-mux')
subdir('contrib/elf2dmp')
diff --git a/vma-reader.c b/vma-reader.c
new file mode 100644
index 0000000000..e65f1e8415
index 0000000000..2b1d1cdab3
--- /dev/null
+++ b/vma-reader.c
@@ -0,0 +1,859 @@
@@ -0,0 +1,857 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -78,6 +72,7 @@ index 0000000000..e65f1e8415
+#include <glib.h>
+#include <uuid/uuid.h>
+
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/ratelimit.h"
+#include "vma.h"
@@ -254,9 +249,6 @@ index 0000000000..e65f1e8415
+ if (vmar->rstate[i].bitmap) {
+ g_free(vmar->rstate[i].bitmap);
+ }
+ if (vmar->rstate[i].target) {
+ blk_unref(vmar->rstate[i].target);
+ }
+ }
+
+ if (vmar->md5csum) {
@@ -588,7 +580,7 @@ index 0000000000..e65f1e8415
+ }
+ }
+ } else {
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, buf, nb_sectors * BDRV_SECTOR_SIZE, 0);
+ if (res < 0) {
+ error_setg(errp, "blk_pwrite to %s failed (%d)",
+ bdrv_get_device_name(blk_bs(target)), res);
@@ -922,10 +914,10 @@ index 0000000000..e65f1e8415
+
diff --git a/vma-writer.c b/vma-writer.c
new file mode 100644
index 0000000000..df4b20793d
index 0000000000..f5d2c5d23c
--- /dev/null
+++ b/vma-writer.c
@@ -0,0 +1,791 @@
@@ -0,0 +1,790 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -949,7 +941,6 @@ index 0000000000..df4b20793d
+#include "qemu/main-loop.h"
+#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
+#include "qemu/memalign.h"
+
+#define DEBUG_VMA 0
+
@@ -1133,9 +1124,9 @@ index 0000000000..df4b20793d
+ assert(qemu_in_coroutine());
+ AioContext *ctx = qemu_get_current_aio_context();
+ aio_set_fd_handler(ctx, fd, false, NULL, (IOHandler *)qemu_coroutine_enter,
+ NULL, NULL, qemu_coroutine_self());
+ NULL, qemu_coroutine_self());
+ qemu_coroutine_yield();
+ aio_set_fd_handler(ctx, fd, false, NULL, NULL, NULL, NULL, NULL);
+ aio_set_fd_handler(ctx, fd, false, NULL, NULL, NULL, NULL);
+}
+
+static ssize_t coroutine_fn
@@ -1222,20 +1213,20 @@ index 0000000000..df4b20793d
+
+ if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) {
+ oflags = O_NONBLOCK|O_WRONLY;
+ vmaw->fd = qemu_open(filename, oflags, errp);
+ vmaw->fd = qemu_open(filename, oflags, 0644);
+ } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) {
+ oflags = O_NONBLOCK|O_WRONLY;
+ vmaw->fd = qemu_open(filename, oflags, errp);
+ vmaw->fd = qemu_open(filename, oflags, 0644);
+ } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) {
+ vmaw->fd = monitor_get_fd(monitor_cur(), tmp_id_str, errp);
+ vmaw->fd = monitor_get_fd(cur_mon, tmp_id_str, errp);
+ if (vmaw->fd < 0) {
+ goto err;
+ }
+ /* try to use O_NONBLOCK */
+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
+ } else {
+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_EXCL;
+ vmaw->fd = qemu_create(filename, oflags, 0644, errp);
+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_CREAT|O_EXCL;
+ vmaw->fd = qemu_open(filename, oflags, 0644);
+ }
+
+ if (vmaw->fd < 0) {
@@ -1719,10 +1710,10 @@ index 0000000000..df4b20793d
+}
diff --git a/vma.c b/vma.c
new file mode 100644
index 0000000000..e8dffb43e0
index 0000000000..2eea2fc281
--- /dev/null
+++ b/vma.c
@@ -0,0 +1,849 @@
@@ -0,0 +1,839 @@
+/*
+ * VMA: Virtual Machine Archive
+ *
@@ -1740,11 +1731,11 @@ index 0000000000..e8dffb43e0
+#include <glib.h>
+
+#include "vma.h"
+#include "qemu-common.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include "qemu/memalign.h"
+#include "qapi/qmp/qdict.h"
+#include "sysemu/block-backend.h"
+
@@ -2034,6 +2025,8 @@ index 0000000000..e8dffb43e0
+ int vmstate_fd = -1;
+ guint8 vmstate_stream = 0;
+
+ BlockBackend *blk = NULL;
+
+ for (i = 1; i < 255; i++) {
+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
@@ -2054,8 +2047,6 @@ index 0000000000..e8dffb43e0
+ int flags = BDRV_O_RDWR;
+ bool write_zero = true;
+
+ BlockBackend *blk = NULL;
+
+ if (readmap) {
+ RestoreMap *map;
+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
@@ -2168,6 +2159,8 @@ index 0000000000..e8dffb43e0
+
+ vma_reader_destroy(vmar);
+
+ blk_unref(blk);
+
+ bdrv_close_all();
+
+ return ret;
@@ -2296,7 +2289,6 @@ index 0000000000..e8dffb43e0
+ int i, c;
+ int verbose = 0;
+ const char *archivename;
+ GList *backup_coroutines = NULL;
+ GList *config_files = NULL;
+
+ for (;;) {
@@ -2385,9 +2377,7 @@ index 0000000000..e8dffb43e0
+ job->dev_id = dev_id;
+
+ Coroutine *co = qemu_coroutine_create(backup_run, job);
+ // Don't enter coroutine yet, because it might write the header before
+ // all streams can be registered.
+ backup_coroutines = g_list_append(backup_coroutines, co);
+ qemu_coroutine_enter(co);
+ }
+
+ VmaStatus vmastat;
@@ -2395,13 +2385,6 @@ index 0000000000..e8dffb43e0
+ int last_percent = -1;
+
+ if (devcount) {
+ GList *entry = backup_coroutines;
+ while (entry && entry->data) {
+ Coroutine *co = entry->data;
+ qemu_coroutine_enter(co);
+ entry = g_list_next(entry);
+ }
+
+ while (1) {
+ main_loop_wait(false);
+ vma_writer_get_status(vmaw, &vmastat);
@@ -2466,8 +2449,6 @@ index 0000000000..e8dffb43e0
+ g_error("creating vma archive failed");
+ }
+
+ g_list_free(backup_coroutines);
+ g_list_free(config_files);
+ vma_writer_destroy(vmaw);
+ return 0;
+}

View File

@@ -1,59 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 2 Mar 2022 08:35:05 +0100
Subject: [PATCH] block/backup: move bcs bitmap initialization to job creation
For backing up the state of multiple disks from the same time, a job
for each disk has to be created. It's convenient if the jobs don't
have to be started at the same time and if operation of the VM can be
resumed after job creation. This would lead to a window between job
creation and running the job, where writes can happen. But no writes
should happen between setting up the copy-before-write filter and
setting up the block copy state bitmap, because then new writes would
just pass through.
Commit 06e0a9c16405c0a4c1eca33cf286cc04c42066a2 moved initalization of
the bitmap to setting up the copy-before-write filter when sync_mode
is not MIRROR_SYNC_MODE_BITMAP. Ensure that the bitmap is initialized
upon job creation for the remaining case too, by moving the
backup_init_bcs_bitmap call to backup_job_create.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/backup.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index b2b649e305..b6fa9e8a69 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
true);
} else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
/*
- * We can't hog the coroutine to initialize this thoroughly.
- * Set a flag and resume work when we are able to yield safely.
+ * Initialization is costly here. Simply set a flag and let the
+ * backup_run coroutine resume work once it can yield safely.
*/
block_copy_set_skip_unallocated(job->bcs, true);
}
@@ -252,8 +252,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
int ret;
- backup_init_bcs_bitmap(s);
-
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0;
int64_t count;
@@ -492,6 +490,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
+ backup_init_bcs_bitmap(job);
+
return &job->common;
error:

View File

@@ -7,23 +7,33 @@ Subject: [PATCH] PVE-Backup: add backup-dump block driver
- move BackupBlockJob declaration from block/backup.c to include/block/block_int.h
- block/backup.c - backup-job-create: also consider source cluster size
- job.c: make job_should_pause non-static
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/backup-dump.c | 167 +++++++++++++++++++++++++++++++
block/backup.c | 30 ++----
block/meson.build | 1 +
include/block/block_int-common.h | 35 +++++++
job.c | 3 +-
5 files changed, 213 insertions(+), 23 deletions(-)
block/Makefile.objs | 1 +
block/backup-dump.c | 168 ++++++++++++++++++++++++++++++++++++++
block/backup.c | 23 ++----
include/block/block_int.h | 30 +++++++
job.c | 3 +-
5 files changed, 206 insertions(+), 19 deletions(-)
create mode 100644 block/backup-dump.c
diff --git a/block/Makefile.objs b/block/Makefile.objs
index d1a9227b8f..9ea0477d0b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -33,6 +33,7 @@ block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_LIBSSH) += ssh.o
+block-obj-y += backup-dump.o
block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
block-obj-y += backup.o
diff --git a/block/backup-dump.c b/block/backup-dump.c
new file mode 100644
index 0000000000..04718a94e2
index 0000000000..93d7f46950
--- /dev/null
+++ b/block/backup-dump.c
@@ -0,0 +1,167 @@
@@ -0,0 +1,168 @@
+/*
+ * BlockDriver to send backup data stream to a callback function
+ *
@@ -35,6 +45,7 @@ index 0000000000..04718a94e2
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qom/object_interfaces.h"
+#include "block/block_int.h"
+
@@ -192,18 +203,17 @@ index 0000000000..04718a94e2
+ return bs;
+}
diff --git a/block/backup.c b/block/backup.c
index b6fa9e8a69..789f8b7799 100644
index 4f13bb20a5..cd42236b79 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -29,28 +29,6 @@
@@ -32,24 +32,6 @@
#include "block/copy-before-write.h"
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
-typedef struct BackupBlockJob {
- BlockJob common;
- BlockDriverState *cbw;
- BlockDriverState *backup_top;
- BlockDriverState *source_bs;
- BlockDriverState *target_bs;
-
- BdrvDirtyBitmap *sync_bitmap;
-
@@ -212,64 +222,38 @@ index b6fa9e8a69..789f8b7799 100644
- BlockdevOnError on_source_error;
- BlockdevOnError on_target_error;
- uint64_t len;
- uint64_t bytes_read;
- int64_t cluster_size;
- BackupPerf perf;
-
- BlockCopyState *bcs;
-
- bool wait;
- BlockCopyCallState *bg_bcs_call;
-} BackupBlockJob;
-
static const BlockJobDriver backup_job_driver;
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -454,6 +432,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
@@ -422,6 +404,11 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
cluster_size = block_copy_cluster_size(bcs);
+ if (cluster_size < 0) {
+ goto error;
+ }
+
+ BlockDriverInfo bdi;
+ if (bdrv_get_info(bs, &bdi) == 0) {
+ cluster_size = MAX(cluster_size, bdi.cluster_size);
+ }
if (perf->max_chunk && perf->max_chunk < cluster_size) {
error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
diff --git a/block/meson.build b/block/meson.build
index 3a0b84bc11..7f22e7f177 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,6 +4,7 @@ block_ss.add(files(
'aio_task.c',
'amend.c',
'backup.c',
+ 'backup-dump.c',
'copy-before-write.c',
'blkdebug.c',
'blklogwrites.c',
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 8947abab76..f272d0d8dc 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -26,6 +26,7 @@
#include "block/accounting.h"
#include "block/block.h"
+#include "block/block-copy.h"
#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
@@ -64,6 +65,40 @@
+
/*
* If source is in backing chain of target assume that target is going to be
* used for "image fleecing", i.e. it should represent a kind of snapshot of
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38dec0275b..1efb1f527c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -62,6 +62,36 @@
#define BLOCK_PROBE_BUF_SIZE 512
+typedef int BackupDumpFunc(void *opaque, uint64_t offset, uint64_t bytes, const void *buf);
+
+BlockDriverState *bdrv_backup_dump_create(
+BlockDriverState *bdrv_backuo_dump_create(
+ int dump_cb_block_size,
+ uint64_t byte_size,
+ BackupDumpFunc *dump_cb,
@@ -281,9 +265,8 @@ index 8947abab76..f272d0d8dc 100644
+typedef struct BlockCopyState BlockCopyState;
+typedef struct BackupBlockJob {
+ BlockJob common;
+ BlockDriverState *cbw;
+ BlockDriverState *backup_top;
+ BlockDriverState *source_bs;
+ BlockDriverState *target_bs;
+
+ BdrvDirtyBitmap *sync_bitmap;
+
@@ -292,23 +275,20 @@ index 8947abab76..f272d0d8dc 100644
+ BlockdevOnError on_source_error;
+ BlockdevOnError on_target_error;
+ uint64_t len;
+ uint64_t bytes_read;
+ int64_t cluster_size;
+ BackupPerf perf;
+
+ BlockCopyState *bcs;
+
+ bool wait;
+ BlockCopyCallState *bg_bcs_call;
+} BackupBlockJob;
+
enum BdrvTrackedRequestType {
BDRV_TRACKED_READ,
BDRV_TRACKED_WRITE,
diff --git a/job.c b/job.c
index 075c6f3a20..e5699ad200 100644
index 53be57a3a0..b8139c80a4 100644
--- a/job.c
+++ b/job.c
@@ -276,7 +276,8 @@ static bool job_started(Job *job)
@@ -269,7 +269,8 @@ static bool job_started(Job *job)
return job->co;
}

View File

@@ -6,32 +6,39 @@ Subject: [PATCH] PVE-Backup: pbs-restore - new command to restore from proxmox
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
meson.build | 4 +
pbs-restore.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 227 insertions(+)
Makefile | 4 +-
pbs-restore.c | 217 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 220 insertions(+), 1 deletion(-)
create mode 100644 pbs-restore.c
diff --git a/meson.build b/meson.build
index 0bc2fb5b10..f48d2e0457 100644
--- a/meson.build
+++ b/meson.build
@@ -3613,6 +3613,10 @@ if have_tools
vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
dependencies: [authz, block, crypto, io, qom], install: true)
diff --git a/Makefile b/Makefile
index aec216968d..b73da29f24 100644
--- a/Makefile
+++ b/Makefile
@@ -479,7 +479,7 @@ dummy := $(call unnest-vars,, \
include $(SRC_PATH)/tests/Makefile.include
-all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
+all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) vma$(EXESUF) pbs-restore$(EXESUF) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
qemu-version.h: FORCE
$(call quiet-command, \
@@ -604,6 +604,8 @@ qemu-io$(EXESUF): qemu-io.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-o
qemu-storage-daemon$(EXESUF): qemu-storage-daemon.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(chardev-obj-y) $(io-obj-y) $(qom-obj-y) $(storage-daemon-obj-y) $(COMMON_LDADDS)
qemu-storage-daemon$(EXESUF): LIBS += -lproxmox_backup_qemu
vma$(EXESUF): vma.o vma-reader.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+pbs-restore$(EXESUF): pbs-restore.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+pbs-restore$(EXESUF): LIBS += -lproxmox_backup_qemu
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
+ pbs_restore = executable('pbs-restore', files('pbs-restore.c') + genh,
+ dependencies: [authz, block, crypto, io, qom,
+ libproxmox_backup_qemu], install: true)
+
subdir('storage-daemon')
subdir('contrib/rdmacm-mux')
subdir('contrib/elf2dmp')
diff --git a/pbs-restore.c b/pbs-restore.c
new file mode 100644
index 0000000000..2f834cf42e
index 0000000000..4bf37ef1fa
--- /dev/null
+++ b/pbs-restore.c
@@ -0,0 +1,223 @@
@@ -0,0 +1,217 @@
+/*
+ * Qemu image restore helper for Proxmox Backup
+ *
@@ -50,6 +57,7 @@ index 0000000000..2f834cf42e
+#include <getopt.h>
+#include <string.h>
+
+#include "qemu-common.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
@@ -95,7 +103,7 @@ index 0000000000..2f834cf42e
+ }
+ res = blk_pwrite_zeroes(callback_data->target, offset, data_len, 0);
+ } else {
+ res = blk_pwrite(callback_data->target, offset, data_len, data, 0);
+ res = blk_pwrite(callback_data->target, offset, data, data_len, 0);
+ }
+
+ if (res < 0) {
@@ -194,19 +202,13 @@ index 0000000000..2f834cf42e
+ fprintf(stderr, "connecting to repository '%s'\n", repository);
+ }
+ char *pbs_error = NULL;
+ ProxmoxRestoreHandle *conn = proxmox_restore_new(
+ ProxmoxRestoreHandle *conn = proxmox_restore_connect(
+ repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
+ if (conn == NULL) {
+ fprintf(stderr, "restore failed: %s\n", pbs_error);
+ return -1;
+ }
+
+ int res = proxmox_restore_connect(conn, &pbs_error);
+ if (res < 0 || pbs_error) {
+ fprintf(stderr, "restore failed (connection error): %s\n", pbs_error);
+ return -1;
+ }
+
+ QDict *options = qdict_new();
+
+ if (format) {
@@ -237,7 +239,7 @@ index 0000000000..2f834cf42e
+ fprintf(stderr, "starting to restore snapshot '%s'\n", snapshot);
+ fflush(stderr); // ensure we do not get printed after the progress log
+ }
+ res = proxmox_restore_image(
+ int res = proxmox_restore_image(
+ conn,
+ archive_name,
+ write_callback,
@@ -246,7 +248,6 @@ index 0000000000..2f834cf42e
+ verbose);
+
+ proxmox_restore_disconnect(conn);
+ blk_unref(blk);
+
+ if (res < 0) {
+ fprintf(stderr, "restore failed: %s\n", pbs_error);

View File

@@ -0,0 +1,914 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:17:00 +0200
Subject: [PATCH] PVE-Backup: avoid coroutines to fix AIO freeze, cleanups
We observed various AIO pool loop freezes, so we decided to avoid
coroutines and restrict ourselfes using similar code as upstream
(see blockdev.c: do_backup_common).
* avoid coroutine for job related code (causes hangs with iothreads)
- We then acquire/release all mutexes outside coroutines now, so we can now
correctly use a normal mutex.
* split pvebackup_co_dump_cb into:
- pvebackup_co_dump_pbs_cb and
- pvebackup_co_dump_pbs_cb
* new helper functions
- pvebackup_propagate_error
- pvebackup_error_or_canceled
- pvebackup_add_transfered_bytes
* avoid cancel flag (not needed)
* simplify backup_cancel logic
There is progress on upstream to support running qmp commands inside
coroutines, see:
https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04852.html
We should consider using that when it is available in upstream qemu.
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
---
pve-backup.c | 638 ++++++++++++++++++++++++++-------------------------
1 file changed, 320 insertions(+), 318 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 55441eb9d1..d40f3f2fd6 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -11,11 +11,27 @@
/* PVE backup state and related function */
+/*
+ * Note: A resume from a qemu_coroutine_yield can happen in a different thread,
+ * so you may not use normal mutexes within coroutines:
+ *
+ * ---bad-example---
+ * qemu_rec_mutex_lock(lock)
+ * ...
+ * qemu_coroutine_yield() // wait for something
+ * // we are now inside a different thread
+ * qemu_rec_mutex_unlock(lock) // Crash - wrong thread!!
+ * ---end-bad-example--
+ *
+ * ==> Always use CoMutext inside coroutines.
+ * ==> Never acquire/release AioContext withing coroutines (because that use QemuRecMutex)
+ *
+ */
static struct PVEBackupState {
struct {
- // Everithing accessed from qmp command, protected using rwlock
- CoRwlock rwlock;
+ // Everithing accessed from qmp_backup_query command is protected using lock
+ QemuMutex lock;
Error *error;
time_t start_time;
time_t end_time;
@@ -25,19 +41,20 @@ static struct PVEBackupState {
size_t total;
size_t transferred;
size_t zero_bytes;
- bool cancel;
} stat;
int64_t speed;
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
- CoMutex backup_mutex;
+ QemuMutex backup_mutex;
+ CoMutex dump_callback_mutex;
} backup_state;
static void pvebackup_init(void)
{
- qemu_co_rwlock_init(&backup_state.stat.rwlock);
- qemu_co_mutex_init(&backup_state.backup_mutex);
+ qemu_mutex_init(&backup_state.stat.lock);
+ qemu_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
// initialize PVEBackupState at startup
@@ -52,10 +69,54 @@ typedef struct PVEBackupDevInfo {
BlockDriverState *target;
} PVEBackupDevInfo;
-static void pvebackup_co_run_next_job(void);
+static void pvebackup_run_next_job(void);
+static BlockJob *
+lookup_active_block_job(PVEBackupDevInfo *di)
+{
+ if (!di->completed && di->bs) {
+ for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
+ if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
+ continue;
+ }
+
+ BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
+ if (bjob && bjob->source_bs == di->bs) {
+ return job;
+ }
+ }
+ }
+ return NULL;
+}
+
+static void pvebackup_propagate_error(Error *err)
+{
+ qemu_mutex_lock(&backup_state.stat.lock);
+ error_propagate(&backup_state.stat.error, err);
+ qemu_mutex_unlock(&backup_state.stat.lock);
+}
+
+static bool pvebackup_error_or_canceled(void)
+{
+ qemu_mutex_lock(&backup_state.stat.lock);
+ bool error_or_canceled = !!backup_state.stat.error;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ return error_or_canceled;
+}
+
+static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
+{
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.zero_bytes += zero_bytes;
+ backup_state.stat.transferred += transferred;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+}
+
+// This may get called from multiple coroutines in multiple io-threads
+// Note1: this may get called after job_cancel()
static int coroutine_fn
-pvebackup_co_dump_cb(
+pvebackup_co_dump_pbs_cb(
void *opaque,
uint64_t start,
uint64_t bytes,
@@ -67,137 +128,127 @@ pvebackup_co_dump_cb(
const unsigned char *buf = pbuf;
PVEBackupDevInfo *di = opaque;
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- bool cancel = backup_state.stat.cancel;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ assert(backup_state.pbs);
+
+ Error *local_err = NULL;
+ int pbs_res = -1;
+
+ qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
- if (cancel) {
- return size; // return success
+ // avoid deadlock if job is cancelled
+ if (pvebackup_error_or_canceled()) {
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return -1;
}
- qemu_co_mutex_lock(&backup_state.backup_mutex);
+ pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
- int ret = -1;
+ if (pbs_res < 0) {
+ pvebackup_propagate_error(local_err);
+ return pbs_res;
+ } else {
+ pvebackup_add_transfered_bytes(size, !buf ? size : 0);
+ }
- if (backup_state.vmaw) {
- size_t zero_bytes = 0;
- uint64_t remaining = size;
-
- uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
- if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- if (!backup_state.stat.error) {
- qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
- error_setg(&backup_state.stat.error,
- "got unaligned write inside backup dump "
- "callback (sector %ld)", start);
- }
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return -1; // not aligned to cluster size
- }
+ return size;
+}
- while (remaining > 0) {
- ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num,
- buf, &zero_bytes);
- ++cluster_num;
- if (buf) {
- buf += VMA_CLUSTER_SIZE;
- }
- if (ret < 0) {
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- if (!backup_state.stat.error) {
- qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
- vma_writer_error_propagate(backup_state.vmaw, &backup_state.stat.error);
- }
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+// This may get called from multiple coroutines in multiple io-threads
+static int coroutine_fn
+pvebackup_co_dump_vma_cb(
+ void *opaque,
+ uint64_t start,
+ uint64_t bytes,
+ const void *pbuf)
+{
+ assert(qemu_in_coroutine());
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return ret;
- } else {
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- backup_state.stat.zero_bytes += zero_bytes;
- if (remaining >= VMA_CLUSTER_SIZE) {
- backup_state.stat.transferred += VMA_CLUSTER_SIZE;
- remaining -= VMA_CLUSTER_SIZE;
- } else {
- backup_state.stat.transferred += remaining;
- remaining = 0;
- }
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- }
- }
- } else if (backup_state.pbs) {
- Error *local_err = NULL;
- int pbs_res = -1;
+ const uint64_t size = bytes;
+ const unsigned char *buf = pbuf;
+ PVEBackupDevInfo *di = opaque;
- pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+ int ret = -1;
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ assert(backup_state.vmaw);
- if (pbs_res < 0) {
- error_propagate(&backup_state.stat.error, local_err);
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return pbs_res;
- } else {
- if (!buf) {
- backup_state.stat.zero_bytes += size;
- }
- backup_state.stat.transferred += size;
+ uint64_t remaining = size;
+
+ uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
+ if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
+ Error *local_err = NULL;
+ error_setg(&local_err,
+ "got unaligned write inside backup dump "
+ "callback (sector %ld)", start);
+ pvebackup_propagate_error(local_err);
+ return -1; // not aligned to cluster size
+ }
+
+ while (remaining > 0) {
+ qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
+ // avoid deadlock if job is cancelled
+ if (pvebackup_error_or_canceled()) {
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return -1;
}
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ size_t zero_bytes = 0;
+ ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
- } else {
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- if (!buf) {
- backup_state.stat.zero_bytes += size;
+ ++cluster_num;
+ if (buf) {
+ buf += VMA_CLUSTER_SIZE;
+ }
+ if (ret < 0) {
+ Error *local_err = NULL;
+ vma_writer_error_propagate(backup_state.vmaw, &local_err);
+ pvebackup_propagate_error(local_err);
+ return ret;
+ } else {
+ if (remaining >= VMA_CLUSTER_SIZE) {
+ assert(ret == VMA_CLUSTER_SIZE);
+ pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes);
+ remaining -= VMA_CLUSTER_SIZE;
+ } else {
+ assert(ret == remaining);
+ pvebackup_add_transfered_bytes(remaining, zero_bytes);
+ remaining = 0;
+ }
}
- backup_state.stat.transferred += size;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
}
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
-
return size;
}
-static void coroutine_fn pvebackup_co_cleanup(void)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_co_cleanup(void *unused)
{
assert(qemu_in_coroutine());
- qemu_co_mutex_lock(&backup_state.backup_mutex);
-
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ qemu_mutex_lock(&backup_state.stat.lock);
backup_state.stat.end_time = time(NULL);
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
if (backup_state.vmaw) {
Error *local_err = NULL;
vma_writer_close(backup_state.vmaw, &local_err);
if (local_err != NULL) {
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- error_propagate(&backup_state.stat.error, local_err);
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- }
+ pvebackup_propagate_error(local_err);
+ }
backup_state.vmaw = NULL;
}
if (backup_state.pbs) {
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
- if (!error_or_canceled) {
+ if (!pvebackup_error_or_canceled()) {
Error *local_err = NULL;
proxmox_backup_co_finish(backup_state.pbs, &local_err);
if (local_err != NULL) {
- qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
- error_propagate(&backup_state.stat.error, local_err);
- }
+ pvebackup_propagate_error(local_err);
+ }
}
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
proxmox_backup_disconnect(backup_state.pbs);
backup_state.pbs = NULL;
@@ -205,43 +256,14 @@ static void coroutine_fn pvebackup_co_cleanup(void)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-typedef struct PVEBackupCompeteCallbackData {
- PVEBackupDevInfo *di;
- int result;
-} PVEBackupCompeteCallbackData;
-
-static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_complete_stream(void *opaque)
{
- assert(qemu_in_coroutine());
-
- PVEBackupCompeteCallbackData *cb_data = opaque;
-
- qemu_co_mutex_lock(&backup_state.backup_mutex);
-
- PVEBackupDevInfo *di = cb_data->di;
- int ret = cb_data->result;
-
- di->completed = true;
-
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
-
- if (ret < 0 && !backup_state.stat.error) {
- qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
- error_setg(&backup_state.stat.error, "job failed with err %d - %s",
- ret, strerror(-ret));
- }
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
- di->bs = NULL;
+ PVEBackupDevInfo *di = opaque;
- if (di->target) {
- bdrv_unref(di->target);
- di->target = NULL;
- }
+ bool error_or_canceled = pvebackup_error_or_canceled();
if (backup_state.vmaw) {
vma_writer_close_stream(backup_state.vmaw, di->dev_id);
@@ -251,110 +273,101 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
Error *local_err = NULL;
proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
if (local_err != NULL) {
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- error_propagate(&backup_state.stat.error, local_err);
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ pvebackup_propagate_error(local_err);
}
}
+}
- // remove self from job queue
- backup_state.di_list = g_list_remove(backup_state.di_list, di);
- g_free(di);
+static void pvebackup_complete_cb(void *opaque, int ret)
+{
+ assert(!qemu_in_coroutine());
- int pending_jobs = g_list_length(backup_state.di_list);
+ PVEBackupDevInfo *di = opaque;
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_lock(&backup_state.backup_mutex);
- if (pending_jobs > 0) {
- pvebackup_co_run_next_job();
- } else {
- pvebackup_co_cleanup();
+ di->completed = true;
+
+ if (ret < 0) {
+ Error *local_err = NULL;
+ error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
+ pvebackup_propagate_error(local_err);
}
-}
-static void pvebackup_complete_cb(void *opaque, int ret)
-{
- // This can be called from the main loop, or from a coroutine
- PVEBackupCompeteCallbackData cb_data = {
- .di = opaque,
- .result = ret,
- };
+ di->bs = NULL;
- if (qemu_in_coroutine()) {
- pvebackup_co_complete_cb(&cb_data);
- } else {
- block_on_coroutine_fn(pvebackup_co_complete_cb, &cb_data);
- }
-}
+ assert(di->target == NULL);
-static void coroutine_fn pvebackup_co_cancel(void *opaque)
-{
- assert(qemu_in_coroutine());
+ block_on_coroutine_fn(pvebackup_complete_stream, di);
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- backup_state.stat.cancel = true;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ // remove self from job queue
+ backup_state.di_list = g_list_remove(backup_state.di_list, di);
- qemu_co_mutex_lock(&backup_state.backup_mutex);
+ g_free(di);
- // Avoid race between block jobs and backup-cancel command:
- if (!(backup_state.vmaw || backup_state.pbs)) {
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
- }
+ qemu_mutex_unlock(&backup_state.backup_mutex);
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- if (!backup_state.stat.error) {
- qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
- error_setg(&backup_state.stat.error, "backup cancelled");
- }
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ pvebackup_run_next_job();
+}
+
+static void pvebackup_cancel(void)
+{
+ assert(!qemu_in_coroutine());
+
+ Error *cancel_err = NULL;
+ error_setg(&cancel_err, "backup canceled");
+ pvebackup_propagate_error(cancel_err);
+
+ qemu_mutex_lock(&backup_state.backup_mutex);
if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
- vma_writer_set_error(backup_state.vmaw, "backup cancelled");
+ vma_writer_set_error(backup_state.vmaw, "backup canceled");
}
if (backup_state.pbs) {
- proxmox_backup_abort(backup_state.pbs, "backup cancelled");
+ proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
- bool running_jobs = 0;
- GList *l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
- if (!di->completed && di->bs) {
- for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
- if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
- continue;
- }
+ qemu_mutex_unlock(&backup_state.backup_mutex);
- BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
- if (bjob && bjob->source_bs == di->bs) {
- AioContext *aio_context = job->job.aio_context;
- aio_context_acquire(aio_context);
+ for(;;) {
- if (!di->completed) {
- running_jobs += 1;
- job_cancel(&job->job, false);
- }
- aio_context_release(aio_context);
- }
+ BlockJob *next_job = NULL;
+
+ qemu_mutex_lock(&backup_state.backup_mutex);
+
+ GList *l = backup_state.di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ BlockJob *job = lookup_active_block_job(di);
+ if (job != NULL) {
+ next_job = job;
+ break;
}
}
- }
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_unlock(&backup_state.backup_mutex);
- if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler
+ if (next_job) {
+ AioContext *aio_context = next_job->job.aio_context;
+ aio_context_acquire(aio_context);
+ job_cancel_sync(&next_job->job);
+ aio_context_release(aio_context);
+ } else {
+ break;
+ }
+ }
}
void qmp_backup_cancel(Error **errp)
{
- block_on_coroutine_fn(pvebackup_co_cancel, NULL);
+ pvebackup_cancel();
}
+// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config(
const char *file,
const char *name,
@@ -406,46 +419,97 @@ static int coroutine_fn pvebackup_co_add_config(
bool job_should_pause(Job *job);
-static void coroutine_fn pvebackup_co_run_next_job(void)
+static void pvebackup_run_next_job(void)
{
- assert(qemu_in_coroutine());
+ assert(!qemu_in_coroutine());
- qemu_co_mutex_lock(&backup_state.backup_mutex);
+ qemu_mutex_lock(&backup_state.backup_mutex);
GList *l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (!di->completed && di->bs) {
- for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
- if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
- continue;
- }
- BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
- if (bjob && bjob->source_bs == di->bs) {
- AioContext *aio_context = job->job.aio_context;
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
- aio_context_acquire(aio_context);
-
- if (job_should_pause(&job->job)) {
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
- if (error_or_canceled) {
- job_cancel(&job->job, false);
- } else {
- job_resume(&job->job);
- }
- }
- aio_context_release(aio_context);
- return;
+ BlockJob *job = lookup_active_block_job(di);
+
+ if (job) {
+ qemu_mutex_unlock(&backup_state.backup_mutex);
+
+ AioContext *aio_context = job->job.aio_context;
+ aio_context_acquire(aio_context);
+
+ if (job_should_pause(&job->job)) {
+ bool error_or_canceled = pvebackup_error_or_canceled();
+ if (error_or_canceled) {
+ job_cancel_sync(&job->job);
+ } else {
+ job_resume(&job->job);
}
}
+ aio_context_release(aio_context);
+ return;
+ }
+ }
+
+ block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
+
+ qemu_mutex_unlock(&backup_state.backup_mutex);
+}
+
+static bool create_backup_jobs(void) {
+
+ assert(!qemu_in_coroutine());
+
+ Error *local_err = NULL;
+
+ /* create and start all jobs (paused state) */
+ GList *l = backup_state.di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ assert(di->target != NULL);
+
+ AioContext *aio_context = bdrv_get_aio_context(di->bs);
+ aio_context_acquire(aio_context);
+
+ BlockJob *job = backup_job_create(
+ NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
+ BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+ JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
+
+ aio_context_release(aio_context);
+
+ if (!job || local_err != NULL) {
+ Error *create_job_err = NULL;
+ error_setg(&create_job_err, "backup_job_create failed: %s",
+ local_err ? error_get_pretty(local_err) : "null");
+
+ pvebackup_propagate_error(create_job_err);
+ break;
+ }
+ job_start(&job->job);
+
+ bdrv_unref(di->target);
+ di->target = NULL;
+ }
+
+ bool errors = pvebackup_error_or_canceled();
+
+ if (errors) {
+ l = backup_state.di_list;
+ while (l) {
+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+ l = g_list_next(l);
+
+ if (di->target) {
+ bdrv_unref(di->target);
+ di->target = NULL;
+ }
}
}
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ return errors;
}
typedef struct QmpBackupTask {
@@ -476,7 +540,8 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;
-static void coroutine_fn pvebackup_co_start(void *opaque)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_co_prepare(void *opaque)
{
assert(qemu_in_coroutine());
@@ -495,16 +560,12 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
GList *di_list = NULL;
GList *l;
UuidInfo *uuid_info;
- BlockJob *job;
const char *config_name = "qemu-server.conf";
const char *firewall_name = "qemu-server.fw";
- qemu_co_mutex_lock(&backup_state.backup_mutex);
-
if (backup_state.di_list) {
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
return;
}
@@ -627,7 +688,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
if (dev_id < 0)
goto err;
- if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
goto err;
}
@@ -648,7 +709,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
goto err;
}
@@ -713,9 +774,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
}
/* initialize global backup_state now */
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-
- backup_state.stat.cancel = false;
+ qemu_mutex_lock(&backup_state.stat.lock);
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
@@ -738,7 +797,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
@@ -747,48 +806,6 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
backup_state.di_list = di_list;
- /* start all jobs (paused state) */
- l = di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l);
-
- // make sure target runs in same aoi_context as source
- AioContext *aio_context = bdrv_get_aio_context(di->bs);
- aio_context_acquire(aio_context);
- GSList *ignore = NULL;
- bdrv_set_aio_context_ignore(di->target, aio_context, &ignore);
- g_slist_free(ignore);
- aio_context_release(aio_context);
-
- job = backup_job_create(NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
- BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
- if (!job || local_err != NULL) {
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- error_setg(&backup_state.stat.error, "backup_job_create failed");
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- break;
- }
- job_start(&job->job);
- if (di->target) {
- bdrv_unref(di->target);
- di->target = NULL;
- }
- }
-
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
-
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- bool no_errors = !backup_state.stat.error;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
- if (no_errors) {
- pvebackup_co_run_next_job(); // run one job
- } else {
- pvebackup_co_cancel(NULL);
- }
-
uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str;
@@ -831,8 +848,6 @@ err:
rmdir(backup_dir);
}
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
-
task->result = NULL;
return;
}
@@ -876,32 +891,31 @@ UuidInfo *qmp_backup(
.errp = errp,
};
- block_on_coroutine_fn(pvebackup_co_start, &task);
+ qemu_mutex_lock(&backup_state.backup_mutex);
- return task.result;
-}
+ block_on_coroutine_fn(pvebackup_co_prepare, &task);
+ if (*errp == NULL) {
+ create_backup_jobs();
+ qemu_mutex_unlock(&backup_state.backup_mutex);
+ pvebackup_run_next_job();
+ } else {
+ qemu_mutex_unlock(&backup_state.backup_mutex);
+ }
-typedef struct QmpQueryBackupTask {
- Error **errp;
- BackupStatus *result;
-} QmpQueryBackupTask;
+ return task.result;
+}
-static void coroutine_fn pvebackup_co_query(void *opaque)
+BackupStatus *qmp_query_backup(Error **errp)
{
- assert(qemu_in_coroutine());
-
- QmpQueryBackupTask *task = opaque;
-
BackupStatus *info = g_malloc0(sizeof(*info));
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+ qemu_mutex_lock(&backup_state.stat.lock);
if (!backup_state.stat.start_time) {
/* not started, return {} */
- task->result = info;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- return;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ return info;
}
info->has_status = true;
@@ -937,19 +951,7 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
info->has_transferred = true;
info->transferred = backup_state.stat.transferred;
- task->result = info;
+ qemu_mutex_unlock(&backup_state.stat.lock);
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-}
-
-BackupStatus *qmp_query_backup(Error **errp)
-{
- QmpQueryBackupTask task = {
- .errp = errp,
- .result = NULL,
- };
-
- block_on_coroutine_fn(pvebackup_co_query, &task);
-
- return task.result;
+ return info;
}

View File

@@ -26,20 +26,19 @@ Suggested-by: Ma Haocong <mahaocong@didichuxing.com>
Signed-off-by: Ma Haocong <mahaocong@didichuxing.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/mirror.c | 98 +++++++++++++++++++++-----
blockdev.c | 39 +++++++++-
include/block/block_int-global-state.h | 4 +-
qapi/block-core.json | 29 ++++++--
tests/unit/test-block-iothread.c | 4 +-
block/mirror.c | 98 ++++++++++++++++++++++++++++++-------
blockdev.c | 39 +++++++++++++--
include/block/block_int.h | 4 +-
qapi/block-core.json | 29 +++++++++--
tests/test-block-iothread.c | 4 +-
5 files changed, 145 insertions(+), 29 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 3c4ab1159d..f2eca983f1 100644
index e8e8844afc..100e828639 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob {
@@ -49,7 +49,7 @@ typedef struct MirrorBlockJob {
BlockDriverState *to_replace;
/* Used to block operations on the drive-mirror-replace target */
Error *replace_blocker;
@@ -48,7 +47,7 @@ index 3c4ab1159d..f2eca983f1 100644
BlockMirrorBackingMode backing_mode;
/* Whether the target image requires explicit zero-initialization */
bool zero_target;
@@ -65,6 +65,8 @@ typedef struct MirrorBlockJob {
@@ -64,6 +64,8 @@ typedef struct MirrorBlockJob {
size_t buf_size;
int64_t bdev_length;
unsigned long *cow_bitmap;
@@ -57,17 +56,17 @@ index 3c4ab1159d..f2eca983f1 100644
BdrvDirtyBitmap *dirty_bitmap;
BdrvDirtyBitmapIter *dbi;
uint8_t *buf;
@@ -696,7 +698,8 @@ static int mirror_exit_common(Job *job)
@@ -676,7 +678,8 @@ static int mirror_exit_common(Job *job)
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
- BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ BlockDriverState *backing;
+ backing = s->sync_mode == MIRROR_SYNC_MODE_NONE ? src : s->base;
BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
if (bdrv_cow_bs(unfiltered_target) != backing) {
@@ -794,6 +797,16 @@ static void mirror_abort(Job *job)
if (backing_bs(target_bs) != backing) {
bdrv_set_backing_hd(target_bs, backing, &local_err);
if (local_err) {
@@ -771,6 +774,16 @@ static void mirror_abort(Job *job)
assert(ret == 0);
}
@@ -84,7 +83,7 @@ index 3c4ab1159d..f2eca983f1 100644
static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -973,7 +986,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
@@ -952,7 +965,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_free_init(s);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -94,23 +93,23 @@ index 3c4ab1159d..f2eca983f1 100644
ret = mirror_dirty_init(s);
if (ret < 0 || job_is_cancelled(&s->common.job)) {
goto immediate_exit;
@@ -1212,6 +1226,7 @@ static const BlockJobDriver mirror_job_driver = {
@@ -1184,6 +1198,7 @@ static const BlockJobDriver mirror_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
+ .clean = mirror_clean,
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = mirror_cancel,
@@ -1228,6 +1243,7 @@ static const BlockJobDriver commit_active_job_driver = {
},
@@ -1199,6 +1214,7 @@ static const BlockJobDriver commit_active_job_driver = {
.run = mirror_run,
.prepare = mirror_prepare,
.abort = mirror_abort,
+ .clean = mirror_clean,
.pause = mirror_pause,
.complete = mirror_complete,
.cancel = commit_active_cancel,
@@ -1593,7 +1609,10 @@ static BlockJob *mirror_start_job(
},
@@ -1547,7 +1563,10 @@ static BlockJob *mirror_start_job(
BlockCompletionFunc *cb,
void *opaque,
const BlockJobDriver *driver,
@@ -122,8 +121,8 @@ index 3c4ab1159d..f2eca983f1 100644
bool auto_complete, const char *filter_node_name,
bool is_mirror, MirrorCopyMode copy_mode,
Error **errp)
@@ -1605,10 +1624,39 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
@@ -1560,10 +1579,39 @@ static BlockJob *mirror_start_job(
Error *local_err = NULL;
int ret;
- if (granularity == 0) {
@@ -164,7 +163,7 @@ index 3c4ab1159d..f2eca983f1 100644
assert(is_power_of_2(granularity));
if (buf_size < 0) {
@@ -1740,7 +1788,9 @@ static BlockJob *mirror_start_job(
@@ -1667,7 +1715,9 @@ static BlockJob *mirror_start_job(
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
@@ -175,7 +174,7 @@ index 3c4ab1159d..f2eca983f1 100644
s->backing_mode = backing_mode;
s->zero_target = zero_target;
s->copy_mode = copy_mode;
@@ -1761,6 +1811,18 @@ static BlockJob *mirror_start_job(
@@ -1687,6 +1737,18 @@ static BlockJob *mirror_start_job(
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
}
@@ -194,7 +193,7 @@ index 3c4ab1159d..f2eca983f1 100644
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
BLK_PERM_CONSISTENT_READ,
@@ -1838,6 +1900,9 @@ fail:
@@ -1740,6 +1802,9 @@ fail:
if (s->dirty_bitmap) {
bdrv_release_dirty_bitmap(s->dirty_bitmap);
}
@@ -204,7 +203,7 @@ index 3c4ab1159d..f2eca983f1 100644
job_early_fail(&s->common.job);
}
@@ -1855,31 +1920,25 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1757,29 +1822,23 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -221,8 +220,6 @@ index 3c4ab1159d..f2eca983f1 100644
- bool is_none_mode;
BlockDriverState *base;
GLOBAL_STATE_CODE();
- if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
- (mode == MIRROR_SYNC_MODE_BITMAP)) {
- error_setg(errp, "Sync mode '%s' not supported",
@@ -230,7 +227,7 @@ index 3c4ab1159d..f2eca983f1 100644
- return;
- }
- is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
mirror_start_job(job_id, bs, creation_flags, target, replaces,
speed, granularity, buf_size, backing_mode, zero_target,
on_source_error, on_target_error, unmap, NULL, NULL,
@@ -241,7 +238,7 @@ index 3c4ab1159d..f2eca983f1 100644
}
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1906,7 +1965,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1805,7 +1864,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN, false,
on_error, on_error, true, cb, opaque,
@@ -249,13 +246,13 @@ index 3c4ab1159d..f2eca983f1 100644
+ &commit_active_job_driver, MIRROR_SYNC_MODE_FULL,
+ NULL, 0, base, auto_complete,
filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
errp);
if (!job) {
&local_err);
if (local_err) {
diff --git a/blockdev.c b/blockdev.c
index 9230888e34..9a1a3118ed 100644
index 681da7c8b6..02d58e7645 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2951,6 +2951,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2876,6 +2876,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
BlockDriverState *target,
bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
@@ -266,15 +263,15 @@ index 9230888e34..9a1a3118ed 100644
BlockMirrorBackingMode backing_mode,
bool zero_target,
bool has_speed, int64_t speed,
@@ -2970,6 +2974,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2894,6 +2898,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
Error **errp)
{
BlockDriverState *unfiltered_bs;
int job_flags = JOB_DEFAULT;
+ BdrvDirtyBitmap *bitmap = NULL;
if (!has_speed) {
speed = 0;
@@ -3024,6 +3029,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2948,6 +2953,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
@@ -301,10 +298,10 @@ index 9230888e34..9a1a3118ed 100644
+ }
+ }
+
if (!has_replaces) {
/* We want to mirror from @bs, but keep implicit filters on top */
unfiltered_bs = bdrv_skip_implicit_filters(bs);
@@ -3070,8 +3098,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (has_replaces) {
BlockDriverState *to_replace_bs;
AioContext *replace_aio_context;
@@ -2985,8 +3013,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
* and will allow to check whether the node still exist at mirror completion
*/
mirror_start(job_id, bs, target,
@@ -315,7 +312,7 @@ index 9230888e34..9a1a3118ed 100644
on_source_error, on_target_error, unmap, filter_node_name,
copy_mode, errp);
}
@@ -3216,6 +3244,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3127,6 +3155,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
arg->has_replaces, arg->replaces, arg->sync,
@@ -324,7 +321,7 @@ index 9230888e34..9a1a3118ed 100644
backing_mode, zero_target,
arg->has_speed, arg->speed,
arg->has_granularity, arg->granularity,
@@ -3237,6 +3267,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
@@ -3148,6 +3178,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
const char *device, const char *target,
bool has_replaces, const char *replaces,
MirrorSyncMode sync,
@@ -333,7 +330,7 @@ index 9230888e34..9a1a3118ed 100644
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size,
@@ -3286,7 +3318,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
@@ -3197,7 +3229,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
}
blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
@@ -343,11 +340,11 @@ index 9230888e34..9a1a3118ed 100644
zero_target, has_speed, speed,
has_granularity, granularity,
has_buf_size, buf_size,
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index b49f4eb35b..9d744db618 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -149,7 +149,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8dda6f769d..279bd4ab61 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1245,7 +1245,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
@@ -359,10 +356,10 @@ index b49f4eb35b..9d744db618 100644
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2173e7734a..e1857e7094 100644
index 69db270b1a..9db8e26517 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2000,10 +2000,19 @@
@@ -2064,10 +2064,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
@@ -383,7 +380,7 @@ index 2173e7734a..e1857e7094 100644
#
# @buf-size: maximum amount of data in flight from source to
# target (since 1.4).
@@ -2043,7 +2052,9 @@
@@ -2105,7 +2114,9 @@
{ 'struct': 'DriveMirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*format': 'str', '*node-name': 'str', '*replaces': 'str',
@@ -394,7 +391,7 @@ index 2173e7734a..e1857e7094 100644
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
@@ -2322,10 +2333,19 @@
@@ -2372,10 +2383,19 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
@@ -415,7 +412,7 @@ index 2173e7734a..e1857e7094 100644
#
# @buf-size: maximum amount of data in flight from source to
# target
@@ -2375,7 +2395,8 @@
@@ -2424,7 +2444,8 @@
{ 'command': 'blockdev-mirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*replaces': 'str',
@@ -425,11 +422,11 @@ index 2173e7734a..e1857e7094 100644
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index 8b55eccc89..f4650be8e5 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -753,8 +753,8 @@ static void test_propagate_mirror(void)
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
index 3f866a35c6..500ede71c8 100644
--- a/tests/test-block-iothread.c
+++ b/tests/test-block-iothread.c
@@ -623,8 +623,8 @@ static void test_propagate_mirror(void)
/* Start a mirror job */
mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,

View File

@@ -1,219 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Thu, 9 Jul 2020 12:53:08 +0200
Subject: [PATCH] PVE: various PBS fixes
pbs: fix crypt and compress parameters
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
PVE: handle PBS write callback with big blocks correctly
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
PVE: add zero block handling to PBS dump callback
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 ++-
pve-backup.c | 57 +++++++++++++++++++++++++++-------
qapi/block-core.json | 6 ++++
3 files changed, 54 insertions(+), 13 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 45da74d7a0..ea7b665aa2 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,7 +1042,9 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
- false, false, // PBS incremental
+ false, false, // PBS use-dirty-bitmap
+ false, false, // PBS compress
+ false, false, // PBS encrypt
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/pve-backup.c b/pve-backup.c
index 1c49cd178d..c15abefdda 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -8,6 +8,7 @@
#include "block/blockjob.h"
#include "qapi/qapi-commands-block.h"
#include "qapi/qmp/qerror.h"
+#include "qemu/cutils.h"
/* PVE backup state and related function */
@@ -67,6 +68,7 @@ opts_init(pvebackup_init);
typedef struct PVEBackupDevInfo {
BlockDriverState *bs;
size_t size;
+ uint64_t block_size;
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
@@ -135,10 +137,13 @@ pvebackup_co_dump_pbs_cb(
PVEBackupDevInfo *di = opaque;
assert(backup_state.pbs);
+ assert(buf);
Error *local_err = NULL;
int pbs_res = -1;
+ bool is_zero_block = size == di->block_size && buffer_is_zero(buf, size);
+
qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
@@ -147,17 +152,29 @@ pvebackup_co_dump_pbs_cb(
return -1;
}
- pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
- qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ uint64_t transferred = 0;
+ uint64_t reused = 0;
+ while (transferred < size) {
+ uint64_t left = size - transferred;
+ uint64_t to_transfer = left < di->block_size ? left : di->block_size;
- if (pbs_res < 0) {
- pvebackup_propagate_error(local_err);
- return pbs_res;
- } else {
- size_t reused = (pbs_res == 0) ? size : 0;
- pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
+ pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id,
+ is_zero_block ? NULL : buf + transferred, start + transferred,
+ to_transfer, &local_err);
+ transferred += to_transfer;
+
+ if (pbs_res < 0) {
+ pvebackup_propagate_error(local_err);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return pbs_res;
+ }
+
+ reused += pbs_res == 0 ? to_transfer : 0;
}
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ pvebackup_add_transfered_bytes(size, is_zero_block ? size : 0, reused);
+
return size;
}
@@ -178,6 +195,7 @@ pvebackup_co_dump_vma_cb(
int ret = -1;
assert(backup_state.vmaw);
+ assert(buf);
uint64_t remaining = size;
@@ -204,9 +222,7 @@ pvebackup_co_dump_vma_cb(
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
++cluster_num;
- if (buf) {
- buf += VMA_CLUSTER_SIZE;
- }
+ buf += VMA_CLUSTER_SIZE;
if (ret < 0) {
Error *local_err = NULL;
vma_writer_error_propagate(backup_state.vmaw, &local_err);
@@ -569,6 +585,10 @@ typedef struct QmpBackupTask {
const char *firewall_file;
bool has_devlist;
const char *devlist;
+ bool has_compress;
+ bool compress;
+ bool has_encrypt;
+ bool encrypt;
bool has_speed;
int64_t speed;
Error **errp;
@@ -692,6 +712,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -701,8 +722,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->has_password ? task->password : NULL,
task->has_keyfile ? task->keyfile : NULL,
task->has_key_password ? task->key_password : NULL,
+ task->has_compress ? task->compress : true,
+ task->has_encrypt ? task->encrypt : task->has_keyfile,
task->has_fingerprint ? task->fingerprint : NULL,
- &pbs_err);
+ &pbs_err);
if (!pbs) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
@@ -721,6 +744,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ di->block_size = dump_cb_block_size;
+
const char *devname = bdrv_get_device_name(di->bs);
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
@@ -941,6 +966,8 @@ UuidInfo *qmp_backup(
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -951,6 +978,8 @@ UuidInfo *qmp_backup(
.backup_file = backup_file,
.has_password = has_password,
.password = password,
+ .has_keyfile = has_keyfile,
+ .keyfile = keyfile,
.has_key_password = has_key_password,
.key_password = key_password,
.has_fingerprint = has_fingerprint,
@@ -961,6 +990,10 @@ UuidInfo *qmp_backup(
.backup_time = backup_time,
.has_use_dirty_bitmap = has_use_dirty_bitmap,
.use_dirty_bitmap = use_dirty_bitmap,
+ .has_compress = has_compress,
+ .compress = compress,
+ .has_encrypt = has_encrypt,
+ .encrypt = encrypt,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3ad9eb5d1a..4120052690 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -818,6 +818,10 @@
#
# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
#
+# @compress: use compression (optional for format 'pbs', defaults to true)
+#
+# @encrypt: use encryption ((optional for format 'pbs', defaults to true if there is a keyfile)
+#
# Returns: the uuid of the backup job
#
##
@@ -829,6 +833,8 @@
'*backup-id': 'str',
'*backup-time': 'int',
'*use-dirty-bitmap': 'bool',
+ '*compress': 'bool',
+ '*encrypt': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -18,16 +18,15 @@ incremental backup modes; we can use this bitmap to later refresh a
successfully created mirror.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/mirror.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index f2eca983f1..b6475d50ad 100644
index 100e828639..7d3c3252f3 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -673,8 +673,6 @@ static int mirror_exit_common(Job *job)
@@ -653,8 +653,6 @@ static int mirror_exit_common(Job *job)
bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
}
@@ -36,9 +35,9 @@ index f2eca983f1..b6475d50ad 100644
/* Make sure that the source BDS doesn't go away during bdrv_replace_node,
* before we can call bdrv_drained_end */
bdrv_ref(src);
@@ -775,6 +773,18 @@ static int mirror_exit_common(Job *job)
block_job_remove_all_bdrv(bjob);
bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
@@ -752,6 +750,18 @@ static int mirror_exit_common(Job *job)
blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
+ if (s->sync_bitmap) {
+ if (s->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS ||
@@ -55,7 +54,7 @@ index f2eca983f1..b6475d50ad 100644
bs_opaque->job = NULL;
bdrv_drained_end(src);
@@ -1634,10 +1644,6 @@ static BlockJob *mirror_start_job(
@@ -1589,10 +1599,6 @@ static BlockJob *mirror_start_job(
" sync mode",
MirrorSyncMode_str(sync_mode));
return NULL;
@@ -66,7 +65,7 @@ index f2eca983f1..b6475d50ad 100644
}
} else if (bitmap) {
error_setg(errp,
@@ -1654,6 +1660,12 @@ static BlockJob *mirror_start_job(
@@ -1609,6 +1615,12 @@ static BlockJob *mirror_start_job(
return NULL;
}
granularity = bdrv_dirty_bitmap_granularity(bitmap);

View File

@@ -10,16 +10,15 @@ as one without the other does not make much sense with the current set
of modes.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
blockdev.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/blockdev.c b/blockdev.c
index 9a1a3118ed..a57b0af2e7 100644
index 02d58e7645..0d480f02c7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3050,6 +3050,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2974,6 +2974,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -28,4 +27,4 @@ index 9a1a3118ed..a57b0af2e7 100644
+ return;
}
if (!has_replaces) {
if (has_replaces) {

View File

@@ -10,16 +10,15 @@ since sync_bitmap is busy at the point of merging, and we checked access
beforehand.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/mirror.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
block/mirror.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index b6475d50ad..8b3342f9ec 100644
index 7d3c3252f3..fb12ccb932 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -779,8 +779,8 @@ static int mirror_exit_common(Job *job)
@@ -756,8 +756,8 @@ static int mirror_exit_common(Job *job)
job->ret == 0 && ret == 0)) {
/* Success; synchronize copy back to sync. */
bdrv_clear_dirty_bitmap(s->sync_bitmap, NULL);
@@ -30,17 +29,14 @@ index b6475d50ad..8b3342f9ec 100644
}
}
bdrv_release_dirty_bitmap(s->dirty_bitmap);
@@ -1828,11 +1828,8 @@ static BlockJob *mirror_start_job(
@@ -1754,8 +1754,8 @@ static BlockJob *mirror_start_job(
}
if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
- bdrv_merge_dirty_bitmap(s->dirty_bitmap, s->sync_bitmap,
- NULL, &local_err);
- if (local_err) {
- goto fail;
- }
+ bdrv_dirty_bitmap_merge_internal(s->dirty_bitmap, s->sync_bitmap,
+ NULL, true);
}
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
if (local_err) {
goto fail;
}

View File

@@ -20,11 +20,11 @@ intentionally keeping copyright and ownership of original test case to
honor provenance.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
tests/qemu-iotests/384 | 547 +++++++
tests/qemu-iotests/384.out | 2846 ++++++++++++++++++++++++++++++++++++
2 files changed, 3393 insertions(+)
tests/qemu-iotests/group | 1 +
3 files changed, 3394 insertions(+)
create mode 100755 tests/qemu-iotests/384
create mode 100644 tests/qemu-iotests/384.out
@@ -3433,3 +3433,15 @@ index 0000000000..9b7408b6d6
+{"execute": "blockdev-mirror", "arguments": {"bitmap": "bitmap0", "device": "drive0", "filter-node-name": "mirror-top", "job-id": "api_job", "sync": "none", "target": "mirror_target"}}
+{"error": {"class": "GenericError", "desc": "bitmap-mode must be specified if a bitmap is provided"}}
+
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 025ed5238d..bee527c012 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -270,6 +270,7 @@
253 rw quick
254 rw backing quick
255 rw quick
+384 rw
256 rw auto quick
257 rw
258 rw quick

View File

@@ -1,61 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 12 Jan 2021 14:12:20 +0100
Subject: [PATCH] PVE: redirect stderr to journal when daemonized
QEMU uses the logging for error messages usually, so LOG_ERR is most
fitting.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
meson.build | 2 ++
os-posix.c | 7 +++++--
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/meson.build b/meson.build
index be4785e2f6..3fc7c8d435 100644
--- a/meson.build
+++ b/meson.build
@@ -1463,6 +1463,7 @@ keyutils = dependency('libkeyutils', required: false,
has_gettid = cc.has_function('gettid')
libuuid = cc.find_library('uuid', required: true)
+libsystemd = cc.find_library('systemd', required: true)
libproxmox_backup_qemu = cc.find_library('proxmox_backup_qemu', required: true)
# libselinux
@@ -3105,6 +3106,7 @@ if have_block
# os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
# os-win32.c does not
blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
+ blockdev_ss.add(when: 'CONFIG_POSIX', if_true: libsystemd)
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
endif
diff --git a/os-posix.c b/os-posix.c
index 321fc4bd13..b1870d2690 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -28,6 +28,8 @@
#include <pwd.h>
#include <grp.h>
#include <libgen.h>
+#include <systemd/sd-journal.h>
+#include <syslog.h>
/* Needed early for CONFIG_BSD etc. */
#include "net/slirp.h"
@@ -281,9 +283,10 @@ void os_setup_post(void)
dup2(fd, 0);
dup2(fd, 1);
- /* In case -D is given do not redirect stderr to /dev/null */
+ /* In case -D is given do not redirect stderr to journal */
if (!qemu_log_enabled()) {
- dup2(fd, 2);
+ int journal_fd = sd_journal_stream_fd("QEMU", LOG_ERR, 0);
+ dup2(journal_fd, 2);
}
close(fd);

View File

@@ -11,7 +11,6 @@ mode was never available for drive-mirror, it makes the interface more
uniform w.r.t. backup block jobs.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/mirror.c | 28 +++------------
blockdev.c | 29 +++++++++++++++
@@ -19,11 +18,11 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
3 files changed, 70 insertions(+), 59 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 8b3342f9ec..1d4ff0efad 100644
index fb12ccb932..dfce442e97 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1634,31 +1634,13 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
@@ -1589,31 +1589,13 @@ static BlockJob *mirror_start_job(
Error *local_err = NULL;
int ret;
- if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
@@ -60,10 +59,10 @@ index 8b3342f9ec..1d4ff0efad 100644
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
diff --git a/blockdev.c b/blockdev.c
index a57b0af2e7..ce62a9b439 100644
index 0d480f02c7..be87d65c02 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3029,7 +3029,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -2953,7 +2953,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}

View File

@@ -20,19 +20,19 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
monitor/hmp-cmds.c | 45 ++++++++++----
proxmox-backup-client.c | 3 +-
proxmox-backup-client.h | 1 +
pve-backup.c | 103 ++++++++++++++++++++++++++++++---
qapi/block-core.json | 12 +++-
6 files changed, 142 insertions(+), 23 deletions(-)
block/monitor/block-hmp-cmds.c | 1 +
monitor/hmp-cmds.c | 45 ++++++++++++----
proxmox-backup-client.c | 3 +-
proxmox-backup-client.h | 1 +
pve-backup.c | 95 ++++++++++++++++++++++++++++++----
qapi/block-core.json | 12 ++++-
6 files changed, 134 insertions(+), 23 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 89ca64444d..45da74d7a0 100644
index d485c3ac79..fdc85a5c0e 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1042,6 +1042,7 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
@@ -1038,6 +1038,7 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
@@ -41,10 +41,10 @@ index 89ca64444d..45da74d7a0 100644
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 1168773da7..4c1671e289 100644
index 0e2d166552..3ff014d32a 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -223,19 +223,42 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
@@ -218,19 +218,42 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "End time: %s", ctime(&info->end_time));
}
@@ -132,7 +132,7 @@ index 1dda8b7d8f..8cbf645b2c 100644
diff --git a/pve-backup.c b/pve-backup.c
index 88f5ee133f..1c49cd178d 100644
index d40f3f2fd6..d50f03a050 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -28,6 +28,8 @@
@@ -232,7 +232,7 @@ index 88f5ee133f..1c49cd178d 100644
g_free(di);
qemu_mutex_unlock(&backup_state.backup_mutex);
@@ -472,12 +497,18 @@ static bool create_backup_jobs(void) {
@@ -470,12 +495,18 @@ static bool create_backup_jobs(void) {
assert(di->target != NULL);
@@ -247,22 +247,22 @@ index 88f5ee133f..1c49cd178d 100644
BlockJob *job = backup_job_create(
- NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
- BITMAP_SYNC_MODE_NEVER, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+ NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+ bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
+ bitmap_mode, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
aio_context_release(aio_context);
@@ -528,6 +559,8 @@ typedef struct QmpBackupTask {
@@ -526,6 +557,8 @@ typedef struct QmpBackupTask {
const char *fingerprint;
bool has_fingerprint;
int64_t backup_time;
+ bool has_use_dirty_bitmap;
+ bool use_dirty_bitmap;
+ bool has_incremental;
+ bool incremental;
bool has_format;
BackupFormat format;
bool has_config_file;
@@ -619,6 +652,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -617,6 +650,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
@@ -270,16 +270,16 @@ index 88f5ee133f..1c49cd178d 100644
l = di_list;
while (l) {
@@ -656,6 +690,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -654,6 +688,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
+ bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+ bool incremental = task->has_incremental && task->incremental;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -675,7 +711,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -673,7 +709,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
goto err;
}
@@ -289,50 +289,42 @@ index 88f5ee133f..1c49cd178d 100644
goto err;
/* register all devices */
@@ -686,9 +723,40 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -684,9 +721,32 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *devname = bdrv_get_device_name(di->bs);
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, task->errp);
- if (dev_id < 0)
+ BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
+ bool expect_only_dirty = false;
+
+ if (use_dirty_bitmap) {
+ bool use_incremental = false;
+ if (incremental) {
+ if (bitmap == NULL) {
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ if (!bitmap) {
+ goto err;
+ }
+ } else {
+ expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
+ }
+
+ if (expect_only_dirty) {
+ dirty += bdrv_get_dirty_count(bitmap);
+ } else {
+ /* mark entire bitmap as dirty to make full backup */
+ /* mark entire bitmap as dirty to make full backup first */
+ bdrv_set_dirty_bitmap(bitmap, 0, di->size);
+ dirty += di->size;
+ } else {
+ use_incremental = true;
+ dirty += bdrv_get_dirty_count(bitmap);
+ }
+ di->bitmap = bitmap;
+ } else {
+ } else if (bitmap != NULL) {
+ dirty += di->size;
+
+ /* after a full backup the old dirty bitmap is invalid anyway */
+ if (bitmap != NULL) {
+ bdrv_release_dirty_bitmap(bitmap);
+ }
+ bdrv_release_dirty_bitmap(bitmap);
+ }
+
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, use_incremental, task->errp);
+ if (dev_id < 0) {
goto err;
+ }
if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
goto err;
@@ -697,6 +765,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -695,6 +755,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->dev_id = dev_id;
}
} else if (format == BACKUP_FORMAT_VMA) {
@@ -341,7 +333,7 @@ index 88f5ee133f..1c49cd178d 100644
vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
@@ -724,6 +794,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -722,6 +784,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
} else if (format == BACKUP_FORMAT_DIR) {
@@ -350,18 +342,18 @@ index 88f5ee133f..1c49cd178d 100644
if (mkdir(task->backup_file, 0640) != 0) {
error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
task->backup_file);
@@ -796,8 +868,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -794,8 +858,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
+ backup_state.stat.dirty = dirty;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
+ backup_state.stat.reused = dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -821,6 +895,10 @@ err:
@@ -819,6 +885,10 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
@@ -372,24 +364,24 @@ index 88f5ee133f..1c49cd178d 100644
if (di->target) {
bdrv_unref(di->target);
}
@@ -862,6 +940,7 @@ UuidInfo *qmp_backup(
@@ -860,6 +930,7 @@ UuidInfo *qmp_backup(
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_incremental, bool incremental,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -880,6 +959,8 @@ UuidInfo *qmp_backup(
@@ -878,6 +949,8 @@ UuidInfo *qmp_backup(
.backup_id = backup_id,
.has_backup_time = has_backup_time,
.backup_time = backup_time,
+ .has_use_dirty_bitmap = has_use_dirty_bitmap,
+ .use_dirty_bitmap = use_dirty_bitmap,
+ .has_incremental = has_incremental,
+ .incremental = incremental,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
@@ -948,10 +1029,14 @@ BackupStatus *qmp_query_backup(Error **errp)
@@ -946,10 +1019,14 @@ BackupStatus *qmp_query_backup(Error **errp)
info->has_total = true;
info->total = backup_state.stat.total;
@@ -405,14 +397,14 @@ index 88f5ee133f..1c49cd178d 100644
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 90ad07b7ee..3ad9eb5d1a 100644
index 9db8e26517..6cad1e0e38 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -753,8 +753,13 @@
@@ -767,8 +767,13 @@
#
# @total: total amount of bytes involved in the backup process
#
+# @dirty: with incremental mode (PBS) this is the amount of bytes involved
+# @dirty: with incremental mode, this is the amount of bytes involved
+# in the backup process which are marked dirty.
+#
# @transferred: amount of bytes already backed up.
@@ -422,7 +414,7 @@ index 90ad07b7ee..3ad9eb5d1a 100644
# @zero-bytes: amount of 'zero' bytes detected.
#
# @start-time: time (epoch) when backup job started.
@@ -767,8 +772,8 @@
@@ -781,8 +786,8 @@
#
##
{ 'struct': 'BackupStatus',
@@ -433,20 +425,20 @@ index 90ad07b7ee..3ad9eb5d1a 100644
'*start-time': 'int', '*end-time': 'int',
'*backup-file': 'str', '*uuid': 'str' } }
@@ -811,6 +816,8 @@
@@ -825,6 +830,8 @@
#
# @backup-time: backup timestamp (Unix epoch, required for format 'pbs')
#
+# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
+# @incremental: sync incremental changes since last job (optional for format 'pbs')
+#
# Returns: the uuid of the backup job
#
##
@@ -821,6 +828,7 @@
@@ -835,6 +842,7 @@
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
+ '*use-dirty-bitmap': 'bool',
+ '*incremental': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -0,0 +1,126 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Mon, 6 Jul 2020 20:05:16 +0200
Subject: [PATCH] PVE backup: rename incremental to use-dirty-bitmap
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 22 +++++++++++-----------
qapi/block-core.json | 6 +++---
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index d50f03a050..7bf54b4c5d 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -557,8 +557,8 @@ typedef struct QmpBackupTask {
const char *fingerprint;
bool has_fingerprint;
int64_t backup_time;
- bool has_incremental;
- bool incremental;
+ bool has_use_dirty_bitmap;
+ bool use_dirty_bitmap;
bool has_format;
BackupFormat format;
bool has_config_file;
@@ -688,7 +688,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
- bool incremental = task->has_incremental && task->incremental;
+ bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
char *pbs_err = NULL;
pbs = proxmox_backup_new(
@@ -722,9 +722,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *devname = bdrv_get_device_name(di->bs);
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
+ bool expect_only_dirty = false;
- bool use_incremental = false;
- if (incremental) {
+ if (use_dirty_bitmap) {
if (bitmap == NULL) {
bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
if (!bitmap) {
@@ -734,7 +734,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bdrv_set_dirty_bitmap(bitmap, 0, di->size);
dirty += di->size;
} else {
- use_incremental = true;
+ expect_only_dirty = true;
dirty += bdrv_get_dirty_count(bitmap);
}
di->bitmap = bitmap;
@@ -743,7 +743,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bdrv_release_dirty_bitmap(bitmap);
}
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, use_incremental, task->errp);
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
if (dev_id < 0) {
goto err;
}
@@ -861,7 +861,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.dirty = dirty;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
- backup_state.stat.reused = dirty >= total ? 0 : total - dirty;
+ backup_state.stat.reused = format == BACKUP_FORMAT_PBS && dirty >= total ? 0 : total - dirty;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -930,7 +930,7 @@ UuidInfo *qmp_backup(
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
- bool has_incremental, bool incremental,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -949,8 +949,8 @@ UuidInfo *qmp_backup(
.backup_id = backup_id,
.has_backup_time = has_backup_time,
.backup_time = backup_time,
- .has_incremental = has_incremental,
- .incremental = incremental,
+ .has_use_dirty_bitmap = has_use_dirty_bitmap,
+ .use_dirty_bitmap = use_dirty_bitmap,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6cad1e0e38..e00e577c6c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -767,7 +767,7 @@
#
# @total: total amount of bytes involved in the backup process
#
-# @dirty: with incremental mode, this is the amount of bytes involved
+# @dirty: with incremental mode (PBS) this is the amount of bytes involved
# in the backup process which are marked dirty.
#
# @transferred: amount of bytes already backed up.
@@ -830,7 +830,7 @@
#
# @backup-time: backup timestamp (Unix epoch, required for format 'pbs')
#
-# @incremental: sync incremental changes since last job (optional for format 'pbs')
+# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
#
# Returns: the uuid of the backup job
#
@@ -842,7 +842,7 @@
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
- '*incremental': 'bool',
+ '*use-dirty-bitmap': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -0,0 +1,44 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 6 Jul 2020 14:40:12 +0200
Subject: [PATCH] PVE: fixup pbs-restore API
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pbs-restore.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/pbs-restore.c b/pbs-restore.c
index 4bf37ef1fa..ff3fc38e8a 100644
--- a/pbs-restore.c
+++ b/pbs-restore.c
@@ -161,13 +161,19 @@ int main(int argc, char **argv)
fprintf(stderr, "connecting to repository '%s'\n", repository);
}
char *pbs_error = NULL;
- ProxmoxRestoreHandle *conn = proxmox_restore_connect(
+ ProxmoxRestoreHandle *conn = proxmox_restore_new(
repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
if (conn == NULL) {
fprintf(stderr, "restore failed: %s\n", pbs_error);
return -1;
}
+ int res = proxmox_restore_connect(conn, &pbs_error);
+ if (res < 0 || pbs_error) {
+ fprintf(stderr, "restore failed (connection error): %s\n", pbs_error);
+ return -1;
+ }
+
QDict *options = qdict_new();
if (format) {
@@ -198,7 +204,7 @@ int main(int argc, char **argv)
fprintf(stderr, "starting to restore snapshot '%s'\n", snapshot);
fflush(stderr); // ensure we do not get printed after the progress log
}
- int res = proxmox_restore_image(
+ res = proxmox_restore_image(
conn,
archive_name,
write_callback,

View File

@@ -0,0 +1,30 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 6 Jul 2020 14:40:13 +0200
Subject: [PATCH] PVE: always set dirty counter for non-incremental backups
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 7bf54b4c5d..1f2a0bbe8c 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -738,9 +738,13 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
dirty += bdrv_get_dirty_count(bitmap);
}
di->bitmap = bitmap;
- } else if (bitmap != NULL) {
+ } else {
dirty += di->size;
- bdrv_release_dirty_bitmap(bitmap);
+
+ /* after a full backup the old dirty bitmap is invalid anyway */
+ if (bitmap != NULL) {
+ bdrv_release_dirty_bitmap(bitmap);
+ }
}
int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);

View File

@@ -0,0 +1,36 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 6 Jul 2020 14:40:14 +0200
Subject: [PATCH] PVE: use proxmox_backup_check_incremental
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
pve-backup.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 1f2a0bbe8c..1cd9d31d7c 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -730,12 +730,16 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (!bitmap) {
goto err;
}
- /* mark entire bitmap as dirty to make full backup first */
- bdrv_set_dirty_bitmap(bitmap, 0, di->size);
- dirty += di->size;
} else {
- expect_only_dirty = true;
+ expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
+ }
+
+ if (expect_only_dirty) {
dirty += bdrv_get_dirty_count(bitmap);
+ } else {
+ /* mark entire bitmap as dirty to make full backup */
+ bdrv_set_dirty_bitmap(bitmap, 0, di->size);
+ dirty += di->size;
}
di->bitmap = bitmap;
} else {

View File

@@ -1,69 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Tue, 17 Nov 2020 10:51:05 +0100
Subject: [PATCH] PVE: fall back to open-iscsi initiatorname
When no explicit option is given, try reading the initiator name from
/etc/iscsi/initiatorname.iscsi and only use the generic fallback, i.e.
iqn.2008-11.org.linux-kvmXXX, as a third alternative.
This avoids the need to add an explicit option for vma and to explicitly set it
for each call to qemu that deals with iSCSI disks, while still allowing to set
the option if a different name is needed.
According to RFC 3720, an initiator name is at most 223 bytes long, so the
4 KiB buffer is big enough, even if many whitespaces are used.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/iscsi.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
index d707d0b354..da6ed52323 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1386,12 +1386,42 @@ static char *get_initiator_name(QemuOpts *opts)
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;
+ FILE *name_fh;
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
+ name_fh = fopen("/etc/iscsi/initiatorname.iscsi", "r");
+ if (name_fh) {
+ const char *key = "InitiatorName";
+ char buffer[4096];
+ char *line;
+
+ while ((line = fgets(buffer, sizeof(buffer), name_fh))) {
+ line = g_strstrip(line);
+ if (!strncmp(line, key, strlen(key))) {
+ line = strchr(line, '=');
+ if (!line || strlen(line) == 1) {
+ continue;
+ }
+ line++;
+ g_strstrip(line);
+ if (!strlen(line)) {
+ continue;
+ }
+ name = line;
+ break;
+ }
+ }
+ fclose(name_fh);
+
+ if (name) {
+ return g_strdup(name);
+ }
+ }
+
uuid_info = qmp_query_uuid(NULL);
if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
name = qemu_get_vm_name();

View File

@@ -0,0 +1,103 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Thu, 9 Jul 2020 12:53:08 +0200
Subject: [PATCH] PVE: fixup pbs backup, add compress and encrypt options
---
block/monitor/block-hmp-cmds.c | 4 +++-
pve-backup.c | 13 ++++++++++++-
qapi/block-core.json | 6 ++++++
3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index fdc85a5c0e..43aa87487b 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1038,7 +1038,9 @@ void hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
- false, false, // PBS incremental
+ false, false, // PBS use-dirty-bitmap
+ false, false, // PBS compress
+ false, false, // PBS encrypt
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
devlist, qdict_haskey(qdict, "speed"), speed, &error);
diff --git a/pve-backup.c b/pve-backup.c
index 1cd9d31d7c..bfb648d6b5 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -567,6 +567,10 @@ typedef struct QmpBackupTask {
const char *firewall_file;
bool has_devlist;
const char *devlist;
+ bool has_compress;
+ bool compress;
+ bool has_encrypt;
+ bool encrypt;
bool has_speed;
int64_t speed;
Error **errp;
@@ -690,6 +694,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
+
char *pbs_err = NULL;
pbs = proxmox_backup_new(
task->backup_file,
@@ -699,8 +704,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->has_password ? task->password : NULL,
task->has_keyfile ? task->keyfile : NULL,
task->has_key_password ? task->key_password : NULL,
+ task->has_compress ? task->compress : true,
+ task->has_encrypt ? task->encrypt : task->has_keyfile,
task->has_fingerprint ? task->fingerprint : NULL,
- &pbs_err);
+ &pbs_err);
if (!pbs) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
@@ -939,6 +946,8 @@ UuidInfo *qmp_backup(
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
bool has_format, BackupFormat format,
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
@@ -967,6 +976,8 @@ UuidInfo *qmp_backup(
.firewall_file = firewall_file,
.has_devlist = has_devlist,
.devlist = devlist,
+ .has_compress = has_compress,
+ .has_encrypt = has_encrypt,
.has_speed = has_speed,
.speed = speed,
.errp = errp,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e00e577c6c..a177fea6cd 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -832,6 +832,10 @@
#
# @use-dirty-bitmap: use dirty bitmap to detect incremental changes since last job (optional for format 'pbs')
#
+# @compress: use compression (optional for format 'pbs', defaults to true)
+#
+# @encrypt: use encryption ((optional for format 'pbs', defaults to true if there is a keyfile)
+#
# Returns: the uuid of the backup job
#
##
@@ -843,6 +847,8 @@
'*backup-id': 'str',
'*backup-time': 'int',
'*use-dirty-bitmap': 'bool',
+ '*compress': 'bool',
+ '*encrypt': 'bool',
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',

View File

@@ -6,39 +6,37 @@ Subject: [PATCH] PVE: Add PBS block driver to map backup archives into VMs
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[error cleanups, file_open implementation]
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures
make pbs_co_preadv return values consistent with QEMU]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/meson.build | 3 +
block/pbs.c | 276 +++++++++++++++++++++++++++++++++++++++++++
configure | 9 ++
meson.build | 2 +-
qapi/block-core.json | 13 ++
5 files changed, 302 insertions(+), 1 deletion(-)
block/Makefile.objs | 2 +
block/pbs.c | 271 +++++++++++++++++++++++++++++++++++++++++++
configure | 10 ++
qapi/block-core.json | 14 ++-
4 files changed, 296 insertions(+), 1 deletion(-)
create mode 100644 block/pbs.c
diff --git a/block/meson.build b/block/meson.build
index 2783b77e9c..a26a69434e 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -53,6 +53,9 @@ block_ss.add(files(
'../pve-backup.c',
), libproxmox_backup_qemu)
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: files('pbs.c'))
+block_ss.add(when: 'CONFIG_PBS_BDRV', if_true: libproxmox_backup_qemu)
+
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 9ea0477d0b..28fb3b7f7c 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -5,6 +5,7 @@ block-obj-$(CONFIG_CLOOP) += cloop.o
block-obj-$(CONFIG_BOCHS) += bochs.o
block-obj-$(CONFIG_VVFAT) += vvfat.o
block-obj-$(CONFIG_DMG) += dmg.o
+block-obj-$(CONFIG_PBS_BDRV) += pbs.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o qcow2-threads.o
block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
@@ -75,3 +76,4 @@ io_uring.o-cflags := $(LINUX_IO_URING_CFLAGS)
io_uring.o-libs := $(LINUX_IO_URING_LIBS)
parallels.o-cflags := $(LIBXML2_CFLAGS)
parallels.o-libs := $(LIBXML2_LIBS)
+pbs.o-libs := -lproxmox_backup_qemu
diff --git a/block/pbs.c b/block/pbs.c
new file mode 100644
index 0000000000..9d1f1f39d4
index 0000000000..1481a2bfd1
--- /dev/null
+++ b/block/pbs.c
@@ -0,0 +1,276 @@
@@ -0,0 +1,271 @@
+/*
+ * Proxmox Backup Server read-only block driver
+ */
@@ -235,25 +233,20 @@ index 0000000000..9d1f1f39d4
+}
+
+static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BDRVPBSState *s = bs->opaque;
+ int ret;
+ char *pbs_error = NULL;
+ uint8_t *buf = malloc(bytes);
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+ return -EIO;
+ }
+
+ ReadCallbackData rcb = {
+ .co = qemu_coroutine_self(),
+ .ctx = bdrv_get_aio_context(bs),
+ .ctx = qemu_get_current_aio_context(),
+ };
+
+ proxmox_restore_read_image_at_async(s->conn, s->aid, buf, (uint64_t)offset, (uint64_t)bytes,
+ proxmox_restore_read_image_at_async(s->conn, s->aid, buf, offset, bytes,
+ read_callback, (void *) &rcb, &ret, &pbs_error);
+
+ qemu_coroutine_yield();
@@ -267,12 +260,12 @@ index 0000000000..9d1f1f39d4
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ free(buf);
+
+ return 0;
+ return ret;
+}
+
+static coroutine_fn int pbs_co_pwritev(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ fprintf(stderr, "pbs-bdrv: cannot write to backup file, make sure "
+ "any attached disk devices are set to read-only!\n");
@@ -316,72 +309,68 @@ index 0000000000..9d1f1f39d4
+
+block_init(bdrv_pbs_init);
diff --git a/configure b/configure
index 72ab03f11a..7203c270ec 100755
index 2acc4d1465..3fc80d0c2c 100755
--- a/configure
+++ b/configure
@@ -309,6 +309,7 @@ linux_user=""
bsd_user=""
pie=""
coroutine=""
@@ -510,6 +510,7 @@ vvfat="yes"
qed="yes"
parallels="yes"
sheepdog="yes"
+pbs_bdrv="yes"
plugins="$default_feature"
meson=""
meson_args=""
@@ -902,6 +903,10 @@ for opt do
--enable-uuid|--disable-uuid)
echo "$0: $opt is obsolete, UUID support is always built" >&2
libxml2=""
debug_mutex="no"
libpmem=""
@@ -1576,6 +1577,10 @@ for opt do
;;
--enable-sheepdog) sheepdog="yes"
;;
+ --disable-pbs-bdrv) pbs_bdrv="no"
+ ;;
+ --enable-pbs-bdrv) pbs_bdrv="yes"
+ ;;
--with-git=*) git="$optarg"
--disable-vhost-user) vhost_user="no"
;;
--with-git-submodules=*)
@@ -1087,6 +1092,7 @@ cat << EOF
debug-info debugging information
safe-stack SafeStack Stack Smash Protection. Depends on
clang/llvm >= 3.7 and requires coroutine backend ucontext.
--enable-vhost-user) vhost_user="yes"
@@ -1936,6 +1941,7 @@ disabled with --disable-FEATURE, default is enabled if available:
qed qed image format support
parallels parallels image format support
sheepdog sheepdog block driver support
+ pbs-bdrv Proxmox backup server read-only block driver support
NOTE: The object files are built at the place where configure is launched
EOF
@@ -2463,6 +2469,9 @@ echo "TARGET_DIRS=$target_list" >> $config_host_mak
if test "$modules" = "yes"; then
echo "CONFIG_MODULES=y" >> $config_host_mak
crypto-afalg Linux AF_ALG crypto backend driver
capstone capstone disassembler support
debug-mutex mutex debugging support
@@ -7009,6 +7015,7 @@ echo "vvfat support $vvfat"
echo "qed support $qed"
echo "parallels support $parallels"
echo "sheepdog support $sheepdog"
+echo "pbs-bdrv support $pbs_bdrv"
echo "capstone $capstone"
echo "libpmem support $libpmem"
echo "libdaxctl support $libdaxctl"
@@ -7885,6 +7892,9 @@ fi
if test "$sheepdog" = "yes" ; then
echo "CONFIG_SHEEPDOG=y" >> $config_host_mak
fi
+if test "$pbs_bdrv" = "yes" ; then
+ echo "CONFIG_PBS_BDRV=y" >> $config_host_mak
+fi
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
diff --git a/meson.build b/meson.build
index f48d2e0457..be4785e2f6 100644
--- a/meson.build
+++ b/meson.build
@@ -3986,7 +3986,7 @@ summary_info += {'bzip2 support': libbzip2}
summary_info += {'lzfse support': liblzfse}
summary_info += {'zstd support': zstd}
summary_info += {'NUMA host support': numa}
-summary_info += {'capstone': capstone}
+summary_info += {'PBS bdrv support': config_host.has_key('CONFIG_PBS_BDRV')}
summary_info += {'libpmem support': libpmem}
summary_info += {'libdaxctl support': libdaxctl}
summary_info += {'libudev': libudev}
if test "$pty_h" = "yes" ; then
echo "HAVE_PTY_H=y" >> $config_host_mak
fi
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4120052690..96bc696aaa 100644
index a177fea6cd..f782c2cf96 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3099,6 +3099,7 @@
@@ -2951,7 +2951,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
+ 'pbs',
'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
- 'sheepdog',
+ 'sheepdog', 'pbs',
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
##
@@ -3171,6 +3172,17 @@
@@ -3015,6 +3015,17 @@
{ 'struct': 'BlockdevOptionsNull',
'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
@@ -399,11 +388,11 @@ index 4120052690..96bc696aaa 100644
##
# @BlockdevOptionsNVMe:
#
@@ -4455,6 +4467,7 @@
@@ -4121,6 +4132,7 @@
'nfs': 'BlockdevOptionsNfs',
'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull',
+ 'pbs': 'BlockdevOptionsPbs',
'nvme': 'BlockdevOptionsNVMe',
'parallels': 'BlockdevOptionsGenericFormat',
'preallocate':'BlockdevOptionsPreallocate',
'qcow2': 'BlockdevOptionsQcow2',

View File

@@ -1,598 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 26 Jan 2021 15:45:30 +0100
Subject: [PATCH] PVE: Use coroutine QMP for backup/cancel_backup
Finally turn backup QMP calls into coroutines, now that it's possible.
This has the benefit that calls are asynchronous to the main loop, i.e.
long running operations like connecting to a PBS server will no longer
hang the VM.
Additionally, it allows us to get rid of block_on_coroutine_fn, which
was always a hacky workaround.
While we're already spring cleaning, also remove the QmpBackupTask
struct, since we can now put the 'prepare' function directly into
qmp_backup and thus no longer need those giant walls of text.
(Note that for our patches to work with 5.2.0 this change is actually
required, otherwise monitor_get_fd() fails as we're not in a QMP
coroutine, but one we start ourselves - we could of course set the
monitor for that coroutine ourselves, but let's just fix it the right
way instead)
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 +-
hmp-commands.hx | 2 +
proxmox-backup-client.c | 31 -----
pve-backup.c | 232 ++++++++++-----------------------
qapi/block-core.json | 4 +-
5 files changed, 77 insertions(+), 196 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index ea7b665aa2..ef45552e3b 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1016,7 +1016,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
g_free(global_snapshots);
}
-void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup_cancel(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
@@ -1025,7 +1025,7 @@ void hmp_backup_cancel(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, error);
}
-void hmp_backup(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 97f24942b3..7a2be816da 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -111,6 +111,7 @@ ERST
"\n\t\t\t Use -d to dump data into a directory instead"
"\n\t\t\t of using VMA format.",
.cmd = hmp_backup,
+ .coroutine = true,
},
SRST
@@ -124,6 +125,7 @@ ERST
.params = "",
.help = "cancel the current VM backup",
.cmd = hmp_backup_cancel,
+ .coroutine = true,
},
SRST
diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
index 4ce7bc0b5e..0923037dec 100644
--- a/proxmox-backup-client.c
+++ b/proxmox-backup-client.c
@@ -5,37 +5,6 @@
/* Proxmox Backup Server client bindings using coroutines */
-typedef struct BlockOnCoroutineWrapper {
- AioContext *ctx;
- CoroutineEntry *entry;
- void *entry_arg;
- bool finished;
-} BlockOnCoroutineWrapper;
-
-static void coroutine_fn block_on_coroutine_wrapper(void *opaque)
-{
- BlockOnCoroutineWrapper *wrapper = opaque;
- wrapper->entry(wrapper->entry_arg);
- wrapper->finished = true;
- aio_wait_kick();
-}
-
-void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg)
-{
- assert(!qemu_in_coroutine());
-
- AioContext *ctx = qemu_get_current_aio_context();
- BlockOnCoroutineWrapper wrapper = {
- .finished = false,
- .entry = entry,
- .entry_arg = entry_arg,
- .ctx = ctx,
- };
- Coroutine *wrapper_co = qemu_coroutine_create(block_on_coroutine_wrapper, &wrapper);
- aio_co_enter(ctx, wrapper_co);
- AIO_WAIT_WHILE(ctx, !wrapper.finished);
-}
-
// This is called from another thread, so we use aio_co_schedule()
static void proxmox_backup_schedule_wake(void *data) {
CoCtxData *waker = (CoCtxData *)data;
diff --git a/pve-backup.c b/pve-backup.c
index 5fa3cc1352..323014744c 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -357,7 +357,7 @@ static void job_cancel_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-static void coroutine_fn pvebackup_co_cancel(void *opaque)
+void coroutine_fn qmp_backup_cancel(Error **errp)
{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
@@ -394,11 +394,6 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
-void qmp_backup_cancel(Error **errp)
-{
- block_on_coroutine_fn(pvebackup_co_cancel, NULL);
-}
-
// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config(
const char *file,
@@ -533,50 +528,27 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
-typedef struct QmpBackupTask {
- const char *backup_file;
- bool has_password;
- const char *password;
- bool has_keyfile;
- const char *keyfile;
- bool has_key_password;
- const char *key_password;
- bool has_backup_id;
- const char *backup_id;
- bool has_backup_time;
- const char *fingerprint;
- bool has_fingerprint;
- int64_t backup_time;
- bool has_use_dirty_bitmap;
- bool use_dirty_bitmap;
- bool has_format;
- BackupFormat format;
- bool has_config_file;
- const char *config_file;
- bool has_firewall_file;
- const char *firewall_file;
- bool has_devlist;
- const char *devlist;
- bool has_compress;
- bool compress;
- bool has_encrypt;
- bool encrypt;
- bool has_speed;
- int64_t speed;
- Error **errp;
- UuidInfo *result;
-} QmpBackupTask;
-
-static void coroutine_fn pvebackup_co_prepare(void *opaque)
+UuidInfo coroutine_fn *qmp_backup(
+ const char *backup_file,
+ bool has_password, const char *password,
+ bool has_keyfile, const char *keyfile,
+ bool has_key_password, const char *key_password,
+ bool has_fingerprint, const char *fingerprint,
+ bool has_backup_id, const char *backup_id,
+ bool has_backup_time, int64_t backup_time,
+ bool has_use_dirty_bitmap, bool use_dirty_bitmap,
+ bool has_compress, bool compress,
+ bool has_encrypt, bool encrypt,
+ bool has_format, BackupFormat format,
+ bool has_config_file, const char *config_file,
+ bool has_firewall_file, const char *firewall_file,
+ bool has_devlist, const char *devlist,
+ bool has_speed, int64_t speed, Error **errp)
{
assert(qemu_in_coroutine());
qemu_co_mutex_lock(&backup_state.backup_mutex);
- QmpBackupTask *task = opaque;
-
- task->result = NULL; // just to be sure
-
BlockBackend *blk;
BlockDriverState *bs = NULL;
const char *backup_dir = NULL;
@@ -593,17 +565,17 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
+ return NULL;
}
/* Todo: try to auto-detect format based on file name */
- BackupFormat format = task->has_format ? task->format : BACKUP_FORMAT_VMA;
+ format = has_format ? format : BACKUP_FORMAT_VMA;
- if (task->has_devlist) {
- devs = g_strsplit_set(task->devlist, ",;:", -1);
+ if (has_devlist) {
+ devs = g_strsplit_set(devlist, ",;:", -1);
gchar **d = devs;
while (d && *d) {
@@ -611,14 +583,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (blk) {
bs = blk_bs(blk);
if (!bdrv_is_inserted(bs)) {
- error_setg(task->errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, *d);
goto err;
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
di_list = g_list_append(di_list, di);
} else {
- error_set(task->errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", *d);
goto err;
}
@@ -641,7 +613,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (!di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "empty device list");
goto err;
}
@@ -651,13 +623,13 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, task->errp)) {
+ if (bdrv_op_is_blocked(di->bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
goto err;
}
ssize_t size = bdrv_getlength(di->bs);
if (size < 0) {
- error_setg_errno(task->errp, -di->size, "bdrv_getlength failed");
+ error_setg_errno(errp, -di->size, "bdrv_getlength failed");
goto err;
}
di->size = size;
@@ -684,47 +656,44 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
if (format == BACKUP_FORMAT_PBS) {
- if (!task->has_password) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
+ if (!has_password) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'password'");
goto err_mutex;
}
- if (!task->has_backup_id) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
+ if (!has_backup_id) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-id'");
goto err_mutex;
}
- if (!task->has_backup_time) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
+ if (!has_backup_time) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "missing parameter 'backup-time'");
goto err_mutex;
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
firewall_name = "fw.conf";
- bool use_dirty_bitmap = task->has_use_dirty_bitmap && task->use_dirty_bitmap;
-
-
char *pbs_err = NULL;
pbs = proxmox_backup_new(
- task->backup_file,
- task->backup_id,
- task->backup_time,
+ backup_file,
+ backup_id,
+ backup_time,
dump_cb_block_size,
- task->has_password ? task->password : NULL,
- task->has_keyfile ? task->keyfile : NULL,
- task->has_key_password ? task->key_password : NULL,
- task->has_compress ? task->compress : true,
- task->has_encrypt ? task->encrypt : task->has_keyfile,
- task->has_fingerprint ? task->fingerprint : NULL,
+ has_password ? password : NULL,
+ has_keyfile ? keyfile : NULL,
+ has_key_password ? key_password : NULL,
+ has_compress ? compress : true,
+ has_encrypt ? encrypt : has_keyfile,
+ has_fingerprint ? fingerprint : NULL,
&pbs_err);
if (!pbs) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
goto err_mutex;
}
- int connect_result = proxmox_backup_co_connect(pbs, task->errp);
+ int connect_result = proxmox_backup_co_connect(pbs, errp);
if (connect_result < 0)
goto err_mutex;
@@ -743,9 +712,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
- if (use_dirty_bitmap) {
+ if (has_use_dirty_bitmap && use_dirty_bitmap) {
if (bitmap == NULL) {
- bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
+ bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, errp);
if (!bitmap) {
goto err_mutex;
}
@@ -775,12 +744,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
}
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, task->errp);
+ int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp);
if (dev_id < 0) {
goto err_mutex;
}
- if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, errp))) {
goto err_mutex;
}
@@ -794,10 +763,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.bitmap_list = g_list_append(backup_state.stat.bitmap_list, info);
}
} else if (format == BACKUP_FORMAT_VMA) {
- vmaw = vma_writer_create(task->backup_file, uuid, &local_err);
+ vmaw = vma_writer_create(backup_file, uuid, &local_err);
if (!vmaw) {
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
}
goto err_mutex;
}
@@ -808,25 +777,25 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
+ if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, errp))) {
goto err_mutex;
}
const char *devname = bdrv_get_device_name(di->bs);
di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
if (di->dev_id <= 0) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
goto err_mutex;
}
}
} else if (format == BACKUP_FORMAT_DIR) {
- if (mkdir(task->backup_file, 0640) != 0) {
- error_setg_errno(task->errp, errno, "can't create directory '%s'\n",
- task->backup_file);
+ if (mkdir(backup_file, 0640) != 0) {
+ error_setg_errno(errp, errno, "can't create directory '%s'\n",
+ backup_file);
goto err_mutex;
}
- backup_dir = task->backup_file;
+ backup_dir = backup_file;
l = di_list;
while (l) {
@@ -840,34 +809,34 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
bdrv_img_create(di->targetfile, "raw", NULL, NULL, NULL,
di->size, flags, false, &local_err);
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
di->target = bdrv_open(di->targetfile, NULL, NULL, flags, &local_err);
if (!di->target) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err_mutex;
}
}
} else {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
goto err_mutex;
}
/* add configuration file to archive */
- if (task->has_config_file) {
- if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_config_file) {
+ if (pvebackup_co_add_config(config_file, config_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
/* add firewall file to archive */
- if (task->has_firewall_file) {
- if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
- vmaw, pbs, task->errp) != 0) {
+ if (has_firewall_file) {
+ if (pvebackup_co_add_config(firewall_file, firewall_name, format, backup_dir,
+ vmaw, pbs, errp) != 0) {
goto err_mutex;
}
}
@@ -885,7 +854,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (backup_state.stat.backup_file) {
g_free(backup_state.stat.backup_file);
}
- backup_state.stat.backup_file = g_strdup(task->backup_file);
+ backup_state.stat.backup_file = g_strdup(backup_file);
uuid_copy(backup_state.stat.uuid, uuid);
uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
@@ -900,7 +869,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_mutex_unlock(&backup_state.stat.lock);
- backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
+ backup_state.speed = (has_speed && speed > 0) ? speed : 0;
backup_state.vmaw = vmaw;
backup_state.pbs = pbs;
@@ -910,8 +879,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str;
- task->result = uuid_info;
-
/* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
* backup_mutex locked. This is fine, a CoMutex can be held across yield
* points, and we'll release it as soon as the BH reschedules us.
@@ -925,7 +892,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
qemu_coroutine_yield();
if (local_err) {
- error_propagate(task->errp, local_err);
+ error_propagate(errp, local_err);
goto err;
}
@@ -938,7 +905,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
/* start the first job in the transaction */
job_txn_start_seq(backup_state.txn);
- return;
+ return uuid_info;
err_mutex:
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -969,7 +936,7 @@ err:
if (vmaw) {
Error *err = NULL;
vma_writer_close(vmaw, &err);
- unlink(task->backup_file);
+ unlink(backup_file);
}
if (pbs) {
@@ -980,65 +947,8 @@ err:
rmdir(backup_dir);
}
- task->result = NULL;
-
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
-}
-
-UuidInfo *qmp_backup(
- const char *backup_file,
- bool has_password, const char *password,
- bool has_keyfile, const char *keyfile,
- bool has_key_password, const char *key_password,
- bool has_fingerprint, const char *fingerprint,
- bool has_backup_id, const char *backup_id,
- bool has_backup_time, int64_t backup_time,
- bool has_use_dirty_bitmap, bool use_dirty_bitmap,
- bool has_compress, bool compress,
- bool has_encrypt, bool encrypt,
- bool has_format, BackupFormat format,
- bool has_config_file, const char *config_file,
- bool has_firewall_file, const char *firewall_file,
- bool has_devlist, const char *devlist,
- bool has_speed, int64_t speed, Error **errp)
-{
- QmpBackupTask task = {
- .backup_file = backup_file,
- .has_password = has_password,
- .password = password,
- .has_keyfile = has_keyfile,
- .keyfile = keyfile,
- .has_key_password = has_key_password,
- .key_password = key_password,
- .has_fingerprint = has_fingerprint,
- .fingerprint = fingerprint,
- .has_backup_id = has_backup_id,
- .backup_id = backup_id,
- .has_backup_time = has_backup_time,
- .backup_time = backup_time,
- .has_use_dirty_bitmap = has_use_dirty_bitmap,
- .use_dirty_bitmap = use_dirty_bitmap,
- .has_compress = has_compress,
- .compress = compress,
- .has_encrypt = has_encrypt,
- .encrypt = encrypt,
- .has_format = has_format,
- .format = format,
- .has_config_file = has_config_file,
- .config_file = config_file,
- .has_firewall_file = has_firewall_file,
- .firewall_file = firewall_file,
- .has_devlist = has_devlist,
- .devlist = devlist,
- .has_speed = has_speed,
- .speed = speed,
- .errp = errp,
- };
-
- block_on_coroutine_fn(pvebackup_co_prepare, &task);
-
- return task.result;
+ return NULL;
}
BackupStatus *qmp_query_backup(Error **errp)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bd978ea562..ca1966f54b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -842,7 +842,7 @@
'*config-file': 'str',
'*firewall-file': 'str',
'*devlist': 'str', '*speed': 'int' },
- 'returns': 'UuidInfo' }
+ 'returns': 'UuidInfo', 'coroutine': true }
##
# @query-backup:
@@ -864,7 +864,7 @@
# Notes: This command succeeds even if there is no backup process running.
#
##
-{ 'command': 'backup-cancel' }
+{ 'command': 'backup-cancel', 'coroutine': true }
##
# @ProxmoxSupportStatus:

View File

@@ -1,98 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 10 Feb 2021 11:07:06 +0100
Subject: [PATCH] PBS: add master key support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
this requires a new enough libproxmox-backup-qemu0, and allows querying
from the PVE side to avoid QMP calls with unsupported parameters.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
pve-backup.c | 3 +++
qapi/block-core.json | 7 +++++++
3 files changed, 11 insertions(+)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index ef45552e3b..4c799f00d9 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1039,6 +1039,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS password
false, NULL, // PBS keyfile
false, NULL, // PBS key_password
+ false, NULL, // PBS master_keyfile
false, NULL, // PBS fingerprint
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
diff --git a/pve-backup.c b/pve-backup.c
index 323014744c..9f6c04a512 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -533,6 +533,7 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_password, const char *password,
bool has_keyfile, const char *keyfile,
bool has_key_password, const char *key_password,
+ bool has_master_keyfile, const char *master_keyfile,
bool has_fingerprint, const char *fingerprint,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
@@ -681,6 +682,7 @@ UuidInfo coroutine_fn *qmp_backup(
has_password ? password : NULL,
has_keyfile ? keyfile : NULL,
has_key_password ? key_password : NULL,
+ has_master_keyfile ? master_keyfile : NULL,
has_compress ? compress : true,
has_encrypt ? encrypt : has_keyfile,
has_fingerprint ? fingerprint : NULL,
@@ -1044,5 +1046,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_dirty_bitmap_savevm = true;
ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
+ ret->pbs_masterkey = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ca1966f54b..fc8a125451 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -813,6 +813,8 @@
#
# @key-password: password for keyfile (optional for format 'pbs')
#
+# @master-keyfile: PEM-formatted master public keyfile (optional for format 'pbs')
+#
# @fingerprint: server cert fingerprint (optional for format 'pbs')
#
# @backup-id: backup ID (required for format 'pbs')
@@ -832,6 +834,7 @@
'*password': 'str',
'*keyfile': 'str',
'*key-password': 'str',
+ '*master-keyfile': 'str',
'*fingerprint': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
@@ -884,6 +887,9 @@
# migration cap if this is false/unset may lead
# to crashes on migration!
#
+# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
+# parameter.
+#
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
##
@@ -892,6 +898,7 @@
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-dirty-bitmap-migration': 'bool',
+ 'pbs-masterkey': 'bool',
'pbs-library-version': 'str' } }
##

View File

@@ -11,15 +11,15 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[PVE: query-proxmox-support: include library version]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 9 +++++++++
qapi/block-core.json | 29 +++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
pve-backup.c | 8 ++++++++
qapi/block-core.json | 25 +++++++++++++++++++++++++
2 files changed, 33 insertions(+)
diff --git a/pve-backup.c b/pve-backup.c
index c15abefdda..4684789813 100644
index bfb648d6b5..6bf138cfc6 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1075,3 +1075,12 @@ BackupStatus *qmp_query_backup(Error **errp)
@@ -1051,3 +1051,11 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
@@ -29,14 +29,13 @@ index c15abefdda..4684789813 100644
+ ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
+ ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
+ ret->pbs_dirty_bitmap = true;
+ ret->pbs_dirty_bitmap_savevm = true;
+ return ret;
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 96bc696aaa..0b453c61d4 100644
index f782c2cf96..1ed5987c88 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -863,6 +863,35 @@
@@ -877,6 +877,31 @@
##
{ 'command': 'backup-cancel' }
@@ -48,15 +47,11 @@ index 96bc696aaa..0b453c61d4 100644
+# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
+# supported.
+#
+# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
+# safely be set for savevm-async.
+#
+# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+#
+##
+{ 'struct': 'ProxmoxSupportStatus',
+ 'data': { 'pbs-dirty-bitmap': 'bool',
+ 'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-library-version': 'str' } }
+
+##

View File

@@ -1,53 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 9 Dec 2020 11:46:57 +0100
Subject: [PATCH] PVE: block/pbs: fast-path reads without allocation if
possible
...and switch over to g_malloc/g_free while at it to align with other
QEMU code.
Tracing shows the fast-path is taken almost all the time, though not
100% so the slow one is still necessary.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/pbs.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/pbs.c b/block/pbs.c
index 0b05ea9080..c5eb4d5bad 100644
--- a/block/pbs.c
+++ b/block/pbs.c
@@ -200,7 +200,16 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
BDRVPBSState *s = bs->opaque;
int ret;
char *pbs_error = NULL;
- uint8_t *buf = malloc(bytes);
+ uint8_t *buf;
+ bool inline_buf = true;
+
+ /* for single-buffer IO vectors we can fast-path the write directly to it */
+ if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
+ buf = qiov->iov->iov_base;
+ } else {
+ inline_buf = false;
+ buf = g_malloc(bytes);
+ }
if (offset < 0 || bytes < 0) {
fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
@@ -223,8 +232,10 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
return -EIO;
}
- qemu_iovec_from_buf(qiov, 0, buf, bytes);
- free(buf);
+ if (!inline_buf) {
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+ }
return 0;
}

View File

@@ -0,0 +1,43 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Fri, 10 Jul 2020 13:22:35 +0200
Subject: [PATCH] pbs: fix missing crypt and compress parameters
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 6bf138cfc6..cd3a132d8b 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -958,6 +958,8 @@ UuidInfo *qmp_backup(
.backup_file = backup_file,
.has_password = has_password,
.password = password,
+ .has_keyfile = has_keyfile,
+ .keyfile = keyfile,
.has_key_password = has_key_password,
.key_password = key_password,
.has_fingerprint = has_fingerprint,
@@ -968,6 +970,10 @@ UuidInfo *qmp_backup(
.backup_time = backup_time,
.has_use_dirty_bitmap = has_use_dirty_bitmap,
.use_dirty_bitmap = use_dirty_bitmap,
+ .has_compress = has_compress,
+ .compress = compress,
+ .has_encrypt = has_encrypt,
+ .encrypt = encrypt,
.has_format = has_format,
.format = format,
.has_config_file = has_config_file,
@@ -976,8 +982,6 @@ UuidInfo *qmp_backup(
.firewall_file = firewall_file,
.has_devlist = has_devlist,
.devlist = devlist,
- .has_compress = has_compress,
- .has_encrypt = has_encrypt,
.has_speed = has_speed,
.speed = speed,
.errp = errp,

View File

@@ -1,25 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 2 Mar 2021 16:34:28 +0100
Subject: [PATCH] PVE: block/stream: increase chunk size
Ceph favors bigger chunks, so increase to 4M.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/stream.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/stream.c b/block/stream.c
index 694709bd25..e09bd5c4ef 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -28,7 +28,7 @@ enum {
* large enough to process multiple clusters in a single call, so
* that populating contiguous regions of the image is efficient.
*/
- STREAM_CHUNK = 512 * 1024, /* in bytes */
+ STREAM_CHUNK = 4 * 1024 * 1024, /* in bytes */
};
typedef struct StreamBlockJob {

View File

@@ -0,0 +1,76 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 19 Aug 2020 12:33:17 +0200
Subject: [PATCH] PVE: handle PBS write callback with big blocks correctly
Under certain conditions QEMU will push more than the given blocksize
into the callback at once. Handle it like VMA does, by iterating the
data until all is written.
The block size is stored per backup device to be used in the callback.
This avoids relying on PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE, in case it is
made configurable in the future.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 30 ++++++++++++++++++++++--------
1 file changed, 22 insertions(+), 8 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index cd3a132d8b..f14273645a 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -67,6 +67,7 @@ opts_init(pvebackup_init);
typedef struct PVEBackupDevInfo {
BlockDriverState *bs;
size_t size;
+ uint64_t block_size;
uint8_t dev_id;
bool completed;
char targetfile[PATH_MAX];
@@ -147,17 +148,28 @@ pvebackup_co_dump_pbs_cb(
return -1;
}
- pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
- qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ uint64_t transferred = 0;
+ uint64_t reused = 0;
+ while (transferred < size) {
+ uint64_t left = size - transferred;
+ uint64_t to_transfer = left < di->block_size ? left : di->block_size;
- if (pbs_res < 0) {
- pvebackup_propagate_error(local_err);
- return pbs_res;
- } else {
- size_t reused = (pbs_res == 0) ? size : 0;
- pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
+ pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id,
+ buf ? buf + transferred : NULL, start + transferred, to_transfer, &local_err);
+ transferred += to_transfer;
+
+ if (pbs_res < 0) {
+ pvebackup_propagate_error(local_err);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return pbs_res;
+ }
+
+ reused += pbs_res == 0 ? to_transfer : 0;
}
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
+
return size;
}
@@ -726,6 +738,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
+ di->block_size = dump_cb_block_size;
+
const char *devname = bdrv_get_device_name(di->bs);
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);

View File

@@ -0,0 +1,85 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 13 Aug 2020 13:50:27 +0200
Subject: [PATCH] PVE: add zero block handling to PBS dump callback
Both the PBS and VMA dump callbacks assume that a NULL pointer can be
passed as *pbuf, but that never happens, as backup-dump.c calls this
function with contents of an iovec.
So first, remove that assumption and add an 'assert' to verify.
Secondly, while the vma-writer already does the buffer_is_zero check
internally, for PBS we relied on that non-existant behaviour for zero
chunks, so do the buffer_is_zero check manually and pass NULL to the
rust lib in case it is true.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index f14273645a..bd802c6205 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -8,6 +8,7 @@
#include "block/blockjob.h"
#include "qapi/qapi-commands-block.h"
#include "qapi/qmp/qerror.h"
+#include "qemu/cutils.h"
/* PVE backup state and related function */
@@ -136,10 +137,13 @@ pvebackup_co_dump_pbs_cb(
PVEBackupDevInfo *di = opaque;
assert(backup_state.pbs);
+ assert(buf);
Error *local_err = NULL;
int pbs_res = -1;
+ bool is_zero_block = size == di->block_size && buffer_is_zero(buf, size);
+
qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
@@ -155,7 +159,8 @@ pvebackup_co_dump_pbs_cb(
uint64_t to_transfer = left < di->block_size ? left : di->block_size;
pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id,
- buf ? buf + transferred : NULL, start + transferred, to_transfer, &local_err);
+ is_zero_block ? NULL : buf + transferred, start + transferred,
+ to_transfer, &local_err);
transferred += to_transfer;
if (pbs_res < 0) {
@@ -168,7 +173,7 @@ pvebackup_co_dump_pbs_cb(
}
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
- pvebackup_add_transfered_bytes(size, !buf ? size : 0, reused);
+ pvebackup_add_transfered_bytes(size, is_zero_block ? size : 0, reused);
return size;
}
@@ -190,6 +195,7 @@ pvebackup_co_dump_vma_cb(
int ret = -1;
assert(backup_state.vmaw);
+ assert(buf);
uint64_t remaining = size;
@@ -216,9 +222,7 @@ pvebackup_co_dump_vma_cb(
qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
++cluster_num;
- if (buf) {
- buf += VMA_CLUSTER_SIZE;
- }
+ buf += VMA_CLUSTER_SIZE;
if (ret < 0) {
Error *local_err = NULL;
vma_writer_error_propagate(backup_state.vmaw, &local_err);

View File

@@ -1,33 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 2 Mar 2021 16:11:54 +0100
Subject: [PATCH] block/io: accept NULL qiov in bdrv_pad_request
Some operations, e.g. block-stream, perform reads while discarding the
results (only copy-on-read matters). In this case they will pass NULL as
the target QEMUIOVector, which will however trip bdrv_pad_request, since
it wants to extend its passed vector.
Simply check for NULL and do nothing, there's no reason to pad the
target if it will be discarded anyway.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/io.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/block/io.c b/block/io.c
index 0a8cbefe86..531b3b7a2d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1734,6 +1734,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
{
int ret;
+ if (!qiov) {
+ return 0;
+ }
+
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {

View File

@@ -7,7 +7,6 @@ Returns advanced information about dirty bitmaps used (or not used) for
the latest PBS backup.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
monitor/hmp-cmds.c | 28 ++++++-----
pve-backup.c | 117 ++++++++++++++++++++++++++++++++-----------
@@ -15,10 +14,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
3 files changed, 159 insertions(+), 42 deletions(-)
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 4c1671e289..c1152f55a7 100644
index 3ff014d32a..c3227a1498 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -200,6 +200,7 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict)
@@ -195,6 +195,7 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict)
void hmp_info_backup(Monitor *mon, const QDict *qdict)
{
BackupStatus *info;
@@ -26,7 +25,7 @@ index 4c1671e289..c1152f55a7 100644
info = qmp_query_backup(NULL);
@@ -230,26 +231,29 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
@@ -225,26 +226,29 @@ void hmp_info_backup(Monitor *mon, const QDict *qdict)
// this should not happen normally
monitor_printf(mon, "Total size: %d\n", 0);
} else {
@@ -69,7 +68,7 @@ index 4c1671e289..c1152f55a7 100644
info->zero_bytes, zero_per);
diff --git a/pve-backup.c b/pve-backup.c
index 4684789813..f90abaa50a 100644
index bd802c6205..2db4a62580 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -46,6 +46,7 @@ static struct PVEBackupState {
@@ -80,7 +79,7 @@ index 4684789813..f90abaa50a 100644
} stat;
int64_t speed;
VmaWriter *vmaw;
@@ -672,7 +673,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -670,7 +671,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
size_t total = 0;
@@ -88,7 +87,7 @@ index 4684789813..f90abaa50a 100644
l = di_list;
while (l) {
@@ -693,18 +693,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -691,18 +691,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_generate(uuid);
@@ -125,7 +124,7 @@ index 4684789813..f90abaa50a 100644
}
int dump_cb_block_size = PROXMOX_BACKUP_DEFAULT_CHUNK_SIZE; // Hardcoded (4M)
@@ -731,12 +746,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -729,12 +744,12 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"proxmox_backup_new failed: %s", pbs_err);
proxmox_backup_free_error(pbs_err);
@@ -140,7 +139,7 @@ index 4684789813..f90abaa50a 100644
/* register all devices */
l = di_list;
@@ -747,6 +762,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -745,6 +760,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->block_size = dump_cb_block_size;
const char *devname = bdrv_get_device_name(di->bs);
@@ -149,7 +148,7 @@ index 4684789813..f90abaa50a 100644
BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(di->bs, PBS_BITMAP_NAME);
bool expect_only_dirty = false;
@@ -755,49 +772,59 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -753,49 +770,59 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (bitmap == NULL) {
bitmap = bdrv_create_dirty_bitmap(di->bs, dump_cb_block_size, PBS_BITMAP_NAME, task->errp);
if (!bitmap) {
@@ -219,7 +218,7 @@ index 4684789813..f90abaa50a 100644
}
/* register all devices for vma writer */
@@ -807,7 +834,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -805,7 +832,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
l = g_list_next(l);
if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
@@ -228,7 +227,7 @@ index 4684789813..f90abaa50a 100644
}
const char *devname = bdrv_get_device_name(di->bs);
@@ -815,16 +842,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -813,16 +840,14 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (di->dev_id <= 0) {
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"register_stream failed");
@@ -247,7 +246,7 @@ index 4684789813..f90abaa50a 100644
}
backup_dir = task->backup_file;
@@ -841,18 +866,18 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -839,18 +864,18 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
di->size, flags, false, &local_err);
if (local_err) {
error_propagate(task->errp, local_err);
@@ -269,7 +268,7 @@ index 4684789813..f90abaa50a 100644
}
@@ -860,7 +885,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -858,7 +883,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_config_file) {
if (pvebackup_co_add_config(task->config_file, config_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
@@ -278,7 +277,7 @@ index 4684789813..f90abaa50a 100644
}
}
@@ -868,12 +893,11 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -866,12 +891,11 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
if (task->has_firewall_file) {
if (pvebackup_co_add_config(task->firewall_file, firewall_name, format, backup_dir,
vmaw, pbs, task->errp) != 0) {
@@ -293,7 +292,7 @@ index 4684789813..f90abaa50a 100644
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
@@ -893,10 +917,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -891,10 +915,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
char *uuid_str = g_strdup(backup_state.stat.uuid_str);
backup_state.stat.total = total;
@@ -305,7 +304,7 @@ index 4684789813..f90abaa50a 100644
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -913,6 +936,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -911,6 +934,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->result = uuid_info;
return;
@@ -315,7 +314,7 @@ index 4684789813..f90abaa50a 100644
err:
l = di_list;
@@ -1076,11 +1102,42 @@ BackupStatus *qmp_query_backup(Error **errp)
@@ -1074,10 +1100,41 @@ BackupStatus *qmp_query_backup(Error **errp)
return info;
}
@@ -354,32 +353,29 @@ index 4684789813..f90abaa50a 100644
ProxmoxSupportStatus *ret = g_malloc0(sizeof(*ret));
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->query_bitmap_info = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0b453c61d4..16e184dd28 100644
index 1ed5987c88..03fc0af99b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -871,6 +871,8 @@
@@ -885,11 +885,14 @@
# @pbs-dirty-bitmap: True if dirty-bitmap-incremental backups to PBS are
# supported.
#
+# @query-bitmap-info: True if the 'query-pbs-bitmap-info' QMP call is supported.
+#
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
@@ -879,6 +881,7 @@
##
{ 'struct': 'ProxmoxSupportStatus',
'data': { 'pbs-dirty-bitmap': 'bool',
+ 'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-library-version': 'str' } }
@@ -892,6 +895,59 @@
##
@@ -902,6 +905,59 @@
##
{ 'command': 'query-proxmox-support', 'returns': 'ProxmoxSupportStatus' }

View File

@@ -1,403 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 7 Dec 2020 15:21:03 +0100
Subject: [PATCH] block: add alloc-track driver
Add a new filter node 'alloc-track', which seperates reads and writes to
different children, thus allowing to put a backing image behind any
blockdev (regardless of driver support). Since we can't detect any
pre-allocated blocks, we can only track new writes, hence the write
target ('file') for this node must always be empty.
Intended use case is for live restoring, i.e. add a backup image as a
block device into a VM, then put an alloc-track on the restore target
and set the backup as backing. With this, one can use a regular
'block-stream' to restore the image, while the VM can already run in the
background. Copy-on-read will help make progress as the VM reads as
well.
This only worked if the target supports backing images, so up until now
only for qcow2, with alloc-track any driver for the target can be used.
If 'auto-remove' is set, alloc-track will automatically detach itself
once the backing image is removed. It will be replaced by 'file'.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: adapt to changed function signatures
make error return value consistent with QEMU]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 350 ++++++++++++++++++++++++++++++++++++++++++++
block/meson.build | 1 +
2 files changed, 351 insertions(+)
create mode 100644 block/alloc-track.c
diff --git a/block/alloc-track.c b/block/alloc-track.c
new file mode 100644
index 0000000000..43d40d11af
--- /dev/null
+++ b/block/alloc-track.c
@@ -0,0 +1,350 @@
+/*
+ * Node to allow backing images to be applied to any node. Assumes a blank
+ * image to begin with, only new writes are tracked as allocated, thus this
+ * must never be put on a node that already contains data.
+ *
+ * Copyright (c) 2020 Proxmox Server Solutions GmbH
+ * Copyright (c) 2020 Stefan Reiter <s.reiter@proxmox.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "sysemu/block-backend.h"
+
+#define TRACK_OPT_AUTO_REMOVE "auto-remove"
+
+typedef enum DropState {
+ DropNone,
+ DropRequested,
+ DropInProgress,
+} DropState;
+
+typedef struct {
+ BdrvDirtyBitmap *bitmap;
+ DropState drop_state;
+ bool auto_remove;
+} BDRVAllocTrackState;
+
+static QemuOptsList runtime_opts = {
+ .name = "alloc-track",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+ .desc = {
+ {
+ .name = TRACK_OPT_AUTO_REMOVE,
+ .type = QEMU_OPT_BOOL,
+ .help = "automatically replace this node with 'file' when 'backing'"
+ "is detached",
+ },
+ { /* end of list */ }
+ },
+};
+
+static void track_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ BlockDriverInfo bdi;
+
+ if (!bs->file) {
+ return;
+ }
+
+ /* always use alignment from underlying write device so RMW cycle for
+ * bdrv_pwritev reads data from our backing via track_co_preadv (no partial
+ * cluster allocation in 'file') */
+ bdrv_get_info(bs->file->bs, &bdi);
+ bs->bl.request_alignment = MAX(bs->file->bs->bl.request_alignment,
+ MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
+}
+
+static int track_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+ int ret = 0;
+
+ opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
+
+ /* open the target (write) node, backing will be attached by block layer */
+ bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+ BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
+ &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ track_refresh_limits(bs, errp);
+ uint64_t gran = bs->bl.request_alignment;
+ s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, gran, NULL, &local_err);
+ if (local_err) {
+ ret = -EIO;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ s->drop_state = DropNone;
+
+fail:
+ if (ret < 0) {
+ bdrv_unref_child(bs, bs->file);
+ if (s->bitmap) {
+ bdrv_release_dirty_bitmap(s->bitmap);
+ }
+ }
+ qemu_opts_del(opts);
+ return ret;
+}
+
+static void track_close(BlockDriverState *bs)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ if (s->bitmap) {
+ bdrv_release_dirty_bitmap(s->bitmap);
+ }
+}
+
+static int64_t track_getlength(BlockDriverState *bs)
+{
+ return bdrv_getlength(bs->file->bs);
+}
+
+static int coroutine_fn track_co_preadv(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ QEMUIOVector local_qiov;
+ int ret;
+
+ /* 'cur_offset' is relative to 'offset', 'local_offset' to image start */
+ uint64_t cur_offset, local_offset;
+ int64_t local_bytes;
+ bool alloc;
+
+ if (offset < 0 || bytes < 0) {
+ fprintf(stderr, "unexpected negative 'offset' or 'bytes' value!\n");
+ return -EIO;
+ }
+
+ /* a read request can span multiple granularity-sized chunks, and can thus
+ * contain blocks with different allocation status - we could just iterate
+ * granularity-wise, but for better performance use bdrv_dirty_bitmap_next_X
+ * to find the next flip and consider everything up to that in one go */
+ for (cur_offset = 0; cur_offset < bytes; cur_offset += local_bytes) {
+ local_offset = offset + cur_offset;
+ alloc = bdrv_dirty_bitmap_get(s->bitmap, local_offset);
+ if (alloc) {
+ local_bytes = bdrv_dirty_bitmap_next_zero(s->bitmap, local_offset,
+ bytes - cur_offset);
+ } else {
+ local_bytes = bdrv_dirty_bitmap_next_dirty(s->bitmap, local_offset,
+ bytes - cur_offset);
+ }
+
+ /* _bitmap_next_X return is -1 if no end found within limit, otherwise
+ * offset of next flip (to start of image) */
+ local_bytes = local_bytes < 0 ?
+ bytes - cur_offset :
+ local_bytes - local_offset;
+
+ qemu_iovec_init_slice(&local_qiov, qiov, cur_offset, local_bytes);
+
+ if (alloc) {
+ ret = bdrv_co_preadv(bs->file, local_offset, local_bytes,
+ &local_qiov, flags);
+ } else if (bs->backing) {
+ ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
+ &local_qiov, flags);
+ } else {
+ ret = qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ }
+
+ if (ret != 0) {
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int coroutine_fn track_co_pwritev(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn track_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+{
+ return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+}
+
+static int coroutine_fn track_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int64_t bytes)
+{
+ return bdrv_co_pdiscard(bs->file, offset, bytes);
+}
+
+static coroutine_fn int track_co_flush(BlockDriverState *bs)
+{
+ return bdrv_co_flush(bs->file->bs);
+}
+
+static int coroutine_fn track_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+
+ bool alloc = bdrv_dirty_bitmap_get(s->bitmap, offset);
+ int64_t next_flipped;
+ if (alloc) {
+ next_flipped = bdrv_dirty_bitmap_next_zero(s->bitmap, offset, bytes);
+ } else {
+ next_flipped = bdrv_dirty_bitmap_next_dirty(s->bitmap, offset, bytes);
+ }
+
+ /* in case not the entire region has the same state, we need to set pnum to
+ * indicate for how many bytes our result is valid */
+ *pnum = next_flipped == -1 ? bytes : next_flipped - offset;
+ *map = offset;
+
+ if (alloc) {
+ *file = bs->file->bs;
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
+ } else if (bs->backing) {
+ *file = bs->backing->bs;
+ }
+ return 0;
+}
+
+static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role, BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+
+ *nshared = BLK_PERM_ALL;
+
+ /* in case we're currently dropping ourselves, claim to not use any
+ * permissions at all - which is fine, since from this point on we will
+ * never issue a read or write anymore */
+ if (s->drop_state == DropInProgress) {
+ *nperm = 0;
+ return;
+ }
+
+ if (role & BDRV_CHILD_DATA) {
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+ } else {
+ /* 'backing' is also a child of our BDS, but we don't expect it to be
+ * writeable, so we only forward 'consistent read' */
+ *nperm = perm & BLK_PERM_CONSISTENT_READ;
+ }
+}
+
+static void track_drop(void *opaque)
+{
+ BlockDriverState *bs = (BlockDriverState*)opaque;
+ BlockDriverState *file = bs->file->bs;
+ BDRVAllocTrackState *s = bs->opaque;
+
+ assert(file);
+
+ /* we rely on the fact that we're not used anywhere else, so let's wait
+ * until we're only used once - in the drive connected to the guest (and one
+ * ref is held by bdrv_ref in track_change_backing_file) */
+ if (bs->refcnt > 2) {
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
+ return;
+ }
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_drained_begin(bs);
+
+ /* now that we're drained, we can safely set 'DropInProgress' */
+ s->drop_state = DropInProgress;
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+
+ bdrv_replace_node(bs, file, &error_abort);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
+ bdrv_drained_end(bs);
+ bdrv_unref(bs);
+ aio_context_release(aio_context);
+}
+
+static int track_change_backing_file(BlockDriverState *bs,
+ const char *backing_file,
+ const char *backing_fmt)
+{
+ BDRVAllocTrackState *s = bs->opaque;
+ if (s->auto_remove && s->drop_state == DropNone &&
+ backing_file == NULL && backing_fmt == NULL)
+ {
+ /* backing file has been disconnected, there's no longer any use for
+ * this node, so let's remove ourselves from the block graph - we need
+ * to schedule this for later however, since when this function is
+ * called, the blockjob modifying us is probably not done yet and has a
+ * blocker on 'bs' */
+ s->drop_state = DropRequested;
+ bdrv_ref(bs);
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
+ }
+
+ return 0;
+}
+
+static BlockDriver bdrv_alloc_track = {
+ .format_name = "alloc-track",
+ .instance_size = sizeof(BDRVAllocTrackState),
+
+ .bdrv_file_open = track_open,
+ .bdrv_close = track_close,
+ .bdrv_getlength = track_getlength,
+ .bdrv_child_perm = track_child_perm,
+ .bdrv_refresh_limits = track_refresh_limits,
+
+ .bdrv_co_pwrite_zeroes = track_co_pwrite_zeroes,
+ .bdrv_co_pwritev = track_co_pwritev,
+ .bdrv_co_preadv = track_co_preadv,
+ .bdrv_co_pdiscard = track_co_pdiscard,
+
+ .bdrv_co_flush = track_co_flush,
+ .bdrv_co_flush_to_disk = track_co_flush,
+
+ .supports_backing = true,
+
+ .bdrv_co_block_status = track_co_block_status,
+ .bdrv_change_backing_file = track_change_backing_file,
+};
+
+static void bdrv_alloc_track_init(void)
+{
+ bdrv_register(&bdrv_alloc_track);
+}
+
+block_init(bdrv_alloc_track_init);
diff --git a/block/meson.build b/block/meson.build
index a26a69434e..74e5f49758 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -2,6 +2,7 @@ block_ss.add(genh)
block_ss.add(files(
'accounting.c',
'aio_task.c',
+ 'alloc-track.c',
'amend.c',
'backup.c',
'backup-dump.c',

View File

@@ -0,0 +1,52 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Tue, 30 Jun 2020 13:10:10 +0200
Subject: [PATCH] PVE: redirect stderr to journal when daemonized
QEMU uses the logging for error messages usually, so LOG_ERR is most
fitting.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
Makefile.objs | 1 +
os-posix.c | 7 +++++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/Makefile.objs b/Makefile.objs
index 240eb503f2..c7ba4e11e7 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -54,6 +54,7 @@ common-obj-y += net/
common-obj-y += qdev-monitor.o
common-obj-$(CONFIG_WIN32) += os-win32.o
common-obj-$(CONFIG_POSIX) += os-posix.o
+os-posix.o-libs := -lsystemd
common-obj-$(CONFIG_LINUX) += fsdev/
diff --git a/os-posix.c b/os-posix.c
index 3572db3f44..b45dde63ac 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -28,6 +28,8 @@
#include <pwd.h>
#include <grp.h>
#include <libgen.h>
+#include <systemd/sd-journal.h>
+#include <syslog.h>
#include "qemu-common.h"
/* Needed early for CONFIG_BSD etc. */
@@ -312,9 +314,10 @@ void os_setup_post(void)
dup2(fd, 0);
dup2(fd, 1);
- /* In case -D is given do not redirect stderr to /dev/null */
+ /* In case -D is given do not redirect stderr to journal */
if (!qemu_logfile) {
- dup2(fd, 2);
+ int journal_fd = sd_journal_stream_fd("QEMU", LOG_ERR, 0);
+ dup2(journal_fd, 2);
}
close(fd);

View File

@@ -1,33 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 15:26:30 +0200
Subject: [PATCH] PVE: whitelist 'invalid' QAPI names for backwards compat
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
qapi/pragma.json | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/qapi/pragma.json b/qapi/pragma.json
index a2358e303a..9ff5c84ffd 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -15,6 +15,7 @@
'device_add',
'device_del',
'expire_password',
+ 'get_link_status',
'migrate_cancel',
'netdev_add',
'netdev_del',
@@ -64,6 +65,8 @@
'SysEmuTarget', # query-cpu-fast, query-target
'UuidInfo', # query-uuid
'VncClientInfo', # query-vnc, query-vnc-servers, ...
- 'X86CPURegister32' # qom-get of x86 CPU properties
+ 'X86CPURegister32', # qom-get of x86 CPU properties
# feature-words, filtered-features
+ 'BlockdevOptionsPbs', # for PBS backwards compat
+ 'BalloonInfo'
] } }

View File

@@ -4,17 +4,16 @@ Date: Thu, 20 Aug 2020 14:31:59 +0200
Subject: [PATCH] PVE: Add sequential job transaction support
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/qemu/job.h | 12 ++++++++++++
job.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
job.c | 24 ++++++++++++++++++++++++
2 files changed, 36 insertions(+)
diff --git a/include/qemu/job.h b/include/qemu/job.h
index c105b31076..5096679571 100644
index 32aabb1c60..f7a6a0926a 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -316,6 +316,18 @@ typedef enum JobCreateFlags {
@@ -280,6 +280,18 @@ typedef enum JobCreateFlags {
*/
JobTxn *job_txn_new(void);
@@ -34,7 +33,7 @@ index c105b31076..5096679571 100644
* Release a reference that was previously acquired with job_txn_add_job or
* job_txn_new. If it's the last reference to the object, it will be freed.
diff --git a/job.c b/job.c
index e5699ad200..34c9758349 100644
index b8139c80a4..97ee97a192 100644
--- a/job.c
+++ b/job.c
@@ -72,6 +72,8 @@ struct JobTxn {
@@ -72,7 +71,7 @@ index e5699ad200..34c9758349 100644
static void job_txn_ref(JobTxn *txn)
{
txn->refcnt++;
@@ -897,6 +918,9 @@ static void job_completed_txn_success(Job *job)
@@ -841,6 +862,9 @@ static void job_completed_txn_success(Job *job)
*/
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (!job_is_completed(other_job)) {
@@ -82,17 +81,3 @@ index e5699ad200..34c9758349 100644
return;
}
assert(other_job->ret == 0);
@@ -1093,6 +1117,13 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
return -EBUSY;
}
+ /* in a sequential transaction jobs with status CREATED can appear at time
+ * of cancelling, these have not begun work so job_enter won't do anything,
+ * let's ensure they are marked as ABORTING if required */
+ if (job->status == JOB_STATUS_CREATED && job->txn->sequential) {
+ job_update_rc(job);
+ }
+
AIO_WAIT_WHILE(job->aio_context,
(job_enter(job), !job_is_completed(job)));

View File

@@ -1,35 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 26 May 2021 17:36:55 +0200
Subject: [PATCH] PVE: savevm-async: register yank before
migration_incoming_state_destroy
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/savevm-async.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index e65a5e3482..2ed2536816 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -20,6 +20,7 @@
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
+#include "qemu/yank.h"
/* #define DEBUG_SAVEVM_STATE */
@@ -514,6 +515,10 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
dirty_bitmap_mig_before_vm_start();
qemu_fclose(f);
+
+ /* state_destroy assumes a real migration which would have added a yank */
+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
migration_incoming_state_destroy();
if (ret < 0) {
error_setg_errno(errp, -ret, "Error while loading VM state");

View File

@@ -11,15 +11,12 @@ To keep the rate-limiting and IO impact from before, we use a sequential
transaction, so drives will still be backed up one after the other.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[add new force parameter to job_cancel_sync calls]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 167 +++++++++++++++------------------------------------
1 file changed, 49 insertions(+), 118 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index f90abaa50a..63c686463f 100644
index 2db4a62580..b52f4a9364 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -52,6 +52,7 @@ static struct PVEBackupState {
@@ -161,7 +158,7 @@ index f90abaa50a..63c686463f 100644
- if (next_job) {
- AioContext *aio_context = next_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&next_job->job, true);
- job_cancel_sync(&next_job->job);
- aio_context_release(aio_context);
- } else {
- break;
@@ -169,7 +166,7 @@ index f90abaa50a..63c686463f 100644
+ if (cancel_job) {
+ AioContext *aio_context = cancel_job->job.aio_context;
+ aio_context_acquire(aio_context);
+ job_cancel_sync(&cancel_job->job, true);
+ job_cancel_sync(&cancel_job->job);
+ job_unref(&cancel_job->job);
+ aio_context_release(aio_context);
}
@@ -203,7 +200,7 @@ index f90abaa50a..63c686463f 100644
- if (job_should_pause(&job->job)) {
- bool error_or_canceled = pvebackup_error_or_canceled();
- if (error_or_canceled) {
- job_cancel_sync(&job->job, true);
- job_cancel_sync(&job->job);
- } else {
- job_resume(&job->job);
- }
@@ -231,19 +228,19 @@ index f90abaa50a..63c686463f 100644
+ }
+ backup_state.txn = job_txn_new_seq();
+
BackupPerf perf = { .max_workers = 16 };
/* create and start all jobs (paused state) */
@@ -526,7 +450,7 @@ static bool create_backup_jobs(void) {
GList *l = backup_state.di_list;
while (l) {
@@ -524,7 +448,7 @@ static bool create_backup_jobs(void) {
BlockJob *job = backup_job_create(
NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
bitmap_mode, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
+ JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
aio_context_release(aio_context);
@@ -538,7 +462,8 @@ static bool create_backup_jobs(void) {
@@ -536,7 +460,8 @@ static bool create_backup_jobs(void) {
pvebackup_propagate_error(create_job_err);
break;
}
@@ -253,7 +250,7 @@ index f90abaa50a..63c686463f 100644
bdrv_unref(di->target);
di->target = NULL;
@@ -556,6 +481,10 @@ static bool create_backup_jobs(void) {
@@ -554,6 +479,10 @@ static bool create_backup_jobs(void) {
bdrv_unref(di->target);
di->target = NULL;
}
@@ -264,7 +261,7 @@ index f90abaa50a..63c686463f 100644
}
}
@@ -946,10 +875,6 @@ err:
@@ -944,10 +873,6 @@ err:
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
@@ -275,7 +272,7 @@ index f90abaa50a..63c686463f 100644
if (di->target) {
bdrv_unref(di->target);
}
@@ -1038,9 +963,15 @@ UuidInfo *qmp_backup(
@@ -1036,9 +961,15 @@ UuidInfo *qmp_backup(
block_on_coroutine_fn(pvebackup_co_prepare, &task);
if (*errp == NULL) {

View File

@@ -1,130 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Mon, 7 Feb 2022 14:21:01 +0100
Subject: [PATCH] qemu-img: dd: add -l option for loading a snapshot
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
docs/tools/qemu-img.rst | 6 +++---
qemu-img-cmds.hx | 4 ++--
qemu-img.c | 33 +++++++++++++++++++++++++++++++--
3 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 699229eef6..4189ced8bc 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -492,10 +492,10 @@ Command description:
it doesn't need to be specified separately in this case.
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
- dd copies from *INPUT* file to *OUTPUT* file converting it from
- *FMT* format to *OUTPUT_FMT* format.
+ dd copies from *INPUT* file or snapshot *SNAPSHOT_PARAM* to *OUTPUT* file
+ converting it from *FMT* format to *OUTPUT_FMT* format.
The data is by default read and written using blocks of 512 bytes but can be
modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index b5b0bb4467..36f97e1f19 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -58,9 +58,9 @@ SRST
ERST
DEF("dd", img_dd,
- "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
+ "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [-n] [-l snapshot_param] [bs=block_size] [count=blocks] [skip=blocks] [osize=output_size] if=input of=output")
SRST
-.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [-n] [-l SNAPSHOT_PARAM] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] [osize=OUTPUT_SIZE] if=INPUT of=OUTPUT
ERST
DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index c6b4a5567d..041c203fc3 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4943,6 +4943,7 @@ static int img_dd(int argc, char **argv)
BlockDriver *drv = NULL, *proto_drv = NULL;
BlockBackend *blk1 = NULL, *blk2 = NULL;
QemuOpts *opts = NULL;
+ QemuOpts *sn_opts = NULL;
QemuOptsList *create_opts = NULL;
Error *local_err = NULL;
bool image_opts = false;
@@ -4952,6 +4953,7 @@ static int img_dd(int argc, char **argv)
int64_t size = 0, readsize = 0;
int64_t block_count = 0, out_pos, in_pos;
bool force_share = false, skip_create = false;
+ const char *snapshot_name = NULL;
struct DdInfo dd = {
.flags = 0,
.count = 0,
@@ -4989,7 +4991,7 @@ static int img_dd(int argc, char **argv)
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long(argc, argv, ":hf:O:Un", long_options, NULL))) {
+ while ((c = getopt_long(argc, argv, ":hf:O:l:Un", long_options, NULL))) {
if (c == EOF) {
break;
}
@@ -5012,6 +5014,19 @@ static int img_dd(int argc, char **argv)
case 'n':
skip_create = true;
break;
+ case 'l':
+ if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+ sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
+ optarg, false);
+ if (!sn_opts) {
+ error_report("Failed in parsing snapshot param '%s'",
+ optarg);
+ goto out;
+ }
+ } else {
+ snapshot_name = optarg;
+ }
+ break;
case 'U':
force_share = true;
break;
@@ -5071,11 +5086,24 @@ static int img_dd(int argc, char **argv)
if (dd.flags & C_IF) {
blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
force_share);
-
if (!blk1) {
ret = -1;
goto out;
}
+ if (sn_opts) {
+ bdrv_snapshot_load_tmp(blk_bs(blk1),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+ &local_err);
+ } else if (snapshot_name != NULL) {
+ bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(blk1), snapshot_name,
+ &local_err);
+ }
+ if (local_err) {
+ error_reportf_err(local_err, "Failed to load snapshot: ");
+ ret = -1;
+ goto out;
+ }
}
if (dd.flags & C_OSIZE) {
@@ -5230,6 +5258,7 @@ static int img_dd(int argc, char **argv)
out:
g_free(arg);
qemu_opts_del(opts);
+ qemu_opts_del(sn_opts);
qemu_opts_free(create_opts);
blk_unref(blk1);
blk_unref(blk2);

View File

@@ -1,8 +1,7 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 28 Sep 2020 13:40:51 +0200
Subject: [PATCH] PVE-Backup: Don't block on finishing and cleanup
create_backup_jobs
Subject: [PATCH] PVE-Backup: Use more coroutines and don't block on finishing
proxmox_backup_co_finish is already async, but previously we would wait
for the coroutine using block_on_coroutine_fn(). Avoid this by
@@ -30,34 +29,16 @@ To communicate the finishing state, a new property is introduced to
query-backup: 'finishing'. A new state is explicitly not used, since
that would break compatibility with older qemu-server versions.
Also fix create_backup_jobs:
No more weird bool returns, just the standard "errp" format used
everywhere else too. With this, if backup_job_create fails, the error
message is actually returned over QMP and can be shown to the user.
To facilitate correct cleanup on such an error, we call
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
This additionally allows us to actually hold the backup_mutex during
operation.
Also add a job_cancel_sync before job_unref, since a job must be in
STATUS_NULL to be deleted by unref, which could trigger an assert
before.
[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[add new force parameter to job_cancel_sync calls]
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 217 ++++++++++++++++++++++++++++---------------
pve-backup.c | 148 ++++++++++++++++++++++++++-----------------
qapi/block-core.json | 5 +-
2 files changed, 144 insertions(+), 78 deletions(-)
2 files changed, 95 insertions(+), 58 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 63c686463f..6f05796fad 100644
index b52f4a9364..4402c0cb17 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
@@ -71,12 +52,11 @@ index 63c686463f..6f05796fad 100644
QemuMutex lock;
Error *error;
time_t start_time;
@@ -47,20 +49,22 @@ static struct PVEBackupState {
@@ -47,20 +49,21 @@ static struct PVEBackupState {
size_t reused;
size_t zero_bytes;
GList *bitmap_list;
+ bool finishing;
+ bool starting;
} stat;
int64_t speed;
VmaWriter *vmaw;
@@ -96,7 +76,7 @@ index 63c686463f..6f05796fad 100644
qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
@@ -72,6 +76,7 @@ typedef struct PVEBackupDevInfo {
@@ -72,6 +75,7 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
@@ -104,7 +84,7 @@ index 63c686463f..6f05796fad 100644
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
@@ -227,12 +232,12 @@ pvebackup_co_dump_vma_cb(
@@ -227,12 +231,12 @@ pvebackup_co_dump_vma_cb(
}
// assumes the caller holds backup_mutex
@@ -119,7 +99,7 @@ index 63c686463f..6f05796fad 100644
qemu_mutex_unlock(&backup_state.stat.lock);
if (backup_state.vmaw) {
@@ -261,35 +266,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
@@ -261,12 +265,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
@@ -136,28 +116,25 @@ index 63c686463f..6f05796fad 100644
{
PVEBackupDevInfo *di = opaque;
+ int ret = di->completed_ret;
+
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
+ if (ret < 0) {
+ Error *local_err = NULL;
+ error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
+ pvebackup_propagate_error(local_err);
+ }
+
+ di->bs = NULL;
+
+ assert(di->target == NULL);
- bool error_or_canceled = pvebackup_error_or_canceled();
-
- if (backup_state.vmaw) {
- vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ bool starting = backup_state.stat.starting;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ if (starting) {
+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
+ * must) skip all cleanup, as we don't know what has and hasn't been
+ * initialized yet. */
+ return;
bool error_or_canceled = pvebackup_error_or_canceled();
@@ -281,27 +302,6 @@ static void coroutine_fn pvebackup_complete_stream(void *opaque)
pvebackup_propagate_error(local_err);
}
}
- if (backup_state.pbs && !error_or_canceled) {
- Error *local_err = NULL;
- proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
- if (local_err != NULL) {
- pvebackup_propagate_error(local_err);
- }
- }
-}
-
-static void pvebackup_complete_cb(void *opaque, int ret)
@@ -167,32 +144,22 @@ index 63c686463f..6f05796fad 100644
- PVEBackupDevInfo *di = opaque;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (ret < 0) {
Error *local_err = NULL;
@@ -301,7 +300,19 @@ static void pvebackup_complete_cb(void *opaque, int ret)
assert(di->target == NULL);
-
- if (ret < 0) {
- Error *local_err = NULL;
- error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
- pvebackup_propagate_error(local_err);
- }
-
- di->bs = NULL;
-
- assert(di->target == NULL);
-
- block_on_coroutine_fn(pvebackup_complete_stream, di);
+ bool error_or_canceled = pvebackup_error_or_canceled();
+
+ if (backup_state.vmaw) {
+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ }
+
+ if (backup_state.pbs && !error_or_canceled) {
+ Error *local_err = NULL;
+ proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
+ if (local_err != NULL) {
+ pvebackup_propagate_error(local_err);
+ }
+ }
// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -310,21 +321,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
@@ -310,21 +310,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
/* call cleanup if we're the last job */
if (!g_list_first(backup_state.di_list)) {
@@ -221,7 +188,7 @@ index 63c686463f..6f05796fad 100644
+ Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
+ aio_co_enter(qemu_get_aio_context(), co);
+}
+
+/*
+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
+ * main loop processing via a bottom half.
@@ -231,11 +198,11 @@ index 63c686463f..6f05796fad 100644
+ Job *job = (Job*)data->data;
+ AioContext *job_ctx = job->aio_context;
+ aio_context_acquire(job_ctx);
+ job_cancel_sync(job, true);
+ job_cancel_sync(job);
+ aio_context_release(job_ctx);
+ aio_co_enter(data->ctx, data->co);
+}
+
+static void coroutine_fn pvebackup_co_cancel(void *opaque)
+{
Error *cancel_err = NULL;
@@ -247,7 +214,7 @@ index 63c686463f..6f05796fad 100644
if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
@@ -342,27 +381,22 @@ static void pvebackup_cancel(void)
@@ -342,27 +370,22 @@ static void pvebackup_cancel(void)
((PVEBackupDevInfo *)bdi->data)->job :
NULL;
@@ -270,7 +237,7 @@ index 63c686463f..6f05796fad 100644
- if (cancel_job) {
- AioContext *aio_context = cancel_job->job.aio_context;
- aio_context_acquire(aio_context);
- job_cancel_sync(&cancel_job->job, true);
- job_cancel_sync(&cancel_job->job);
- job_unref(&cancel_job->job);
- aio_context_release(aio_context);
- }
@@ -284,76 +251,22 @@ index 63c686463f..6f05796fad 100644
}
// assumes the caller holds backup_mutex
@@ -415,10 +449,18 @@ static int coroutine_fn pvebackup_co_add_config(
@@ -415,6 +438,14 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}
-static bool create_backup_jobs(void) {
+/*
+ * backup_job_create can *not* be run from a coroutine (and requires an
+ * acquired AioContext), so this can't either.
+ * The caller is responsible that backup_mutex is held nonetheless.
+ * This does imply that this function cannot run with backup_mutex acquired.
+ * That is ok because it is only ever called between setting up the backup_state
+ * struct and starting the jobs, and from within a QMP call. This means that no
+ * other QMP call can interrupt, and no background job is running yet.
+ */
+static void create_backup_jobs_bh(void *opaque) {
static bool create_backup_jobs(void) {
assert(!qemu_in_coroutine());
+ CoCtxData *data = (CoCtxData*)opaque;
+ Error **errp = (Error**)data->data;
+
Error *local_err = NULL;
/* create job transaction to synchronize bitmap commit and cancel all
@@ -454,24 +496,19 @@ static bool create_backup_jobs(void) {
aio_context_release(aio_context);
- if (!job || local_err != NULL) {
- Error *create_job_err = NULL;
- error_setg(&create_job_err, "backup_job_create failed: %s",
- local_err ? error_get_pretty(local_err) : "null");
+ di->job = job;
- pvebackup_propagate_error(create_job_err);
+ if (!job || local_err) {
+ error_setg(errp, "backup_job_create failed: %s",
+ local_err ? error_get_pretty(local_err) : "null");
break;
}
- di->job = job;
-
bdrv_unref(di->target);
di->target = NULL;
}
- bool errors = pvebackup_error_or_canceled();
-
- if (errors) {
+ if (*errp) {
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -483,12 +520,17 @@ static bool create_backup_jobs(void) {
}
if (di->job) {
+ AioContext *ctx = di->job->job.aio_context;
+ aio_context_acquire(ctx);
+ job_cancel_sync(&di->job->job, true);
job_unref(&di->job->job);
+ aio_context_release(ctx);
}
}
}
- return errors;
+ /* return */
+ aio_co_enter(data->ctx, data->co);
}
typedef struct QmpBackupTask {
@@ -525,11 +567,12 @@ typedef struct QmpBackupTask {
@@ -523,11 +554,12 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;
@@ -367,7 +280,7 @@ index 63c686463f..6f05796fad 100644
QmpBackupTask *task = opaque;
task->result = NULL; // just to be sure
@@ -550,8 +593,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -548,8 +580,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";
if (backup_state.di_list) {
@@ -378,7 +291,7 @@ index 63c686463f..6f05796fad 100644
return;
}
@@ -618,6 +662,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -616,6 +649,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
di->size = size;
total += size;
@@ -387,58 +300,24 @@ index 63c686463f..6f05796fad 100644
}
uuid_generate(uuid);
@@ -849,6 +895,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -847,6 +882,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.finishing = false;
+ backup_state.stat.starting = true;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -863,6 +911,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
@@ -861,6 +897,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info->UUID = uuid_str;
task->result = uuid_info;
+
+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
+ * points, and we'll release it as soon as the BH reschedules us.
+ */
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &local_err,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
+ qemu_coroutine_yield();
+
+ if (local_err) {
+ error_propagate(task->errp, local_err);
+ goto err;
+ }
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.starting = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ /* start the first job in the transaction */
+ job_txn_start_seq(backup_state.txn);
+
return;
err_mutex:
@@ -885,6 +960,7 @@ err:
g_free(di);
}
g_list_free(di_list);
+ backup_state.di_list = NULL;
if (devs) {
g_strfreev(devs);
@@ -905,6 +981,8 @@ err:
@@ -903,6 +941,8 @@ err:
}
task->result = NULL;
@@ -447,7 +326,7 @@ index 63c686463f..6f05796fad 100644
return;
}
@@ -958,24 +1036,8 @@ UuidInfo *qmp_backup(
@@ -956,22 +996,15 @@ UuidInfo *qmp_backup(
.errp = errp,
};
@@ -455,24 +334,23 @@ index 63c686463f..6f05796fad 100644
-
block_on_coroutine_fn(pvebackup_co_prepare, &task);
- if (*errp == NULL) {
- bool errors = create_backup_jobs();
if (*errp == NULL) {
bool errors = create_backup_jobs();
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (!errors) {
if (!errors) {
- /* start the first job in the transaction
- * note: this might directly enter the job, so we need to do this
- * after unlocking the backup_mutex */
- job_txn_start_seq(backup_state.txn);
- }
+ // start the first job in the transaction
job_txn_start_seq(backup_state.txn);
}
- } else {
- qemu_mutex_unlock(&backup_state.backup_mutex);
- }
-
return task.result;
}
}
@@ -1027,6 +1089,7 @@ BackupStatus *qmp_query_backup(Error **errp)
return task.result;
@@ -1025,6 +1058,7 @@ BackupStatus *qmp_query_backup(Error **errp)
info->transferred = backup_state.stat.transferred;
info->has_reused = true;
info->reused = backup_state.stat.reused;
@@ -481,10 +359,10 @@ index 63c686463f..6f05796fad 100644
qemu_mutex_unlock(&backup_state.stat.lock);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 16e184dd28..cb17d00fe0 100644
index 03fc0af99b..29650896e2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -770,12 +770,15 @@
@@ -784,12 +784,15 @@
#
# @uuid: uuid for this backup job
#

View File

@@ -1,407 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 21 Apr 2022 13:26:48 +0200
Subject: [PATCH] vma: allow partial restore
Introduce a new map line for skipping a certain drive, of the form
skip=drive-scsi0
Since in PVE, most archives are compressed and piped to vma for
restore, it's not easily possible to skip reads.
For the reader, a new skip flag for VmaRestoreState is added and the
target is allowed to be NULL if skip is specified when registering. If
the skip flag is set, no writes will be made as well as no check for
duplicate clusters. Therefore, the flag is not set for verify.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Acked-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
vma-reader.c | 64 ++++++++++++---------
vma.c | 157 +++++++++++++++++++++++++++++----------------------
vma.h | 2 +-
3 files changed, 126 insertions(+), 97 deletions(-)
diff --git a/vma-reader.c b/vma-reader.c
index e65f1e8415..81a891c6b1 100644
--- a/vma-reader.c
+++ b/vma-reader.c
@@ -28,6 +28,7 @@ typedef struct VmaRestoreState {
bool write_zeroes;
unsigned long *bitmap;
int bitmap_size;
+ bool skip;
} VmaRestoreState;
struct VmaReader {
@@ -425,13 +426,14 @@ VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
}
static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
- BlockBackend *target, bool write_zeroes)
+ BlockBackend *target, bool write_zeroes, bool skip)
{
assert(vmar);
assert(dev_id);
vmar->rstate[dev_id].target = target;
vmar->rstate[dev_id].write_zeroes = write_zeroes;
+ vmar->rstate[dev_id].skip = skip;
int64_t size = vmar->devinfo[dev_id].size;
@@ -446,28 +448,30 @@ static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
}
int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
- bool write_zeroes, Error **errp)
+ bool write_zeroes, bool skip, Error **errp)
{
assert(vmar);
- assert(target != NULL);
+ assert(target != NULL || skip);
assert(dev_id);
- assert(vmar->rstate[dev_id].target == NULL);
-
- int64_t size = blk_getlength(target);
- int64_t size_diff = size - vmar->devinfo[dev_id].size;
-
- /* storage types can have different size restrictions, so it
- * is not always possible to create an image with exact size.
- * So we tolerate a size difference up to 4MB.
- */
- if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
- error_setg(errp, "vma_reader_register_bs for stream %s failed - "
- "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
- size, vmar->devinfo[dev_id].size);
- return -1;
+ assert(vmar->rstate[dev_id].target == NULL && !vmar->rstate[dev_id].skip);
+
+ if (target != NULL) {
+ int64_t size = blk_getlength(target);
+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
+
+ /* storage types can have different size restrictions, so it
+ * is not always possible to create an image with exact size.
+ * So we tolerate a size difference up to 4MB.
+ */
+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
+ size, vmar->devinfo[dev_id].size);
+ return -1;
+ }
}
- allocate_rstate(vmar, dev_id, target, write_zeroes);
+ allocate_rstate(vmar, dev_id, target, write_zeroes, skip);
return 0;
}
@@ -560,19 +564,23 @@ static int restore_extent(VmaReader *vmar, unsigned char *buf,
VmaRestoreState *rstate = &vmar->rstate[dev_id];
BlockBackend *target = NULL;
+ bool skip = rstate->skip;
+
if (dev_id != vmar->vmstate_stream) {
target = rstate->target;
- if (!verify && !target) {
+ if (!verify && !target && !skip) {
error_setg(errp, "got wrong dev id %d", dev_id);
return -1;
}
- if (vma_reader_get_bitmap(rstate, cluster_num)) {
- error_setg(errp, "found duplicated cluster %zd for stream %s",
- cluster_num, vmar->devinfo[dev_id].devname);
- return -1;
+ if (!skip) {
+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
+ error_setg(errp, "found duplicated cluster %zd for stream %s",
+ cluster_num, vmar->devinfo[dev_id].devname);
+ return -1;
+ }
+ vma_reader_set_bitmap(rstate, cluster_num, 1);
}
- vma_reader_set_bitmap(rstate, cluster_num, 1);
max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
} else {
@@ -618,7 +626,7 @@ static int restore_extent(VmaReader *vmar, unsigned char *buf,
return -1;
}
- if (!verify) {
+ if (!verify && !skip) {
int nb_sectors = end_sector - sector_num;
if (restore_write_data(vmar, dev_id, target, vmstate_fd,
buf + start, sector_num, nb_sectors,
@@ -654,7 +662,7 @@ static int restore_extent(VmaReader *vmar, unsigned char *buf,
return -1;
}
- if (!verify) {
+ if (!verify && !skip) {
int nb_sectors = end_sector - sector_num;
if (restore_write_data(vmar, dev_id, target, vmstate_fd,
buf + start, sector_num,
@@ -679,7 +687,7 @@ static int restore_extent(VmaReader *vmar, unsigned char *buf,
vmar->partial_zero_cluster_data += zero_size;
}
- if (rstate->write_zeroes && !verify) {
+ if (rstate->write_zeroes && !verify && !skip) {
if (restore_write_data(vmar, dev_id, target, vmstate_fd,
zero_vma_block, sector_num,
nb_sectors, errp) < 0) {
@@ -850,7 +858,7 @@ int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp)
for (dev_id = 1; dev_id < 255; dev_id++) {
if (vma_reader_get_device_info(vmar, dev_id)) {
- allocate_rstate(vmar, dev_id, NULL, false);
+ allocate_rstate(vmar, dev_id, NULL, false, false);
}
}
diff --git a/vma.c b/vma.c
index e8dffb43e0..e6e9ffc7fe 100644
--- a/vma.c
+++ b/vma.c
@@ -138,6 +138,7 @@ typedef struct RestoreMap {
char *throttling_group;
char *cache;
bool write_zero;
+ bool skip;
} RestoreMap;
static bool try_parse_option(char **line, const char *optname, char **out, const char *inbuf) {
@@ -245,47 +246,61 @@ static int extract_content(int argc, char **argv)
char *bps = NULL;
char *group = NULL;
char *cache = NULL;
+ char *devname = NULL;
+ bool skip = false;
+ uint64_t bps_value = 0;
+ const char *path = NULL;
+ bool write_zero = true;
+
if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
break;
}
int len = strlen(line);
if (line[len - 1] == '\n') {
line[len - 1] = '\0';
- if (len == 1) {
+ len = len - 1;
+ if (len == 0) {
break;
}
}
- while (1) {
- if (!try_parse_option(&line, "format", &format, inbuf) &&
- !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
- !try_parse_option(&line, "throttling.group", &group, inbuf) &&
- !try_parse_option(&line, "cache", &cache, inbuf))
- {
- break;
+ if (strncmp(line, "skip", 4) == 0) {
+ if (len < 6 || line[4] != '=') {
+ g_error("read map failed - option 'skip' has no value ('%s')",
+ inbuf);
+ } else {
+ devname = line + 5;
+ skip = true;
+ }
+ } else {
+ while (1) {
+ if (!try_parse_option(&line, "format", &format, inbuf) &&
+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
+ !try_parse_option(&line, "cache", &cache, inbuf))
+ {
+ break;
+ }
}
- }
- uint64_t bps_value = 0;
- if (bps) {
- bps_value = verify_u64(bps);
- g_free(bps);
- }
+ if (bps) {
+ bps_value = verify_u64(bps);
+ g_free(bps);
+ }
- const char *path;
- bool write_zero;
- if (line[0] == '0' && line[1] == ':') {
- path = line + 2;
- write_zero = false;
- } else if (line[0] == '1' && line[1] == ':') {
- path = line + 2;
- write_zero = true;
- } else {
- g_error("read map failed - parse error ('%s')", inbuf);
+ if (line[0] == '0' && line[1] == ':') {
+ path = line + 2;
+ write_zero = false;
+ } else if (line[0] == '1' && line[1] == ':') {
+ path = line + 2;
+ write_zero = true;
+ } else {
+ g_error("read map failed - parse error ('%s')", inbuf);
+ }
+
+ path = extract_devname(path, &devname, -1);
}
- char *devname = NULL;
- path = extract_devname(path, &devname, -1);
if (!devname) {
g_error("read map failed - no dev name specified ('%s')",
inbuf);
@@ -299,6 +314,7 @@ static int extract_content(int argc, char **argv)
map->throttling_group = group;
map->cache = cache;
map->write_zero = write_zero;
+ map->skip = skip;
g_hash_table_insert(devmap, map->devname, map);
@@ -328,6 +344,7 @@ static int extract_content(int argc, char **argv)
const char *cache = NULL;
int flags = BDRV_O_RDWR;
bool write_zero = true;
+ bool skip = false;
BlockBackend *blk = NULL;
@@ -343,6 +360,7 @@ static int extract_content(int argc, char **argv)
throttling_group = map->throttling_group;
cache = map->cache;
write_zero = map->write_zero;
+ skip = map->skip;
} else {
devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
dirname, di->devname);
@@ -361,57 +379,60 @@ static int extract_content(int argc, char **argv)
write_zero = false;
}
- size_t devlen = strlen(devfn);
- QDict *options = NULL;
- bool writethrough;
- if (format) {
- /* explicit format from commandline */
- options = qdict_new();
- qdict_put_str(options, "driver", format);
- } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
- strncmp(devfn, "/dev/", 5) == 0)
- {
- /* This part is now deprecated for PVE as well (just as qemu
- * deprecated not specifying an explicit raw format, too.
- */
- /* explicit raw format */
- options = qdict_new();
- qdict_put_str(options, "driver", "raw");
- }
- if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
- g_error("invalid cache option: %s\n", cache);
- }
+ if (!skip) {
+ size_t devlen = strlen(devfn);
+ QDict *options = NULL;
+ bool writethrough;
+ if (format) {
+ /* explicit format from commandline */
+ options = qdict_new();
+ qdict_put_str(options, "driver", format);
+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
+ strncmp(devfn, "/dev/", 5) == 0)
+ {
+ /* This part is now deprecated for PVE as well (just as qemu
+ * deprecated not specifying an explicit raw format, too.
+ */
+ /* explicit raw format */
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ }
- if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
- g_error("can't open file %s - %s", devfn,
- error_get_pretty(errp));
- }
+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
+ g_error("invalid cache option: %s\n", cache);
+ }
- if (cache) {
- blk_set_enable_write_cache(blk, !writethrough);
- }
+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
+ g_error("can't open file %s - %s", devfn,
+ error_get_pretty(errp));
+ }
- if (throttling_group) {
- blk_io_limits_enable(blk, throttling_group);
- }
+ if (cache) {
+ blk_set_enable_write_cache(blk, !writethrough);
+ }
- if (throttling_bps) {
- if (!throttling_group) {
- blk_io_limits_enable(blk, devfn);
+ if (throttling_group) {
+ blk_io_limits_enable(blk, throttling_group);
}
- ThrottleConfig cfg;
- throttle_config_init(&cfg);
- cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
- Error *err = NULL;
- if (!throttle_is_valid(&cfg, &err)) {
- error_report_err(err);
- g_error("failed to apply throttling");
+ if (throttling_bps) {
+ if (!throttling_group) {
+ blk_io_limits_enable(blk, devfn);
+ }
+
+ ThrottleConfig cfg;
+ throttle_config_init(&cfg);
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
+ Error *err = NULL;
+ if (!throttle_is_valid(&cfg, &err)) {
+ error_report_err(err);
+ g_error("failed to apply throttling");
+ }
+ blk_set_io_limits(blk, &cfg);
}
- blk_set_io_limits(blk, &cfg);
}
- if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
+ if (vma_reader_register_bs(vmar, i, blk, write_zero, skip, &errp) < 0) {
g_error("%s", error_get_pretty(errp));
}
diff --git a/vma.h b/vma.h
index c895c97f6d..1b62859165 100644
--- a/vma.h
+++ b/vma.h
@@ -142,7 +142,7 @@ GList *vma_reader_get_config_data(VmaReader *vmar);
VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
BlockBackend *target, bool write_zeroes,
- Error **errp);
+ bool skip, Error **errp);
int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
Error **errp);
int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);

View File

@@ -0,0 +1,187 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 22 Oct 2020 17:01:07 +0200
Subject: [PATCH] PVE: fix and clean up error handling for create_backup_jobs
No more weird bool returns, just the standard "errp" format used
everywhere else too. With this, if backup_job_create fails, the error
message is actually returned over QMP and can be shown to the user.
To facilitate correct cleanup on such an error, we call
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
This additionally allows us to actually hold the backup_mutex during
operation.
Also add a job_cancel_sync before job_unref, since a job must be in
STATUS_NULL to be deleted by unref, which could trigger an assert
before.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
pve-backup.c | 79 +++++++++++++++++++++++++++++++++++-----------------
1 file changed, 54 insertions(+), 25 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 4402c0cb17..c7cde0fb0e 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -50,6 +50,7 @@ static struct PVEBackupState {
size_t zero_bytes;
GList *bitmap_list;
bool finishing;
+ bool starting;
} stat;
int64_t speed;
VmaWriter *vmaw;
@@ -277,6 +278,16 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
PVEBackupDevInfo *di = opaque;
int ret = di->completed_ret;
+ qemu_mutex_lock(&backup_state.stat.lock);
+ bool starting = backup_state.stat.starting;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ if (starting) {
+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
+ * must) skip all cleanup, as we don't know what has and hasn't been
+ * initialized yet. */
+ return;
+ }
+
qemu_co_mutex_lock(&backup_state.backup_mutex);
if (ret < 0) {
@@ -441,15 +452,15 @@ static int coroutine_fn pvebackup_co_add_config(
/*
* backup_job_create can *not* be run from a coroutine (and requires an
* acquired AioContext), so this can't either.
- * This does imply that this function cannot run with backup_mutex acquired.
- * That is ok because it is only ever called between setting up the backup_state
- * struct and starting the jobs, and from within a QMP call. This means that no
- * other QMP call can interrupt, and no background job is running yet.
+ * The caller is responsible that backup_mutex is held nonetheless.
*/
-static bool create_backup_jobs(void) {
+static void create_backup_jobs_bh(void *opaque) {
assert(!qemu_in_coroutine());
+ CoCtxData *data = (CoCtxData*)opaque;
+ Error **errp = (Error**)data->data;
+
Error *local_err = NULL;
/* create job transaction to synchronize bitmap commit and cancel all
@@ -483,24 +494,19 @@ static bool create_backup_jobs(void) {
aio_context_release(aio_context);
- if (!job || local_err != NULL) {
- Error *create_job_err = NULL;
- error_setg(&create_job_err, "backup_job_create failed: %s",
- local_err ? error_get_pretty(local_err) : "null");
+ di->job = job;
- pvebackup_propagate_error(create_job_err);
+ if (!job || local_err) {
+ error_setg(errp, "backup_job_create failed: %s",
+ local_err ? error_get_pretty(local_err) : "null");
break;
}
- di->job = job;
-
bdrv_unref(di->target);
di->target = NULL;
}
- bool errors = pvebackup_error_or_canceled();
-
- if (errors) {
+ if (*errp) {
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -512,12 +518,17 @@ static bool create_backup_jobs(void) {
}
if (di->job) {
+ AioContext *ctx = di->job->job.aio_context;
+ aio_context_acquire(ctx);
+ job_cancel_sync(&di->job->job);
job_unref(&di->job->job);
+ aio_context_release(ctx);
}
}
}
- return errors;
+ /* return */
+ aio_co_enter(data->ctx, data->co);
}
typedef struct QmpBackupTask {
@@ -883,6 +894,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
backup_state.stat.finishing = false;
+ backup_state.stat.starting = true;
qemu_mutex_unlock(&backup_state.stat.lock);
@@ -898,7 +910,32 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
task->result = uuid_info;
+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
+ * points, and we'll release it as soon as the BH reschedules us.
+ */
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &local_err,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
+ qemu_coroutine_yield();
+
+ if (local_err) {
+ error_propagate(task->errp, local_err);
+ goto err;
+ }
+
qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.starting = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ /* start the first job in the transaction */
+ job_txn_start_seq(backup_state.txn);
+
return;
err_mutex:
@@ -921,6 +958,7 @@ err:
g_free(di);
}
g_list_free(di_list);
+ backup_state.di_list = NULL;
if (devs) {
g_strfreev(devs);
@@ -998,15 +1036,6 @@ UuidInfo *qmp_backup(
block_on_coroutine_fn(pvebackup_co_prepare, &task);
- if (*errp == NULL) {
- bool errors = create_backup_jobs();
-
- if (!errors) {
- // start the first job in the transaction
- job_txn_start_seq(backup_state.txn);
- }
- }
-
return task.result;
}

View File

@@ -1,233 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Tue, 26 Apr 2022 16:06:28 +0200
Subject: [PATCH] pbs: namespace support
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
block/pbs.c | 25 +++++++++++++++++++++----
pbs-restore.c | 19 ++++++++++++++++---
pve-backup.c | 6 +++++-
qapi/block-core.json | 5 ++++-
5 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4c799f00d9..0502f42be6 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1041,6 +1041,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, NULL, // PBS key_password
false, NULL, // PBS master_keyfile
false, NULL, // PBS fingerprint
+ false, NULL, // PBS backup-ns
false, NULL, // PBS backup-id
false, 0, // PBS backup-time
false, false, // PBS use-dirty-bitmap
diff --git a/block/pbs.c b/block/pbs.c
index c5eb4d5bad..7471e2ef9d 100644
--- a/block/pbs.c
+++ b/block/pbs.c
@@ -14,6 +14,7 @@
#include <proxmox-backup-qemu.h>
#define PBS_OPT_REPOSITORY "repository"
+#define PBS_OPT_NAMESPACE "namespace"
#define PBS_OPT_SNAPSHOT "snapshot"
#define PBS_OPT_ARCHIVE "archive"
#define PBS_OPT_KEYFILE "keyfile"
@@ -27,6 +28,7 @@ typedef struct {
int64_t length;
char *repository;
+ char *namespace;
char *snapshot;
char *archive;
} BDRVPBSState;
@@ -40,6 +42,11 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "The server address and repository to connect to.",
},
+ {
+ .name = PBS_OPT_NAMESPACE,
+ .type = QEMU_OPT_STRING,
+ .help = "Optional: The snapshot's namespace.",
+ },
{
.name = PBS_OPT_SNAPSHOT,
.type = QEMU_OPT_STRING,
@@ -76,7 +83,7 @@ static QemuOptsList runtime_opts = {
// filename format:
-// pbs:repository=<repo>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
+// pbs:repository=<repo>,namespace=<ns>,snapshot=<snap>,password=<pw>,key_password=<kpw>,fingerprint=<fp>,archive=<archive>
static void pbs_parse_filename(const char *filename, QDict *options,
Error **errp)
{
@@ -112,6 +119,7 @@ static int pbs_open(BlockDriverState *bs, QDict *options, int flags,
s->archive = g_strdup(qemu_opt_get(opts, PBS_OPT_ARCHIVE));
const char *keyfile = qemu_opt_get(opts, PBS_OPT_KEYFILE);
const char *password = qemu_opt_get(opts, PBS_OPT_PASSWORD);
+ const char *namespace = qemu_opt_get(opts, PBS_OPT_NAMESPACE);
const char *fingerprint = qemu_opt_get(opts, PBS_OPT_FINGERPRINT);
const char *key_password = qemu_opt_get(opts, PBS_OPT_ENCRYPTION_PASSWORD);
@@ -124,9 +132,12 @@ static int pbs_open(BlockDriverState *bs, QDict *options, int flags,
if (!key_password) {
key_password = getenv("PBS_ENCRYPTION_PASSWORD");
}
+ if (namespace) {
+ s->namespace = g_strdup(namespace);
+ }
/* connect to PBS server in read mode */
- s->conn = proxmox_restore_new(s->repository, s->snapshot, password,
+ s->conn = proxmox_restore_new_ns(s->repository, s->snapshot, s->namespace, password,
keyfile, key_password, fingerprint, &pbs_error);
/* invalidates qemu_opt_get char pointers from above */
@@ -171,6 +182,7 @@ static int pbs_file_open(BlockDriverState *bs, QDict *options, int flags,
static void pbs_close(BlockDriverState *bs) {
BDRVPBSState *s = bs->opaque;
g_free(s->repository);
+ g_free(s->namespace);
g_free(s->snapshot);
g_free(s->archive);
proxmox_restore_disconnect(s->conn);
@@ -252,8 +264,13 @@ static coroutine_fn int pbs_co_pwritev(BlockDriverState *bs,
static void pbs_refresh_filename(BlockDriverState *bs)
{
BDRVPBSState *s = bs->opaque;
- snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
- s->repository, s->snapshot, s->archive);
+ if (s->namespace) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s:%s(%s)",
+ s->repository, s->namespace, s->snapshot, s->archive);
+ } else {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s/%s(%s)",
+ s->repository, s->snapshot, s->archive);
+ }
}
static const char *const pbs_strong_runtime_opts[] = {
diff --git a/pbs-restore.c b/pbs-restore.c
index 2f834cf42e..f03d9bab8d 100644
--- a/pbs-restore.c
+++ b/pbs-restore.c
@@ -29,7 +29,7 @@
static void help(void)
{
const char *help_msg =
- "usage: pbs-restore [--repository <repo>] snapshot archive-name target [command options]\n"
+ "usage: pbs-restore [--repository <repo>] [--ns namespace] snapshot archive-name target [command options]\n"
;
printf("%s", help_msg);
@@ -77,6 +77,7 @@ int main(int argc, char **argv)
Error *main_loop_err = NULL;
const char *format = "raw";
const char *repository = NULL;
+ const char *backup_ns = NULL;
const char *keyfile = NULL;
int verbose = false;
bool skip_zero = false;
@@ -90,6 +91,7 @@ int main(int argc, char **argv)
{"verbose", no_argument, 0, 'v'},
{"format", required_argument, 0, 'f'},
{"repository", required_argument, 0, 'r'},
+ {"ns", required_argument, 0, 'n'},
{"keyfile", required_argument, 0, 'k'},
{0, 0, 0, 0}
};
@@ -110,6 +112,9 @@ int main(int argc, char **argv)
case 'r':
repository = g_strdup(argv[optind - 1]);
break;
+ case 'n':
+ backup_ns = g_strdup(argv[optind - 1]);
+ break;
case 'k':
keyfile = g_strdup(argv[optind - 1]);
break;
@@ -160,8 +165,16 @@ int main(int argc, char **argv)
fprintf(stderr, "connecting to repository '%s'\n", repository);
}
char *pbs_error = NULL;
- ProxmoxRestoreHandle *conn = proxmox_restore_new(
- repository, snapshot, password, keyfile, key_password, fingerprint, &pbs_error);
+ ProxmoxRestoreHandle *conn = proxmox_restore_new_ns(
+ repository,
+ snapshot,
+ backup_ns,
+ password,
+ keyfile,
+ key_password,
+ fingerprint,
+ &pbs_error
+ );
if (conn == NULL) {
fprintf(stderr, "restore failed: %s\n", pbs_error);
return -1;
diff --git a/pve-backup.c b/pve-backup.c
index 9f6c04a512..f6a5f8c785 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -10,6 +10,8 @@
#include "qapi/qmp/qerror.h"
#include "qemu/cutils.h"
+#include <proxmox-backup-qemu.h>
+
/* PVE backup state and related function */
/*
@@ -535,6 +537,7 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_key_password, const char *key_password,
bool has_master_keyfile, const char *master_keyfile,
bool has_fingerprint, const char *fingerprint,
+ bool has_backup_ns, const char *backup_ns,
bool has_backup_id, const char *backup_id,
bool has_backup_time, int64_t backup_time,
bool has_use_dirty_bitmap, bool use_dirty_bitmap,
@@ -674,8 +677,9 @@ UuidInfo coroutine_fn *qmp_backup(
firewall_name = "fw.conf";
char *pbs_err = NULL;
- pbs = proxmox_backup_new(
+ pbs = proxmox_backup_new_ns(
backup_file,
+ has_backup_ns ? backup_ns : NULL,
backup_id,
backup_time,
dump_cb_block_size,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index fc8a125451..cc2ead0b75 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -817,6 +817,8 @@
#
# @fingerprint: server cert fingerprint (optional for format 'pbs')
#
+# @backup-ns: backup namespace (required for format 'pbs')
+#
# @backup-id: backup ID (required for format 'pbs')
#
# @backup-time: backup timestamp (Unix epoch, required for format 'pbs')
@@ -836,6 +838,7 @@
'*key-password': 'str',
'*master-keyfile': 'str',
'*fingerprint': 'str',
+ '*backup-ns': 'str',
'*backup-id': 'str',
'*backup-time': 'int',
'*use-dirty-bitmap': 'bool',
@@ -3282,7 +3285,7 @@
{ 'struct': 'BlockdevOptionsPbs',
'data': { 'repository': 'str', 'snapshot': 'str', 'archive': 'str',
'*keyfile': 'str', '*password': 'str', '*fingerprint': 'str',
- '*key_password': 'str' } }
+ '*key_password': 'str', '*namespace': 'str' } }
##
# @BlockdevOptionsNVMe:

View File

@@ -1,80 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 23 Jun 2022 14:00:05 +0200
Subject: [PATCH] Revert "block/rbd: workaround for ceph issue #53784"
This reverts commit fc176116cdea816ceb8dd969080b2b95f58edbc0 in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/rbd.c | 42 ++----------------------------------------
1 file changed, 2 insertions(+), 40 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 64a8d7d48b..9fc6dcb957 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1348,7 +1348,6 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r;
RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags;
- uint64_t head = 0;
assert(offset + bytes <= s->image_size);
@@ -1376,43 +1375,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status;
}
-#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
- /*
- * librbd had a bug until early 2022 that affected all versions of ceph that
- * supported fast-diff. This bug results in reporting of incorrect offsets
- * if the offset parameter to rbd_diff_iterate2 is not object aligned.
- * Work around this bug by rounding down the offset to object boundaries.
- * This is OK because we call rbd_diff_iterate2 with whole_object = true.
- * However, this workaround only works for non cloned images with default
- * striping.
- *
- * See: https://tracker.ceph.com/issues/53784
- */
-
- /* check if RBD image has non-default striping enabled */
- if (features & RBD_FEATURE_STRIPINGV2) {
- return status;
- }
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
- /*
- * check if RBD image is a clone (= has a parent).
- *
- * rbd_get_parent_info is deprecated from Nautilus onwards, but the
- * replacement rbd_get_parent is not present in Luminous and Mimic.
- */
- if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
- return status;
- }
-#pragma GCC diagnostic pop
-
- head = req.offs & (s->object_size - 1);
- req.offs -= head;
- bytes += head;
-#endif
-
- r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
+ r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status;
@@ -1431,8 +1394,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
}
- assert(req.bytes > head);
- *pnum = req.bytes - head;
+ *pnum = req.bytes;
return status;
}

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 22 Oct 2020 17:34:17 +0200
Subject: [PATCH] migration/block-dirty-bitmap: fix larger granularity bitmaps
sectors_per_chunk is a 64 bit integer, but the calculation would be done
in 32 bits, leading to an overflow for coarse bitmap granularities.
If that results in the value 0, it leads to a hang where no progress is
made but send_bitmap_bits is constantly called with nr_sectors being 0.
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
migration/block-dirty-bitmap.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 784330ebe1..5bf0d9fbc6 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -334,8 +334,9 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
dbms->node_name = bs_name;
dbms->bitmap = bitmap;
dbms->total_sectors = bdrv_nb_sectors(bs);
- dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
+ dbms->sectors_per_chunk = CHUNK_SIZE * 8LLU *
bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+ assert(dbms->sectors_per_chunk != 0);
if (bdrv_dirty_bitmap_enabled(bitmap)) {
dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED;
}

View File

@@ -12,22 +12,21 @@ Also add a flag to query-proxmox-support so qemu-server can determine if
safe migration is possible and makes sense.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/migration/misc.h | 3 ++
migration/meson.build | 2 +
migration/migration.c | 1 +
migration/Makefile.objs | 1 +
migration/pbs-state.c | 106 +++++++++++++++++++++++++++++++++++++++
pve-backup.c | 1 +
qapi/block-core.json | 6 +++
6 files changed, 119 insertions(+)
softmmu/vl.c | 1 +
6 files changed, 118 insertions(+)
create mode 100644 migration/pbs-state.c
diff --git a/include/migration/misc.h b/include/migration/misc.h
index 465906710d..4f0aeceb6f 100644
index 34e7d75713..f83816dd3c 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -75,4 +75,7 @@ bool migration_in_bg_snapshot(void);
@@ -75,4 +75,7 @@ bool migration_in_incoming_postcopy(void);
/* migration/block-dirty-bitmap.c */
void dirty_bitmap_mig_init(void);
@@ -35,33 +34,18 @@ index 465906710d..4f0aeceb6f 100644
+void pbs_state_mig_init(void);
+
#endif
diff --git a/migration/meson.build b/migration/meson.build
index 0842d00cd2..d012f4d8d3 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -6,8 +6,10 @@ migration_files = files(
'vmstate.c',
'qemu-file.c',
'yank_functions.c',
+ 'pbs-state.c',
)
softmmu_ss.add(migration_files)
+softmmu_ss.add(libproxmox_backup_qemu)
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index 0fc619e380..20b3792599 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -9,6 +9,7 @@ common-obj-y += qjson.o
common-obj-y += block-dirty-bitmap.o
common-obj-y += multifd.o
common-obj-y += multifd-zlib.o
+common-obj-y += pbs-state.o
common-obj-$(CONFIG_ZSTD) += multifd-zstd.o
softmmu_ss.add(files(
'block-dirty-bitmap.c',
diff --git a/migration/migration.c b/migration/migration.c
index bb8bbddfe4..8109e468eb 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -229,6 +229,7 @@ void migration_object_init(void)
blk_mig_init();
ram_mig_init();
dirty_bitmap_mig_init();
+ pbs_state_mig_init();
}
void migration_cancel(const Error *error)
common-obj-$(CONFIG_RDMA) += rdma.o
diff --git a/migration/pbs-state.c b/migration/pbs-state.c
new file mode 100644
index 0000000000..29f2b3860d
@@ -175,24 +159,23 @@ index 0000000000..29f2b3860d
+ NULL);
+}
diff --git a/pve-backup.c b/pve-backup.c
index 6f05796fad..5fa3cc1352 100644
index c7cde0fb0e..f65f1dda26 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -1132,6 +1132,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
@@ -1130,5 +1130,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
ret->pbs_dirty_bitmap = true;
ret->pbs_dirty_bitmap_savevm = true;
+ ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
+ ret->pbs_dirty_bitmap_migration = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index cb17d00fe0..bd978ea562 100644
index 29650896e2..0da4b35028 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -879,6 +879,11 @@
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
# safely be set for savevm-async.
@@ -890,12 +890,18 @@
#
# @query-bitmap-info: True if the 'query-pbs-bitmap-info' QMP call is supported.
#
+# @pbs-dirty-bitmap-migration: True if safe migration of dirty-bitmaps including
+# PBS state is supported. Enabling 'dirty-bitmaps'
@@ -202,11 +185,22 @@ index cb17d00fe0..bd978ea562 100644
# @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
#
##
@@ -886,6 +891,7 @@
{ 'struct': 'ProxmoxSupportStatus',
'data': { 'pbs-dirty-bitmap': 'bool',
'query-bitmap-info': 'bool',
'pbs-dirty-bitmap-savevm': 'bool',
+ 'pbs-dirty-bitmap-migration': 'bool',
'pbs-library-version': 'str' } }
##
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 16aa2186b0..88b13871fd 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -4288,6 +4288,7 @@ void qemu_init(int argc, char **argv, char **envp)
blk_mig_init();
ram_mig_init();
dirty_bitmap_mig_init();
+ pbs_state_mig_init();
qemu_opts_foreach(qemu_find_opts("mon"),
mon_init_func, NULL, &error_fatal);

View File

@@ -1,35 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Thu, 23 Jun 2022 14:00:07 +0200
Subject: [PATCH] Revert "block/rbd: fix handling of holes in
.bdrv_co_block_status"
This reverts commit 9e302f64bb407a9bb097b626da97228c2654cfee in
preparation to revert 0347a8fd4c3faaedf119be04c197804be40a384b.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/rbd.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 9fc6dcb957..98f4ba2620 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1307,11 +1307,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs);
-
- /* treat a hole like an unallocated area and bail out */
- if (!exists) {
- return 0;
- }
+ /*
+ * we do not diff against a snapshot so we should never receive a callback
+ * for a hole.
+ */
+ assert(exists);
if (!req->exists && offs > req->offs) {
/*

View File

@@ -1,161 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Tue, 17 May 2022 09:46:02 +0200
Subject: [PATCH] Revert "block/rbd: implement bdrv_co_block_status"
During backup, bdrv_co_block_status is called for each block copy
chunk. When RBD is used, the current implementation with
rbd_diff_iterate2() using whole_object=true takes about linearly more
time, depending on the image size. Since there are linearly more
chunks, the slowdown is quadratic, becoming unacceptable for large
images (starting somewhere between 500-1000 GiB in my testing).
This reverts commit 0347a8fd4c3faaedf119be04c197804be40a384b as a
stop-gap measure, until it's clear how to make the implemenation
more efficient.
Upstream bug report:
https://gitlab.com/qemu-project/qemu/-/issues/1026
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/rbd.c | 112 ----------------------------------------------------
1 file changed, 112 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 98f4ba2620..efcbbe5949 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -97,12 +97,6 @@ typedef struct RBDTask {
int64_t ret;
} RBDTask;
-typedef struct RBDDiffIterateReq {
- uint64_t offs;
- uint64_t bytes;
- bool exists;
-} RBDDiffIterateReq;
-
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
BlockdevOptionsRbd *opts, bool cache,
const char *keypairs, const char *secretid,
@@ -1293,111 +1287,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
return spec_info;
}
-/*
- * rbd_diff_iterate2 allows to interrupt the exection by returning a negative
- * value in the callback routine. Choose a value that does not conflict with
- * an existing exitcode and return it if we want to prematurely stop the
- * execution because we detected a change in the allocation status.
- */
-#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
-
-static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
- int exists, void *opaque)
-{
- RBDDiffIterateReq *req = opaque;
-
- assert(req->offs + req->bytes <= offs);
- /*
- * we do not diff against a snapshot so we should never receive a callback
- * for a hole.
- */
- assert(exists);
-
- if (!req->exists && offs > req->offs) {
- /*
- * we started in an unallocated area and hit the first allocated
- * block. req->bytes must be set to the length of the unallocated area
- * before the allocated area. stop further processing.
- */
- req->bytes = offs - req->offs;
- return QEMU_RBD_EXIT_DIFF_ITERATE2;
- }
-
- if (req->exists && offs > req->offs + req->bytes) {
- /*
- * we started in an allocated area and jumped over an unallocated area,
- * req->bytes contains the length of the allocated area before the
- * unallocated area. stop further processing.
- */
- return QEMU_RBD_EXIT_DIFF_ITERATE2;
- }
-
- req->bytes += len;
- req->exists = true;
-
- return 0;
-}
-
-static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
-{
- BDRVRBDState *s = bs->opaque;
- int status, r;
- RBDDiffIterateReq req = { .offs = offset };
- uint64_t features, flags;
-
- assert(offset + bytes <= s->image_size);
-
- /* default to all sectors allocated */
- status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
- *map = offset;
- *file = bs;
- *pnum = bytes;
-
- /* check if RBD image supports fast-diff */
- r = rbd_get_features(s->image, &features);
- if (r < 0) {
- return status;
- }
- if (!(features & RBD_FEATURE_FAST_DIFF)) {
- return status;
- }
-
- /* check if RBD fast-diff result is valid */
- r = rbd_get_flags(s->image, &flags);
- if (r < 0) {
- return status;
- }
- if (flags & RBD_FLAG_FAST_DIFF_INVALID) {
- return status;
- }
-
- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
- qemu_rbd_diff_iterate_cb, &req);
- if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
- return status;
- }
- assert(req.bytes <= bytes);
- if (!req.exists) {
- if (r == 0) {
- /*
- * rbd_diff_iterate2 does not invoke callbacks for unallocated
- * areas. This here catches the case where no callback was
- * invoked at all (req.bytes == 0).
- */
- assert(req.bytes == 0);
- req.bytes = bytes;
- }
- status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
- }
-
- *pnum = req.bytes;
- return status;
-}
-
static int64_t qemu_rbd_getlength(BlockDriverState *bs)
{
BDRVRBDState *s = bs->opaque;
@@ -1633,7 +1522,6 @@ static BlockDriver bdrv_rbd = {
#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
.bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
#endif
- .bdrv_co_block_status = qemu_rbd_co_block_status,
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,

View File

@@ -13,16 +13,15 @@ that are obviously marked as "busy", which would cause none at all to be
transferred.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
migration/block-dirty-bitmap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 9aba7d9c22..f4ecf9c9f9 100644
index 5bf0d9fbc6..1070c19181 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -538,7 +538,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
@@ -323,7 +323,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
error_report_err(local_err);
@@ -30,4 +29,4 @@ index 9aba7d9c22..f4ecf9c9f9 100644
+ continue;
}
if (bitmap_aliases) {
bdrv_ref(bs);

View File

@@ -1,59 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 25 May 2022 13:59:37 +0200
Subject: [PATCH] PVE-Backup: create jobs: correctly cancel in error scenario
The first call to job_cancel_sync() will cancel and free all jobs in
the transaction, so ensure that it's called only once and get rid of
the job_unref() that would operate on freed memory.
It's also necessary to NULL backup_state.pbs in the error scenario,
because a subsequent backup_cancel QMP call (as happens in PVE when
the backup QMP command fails) would try to call proxmox_backup_abort()
and run into a segfault.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index f6a5f8c785..5bed6f4014 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -506,6 +506,11 @@ static void create_backup_jobs_bh(void *opaque) {
}
if (*errp) {
+ /*
+ * It's enough to cancel one job in the transaction, the rest will
+ * follow automatically.
+ */
+ bool canceled = false;
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -516,12 +521,12 @@ static void create_backup_jobs_bh(void *opaque) {
di->target = NULL;
}
- if (di->job) {
+ if (!canceled && di->job) {
AioContext *ctx = di->job->job.aio_context;
aio_context_acquire(ctx);
job_cancel_sync(&di->job->job, true);
- job_unref(&di->job->job);
aio_context_release(ctx);
+ canceled = true;
}
}
}
@@ -947,6 +952,7 @@ err:
if (pbs) {
proxmox_backup_disconnect(pbs);
+ backup_state.pbs = NULL;
}
if (backup_dir) {

View File

@@ -0,0 +1,39 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 4 Jan 2021 14:49:14 +0100
Subject: [PATCH] PVE: fix aborting multiple 'CREATED' jobs in sequential
transaction
Deadlocks could occur in the AIO_WAIT_WHILE loop in job_finish_sync,
which would wait for CREATED but not running jobs to complete, even
though job_enter is a no-op in that scenario. Mark offending jobs as
ABORTING immediately via job_update_rc if required.
Manifested itself in cancelling or failing backups with more than 2
drives.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Tested-by: Mira Limbeck <m.limbeck@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
job.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/job.c b/job.c
index 97ee97a192..51984e557c 100644
--- a/job.c
+++ b/job.c
@@ -1035,6 +1035,13 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
return -EBUSY;
}
+ /* in a sequential transaction jobs with status CREATED can appear at time
+ * of cancelling, these have not begun work so job_enter won't do anything,
+ * let's ensure they are marked as ABORTING if required */
+ if (job->status == JOB_STATUS_CREATED && job->txn->sequential) {
+ job_update_rc(job);
+ }
+
AIO_WAIT_WHILE(job->aio_context,
(job_enter(job), !job_is_completed(job)));

View File

@@ -1,76 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 25 May 2022 13:59:38 +0200
Subject: [PATCH] PVE-Backup: ensure jobs in di_list are referenced
Ensures that qmp_backup_cancel doesn't pick a job that's already been
freed. With unlucky timings it seems possible that:
1. job_exit -> job_completed -> job_finalize_single starts
2. pvebackup_co_complete_stream gets spawned in completion callback
3. job finalize_single finishes -> job's refcount hits zero -> job is
freed
4. qmp_backup_cancel comes in and locks backup_state.backup_mutex
before pvebackup_co_complete_stream can remove the job from the
di_list
5. qmp_backup_cancel will pick a job that's already been freed
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 5bed6f4014..0c34428713 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -316,6 +316,14 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
}
}
+ if (di->job) {
+ AioContext *ctx = di->job->job.aio_context;
+ aio_context_acquire(ctx);
+ job_unref(&di->job->job);
+ di->job = NULL;
+ aio_context_release(ctx);
+ }
+
// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -491,9 +499,12 @@ static void create_backup_jobs_bh(void *opaque) {
bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
- aio_context_release(aio_context);
-
di->job = job;
+ if (job) {
+ job_ref(&job->job);
+ }
+
+ aio_context_release(aio_context);
if (!job || local_err) {
error_setg(errp, "backup_job_create failed: %s",
@@ -521,12 +532,16 @@ static void create_backup_jobs_bh(void *opaque) {
di->target = NULL;
}
- if (!canceled && di->job) {
+ if (di->job) {
AioContext *ctx = di->job->job.aio_context;
aio_context_acquire(ctx);
- job_cancel_sync(&di->job->job, true);
+ if (!canceled) {
+ job_cancel_sync(&di->job->job, true);
+ canceled = true;
+ }
+ job_unref(&di->job->job);
+ di->job = NULL;
aio_context_release(ctx);
- canceled = true;
}
}
}

View File

@@ -1,120 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 25 May 2022 13:59:39 +0200
Subject: [PATCH] PVE-Backup: avoid segfault issues upon backup-cancel
When canceling a backup in PVE via a signal it's easy to run into a
situation where the job is already failing when the backup_cancel QMP
command comes in. With a bit of unlucky timing on top, it can happen
that job_exit() runs between schedulung of job_cancel_bh() and
execution of job_cancel_bh(). But job_cancel_sync() does not expect
that the job is already finalized (in fact, the job might've been
freed already, but even if it isn't, job_cancel_sync() would try to
deref job->txn which would be NULL at that point).
It is not possible to simply use the job_cancel() (which is advertised
as being async but isn't in all cases) in qmp_backup_cancel() for the
same reason job_cancel_sync() cannot be used. Namely, because it can
invoke job_finish_sync() (which uses AIO_WAIT_WHILE and thus hangs if
called from a coroutine). This happens when there's multiple jobs in
the transaction and job->deferred_to_main_loop is true (is set before
scheduling job_exit()) or if the job was not started yet.
Fix the issue by selecting the job to cancel in job_cancel_bh() itself
using the first job that's not completed yet. This is not necessarily
the first job in the list, because pvebackup_co_complete_stream()
might not yet have removed a completed job when job_cancel_bh() runs.
An alternative would be to continue using only the first job and
checking against JOB_STATUS_CONCLUDED or JOB_STATUS_NULL to decide if
it's still necessary and possible to cancel, but the approach with
using the first non-completed job seemed more robust.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
pve-backup.c | 61 +++++++++++++++++++++++++++++++++-------------------
1 file changed, 39 insertions(+), 22 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 0c34428713..2e22030eec 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -355,15 +355,42 @@ static void pvebackup_complete_cb(void *opaque, int ret)
/*
* job_cancel(_sync) does not like to be called from coroutines, so defer to
- * main loop processing via a bottom half.
+ * main loop processing via a bottom half. Assumes that caller holds
+ * backup_mutex.
*/
static void job_cancel_bh(void *opaque) {
CoCtxData *data = (CoCtxData*)opaque;
- Job *job = (Job*)data->data;
- AioContext *job_ctx = job->aio_context;
- aio_context_acquire(job_ctx);
- job_cancel_sync(job, true);
- aio_context_release(job_ctx);
+
+ /*
+ * Be careful to pick a valid job to cancel:
+ * 1. job_cancel_sync() does not expect the job to be finalized already.
+ * 2. job_exit() might run between scheduling and running job_cancel_bh()
+ * and pvebackup_co_complete_stream() might not have removed the job from
+ * the list yet (in fact, cannot, because it waits for the backup_mutex).
+ * Requiring !job_is_completed() ensures that no finalized job is picked.
+ */
+ GList *bdi = g_list_first(backup_state.di_list);
+ while (bdi) {
+ if (bdi->data) {
+ BlockJob *bj = ((PVEBackupDevInfo *)bdi->data)->job;
+ if (bj) {
+ Job *job = &bj->job;
+ if (!job_is_completed(job)) {
+ AioContext *job_ctx = job->aio_context;
+ aio_context_acquire(job_ctx);
+ job_cancel_sync(job, true);
+ aio_context_release(job_ctx);
+ /*
+ * It's enough to cancel one job in the transaction, the
+ * rest will follow automatically.
+ */
+ break;
+ }
+ }
+ }
+ bdi = g_list_next(bdi);
+ }
+
aio_co_enter(data->ctx, data->co);
}
@@ -384,22 +411,12 @@ void coroutine_fn qmp_backup_cancel(Error **errp)
proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
- /* it's enough to cancel one job in the transaction, the rest will follow
- * automatically */
- GList *bdi = g_list_first(backup_state.di_list);
- BlockJob *cancel_job = bdi && bdi->data ?
- ((PVEBackupDevInfo *)bdi->data)->job :
- NULL;
-
- if (cancel_job) {
- CoCtxData data = {
- .ctx = qemu_get_current_aio_context(),
- .co = qemu_coroutine_self(),
- .data = &cancel_job->job,
- };
- aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
- qemu_coroutine_yield();
- }
+ CoCtxData data = {
+ .ctx = qemu_get_current_aio_context(),
+ .co = qemu_coroutine_self(),
+ };
+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+ qemu_coroutine_yield();
qemu_co_mutex_unlock(&backup_state.backup_mutex);
}

View File

@@ -1,57 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 22 Jun 2022 10:45:11 +0200
Subject: [PATCH] vma: create: support 64KiB-unaligned input images
which fixes backing up templates with such disks in PVE, for example
efitype=4m EFI disks on a file-based storage (size = 540672).
If there is not enough left to read, blk_co_preadv will return -EIO,
so limit the size in the last iteration.
For writing, an unaligned end is already handled correctly.
The call to memset is not strictly necessary, because writing also
checks that it doesn't write data beyond the end of the image. But
there are two reasons to do it:
1. It's cleaner that way.
2. It allows detecting when the final piece is all zeroes, which might
not happen if the buffer still contains data from the previous
iteration.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
vma.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/vma.c b/vma.c
index e6e9ffc7fe..304f02bc84 100644
--- a/vma.c
+++ b/vma.c
@@ -548,7 +548,7 @@ static void coroutine_fn backup_run(void *opaque)
struct iovec iov;
QEMUIOVector qiov;
- int64_t start, end;
+ int64_t start, end, readlen;
int ret = 0;
unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
@@ -562,8 +562,16 @@ static void coroutine_fn backup_run(void *opaque)
iov.iov_len = VMA_CLUSTER_SIZE;
qemu_iovec_init_external(&qiov, &iov, 1);
+ if (start + 1 == end) {
+ memset(buf, 0, VMA_CLUSTER_SIZE);
+ readlen = job->len - start * VMA_CLUSTER_SIZE;
+ assert(readlen > 0 && readlen <= VMA_CLUSTER_SIZE);
+ } else {
+ readlen = VMA_CLUSTER_SIZE;
+ }
+
ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
- VMA_CLUSTER_SIZE, &qiov, 0);
+ readlen, &qiov, 0);
if (ret < 0) {
vma_writer_set_error(job->vmaw, "read error", -1);
goto out;

View File

@@ -1,25 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 22 Jun 2022 10:45:12 +0200
Subject: [PATCH] vma: create: avoid triggering assertion in error case
error_setg expects its argument to not be initialized yet.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
vma-writer.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/vma-writer.c b/vma-writer.c
index df4b20793d..ac7da237d0 100644
--- a/vma-writer.c
+++ b/vma-writer.c
@@ -311,6 +311,8 @@ VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
}
if (vmaw->fd < 0) {
+ error_free(*errp);
+ *errp = NULL;
error_setg(errp, "can't open file %s - %s\n", filename,
g_strerror(errno));
goto err;

View File

@@ -1,36 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fabian Ebner <f.ebner@proxmox.com>
Date: Wed, 22 Jun 2022 10:45:13 +0200
Subject: [PATCH] block: alloc-track: avoid premature break
While the bdrv_co_preadv() calls are expected to return 0 on success,
qemu_iovec_memset() will return the number of bytes set (will be
local_bytes, because the slice with that size was just initialized).
Don't break out of the loop after the branch with qemu_iovec_memset(),
because there might still be work to do. Additionally, ret is an int,
which on 64-bit platforms is too small to hold the size_t returned by
qemu_iovec_memset().
The branch seems to be difficult to reach in practice, because the
whole point of alloc-track is to be used with a backing device.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
block/alloc-track.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/block/alloc-track.c b/block/alloc-track.c
index 6b50fbe537..c1160af04b 100644
--- a/block/alloc-track.c
+++ b/block/alloc-track.c
@@ -174,7 +174,8 @@ static int coroutine_fn track_co_preadv(BlockDriverState *bs,
ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
&local_qiov, flags);
} else {
- ret = qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
+ ret = 0;
}
if (ret != 0) {

View File

@@ -1,144 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Mon, 3 Oct 2022 15:52:04 +0200
Subject: [PATCH] PVE Backup: allow passing max-workers performance setting
For query-proxmox-support, add an indication that it's possible to use
the setting.
For now, the other two BackupPerf settings are not exposed:
* use-copy-range: would need to be implemented by the backup-dump
block driver first, and in fact, the default for backup was changed,
because it wasn't as fast for backup in QEMU, see commit
6a30f663d4c0b3c45a544d541e0c4e214b2473a1.
* max-chunk: enforced to be at least the backup cluster size, which is
4 MiB for PBS and otherwise maximum of source and target cluster size.
And block-copy has a maximum buffer size of 1 MiB, so setting a larger
max-chunk doesn't even have an effect. To make the setting sensibly
usable the check would need to be removed and optionally the
block-copy max buffer size would need to be bumped. I tried doing just
that, and tested different source/target combinations with different
max-chunk settings, but there were no noticable improvements over the
default "unlimited" (resulting in 1 MiB for block-copy).
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/monitor/block-hmp-cmds.c | 4 +++-
pve-backup.c | 18 +++++++++++++-----
qapi/block-core.json | 9 +++++++--
3 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 0502f42be6..cc231ec3f2 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1049,7 +1049,9 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
false, false, // PBS encrypt
true, dir ? BACKUP_FORMAT_DIR : BACKUP_FORMAT_VMA,
false, NULL, false, NULL, !!devlist,
- devlist, qdict_haskey(qdict, "speed"), speed, &error);
+ devlist, qdict_haskey(qdict, "speed"), speed,
+ false, 0, // BackupPerf max-workers
+ &error);
hmp_handle_error(mon, error);
}
diff --git a/pve-backup.c b/pve-backup.c
index 2e22030eec..e9aa7e0f49 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -55,6 +55,7 @@ static struct PVEBackupState {
bool starting;
} stat;
int64_t speed;
+ BackupPerf perf;
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
@@ -492,8 +493,6 @@ static void create_backup_jobs_bh(void *opaque) {
}
backup_state.txn = job_txn_new_seq();
- BackupPerf perf = { .max_workers = 16 };
-
/* create and start all jobs (paused state) */
GList *l = backup_state.di_list;
while (l) {
@@ -513,8 +512,9 @@ static void create_backup_jobs_bh(void *opaque) {
BlockJob *job = backup_job_create(
NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
- bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
+ bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
+ &local_err);
di->job = job;
if (job) {
@@ -584,7 +584,9 @@ UuidInfo coroutine_fn *qmp_backup(
bool has_config_file, const char *config_file,
bool has_firewall_file, const char *firewall_file,
bool has_devlist, const char *devlist,
- bool has_speed, int64_t speed, Error **errp)
+ bool has_speed, int64_t speed,
+ bool has_max_workers, int64_t max_workers,
+ Error **errp)
{
assert(qemu_in_coroutine());
@@ -914,6 +916,11 @@ UuidInfo coroutine_fn *qmp_backup(
backup_state.speed = (has_speed && speed > 0) ? speed : 0;
+ backup_state.perf = (BackupPerf){ .max_workers = 16 };
+ if (has_max_workers) {
+ backup_state.perf.max_workers = max_workers;
+ }
+
backup_state.vmaw = vmaw;
backup_state.pbs = pbs;
@@ -1089,5 +1096,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->pbs_dirty_bitmap_migration = true;
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
+ ret->backup_max_workers = true;
return ret;
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index cc2ead0b75..e3f62faa81 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -829,6 +829,8 @@
#
# @encrypt: use encryption ((optional for format 'pbs', defaults to true if there is a keyfile)
#
+# @max-workers: see @BackupPerf for details. Default 16.
+#
# Returns: the uuid of the backup job
#
##
@@ -847,7 +849,9 @@
'*format': 'BackupFormat',
'*config-file': 'str',
'*firewall-file': 'str',
- '*devlist': 'str', '*speed': 'int' },
+ '*devlist': 'str',
+ '*speed': 'int',
+ '*max-workers': 'int' },
'returns': 'UuidInfo', 'coroutine': true }
##
@@ -902,7 +906,8 @@
'pbs-dirty-bitmap-savevm': 'bool',
'pbs-dirty-bitmap-migration': 'bool',
'pbs-masterkey': 'bool',
- 'pbs-library-version': 'str' } }
+ 'pbs-library-version': 'str',
+ 'backup-max-workers': 'bool' } }
##
# @query-proxmox-support:

Some files were not shown because too many files have changed in this diff Show More