Compare commits
1 Commits
05089ab57d
...
f2653ef0b8
Author | SHA1 | Date |
---|---|---|
Vitaliy Filippov | f2653ef0b8 |
2
Makefile
2
Makefile
|
@ -58,7 +58,7 @@ $(BUILDDIR): submodule
|
||||||
deb kvm: $(DEBS)
|
deb kvm: $(DEBS)
|
||||||
$(DEB_DBG): $(DEB)
|
$(DEB_DBG): $(DEB)
|
||||||
$(DEB): $(BUILDDIR)
|
$(DEB): $(BUILDDIR)
|
||||||
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc
|
cd $(BUILDDIR); dpkg-buildpackage -b -us -uc -j32
|
||||||
lintian $(DEBS)
|
lintian $(DEBS)
|
||||||
|
|
||||||
sbuild: $(DSC)
|
sbuild: $(DSC)
|
||||||
|
|
|
@ -1,26 +1,8 @@
|
||||||
pve-qemu-kvm (9.0.2-4) bookworm; urgency=medium
|
pve-qemu-kvm (9.0.2-1+vitastor1) bookworm; urgency=medium
|
||||||
|
|
||||||
* async snapshot: ensure any dynamic vCPU-throttling applied for
|
* Add Vitastor support
|
||||||
auto-converge gets always disabled again after finishing the snapshot.
|
|
||||||
|
|
||||||
-- Proxmox Support Team <support@proxmox.com> Sun, 10 Nov 2024 11:23:09 +0100
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 02 Aug 2024 00:03:15 +0300
|
||||||
|
|
||||||
pve-qemu-kvm (9.0.2-3) bookworm; urgency=medium
|
|
||||||
|
|
||||||
* pick up fix for VirtIO PCI regressions
|
|
||||||
|
|
||||||
* pick up stable fixes for 9.0, including fixes for VirtIO-net, ARM and
|
|
||||||
x86(_64) emulation, CVEs to harden NBD server against malicious clients,
|
|
||||||
as well as a few others (VNC, physmem, Intel IOMMU, ...).
|
|
||||||
|
|
||||||
-- Proxmox Support Team <support@proxmox.com> Fri, 06 Sep 2024 16:21:42 +0200
|
|
||||||
|
|
||||||
pve-qemu-kvm (9.0.2-2) bookworm; urgency=medium
|
|
||||||
|
|
||||||
* actually update submodule to QEMU 9.0.2. The previous release was still
|
|
||||||
based on 9.0.0 by mistake.
|
|
||||||
|
|
||||||
-- Proxmox Support Team <support@proxmox.com> Wed, 07 Aug 2024 10:16:01 +0200
|
|
||||||
|
|
||||||
pve-qemu-kvm (9.0.2-1) bookworm; urgency=medium
|
pve-qemu-kvm (9.0.2-1) bookworm; urgency=medium
|
||||||
|
|
||||||
|
|
|
@ -59,6 +59,7 @@ Depends: ceph-common (>= 0.48),
|
||||||
libspice-server1 (>= 0.14.0~),
|
libspice-server1 (>= 0.14.0~),
|
||||||
libusb-1.0-0 (>= 1.0.17-1),
|
libusb-1.0-0 (>= 1.0.17-1),
|
||||||
libusbredirparser1 (>= 0.6-2),
|
libusbredirparser1 (>= 0.6-2),
|
||||||
|
vitastor-client (>= 0.9.4),
|
||||||
libuuid1,
|
libuuid1,
|
||||||
${misc:Depends},
|
${misc:Depends},
|
||||||
${shlibs:Depends},
|
${shlibs:Depends},
|
||||||
|
|
87
debian/patches/extra/0006-Revert-virtio-pci-fix-use-of-a-released-vector.patch
vendored
Normal file
87
debian/patches/extra/0006-Revert-virtio-pci-fix-use-of-a-released-vector.patch
vendored
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Fiona Ebner <f.ebner@proxmox.com>
|
||||||
|
Date: Thu, 16 May 2024 12:59:52 +0200
|
||||||
|
Subject: [PATCH] Revert "virtio-pci: fix use of a released vector"
|
||||||
|
|
||||||
|
This reverts commit 2ce6cff94df2650c460f809e5ad263f1d22507c0.
|
||||||
|
|
||||||
|
The fix causes some issues:
|
||||||
|
https://gitlab.com/qemu-project/qemu/-/issues/2321
|
||||||
|
https://gitlab.com/qemu-project/qemu/-/issues/2334
|
||||||
|
|
||||||
|
The CVE fixed by commit 2ce6cff94d ("virtio-pci: fix use of a released
|
||||||
|
vector") is CVE-2024-4693 [0] and allows a malicious guest that
|
||||||
|
controls the boot process in the guest to crash its QEMU process.
|
||||||
|
|
||||||
|
The issues sound worse than the CVE, so revert until there is a proper
|
||||||
|
fix.
|
||||||
|
|
||||||
|
[0]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-4693
|
||||||
|
|
||||||
|
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||||
|
---
|
||||||
|
hw/virtio/virtio-pci.c | 37 ++-----------------------------------
|
||||||
|
1 file changed, 2 insertions(+), 35 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
|
||||||
|
index e04218a9fb..fd66713848 100644
|
||||||
|
--- a/hw/virtio/virtio-pci.c
|
||||||
|
+++ b/hw/virtio/virtio-pci.c
|
||||||
|
@@ -1410,38 +1410,6 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void virtio_pci_set_vector(VirtIODevice *vdev,
|
||||||
|
- VirtIOPCIProxy *proxy,
|
||||||
|
- int queue_no, uint16_t old_vector,
|
||||||
|
- uint16_t new_vector)
|
||||||
|
-{
|
||||||
|
- bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
|
||||||
|
- msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled();
|
||||||
|
-
|
||||||
|
- if (new_vector == old_vector) {
|
||||||
|
- return;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /*
|
||||||
|
- * If the device uses irqfd and the vector changes after DRIVER_OK is
|
||||||
|
- * set, we need to release the old vector and set up the new one.
|
||||||
|
- * Otherwise just need to set the new vector on the device.
|
||||||
|
- */
|
||||||
|
- if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) {
|
||||||
|
- kvm_virtio_pci_vector_release_one(proxy, queue_no);
|
||||||
|
- }
|
||||||
|
- /* Set the new vector on the device. */
|
||||||
|
- if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
|
||||||
|
- vdev->config_vector = new_vector;
|
||||||
|
- } else {
|
||||||
|
- virtio_queue_set_vector(vdev, queue_no, new_vector);
|
||||||
|
- }
|
||||||
|
- /* If the new vector changed need to set it up. */
|
||||||
|
- if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) {
|
||||||
|
- kvm_virtio_pci_vector_use_one(proxy, queue_no);
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
|
||||||
|
uint8_t bar, uint64_t offset, uint64_t length,
|
||||||
|
uint8_t id)
|
||||||
|
@@ -1588,8 +1556,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
|
||||||
|
} else {
|
||||||
|
val = VIRTIO_NO_VECTOR;
|
||||||
|
}
|
||||||
|
- virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX,
|
||||||
|
- vdev->config_vector, val);
|
||||||
|
+ vdev->config_vector = val;
|
||||||
|
break;
|
||||||
|
case VIRTIO_PCI_COMMON_STATUS:
|
||||||
|
if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
|
||||||
|
@@ -1629,7 +1596,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
|
||||||
|
} else {
|
||||||
|
val = VIRTIO_NO_VECTOR;
|
||||||
|
}
|
||||||
|
- virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val);
|
||||||
|
+ virtio_queue_set_vector(vdev, vdev->queue_sel, val);
|
||||||
|
break;
|
||||||
|
case VIRTIO_PCI_COMMON_Q_ENABLE:
|
||||||
|
if (val == 1) {
|
|
@ -1,77 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Cindy Lu <lulu@redhat.com>
|
|
||||||
Date: Tue, 6 Aug 2024 17:37:12 +0800
|
|
||||||
Subject: [PATCH] virtio-pci: Fix the use of an uninitialized irqfd
|
|
||||||
|
|
||||||
The crash was reported in MAC OS and NixOS, here is the link for this bug
|
|
||||||
https://gitlab.com/qemu-project/qemu/-/issues/2334
|
|
||||||
https://gitlab.com/qemu-project/qemu/-/issues/2321
|
|
||||||
|
|
||||||
In this bug, they are using the virtio_input device. The guest notifier was
|
|
||||||
not supported for this device, The function virtio_pci_set_guest_notifiers()
|
|
||||||
was not called, and the vector_irqfd was not initialized.
|
|
||||||
|
|
||||||
So the fix is adding the check for vector_irqfd in virtio_pci_get_notifier()
|
|
||||||
|
|
||||||
The function virtio_pci_get_notifier() can be used in various devices.
|
|
||||||
It could also be called when VIRTIO_CONFIG_S_DRIVER_OK is not set. In this situation,
|
|
||||||
the vector_irqfd being NULL is acceptable. We can allow the device continue to boot
|
|
||||||
|
|
||||||
If the vector_irqfd still hasn't been initialized after VIRTIO_CONFIG_S_DRIVER_OK
|
|
||||||
is set, it means that the function set_guest_notifiers was not called before the
|
|
||||||
driver started. This indicates that the device is not using the notifier.
|
|
||||||
At this point, we will let the check fail.
|
|
||||||
|
|
||||||
This fix is verified in vyatta,MacOS,NixOS,fedora system.
|
|
||||||
|
|
||||||
The bt tree for this bug is:
|
|
||||||
Thread 6 "CPU 0/KVM" received signal SIGSEGV, Segmentation fault.
|
|
||||||
[Switching to Thread 0x7c817be006c0 (LWP 1269146)]
|
|
||||||
kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817
|
|
||||||
817 if (irqfd->users == 0) {
|
|
||||||
(gdb) thread apply all bt
|
|
||||||
...
|
|
||||||
Thread 6 (Thread 0x7c817be006c0 (LWP 1269146) "CPU 0/KVM"):
|
|
||||||
0 kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817
|
|
||||||
1 kvm_virtio_pci_vector_use_one () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:893
|
|
||||||
2 0x00005983657045e2 in memory_region_write_accessor () at ../qemu-9.0.0/system/memory.c:497
|
|
||||||
3 0x0000598365704ba6 in access_with_adjusted_size () at ../qemu-9.0.0/system/memory.c:573
|
|
||||||
4 0x0000598365705059 in memory_region_dispatch_write () at ../qemu-9.0.0/system/memory.c:1528
|
|
||||||
5 0x00005983659b8e1f in flatview_write_continue_step.isra.0 () at ../qemu-9.0.0/system/physmem.c:2713
|
|
||||||
6 0x000059836570ba7d in flatview_write_continue () at ../qemu-9.0.0/system/physmem.c:2743
|
|
||||||
7 flatview_write () at ../qemu-9.0.0/system/physmem.c:2774
|
|
||||||
8 0x000059836570bb76 in address_space_write () at ../qemu-9.0.0/system/physmem.c:2894
|
|
||||||
9 0x0000598365763afe in address_space_rw () at ../qemu-9.0.0/system/physmem.c:2904
|
|
||||||
10 kvm_cpu_exec () at ../qemu-9.0.0/accel/kvm/kvm-all.c:2917
|
|
||||||
11 0x000059836576656e in kvm_vcpu_thread_fn () at ../qemu-9.0.0/accel/kvm/kvm-accel-ops.c:50
|
|
||||||
12 0x0000598365926ca8 in qemu_thread_start () at ../qemu-9.0.0/util/qemu-thread-posix.c:541
|
|
||||||
13 0x00007c8185bcd1cf in ??? () at /usr/lib/libc.so.6
|
|
||||||
14 0x00007c8185c4e504 in clone () at /usr/lib/libc.so.6
|
|
||||||
|
|
||||||
Fixes: 2ce6cff94d ("virtio-pci: fix use of a released vector")
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: Cindy Lu <lulu@redhat.com>
|
|
||||||
Message-Id: <20240806093715.65105-1-lulu@redhat.com>
|
|
||||||
Acked-by: Jason Wang <jasowang@redhat.com>
|
|
||||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
(cherry picked from commit a8e63ff289d137197ad7a701a587cc432872d798)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
hw/virtio/virtio-pci.c | 3 +++
|
|
||||||
1 file changed, 3 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
|
|
||||||
index e04218a9fb..389bab003f 100644
|
|
||||||
--- a/hw/virtio/virtio-pci.c
|
|
||||||
+++ b/hw/virtio/virtio-pci.c
|
|
||||||
@@ -860,6 +860,9 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
|
|
||||||
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
|
|
||||||
VirtQueue *vq;
|
|
||||||
|
|
||||||
+ if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
|
|
||||||
+ return -1;
|
|
||||||
+
|
|
||||||
if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
|
|
||||||
*n = virtio_config_get_guest_notifier(vdev);
|
|
||||||
*vector = vdev->config_vector;
|
|
|
@ -1,35 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Akihiko Odaki <akihiko.odaki@daynix.com>
|
|
||||||
Date: Mon, 1 Jul 2024 20:58:04 +0900
|
|
||||||
Subject: [PATCH] virtio-net: Ensure queue index fits with RSS
|
|
||||||
|
|
||||||
Ensure the queue index points to a valid queue when software RSS
|
|
||||||
enabled. The new calculation matches with the behavior of Linux's TAP
|
|
||||||
device with the RSS eBPF program.
|
|
||||||
|
|
||||||
Fixes: 4474e37a5b3a ("virtio-net: implement RX RSS processing")
|
|
||||||
Reported-by: Zhibin Hu <huzhibin5@huawei.com>
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
|
|
||||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
|
||||||
(cherry picked from commit f1595ceb9aad36a6c1da95bcb77ab9509b38822d)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
hw/net/virtio-net.c | 3 ++-
|
|
||||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
|
||||||
index 3644bfd91b..f48588638d 100644
|
|
||||||
--- a/hw/net/virtio-net.c
|
|
||||||
+++ b/hw/net/virtio-net.c
|
|
||||||
@@ -1949,7 +1949,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
|
|
||||||
if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
|
|
||||||
int index = virtio_net_process_rss(nc, buf, size);
|
|
||||||
if (index >= 0) {
|
|
||||||
- NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
|
|
||||||
+ NetClientState *nc2 =
|
|
||||||
+ qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
|
|
||||||
return virtio_net_receive_rcu(nc2, buf, size, true);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,338 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: thomas <east.moutain.yang@gmail.com>
|
|
||||||
Date: Fri, 12 Jul 2024 11:10:53 +0800
|
|
||||||
Subject: [PATCH] virtio-net: Fix network stall at the host side waiting for
|
|
||||||
kick
|
|
||||||
|
|
||||||
Patch 06b12970174 ("virtio-net: fix network stall under load")
|
|
||||||
added double-check to test whether the available buffer size
|
|
||||||
can satisfy the request or not, in case the guest has added
|
|
||||||
some buffers to the avail ring simultaneously after the first
|
|
||||||
check. It will be lucky if the available buffer size becomes
|
|
||||||
okay after the double-check, then the host can send the packet
|
|
||||||
to the guest. If the buffer size still can't satisfy the request,
|
|
||||||
even if the guest has added some buffers, viritio-net would
|
|
||||||
stall at the host side forever.
|
|
||||||
|
|
||||||
The patch enables notification and checks whether the guest has
|
|
||||||
added some buffers since last check of available buffers when
|
|
||||||
the available buffers are insufficient. If no buffer is added,
|
|
||||||
return false, else recheck the available buffers in the loop.
|
|
||||||
If the available buffers are sufficient, disable notification
|
|
||||||
and return true.
|
|
||||||
|
|
||||||
Changes:
|
|
||||||
1. Change the return type of virtqueue_get_avail_bytes() from void
|
|
||||||
to int, it returns an opaque that represents the shadow_avail_idx
|
|
||||||
of the virtqueue on success, else -1 on error.
|
|
||||||
2. Add a new API: virtio_queue_enable_notification_and_check(),
|
|
||||||
it takes an opaque as input arg which is returned from
|
|
||||||
virtqueue_get_avail_bytes(). It enables notification firstly,
|
|
||||||
then checks whether the guest has added some buffers since
|
|
||||||
last check of available buffers or not by virtio_queue_poll(),
|
|
||||||
return ture if yes.
|
|
||||||
|
|
||||||
The patch also reverts patch "06b12970174".
|
|
||||||
|
|
||||||
The case below can reproduce the stall.
|
|
||||||
|
|
||||||
Guest 0
|
|
||||||
+--------+
|
|
||||||
| iperf |
|
|
||||||
---------------> | server |
|
|
||||||
Host | +--------+
|
|
||||||
+--------+ | ...
|
|
||||||
| iperf |----
|
|
||||||
| client |---- Guest n
|
|
||||||
+--------+ | +--------+
|
|
||||||
| | iperf |
|
|
||||||
---------------> | server |
|
|
||||||
+--------+
|
|
||||||
|
|
||||||
Boot many guests from qemu with virtio network:
|
|
||||||
qemu ... -netdev tap,id=net_x \
|
|
||||||
-device virtio-net-pci-non-transitional,\
|
|
||||||
iommu_platform=on,mac=xx:xx:xx:xx:xx:xx,netdev=net_x
|
|
||||||
|
|
||||||
Each guest acts as iperf server with commands below:
|
|
||||||
iperf3 -s -D -i 10 -p 8001
|
|
||||||
iperf3 -s -D -i 10 -p 8002
|
|
||||||
|
|
||||||
The host as iperf client:
|
|
||||||
iperf3 -c guest_IP -p 8001 -i 30 -w 256k -P 20 -t 40000
|
|
||||||
iperf3 -c guest_IP -p 8002 -i 30 -w 256k -P 20 -t 40000
|
|
||||||
|
|
||||||
After some time, the host loses connection to the guest,
|
|
||||||
the guest can send packet to the host, but can't receive
|
|
||||||
packet from the host.
|
|
||||||
|
|
||||||
It's more likely to happen if SWIOTLB is enabled in the guest,
|
|
||||||
allocating and freeing bounce buffer takes some CPU ticks,
|
|
||||||
copying from/to bounce buffer takes more CPU ticks, compared
|
|
||||||
with that there is no bounce buffer in the guest.
|
|
||||||
Once the rate of producing packets from the host approximates
|
|
||||||
the rate of receiveing packets in the guest, the guest would
|
|
||||||
loop in NAPI.
|
|
||||||
|
|
||||||
receive packets ---
|
|
||||||
| |
|
|
||||||
v |
|
|
||||||
free buf virtnet_poll
|
|
||||||
| |
|
|
||||||
v |
|
|
||||||
add buf to avail ring ---
|
|
||||||
|
|
|
||||||
| need kick the host?
|
|
||||||
| NAPI continues
|
|
||||||
v
|
|
||||||
receive packets ---
|
|
||||||
| |
|
|
||||||
v |
|
|
||||||
free buf virtnet_poll
|
|
||||||
| |
|
|
||||||
v |
|
|
||||||
add buf to avail ring ---
|
|
||||||
|
|
|
||||||
v
|
|
||||||
... ...
|
|
||||||
|
|
||||||
On the other hand, the host fetches free buf from avail
|
|
||||||
ring, if the buf in the avail ring is not enough, the
|
|
||||||
host notifies the guest the event by writing the avail
|
|
||||||
idx read from avail ring to the event idx of used ring,
|
|
||||||
then the host goes to sleep, waiting for the kick signal
|
|
||||||
from the guest.
|
|
||||||
|
|
||||||
Once the guest finds the host is waiting for kick singal
|
|
||||||
(in virtqueue_kick_prepare_split()), it kicks the host.
|
|
||||||
|
|
||||||
The host may stall forever at the sequences below:
|
|
||||||
|
|
||||||
Host Guest
|
|
||||||
------------ -----------
|
|
||||||
fetch buf, send packet receive packet ---
|
|
||||||
... ... |
|
|
||||||
fetch buf, send packet add buf |
|
|
||||||
... add buf virtnet_poll
|
|
||||||
buf not enough avail idx-> add buf |
|
|
||||||
read avail idx add buf |
|
|
||||||
add buf ---
|
|
||||||
receive packet ---
|
|
||||||
write event idx ... |
|
|
||||||
wait for kick add buf virtnet_poll
|
|
||||||
... |
|
|
||||||
---
|
|
||||||
no more packet, exit NAPI
|
|
||||||
|
|
||||||
In the first loop of NAPI above, indicated in the range of
|
|
||||||
virtnet_poll above, the host is sending packets while the
|
|
||||||
guest is receiving packets and adding buffers.
|
|
||||||
step 1: The buf is not enough, for example, a big packet
|
|
||||||
needs 5 buf, but the available buf count is 3.
|
|
||||||
The host read current avail idx.
|
|
||||||
step 2: The guest adds some buf, then checks whether the
|
|
||||||
host is waiting for kick signal, not at this time.
|
|
||||||
The used ring is not empty, the guest continues
|
|
||||||
the second loop of NAPI.
|
|
||||||
step 3: The host writes the avail idx read from avail
|
|
||||||
ring to used ring as event idx via
|
|
||||||
virtio_queue_set_notification(q->rx_vq, 1).
|
|
||||||
step 4: At the end of the second loop of NAPI, recheck
|
|
||||||
whether kick is needed, as the event idx in the
|
|
||||||
used ring written by the host is beyound the
|
|
||||||
range of kick condition, the guest will not
|
|
||||||
send kick signal to the host.
|
|
||||||
|
|
||||||
Fixes: 06b12970174 ("virtio-net: fix network stall under load")
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: Wencheng Yang <east.moutain.yang@gmail.com>
|
|
||||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
|
||||||
(cherry picked from commit f937309fbdbb48c354220a3e7110c202ae4aa7fa)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
hw/net/virtio-net.c | 28 ++++++++++-------
|
|
||||||
hw/virtio/virtio.c | 64 +++++++++++++++++++++++++++++++++++---
|
|
||||||
include/hw/virtio/virtio.h | 21 +++++++++++--
|
|
||||||
3 files changed, 94 insertions(+), 19 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
|
||||||
index f48588638d..d4b979d343 100644
|
|
||||||
--- a/hw/net/virtio-net.c
|
|
||||||
+++ b/hw/net/virtio-net.c
|
|
||||||
@@ -1680,24 +1680,28 @@ static bool virtio_net_can_receive(NetClientState *nc)
|
|
||||||
|
|
||||||
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
|
|
||||||
{
|
|
||||||
+ int opaque;
|
|
||||||
+ unsigned int in_bytes;
|
|
||||||
VirtIONet *n = q->n;
|
|
||||||
- if (virtio_queue_empty(q->rx_vq) ||
|
|
||||||
- (n->mergeable_rx_bufs &&
|
|
||||||
- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
|
|
||||||
- virtio_queue_set_notification(q->rx_vq, 1);
|
|
||||||
-
|
|
||||||
- /* To avoid a race condition where the guest has made some buffers
|
|
||||||
- * available after the above check but before notification was
|
|
||||||
- * enabled, check for available buffers again.
|
|
||||||
- */
|
|
||||||
- if (virtio_queue_empty(q->rx_vq) ||
|
|
||||||
- (n->mergeable_rx_bufs &&
|
|
||||||
- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
|
|
||||||
+
|
|
||||||
+ while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
|
|
||||||
+ opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
|
|
||||||
+ bufsize, 0);
|
|
||||||
+ /* Buffer is enough, disable notifiaction */
|
|
||||||
+ if (bufsize <= in_bytes) {
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
|
|
||||||
+ /* Guest has added some buffers, try again */
|
|
||||||
+ continue;
|
|
||||||
+ } else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtio_queue_set_notification(q->rx_vq, 0);
|
|
||||||
+
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
|
|
||||||
index fd2dfe3a6b..08fba6b2d8 100644
|
|
||||||
--- a/hw/virtio/virtio.c
|
|
||||||
+++ b/hw/virtio/virtio.c
|
|
||||||
@@ -743,6 +743,60 @@ int virtio_queue_empty(VirtQueue *vq)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx)
|
|
||||||
+{
|
|
||||||
+ if (unlikely(!vq->vring.avail)) {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return (uint16_t)shadow_idx != vring_avail_idx(vq);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx)
|
|
||||||
+{
|
|
||||||
+ VRingPackedDesc desc;
|
|
||||||
+ VRingMemoryRegionCaches *caches;
|
|
||||||
+
|
|
||||||
+ if (unlikely(!vq->vring.desc)) {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ caches = vring_get_region_caches(vq);
|
|
||||||
+ if (!caches) {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
|
|
||||||
+ shadow_idx, true);
|
|
||||||
+
|
|
||||||
+ return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx)
|
|
||||||
+{
|
|
||||||
+ if (virtio_device_disabled(vq->vdev)) {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
|
|
||||||
+ return virtio_queue_packed_poll(vq, shadow_idx);
|
|
||||||
+ } else {
|
|
||||||
+ return virtio_queue_split_poll(vq, shadow_idx);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
|
|
||||||
+ int opaque)
|
|
||||||
+{
|
|
||||||
+ virtio_queue_set_notification(vq, 1);
|
|
||||||
+
|
|
||||||
+ if (opaque >= 0) {
|
|
||||||
+ return virtio_queue_poll(vq, (unsigned)opaque);
|
|
||||||
+ } else {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
|
|
||||||
unsigned int len)
|
|
||||||
{
|
|
||||||
@@ -1330,9 +1384,9 @@ err:
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
|
|
||||||
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
|
||||||
- unsigned int *out_bytes,
|
|
||||||
- unsigned max_in_bytes, unsigned max_out_bytes)
|
|
||||||
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
|
||||||
+ unsigned int *out_bytes, unsigned max_in_bytes,
|
|
||||||
+ unsigned max_out_bytes)
|
|
||||||
{
|
|
||||||
uint16_t desc_size;
|
|
||||||
VRingMemoryRegionCaches *caches;
|
|
||||||
@@ -1365,7 +1419,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
|
||||||
caches);
|
|
||||||
}
|
|
||||||
|
|
||||||
- return;
|
|
||||||
+ return (int)vq->shadow_avail_idx;
|
|
||||||
err:
|
|
||||||
if (in_bytes) {
|
|
||||||
*in_bytes = 0;
|
|
||||||
@@ -1373,6 +1427,8 @@ err:
|
|
||||||
if (out_bytes) {
|
|
||||||
*out_bytes = 0;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
|
|
||||||
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
|
|
||||||
index 2eafad17b8..8b4da92889 100644
|
|
||||||
--- a/include/hw/virtio/virtio.h
|
|
||||||
+++ b/include/hw/virtio/virtio.h
|
|
||||||
@@ -271,9 +271,13 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
|
|
||||||
VirtQueueElement *elem);
|
|
||||||
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
|
|
||||||
unsigned int out_bytes);
|
|
||||||
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
|
||||||
- unsigned int *out_bytes,
|
|
||||||
- unsigned max_in_bytes, unsigned max_out_bytes);
|
|
||||||
+/**
|
|
||||||
+ * Return <0 on error or an opaque >=0 to pass to
|
|
||||||
+ * virtio_queue_enable_notification_and_check on success.
|
|
||||||
+ */
|
|
||||||
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
|
||||||
+ unsigned int *out_bytes, unsigned max_in_bytes,
|
|
||||||
+ unsigned max_out_bytes);
|
|
||||||
|
|
||||||
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq);
|
|
||||||
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
|
|
||||||
@@ -307,6 +311,17 @@ int virtio_queue_ready(VirtQueue *vq);
|
|
||||||
|
|
||||||
int virtio_queue_empty(VirtQueue *vq);
|
|
||||||
|
|
||||||
+/**
|
|
||||||
+ * Enable notification and check whether guest has added some
|
|
||||||
+ * buffers since last call to virtqueue_get_avail_bytes.
|
|
||||||
+ *
|
|
||||||
+ * @opaque: value returned from virtqueue_get_avail_bytes
|
|
||||||
+ */
|
|
||||||
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
|
|
||||||
+ int opaque);
|
|
||||||
+
|
|
||||||
+void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t idx);
|
|
||||||
+
|
|
||||||
/* Host binding interface. */
|
|
||||||
|
|
||||||
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr);
|
|
|
@ -1,70 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: David Woodhouse <dwmw@amazon.co.uk>
|
|
||||||
Date: Tue, 9 Jul 2024 13:34:44 +0100
|
|
||||||
Subject: [PATCH] net: Reinstate '-net nic, model=help' output as documented in
|
|
||||||
man page
|
|
||||||
|
|
||||||
While refactoring the NIC initialization code, I broke '-net nic,model=help'
|
|
||||||
which no longer outputs a list of available NIC models.
|
|
||||||
|
|
||||||
Fixes: 2cdeca04adab ("net: report list of available models according to platform")
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
|
|
||||||
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
|
|
||||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
|
||||||
(cherry picked from commit 64f75f57f9d2c8c12ac6d9355fa5d3a2af5879ca)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
net/net.c | 25 ++++++++++++++++++++++---
|
|
||||||
1 file changed, 22 insertions(+), 3 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/net/net.c b/net/net.c
|
|
||||||
index a2f0c828bb..e6ca2529bb 100644
|
|
||||||
--- a/net/net.c
|
|
||||||
+++ b/net/net.c
|
|
||||||
@@ -1150,6 +1150,21 @@ NICInfo *qemu_find_nic_info(const char *typename, bool match_default,
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static bool is_nic_model_help_option(const char *model)
|
|
||||||
+{
|
|
||||||
+ if (model && is_help_option(model)) {
|
|
||||||
+ /*
|
|
||||||
+ * Trigger the help output by instantiating the hash table which
|
|
||||||
+ * will gather tha available models as they get registered.
|
|
||||||
+ */
|
|
||||||
+ if (!nic_model_help) {
|
|
||||||
+ nic_model_help = g_hash_table_new_full(g_str_hash, g_str_equal,
|
|
||||||
+ g_free, NULL);
|
|
||||||
+ }
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
|
|
||||||
/* "I have created a device. Please configure it if you can" */
|
|
||||||
bool qemu_configure_nic_device(DeviceState *dev, bool match_default,
|
|
||||||
@@ -1733,6 +1748,12 @@ void net_check_clients(void)
|
|
||||||
|
|
||||||
static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
|
|
||||||
{
|
|
||||||
+ const char *model = qemu_opt_get_del(opts, "model");
|
|
||||||
+
|
|
||||||
+ if (is_nic_model_help_option(model)) {
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
return net_client_init(opts, false, errp);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1789,9 +1810,7 @@ static int net_param_nic(void *dummy, QemuOpts *opts, Error **errp)
|
|
||||||
memset(ni, 0, sizeof(*ni));
|
|
||||||
ni->model = qemu_opt_get_del(opts, "model");
|
|
||||||
|
|
||||||
- if (!nic_model_help && !g_strcmp0(ni->model, "help")) {
|
|
||||||
- nic_model_help = g_hash_table_new_full(g_str_hash, g_str_equal,
|
|
||||||
- g_free, NULL);
|
|
||||||
+ if (is_nic_model_help_option(ni->model)) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,32 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: David Woodhouse <dwmw@amazon.co.uk>
|
|
||||||
Date: Tue, 6 Aug 2024 18:21:37 +0100
|
|
||||||
Subject: [PATCH] net: Fix '-net nic,model=' for non-help arguments
|
|
||||||
|
|
||||||
Oops, don't *delete* the model option when checking for 'help'.
|
|
||||||
|
|
||||||
Fixes: 64f75f57f9d2 ("net: Reinstate '-net nic, model=help' output as documented in man page")
|
|
||||||
Reported-by: Hans <sungdgdhtryrt@gmail.com>
|
|
||||||
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
|
|
||||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
|
||||||
(cherry picked from commit fa62cb989a9146c82f8f172715042852f5d36200)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
net/net.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/net/net.c b/net/net.c
|
|
||||||
index e6ca2529bb..897bb936cf 100644
|
|
||||||
--- a/net/net.c
|
|
||||||
+++ b/net/net.c
|
|
||||||
@@ -1748,7 +1748,7 @@ void net_check_clients(void)
|
|
||||||
|
|
||||||
static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
|
|
||||||
{
|
|
||||||
- const char *model = qemu_opt_get_del(opts, "model");
|
|
||||||
+ const char *model = qemu_opt_get(opts, "model");
|
|
||||||
|
|
||||||
if (is_nic_model_help_option(model)) {
|
|
||||||
return 0;
|
|
|
@ -1,57 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Date: Mon, 22 Jul 2024 18:29:54 +0100
|
|
||||||
Subject: [PATCH] target/arm: Don't assert for 128-bit tile accesses when SVL
|
|
||||||
is 128
|
|
||||||
|
|
||||||
For an instruction which accesses a 128-bit element tile when
|
|
||||||
the SVL is also 128 (for example MOV z0.Q, p0/M, ZA0H.Q[w0,0]),
|
|
||||||
we will assert in get_tile_rowcol():
|
|
||||||
|
|
||||||
qemu-system-aarch64: ../../tcg/tcg-op.c:926: tcg_gen_deposit_z_i32: Assertion `len > 0' failed.
|
|
||||||
|
|
||||||
This happens because we calculate
|
|
||||||
len = ctz32(streaming_vec_reg_size(s)) - esz;$
|
|
||||||
but if the SVL and the element size are the same len is 0, and
|
|
||||||
the deposit operation asserts.
|
|
||||||
|
|
||||||
In this case the ZA storage contains exactly one 128 bit
|
|
||||||
element ZA tile, and the horizontal or vertical slice is just
|
|
||||||
that tile. This means that regardless of the index value in
|
|
||||||
the Ws register, we always access that tile. (In pseudocode terms,
|
|
||||||
we calculate (index + offset) MOD 1, which is 0.)
|
|
||||||
|
|
||||||
Special case the len == 0 case to avoid hitting the assertion
|
|
||||||
in tcg_gen_deposit_z_i32().
|
|
||||||
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
|
||||||
Message-id: 20240722172957.1041231-2-peter.maydell@linaro.org
|
|
||||||
(cherry picked from commit 56f1c0db928aae0b83fd91c89ddb226b137e2b21)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
target/arm/tcg/translate-sme.c | 10 +++++++++-
|
|
||||||
1 file changed, 9 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
|
|
||||||
index 185a8a917b..a50a419af2 100644
|
|
||||||
--- a/target/arm/tcg/translate-sme.c
|
|
||||||
+++ b/target/arm/tcg/translate-sme.c
|
|
||||||
@@ -49,7 +49,15 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
|
|
||||||
/* Prepare a power-of-two modulo via extraction of @len bits. */
|
|
||||||
len = ctz32(streaming_vec_reg_size(s)) - esz;
|
|
||||||
|
|
||||||
- if (vertical) {
|
|
||||||
+ if (!len) {
|
|
||||||
+ /*
|
|
||||||
+ * SVL is 128 and the element size is 128. There is exactly
|
|
||||||
+ * one 128x128 tile in the ZA storage, and so we calculate
|
|
||||||
+ * (Rs + imm) MOD 1, which is always 0. We need to special case
|
|
||||||
+ * this because TCG doesn't allow deposit ops with len 0.
|
|
||||||
+ */
|
|
||||||
+ tcg_gen_movi_i32(tmp, 0);
|
|
||||||
+ } else if (vertical) {
|
|
||||||
/*
|
|
||||||
* Compute the byte offset of the index within the tile:
|
|
||||||
* (index % (svl / size)) * size
|
|
|
@ -1,59 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Date: Mon, 22 Jul 2024 18:29:55 +0100
|
|
||||||
Subject: [PATCH] target/arm: Fix UMOPA/UMOPS of 16-bit values
|
|
||||||
|
|
||||||
The UMOPA/UMOPS instructions are supposed to multiply unsigned 8 or
|
|
||||||
16 bit elements and accumulate the products into a 64-bit element.
|
|
||||||
In the Arm ARM pseudocode, this is done with the usual
|
|
||||||
infinite-precision signed arithmetic. However our implementation
|
|
||||||
doesn't quite get it right, because in the DEF_IMOP_64() macro we do:
|
|
||||||
sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0);
|
|
||||||
|
|
||||||
where NTYPE and MTYPE are uint16_t or int16_t. In the uint16_t case,
|
|
||||||
the C usual arithmetic conversions mean the values are converted to
|
|
||||||
"int" type and the multiply is done as a 32-bit multiply. This means
|
|
||||||
that if the inputs are, for example, 0xffff and 0xffff then the
|
|
||||||
result is 0xFFFE0001 as an int, which is then promoted to uint64_t
|
|
||||||
for the accumulation into sum; this promotion incorrectly sign
|
|
||||||
extends the multiply.
|
|
||||||
|
|
||||||
Avoid the incorrect sign extension by casting to int64_t before
|
|
||||||
the multiply, so we do the multiply as 64-bit signed arithmetic,
|
|
||||||
which is a type large enough that the multiply can never
|
|
||||||
overflow into the sign bit.
|
|
||||||
|
|
||||||
(The equivalent 8-bit operations in DEF_IMOP_32() are fine, because
|
|
||||||
the 8-bit multiplies can never overflow into the sign bit of a
|
|
||||||
32-bit integer.)
|
|
||||||
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2372
|
|
||||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
|
||||||
Message-id: 20240722172957.1041231-3-peter.maydell@linaro.org
|
|
||||||
(cherry picked from commit ea3f5a90f036734522e9af3bffd77e69e9f47355)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
target/arm/tcg/sme_helper.c | 8 ++++----
|
|
||||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
|
|
||||||
index 5a6dd76489..f9001f5213 100644
|
|
||||||
--- a/target/arm/tcg/sme_helper.c
|
|
||||||
+++ b/target/arm/tcg/sme_helper.c
|
|
||||||
@@ -1146,10 +1146,10 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
|
|
||||||
uint64_t sum = 0; \
|
|
||||||
/* Apply P to N as a mask, making the inactive elements 0. */ \
|
|
||||||
n &= expand_pred_h(p); \
|
|
||||||
- sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
|
|
||||||
- sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
|
|
||||||
- sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
|
|
||||||
- sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
|
|
||||||
+ sum += (int64_t)(NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
|
|
||||||
+ sum += (int64_t)(NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
|
|
||||||
+ sum += (int64_t)(NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
|
|
||||||
+ sum += (int64_t)(NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
|
|
||||||
return neg ? a - sum : a + sum; \
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,62 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Date: Mon, 22 Jul 2024 18:29:56 +0100
|
|
||||||
Subject: [PATCH] target/arm: Avoid shifts by -1 in tszimm_shr() and
|
|
||||||
tszimm_shl()
|
|
||||||
|
|
||||||
The function tszimm_esz() returns a shift amount, or possibly -1 in
|
|
||||||
certain cases that correspond to unallocated encodings in the
|
|
||||||
instruction set. We catch these later in the trans_ functions
|
|
||||||
(generally with an "a-esz < 0" check), but before we do the
|
|
||||||
decodetree-generated code will also call tszimm_shr() or tszimm_sl(),
|
|
||||||
which will use the tszimm_esz() return value as a shift count without
|
|
||||||
checking that it is not negative, which is undefined behaviour.
|
|
||||||
|
|
||||||
Avoid the UB by checking the return value in tszimm_shr() and
|
|
||||||
tszimm_shl().
|
|
||||||
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Resolves: Coverity CID 1547617, 1547694
|
|
||||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
|
||||||
Message-id: 20240722172957.1041231-4-peter.maydell@linaro.org
|
|
||||||
(cherry picked from commit 76916dfa89e8900639c1055c07a295c06628a0bc)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
target/arm/tcg/translate-sve.c | 18 ++++++++++++++++--
|
|
||||||
1 file changed, 16 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
|
|
||||||
index ada05aa530..466a19c25a 100644
|
|
||||||
--- a/target/arm/tcg/translate-sve.c
|
|
||||||
+++ b/target/arm/tcg/translate-sve.c
|
|
||||||
@@ -50,13 +50,27 @@ static int tszimm_esz(DisasContext *s, int x)
|
|
||||||
|
|
||||||
static int tszimm_shr(DisasContext *s, int x)
|
|
||||||
{
|
|
||||||
- return (16 << tszimm_esz(s, x)) - x;
|
|
||||||
+ /*
|
|
||||||
+ * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the
|
|
||||||
+ * trans function will check for esz < 0), so we can return any
|
|
||||||
+ * value we like from here in that case as long as we avoid UB.
|
|
||||||
+ */
|
|
||||||
+ int esz = tszimm_esz(s, x);
|
|
||||||
+ if (esz < 0) {
|
|
||||||
+ return esz;
|
|
||||||
+ }
|
|
||||||
+ return (16 << esz) - x;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* See e.g. LSL (immediate, predicated). */
|
|
||||||
static int tszimm_shl(DisasContext *s, int x)
|
|
||||||
{
|
|
||||||
- return x - (8 << tszimm_esz(s, x));
|
|
||||||
+ /* As with tszimm_shr(), value will be unused if esz < 0 */
|
|
||||||
+ int esz = tszimm_esz(s, x);
|
|
||||||
+ if (esz < 0) {
|
|
||||||
+ return esz;
|
|
||||||
+ }
|
|
||||||
+ return x - (8 << esz);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The SH bit is in bit 8. Extract the low 8 and shift. */
|
|
|
@ -1,41 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Date: Mon, 22 Jul 2024 18:29:57 +0100
|
|
||||||
Subject: [PATCH] target/arm: Ignore SMCR_EL2.LEN and SVCR_EL2.LEN if EL2 is
|
|
||||||
not enabled
|
|
||||||
|
|
||||||
When determining the current vector length, the SMCR_EL2.LEN and
|
|
||||||
SVCR_EL2.LEN settings should only be considered if EL2 is enabled
|
|
||||||
(compare the pseudocode CurrentSVL and CurrentNSVL which call
|
|
||||||
EL2Enabled()).
|
|
||||||
|
|
||||||
We were checking against ARM_FEATURE_EL2 rather than calling
|
|
||||||
arm_is_el2_enabled(), which meant that we would look at
|
|
||||||
SMCR_EL2/SVCR_EL2 when in Secure EL1 or Secure EL0 even if Secure EL2
|
|
||||||
was not enabled.
|
|
||||||
|
|
||||||
Use the correct check in sve_vqm1_for_el_sm().
|
|
||||||
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
|
||||||
Message-id: 20240722172957.1041231-5-peter.maydell@linaro.org
|
|
||||||
(cherry picked from commit f573ac059ed060234fcef4299fae9e500d357c33)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
target/arm/helper.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/target/arm/helper.c b/target/arm/helper.c
|
|
||||||
index a620481d7c..42044ae14b 100644
|
|
||||||
--- a/target/arm/helper.c
|
|
||||||
+++ b/target/arm/helper.c
|
|
||||||
@@ -7191,7 +7191,7 @@ uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm)
|
|
||||||
if (el <= 1 && !el_is_in_host(env, el)) {
|
|
||||||
len = MIN(len, 0xf & (uint32_t)cr[1]);
|
|
||||||
}
|
|
||||||
- if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
|
|
||||||
+ if (el <= 2 && arm_is_el2_enabled(env)) {
|
|
||||||
len = MIN(len, 0xf & (uint32_t)cr[2]);
|
|
||||||
}
|
|
||||||
if (arm_feature(env, ARM_FEATURE_EL3)) {
|
|
|
@ -1,164 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Date: Thu, 1 Aug 2024 10:15:03 +0100
|
|
||||||
Subject: [PATCH] target/arm: Handle denormals correctly for FMOPA (widening)
|
|
||||||
|
|
||||||
The FMOPA (widening) SME instruction takes pairs of half-precision
|
|
||||||
floating point values, widens them to single-precision, does a
|
|
||||||
two-way dot product and accumulates the results into a
|
|
||||||
single-precision destination. We don't quite correctly handle the
|
|
||||||
FPCR bits FZ and FZ16 which control flushing of denormal inputs and
|
|
||||||
outputs. This is because at the moment we pass a single float_status
|
|
||||||
value to the helper function, which then uses that configuration for
|
|
||||||
all the fp operations it does. However, because the inputs to this
|
|
||||||
operation are float16 and the outputs are float32 we need to use the
|
|
||||||
fp_status_f16 for the float16 input widening but the normal fp_status
|
|
||||||
for everything else. Otherwise we will apply the flushing control
|
|
||||||
FPCR.FZ16 to the 32-bit output rather than the FPCR.FZ control, and
|
|
||||||
incorrectly flush a denormal output to zero when we should not (or
|
|
||||||
vice-versa).
|
|
||||||
|
|
||||||
(In commit 207d30b5fdb5b we tried to fix the FZ handling but
|
|
||||||
didn't get it right, switching from "use FPCR.FZ for everything" to
|
|
||||||
"use FPCR.FZ16 for everything".)
|
|
||||||
|
|
||||||
Pass the CPU env to the sme_fmopa_h helper instead of an fp_status
|
|
||||||
pointer, and have the helper pass an extra fp_status into the
|
|
||||||
f16_dotadd() function so that we can use the right status for the
|
|
||||||
right parts of this operation.
|
|
||||||
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Fixes: 207d30b5fdb5 ("target/arm: Use FPST_F16 for SME FMOPA (widening)")
|
|
||||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2373
|
|
||||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
|
||||||
(cherry picked from commit 55f9f4ee018c5ccea81d8c8c586756d7711ae46f)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
target/arm/tcg/helper-sme.h | 2 +-
|
|
||||||
target/arm/tcg/sme_helper.c | 39 +++++++++++++++++++++++-----------
|
|
||||||
target/arm/tcg/translate-sme.c | 25 ++++++++++++++++++++--
|
|
||||||
3 files changed, 51 insertions(+), 15 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
|
|
||||||
index 27eef49a11..d22bf9d21b 100644
|
|
||||||
--- a/target/arm/tcg/helper-sme.h
|
|
||||||
+++ b/target/arm/tcg/helper-sme.h
|
|
||||||
@@ -121,7 +121,7 @@ DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
|
||||||
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
|
|
||||||
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
|
|
||||||
+ void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
|
||||||
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
|
|
||||||
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
|
|
||||||
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
|
|
||||||
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
|
|
||||||
index f9001f5213..3906bb51c0 100644
|
|
||||||
--- a/target/arm/tcg/sme_helper.c
|
|
||||||
+++ b/target/arm/tcg/sme_helper.c
|
|
||||||
@@ -976,12 +976,23 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
|
|
||||||
}
|
|
||||||
|
|
||||||
static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
|
|
||||||
- float_status *s_std, float_status *s_odd)
|
|
||||||
+ float_status *s_f16, float_status *s_std,
|
|
||||||
+ float_status *s_odd)
|
|
||||||
{
|
|
||||||
- float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
|
|
||||||
- float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
|
|
||||||
- float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
|
|
||||||
- float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
|
|
||||||
+ /*
|
|
||||||
+ * We need three different float_status for different parts of this
|
|
||||||
+ * operation:
|
|
||||||
+ * - the input conversion of the float16 values must use the
|
|
||||||
+ * f16-specific float_status, so that the FPCR.FZ16 control is applied
|
|
||||||
+ * - operations on float32 including the final accumulation must use
|
|
||||||
+ * the normal float_status, so that FPCR.FZ is applied
|
|
||||||
+ * - we have pre-set-up copy of s_std which is set to round-to-odd,
|
|
||||||
+ * for the multiply (see below)
|
|
||||||
+ */
|
|
||||||
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16);
|
|
||||||
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_f16);
|
|
||||||
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16);
|
|
||||||
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_f16);
|
|
||||||
float64 t64;
|
|
||||||
float32 t32;
|
|
||||||
|
|
||||||
@@ -1003,20 +1014,23 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
|
|
||||||
}
|
|
||||||
|
|
||||||
void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
|
|
||||||
- void *vpm, void *vst, uint32_t desc)
|
|
||||||
+ void *vpm, CPUARMState *env, uint32_t desc)
|
|
||||||
{
|
|
||||||
intptr_t row, col, oprsz = simd_maxsz(desc);
|
|
||||||
uint32_t neg = simd_data(desc) * 0x80008000u;
|
|
||||||
uint16_t *pn = vpn, *pm = vpm;
|
|
||||||
- float_status fpst_odd, fpst_std;
|
|
||||||
+ float_status fpst_odd, fpst_std, fpst_f16;
|
|
||||||
|
|
||||||
/*
|
|
||||||
- * Make a copy of float_status because this operation does not
|
|
||||||
- * update the cumulative fp exception status. It also produces
|
|
||||||
- * default nans. Make a second copy with round-to-odd -- see above.
|
|
||||||
+ * Make copies of fp_status and fp_status_f16, because this operation
|
|
||||||
+ * does not update the cumulative fp exception status. It also
|
|
||||||
+ * produces default NaNs. We also need a second copy of fp_status with
|
|
||||||
+ * round-to-odd -- see above.
|
|
||||||
*/
|
|
||||||
- fpst_std = *(float_status *)vst;
|
|
||||||
+ fpst_f16 = env->vfp.fp_status_f16;
|
|
||||||
+ fpst_std = env->vfp.fp_status;
|
|
||||||
set_default_nan_mode(true, &fpst_std);
|
|
||||||
+ set_default_nan_mode(true, &fpst_f16);
|
|
||||||
fpst_odd = fpst_std;
|
|
||||||
set_float_rounding_mode(float_round_to_odd, &fpst_odd);
|
|
||||||
|
|
||||||
@@ -1036,7 +1050,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
|
|
||||||
uint32_t m = *(uint32_t *)(vzm + H1_4(col));
|
|
||||||
|
|
||||||
m = f16mop_adj_pair(m, pcol, 0);
|
|
||||||
- *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
|
|
||||||
+ *a = f16_dotadd(*a, n, m,
|
|
||||||
+ &fpst_f16, &fpst_std, &fpst_odd);
|
|
||||||
}
|
|
||||||
col += 4;
|
|
||||||
pcol >>= 4;
|
|
||||||
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
|
|
||||||
index a50a419af2..ae42ddef7b 100644
|
|
||||||
--- a/target/arm/tcg/translate-sme.c
|
|
||||||
+++ b/target/arm/tcg/translate-sme.c
|
|
||||||
@@ -334,8 +334,29 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
|
|
||||||
- MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
|
|
||||||
+static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz,
|
|
||||||
+ gen_helper_gvec_5_ptr *fn)
|
|
||||||
+{
|
|
||||||
+ int svl = streaming_vec_reg_size(s);
|
|
||||||
+ uint32_t desc = simd_desc(svl, svl, a->sub);
|
|
||||||
+ TCGv_ptr za, zn, zm, pn, pm;
|
|
||||||
+
|
|
||||||
+ if (!sme_smza_enabled_check(s)) {
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ za = get_tile(s, esz, a->zad);
|
|
||||||
+ zn = vec_full_reg_ptr(s, a->zn);
|
|
||||||
+ zm = vec_full_reg_ptr(s, a->zm);
|
|
||||||
+ pn = pred_full_reg_ptr(s, a->pn);
|
|
||||||
+ pm = pred_full_reg_ptr(s, a->pm);
|
|
||||||
+
|
|
||||||
+ fn(za, zn, zm, pn, pm, tcg_env, tcg_constant_i32(desc));
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
|
|
||||||
+ MO_32, gen_helper_sme_fmopa_h)
|
|
||||||
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
|
|
||||||
MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
|
|
||||||
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
|
|
|
@ -1,39 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: =?UTF-8?q?Cl=C3=A9ment=20Mathieu--Drif?=
|
|
||||||
<clement.mathieu--drif@eviden.com>
|
|
||||||
Date: Tue, 9 Jul 2024 14:26:08 +0000
|
|
||||||
Subject: [PATCH] intel_iommu: fix FRCD construction macro
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
The constant must be unsigned, otherwise the two's complement
|
|
||||||
overrides the other fields when a PASID is present.
|
|
||||||
|
|
||||||
Fixes: 1b2b12376c8a ("intel-iommu: PASID support")
|
|
||||||
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
|
|
||||||
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
|
|
||||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
|
||||||
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
|
|
||||||
Message-Id: <20240709142557.317271-2-clement.mathieu--drif@eviden.com>
|
|
||||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
(cherry picked from commit a3c8d7e38550c3d5a46e6fa94ffadfa625a4861d)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
hw/i386/intel_iommu_internal.h | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
|
|
||||||
index f8cf99bddf..cbc4030031 100644
|
|
||||||
--- a/hw/i386/intel_iommu_internal.h
|
|
||||||
+++ b/hw/i386/intel_iommu_internal.h
|
|
||||||
@@ -267,7 +267,7 @@
|
|
||||||
/* For the low 64-bit of 128-bit */
|
|
||||||
#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
|
|
||||||
#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
|
|
||||||
-#define VTD_FRCD_PP(val) (((val) & 0x1) << 31)
|
|
||||||
+#define VTD_FRCD_PP(val) (((val) & 0x1ULL) << 31)
|
|
||||||
#define VTD_FRCD_IR_IDX(val) (((val) & 0xffffULL) << 48)
|
|
||||||
|
|
||||||
/* DMA Remapping Fault Conditions */
|
|
|
@ -1,33 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Richard Henderson <richard.henderson@linaro.org>
|
|
||||||
Date: Mon, 12 Aug 2024 12:58:42 +1000
|
|
||||||
Subject: [PATCH] target/i386: Do not apply REX to MMX operands
|
|
||||||
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Fixes: b3e22b2318a ("target/i386: add core of new i386 decoder")
|
|
||||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2495
|
|
||||||
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
|
|
||||||
Link: https://lore.kernel.org/r/20240812025844.58956-2-richard.henderson@linaro.org
|
|
||||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
(cherry picked from commit 416f2b16c02c618c0f233372ebfe343f9ee667d4)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
target/i386/tcg/decode-new.c.inc | 5 ++++-
|
|
||||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
|
|
||||||
index 4209d59ca8..09b8d2314a 100644
|
|
||||||
--- a/target/i386/tcg/decode-new.c.inc
|
|
||||||
+++ b/target/i386/tcg/decode-new.c.inc
|
|
||||||
@@ -1271,7 +1271,10 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
|
|
||||||
op->unit = X86_OP_SSE;
|
|
||||||
}
|
|
||||||
get_reg:
|
|
||||||
- op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
|
|
||||||
+ op->n = ((get_modrm(s, env) >> 3) & 7);
|
|
||||||
+ if (op->unit != X86_OP_MMX) {
|
|
||||||
+ op->n |= REX_R(s);
|
|
||||||
+ }
|
|
||||||
break;
|
|
||||||
|
|
||||||
case X86_TYPE_E: /* ALU modrm operand */
|
|
|
@ -1,42 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
|
|
||||||
Date: Fri, 9 Aug 2024 14:13:40 +0200
|
|
||||||
Subject: [PATCH] module: Prevent crash by resetting local_err in
|
|
||||||
module_load_qom_all()
|
|
||||||
|
|
||||||
Set local_err to NULL after it has been freed in error_report_err(). This
|
|
||||||
avoids triggering assert(*errp == NULL) failure in error_setv() when
|
|
||||||
local_err is reused in the loop.
|
|
||||||
|
|
||||||
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
|
|
||||||
Reviewed-by: Claudio Fontana <cfontana@suse.de>
|
|
||||||
Reviewed-by: Denis V. Lunev <den@openvz.org>
|
|
||||||
Link: https://lore.kernel.org/r/20240809121340.992049-2-alexander.ivanov@virtuozzo.com
|
|
||||||
[Do the same by moving the declaration instead. - Paolo]
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
(cherry picked from commit 940d802b24e63650e0eacad3714e2ce171cba17c)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
util/module.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/util/module.c b/util/module.c
|
|
||||||
index 32e263163c..3eb0f06df1 100644
|
|
||||||
--- a/util/module.c
|
|
||||||
+++ b/util/module.c
|
|
||||||
@@ -354,13 +354,13 @@ int module_load_qom(const char *type, Error **errp)
|
|
||||||
void module_load_qom_all(void)
|
|
||||||
{
|
|
||||||
const QemuModinfo *modinfo;
|
|
||||||
- Error *local_err = NULL;
|
|
||||||
|
|
||||||
if (module_loaded_qom_all) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (modinfo = module_info; modinfo->name != NULL; modinfo++) {
|
|
||||||
+ Error *local_err = NULL;
|
|
||||||
if (!modinfo->objs) {
|
|
||||||
continue;
|
|
||||||
}
|
|
|
@ -1,164 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Eric Blake <eblake@redhat.com>
|
|
||||||
Date: Wed, 7 Aug 2024 08:50:01 -0500
|
|
||||||
Subject: [PATCH] nbd/server: Plumb in new args to nbd_client_add()
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Upcoming patches to fix a CVE need to track an opaque pointer passed
|
|
||||||
in by the owner of a client object, as well as request for a time
|
|
||||||
limit on how fast negotiation must complete. Prepare for that by
|
|
||||||
changing the signature of nbd_client_new() and adding an accessor to
|
|
||||||
get at the opaque pointer, although for now the two servers
|
|
||||||
(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though
|
|
||||||
they pass in a new default timeout value.
|
|
||||||
|
|
||||||
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
|
||||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
|
||||||
Message-ID: <20240807174943.771624-11-eblake@redhat.com>
|
|
||||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
||||||
[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan]
|
|
||||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
|
||||||
(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
blockdev-nbd.c | 6 ++++--
|
|
||||||
include/block/nbd.h | 11 ++++++++++-
|
|
||||||
nbd/server.c | 20 +++++++++++++++++---
|
|
||||||
qemu-nbd.c | 4 +++-
|
|
||||||
4 files changed, 34 insertions(+), 7 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
|
||||||
index 213012435f..267a1de903 100644
|
|
||||||
--- a/blockdev-nbd.c
|
|
||||||
+++ b/blockdev-nbd.c
|
|
||||||
@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
|
||||||
nbd_update_server_watch(nbd_server);
|
|
||||||
|
|
||||||
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
|
|
||||||
- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz,
|
|
||||||
- nbd_blockdev_client_closed);
|
|
||||||
+ /* TODO - expose handshake timeout as QMP option */
|
|
||||||
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
|
||||||
+ nbd_server->tlscreds, nbd_server->tlsauthz,
|
|
||||||
+ nbd_blockdev_client_closed, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void nbd_update_server_watch(NBDServerData *s)
|
|
||||||
diff --git a/include/block/nbd.h b/include/block/nbd.h
|
|
||||||
index 4e7bd6342f..1d4d65922d 100644
|
|
||||||
--- a/include/block/nbd.h
|
|
||||||
+++ b/include/block/nbd.h
|
|
||||||
@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts;
|
|
||||||
|
|
||||||
extern const BlockExportDriver blk_exp_nbd;
|
|
||||||
|
|
||||||
+/*
|
|
||||||
+ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must
|
|
||||||
+ * succeed at NBD_OPT_GO before being forcefully dropped as too slow.
|
|
||||||
+ */
|
|
||||||
+#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
|
|
||||||
+
|
|
||||||
/* Handshake phase structs - this struct is passed on the wire */
|
|
||||||
|
|
||||||
typedef struct NBDOption {
|
|
||||||
@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp);
|
|
||||||
NBDExport *nbd_export_find(const char *name);
|
|
||||||
|
|
||||||
void nbd_client_new(QIOChannelSocket *sioc,
|
|
||||||
+ uint32_t handshake_max_secs,
|
|
||||||
QCryptoTLSCreds *tlscreds,
|
|
||||||
const char *tlsauthz,
|
|
||||||
- void (*close_fn)(NBDClient *, bool));
|
|
||||||
+ void (*close_fn)(NBDClient *, bool),
|
|
||||||
+ void *owner);
|
|
||||||
+void *nbd_client_owner(NBDClient *client);
|
|
||||||
void nbd_client_get(NBDClient *client);
|
|
||||||
void nbd_client_put(NBDClient *client);
|
|
||||||
|
|
||||||
diff --git a/nbd/server.c b/nbd/server.c
|
|
||||||
index 892797bb11..e50012499f 100644
|
|
||||||
--- a/nbd/server.c
|
|
||||||
+++ b/nbd/server.c
|
|
||||||
@@ -124,12 +124,14 @@ struct NBDMetaContexts {
|
|
||||||
struct NBDClient {
|
|
||||||
int refcount; /* atomic */
|
|
||||||
void (*close_fn)(NBDClient *client, bool negotiated);
|
|
||||||
+ void *owner;
|
|
||||||
|
|
||||||
QemuMutex lock;
|
|
||||||
|
|
||||||
NBDExport *exp;
|
|
||||||
QCryptoTLSCreds *tlscreds;
|
|
||||||
char *tlsauthz;
|
|
||||||
+ uint32_t handshake_max_secs;
|
|
||||||
QIOChannelSocket *sioc; /* The underlying data channel */
|
|
||||||
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
|
|
||||||
|
|
||||||
@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
|
|
||||||
|
|
||||||
qemu_co_mutex_init(&client->send_lock);
|
|
||||||
|
|
||||||
+ /* TODO - utilize client->handshake_max_secs */
|
|
||||||
if (nbd_negotiate(client, &local_err)) {
|
|
||||||
if (local_err) {
|
|
||||||
error_report_err(local_err);
|
|
||||||
@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
- * Create a new client listener using the given channel @sioc.
|
|
||||||
+ * Create a new client listener using the given channel @sioc and @owner.
|
|
||||||
* Begin servicing it in a coroutine. When the connection closes, call
|
|
||||||
- * @close_fn with an indication of whether the client completed negotiation.
|
|
||||||
+ * @close_fn with an indication of whether the client completed negotiation
|
|
||||||
+ * within @handshake_max_secs seconds (0 for unbounded).
|
|
||||||
*/
|
|
||||||
void nbd_client_new(QIOChannelSocket *sioc,
|
|
||||||
+ uint32_t handshake_max_secs,
|
|
||||||
QCryptoTLSCreds *tlscreds,
|
|
||||||
const char *tlsauthz,
|
|
||||||
- void (*close_fn)(NBDClient *, bool))
|
|
||||||
+ void (*close_fn)(NBDClient *, bool),
|
|
||||||
+ void *owner)
|
|
||||||
{
|
|
||||||
NBDClient *client;
|
|
||||||
Coroutine *co;
|
|
||||||
@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc,
|
|
||||||
object_ref(OBJECT(client->tlscreds));
|
|
||||||
}
|
|
||||||
client->tlsauthz = g_strdup(tlsauthz);
|
|
||||||
+ client->handshake_max_secs = handshake_max_secs;
|
|
||||||
client->sioc = sioc;
|
|
||||||
qio_channel_set_delay(QIO_CHANNEL(sioc), false);
|
|
||||||
object_ref(OBJECT(client->sioc));
|
|
||||||
client->ioc = QIO_CHANNEL(sioc);
|
|
||||||
object_ref(OBJECT(client->ioc));
|
|
||||||
client->close_fn = close_fn;
|
|
||||||
+ client->owner = owner;
|
|
||||||
|
|
||||||
co = qemu_coroutine_create(nbd_co_client_start, client);
|
|
||||||
qemu_coroutine_enter(co);
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+void *
|
|
||||||
+nbd_client_owner(NBDClient *client)
|
|
||||||
+{
|
|
||||||
+ return client->owner;
|
|
||||||
+}
|
|
||||||
diff --git a/qemu-nbd.c b/qemu-nbd.c
|
|
||||||
index d7b3ccab21..48e2fa5858 100644
|
|
||||||
--- a/qemu-nbd.c
|
|
||||||
+++ b/qemu-nbd.c
|
|
||||||
@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
|
||||||
|
|
||||||
nb_fds++;
|
|
||||||
nbd_update_server_watch();
|
|
||||||
- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed);
|
|
||||||
+ /* TODO - expose handshake timeout as command line option */
|
|
||||||
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
|
||||||
+ tlscreds, tlsauthz, nbd_client_closed, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void nbd_update_server_watch(void)
|
|
|
@ -1,172 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Eric Blake <eblake@redhat.com>
|
|
||||||
Date: Tue, 6 Aug 2024 13:53:00 -0500
|
|
||||||
Subject: [PATCH] nbd/server: CVE-2024-7409: Cap default max-connections to 100
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Allowing an unlimited number of clients to any web service is a recipe
|
|
||||||
for a rudimentary denial of service attack: the client merely needs to
|
|
||||||
open lots of sockets without closing them, until qemu no longer has
|
|
||||||
any more fds available to allocate.
|
|
||||||
|
|
||||||
For qemu-nbd, we default to allowing only 1 connection unless more are
|
|
||||||
explicitly asked for (-e or --shared); this was historically picked as
|
|
||||||
a nice default (without an explicit -t, a non-persistent qemu-nbd goes
|
|
||||||
away after a client disconnects, without needing any additional
|
|
||||||
follow-up commands), and we are not going to change that interface now
|
|
||||||
(besides, someday we want to point people towards qemu-storage-daemon
|
|
||||||
instead of qemu-nbd).
|
|
||||||
|
|
||||||
But for qemu proper, and the newer qemu-storage-daemon, the QMP
|
|
||||||
nbd-server-start command has historically had a default of unlimited
|
|
||||||
number of connections, in part because unlike qemu-nbd it is
|
|
||||||
inherently persistent until nbd-server-stop. Allowing multiple client
|
|
||||||
sockets is particularly useful for clients that can take advantage of
|
|
||||||
MULTI_CONN (creating parallel sockets to increase throughput),
|
|
||||||
although known clients that do so (such as libnbd's nbdcopy) typically
|
|
||||||
use only 8 or 16 connections (the benefits of scaling diminish once
|
|
||||||
more sockets are competing for kernel attention). Picking a number
|
|
||||||
large enough for typical use cases, but not unlimited, makes it
|
|
||||||
slightly harder for a malicious client to perform a denial of service
|
|
||||||
merely by opening lots of connections withot progressing through the
|
|
||||||
handshake.
|
|
||||||
|
|
||||||
This change does not eliminate CVE-2024-7409 on its own, but reduces
|
|
||||||
the chance for fd exhaustion or unlimited memory usage as an attack
|
|
||||||
surface. On the other hand, by itself, it makes it more obvious that
|
|
||||||
with a finite limit, we have the problem of an unauthenticated client
|
|
||||||
holding 100 fds opened as a way to block out a legitimate client from
|
|
||||||
being able to connect; thus, later patches will further add timeouts
|
|
||||||
to reject clients that are not making progress.
|
|
||||||
|
|
||||||
This is an INTENTIONAL change in behavior, and will break any client
|
|
||||||
of nbd-server-start that was not passing an explicit max-connections
|
|
||||||
parameter, yet expects more than 100 simultaneous connections. We are
|
|
||||||
not aware of any such client (as stated above, most clients aware of
|
|
||||||
MULTI_CONN get by just fine on 8 or 16 connections, and probably cope
|
|
||||||
with later connections failing by relying on the earlier connections;
|
|
||||||
libvirt has not yet been passing max-connections, but generally
|
|
||||||
creates NBD servers with the intent for a single client for the sake
|
|
||||||
of live storage migration; meanwhile, the KubeSAN project anticipates
|
|
||||||
a large cluster sharing multiple clients [up to 8 per node, and up to
|
|
||||||
100 nodes in a cluster], but it currently uses qemu-nbd with an
|
|
||||||
explicit --shared=0 rather than qemu-storage-daemon with
|
|
||||||
nbd-server-start).
|
|
||||||
|
|
||||||
We considered using a deprecation period (declare that omitting
|
|
||||||
max-parameters is deprecated, and make it mandatory in 3 releases -
|
|
||||||
then we don't need to pick an arbitrary default); that has zero risk
|
|
||||||
of breaking any apps that accidentally depended on more than 100
|
|
||||||
connections, and where such breakage might not be noticed under unit
|
|
||||||
testing but only under the larger loads of production usage. But it
|
|
||||||
does not close the denial-of-service hole until far into the future,
|
|
||||||
and requires all apps to change to add the parameter even if 100 was
|
|
||||||
good enough. It also has a drawback that any app (like libvirt) that
|
|
||||||
is accidentally relying on an unlimited default should seriously
|
|
||||||
consider their own CVE now, at which point they are going to change to
|
|
||||||
pass explicit max-connections sooner than waiting for 3 qemu releases.
|
|
||||||
Finally, if our changed default breaks an app, that app can always
|
|
||||||
pass in an explicit max-parameters with a larger value.
|
|
||||||
|
|
||||||
It is also intentional that the HMP interface to nbd-server-start is
|
|
||||||
not changed to expose max-connections (any client needing to fine-tune
|
|
||||||
things should be using QMP).
|
|
||||||
|
|
||||||
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
||||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
|
||||||
Message-ID: <20240807174943.771624-12-eblake@redhat.com>
|
|
||||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
||||||
[ericb: Expand commit message to summarize Dan's argument for why we
|
|
||||||
break corner-case back-compat behavior without a deprecation period]
|
|
||||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
|
||||||
(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
block/monitor/block-hmp-cmds.c | 3 ++-
|
|
||||||
blockdev-nbd.c | 8 ++++++++
|
|
||||||
include/block/nbd.h | 7 +++++++
|
|
||||||
qapi/block-export.json | 4 ++--
|
|
||||||
4 files changed, 19 insertions(+), 3 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
|
||||||
index d954bec6f1..bdf2eb50b6 100644
|
|
||||||
--- a/block/monitor/block-hmp-cmds.c
|
|
||||||
+++ b/block/monitor/block-hmp-cmds.c
|
|
||||||
@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
|
|
||||||
goto exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
- nbd_server_start(addr, NULL, NULL, 0, &local_err);
|
|
||||||
+ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS,
|
|
||||||
+ &local_err);
|
|
||||||
qapi_free_SocketAddress(addr);
|
|
||||||
if (local_err != NULL) {
|
|
||||||
goto exit;
|
|
||||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
|
||||||
index 267a1de903..24ba5382db 100644
|
|
||||||
--- a/blockdev-nbd.c
|
|
||||||
+++ b/blockdev-nbd.c
|
|
||||||
@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
|
|
||||||
|
|
||||||
void nbd_server_start_options(NbdServerOptions *arg, Error **errp)
|
|
||||||
{
|
|
||||||
+ if (!arg->has_max_connections) {
|
|
||||||
+ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz,
|
|
||||||
arg->max_connections, errp);
|
|
||||||
}
|
|
||||||
@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr,
|
|
||||||
{
|
|
||||||
SocketAddress *addr_flat = socket_address_flatten(addr);
|
|
||||||
|
|
||||||
+ if (!has_max_connections) {
|
|
||||||
+ max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp);
|
|
||||||
qapi_free_SocketAddress(addr_flat);
|
|
||||||
}
|
|
||||||
diff --git a/include/block/nbd.h b/include/block/nbd.h
|
|
||||||
index 1d4d65922d..d4f8b21aec 100644
|
|
||||||
--- a/include/block/nbd.h
|
|
||||||
+++ b/include/block/nbd.h
|
|
||||||
@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd;
|
|
||||||
*/
|
|
||||||
#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
|
|
||||||
|
|
||||||
+/*
|
|
||||||
+ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at
|
|
||||||
+ * once; must be large enough to allow a MULTI_CONN-aware client like
|
|
||||||
+ * nbdcopy to create its typical number of 8-16 sockets.
|
|
||||||
+ */
|
|
||||||
+#define NBD_DEFAULT_MAX_CONNECTIONS 100
|
|
||||||
+
|
|
||||||
/* Handshake phase structs - this struct is passed on the wire */
|
|
||||||
|
|
||||||
typedef struct NBDOption {
|
|
||||||
diff --git a/qapi/block-export.json b/qapi/block-export.json
|
|
||||||
index 3919a2d5b9..f45e4fd481 100644
|
|
||||||
--- a/qapi/block-export.json
|
|
||||||
+++ b/qapi/block-export.json
|
|
||||||
@@ -28,7 +28,7 @@
|
|
||||||
# @max-connections: The maximum number of connections to allow at the
|
|
||||||
# same time, 0 for unlimited. Setting this to 1 also stops the
|
|
||||||
# server from advertising multiple client support (since 5.2;
|
|
||||||
-# default: 0)
|
|
||||||
+# default: 100)
|
|
||||||
#
|
|
||||||
# Since: 4.2
|
|
||||||
##
|
|
||||||
@@ -63,7 +63,7 @@
|
|
||||||
# @max-connections: The maximum number of connections to allow at the
|
|
||||||
# same time, 0 for unlimited. Setting this to 1 also stops the
|
|
||||||
# server from advertising multiple client support (since 5.2;
|
|
||||||
-# default: 0).
|
|
||||||
+# default: 100).
|
|
||||||
#
|
|
||||||
# Errors:
|
|
||||||
# - if the server is already running
|
|
|
@ -1,123 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Eric Blake <eblake@redhat.com>
|
|
||||||
Date: Thu, 8 Aug 2024 16:05:08 -0500
|
|
||||||
Subject: [PATCH] nbd/server: CVE-2024-7409: Drop non-negotiating clients
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
A client that opens a socket but does not negotiate is merely hogging
|
|
||||||
qemu's resources (an open fd and a small amount of memory); and a
|
|
||||||
malicious client that can access the port where NBD is listening can
|
|
||||||
attempt a denial of service attack by intentionally opening and
|
|
||||||
abandoning lots of unfinished connections. The previous patch put a
|
|
||||||
default bound on the number of such ongoing connections, but once that
|
|
||||||
limit is hit, no more clients can connect (including legitimate ones).
|
|
||||||
The solution is to insist that clients complete handshake within a
|
|
||||||
reasonable time limit, defaulting to 10 seconds. A client that has
|
|
||||||
not successfully completed NBD_OPT_GO by then (including the case of
|
|
||||||
where the client didn't know TLS credentials to even reach the point
|
|
||||||
of NBD_OPT_GO) is wasting our time and does not deserve to stay
|
|
||||||
connected. Later patches will allow fine-tuning the limit away from
|
|
||||||
the default value (including disabling it for doing integration
|
|
||||||
testing of the handshake process itself).
|
|
||||||
|
|
||||||
Note that this patch in isolation actually makes it more likely to see
|
|
||||||
qemu SEGV after nbd-server-stop, as any client socket still connected
|
|
||||||
when the server shuts down will now be closed after 10 seconds rather
|
|
||||||
than at the client's whims. That will be addressed in the next patch.
|
|
||||||
|
|
||||||
For a demo of this patch in action:
|
|
||||||
$ qemu-nbd -f raw -r -t -e 10 file &
|
|
||||||
$ nbdsh --opt-mode -c '
|
|
||||||
H = list()
|
|
||||||
for i in range(20):
|
|
||||||
print(i)
|
|
||||||
H.insert(i, nbd.NBD())
|
|
||||||
H[i].set_opt_mode(True)
|
|
||||||
H[i].connect_uri("nbd://localhost")
|
|
||||||
'
|
|
||||||
$ kill $!
|
|
||||||
|
|
||||||
where later connections get to start progressing once earlier ones are
|
|
||||||
forcefully dropped for taking too long, rather than hanging.
|
|
||||||
|
|
||||||
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
||||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
|
||||||
Message-ID: <20240807174943.771624-13-eblake@redhat.com>
|
|
||||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
||||||
[eblake: rebase to changes earlier in series, reduce scope of timer]
|
|
||||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
|
||||||
(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
nbd/server.c | 28 +++++++++++++++++++++++++++-
|
|
||||||
nbd/trace-events | 1 +
|
|
||||||
2 files changed, 28 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/nbd/server.c b/nbd/server.c
|
|
||||||
index e50012499f..39285cc971 100644
|
|
||||||
--- a/nbd/server.c
|
|
||||||
+++ b/nbd/server.c
|
|
||||||
@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+static void nbd_handshake_timer_cb(void *opaque)
|
|
||||||
+{
|
|
||||||
+ QIOChannel *ioc = opaque;
|
|
||||||
+
|
|
||||||
+ trace_nbd_handshake_timer_cb();
|
|
||||||
+ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static coroutine_fn void nbd_co_client_start(void *opaque)
|
|
||||||
{
|
|
||||||
NBDClient *client = opaque;
|
|
||||||
Error *local_err = NULL;
|
|
||||||
+ QEMUTimer *handshake_timer = NULL;
|
|
||||||
|
|
||||||
qemu_co_mutex_init(&client->send_lock);
|
|
||||||
|
|
||||||
- /* TODO - utilize client->handshake_max_secs */
|
|
||||||
+ /*
|
|
||||||
+ * Create a timer to bound the time spent in negotiation. If the
|
|
||||||
+ * timer expires, it is likely nbd_negotiate will fail because the
|
|
||||||
+ * socket was shutdown.
|
|
||||||
+ */
|
|
||||||
+ if (client->handshake_max_secs > 0) {
|
|
||||||
+ handshake_timer = aio_timer_new(qemu_get_aio_context(),
|
|
||||||
+ QEMU_CLOCK_REALTIME,
|
|
||||||
+ SCALE_NS,
|
|
||||||
+ nbd_handshake_timer_cb,
|
|
||||||
+ client->sioc);
|
|
||||||
+ timer_mod(handshake_timer,
|
|
||||||
+ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
|
|
||||||
+ client->handshake_max_secs * NANOSECONDS_PER_SECOND);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
if (nbd_negotiate(client, &local_err)) {
|
|
||||||
if (local_err) {
|
|
||||||
error_report_err(local_err);
|
|
||||||
}
|
|
||||||
+ timer_free(handshake_timer);
|
|
||||||
client_close(client, false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ timer_free(handshake_timer);
|
|
||||||
WITH_QEMU_LOCK_GUARD(&client->lock) {
|
|
||||||
nbd_client_receive_next_request(client);
|
|
||||||
}
|
|
||||||
diff --git a/nbd/trace-events b/nbd/trace-events
|
|
||||||
index 00ae3216a1..cbd0a4ab7e 100644
|
|
||||||
--- a/nbd/trace-events
|
|
||||||
+++ b/nbd/trace-events
|
|
||||||
@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload
|
|
||||||
nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64
|
|
||||||
nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32
|
|
||||||
nbd_trip(void) "Reading request"
|
|
||||||
+nbd_handshake_timer_cb(void) "client took too long to negotiate"
|
|
||||||
|
|
||||||
# client-connection.c
|
|
||||||
nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64
|
|
|
@ -1,161 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Eric Blake <eblake@redhat.com>
|
|
||||||
Date: Wed, 7 Aug 2024 12:23:13 -0500
|
|
||||||
Subject: [PATCH] nbd/server: CVE-2024-7409: Close stray clients at server-stop
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
A malicious client can attempt to connect to an NBD server, and then
|
|
||||||
intentionally delay progress in the handshake, including if it does
|
|
||||||
not know the TLS secrets. Although the previous two patches reduce
|
|
||||||
this behavior by capping the default max-connections parameter and
|
|
||||||
killing slow clients, they did not eliminate the possibility of a
|
|
||||||
client waiting to close the socket until after the QMP nbd-server-stop
|
|
||||||
command is executed, at which point qemu would SEGV when trying to
|
|
||||||
dereference the NULL nbd_server global which is no longer present.
|
|
||||||
This amounts to a denial of service attack. Worse, if another NBD
|
|
||||||
server is started before the malicious client disconnects, I cannot
|
|
||||||
rule out additional adverse effects when the old client interferes
|
|
||||||
with the connection count of the new server (although the most likely
|
|
||||||
is a crash due to an assertion failure when checking
|
|
||||||
nbd_server->connections > 0).
|
|
||||||
|
|
||||||
For environments without this patch, the CVE can be mitigated by
|
|
||||||
ensuring (such as via a firewall) that only trusted clients can
|
|
||||||
connect to an NBD server. Note that using frameworks like libvirt
|
|
||||||
that ensure that TLS is used and that nbd-server-stop is not executed
|
|
||||||
while any trusted clients are still connected will only help if there
|
|
||||||
is also no possibility for an untrusted client to open a connection
|
|
||||||
but then stall on the NBD handshake.
|
|
||||||
|
|
||||||
Given the previous patches, it would be possible to guarantee that no
|
|
||||||
clients remain connected by having nbd-server-stop sleep for longer
|
|
||||||
than the default handshake deadline before finally freeing the global
|
|
||||||
nbd_server object, but that could make QMP non-responsive for a long
|
|
||||||
time. So intead, this patch fixes the problem by tracking all client
|
|
||||||
sockets opened while the server is running, and forcefully closing any
|
|
||||||
such sockets remaining without a completed handshake at the time of
|
|
||||||
nbd-server-stop, then waiting until the coroutines servicing those
|
|
||||||
sockets notice the state change. nbd-server-stop now has a second
|
|
||||||
AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the
|
|
||||||
blk_exp_close_all_type() that disconnects all clients that completed
|
|
||||||
handshakes), but forced socket shutdown is enough to progress the
|
|
||||||
coroutines and quickly tear down all clients before the server is
|
|
||||||
freed, thus finally fixing the CVE.
|
|
||||||
|
|
||||||
This patch relies heavily on the fact that nbd/server.c guarantees
|
|
||||||
that it only calls nbd_blockdev_client_closed() from the main loop
|
|
||||||
(see the assertion in nbd_client_put() and the hoops used in
|
|
||||||
nbd_client_put_nonzero() to achieve that); if we did not have that
|
|
||||||
guarantee, we would also need a mutex protecting our accesses of the
|
|
||||||
list of connections to survive re-entrancy from independent iothreads.
|
|
||||||
|
|
||||||
Although I did not actually try to test old builds, it looks like this
|
|
||||||
problem has existed since at least commit 862172f45c (v2.12.0, 2017) -
|
|
||||||
even back when that patch started using a QIONetListener to handle
|
|
||||||
listening on multiple sockets, nbd_server_free() was already unaware
|
|
||||||
that the nbd_blockdev_client_closed callback can be reached later by a
|
|
||||||
client thread that has not completed handshakes (and therefore the
|
|
||||||
client's socket never got added to the list closed in
|
|
||||||
nbd_export_close_all), despite that patch intentionally tearing down
|
|
||||||
the QIONetListener to prevent new clients.
|
|
||||||
|
|
||||||
Reported-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
|
|
||||||
Fixes: CVE-2024-7409
|
|
||||||
CC: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
|
||||||
Message-ID: <20240807174943.771624-14-eblake@redhat.com>
|
|
||||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
||||||
(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++-
|
|
||||||
1 file changed, 34 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
|
||||||
index 24ba5382db..f73409ae49 100644
|
|
||||||
--- a/blockdev-nbd.c
|
|
||||||
+++ b/blockdev-nbd.c
|
|
||||||
@@ -21,12 +21,18 @@
|
|
||||||
#include "io/channel-socket.h"
|
|
||||||
#include "io/net-listener.h"
|
|
||||||
|
|
||||||
+typedef struct NBDConn {
|
|
||||||
+ QIOChannelSocket *cioc;
|
|
||||||
+ QLIST_ENTRY(NBDConn) next;
|
|
||||||
+} NBDConn;
|
|
||||||
+
|
|
||||||
typedef struct NBDServerData {
|
|
||||||
QIONetListener *listener;
|
|
||||||
QCryptoTLSCreds *tlscreds;
|
|
||||||
char *tlsauthz;
|
|
||||||
uint32_t max_connections;
|
|
||||||
uint32_t connections;
|
|
||||||
+ QLIST_HEAD(, NBDConn) conns;
|
|
||||||
} NBDServerData;
|
|
||||||
|
|
||||||
static NBDServerData *nbd_server;
|
|
||||||
@@ -51,6 +57,14 @@ int nbd_server_max_connections(void)
|
|
||||||
|
|
||||||
static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
|
|
||||||
{
|
|
||||||
+ NBDConn *conn = nbd_client_owner(client);
|
|
||||||
+
|
|
||||||
+ assert(qemu_in_main_thread() && nbd_server);
|
|
||||||
+
|
|
||||||
+ object_unref(OBJECT(conn->cioc));
|
|
||||||
+ QLIST_REMOVE(conn, next);
|
|
||||||
+ g_free(conn);
|
|
||||||
+
|
|
||||||
nbd_client_put(client);
|
|
||||||
assert(nbd_server->connections > 0);
|
|
||||||
nbd_server->connections--;
|
|
||||||
@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
|
|
||||||
static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
|
||||||
gpointer opaque)
|
|
||||||
{
|
|
||||||
+ NBDConn *conn = g_new0(NBDConn, 1);
|
|
||||||
+
|
|
||||||
+ assert(qemu_in_main_thread() && nbd_server);
|
|
||||||
nbd_server->connections++;
|
|
||||||
+ object_ref(OBJECT(cioc));
|
|
||||||
+ conn->cioc = cioc;
|
|
||||||
+ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next);
|
|
||||||
nbd_update_server_watch(nbd_server);
|
|
||||||
|
|
||||||
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
|
|
||||||
/* TODO - expose handshake timeout as QMP option */
|
|
||||||
nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
|
||||||
nbd_server->tlscreds, nbd_server->tlsauthz,
|
|
||||||
- nbd_blockdev_client_closed, NULL);
|
|
||||||
+ nbd_blockdev_client_closed, conn);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void nbd_update_server_watch(NBDServerData *s)
|
|
||||||
@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s)
|
|
||||||
|
|
||||||
static void nbd_server_free(NBDServerData *server)
|
|
||||||
{
|
|
||||||
+ NBDConn *conn, *tmp;
|
|
||||||
+
|
|
||||||
if (!server) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ /*
|
|
||||||
+ * Forcefully close the listener socket, and any clients that have
|
|
||||||
+ * not yet disconnected on their own.
|
|
||||||
+ */
|
|
||||||
qio_net_listener_disconnect(server->listener);
|
|
||||||
object_unref(OBJECT(server->listener));
|
|
||||||
+ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
|
|
||||||
+ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
|
|
||||||
+ NULL);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0);
|
|
||||||
+
|
|
||||||
if (server->tlscreds) {
|
|
||||||
object_unref(OBJECT(server->tlscreds));
|
|
||||||
}
|
|
|
@ -1,47 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
|
|
||||||
Date: Tue, 20 Aug 2024 17:11:12 +0400
|
|
||||||
Subject: [PATCH] vnc: fix crash when no console attached
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Since commit e99441a3793b5 ("ui/curses: Do not use console_select()")
|
|
||||||
qemu_text_console_put_keysym() no longer checks for NULL console
|
|
||||||
argument, which leads to a later crash:
|
|
||||||
|
|
||||||
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
|
|
||||||
0x00005555559ee186 in qemu_text_console_handle_keysym (s=0x0, keysym=31) at ../ui/console-vc.c:332
|
|
||||||
332 } else if (s->echo && (keysym == '\r' || keysym == '\n')) {
|
|
||||||
(gdb) bt
|
|
||||||
#0 0x00005555559ee186 in qemu_text_console_handle_keysym (s=0x0, keysym=31) at ../ui/console-vc.c:332
|
|
||||||
#1 0x00005555559e18e5 in qemu_text_console_put_keysym (s=<optimized out>, keysym=<optimized out>) at ../ui/console.c:303
|
|
||||||
#2 0x00005555559f2e88 in do_key_event (vs=vs@entry=0x5555579045c0, down=down@entry=1, keycode=keycode@entry=60, sym=sym@entry=65471) at ../ui/vnc.c:2034
|
|
||||||
#3 0x00005555559f845c in ext_key_event (vs=0x5555579045c0, down=1, sym=65471, keycode=<optimized out>) at ../ui/vnc.c:2070
|
|
||||||
#4 protocol_client_msg (vs=0x5555579045c0, data=<optimized out>, len=<optimized out>) at ../ui/vnc.c:2514
|
|
||||||
#5 0x00005555559f515c in vnc_client_read (vs=0x5555579045c0) at ../ui/vnc.c:1607
|
|
||||||
|
|
||||||
Fixes: e99441a3793b5 ("ui/curses: Do not use console_select()")
|
|
||||||
Fixes: https://issues.redhat.com/browse/RHEL-50529
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
|
||||||
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
|
|
||||||
(picked from https://lore.kernel.org/qemu-devel/20240820131112.1267954-1-marcandre.lureau@redhat.com/)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
ui/vnc.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/ui/vnc.c b/ui/vnc.c
|
|
||||||
index b3fd78022b..953ea38318 100644
|
|
||||||
--- a/ui/vnc.c
|
|
||||||
+++ b/ui/vnc.c
|
|
||||||
@@ -1935,7 +1935,7 @@ static void do_key_event(VncState *vs, int down, int keycode, int sym)
|
|
||||||
}
|
|
||||||
|
|
||||||
qkbd_state_key_event(vs->vd->kbd, qcode, down);
|
|
||||||
- if (!qemu_console_is_graphic(vs->vd->dcl.con)) {
|
|
||||||
+ if (QEMU_IS_TEXT_CONSOLE(vs->vd->dcl.con)) {
|
|
||||||
QemuTextConsole *con = QEMU_TEXT_CONSOLE(vs->vd->dcl.con);
|
|
||||||
bool numlock = qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_NUMLOCK);
|
|
||||||
bool control = qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_CTRL);
|
|
|
@ -1,89 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Eric Blake <eblake@redhat.com>
|
|
||||||
Date: Thu, 22 Aug 2024 09:35:29 -0500
|
|
||||||
Subject: [PATCH] nbd/server: CVE-2024-7409: Avoid use-after-free when closing
|
|
||||||
server
|
|
||||||
|
|
||||||
Commit 3e7ef738 plugged the use-after-free of the global nbd_server
|
|
||||||
object, but overlooked a use-after-free of nbd_server->listener.
|
|
||||||
Although this race is harder to hit, notice that our shutdown path
|
|
||||||
first drops the reference count of nbd_server->listener, then triggers
|
|
||||||
actions that can result in a pending client reaching the
|
|
||||||
nbd_blockdev_client_closed() callback, which in turn calls
|
|
||||||
qio_net_listener_set_client_func on a potentially stale object.
|
|
||||||
|
|
||||||
If we know we don't want any more clients to connect, and have already
|
|
||||||
told the listener socket to shut down, then we should not be trying to
|
|
||||||
update the listener socket's associated function.
|
|
||||||
|
|
||||||
Reproducer:
|
|
||||||
|
|
||||||
> #!/usr/bin/python3
|
|
||||||
>
|
|
||||||
> import os
|
|
||||||
> from threading import Thread
|
|
||||||
>
|
|
||||||
> def start_stop():
|
|
||||||
> while 1:
|
|
||||||
> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start",
|
|
||||||
+"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'')
|
|
||||||
> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'')
|
|
||||||
>
|
|
||||||
> def nbd_list():
|
|
||||||
> while 1:
|
|
||||||
> os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock')
|
|
||||||
>
|
|
||||||
> def test():
|
|
||||||
> sst = Thread(target=start_stop)
|
|
||||||
> sst.start()
|
|
||||||
> nlt = Thread(target=nbd_list)
|
|
||||||
> nlt.start()
|
|
||||||
>
|
|
||||||
> sst.join()
|
|
||||||
> nlt.join()
|
|
||||||
>
|
|
||||||
> test()
|
|
||||||
|
|
||||||
Fixes: CVE-2024-7409
|
|
||||||
Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop")
|
|
||||||
CC: qemu-stable@nongnu.org
|
|
||||||
Reported-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
|
|
||||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
|
||||||
Message-ID: <20240822143617.800419-2-eblake@redhat.com>
|
|
||||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
||||||
(cherry picked from commit 3874f5f73c441c52f1c699c848d463b0eda01e4c)
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
blockdev-nbd.c | 12 ++++++++----
|
|
||||||
1 file changed, 8 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
|
||||||
index f73409ae49..b36f41b7c5 100644
|
|
||||||
--- a/blockdev-nbd.c
|
|
||||||
+++ b/blockdev-nbd.c
|
|
||||||
@@ -92,10 +92,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
|
||||||
|
|
||||||
static void nbd_update_server_watch(NBDServerData *s)
|
|
||||||
{
|
|
||||||
- if (!s->max_connections || s->connections < s->max_connections) {
|
|
||||||
- qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL);
|
|
||||||
- } else {
|
|
||||||
- qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
|
|
||||||
+ if (s->listener) {
|
|
||||||
+ if (!s->max_connections || s->connections < s->max_connections) {
|
|
||||||
+ qio_net_listener_set_client_func(s->listener, nbd_accept, NULL,
|
|
||||||
+ NULL);
|
|
||||||
+ } else {
|
|
||||||
+ qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -113,6 +116,7 @@ static void nbd_server_free(NBDServerData *server)
|
|
||||||
*/
|
|
||||||
qio_net_listener_disconnect(server->listener);
|
|
||||||
object_unref(OBJECT(server->listener));
|
|
||||||
+ server->listener = NULL;
|
|
||||||
QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
|
|
||||||
qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
|
|
||||||
NULL);
|
|
|
@ -1,134 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: David Hildenbrand <david@redhat.com>
|
|
||||||
Date: Wed, 28 Aug 2024 11:07:43 +0200
|
|
||||||
Subject: [PATCH] softmmu/physmem: fix memory leak in dirty_memory_extend()
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
As reported by Peter, we might be leaking memory when removing the
|
|
||||||
highest RAMBlock (in the weird ram_addr_t space), and adding a new one.
|
|
||||||
|
|
||||||
We will fail to realize that we already allocated bitmaps for more
|
|
||||||
dirty memory blocks, and effectively discard the pointers to them.
|
|
||||||
|
|
||||||
Fix it by getting rid of last_ram_page() and by remembering the number
|
|
||||||
of dirty memory blocks that have been allocated already.
|
|
||||||
|
|
||||||
While at it, let's use "unsigned int" for the number of blocks, which
|
|
||||||
should be sufficient until we reach ~32 exabytes.
|
|
||||||
|
|
||||||
Looks like this leak was introduced as we switched from using a single
|
|
||||||
bitmap_zero_extend() to allocating multiple bitmaps:
|
|
||||||
bitmap_zero_extend() relies on g_renew() which should have taken care of
|
|
||||||
this.
|
|
||||||
|
|
||||||
Resolves: https://lkml.kernel.org/r/CAFEAcA-k7a+VObGAfCFNygQNfCKL=AfX6A4kScq=VSSK0peqPg@mail.gmail.com
|
|
||||||
Reported-by: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM hotplug")
|
|
||||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
||||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
|
||||||
Tested-by: Peter Maydell <peter.maydell@linaro.org>
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Cc: Stefan Hajnoczi <stefanha@redhat.com>
|
|
||||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Cc: Peter Xu <peterx@redhat.com>
|
|
||||||
Cc: "Philippe Mathieu-Daudé" <philmd@linaro.org>
|
|
||||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
|
||||||
(picked from https://lore.kernel.org/qemu-devel/20240828090743.128647-1-david@redhat.com/)
|
|
||||||
[FE: backport - remove not-yet-existing variable in context of hunk touching ram_block_add()]
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
include/exec/ramlist.h | 1 +
|
|
||||||
system/physmem.c | 35 +++++++++--------------------------
|
|
||||||
2 files changed, 10 insertions(+), 26 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
|
|
||||||
index 2ad2a81acc..d9cfe530be 100644
|
|
||||||
--- a/include/exec/ramlist.h
|
|
||||||
+++ b/include/exec/ramlist.h
|
|
||||||
@@ -50,6 +50,7 @@ typedef struct RAMList {
|
|
||||||
/* RCU-enabled, writes protected by the ramlist lock. */
|
|
||||||
QLIST_HEAD(, RAMBlock) blocks;
|
|
||||||
DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM];
|
|
||||||
+ unsigned int num_dirty_blocks;
|
|
||||||
uint32_t version;
|
|
||||||
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
|
|
||||||
} RAMList;
|
|
||||||
diff --git a/system/physmem.c b/system/physmem.c
|
|
||||||
index a4fe3d2bf8..78f7db1121 100644
|
|
||||||
--- a/system/physmem.c
|
|
||||||
+++ b/system/physmem.c
|
|
||||||
@@ -1497,18 +1497,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
|
|
||||||
return offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
-static unsigned long last_ram_page(void)
|
|
||||||
-{
|
|
||||||
- RAMBlock *block;
|
|
||||||
- ram_addr_t last = 0;
|
|
||||||
-
|
|
||||||
- RCU_READ_LOCK_GUARD();
|
|
||||||
- RAMBLOCK_FOREACH(block) {
|
|
||||||
- last = MAX(last, block->offset + block->max_length);
|
|
||||||
- }
|
|
||||||
- return last >> TARGET_PAGE_BITS;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
@@ -1762,13 +1750,11 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Called with ram_list.mutex held */
|
|
||||||
-static void dirty_memory_extend(ram_addr_t old_ram_size,
|
|
||||||
- ram_addr_t new_ram_size)
|
|
||||||
+static void dirty_memory_extend(ram_addr_t new_ram_size)
|
|
||||||
{
|
|
||||||
- ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
|
|
||||||
- DIRTY_MEMORY_BLOCK_SIZE);
|
|
||||||
- ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
|
|
||||||
- DIRTY_MEMORY_BLOCK_SIZE);
|
|
||||||
+ unsigned int old_num_blocks = ram_list.num_dirty_blocks;
|
|
||||||
+ unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size,
|
|
||||||
+ DIRTY_MEMORY_BLOCK_SIZE);
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* Only need to extend if block count increased */
|
|
||||||
@@ -1800,6 +1786,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
|
|
||||||
g_free_rcu(old_blocks, rcu);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ ram_list.num_dirty_blocks = new_num_blocks;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void ram_block_add(RAMBlock *new_block, Error **errp)
|
|
||||||
@@ -1808,11 +1796,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
|
||||||
const bool shared = qemu_ram_is_shared(new_block);
|
|
||||||
RAMBlock *block;
|
|
||||||
RAMBlock *last_block = NULL;
|
|
||||||
- ram_addr_t old_ram_size, new_ram_size;
|
|
||||||
+ ram_addr_t ram_size;
|
|
||||||
Error *err = NULL;
|
|
||||||
|
|
||||||
- old_ram_size = last_ram_page();
|
|
||||||
-
|
|
||||||
qemu_mutex_lock_ramlist();
|
|
||||||
new_block->offset = find_ram_offset(new_block->max_length);
|
|
||||||
|
|
||||||
@@ -1840,11 +1826,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
- new_ram_size = MAX(old_ram_size,
|
|
||||||
- (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
|
|
||||||
- if (new_ram_size > old_ram_size) {
|
|
||||||
- dirty_memory_extend(old_ram_size, new_ram_size);
|
|
||||||
- }
|
|
||||||
+ ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
|
|
||||||
+ dirty_memory_extend(ram_size);
|
|
||||||
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
|
|
||||||
* QLIST (which has an RCU-friendly variant) does not have insertion at
|
|
||||||
* tail, so save the last element in last_block.
|
|
|
@ -1,104 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
Date: Thu, 7 Nov 2024 17:51:13 +0100
|
|
||||||
Subject: [PATCH] block/reqlist: allow adding overlapping requests
|
|
||||||
|
|
||||||
Allow overlapping request by removing the assert that made it
|
|
||||||
impossible. There are only two callers:
|
|
||||||
|
|
||||||
1. block_copy_task_create()
|
|
||||||
|
|
||||||
It already asserts the very same condition before calling
|
|
||||||
reqlist_init_req().
|
|
||||||
|
|
||||||
2. cbw_snapshot_read_lock()
|
|
||||||
|
|
||||||
There is no need to have read requests be non-overlapping in
|
|
||||||
copy-before-write when used for snapshot-access. In fact, there was no
|
|
||||||
protection against two callers of cbw_snapshot_read_lock() calling
|
|
||||||
reqlist_init_req() with overlapping ranges and this could lead to an
|
|
||||||
assertion failure [1].
|
|
||||||
|
|
||||||
In particular, with the reproducer script below [0], two
|
|
||||||
cbw_co_snapshot_block_status() callers could race, with the second
|
|
||||||
calling reqlist_init_req() before the first one finishes and removes
|
|
||||||
its conflicting request.
|
|
||||||
|
|
||||||
[0]:
|
|
||||||
|
|
||||||
> #!/bin/bash -e
|
|
||||||
> dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024
|
|
||||||
> ./qemu-img create /tmp/fleecing.raw -f raw 1G
|
|
||||||
> (
|
|
||||||
> ./qemu-system-x86_64 --qmp stdio \
|
|
||||||
> --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \
|
|
||||||
> --blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \
|
|
||||||
> <<EOF
|
|
||||||
> {"execute": "qmp_capabilities"}
|
|
||||||
> {"execute": "blockdev-add", "arguments": { "driver": "copy-before-write", "file": "node0", "target": "node1", "node-name": "node3" } }
|
|
||||||
> {"execute": "blockdev-add", "arguments": { "driver": "snapshot-access", "file": "node3", "node-name": "snap0" } }
|
|
||||||
> {"execute": "nbd-server-start", "arguments": {"addr": { "type": "unix", "data": { "path": "/tmp/nbd.socket" } } } }
|
|
||||||
> {"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": "snap0", "type": "nbd", "name": "exp0"}}
|
|
||||||
> EOF
|
|
||||||
> ) &
|
|
||||||
> sleep 5
|
|
||||||
> while true; do
|
|
||||||
> ./qemu-nbd -d /dev/nbd0
|
|
||||||
> ./qemu-nbd -c /dev/nbd0 nbd:unix:/tmp/nbd.socket:exportname=exp0 -f raw -r
|
|
||||||
> nbdinfo --map 'nbd+unix:///exp0?socket=/tmp/nbd.socket'
|
|
||||||
> done
|
|
||||||
|
|
||||||
[1]:
|
|
||||||
|
|
||||||
> #5 0x000071e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101
|
|
||||||
> #6 0x0000615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23
|
|
||||||
> #7 0x00006152853e2d98 in cbw_snapshot_read_lock (...) at ../block/copy-before-write.c:237
|
|
||||||
> #8 0x00006152853e3068 in cbw_co_snapshot_block_status (...) at ../block/copy-before-write.c:304
|
|
||||||
> #9 0x00006152853f4d22 in bdrv_co_snapshot_block_status (...) at ../block/io.c:3726
|
|
||||||
> #10 0x000061528543a63e in snapshot_access_co_block_status (...) at ../block/snapshot-access.c:48
|
|
||||||
> #11 0x00006152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474
|
|
||||||
> #12 0x00006152853f2016 in bdrv_co_common_block_status_above (...) at ../block/io.c:2652
|
|
||||||
> #13 0x00006152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732
|
|
||||||
> #14 0x00006152853d9a86 in blk_co_block_status_above (...) at ../block/block-backend.c:1473
|
|
||||||
> #15 0x000061528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374
|
|
||||||
> #16 0x000061528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481
|
|
||||||
> #17 0x000061528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978
|
|
||||||
> #18 0x000061528538f906 in nbd_trip (...) at ../nbd/server.c:3121
|
|
||||||
> #19 0x00006152855a7caf in coroutine_trampoline (...) at ../util/coroutine-ucontext.c:175
|
|
||||||
|
|
||||||
Cc: qemu-stable@nongnu.org
|
|
||||||
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
|
||||||
---
|
|
||||||
block/copy-before-write.c | 3 ++-
|
|
||||||
block/reqlist.c | 2 --
|
|
||||||
2 files changed, 2 insertions(+), 3 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
|
||||||
index 8aba27a71d..3698b3bc60 100644
|
|
||||||
--- a/block/copy-before-write.c
|
|
||||||
+++ b/block/copy-before-write.c
|
|
||||||
@@ -65,7 +65,8 @@ typedef struct BDRVCopyBeforeWriteState {
|
|
||||||
|
|
||||||
/*
|
|
||||||
* @frozen_read_reqs: current read requests for fleecing user in bs->file
|
|
||||||
- * node. These areas must not be rewritten by guest.
|
|
||||||
+ * node. These areas must not be rewritten by guest. There can be multiple
|
|
||||||
+ * overlapping read requests.
|
|
||||||
*/
|
|
||||||
BlockReqList frozen_read_reqs;
|
|
||||||
|
|
||||||
diff --git a/block/reqlist.c b/block/reqlist.c
|
|
||||||
index 08cb57cfa4..098e807378 100644
|
|
||||||
--- a/block/reqlist.c
|
|
||||||
+++ b/block/reqlist.c
|
|
||||||
@@ -20,8 +20,6 @@
|
|
||||||
void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
|
|
||||||
int64_t bytes)
|
|
||||||
{
|
|
||||||
- assert(!reqlist_find_conflict(reqs, offset, bytes));
|
|
||||||
-
|
|
||||||
*req = (BlockReq) {
|
|
||||||
.offset = offset,
|
|
||||||
.bytes = bytes,
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,8 +28,7 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
||||||
adapt to removal of QEMUFileOps
|
adapt to removal of QEMUFileOps
|
||||||
improve condition for entering final stage
|
improve condition for entering final stage
|
||||||
adapt to QAPI and other changes for 8.2
|
adapt to QAPI and other changes for 8.2
|
||||||
make sure to not call vm_start() from coroutine
|
make sure to not call vm_start() from coroutine]
|
||||||
stop CPU throttling after finishing]
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||||
---
|
---
|
||||||
hmp-commands-info.hx | 13 +
|
hmp-commands-info.hx | 13 +
|
||||||
|
@ -37,13 +36,13 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||||
include/migration/snapshot.h | 2 +
|
include/migration/snapshot.h | 2 +
|
||||||
include/monitor/hmp.h | 3 +
|
include/monitor/hmp.h | 3 +
|
||||||
migration/meson.build | 1 +
|
migration/meson.build | 1 +
|
||||||
migration/savevm-async.c | 545 +++++++++++++++++++++++++++++++++++
|
migration/savevm-async.c | 538 +++++++++++++++++++++++++++++++++++
|
||||||
monitor/hmp-cmds.c | 38 +++
|
monitor/hmp-cmds.c | 38 +++
|
||||||
qapi/migration.json | 34 +++
|
qapi/migration.json | 34 +++
|
||||||
qapi/misc.json | 18 ++
|
qapi/misc.json | 18 ++
|
||||||
qemu-options.hx | 12 +
|
qemu-options.hx | 12 +
|
||||||
system/vl.c | 10 +
|
system/vl.c | 10 +
|
||||||
11 files changed, 693 insertions(+)
|
11 files changed, 686 insertions(+)
|
||||||
create mode 100644 migration/savevm-async.c
|
create mode 100644 migration/savevm-async.c
|
||||||
|
|
||||||
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
|
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
|
||||||
|
@ -141,10 +140,10 @@ index 95d1cf2250..800f12a60d 100644
|
||||||
'threadinfo.c',
|
'threadinfo.c',
|
||||||
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
|
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
|
||||||
new file mode 100644
|
new file mode 100644
|
||||||
index 0000000000..1af32604c7
|
index 0000000000..72cf6588c2
|
||||||
--- /dev/null
|
--- /dev/null
|
||||||
+++ b/migration/savevm-async.c
|
+++ b/migration/savevm-async.c
|
||||||
@@ -0,0 +1,545 @@
|
@@ -0,0 +1,538 @@
|
||||||
+#include "qemu/osdep.h"
|
+#include "qemu/osdep.h"
|
||||||
+#include "migration/channel-savevm-async.h"
|
+#include "migration/channel-savevm-async.h"
|
||||||
+#include "migration/migration.h"
|
+#include "migration/migration.h"
|
||||||
|
@ -155,7 +154,6 @@ index 0000000000..1af32604c7
|
||||||
+#include "migration/global_state.h"
|
+#include "migration/global_state.h"
|
||||||
+#include "migration/ram.h"
|
+#include "migration/ram.h"
|
||||||
+#include "migration/qemu-file.h"
|
+#include "migration/qemu-file.h"
|
||||||
+#include "sysemu/cpu-throttle.h"
|
|
||||||
+#include "sysemu/sysemu.h"
|
+#include "sysemu/sysemu.h"
|
||||||
+#include "sysemu/runstate.h"
|
+#include "sysemu/runstate.h"
|
||||||
+#include "block/block.h"
|
+#include "block/block.h"
|
||||||
|
@ -344,12 +342,6 @@ index 0000000000..1af32604c7
|
||||||
+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
|
+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
|
||||||
+ ms->to_dst_file = NULL;
|
+ ms->to_dst_file = NULL;
|
||||||
+
|
+
|
||||||
+ /*
|
|
||||||
+ * Same as in migration_iteration_finish(): saving RAM might've turned on CPU throttling for
|
|
||||||
+ * auto-converge, make sure to disable it.
|
|
||||||
+ */
|
|
||||||
+ cpu_throttle_stop();
|
|
||||||
+
|
|
||||||
+ qemu_savevm_state_cleanup();
|
+ qemu_savevm_state_cleanup();
|
||||||
+
|
+
|
||||||
+ ret = save_snapshot_cleanup();
|
+ ret = save_snapshot_cleanup();
|
||||||
|
|
|
@ -193,10 +193,10 @@ index 32fd4a34fd..36a0cd8cc8 100644
|
||||||
|
|
||||||
/*
|
/*
|
||||||
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
|
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
|
||||||
index 1af32604c7..be2035cd2e 100644
|
index 72cf6588c2..fb4e8ea689 100644
|
||||||
--- a/migration/savevm-async.c
|
--- a/migration/savevm-async.c
|
||||||
+++ b/migration/savevm-async.c
|
+++ b/migration/savevm-async.c
|
||||||
@@ -386,7 +386,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
|
@@ -379,7 +379,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
|
||||||
|
|
||||||
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
|
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
|
||||||
&snap_state.bs_pos));
|
&snap_state.bs_pos));
|
||||||
|
@ -205,7 +205,7 @@ index 1af32604c7..be2035cd2e 100644
|
||||||
|
|
||||||
if (!snap_state.file) {
|
if (!snap_state.file) {
|
||||||
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
|
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
|
||||||
@@ -510,7 +510,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
|
@@ -503,7 +503,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
|
||||||
blk_op_block_all(be, blocker);
|
blk_op_block_all(be, blocker);
|
||||||
|
|
||||||
/* restore the VM state */
|
/* restore the VM state */
|
||||||
|
|
|
@ -120,10 +120,10 @@ index e99914eaa4..6bba803f94 100644
|
||||||
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
|
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
|
||||||
system_ss.add(files('block-ram-registrar.c'))
|
system_ss.add(files('block-ram-registrar.c'))
|
||||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
||||||
index bdf2eb50b6..439a7a14c8 100644
|
index d954bec6f1..5000c084c5 100644
|
||||||
--- a/block/monitor/block-hmp-cmds.c
|
--- a/block/monitor/block-hmp-cmds.c
|
||||||
+++ b/block/monitor/block-hmp-cmds.c
|
+++ b/block/monitor/block-hmp-cmds.c
|
||||||
@@ -1009,3 +1009,42 @@ void hmp_change_medium(Monitor *mon, const char *device, const char *target,
|
@@ -1008,3 +1008,42 @@ void hmp_change_medium(Monitor *mon, const char *device, const char *target,
|
||||||
qmp_blockdev_change_medium(device, NULL, target, arg, true, force,
|
qmp_blockdev_change_medium(device, NULL, target, arg, true, force,
|
||||||
!!read_only, read_only_mode, errp);
|
!!read_only, read_only_mode, errp);
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,10 +82,10 @@ index cc618e4561..12d662e9d4 100644
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
||||||
index 28f6a096cd..ef4e666303 100644
|
index 853e01a1eb..47b3cdd09f 100644
|
||||||
--- a/block/copy-before-write.c
|
--- a/block/copy-before-write.c
|
||||||
+++ b/block/copy-before-write.c
|
+++ b/block/copy-before-write.c
|
||||||
@@ -478,7 +478,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
|
@@ -477,7 +477,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
|
||||||
|
|
||||||
s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
|
s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
|
||||||
s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
|
s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
|
||||||
|
|
|
@ -36,10 +36,10 @@ index 1963e47ab9..fe69723ada 100644
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
||||||
index ef4e666303..adb27649a8 100644
|
index 47b3cdd09f..bba58326d7 100644
|
||||||
--- a/block/copy-before-write.c
|
--- a/block/copy-before-write.c
|
||||||
+++ b/block/copy-before-write.c
|
+++ b/block/copy-before-write.c
|
||||||
@@ -547,6 +547,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
@@ -546,6 +546,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
||||||
BlockDriverState *target,
|
BlockDriverState *target,
|
||||||
const char *filter_node_name,
|
const char *filter_node_name,
|
||||||
bool discard_source,
|
bool discard_source,
|
||||||
|
@ -47,7 +47,7 @@ index ef4e666303..adb27649a8 100644
|
||||||
BlockCopyState **bcs,
|
BlockCopyState **bcs,
|
||||||
Error **errp)
|
Error **errp)
|
||||||
{
|
{
|
||||||
@@ -565,6 +566,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
@@ -564,6 +565,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
||||||
}
|
}
|
||||||
qdict_put_str(opts, "file", bdrv_get_node_name(source));
|
qdict_put_str(opts, "file", bdrv_get_node_name(source));
|
||||||
qdict_put_str(opts, "target", bdrv_get_node_name(target));
|
qdict_put_str(opts, "target", bdrv_get_node_name(target));
|
||||||
|
|
|
@ -68,10 +68,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||||
3 files changed, 142 insertions(+), 4 deletions(-)
|
3 files changed, 142 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
||||||
index 439a7a14c8..d0e7771dcc 100644
|
index 5000c084c5..70b3de4c7e 100644
|
||||||
--- a/block/monitor/block-hmp-cmds.c
|
--- a/block/monitor/block-hmp-cmds.c
|
||||||
+++ b/block/monitor/block-hmp-cmds.c
|
+++ b/block/monitor/block-hmp-cmds.c
|
||||||
@@ -1044,6 +1044,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
|
@@ -1043,6 +1043,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
|
||||||
NULL, NULL,
|
NULL, NULL,
|
||||||
devlist, qdict_haskey(qdict, "speed"), speed,
|
devlist, qdict_haskey(qdict, "speed"), speed,
|
||||||
false, 0, // BackupPerf max-workers
|
false, 0, // BackupPerf max-workers
|
||||||
|
|
|
@ -21,7 +21,7 @@ Tested-by: Friedrich Weber <f.weber@proxmox.com>
|
||||||
3 files changed, 22 insertions(+), 6 deletions(-)
|
3 files changed, 22 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
||||||
index adb27649a8..a5bb4d14f6 100644
|
index bba58326d7..50cc4c7aae 100644
|
||||||
--- a/block/copy-before-write.c
|
--- a/block/copy-before-write.c
|
||||||
+++ b/block/copy-before-write.c
|
+++ b/block/copy-before-write.c
|
||||||
@@ -27,6 +27,7 @@
|
@@ -27,6 +27,7 @@
|
||||||
|
@ -32,7 +32,7 @@ index adb27649a8..a5bb4d14f6 100644
|
||||||
#include "qemu/cutils.h"
|
#include "qemu/cutils.h"
|
||||||
#include "qapi/error.h"
|
#include "qapi/error.h"
|
||||||
#include "block/block_int.h"
|
#include "block/block_int.h"
|
||||||
@@ -75,7 +76,8 @@ typedef struct BDRVCopyBeforeWriteState {
|
@@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
|
||||||
* @snapshot_error is normally zero. But on first copy-before-write failure
|
* @snapshot_error is normally zero. But on first copy-before-write failure
|
||||||
* when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
|
* when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
|
||||||
* value of this error (<0). After that all in-flight and further
|
* value of this error (<0). After that all in-flight and further
|
||||||
|
@ -42,7 +42,7 @@ index adb27649a8..a5bb4d14f6 100644
|
||||||
*/
|
*/
|
||||||
int snapshot_error;
|
int snapshot_error;
|
||||||
} BDRVCopyBeforeWriteState;
|
} BDRVCopyBeforeWriteState;
|
||||||
@@ -115,7 +117,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
|
@@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ index adb27649a8..a5bb4d14f6 100644
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,9 +141,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
|
@@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
|
||||||
WITH_QEMU_LOCK_GUARD(&s->lock) {
|
WITH_QEMU_LOCK_GUARD(&s->lock) {
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
|
assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
|
||||||
|
@ -62,7 +62,7 @@ index adb27649a8..a5bb4d14f6 100644
|
||||||
} else {
|
} else {
|
||||||
bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
|
bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
|
||||||
}
|
}
|
||||||
@@ -215,7 +215,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
@@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
||||||
|
|
||||||
QEMU_LOCK_GUARD(&s->lock);
|
QEMU_LOCK_GUARD(&s->lock);
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ index adb27649a8..a5bb4d14f6 100644
|
||||||
g_free(req);
|
g_free(req);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@@ -586,6 +586,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
|
@@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
|
||||||
bdrv_unref(bs);
|
bdrv_unref(bs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,103 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
Date: Thu, 7 Nov 2024 17:51:14 +0100
|
|
||||||
Subject: [PATCH] PVE backup: fixup error handling for fleecing
|
|
||||||
|
|
||||||
The drained section needs to be terminated before breaking out of the
|
|
||||||
loop in the error scenarios. Otherwise, guest IO on the drive would
|
|
||||||
become stuck.
|
|
||||||
|
|
||||||
If the job is created successfully, then the job completion callback
|
|
||||||
will clean up the snapshot access block nodes. In case failure
|
|
||||||
happened before the job is created, there was no cleanup for the
|
|
||||||
snapshot access block nodes yet. Add it.
|
|
||||||
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
pve-backup.c | 38 +++++++++++++++++++++++++-------------
|
|
||||||
1 file changed, 25 insertions(+), 13 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/pve-backup.c b/pve-backup.c
|
|
||||||
index 4e730aa3da..c4178758b3 100644
|
|
||||||
--- a/pve-backup.c
|
|
||||||
+++ b/pve-backup.c
|
|
||||||
@@ -357,22 +357,23 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
|
|
||||||
qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
|
||||||
}
|
|
||||||
|
|
||||||
-static void pvebackup_complete_cb(void *opaque, int ret)
|
|
||||||
+static void cleanup_snapshot_access(PVEBackupDevInfo *di)
|
|
||||||
{
|
|
||||||
- PVEBackupDevInfo *di = opaque;
|
|
||||||
- di->completed_ret = ret;
|
|
||||||
-
|
|
||||||
- /*
|
|
||||||
- * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
|
|
||||||
- * won't be done as a coroutine anyways:
|
|
||||||
- * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
|
|
||||||
- * just spawn a BH calling bdrv_unref().
|
|
||||||
- * - For cbw, draining would need to spawn a BH.
|
|
||||||
- */
|
|
||||||
if (di->fleecing.snapshot_access) {
|
|
||||||
bdrv_unref(di->fleecing.snapshot_access);
|
|
||||||
di->fleecing.snapshot_access = NULL;
|
|
||||||
}
|
|
||||||
+ if (di->fleecing.cbw) {
|
|
||||||
+ bdrv_cbw_drop(di->fleecing.cbw);
|
|
||||||
+ di->fleecing.cbw = NULL;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void pvebackup_complete_cb(void *opaque, int ret)
|
|
||||||
+{
|
|
||||||
+ PVEBackupDevInfo *di = opaque;
|
|
||||||
+ di->completed_ret = ret;
|
|
||||||
+
|
|
||||||
if (di->fleecing.cbw) {
|
|
||||||
/*
|
|
||||||
* With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
|
|
||||||
@@ -383,10 +384,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
|
|
||||||
if (di->completed_ret == -EACCES && snapshot_error) {
|
|
||||||
di->completed_ret = snapshot_error;
|
|
||||||
}
|
|
||||||
- bdrv_cbw_drop(di->fleecing.cbw);
|
|
||||||
- di->fleecing.cbw = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ /*
|
|
||||||
+ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
|
|
||||||
+ * won't be done as a coroutine anyways:
|
|
||||||
+ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
|
|
||||||
+ * just spawn a BH calling bdrv_unref().
|
|
||||||
+ * - For cbw, draining would need to spawn a BH.
|
|
||||||
+ */
|
|
||||||
+ cleanup_snapshot_access(di);
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* Needs to happen outside of coroutine, because it takes the graph write lock.
|
|
||||||
*/
|
|
||||||
@@ -587,6 +595,7 @@ static void create_backup_jobs_bh(void *opaque) {
|
|
||||||
if (!di->fleecing.cbw) {
|
|
||||||
error_setg(errp, "appending cbw node for fleecing failed: %s",
|
|
||||||
local_err ? error_get_pretty(local_err) : "unknown error");
|
|
||||||
+ bdrv_drained_end(di->bs);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -599,6 +608,8 @@ static void create_backup_jobs_bh(void *opaque) {
|
|
||||||
if (!di->fleecing.snapshot_access) {
|
|
||||||
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
|
||||||
local_err ? error_get_pretty(local_err) : "unknown error");
|
|
||||||
+ cleanup_snapshot_access(di);
|
|
||||||
+ bdrv_drained_end(di->bs);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
source_bs = di->fleecing.snapshot_access;
|
|
||||||
@@ -637,6 +648,7 @@ static void create_backup_jobs_bh(void *opaque) {
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!job || local_err) {
|
|
||||||
+ cleanup_snapshot_access(di);
|
|
||||||
error_setg(errp, "backup_job_create failed: %s",
|
|
||||||
local_err ? error_get_pretty(local_err) : "null");
|
|
||||||
break;
|
|
|
@ -1,135 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
Date: Thu, 7 Nov 2024 17:51:15 +0100
|
|
||||||
Subject: [PATCH] PVE backup: factor out setting up snapshot access for
|
|
||||||
fleecing
|
|
||||||
|
|
||||||
Avoids some line bloat in the create_backup_jobs_bh() function and is
|
|
||||||
in preparation for setting up the snapshot access independently of
|
|
||||||
fleecing, in particular that will be useful for providing access to
|
|
||||||
the snapshot via NBD.
|
|
||||||
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
pve-backup.c | 95 ++++++++++++++++++++++++++++++++--------------------
|
|
||||||
1 file changed, 58 insertions(+), 37 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/pve-backup.c b/pve-backup.c
|
|
||||||
index c4178758b3..051ebffe48 100644
|
|
||||||
--- a/pve-backup.c
|
|
||||||
+++ b/pve-backup.c
|
|
||||||
@@ -525,6 +525,62 @@ static int coroutine_fn pvebackup_co_add_config(
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/*
|
|
||||||
+ * Setup a snapshot-access block node for a device with associated fleecing image.
|
|
||||||
+ */
|
|
||||||
+static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
|
|
||||||
+{
|
|
||||||
+ Error *local_err = NULL;
|
|
||||||
+
|
|
||||||
+ if (!di->fleecing.bs) {
|
|
||||||
+ error_setg(errp, "no associated fleecing image");
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ QDict *cbw_opts = qdict_new();
|
|
||||||
+ qdict_put_str(cbw_opts, "driver", "copy-before-write");
|
|
||||||
+ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
|
|
||||||
+ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
|
|
||||||
+
|
|
||||||
+ if (di->bitmap) {
|
|
||||||
+ /*
|
|
||||||
+ * Only guest writes to parts relevant for the backup need to be intercepted with
|
|
||||||
+ * old data being copied to the fleecing image.
|
|
||||||
+ */
|
|
||||||
+ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
|
|
||||||
+ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
|
|
||||||
+ }
|
|
||||||
+ /*
|
|
||||||
+ * Fleecing storage is supposed to be fast and it's better to break backup than guest
|
|
||||||
+ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
|
|
||||||
+ * abort a bit before that.
|
|
||||||
+ */
|
|
||||||
+ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
|
|
||||||
+ qdict_put_int(cbw_opts, "cbw-timeout", 45);
|
|
||||||
+
|
|
||||||
+ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
|
|
||||||
+
|
|
||||||
+ if (!di->fleecing.cbw) {
|
|
||||||
+ error_setg(errp, "appending cbw node for fleecing failed: %s",
|
|
||||||
+ local_err ? error_get_pretty(local_err) : "unknown error");
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ QDict *snapshot_access_opts = qdict_new();
|
|
||||||
+ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
|
|
||||||
+ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
|
|
||||||
+
|
|
||||||
+ di->fleecing.snapshot_access =
|
|
||||||
+ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
|
|
||||||
+ if (!di->fleecing.snapshot_access) {
|
|
||||||
+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
|
||||||
+ local_err ? error_get_pretty(local_err) : "unknown error");
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* backup_job_create can *not* be run from a coroutine, so this can't either.
|
|
||||||
* The caller is responsible that backup_mutex is held nonetheless.
|
|
||||||
@@ -569,49 +625,14 @@ static void create_backup_jobs_bh(void *opaque) {
|
|
||||||
const char *job_id = bdrv_get_device_name(di->bs);
|
|
||||||
bdrv_graph_co_rdunlock();
|
|
||||||
if (di->fleecing.bs) {
|
|
||||||
- QDict *cbw_opts = qdict_new();
|
|
||||||
- qdict_put_str(cbw_opts, "driver", "copy-before-write");
|
|
||||||
- qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
|
|
||||||
- qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
|
|
||||||
-
|
|
||||||
- if (di->bitmap) {
|
|
||||||
- /*
|
|
||||||
- * Only guest writes to parts relevant for the backup need to be intercepted with
|
|
||||||
- * old data being copied to the fleecing image.
|
|
||||||
- */
|
|
||||||
- qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
|
|
||||||
- qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
|
|
||||||
- }
|
|
||||||
- /*
|
|
||||||
- * Fleecing storage is supposed to be fast and it's better to break backup than guest
|
|
||||||
- * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
|
|
||||||
- * abort a bit before that.
|
|
||||||
- */
|
|
||||||
- qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
|
|
||||||
- qdict_put_int(cbw_opts, "cbw-timeout", 45);
|
|
||||||
-
|
|
||||||
- di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
|
|
||||||
-
|
|
||||||
- if (!di->fleecing.cbw) {
|
|
||||||
- error_setg(errp, "appending cbw node for fleecing failed: %s",
|
|
||||||
- local_err ? error_get_pretty(local_err) : "unknown error");
|
|
||||||
- bdrv_drained_end(di->bs);
|
|
||||||
- break;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- QDict *snapshot_access_opts = qdict_new();
|
|
||||||
- qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
|
|
||||||
- qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
|
|
||||||
-
|
|
||||||
- di->fleecing.snapshot_access =
|
|
||||||
- bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
|
|
||||||
- if (!di->fleecing.snapshot_access) {
|
|
||||||
+ if (setup_snapshot_access(di, &local_err) < 0) {
|
|
||||||
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
|
||||||
local_err ? error_get_pretty(local_err) : "unknown error");
|
|
||||||
cleanup_snapshot_access(di);
|
|
||||||
bdrv_drained_end(di->bs);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
source_bs = di->fleecing.snapshot_access;
|
|
||||||
discard_source = true;
|
|
||||||
|
|
|
@ -1,135 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
Date: Thu, 7 Nov 2024 17:51:16 +0100
|
|
||||||
Subject: [PATCH] PVE backup: save device name in device info structure
|
|
||||||
|
|
||||||
The device name needs to be queried while holding the graph read lock
|
|
||||||
and since it doesn't change during the whole operation, just get it
|
|
||||||
once during setup and avoid the need to query it again in different
|
|
||||||
places.
|
|
||||||
|
|
||||||
Also in preparation to use it more often in error messages and for the
|
|
||||||
upcoming external backup access API.
|
|
||||||
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
pve-backup.c | 29 +++++++++++++++--------------
|
|
||||||
1 file changed, 15 insertions(+), 14 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/pve-backup.c b/pve-backup.c
|
|
||||||
index 051ebffe48..33c23e53c2 100644
|
|
||||||
--- a/pve-backup.c
|
|
||||||
+++ b/pve-backup.c
|
|
||||||
@@ -94,6 +94,7 @@ typedef struct PVEBackupDevInfo {
|
|
||||||
size_t size;
|
|
||||||
uint64_t block_size;
|
|
||||||
uint8_t dev_id;
|
|
||||||
+ char* device_name;
|
|
||||||
int completed_ret; // INT_MAX if not completed
|
|
||||||
BdrvDirtyBitmap *bitmap;
|
|
||||||
BlockDriverState *target;
|
|
||||||
@@ -327,6 +328,8 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
|
|
||||||
}
|
|
||||||
|
|
||||||
di->bs = NULL;
|
|
||||||
+ g_free(di->device_name);
|
|
||||||
+ di->device_name = NULL;
|
|
||||||
|
|
||||||
assert(di->target == NULL);
|
|
||||||
|
|
||||||
@@ -621,9 +624,6 @@ static void create_backup_jobs_bh(void *opaque) {
|
|
||||||
|
|
||||||
BlockDriverState *source_bs = di->bs;
|
|
||||||
bool discard_source = false;
|
|
||||||
- bdrv_graph_co_rdlock();
|
|
||||||
- const char *job_id = bdrv_get_device_name(di->bs);
|
|
||||||
- bdrv_graph_co_rdunlock();
|
|
||||||
if (di->fleecing.bs) {
|
|
||||||
if (setup_snapshot_access(di, &local_err) < 0) {
|
|
||||||
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
|
||||||
@@ -654,7 +654,7 @@ static void create_backup_jobs_bh(void *opaque) {
|
|
||||||
}
|
|
||||||
|
|
||||||
BlockJob *job = backup_job_create(
|
|
||||||
- job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
|
|
||||||
+ di->device_name, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
|
|
||||||
bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
|
|
||||||
BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
|
|
||||||
&local_err);
|
|
||||||
@@ -751,6 +751,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
|
|
||||||
}
|
|
||||||
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
|
|
||||||
di->bs = bs;
|
|
||||||
+ di->device_name = g_strdup(bdrv_get_device_name(bs));
|
|
||||||
|
|
||||||
if (fleecing && device_uses_fleecing(*d)) {
|
|
||||||
g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
|
|
||||||
@@ -789,6 +790,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
|
|
||||||
|
|
||||||
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
|
|
||||||
di->bs = bs;
|
|
||||||
+ di->device_name = g_strdup(bdrv_get_device_name(bs));
|
|
||||||
di_list = g_list_append(di_list, di);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -956,9 +958,6 @@ UuidInfo coroutine_fn *qmp_backup(
|
|
||||||
|
|
||||||
di->block_size = dump_cb_block_size;
|
|
||||||
|
|
||||||
- bdrv_graph_co_rdlock();
|
|
||||||
- const char *devname = bdrv_get_device_name(di->bs);
|
|
||||||
- bdrv_graph_co_rdunlock();
|
|
||||||
PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
|
|
||||||
size_t dirty = di->size;
|
|
||||||
|
|
||||||
@@ -973,7 +972,8 @@ UuidInfo coroutine_fn *qmp_backup(
|
|
||||||
}
|
|
||||||
action = PBS_BITMAP_ACTION_NEW;
|
|
||||||
} else {
|
|
||||||
- expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
|
|
||||||
+ expect_only_dirty =
|
|
||||||
+ proxmox_backup_check_incremental(pbs, di->device_name, di->size) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (expect_only_dirty) {
|
|
||||||
@@ -997,7 +997,8 @@ UuidInfo coroutine_fn *qmp_backup(
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp);
|
|
||||||
+ int dev_id = proxmox_backup_co_register_image(pbs, di->device_name, di->size,
|
|
||||||
+ expect_only_dirty, errp);
|
|
||||||
if (dev_id < 0) {
|
|
||||||
goto err_mutex;
|
|
||||||
}
|
|
||||||
@@ -1009,7 +1010,7 @@ UuidInfo coroutine_fn *qmp_backup(
|
|
||||||
di->dev_id = dev_id;
|
|
||||||
|
|
||||||
PBSBitmapInfo *info = g_malloc(sizeof(*info));
|
|
||||||
- info->drive = g_strdup(devname);
|
|
||||||
+ info->drive = g_strdup(di->device_name);
|
|
||||||
info->action = action;
|
|
||||||
info->size = di->size;
|
|
||||||
info->dirty = dirty;
|
|
||||||
@@ -1034,10 +1035,7 @@ UuidInfo coroutine_fn *qmp_backup(
|
|
||||||
goto err_mutex;
|
|
||||||
}
|
|
||||||
|
|
||||||
- bdrv_graph_co_rdlock();
|
|
||||||
- const char *devname = bdrv_get_device_name(di->bs);
|
|
||||||
- bdrv_graph_co_rdunlock();
|
|
||||||
- di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
|
|
||||||
+ di->dev_id = vma_writer_register_stream(vmaw, di->device_name, di->size);
|
|
||||||
if (di->dev_id <= 0) {
|
|
||||||
error_set(errp, ERROR_CLASS_GENERIC_ERROR,
|
|
||||||
"register_stream failed");
|
|
||||||
@@ -1148,6 +1146,9 @@ err:
|
|
||||||
bdrv_co_unref(di->target);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ g_free(di->device_name);
|
|
||||||
+ di->device_name = NULL;
|
|
||||||
+
|
|
||||||
g_free(di);
|
|
||||||
}
|
|
||||||
g_list_free(di_list);
|
|
|
@ -1,25 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
Date: Thu, 7 Nov 2024 17:51:17 +0100
|
|
||||||
Subject: [PATCH] PVE backup: include device name in error when setting up
|
|
||||||
snapshot access fails
|
|
||||||
|
|
||||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
||||||
---
|
|
||||||
pve-backup.c | 3 ++-
|
|
||||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/pve-backup.c b/pve-backup.c
|
|
||||||
index 33c23e53c2..d931746453 100644
|
|
||||||
--- a/pve-backup.c
|
|
||||||
+++ b/pve-backup.c
|
|
||||||
@@ -626,7 +626,8 @@ static void create_backup_jobs_bh(void *opaque) {
|
|
||||||
bool discard_source = false;
|
|
||||||
if (di->fleecing.bs) {
|
|
||||||
if (setup_snapshot_access(di, &local_err) < 0) {
|
|
||||||
- error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
|
||||||
+ error_setg(errp, "%s - setting up snapshot access for fleecing failed: %s",
|
|
||||||
+ di->device_name,
|
|
||||||
local_err ? error_get_pretty(local_err) : "unknown error");
|
|
||||||
cleanup_snapshot_access(di);
|
|
||||||
bdrv_drained_end(di->bs);
|
|
|
@ -3,38 +3,18 @@ extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
|
||||||
extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
|
extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
|
||||||
extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
|
extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
|
||||||
extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
|
extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
|
||||||
extra/0006-block-copy-before-write-fix-permission.patch
|
extra/0006-Revert-virtio-pci-fix-use-of-a-released-vector.patch
|
||||||
extra/0007-block-copy-before-write-support-unligned-snapshot-di.patch
|
extra/0007-block-copy-before-write-fix-permission.patch
|
||||||
extra/0008-block-copy-before-write-create-block_copy-bitmap-in-.patch
|
extra/0008-block-copy-before-write-support-unligned-snapshot-di.patch
|
||||||
extra/0009-qapi-blockdev-backup-add-discard-source-parameter.patch
|
extra/0009-block-copy-before-write-create-block_copy-bitmap-in-.patch
|
||||||
extra/0010-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
|
extra/0010-qapi-blockdev-backup-add-discard-source-parameter.patch
|
||||||
extra/0011-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
|
extra/0011-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
|
||||||
extra/0012-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
|
extra/0012-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
|
||||||
extra/0013-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
|
extra/0013-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
|
||||||
extra/0014-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
|
extra/0014-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
|
||||||
extra/0015-block-copy-Fix-missing-graph-lock.patch
|
extra/0015-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
|
||||||
extra/0016-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
|
extra/0016-block-copy-Fix-missing-graph-lock.patch
|
||||||
extra/0017-virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch
|
extra/0017-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
|
||||||
extra/0018-virtio-net-Ensure-queue-index-fits-with-RSS.patch
|
|
||||||
extra/0019-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch
|
|
||||||
extra/0020-net-Reinstate-net-nic-model-help-output-as-documente.patch
|
|
||||||
extra/0021-net-Fix-net-nic-model-for-non-help-arguments.patch
|
|
||||||
extra/0022-target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch
|
|
||||||
extra/0023-target-arm-Fix-UMOPA-UMOPS-of-16-bit-values.patch
|
|
||||||
extra/0024-target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch
|
|
||||||
extra/0025-target-arm-Ignore-SMCR_EL2.LEN-and-SVCR_EL2.LEN-if-E.patch
|
|
||||||
extra/0026-target-arm-Handle-denormals-correctly-for-FMOPA-wide.patch
|
|
||||||
extra/0027-intel_iommu-fix-FRCD-construction-macro.patch
|
|
||||||
extra/0028-target-i386-Do-not-apply-REX-to-MMX-operands.patch
|
|
||||||
extra/0029-module-Prevent-crash-by-resetting-local_err-in-modul.patch
|
|
||||||
extra/0030-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch
|
|
||||||
extra/0031-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch
|
|
||||||
extra/0032-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch
|
|
||||||
extra/0033-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch
|
|
||||||
extra/0034-vnc-fix-crash-when-no-console-attached.patch
|
|
||||||
extra/0035-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch
|
|
||||||
extra/0036-softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch
|
|
||||||
extra/0037-block-reqlist-allow-adding-overlapping-requests.patch
|
|
||||||
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
|
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
|
||||||
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
|
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
|
||||||
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
|
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
|
||||||
|
@ -88,7 +68,4 @@ pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch
|
||||||
pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
|
pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
|
||||||
pve/0046-PVE-backup-add-fleecing-option.patch
|
pve/0046-PVE-backup-add-fleecing-option.patch
|
||||||
pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch
|
pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch
|
||||||
pve/0048-PVE-backup-fixup-error-handling-for-fleecing.patch
|
pve-qemu-9.0-vitastor.patch
|
||||||
pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch
|
|
||||||
pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch
|
|
||||||
pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch
|
|
||||||
|
|
2
qemu
2
qemu
|
@ -1 +1 @@
|
||||||
Subproject commit 5ebde3b5c00e15f560f73055fac4ab31c0cac6d2
|
Subproject commit c25df57ae8f9fe1c72eee2dab37d76d904ac382e
|
Loading…
Reference in New Issue