Compare commits
11 Commits
f2653ef0b8
...
05089ab57d
Author | SHA1 | Date |
---|---|---|
Thomas Lamprecht | 05089ab57d | |
Thomas Lamprecht | 9e8ef15831 | |
Thomas Lamprecht | 531db7df01 | |
Thomas Lamprecht | d14bffa8c0 | |
Thomas Lamprecht | 4bc8223ac9 | |
Fiona Ebner | fd53092e9b | |
Thomas Lamprecht | 7446610389 | |
Fiona Ebner | 903a63402e | |
Fiona Ebner | 441072fc57 | |
Fiona Ebner | 582fd47901 | |
Fiona Ebner | 356bc2483a |
|
@ -1,3 +1,27 @@
|
|||
pve-qemu-kvm (9.0.2-4) bookworm; urgency=medium
|
||||
|
||||
* async snapshot: ensure any dynamic vCPU-throttling applied for
|
||||
auto-converge gets always disabled again after finishing the snapshot.
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Sun, 10 Nov 2024 11:23:09 +0100
|
||||
|
||||
pve-qemu-kvm (9.0.2-3) bookworm; urgency=medium
|
||||
|
||||
* pick up fix for VirtIO PCI regressions
|
||||
|
||||
* pick up stable fixes for 9.0, including fixes for VirtIO-net, ARM and
|
||||
x86(_64) emulation, CVEs to harden NBD server against malicious clients,
|
||||
as well as a few others (VNC, physmem, Intel IOMMU, ...).
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Fri, 06 Sep 2024 16:21:42 +0200
|
||||
|
||||
pve-qemu-kvm (9.0.2-2) bookworm; urgency=medium
|
||||
|
||||
* actually update submodule to QEMU 9.0.2. The previous release was still
|
||||
based on 9.0.0 by mistake.
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Wed, 07 Aug 2024 10:16:01 +0200
|
||||
|
||||
pve-qemu-kvm (9.0.2-1) bookworm; urgency=medium
|
||||
|
||||
* update submodule and patches to QEMU 9.0.2. While our version had most
|
||||
|
|
|
@ -1,87 +0,0 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Date: Thu, 16 May 2024 12:59:52 +0200
|
||||
Subject: [PATCH] Revert "virtio-pci: fix use of a released vector"
|
||||
|
||||
This reverts commit 2ce6cff94df2650c460f809e5ad263f1d22507c0.
|
||||
|
||||
The fix causes some issues:
|
||||
https://gitlab.com/qemu-project/qemu/-/issues/2321
|
||||
https://gitlab.com/qemu-project/qemu/-/issues/2334
|
||||
|
||||
The CVE fixed by commit 2ce6cff94d ("virtio-pci: fix use of a released
|
||||
vector") is CVE-2024-4693 [0] and allows a malicious guest that
|
||||
controls the boot process in the guest to crash its QEMU process.
|
||||
|
||||
The issues sound worse than the CVE, so revert until there is a proper
|
||||
fix.
|
||||
|
||||
[0]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-4693
|
||||
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
hw/virtio/virtio-pci.c | 37 ++-----------------------------------
|
||||
1 file changed, 2 insertions(+), 35 deletions(-)
|
||||
|
||||
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
|
||||
index e04218a9fb..fd66713848 100644
|
||||
--- a/hw/virtio/virtio-pci.c
|
||||
+++ b/hw/virtio/virtio-pci.c
|
||||
@@ -1410,38 +1410,6 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
|
||||
return offset;
|
||||
}
|
||||
|
||||
-static void virtio_pci_set_vector(VirtIODevice *vdev,
|
||||
- VirtIOPCIProxy *proxy,
|
||||
- int queue_no, uint16_t old_vector,
|
||||
- uint16_t new_vector)
|
||||
-{
|
||||
- bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
|
||||
- msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled();
|
||||
-
|
||||
- if (new_vector == old_vector) {
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * If the device uses irqfd and the vector changes after DRIVER_OK is
|
||||
- * set, we need to release the old vector and set up the new one.
|
||||
- * Otherwise just need to set the new vector on the device.
|
||||
- */
|
||||
- if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) {
|
||||
- kvm_virtio_pci_vector_release_one(proxy, queue_no);
|
||||
- }
|
||||
- /* Set the new vector on the device. */
|
||||
- if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
|
||||
- vdev->config_vector = new_vector;
|
||||
- } else {
|
||||
- virtio_queue_set_vector(vdev, queue_no, new_vector);
|
||||
- }
|
||||
- /* If the new vector changed need to set it up. */
|
||||
- if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) {
|
||||
- kvm_virtio_pci_vector_use_one(proxy, queue_no);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
|
||||
uint8_t bar, uint64_t offset, uint64_t length,
|
||||
uint8_t id)
|
||||
@@ -1588,8 +1556,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
|
||||
} else {
|
||||
val = VIRTIO_NO_VECTOR;
|
||||
}
|
||||
- virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX,
|
||||
- vdev->config_vector, val);
|
||||
+ vdev->config_vector = val;
|
||||
break;
|
||||
case VIRTIO_PCI_COMMON_STATUS:
|
||||
if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
|
||||
@@ -1629,7 +1596,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
|
||||
} else {
|
||||
val = VIRTIO_NO_VECTOR;
|
||||
}
|
||||
- virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val);
|
||||
+ virtio_queue_set_vector(vdev, vdev->queue_sel, val);
|
||||
break;
|
||||
case VIRTIO_PCI_COMMON_Q_ENABLE:
|
||||
if (val == 1) {
|
77
debian/patches/extra/0017-virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch
vendored
Normal file
77
debian/patches/extra/0017-virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch
vendored
Normal file
|
@ -0,0 +1,77 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Cindy Lu <lulu@redhat.com>
|
||||
Date: Tue, 6 Aug 2024 17:37:12 +0800
|
||||
Subject: [PATCH] virtio-pci: Fix the use of an uninitialized irqfd
|
||||
|
||||
The crash was reported in MAC OS and NixOS, here is the link for this bug
|
||||
https://gitlab.com/qemu-project/qemu/-/issues/2334
|
||||
https://gitlab.com/qemu-project/qemu/-/issues/2321
|
||||
|
||||
In this bug, they are using the virtio_input device. The guest notifier was
|
||||
not supported for this device, The function virtio_pci_set_guest_notifiers()
|
||||
was not called, and the vector_irqfd was not initialized.
|
||||
|
||||
So the fix is adding the check for vector_irqfd in virtio_pci_get_notifier()
|
||||
|
||||
The function virtio_pci_get_notifier() can be used in various devices.
|
||||
It could also be called when VIRTIO_CONFIG_S_DRIVER_OK is not set. In this situation,
|
||||
the vector_irqfd being NULL is acceptable. We can allow the device continue to boot
|
||||
|
||||
If the vector_irqfd still hasn't been initialized after VIRTIO_CONFIG_S_DRIVER_OK
|
||||
is set, it means that the function set_guest_notifiers was not called before the
|
||||
driver started. This indicates that the device is not using the notifier.
|
||||
At this point, we will let the check fail.
|
||||
|
||||
This fix is verified in vyatta,MacOS,NixOS,fedora system.
|
||||
|
||||
The bt tree for this bug is:
|
||||
Thread 6 "CPU 0/KVM" received signal SIGSEGV, Segmentation fault.
|
||||
[Switching to Thread 0x7c817be006c0 (LWP 1269146)]
|
||||
kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817
|
||||
817 if (irqfd->users == 0) {
|
||||
(gdb) thread apply all bt
|
||||
...
|
||||
Thread 6 (Thread 0x7c817be006c0 (LWP 1269146) "CPU 0/KVM"):
|
||||
0 kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817
|
||||
1 kvm_virtio_pci_vector_use_one () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:893
|
||||
2 0x00005983657045e2 in memory_region_write_accessor () at ../qemu-9.0.0/system/memory.c:497
|
||||
3 0x0000598365704ba6 in access_with_adjusted_size () at ../qemu-9.0.0/system/memory.c:573
|
||||
4 0x0000598365705059 in memory_region_dispatch_write () at ../qemu-9.0.0/system/memory.c:1528
|
||||
5 0x00005983659b8e1f in flatview_write_continue_step.isra.0 () at ../qemu-9.0.0/system/physmem.c:2713
|
||||
6 0x000059836570ba7d in flatview_write_continue () at ../qemu-9.0.0/system/physmem.c:2743
|
||||
7 flatview_write () at ../qemu-9.0.0/system/physmem.c:2774
|
||||
8 0x000059836570bb76 in address_space_write () at ../qemu-9.0.0/system/physmem.c:2894
|
||||
9 0x0000598365763afe in address_space_rw () at ../qemu-9.0.0/system/physmem.c:2904
|
||||
10 kvm_cpu_exec () at ../qemu-9.0.0/accel/kvm/kvm-all.c:2917
|
||||
11 0x000059836576656e in kvm_vcpu_thread_fn () at ../qemu-9.0.0/accel/kvm/kvm-accel-ops.c:50
|
||||
12 0x0000598365926ca8 in qemu_thread_start () at ../qemu-9.0.0/util/qemu-thread-posix.c:541
|
||||
13 0x00007c8185bcd1cf in ??? () at /usr/lib/libc.so.6
|
||||
14 0x00007c8185c4e504 in clone () at /usr/lib/libc.so.6
|
||||
|
||||
Fixes: 2ce6cff94d ("virtio-pci: fix use of a released vector")
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Cindy Lu <lulu@redhat.com>
|
||||
Message-Id: <20240806093715.65105-1-lulu@redhat.com>
|
||||
Acked-by: Jason Wang <jasowang@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit a8e63ff289d137197ad7a701a587cc432872d798)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
hw/virtio/virtio-pci.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
|
||||
index e04218a9fb..389bab003f 100644
|
||||
--- a/hw/virtio/virtio-pci.c
|
||||
+++ b/hw/virtio/virtio-pci.c
|
||||
@@ -860,6 +860,9 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
|
||||
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
|
||||
VirtQueue *vq;
|
||||
|
||||
+ if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
|
||||
+ return -1;
|
||||
+
|
||||
if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
|
||||
*n = virtio_config_get_guest_notifier(vdev);
|
||||
*vector = vdev->config_vector;
|
|
@ -0,0 +1,35 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Akihiko Odaki <akihiko.odaki@daynix.com>
|
||||
Date: Mon, 1 Jul 2024 20:58:04 +0900
|
||||
Subject: [PATCH] virtio-net: Ensure queue index fits with RSS
|
||||
|
||||
Ensure the queue index points to a valid queue when software RSS
|
||||
enabled. The new calculation matches with the behavior of Linux's TAP
|
||||
device with the RSS eBPF program.
|
||||
|
||||
Fixes: 4474e37a5b3a ("virtio-net: implement RX RSS processing")
|
||||
Reported-by: Zhibin Hu <huzhibin5@huawei.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
(cherry picked from commit f1595ceb9aad36a6c1da95bcb77ab9509b38822d)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
hw/net/virtio-net.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index 3644bfd91b..f48588638d 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -1949,7 +1949,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
|
||||
if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
|
||||
int index = virtio_net_process_rss(nc, buf, size);
|
||||
if (index >= 0) {
|
||||
- NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
|
||||
+ NetClientState *nc2 =
|
||||
+ qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
|
||||
return virtio_net_receive_rcu(nc2, buf, size, true);
|
||||
}
|
||||
}
|
338
debian/patches/extra/0019-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch
vendored
Normal file
338
debian/patches/extra/0019-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch
vendored
Normal file
|
@ -0,0 +1,338 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: thomas <east.moutain.yang@gmail.com>
|
||||
Date: Fri, 12 Jul 2024 11:10:53 +0800
|
||||
Subject: [PATCH] virtio-net: Fix network stall at the host side waiting for
|
||||
kick
|
||||
|
||||
Patch 06b12970174 ("virtio-net: fix network stall under load")
|
||||
added double-check to test whether the available buffer size
|
||||
can satisfy the request or not, in case the guest has added
|
||||
some buffers to the avail ring simultaneously after the first
|
||||
check. It will be lucky if the available buffer size becomes
|
||||
okay after the double-check, then the host can send the packet
|
||||
to the guest. If the buffer size still can't satisfy the request,
|
||||
even if the guest has added some buffers, viritio-net would
|
||||
stall at the host side forever.
|
||||
|
||||
The patch enables notification and checks whether the guest has
|
||||
added some buffers since last check of available buffers when
|
||||
the available buffers are insufficient. If no buffer is added,
|
||||
return false, else recheck the available buffers in the loop.
|
||||
If the available buffers are sufficient, disable notification
|
||||
and return true.
|
||||
|
||||
Changes:
|
||||
1. Change the return type of virtqueue_get_avail_bytes() from void
|
||||
to int, it returns an opaque that represents the shadow_avail_idx
|
||||
of the virtqueue on success, else -1 on error.
|
||||
2. Add a new API: virtio_queue_enable_notification_and_check(),
|
||||
it takes an opaque as input arg which is returned from
|
||||
virtqueue_get_avail_bytes(). It enables notification firstly,
|
||||
then checks whether the guest has added some buffers since
|
||||
last check of available buffers or not by virtio_queue_poll(),
|
||||
return ture if yes.
|
||||
|
||||
The patch also reverts patch "06b12970174".
|
||||
|
||||
The case below can reproduce the stall.
|
||||
|
||||
Guest 0
|
||||
+--------+
|
||||
| iperf |
|
||||
---------------> | server |
|
||||
Host | +--------+
|
||||
+--------+ | ...
|
||||
| iperf |----
|
||||
| client |---- Guest n
|
||||
+--------+ | +--------+
|
||||
| | iperf |
|
||||
---------------> | server |
|
||||
+--------+
|
||||
|
||||
Boot many guests from qemu with virtio network:
|
||||
qemu ... -netdev tap,id=net_x \
|
||||
-device virtio-net-pci-non-transitional,\
|
||||
iommu_platform=on,mac=xx:xx:xx:xx:xx:xx,netdev=net_x
|
||||
|
||||
Each guest acts as iperf server with commands below:
|
||||
iperf3 -s -D -i 10 -p 8001
|
||||
iperf3 -s -D -i 10 -p 8002
|
||||
|
||||
The host as iperf client:
|
||||
iperf3 -c guest_IP -p 8001 -i 30 -w 256k -P 20 -t 40000
|
||||
iperf3 -c guest_IP -p 8002 -i 30 -w 256k -P 20 -t 40000
|
||||
|
||||
After some time, the host loses connection to the guest,
|
||||
the guest can send packet to the host, but can't receive
|
||||
packet from the host.
|
||||
|
||||
It's more likely to happen if SWIOTLB is enabled in the guest,
|
||||
allocating and freeing bounce buffer takes some CPU ticks,
|
||||
copying from/to bounce buffer takes more CPU ticks, compared
|
||||
with that there is no bounce buffer in the guest.
|
||||
Once the rate of producing packets from the host approximates
|
||||
the rate of receiveing packets in the guest, the guest would
|
||||
loop in NAPI.
|
||||
|
||||
receive packets ---
|
||||
| |
|
||||
v |
|
||||
free buf virtnet_poll
|
||||
| |
|
||||
v |
|
||||
add buf to avail ring ---
|
||||
|
|
||||
| need kick the host?
|
||||
| NAPI continues
|
||||
v
|
||||
receive packets ---
|
||||
| |
|
||||
v |
|
||||
free buf virtnet_poll
|
||||
| |
|
||||
v |
|
||||
add buf to avail ring ---
|
||||
|
|
||||
v
|
||||
... ...
|
||||
|
||||
On the other hand, the host fetches free buf from avail
|
||||
ring, if the buf in the avail ring is not enough, the
|
||||
host notifies the guest the event by writing the avail
|
||||
idx read from avail ring to the event idx of used ring,
|
||||
then the host goes to sleep, waiting for the kick signal
|
||||
from the guest.
|
||||
|
||||
Once the guest finds the host is waiting for kick singal
|
||||
(in virtqueue_kick_prepare_split()), it kicks the host.
|
||||
|
||||
The host may stall forever at the sequences below:
|
||||
|
||||
Host Guest
|
||||
------------ -----------
|
||||
fetch buf, send packet receive packet ---
|
||||
... ... |
|
||||
fetch buf, send packet add buf |
|
||||
... add buf virtnet_poll
|
||||
buf not enough avail idx-> add buf |
|
||||
read avail idx add buf |
|
||||
add buf ---
|
||||
receive packet ---
|
||||
write event idx ... |
|
||||
wait for kick add buf virtnet_poll
|
||||
... |
|
||||
---
|
||||
no more packet, exit NAPI
|
||||
|
||||
In the first loop of NAPI above, indicated in the range of
|
||||
virtnet_poll above, the host is sending packets while the
|
||||
guest is receiving packets and adding buffers.
|
||||
step 1: The buf is not enough, for example, a big packet
|
||||
needs 5 buf, but the available buf count is 3.
|
||||
The host read current avail idx.
|
||||
step 2: The guest adds some buf, then checks whether the
|
||||
host is waiting for kick signal, not at this time.
|
||||
The used ring is not empty, the guest continues
|
||||
the second loop of NAPI.
|
||||
step 3: The host writes the avail idx read from avail
|
||||
ring to used ring as event idx via
|
||||
virtio_queue_set_notification(q->rx_vq, 1).
|
||||
step 4: At the end of the second loop of NAPI, recheck
|
||||
whether kick is needed, as the event idx in the
|
||||
used ring written by the host is beyound the
|
||||
range of kick condition, the guest will not
|
||||
send kick signal to the host.
|
||||
|
||||
Fixes: 06b12970174 ("virtio-net: fix network stall under load")
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Wencheng Yang <east.moutain.yang@gmail.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
(cherry picked from commit f937309fbdbb48c354220a3e7110c202ae4aa7fa)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
hw/net/virtio-net.c | 28 ++++++++++-------
|
||||
hw/virtio/virtio.c | 64 +++++++++++++++++++++++++++++++++++---
|
||||
include/hw/virtio/virtio.h | 21 +++++++++++--
|
||||
3 files changed, 94 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index f48588638d..d4b979d343 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -1680,24 +1680,28 @@ static bool virtio_net_can_receive(NetClientState *nc)
|
||||
|
||||
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
|
||||
{
|
||||
+ int opaque;
|
||||
+ unsigned int in_bytes;
|
||||
VirtIONet *n = q->n;
|
||||
- if (virtio_queue_empty(q->rx_vq) ||
|
||||
- (n->mergeable_rx_bufs &&
|
||||
- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
|
||||
- virtio_queue_set_notification(q->rx_vq, 1);
|
||||
-
|
||||
- /* To avoid a race condition where the guest has made some buffers
|
||||
- * available after the above check but before notification was
|
||||
- * enabled, check for available buffers again.
|
||||
- */
|
||||
- if (virtio_queue_empty(q->rx_vq) ||
|
||||
- (n->mergeable_rx_bufs &&
|
||||
- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
|
||||
+
|
||||
+ while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
|
||||
+ opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
|
||||
+ bufsize, 0);
|
||||
+ /* Buffer is enough, disable notifiaction */
|
||||
+ if (bufsize <= in_bytes) {
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
|
||||
+ /* Guest has added some buffers, try again */
|
||||
+ continue;
|
||||
+ } else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
virtio_queue_set_notification(q->rx_vq, 0);
|
||||
+
|
||||
return 1;
|
||||
}
|
||||
|
||||
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
|
||||
index fd2dfe3a6b..08fba6b2d8 100644
|
||||
--- a/hw/virtio/virtio.c
|
||||
+++ b/hw/virtio/virtio.c
|
||||
@@ -743,6 +743,60 @@ int virtio_queue_empty(VirtQueue *vq)
|
||||
}
|
||||
}
|
||||
|
||||
+static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx)
|
||||
+{
|
||||
+ if (unlikely(!vq->vring.avail)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ return (uint16_t)shadow_idx != vring_avail_idx(vq);
|
||||
+}
|
||||
+
|
||||
+static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx)
|
||||
+{
|
||||
+ VRingPackedDesc desc;
|
||||
+ VRingMemoryRegionCaches *caches;
|
||||
+
|
||||
+ if (unlikely(!vq->vring.desc)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ caches = vring_get_region_caches(vq);
|
||||
+ if (!caches) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
|
||||
+ shadow_idx, true);
|
||||
+
|
||||
+ return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter);
|
||||
+}
|
||||
+
|
||||
+static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx)
|
||||
+{
|
||||
+ if (virtio_device_disabled(vq->vdev)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
|
||||
+ return virtio_queue_packed_poll(vq, shadow_idx);
|
||||
+ } else {
|
||||
+ return virtio_queue_split_poll(vq, shadow_idx);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
|
||||
+ int opaque)
|
||||
+{
|
||||
+ virtio_queue_set_notification(vq, 1);
|
||||
+
|
||||
+ if (opaque >= 0) {
|
||||
+ return virtio_queue_poll(vq, (unsigned)opaque);
|
||||
+ } else {
|
||||
+ return false;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
|
||||
unsigned int len)
|
||||
{
|
||||
@@ -1330,9 +1384,9 @@ err:
|
||||
goto done;
|
||||
}
|
||||
|
||||
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
||||
- unsigned int *out_bytes,
|
||||
- unsigned max_in_bytes, unsigned max_out_bytes)
|
||||
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
||||
+ unsigned int *out_bytes, unsigned max_in_bytes,
|
||||
+ unsigned max_out_bytes)
|
||||
{
|
||||
uint16_t desc_size;
|
||||
VRingMemoryRegionCaches *caches;
|
||||
@@ -1365,7 +1419,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
||||
caches);
|
||||
}
|
||||
|
||||
- return;
|
||||
+ return (int)vq->shadow_avail_idx;
|
||||
err:
|
||||
if (in_bytes) {
|
||||
*in_bytes = 0;
|
||||
@@ -1373,6 +1427,8 @@ err:
|
||||
if (out_bytes) {
|
||||
*out_bytes = 0;
|
||||
}
|
||||
+
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
|
||||
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
|
||||
index 2eafad17b8..8b4da92889 100644
|
||||
--- a/include/hw/virtio/virtio.h
|
||||
+++ b/include/hw/virtio/virtio.h
|
||||
@@ -271,9 +271,13 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
|
||||
VirtQueueElement *elem);
|
||||
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
|
||||
unsigned int out_bytes);
|
||||
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
||||
- unsigned int *out_bytes,
|
||||
- unsigned max_in_bytes, unsigned max_out_bytes);
|
||||
+/**
|
||||
+ * Return <0 on error or an opaque >=0 to pass to
|
||||
+ * virtio_queue_enable_notification_and_check on success.
|
||||
+ */
|
||||
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
||||
+ unsigned int *out_bytes, unsigned max_in_bytes,
|
||||
+ unsigned max_out_bytes);
|
||||
|
||||
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq);
|
||||
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
|
||||
@@ -307,6 +311,17 @@ int virtio_queue_ready(VirtQueue *vq);
|
||||
|
||||
int virtio_queue_empty(VirtQueue *vq);
|
||||
|
||||
+/**
|
||||
+ * Enable notification and check whether guest has added some
|
||||
+ * buffers since last call to virtqueue_get_avail_bytes.
|
||||
+ *
|
||||
+ * @opaque: value returned from virtqueue_get_avail_bytes
|
||||
+ */
|
||||
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
|
||||
+ int opaque);
|
||||
+
|
||||
+void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t idx);
|
||||
+
|
||||
/* Host binding interface. */
|
||||
|
||||
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr);
|
70
debian/patches/extra/0020-net-Reinstate-net-nic-model-help-output-as-documente.patch
vendored
Normal file
70
debian/patches/extra/0020-net-Reinstate-net-nic-model-help-output-as-documente.patch
vendored
Normal file
|
@ -0,0 +1,70 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: David Woodhouse <dwmw@amazon.co.uk>
|
||||
Date: Tue, 9 Jul 2024 13:34:44 +0100
|
||||
Subject: [PATCH] net: Reinstate '-net nic, model=help' output as documented in
|
||||
man page
|
||||
|
||||
While refactoring the NIC initialization code, I broke '-net nic,model=help'
|
||||
which no longer outputs a list of available NIC models.
|
||||
|
||||
Fixes: 2cdeca04adab ("net: report list of available models according to platform")
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
|
||||
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
(cherry picked from commit 64f75f57f9d2c8c12ac6d9355fa5d3a2af5879ca)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
net/net.c | 25 ++++++++++++++++++++++---
|
||||
1 file changed, 22 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/net/net.c b/net/net.c
|
||||
index a2f0c828bb..e6ca2529bb 100644
|
||||
--- a/net/net.c
|
||||
+++ b/net/net.c
|
||||
@@ -1150,6 +1150,21 @@ NICInfo *qemu_find_nic_info(const char *typename, bool match_default,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+static bool is_nic_model_help_option(const char *model)
|
||||
+{
|
||||
+ if (model && is_help_option(model)) {
|
||||
+ /*
|
||||
+ * Trigger the help output by instantiating the hash table which
|
||||
+ * will gather tha available models as they get registered.
|
||||
+ */
|
||||
+ if (!nic_model_help) {
|
||||
+ nic_model_help = g_hash_table_new_full(g_str_hash, g_str_equal,
|
||||
+ g_free, NULL);
|
||||
+ }
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
|
||||
/* "I have created a device. Please configure it if you can" */
|
||||
bool qemu_configure_nic_device(DeviceState *dev, bool match_default,
|
||||
@@ -1733,6 +1748,12 @@ void net_check_clients(void)
|
||||
|
||||
static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
|
||||
{
|
||||
+ const char *model = qemu_opt_get_del(opts, "model");
|
||||
+
|
||||
+ if (is_nic_model_help_option(model)) {
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
return net_client_init(opts, false, errp);
|
||||
}
|
||||
|
||||
@@ -1789,9 +1810,7 @@ static int net_param_nic(void *dummy, QemuOpts *opts, Error **errp)
|
||||
memset(ni, 0, sizeof(*ni));
|
||||
ni->model = qemu_opt_get_del(opts, "model");
|
||||
|
||||
- if (!nic_model_help && !g_strcmp0(ni->model, "help")) {
|
||||
- nic_model_help = g_hash_table_new_full(g_str_hash, g_str_equal,
|
||||
- g_free, NULL);
|
||||
+ if (is_nic_model_help_option(ni->model)) {
|
||||
return 0;
|
||||
}
|
||||
|
32
debian/patches/extra/0021-net-Fix-net-nic-model-for-non-help-arguments.patch
vendored
Normal file
32
debian/patches/extra/0021-net-Fix-net-nic-model-for-non-help-arguments.patch
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: David Woodhouse <dwmw@amazon.co.uk>
|
||||
Date: Tue, 6 Aug 2024 18:21:37 +0100
|
||||
Subject: [PATCH] net: Fix '-net nic,model=' for non-help arguments
|
||||
|
||||
Oops, don't *delete* the model option when checking for 'help'.
|
||||
|
||||
Fixes: 64f75f57f9d2 ("net: Reinstate '-net nic, model=help' output as documented in man page")
|
||||
Reported-by: Hans <sungdgdhtryrt@gmail.com>
|
||||
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
(cherry picked from commit fa62cb989a9146c82f8f172715042852f5d36200)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
net/net.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/net/net.c b/net/net.c
|
||||
index e6ca2529bb..897bb936cf 100644
|
||||
--- a/net/net.c
|
||||
+++ b/net/net.c
|
||||
@@ -1748,7 +1748,7 @@ void net_check_clients(void)
|
||||
|
||||
static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
|
||||
{
|
||||
- const char *model = qemu_opt_get_del(opts, "model");
|
||||
+ const char *model = qemu_opt_get(opts, "model");
|
||||
|
||||
if (is_nic_model_help_option(model)) {
|
||||
return 0;
|
57
debian/patches/extra/0022-target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch
vendored
Normal file
57
debian/patches/extra/0022-target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Mon, 22 Jul 2024 18:29:54 +0100
|
||||
Subject: [PATCH] target/arm: Don't assert for 128-bit tile accesses when SVL
|
||||
is 128
|
||||
|
||||
For an instruction which accesses a 128-bit element tile when
|
||||
the SVL is also 128 (for example MOV z0.Q, p0/M, ZA0H.Q[w0,0]),
|
||||
we will assert in get_tile_rowcol():
|
||||
|
||||
qemu-system-aarch64: ../../tcg/tcg-op.c:926: tcg_gen_deposit_z_i32: Assertion `len > 0' failed.
|
||||
|
||||
This happens because we calculate
|
||||
len = ctz32(streaming_vec_reg_size(s)) - esz;$
|
||||
but if the SVL and the element size are the same len is 0, and
|
||||
the deposit operation asserts.
|
||||
|
||||
In this case the ZA storage contains exactly one 128 bit
|
||||
element ZA tile, and the horizontal or vertical slice is just
|
||||
that tile. This means that regardless of the index value in
|
||||
the Ws register, we always access that tile. (In pseudocode terms,
|
||||
we calculate (index + offset) MOD 1, which is 0.)
|
||||
|
||||
Special case the len == 0 case to avoid hitting the assertion
|
||||
in tcg_gen_deposit_z_i32().
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Message-id: 20240722172957.1041231-2-peter.maydell@linaro.org
|
||||
(cherry picked from commit 56f1c0db928aae0b83fd91c89ddb226b137e2b21)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
target/arm/tcg/translate-sme.c | 10 +++++++++-
|
||||
1 file changed, 9 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
|
||||
index 185a8a917b..a50a419af2 100644
|
||||
--- a/target/arm/tcg/translate-sme.c
|
||||
+++ b/target/arm/tcg/translate-sme.c
|
||||
@@ -49,7 +49,15 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
|
||||
/* Prepare a power-of-two modulo via extraction of @len bits. */
|
||||
len = ctz32(streaming_vec_reg_size(s)) - esz;
|
||||
|
||||
- if (vertical) {
|
||||
+ if (!len) {
|
||||
+ /*
|
||||
+ * SVL is 128 and the element size is 128. There is exactly
|
||||
+ * one 128x128 tile in the ZA storage, and so we calculate
|
||||
+ * (Rs + imm) MOD 1, which is always 0. We need to special case
|
||||
+ * this because TCG doesn't allow deposit ops with len 0.
|
||||
+ */
|
||||
+ tcg_gen_movi_i32(tmp, 0);
|
||||
+ } else if (vertical) {
|
||||
/*
|
||||
* Compute the byte offset of the index within the tile:
|
||||
* (index % (svl / size)) * size
|
|
@ -0,0 +1,59 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Mon, 22 Jul 2024 18:29:55 +0100
|
||||
Subject: [PATCH] target/arm: Fix UMOPA/UMOPS of 16-bit values
|
||||
|
||||
The UMOPA/UMOPS instructions are supposed to multiply unsigned 8 or
|
||||
16 bit elements and accumulate the products into a 64-bit element.
|
||||
In the Arm ARM pseudocode, this is done with the usual
|
||||
infinite-precision signed arithmetic. However our implementation
|
||||
doesn't quite get it right, because in the DEF_IMOP_64() macro we do:
|
||||
sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0);
|
||||
|
||||
where NTYPE and MTYPE are uint16_t or int16_t. In the uint16_t case,
|
||||
the C usual arithmetic conversions mean the values are converted to
|
||||
"int" type and the multiply is done as a 32-bit multiply. This means
|
||||
that if the inputs are, for example, 0xffff and 0xffff then the
|
||||
result is 0xFFFE0001 as an int, which is then promoted to uint64_t
|
||||
for the accumulation into sum; this promotion incorrectly sign
|
||||
extends the multiply.
|
||||
|
||||
Avoid the incorrect sign extension by casting to int64_t before
|
||||
the multiply, so we do the multiply as 64-bit signed arithmetic,
|
||||
which is a type large enough that the multiply can never
|
||||
overflow into the sign bit.
|
||||
|
||||
(The equivalent 8-bit operations in DEF_IMOP_32() are fine, because
|
||||
the 8-bit multiplies can never overflow into the sign bit of a
|
||||
32-bit integer.)
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2372
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Message-id: 20240722172957.1041231-3-peter.maydell@linaro.org
|
||||
(cherry picked from commit ea3f5a90f036734522e9af3bffd77e69e9f47355)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
target/arm/tcg/sme_helper.c | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
|
||||
index 5a6dd76489..f9001f5213 100644
|
||||
--- a/target/arm/tcg/sme_helper.c
|
||||
+++ b/target/arm/tcg/sme_helper.c
|
||||
@@ -1146,10 +1146,10 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
|
||||
uint64_t sum = 0; \
|
||||
/* Apply P to N as a mask, making the inactive elements 0. */ \
|
||||
n &= expand_pred_h(p); \
|
||||
- sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
|
||||
- sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
|
||||
- sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
|
||||
- sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
|
||||
+ sum += (int64_t)(NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
|
||||
+ sum += (int64_t)(NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
|
||||
+ sum += (int64_t)(NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
|
||||
+ sum += (int64_t)(NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
|
||||
return neg ? a - sum : a + sum; \
|
||||
}
|
||||
|
62
debian/patches/extra/0024-target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch
vendored
Normal file
62
debian/patches/extra/0024-target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch
vendored
Normal file
|
@ -0,0 +1,62 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Mon, 22 Jul 2024 18:29:56 +0100
|
||||
Subject: [PATCH] target/arm: Avoid shifts by -1 in tszimm_shr() and
|
||||
tszimm_shl()
|
||||
|
||||
The function tszimm_esz() returns a shift amount, or possibly -1 in
|
||||
certain cases that correspond to unallocated encodings in the
|
||||
instruction set. We catch these later in the trans_ functions
|
||||
(generally with an "a-esz < 0" check), but before we do the
|
||||
decodetree-generated code will also call tszimm_shr() or tszimm_sl(),
|
||||
which will use the tszimm_esz() return value as a shift count without
|
||||
checking that it is not negative, which is undefined behaviour.
|
||||
|
||||
Avoid the UB by checking the return value in tszimm_shr() and
|
||||
tszimm_shl().
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Resolves: Coverity CID 1547617, 1547694
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Message-id: 20240722172957.1041231-4-peter.maydell@linaro.org
|
||||
(cherry picked from commit 76916dfa89e8900639c1055c07a295c06628a0bc)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
target/arm/tcg/translate-sve.c | 18 ++++++++++++++++--
|
||||
1 file changed, 16 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
|
||||
index ada05aa530..466a19c25a 100644
|
||||
--- a/target/arm/tcg/translate-sve.c
|
||||
+++ b/target/arm/tcg/translate-sve.c
|
||||
@@ -50,13 +50,27 @@ static int tszimm_esz(DisasContext *s, int x)
|
||||
|
||||
static int tszimm_shr(DisasContext *s, int x)
|
||||
{
|
||||
- return (16 << tszimm_esz(s, x)) - x;
|
||||
+ /*
|
||||
+ * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the
|
||||
+ * trans function will check for esz < 0), so we can return any
|
||||
+ * value we like from here in that case as long as we avoid UB.
|
||||
+ */
|
||||
+ int esz = tszimm_esz(s, x);
|
||||
+ if (esz < 0) {
|
||||
+ return esz;
|
||||
+ }
|
||||
+ return (16 << esz) - x;
|
||||
}
|
||||
|
||||
/* See e.g. LSL (immediate, predicated). */
|
||||
static int tszimm_shl(DisasContext *s, int x)
|
||||
{
|
||||
- return x - (8 << tszimm_esz(s, x));
|
||||
+ /* As with tszimm_shr(), value will be unused if esz < 0 */
|
||||
+ int esz = tszimm_esz(s, x);
|
||||
+ if (esz < 0) {
|
||||
+ return esz;
|
||||
+ }
|
||||
+ return x - (8 << esz);
|
||||
}
|
||||
|
||||
/* The SH bit is in bit 8. Extract the low 8 and shift. */
|
41
debian/patches/extra/0025-target-arm-Ignore-SMCR_EL2.LEN-and-SVCR_EL2.LEN-if-E.patch
vendored
Normal file
41
debian/patches/extra/0025-target-arm-Ignore-SMCR_EL2.LEN-and-SVCR_EL2.LEN-if-E.patch
vendored
Normal file
|
@ -0,0 +1,41 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Mon, 22 Jul 2024 18:29:57 +0100
|
||||
Subject: [PATCH] target/arm: Ignore SMCR_EL2.LEN and SVCR_EL2.LEN if EL2 is
|
||||
not enabled
|
||||
|
||||
When determining the current vector length, the SMCR_EL2.LEN and
|
||||
SVCR_EL2.LEN settings should only be considered if EL2 is enabled
|
||||
(compare the pseudocode CurrentSVL and CurrentNSVL which call
|
||||
EL2Enabled()).
|
||||
|
||||
We were checking against ARM_FEATURE_EL2 rather than calling
|
||||
arm_is_el2_enabled(), which meant that we would look at
|
||||
SMCR_EL2/SVCR_EL2 when in Secure EL1 or Secure EL0 even if Secure EL2
|
||||
was not enabled.
|
||||
|
||||
Use the correct check in sve_vqm1_for_el_sm().
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Message-id: 20240722172957.1041231-5-peter.maydell@linaro.org
|
||||
(cherry picked from commit f573ac059ed060234fcef4299fae9e500d357c33)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
target/arm/helper.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/arm/helper.c b/target/arm/helper.c
|
||||
index a620481d7c..42044ae14b 100644
|
||||
--- a/target/arm/helper.c
|
||||
+++ b/target/arm/helper.c
|
||||
@@ -7191,7 +7191,7 @@ uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm)
|
||||
if (el <= 1 && !el_is_in_host(env, el)) {
|
||||
len = MIN(len, 0xf & (uint32_t)cr[1]);
|
||||
}
|
||||
- if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
|
||||
+ if (el <= 2 && arm_is_el2_enabled(env)) {
|
||||
len = MIN(len, 0xf & (uint32_t)cr[2]);
|
||||
}
|
||||
if (arm_feature(env, ARM_FEATURE_EL3)) {
|
164
debian/patches/extra/0026-target-arm-Handle-denormals-correctly-for-FMOPA-wide.patch
vendored
Normal file
164
debian/patches/extra/0026-target-arm-Handle-denormals-correctly-for-FMOPA-wide.patch
vendored
Normal file
|
@ -0,0 +1,164 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Thu, 1 Aug 2024 10:15:03 +0100
|
||||
Subject: [PATCH] target/arm: Handle denormals correctly for FMOPA (widening)
|
||||
|
||||
The FMOPA (widening) SME instruction takes pairs of half-precision
|
||||
floating point values, widens them to single-precision, does a
|
||||
two-way dot product and accumulates the results into a
|
||||
single-precision destination. We don't quite correctly handle the
|
||||
FPCR bits FZ and FZ16 which control flushing of denormal inputs and
|
||||
outputs. This is because at the moment we pass a single float_status
|
||||
value to the helper function, which then uses that configuration for
|
||||
all the fp operations it does. However, because the inputs to this
|
||||
operation are float16 and the outputs are float32 we need to use the
|
||||
fp_status_f16 for the float16 input widening but the normal fp_status
|
||||
for everything else. Otherwise we will apply the flushing control
|
||||
FPCR.FZ16 to the 32-bit output rather than the FPCR.FZ control, and
|
||||
incorrectly flush a denormal output to zero when we should not (or
|
||||
vice-versa).
|
||||
|
||||
(In commit 207d30b5fdb5b we tried to fix the FZ handling but
|
||||
didn't get it right, switching from "use FPCR.FZ for everything" to
|
||||
"use FPCR.FZ16 for everything".)
|
||||
|
||||
Pass the CPU env to the sme_fmopa_h helper instead of an fp_status
|
||||
pointer, and have the helper pass an extra fp_status into the
|
||||
f16_dotadd() function so that we can use the right status for the
|
||||
right parts of this operation.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Fixes: 207d30b5fdb5 ("target/arm: Use FPST_F16 for SME FMOPA (widening)")
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2373
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
(cherry picked from commit 55f9f4ee018c5ccea81d8c8c586756d7711ae46f)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
target/arm/tcg/helper-sme.h | 2 +-
|
||||
target/arm/tcg/sme_helper.c | 39 +++++++++++++++++++++++-----------
|
||||
target/arm/tcg/translate-sme.c | 25 ++++++++++++++++++++--
|
||||
3 files changed, 51 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
|
||||
index 27eef49a11..d22bf9d21b 100644
|
||||
--- a/target/arm/tcg/helper-sme.h
|
||||
+++ b/target/arm/tcg/helper-sme.h
|
||||
@@ -121,7 +121,7 @@ DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
|
||||
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
+ void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
|
||||
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
|
||||
index f9001f5213..3906bb51c0 100644
|
||||
--- a/target/arm/tcg/sme_helper.c
|
||||
+++ b/target/arm/tcg/sme_helper.c
|
||||
@@ -976,12 +976,23 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
|
||||
}
|
||||
|
||||
static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
|
||||
- float_status *s_std, float_status *s_odd)
|
||||
+ float_status *s_f16, float_status *s_std,
|
||||
+ float_status *s_odd)
|
||||
{
|
||||
- float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
|
||||
- float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
|
||||
- float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
|
||||
- float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
|
||||
+ /*
|
||||
+ * We need three different float_status for different parts of this
|
||||
+ * operation:
|
||||
+ * - the input conversion of the float16 values must use the
|
||||
+ * f16-specific float_status, so that the FPCR.FZ16 control is applied
|
||||
+ * - operations on float32 including the final accumulation must use
|
||||
+ * the normal float_status, so that FPCR.FZ is applied
|
||||
+ * - we have pre-set-up copy of s_std which is set to round-to-odd,
|
||||
+ * for the multiply (see below)
|
||||
+ */
|
||||
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16);
|
||||
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_f16);
|
||||
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16);
|
||||
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_f16);
|
||||
float64 t64;
|
||||
float32 t32;
|
||||
|
||||
@@ -1003,20 +1014,23 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
|
||||
}
|
||||
|
||||
void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
|
||||
- void *vpm, void *vst, uint32_t desc)
|
||||
+ void *vpm, CPUARMState *env, uint32_t desc)
|
||||
{
|
||||
intptr_t row, col, oprsz = simd_maxsz(desc);
|
||||
uint32_t neg = simd_data(desc) * 0x80008000u;
|
||||
uint16_t *pn = vpn, *pm = vpm;
|
||||
- float_status fpst_odd, fpst_std;
|
||||
+ float_status fpst_odd, fpst_std, fpst_f16;
|
||||
|
||||
/*
|
||||
- * Make a copy of float_status because this operation does not
|
||||
- * update the cumulative fp exception status. It also produces
|
||||
- * default nans. Make a second copy with round-to-odd -- see above.
|
||||
+ * Make copies of fp_status and fp_status_f16, because this operation
|
||||
+ * does not update the cumulative fp exception status. It also
|
||||
+ * produces default NaNs. We also need a second copy of fp_status with
|
||||
+ * round-to-odd -- see above.
|
||||
*/
|
||||
- fpst_std = *(float_status *)vst;
|
||||
+ fpst_f16 = env->vfp.fp_status_f16;
|
||||
+ fpst_std = env->vfp.fp_status;
|
||||
set_default_nan_mode(true, &fpst_std);
|
||||
+ set_default_nan_mode(true, &fpst_f16);
|
||||
fpst_odd = fpst_std;
|
||||
set_float_rounding_mode(float_round_to_odd, &fpst_odd);
|
||||
|
||||
@@ -1036,7 +1050,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
|
||||
uint32_t m = *(uint32_t *)(vzm + H1_4(col));
|
||||
|
||||
m = f16mop_adj_pair(m, pcol, 0);
|
||||
- *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
|
||||
+ *a = f16_dotadd(*a, n, m,
|
||||
+ &fpst_f16, &fpst_std, &fpst_odd);
|
||||
}
|
||||
col += 4;
|
||||
pcol >>= 4;
|
||||
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
|
||||
index a50a419af2..ae42ddef7b 100644
|
||||
--- a/target/arm/tcg/translate-sme.c
|
||||
+++ b/target/arm/tcg/translate-sme.c
|
||||
@@ -334,8 +334,29 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
|
||||
return true;
|
||||
}
|
||||
|
||||
-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
|
||||
- MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
|
||||
+static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz,
|
||||
+ gen_helper_gvec_5_ptr *fn)
|
||||
+{
|
||||
+ int svl = streaming_vec_reg_size(s);
|
||||
+ uint32_t desc = simd_desc(svl, svl, a->sub);
|
||||
+ TCGv_ptr za, zn, zm, pn, pm;
|
||||
+
|
||||
+ if (!sme_smza_enabled_check(s)) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ za = get_tile(s, esz, a->zad);
|
||||
+ zn = vec_full_reg_ptr(s, a->zn);
|
||||
+ zm = vec_full_reg_ptr(s, a->zm);
|
||||
+ pn = pred_full_reg_ptr(s, a->pn);
|
||||
+ pm = pred_full_reg_ptr(s, a->pm);
|
||||
+
|
||||
+ fn(za, zn, zm, pn, pm, tcg_env, tcg_constant_i32(desc));
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
|
||||
+ MO_32, gen_helper_sme_fmopa_h)
|
||||
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
|
||||
MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
|
||||
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
|
|
@ -0,0 +1,39 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Cl=C3=A9ment=20Mathieu--Drif?=
|
||||
<clement.mathieu--drif@eviden.com>
|
||||
Date: Tue, 9 Jul 2024 14:26:08 +0000
|
||||
Subject: [PATCH] intel_iommu: fix FRCD construction macro
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The constant must be unsigned, otherwise the two's complement
|
||||
overrides the other fields when a PASID is present.
|
||||
|
||||
Fixes: 1b2b12376c8a ("intel-iommu: PASID support")
|
||||
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
|
||||
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
|
||||
Message-Id: <20240709142557.317271-2-clement.mathieu--drif@eviden.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit a3c8d7e38550c3d5a46e6fa94ffadfa625a4861d)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
hw/i386/intel_iommu_internal.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
|
||||
index f8cf99bddf..cbc4030031 100644
|
||||
--- a/hw/i386/intel_iommu_internal.h
|
||||
+++ b/hw/i386/intel_iommu_internal.h
|
||||
@@ -267,7 +267,7 @@
|
||||
/* For the low 64-bit of 128-bit */
|
||||
#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
|
||||
#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
|
||||
-#define VTD_FRCD_PP(val) (((val) & 0x1) << 31)
|
||||
+#define VTD_FRCD_PP(val) (((val) & 0x1ULL) << 31)
|
||||
#define VTD_FRCD_IR_IDX(val) (((val) & 0xffffULL) << 48)
|
||||
|
||||
/* DMA Remapping Fault Conditions */
|
33
debian/patches/extra/0028-target-i386-Do-not-apply-REX-to-MMX-operands.patch
vendored
Normal file
33
debian/patches/extra/0028-target-i386-Do-not-apply-REX-to-MMX-operands.patch
vendored
Normal file
|
@ -0,0 +1,33 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Henderson <richard.henderson@linaro.org>
|
||||
Date: Mon, 12 Aug 2024 12:58:42 +1000
|
||||
Subject: [PATCH] target/i386: Do not apply REX to MMX operands
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Fixes: b3e22b2318a ("target/i386: add core of new i386 decoder")
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2495
|
||||
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Link: https://lore.kernel.org/r/20240812025844.58956-2-richard.henderson@linaro.org
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 416f2b16c02c618c0f233372ebfe343f9ee667d4)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
target/i386/tcg/decode-new.c.inc | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
|
||||
index 4209d59ca8..09b8d2314a 100644
|
||||
--- a/target/i386/tcg/decode-new.c.inc
|
||||
+++ b/target/i386/tcg/decode-new.c.inc
|
||||
@@ -1271,7 +1271,10 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
|
||||
op->unit = X86_OP_SSE;
|
||||
}
|
||||
get_reg:
|
||||
- op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
|
||||
+ op->n = ((get_modrm(s, env) >> 3) & 7);
|
||||
+ if (op->unit != X86_OP_MMX) {
|
||||
+ op->n |= REX_R(s);
|
||||
+ }
|
||||
break;
|
||||
|
||||
case X86_TYPE_E: /* ALU modrm operand */
|
42
debian/patches/extra/0029-module-Prevent-crash-by-resetting-local_err-in-modul.patch
vendored
Normal file
42
debian/patches/extra/0029-module-Prevent-crash-by-resetting-local_err-in-modul.patch
vendored
Normal file
|
@ -0,0 +1,42 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
|
||||
Date: Fri, 9 Aug 2024 14:13:40 +0200
|
||||
Subject: [PATCH] module: Prevent crash by resetting local_err in
|
||||
module_load_qom_all()
|
||||
|
||||
Set local_err to NULL after it has been freed in error_report_err(). This
|
||||
avoids triggering assert(*errp == NULL) failure in error_setv() when
|
||||
local_err is reused in the loop.
|
||||
|
||||
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
|
||||
Reviewed-by: Claudio Fontana <cfontana@suse.de>
|
||||
Reviewed-by: Denis V. Lunev <den@openvz.org>
|
||||
Link: https://lore.kernel.org/r/20240809121340.992049-2-alexander.ivanov@virtuozzo.com
|
||||
[Do the same by moving the declaration instead. - Paolo]
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 940d802b24e63650e0eacad3714e2ce171cba17c)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
util/module.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/util/module.c b/util/module.c
|
||||
index 32e263163c..3eb0f06df1 100644
|
||||
--- a/util/module.c
|
||||
+++ b/util/module.c
|
||||
@@ -354,13 +354,13 @@ int module_load_qom(const char *type, Error **errp)
|
||||
void module_load_qom_all(void)
|
||||
{
|
||||
const QemuModinfo *modinfo;
|
||||
- Error *local_err = NULL;
|
||||
|
||||
if (module_loaded_qom_all) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (modinfo = module_info; modinfo->name != NULL; modinfo++) {
|
||||
+ Error *local_err = NULL;
|
||||
if (!modinfo->objs) {
|
||||
continue;
|
||||
}
|
164
debian/patches/extra/0030-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch
vendored
Normal file
164
debian/patches/extra/0030-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch
vendored
Normal file
|
@ -0,0 +1,164 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Wed, 7 Aug 2024 08:50:01 -0500
|
||||
Subject: [PATCH] nbd/server: Plumb in new args to nbd_client_add()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Upcoming patches to fix a CVE need to track an opaque pointer passed
|
||||
in by the owner of a client object, as well as request for a time
|
||||
limit on how fast negotiation must complete. Prepare for that by
|
||||
changing the signature of nbd_client_new() and adding an accessor to
|
||||
get at the opaque pointer, although for now the two servers
|
||||
(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though
|
||||
they pass in a new default timeout value.
|
||||
|
||||
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240807174943.771624-11-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan]
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
blockdev-nbd.c | 6 ++++--
|
||||
include/block/nbd.h | 11 ++++++++++-
|
||||
nbd/server.c | 20 +++++++++++++++++---
|
||||
qemu-nbd.c | 4 +++-
|
||||
4 files changed, 34 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
||||
index 213012435f..267a1de903 100644
|
||||
--- a/blockdev-nbd.c
|
||||
+++ b/blockdev-nbd.c
|
||||
@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
||||
nbd_update_server_watch(nbd_server);
|
||||
|
||||
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
|
||||
- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz,
|
||||
- nbd_blockdev_client_closed);
|
||||
+ /* TODO - expose handshake timeout as QMP option */
|
||||
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
||||
+ nbd_server->tlscreds, nbd_server->tlsauthz,
|
||||
+ nbd_blockdev_client_closed, NULL);
|
||||
}
|
||||
|
||||
static void nbd_update_server_watch(NBDServerData *s)
|
||||
diff --git a/include/block/nbd.h b/include/block/nbd.h
|
||||
index 4e7bd6342f..1d4d65922d 100644
|
||||
--- a/include/block/nbd.h
|
||||
+++ b/include/block/nbd.h
|
||||
@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts;
|
||||
|
||||
extern const BlockExportDriver blk_exp_nbd;
|
||||
|
||||
+/*
|
||||
+ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must
|
||||
+ * succeed at NBD_OPT_GO before being forcefully dropped as too slow.
|
||||
+ */
|
||||
+#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
|
||||
+
|
||||
/* Handshake phase structs - this struct is passed on the wire */
|
||||
|
||||
typedef struct NBDOption {
|
||||
@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp);
|
||||
NBDExport *nbd_export_find(const char *name);
|
||||
|
||||
void nbd_client_new(QIOChannelSocket *sioc,
|
||||
+ uint32_t handshake_max_secs,
|
||||
QCryptoTLSCreds *tlscreds,
|
||||
const char *tlsauthz,
|
||||
- void (*close_fn)(NBDClient *, bool));
|
||||
+ void (*close_fn)(NBDClient *, bool),
|
||||
+ void *owner);
|
||||
+void *nbd_client_owner(NBDClient *client);
|
||||
void nbd_client_get(NBDClient *client);
|
||||
void nbd_client_put(NBDClient *client);
|
||||
|
||||
diff --git a/nbd/server.c b/nbd/server.c
|
||||
index 892797bb11..e50012499f 100644
|
||||
--- a/nbd/server.c
|
||||
+++ b/nbd/server.c
|
||||
@@ -124,12 +124,14 @@ struct NBDMetaContexts {
|
||||
struct NBDClient {
|
||||
int refcount; /* atomic */
|
||||
void (*close_fn)(NBDClient *client, bool negotiated);
|
||||
+ void *owner;
|
||||
|
||||
QemuMutex lock;
|
||||
|
||||
NBDExport *exp;
|
||||
QCryptoTLSCreds *tlscreds;
|
||||
char *tlsauthz;
|
||||
+ uint32_t handshake_max_secs;
|
||||
QIOChannelSocket *sioc; /* The underlying data channel */
|
||||
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
|
||||
|
||||
@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
|
||||
|
||||
qemu_co_mutex_init(&client->send_lock);
|
||||
|
||||
+ /* TODO - utilize client->handshake_max_secs */
|
||||
if (nbd_negotiate(client, &local_err)) {
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
|
||||
}
|
||||
|
||||
/*
|
||||
- * Create a new client listener using the given channel @sioc.
|
||||
+ * Create a new client listener using the given channel @sioc and @owner.
|
||||
* Begin servicing it in a coroutine. When the connection closes, call
|
||||
- * @close_fn with an indication of whether the client completed negotiation.
|
||||
+ * @close_fn with an indication of whether the client completed negotiation
|
||||
+ * within @handshake_max_secs seconds (0 for unbounded).
|
||||
*/
|
||||
void nbd_client_new(QIOChannelSocket *sioc,
|
||||
+ uint32_t handshake_max_secs,
|
||||
QCryptoTLSCreds *tlscreds,
|
||||
const char *tlsauthz,
|
||||
- void (*close_fn)(NBDClient *, bool))
|
||||
+ void (*close_fn)(NBDClient *, bool),
|
||||
+ void *owner)
|
||||
{
|
||||
NBDClient *client;
|
||||
Coroutine *co;
|
||||
@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc,
|
||||
object_ref(OBJECT(client->tlscreds));
|
||||
}
|
||||
client->tlsauthz = g_strdup(tlsauthz);
|
||||
+ client->handshake_max_secs = handshake_max_secs;
|
||||
client->sioc = sioc;
|
||||
qio_channel_set_delay(QIO_CHANNEL(sioc), false);
|
||||
object_ref(OBJECT(client->sioc));
|
||||
client->ioc = QIO_CHANNEL(sioc);
|
||||
object_ref(OBJECT(client->ioc));
|
||||
client->close_fn = close_fn;
|
||||
+ client->owner = owner;
|
||||
|
||||
co = qemu_coroutine_create(nbd_co_client_start, client);
|
||||
qemu_coroutine_enter(co);
|
||||
}
|
||||
+
|
||||
+void *
|
||||
+nbd_client_owner(NBDClient *client)
|
||||
+{
|
||||
+ return client->owner;
|
||||
+}
|
||||
diff --git a/qemu-nbd.c b/qemu-nbd.c
|
||||
index d7b3ccab21..48e2fa5858 100644
|
||||
--- a/qemu-nbd.c
|
||||
+++ b/qemu-nbd.c
|
||||
@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
||||
|
||||
nb_fds++;
|
||||
nbd_update_server_watch();
|
||||
- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed);
|
||||
+ /* TODO - expose handshake timeout as command line option */
|
||||
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
||||
+ tlscreds, tlsauthz, nbd_client_closed, NULL);
|
||||
}
|
||||
|
||||
static void nbd_update_server_watch(void)
|
172
debian/patches/extra/0031-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch
vendored
Normal file
172
debian/patches/extra/0031-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch
vendored
Normal file
|
@ -0,0 +1,172 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Tue, 6 Aug 2024 13:53:00 -0500
|
||||
Subject: [PATCH] nbd/server: CVE-2024-7409: Cap default max-connections to 100
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Allowing an unlimited number of clients to any web service is a recipe
|
||||
for a rudimentary denial of service attack: the client merely needs to
|
||||
open lots of sockets without closing them, until qemu no longer has
|
||||
any more fds available to allocate.
|
||||
|
||||
For qemu-nbd, we default to allowing only 1 connection unless more are
|
||||
explicitly asked for (-e or --shared); this was historically picked as
|
||||
a nice default (without an explicit -t, a non-persistent qemu-nbd goes
|
||||
away after a client disconnects, without needing any additional
|
||||
follow-up commands), and we are not going to change that interface now
|
||||
(besides, someday we want to point people towards qemu-storage-daemon
|
||||
instead of qemu-nbd).
|
||||
|
||||
But for qemu proper, and the newer qemu-storage-daemon, the QMP
|
||||
nbd-server-start command has historically had a default of unlimited
|
||||
number of connections, in part because unlike qemu-nbd it is
|
||||
inherently persistent until nbd-server-stop. Allowing multiple client
|
||||
sockets is particularly useful for clients that can take advantage of
|
||||
MULTI_CONN (creating parallel sockets to increase throughput),
|
||||
although known clients that do so (such as libnbd's nbdcopy) typically
|
||||
use only 8 or 16 connections (the benefits of scaling diminish once
|
||||
more sockets are competing for kernel attention). Picking a number
|
||||
large enough for typical use cases, but not unlimited, makes it
|
||||
slightly harder for a malicious client to perform a denial of service
|
||||
merely by opening lots of connections withot progressing through the
|
||||
handshake.
|
||||
|
||||
This change does not eliminate CVE-2024-7409 on its own, but reduces
|
||||
the chance for fd exhaustion or unlimited memory usage as an attack
|
||||
surface. On the other hand, by itself, it makes it more obvious that
|
||||
with a finite limit, we have the problem of an unauthenticated client
|
||||
holding 100 fds opened as a way to block out a legitimate client from
|
||||
being able to connect; thus, later patches will further add timeouts
|
||||
to reject clients that are not making progress.
|
||||
|
||||
This is an INTENTIONAL change in behavior, and will break any client
|
||||
of nbd-server-start that was not passing an explicit max-connections
|
||||
parameter, yet expects more than 100 simultaneous connections. We are
|
||||
not aware of any such client (as stated above, most clients aware of
|
||||
MULTI_CONN get by just fine on 8 or 16 connections, and probably cope
|
||||
with later connections failing by relying on the earlier connections;
|
||||
libvirt has not yet been passing max-connections, but generally
|
||||
creates NBD servers with the intent for a single client for the sake
|
||||
of live storage migration; meanwhile, the KubeSAN project anticipates
|
||||
a large cluster sharing multiple clients [up to 8 per node, and up to
|
||||
100 nodes in a cluster], but it currently uses qemu-nbd with an
|
||||
explicit --shared=0 rather than qemu-storage-daemon with
|
||||
nbd-server-start).
|
||||
|
||||
We considered using a deprecation period (declare that omitting
|
||||
max-parameters is deprecated, and make it mandatory in 3 releases -
|
||||
then we don't need to pick an arbitrary default); that has zero risk
|
||||
of breaking any apps that accidentally depended on more than 100
|
||||
connections, and where such breakage might not be noticed under unit
|
||||
testing but only under the larger loads of production usage. But it
|
||||
does not close the denial-of-service hole until far into the future,
|
||||
and requires all apps to change to add the parameter even if 100 was
|
||||
good enough. It also has a drawback that any app (like libvirt) that
|
||||
is accidentally relying on an unlimited default should seriously
|
||||
consider their own CVE now, at which point they are going to change to
|
||||
pass explicit max-connections sooner than waiting for 3 qemu releases.
|
||||
Finally, if our changed default breaks an app, that app can always
|
||||
pass in an explicit max-parameters with a larger value.
|
||||
|
||||
It is also intentional that the HMP interface to nbd-server-start is
|
||||
not changed to expose max-connections (any client needing to fine-tune
|
||||
things should be using QMP).
|
||||
|
||||
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240807174943.771624-12-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
[ericb: Expand commit message to summarize Dan's argument for why we
|
||||
break corner-case back-compat behavior without a deprecation period]
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
block/monitor/block-hmp-cmds.c | 3 ++-
|
||||
blockdev-nbd.c | 8 ++++++++
|
||||
include/block/nbd.h | 7 +++++++
|
||||
qapi/block-export.json | 4 ++--
|
||||
4 files changed, 19 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
||||
index d954bec6f1..bdf2eb50b6 100644
|
||||
--- a/block/monitor/block-hmp-cmds.c
|
||||
+++ b/block/monitor/block-hmp-cmds.c
|
||||
@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
- nbd_server_start(addr, NULL, NULL, 0, &local_err);
|
||||
+ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS,
|
||||
+ &local_err);
|
||||
qapi_free_SocketAddress(addr);
|
||||
if (local_err != NULL) {
|
||||
goto exit;
|
||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
||||
index 267a1de903..24ba5382db 100644
|
||||
--- a/blockdev-nbd.c
|
||||
+++ b/blockdev-nbd.c
|
||||
@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
|
||||
|
||||
void nbd_server_start_options(NbdServerOptions *arg, Error **errp)
|
||||
{
|
||||
+ if (!arg->has_max_connections) {
|
||||
+ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
|
||||
+ }
|
||||
+
|
||||
nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz,
|
||||
arg->max_connections, errp);
|
||||
}
|
||||
@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr,
|
||||
{
|
||||
SocketAddress *addr_flat = socket_address_flatten(addr);
|
||||
|
||||
+ if (!has_max_connections) {
|
||||
+ max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
|
||||
+ }
|
||||
+
|
||||
nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp);
|
||||
qapi_free_SocketAddress(addr_flat);
|
||||
}
|
||||
diff --git a/include/block/nbd.h b/include/block/nbd.h
|
||||
index 1d4d65922d..d4f8b21aec 100644
|
||||
--- a/include/block/nbd.h
|
||||
+++ b/include/block/nbd.h
|
||||
@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd;
|
||||
*/
|
||||
#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
|
||||
|
||||
+/*
|
||||
+ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at
|
||||
+ * once; must be large enough to allow a MULTI_CONN-aware client like
|
||||
+ * nbdcopy to create its typical number of 8-16 sockets.
|
||||
+ */
|
||||
+#define NBD_DEFAULT_MAX_CONNECTIONS 100
|
||||
+
|
||||
/* Handshake phase structs - this struct is passed on the wire */
|
||||
|
||||
typedef struct NBDOption {
|
||||
diff --git a/qapi/block-export.json b/qapi/block-export.json
|
||||
index 3919a2d5b9..f45e4fd481 100644
|
||||
--- a/qapi/block-export.json
|
||||
+++ b/qapi/block-export.json
|
||||
@@ -28,7 +28,7 @@
|
||||
# @max-connections: The maximum number of connections to allow at the
|
||||
# same time, 0 for unlimited. Setting this to 1 also stops the
|
||||
# server from advertising multiple client support (since 5.2;
|
||||
-# default: 0)
|
||||
+# default: 100)
|
||||
#
|
||||
# Since: 4.2
|
||||
##
|
||||
@@ -63,7 +63,7 @@
|
||||
# @max-connections: The maximum number of connections to allow at the
|
||||
# same time, 0 for unlimited. Setting this to 1 also stops the
|
||||
# server from advertising multiple client support (since 5.2;
|
||||
-# default: 0).
|
||||
+# default: 100).
|
||||
#
|
||||
# Errors:
|
||||
# - if the server is already running
|
123
debian/patches/extra/0032-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch
vendored
Normal file
123
debian/patches/extra/0032-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch
vendored
Normal file
|
@ -0,0 +1,123 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Thu, 8 Aug 2024 16:05:08 -0500
|
||||
Subject: [PATCH] nbd/server: CVE-2024-7409: Drop non-negotiating clients
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
A client that opens a socket but does not negotiate is merely hogging
|
||||
qemu's resources (an open fd and a small amount of memory); and a
|
||||
malicious client that can access the port where NBD is listening can
|
||||
attempt a denial of service attack by intentionally opening and
|
||||
abandoning lots of unfinished connections. The previous patch put a
|
||||
default bound on the number of such ongoing connections, but once that
|
||||
limit is hit, no more clients can connect (including legitimate ones).
|
||||
The solution is to insist that clients complete handshake within a
|
||||
reasonable time limit, defaulting to 10 seconds. A client that has
|
||||
not successfully completed NBD_OPT_GO by then (including the case of
|
||||
where the client didn't know TLS credentials to even reach the point
|
||||
of NBD_OPT_GO) is wasting our time and does not deserve to stay
|
||||
connected. Later patches will allow fine-tuning the limit away from
|
||||
the default value (including disabling it for doing integration
|
||||
testing of the handshake process itself).
|
||||
|
||||
Note that this patch in isolation actually makes it more likely to see
|
||||
qemu SEGV after nbd-server-stop, as any client socket still connected
|
||||
when the server shuts down will now be closed after 10 seconds rather
|
||||
than at the client's whims. That will be addressed in the next patch.
|
||||
|
||||
For a demo of this patch in action:
|
||||
$ qemu-nbd -f raw -r -t -e 10 file &
|
||||
$ nbdsh --opt-mode -c '
|
||||
H = list()
|
||||
for i in range(20):
|
||||
print(i)
|
||||
H.insert(i, nbd.NBD())
|
||||
H[i].set_opt_mode(True)
|
||||
H[i].connect_uri("nbd://localhost")
|
||||
'
|
||||
$ kill $!
|
||||
|
||||
where later connections get to start progressing once earlier ones are
|
||||
forcefully dropped for taking too long, rather than hanging.
|
||||
|
||||
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240807174943.771624-13-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
[eblake: rebase to changes earlier in series, reduce scope of timer]
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
nbd/server.c | 28 +++++++++++++++++++++++++++-
|
||||
nbd/trace-events | 1 +
|
||||
2 files changed, 28 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/nbd/server.c b/nbd/server.c
|
||||
index e50012499f..39285cc971 100644
|
||||
--- a/nbd/server.c
|
||||
+++ b/nbd/server.c
|
||||
@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client)
|
||||
}
|
||||
}
|
||||
|
||||
+static void nbd_handshake_timer_cb(void *opaque)
|
||||
+{
|
||||
+ QIOChannel *ioc = opaque;
|
||||
+
|
||||
+ trace_nbd_handshake_timer_cb();
|
||||
+ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
|
||||
+}
|
||||
+
|
||||
static coroutine_fn void nbd_co_client_start(void *opaque)
|
||||
{
|
||||
NBDClient *client = opaque;
|
||||
Error *local_err = NULL;
|
||||
+ QEMUTimer *handshake_timer = NULL;
|
||||
|
||||
qemu_co_mutex_init(&client->send_lock);
|
||||
|
||||
- /* TODO - utilize client->handshake_max_secs */
|
||||
+ /*
|
||||
+ * Create a timer to bound the time spent in negotiation. If the
|
||||
+ * timer expires, it is likely nbd_negotiate will fail because the
|
||||
+ * socket was shutdown.
|
||||
+ */
|
||||
+ if (client->handshake_max_secs > 0) {
|
||||
+ handshake_timer = aio_timer_new(qemu_get_aio_context(),
|
||||
+ QEMU_CLOCK_REALTIME,
|
||||
+ SCALE_NS,
|
||||
+ nbd_handshake_timer_cb,
|
||||
+ client->sioc);
|
||||
+ timer_mod(handshake_timer,
|
||||
+ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
|
||||
+ client->handshake_max_secs * NANOSECONDS_PER_SECOND);
|
||||
+ }
|
||||
+
|
||||
if (nbd_negotiate(client, &local_err)) {
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
}
|
||||
+ timer_free(handshake_timer);
|
||||
client_close(client, false);
|
||||
return;
|
||||
}
|
||||
|
||||
+ timer_free(handshake_timer);
|
||||
WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||
nbd_client_receive_next_request(client);
|
||||
}
|
||||
diff --git a/nbd/trace-events b/nbd/trace-events
|
||||
index 00ae3216a1..cbd0a4ab7e 100644
|
||||
--- a/nbd/trace-events
|
||||
+++ b/nbd/trace-events
|
||||
@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload
|
||||
nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64
|
||||
nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32
|
||||
nbd_trip(void) "Reading request"
|
||||
+nbd_handshake_timer_cb(void) "client took too long to negotiate"
|
||||
|
||||
# client-connection.c
|
||||
nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64
|
161
debian/patches/extra/0033-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch
vendored
Normal file
161
debian/patches/extra/0033-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch
vendored
Normal file
|
@ -0,0 +1,161 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Wed, 7 Aug 2024 12:23:13 -0500
|
||||
Subject: [PATCH] nbd/server: CVE-2024-7409: Close stray clients at server-stop
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
A malicious client can attempt to connect to an NBD server, and then
|
||||
intentionally delay progress in the handshake, including if it does
|
||||
not know the TLS secrets. Although the previous two patches reduce
|
||||
this behavior by capping the default max-connections parameter and
|
||||
killing slow clients, they did not eliminate the possibility of a
|
||||
client waiting to close the socket until after the QMP nbd-server-stop
|
||||
command is executed, at which point qemu would SEGV when trying to
|
||||
dereference the NULL nbd_server global which is no longer present.
|
||||
This amounts to a denial of service attack. Worse, if another NBD
|
||||
server is started before the malicious client disconnects, I cannot
|
||||
rule out additional adverse effects when the old client interferes
|
||||
with the connection count of the new server (although the most likely
|
||||
is a crash due to an assertion failure when checking
|
||||
nbd_server->connections > 0).
|
||||
|
||||
For environments without this patch, the CVE can be mitigated by
|
||||
ensuring (such as via a firewall) that only trusted clients can
|
||||
connect to an NBD server. Note that using frameworks like libvirt
|
||||
that ensure that TLS is used and that nbd-server-stop is not executed
|
||||
while any trusted clients are still connected will only help if there
|
||||
is also no possibility for an untrusted client to open a connection
|
||||
but then stall on the NBD handshake.
|
||||
|
||||
Given the previous patches, it would be possible to guarantee that no
|
||||
clients remain connected by having nbd-server-stop sleep for longer
|
||||
than the default handshake deadline before finally freeing the global
|
||||
nbd_server object, but that could make QMP non-responsive for a long
|
||||
time. So intead, this patch fixes the problem by tracking all client
|
||||
sockets opened while the server is running, and forcefully closing any
|
||||
such sockets remaining without a completed handshake at the time of
|
||||
nbd-server-stop, then waiting until the coroutines servicing those
|
||||
sockets notice the state change. nbd-server-stop now has a second
|
||||
AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the
|
||||
blk_exp_close_all_type() that disconnects all clients that completed
|
||||
handshakes), but forced socket shutdown is enough to progress the
|
||||
coroutines and quickly tear down all clients before the server is
|
||||
freed, thus finally fixing the CVE.
|
||||
|
||||
This patch relies heavily on the fact that nbd/server.c guarantees
|
||||
that it only calls nbd_blockdev_client_closed() from the main loop
|
||||
(see the assertion in nbd_client_put() and the hoops used in
|
||||
nbd_client_put_nonzero() to achieve that); if we did not have that
|
||||
guarantee, we would also need a mutex protecting our accesses of the
|
||||
list of connections to survive re-entrancy from independent iothreads.
|
||||
|
||||
Although I did not actually try to test old builds, it looks like this
|
||||
problem has existed since at least commit 862172f45c (v2.12.0, 2017) -
|
||||
even back when that patch started using a QIONetListener to handle
|
||||
listening on multiple sockets, nbd_server_free() was already unaware
|
||||
that the nbd_blockdev_client_closed callback can be reached later by a
|
||||
client thread that has not completed handshakes (and therefore the
|
||||
client's socket never got added to the list closed in
|
||||
nbd_export_close_all), despite that patch intentionally tearing down
|
||||
the QIONetListener to prevent new clients.
|
||||
|
||||
Reported-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
|
||||
Fixes: CVE-2024-7409
|
||||
CC: qemu-stable@nongnu.org
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240807174943.771624-14-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 34 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
||||
index 24ba5382db..f73409ae49 100644
|
||||
--- a/blockdev-nbd.c
|
||||
+++ b/blockdev-nbd.c
|
||||
@@ -21,12 +21,18 @@
|
||||
#include "io/channel-socket.h"
|
||||
#include "io/net-listener.h"
|
||||
|
||||
+typedef struct NBDConn {
|
||||
+ QIOChannelSocket *cioc;
|
||||
+ QLIST_ENTRY(NBDConn) next;
|
||||
+} NBDConn;
|
||||
+
|
||||
typedef struct NBDServerData {
|
||||
QIONetListener *listener;
|
||||
QCryptoTLSCreds *tlscreds;
|
||||
char *tlsauthz;
|
||||
uint32_t max_connections;
|
||||
uint32_t connections;
|
||||
+ QLIST_HEAD(, NBDConn) conns;
|
||||
} NBDServerData;
|
||||
|
||||
static NBDServerData *nbd_server;
|
||||
@@ -51,6 +57,14 @@ int nbd_server_max_connections(void)
|
||||
|
||||
static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
|
||||
{
|
||||
+ NBDConn *conn = nbd_client_owner(client);
|
||||
+
|
||||
+ assert(qemu_in_main_thread() && nbd_server);
|
||||
+
|
||||
+ object_unref(OBJECT(conn->cioc));
|
||||
+ QLIST_REMOVE(conn, next);
|
||||
+ g_free(conn);
|
||||
+
|
||||
nbd_client_put(client);
|
||||
assert(nbd_server->connections > 0);
|
||||
nbd_server->connections--;
|
||||
@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
|
||||
static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
||||
gpointer opaque)
|
||||
{
|
||||
+ NBDConn *conn = g_new0(NBDConn, 1);
|
||||
+
|
||||
+ assert(qemu_in_main_thread() && nbd_server);
|
||||
nbd_server->connections++;
|
||||
+ object_ref(OBJECT(cioc));
|
||||
+ conn->cioc = cioc;
|
||||
+ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next);
|
||||
nbd_update_server_watch(nbd_server);
|
||||
|
||||
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
|
||||
/* TODO - expose handshake timeout as QMP option */
|
||||
nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
||||
nbd_server->tlscreds, nbd_server->tlsauthz,
|
||||
- nbd_blockdev_client_closed, NULL);
|
||||
+ nbd_blockdev_client_closed, conn);
|
||||
}
|
||||
|
||||
static void nbd_update_server_watch(NBDServerData *s)
|
||||
@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s)
|
||||
|
||||
static void nbd_server_free(NBDServerData *server)
|
||||
{
|
||||
+ NBDConn *conn, *tmp;
|
||||
+
|
||||
if (!server) {
|
||||
return;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Forcefully close the listener socket, and any clients that have
|
||||
+ * not yet disconnected on their own.
|
||||
+ */
|
||||
qio_net_listener_disconnect(server->listener);
|
||||
object_unref(OBJECT(server->listener));
|
||||
+ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
|
||||
+ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
|
||||
+ NULL);
|
||||
+ }
|
||||
+
|
||||
+ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0);
|
||||
+
|
||||
if (server->tlscreds) {
|
||||
object_unref(OBJECT(server->tlscreds));
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
|
||||
Date: Tue, 20 Aug 2024 17:11:12 +0400
|
||||
Subject: [PATCH] vnc: fix crash when no console attached
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Since commit e99441a3793b5 ("ui/curses: Do not use console_select()")
|
||||
qemu_text_console_put_keysym() no longer checks for NULL console
|
||||
argument, which leads to a later crash:
|
||||
|
||||
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
|
||||
0x00005555559ee186 in qemu_text_console_handle_keysym (s=0x0, keysym=31) at ../ui/console-vc.c:332
|
||||
332 } else if (s->echo && (keysym == '\r' || keysym == '\n')) {
|
||||
(gdb) bt
|
||||
#0 0x00005555559ee186 in qemu_text_console_handle_keysym (s=0x0, keysym=31) at ../ui/console-vc.c:332
|
||||
#1 0x00005555559e18e5 in qemu_text_console_put_keysym (s=<optimized out>, keysym=<optimized out>) at ../ui/console.c:303
|
||||
#2 0x00005555559f2e88 in do_key_event (vs=vs@entry=0x5555579045c0, down=down@entry=1, keycode=keycode@entry=60, sym=sym@entry=65471) at ../ui/vnc.c:2034
|
||||
#3 0x00005555559f845c in ext_key_event (vs=0x5555579045c0, down=1, sym=65471, keycode=<optimized out>) at ../ui/vnc.c:2070
|
||||
#4 protocol_client_msg (vs=0x5555579045c0, data=<optimized out>, len=<optimized out>) at ../ui/vnc.c:2514
|
||||
#5 0x00005555559f515c in vnc_client_read (vs=0x5555579045c0) at ../ui/vnc.c:1607
|
||||
|
||||
Fixes: e99441a3793b5 ("ui/curses: Do not use console_select()")
|
||||
Fixes: https://issues.redhat.com/browse/RHEL-50529
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
|
||||
(picked from https://lore.kernel.org/qemu-devel/20240820131112.1267954-1-marcandre.lureau@redhat.com/)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
ui/vnc.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/ui/vnc.c b/ui/vnc.c
|
||||
index b3fd78022b..953ea38318 100644
|
||||
--- a/ui/vnc.c
|
||||
+++ b/ui/vnc.c
|
||||
@@ -1935,7 +1935,7 @@ static void do_key_event(VncState *vs, int down, int keycode, int sym)
|
||||
}
|
||||
|
||||
qkbd_state_key_event(vs->vd->kbd, qcode, down);
|
||||
- if (!qemu_console_is_graphic(vs->vd->dcl.con)) {
|
||||
+ if (QEMU_IS_TEXT_CONSOLE(vs->vd->dcl.con)) {
|
||||
QemuTextConsole *con = QEMU_TEXT_CONSOLE(vs->vd->dcl.con);
|
||||
bool numlock = qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_NUMLOCK);
|
||||
bool control = qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_CTRL);
|
89
debian/patches/extra/0035-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch
vendored
Normal file
89
debian/patches/extra/0035-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch
vendored
Normal file
|
@ -0,0 +1,89 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Thu, 22 Aug 2024 09:35:29 -0500
|
||||
Subject: [PATCH] nbd/server: CVE-2024-7409: Avoid use-after-free when closing
|
||||
server
|
||||
|
||||
Commit 3e7ef738 plugged the use-after-free of the global nbd_server
|
||||
object, but overlooked a use-after-free of nbd_server->listener.
|
||||
Although this race is harder to hit, notice that our shutdown path
|
||||
first drops the reference count of nbd_server->listener, then triggers
|
||||
actions that can result in a pending client reaching the
|
||||
nbd_blockdev_client_closed() callback, which in turn calls
|
||||
qio_net_listener_set_client_func on a potentially stale object.
|
||||
|
||||
If we know we don't want any more clients to connect, and have already
|
||||
told the listener socket to shut down, then we should not be trying to
|
||||
update the listener socket's associated function.
|
||||
|
||||
Reproducer:
|
||||
|
||||
> #!/usr/bin/python3
|
||||
>
|
||||
> import os
|
||||
> from threading import Thread
|
||||
>
|
||||
> def start_stop():
|
||||
> while 1:
|
||||
> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start",
|
||||
+"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'')
|
||||
> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'')
|
||||
>
|
||||
> def nbd_list():
|
||||
> while 1:
|
||||
> os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock')
|
||||
>
|
||||
> def test():
|
||||
> sst = Thread(target=start_stop)
|
||||
> sst.start()
|
||||
> nlt = Thread(target=nbd_list)
|
||||
> nlt.start()
|
||||
>
|
||||
> sst.join()
|
||||
> nlt.join()
|
||||
>
|
||||
> test()
|
||||
|
||||
Fixes: CVE-2024-7409
|
||||
Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop")
|
||||
CC: qemu-stable@nongnu.org
|
||||
Reported-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240822143617.800419-2-eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 3874f5f73c441c52f1c699c848d463b0eda01e4c)
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
blockdev-nbd.c | 12 ++++++++----
|
||||
1 file changed, 8 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
||||
index f73409ae49..b36f41b7c5 100644
|
||||
--- a/blockdev-nbd.c
|
||||
+++ b/blockdev-nbd.c
|
||||
@@ -92,10 +92,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
||||
|
||||
static void nbd_update_server_watch(NBDServerData *s)
|
||||
{
|
||||
- if (!s->max_connections || s->connections < s->max_connections) {
|
||||
- qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL);
|
||||
- } else {
|
||||
- qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
|
||||
+ if (s->listener) {
|
||||
+ if (!s->max_connections || s->connections < s->max_connections) {
|
||||
+ qio_net_listener_set_client_func(s->listener, nbd_accept, NULL,
|
||||
+ NULL);
|
||||
+ } else {
|
||||
+ qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,6 +116,7 @@ static void nbd_server_free(NBDServerData *server)
|
||||
*/
|
||||
qio_net_listener_disconnect(server->listener);
|
||||
object_unref(OBJECT(server->listener));
|
||||
+ server->listener = NULL;
|
||||
QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
|
||||
qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
|
||||
NULL);
|
134
debian/patches/extra/0036-softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch
vendored
Normal file
134
debian/patches/extra/0036-softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch
vendored
Normal file
|
@ -0,0 +1,134 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: David Hildenbrand <david@redhat.com>
|
||||
Date: Wed, 28 Aug 2024 11:07:43 +0200
|
||||
Subject: [PATCH] softmmu/physmem: fix memory leak in dirty_memory_extend()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
As reported by Peter, we might be leaking memory when removing the
|
||||
highest RAMBlock (in the weird ram_addr_t space), and adding a new one.
|
||||
|
||||
We will fail to realize that we already allocated bitmaps for more
|
||||
dirty memory blocks, and effectively discard the pointers to them.
|
||||
|
||||
Fix it by getting rid of last_ram_page() and by remembering the number
|
||||
of dirty memory blocks that have been allocated already.
|
||||
|
||||
While at it, let's use "unsigned int" for the number of blocks, which
|
||||
should be sufficient until we reach ~32 exabytes.
|
||||
|
||||
Looks like this leak was introduced as we switched from using a single
|
||||
bitmap_zero_extend() to allocating multiple bitmaps:
|
||||
bitmap_zero_extend() relies on g_renew() which should have taken care of
|
||||
this.
|
||||
|
||||
Resolves: https://lkml.kernel.org/r/CAFEAcA-k7a+VObGAfCFNygQNfCKL=AfX6A4kScq=VSSK0peqPg@mail.gmail.com
|
||||
Reported-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM hotplug")
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Tested-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Cc: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Cc: Peter Xu <peterx@redhat.com>
|
||||
Cc: "Philippe Mathieu-Daudé" <philmd@linaro.org>
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
(picked from https://lore.kernel.org/qemu-devel/20240828090743.128647-1-david@redhat.com/)
|
||||
[FE: backport - remove not-yet-existing variable in context of hunk touching ram_block_add()]
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
include/exec/ramlist.h | 1 +
|
||||
system/physmem.c | 35 +++++++++--------------------------
|
||||
2 files changed, 10 insertions(+), 26 deletions(-)
|
||||
|
||||
diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
|
||||
index 2ad2a81acc..d9cfe530be 100644
|
||||
--- a/include/exec/ramlist.h
|
||||
+++ b/include/exec/ramlist.h
|
||||
@@ -50,6 +50,7 @@ typedef struct RAMList {
|
||||
/* RCU-enabled, writes protected by the ramlist lock. */
|
||||
QLIST_HEAD(, RAMBlock) blocks;
|
||||
DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM];
|
||||
+ unsigned int num_dirty_blocks;
|
||||
uint32_t version;
|
||||
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
|
||||
} RAMList;
|
||||
diff --git a/system/physmem.c b/system/physmem.c
|
||||
index a4fe3d2bf8..78f7db1121 100644
|
||||
--- a/system/physmem.c
|
||||
+++ b/system/physmem.c
|
||||
@@ -1497,18 +1497,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
|
||||
return offset;
|
||||
}
|
||||
|
||||
-static unsigned long last_ram_page(void)
|
||||
-{
|
||||
- RAMBlock *block;
|
||||
- ram_addr_t last = 0;
|
||||
-
|
||||
- RCU_READ_LOCK_GUARD();
|
||||
- RAMBLOCK_FOREACH(block) {
|
||||
- last = MAX(last, block->offset + block->max_length);
|
||||
- }
|
||||
- return last >> TARGET_PAGE_BITS;
|
||||
-}
|
||||
-
|
||||
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
|
||||
{
|
||||
int ret;
|
||||
@@ -1762,13 +1750,11 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
|
||||
}
|
||||
|
||||
/* Called with ram_list.mutex held */
|
||||
-static void dirty_memory_extend(ram_addr_t old_ram_size,
|
||||
- ram_addr_t new_ram_size)
|
||||
+static void dirty_memory_extend(ram_addr_t new_ram_size)
|
||||
{
|
||||
- ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
|
||||
- DIRTY_MEMORY_BLOCK_SIZE);
|
||||
- ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
|
||||
- DIRTY_MEMORY_BLOCK_SIZE);
|
||||
+ unsigned int old_num_blocks = ram_list.num_dirty_blocks;
|
||||
+ unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size,
|
||||
+ DIRTY_MEMORY_BLOCK_SIZE);
|
||||
int i;
|
||||
|
||||
/* Only need to extend if block count increased */
|
||||
@@ -1800,6 +1786,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
|
||||
g_free_rcu(old_blocks, rcu);
|
||||
}
|
||||
}
|
||||
+
|
||||
+ ram_list.num_dirty_blocks = new_num_blocks;
|
||||
}
|
||||
|
||||
static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
@@ -1808,11 +1796,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
const bool shared = qemu_ram_is_shared(new_block);
|
||||
RAMBlock *block;
|
||||
RAMBlock *last_block = NULL;
|
||||
- ram_addr_t old_ram_size, new_ram_size;
|
||||
+ ram_addr_t ram_size;
|
||||
Error *err = NULL;
|
||||
|
||||
- old_ram_size = last_ram_page();
|
||||
-
|
||||
qemu_mutex_lock_ramlist();
|
||||
new_block->offset = find_ram_offset(new_block->max_length);
|
||||
|
||||
@@ -1840,11 +1826,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- new_ram_size = MAX(old_ram_size,
|
||||
- (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
|
||||
- if (new_ram_size > old_ram_size) {
|
||||
- dirty_memory_extend(old_ram_size, new_ram_size);
|
||||
- }
|
||||
+ ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
|
||||
+ dirty_memory_extend(ram_size);
|
||||
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
|
||||
* QLIST (which has an RCU-friendly variant) does not have insertion at
|
||||
* tail, so save the last element in last_block.
|
104
debian/patches/extra/0037-block-reqlist-allow-adding-overlapping-requests.patch
vendored
Normal file
104
debian/patches/extra/0037-block-reqlist-allow-adding-overlapping-requests.patch
vendored
Normal file
|
@ -0,0 +1,104 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Date: Thu, 7 Nov 2024 17:51:13 +0100
|
||||
Subject: [PATCH] block/reqlist: allow adding overlapping requests
|
||||
|
||||
Allow overlapping request by removing the assert that made it
|
||||
impossible. There are only two callers:
|
||||
|
||||
1. block_copy_task_create()
|
||||
|
||||
It already asserts the very same condition before calling
|
||||
reqlist_init_req().
|
||||
|
||||
2. cbw_snapshot_read_lock()
|
||||
|
||||
There is no need to have read requests be non-overlapping in
|
||||
copy-before-write when used for snapshot-access. In fact, there was no
|
||||
protection against two callers of cbw_snapshot_read_lock() calling
|
||||
reqlist_init_req() with overlapping ranges and this could lead to an
|
||||
assertion failure [1].
|
||||
|
||||
In particular, with the reproducer script below [0], two
|
||||
cbw_co_snapshot_block_status() callers could race, with the second
|
||||
calling reqlist_init_req() before the first one finishes and removes
|
||||
its conflicting request.
|
||||
|
||||
[0]:
|
||||
|
||||
> #!/bin/bash -e
|
||||
> dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024
|
||||
> ./qemu-img create /tmp/fleecing.raw -f raw 1G
|
||||
> (
|
||||
> ./qemu-system-x86_64 --qmp stdio \
|
||||
> --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \
|
||||
> --blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \
|
||||
> <<EOF
|
||||
> {"execute": "qmp_capabilities"}
|
||||
> {"execute": "blockdev-add", "arguments": { "driver": "copy-before-write", "file": "node0", "target": "node1", "node-name": "node3" } }
|
||||
> {"execute": "blockdev-add", "arguments": { "driver": "snapshot-access", "file": "node3", "node-name": "snap0" } }
|
||||
> {"execute": "nbd-server-start", "arguments": {"addr": { "type": "unix", "data": { "path": "/tmp/nbd.socket" } } } }
|
||||
> {"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": "snap0", "type": "nbd", "name": "exp0"}}
|
||||
> EOF
|
||||
> ) &
|
||||
> sleep 5
|
||||
> while true; do
|
||||
> ./qemu-nbd -d /dev/nbd0
|
||||
> ./qemu-nbd -c /dev/nbd0 nbd:unix:/tmp/nbd.socket:exportname=exp0 -f raw -r
|
||||
> nbdinfo --map 'nbd+unix:///exp0?socket=/tmp/nbd.socket'
|
||||
> done
|
||||
|
||||
[1]:
|
||||
|
||||
> #5 0x000071e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101
|
||||
> #6 0x0000615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23
|
||||
> #7 0x00006152853e2d98 in cbw_snapshot_read_lock (...) at ../block/copy-before-write.c:237
|
||||
> #8 0x00006152853e3068 in cbw_co_snapshot_block_status (...) at ../block/copy-before-write.c:304
|
||||
> #9 0x00006152853f4d22 in bdrv_co_snapshot_block_status (...) at ../block/io.c:3726
|
||||
> #10 0x000061528543a63e in snapshot_access_co_block_status (...) at ../block/snapshot-access.c:48
|
||||
> #11 0x00006152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474
|
||||
> #12 0x00006152853f2016 in bdrv_co_common_block_status_above (...) at ../block/io.c:2652
|
||||
> #13 0x00006152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732
|
||||
> #14 0x00006152853d9a86 in blk_co_block_status_above (...) at ../block/block-backend.c:1473
|
||||
> #15 0x000061528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374
|
||||
> #16 0x000061528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481
|
||||
> #17 0x000061528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978
|
||||
> #18 0x000061528538f906 in nbd_trip (...) at ../nbd/server.c:3121
|
||||
> #19 0x00006152855a7caf in coroutine_trampoline (...) at ../util/coroutine-ucontext.c:175
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
---
|
||||
block/copy-before-write.c | 3 ++-
|
||||
block/reqlist.c | 2 --
|
||||
2 files changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
||||
index 8aba27a71d..3698b3bc60 100644
|
||||
--- a/block/copy-before-write.c
|
||||
+++ b/block/copy-before-write.c
|
||||
@@ -65,7 +65,8 @@ typedef struct BDRVCopyBeforeWriteState {
|
||||
|
||||
/*
|
||||
* @frozen_read_reqs: current read requests for fleecing user in bs->file
|
||||
- * node. These areas must not be rewritten by guest.
|
||||
+ * node. These areas must not be rewritten by guest. There can be multiple
|
||||
+ * overlapping read requests.
|
||||
*/
|
||||
BlockReqList frozen_read_reqs;
|
||||
|
||||
diff --git a/block/reqlist.c b/block/reqlist.c
|
||||
index 08cb57cfa4..098e807378 100644
|
||||
--- a/block/reqlist.c
|
||||
+++ b/block/reqlist.c
|
||||
@@ -20,8 +20,6 @@
|
||||
void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
|
||||
int64_t bytes)
|
||||
{
|
||||
- assert(!reqlist_find_conflict(reqs, offset, bytes));
|
||||
-
|
||||
*req = (BlockReq) {
|
||||
.offset = offset,
|
||||
.bytes = bytes,
|
|
@ -28,7 +28,8 @@ Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
|||
adapt to removal of QEMUFileOps
|
||||
improve condition for entering final stage
|
||||
adapt to QAPI and other changes for 8.2
|
||||
make sure to not call vm_start() from coroutine]
|
||||
make sure to not call vm_start() from coroutine
|
||||
stop CPU throttling after finishing]
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
hmp-commands-info.hx | 13 +
|
||||
|
@ -36,13 +37,13 @@ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|||
include/migration/snapshot.h | 2 +
|
||||
include/monitor/hmp.h | 3 +
|
||||
migration/meson.build | 1 +
|
||||
migration/savevm-async.c | 538 +++++++++++++++++++++++++++++++++++
|
||||
migration/savevm-async.c | 545 +++++++++++++++++++++++++++++++++++
|
||||
monitor/hmp-cmds.c | 38 +++
|
||||
qapi/migration.json | 34 +++
|
||||
qapi/misc.json | 18 ++
|
||||
qemu-options.hx | 12 +
|
||||
system/vl.c | 10 +
|
||||
11 files changed, 686 insertions(+)
|
||||
11 files changed, 693 insertions(+)
|
||||
create mode 100644 migration/savevm-async.c
|
||||
|
||||
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
|
||||
|
@ -140,10 +141,10 @@ index 95d1cf2250..800f12a60d 100644
|
|||
'threadinfo.c',
|
||||
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
|
||||
new file mode 100644
|
||||
index 0000000000..72cf6588c2
|
||||
index 0000000000..1af32604c7
|
||||
--- /dev/null
|
||||
+++ b/migration/savevm-async.c
|
||||
@@ -0,0 +1,538 @@
|
||||
@@ -0,0 +1,545 @@
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "migration/channel-savevm-async.h"
|
||||
+#include "migration/migration.h"
|
||||
|
@ -154,6 +155,7 @@ index 0000000000..72cf6588c2
|
|||
+#include "migration/global_state.h"
|
||||
+#include "migration/ram.h"
|
||||
+#include "migration/qemu-file.h"
|
||||
+#include "sysemu/cpu-throttle.h"
|
||||
+#include "sysemu/sysemu.h"
|
||||
+#include "sysemu/runstate.h"
|
||||
+#include "block/block.h"
|
||||
|
@ -342,6 +344,12 @@ index 0000000000..72cf6588c2
|
|||
+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
|
||||
+ ms->to_dst_file = NULL;
|
||||
+
|
||||
+ /*
|
||||
+ * Same as in migration_iteration_finish(): saving RAM might've turned on CPU throttling for
|
||||
+ * auto-converge, make sure to disable it.
|
||||
+ */
|
||||
+ cpu_throttle_stop();
|
||||
+
|
||||
+ qemu_savevm_state_cleanup();
|
||||
+
|
||||
+ ret = save_snapshot_cleanup();
|
||||
|
|
|
@ -193,10 +193,10 @@ index 32fd4a34fd..36a0cd8cc8 100644
|
|||
|
||||
/*
|
||||
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
|
||||
index 72cf6588c2..fb4e8ea689 100644
|
||||
index 1af32604c7..be2035cd2e 100644
|
||||
--- a/migration/savevm-async.c
|
||||
+++ b/migration/savevm-async.c
|
||||
@@ -379,7 +379,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
|
||||
@@ -386,7 +386,7 @@ void qmp_savevm_start(const char *statefile, Error **errp)
|
||||
|
||||
QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
|
||||
&snap_state.bs_pos));
|
||||
|
@ -205,7 +205,7 @@ index 72cf6588c2..fb4e8ea689 100644
|
|||
|
||||
if (!snap_state.file) {
|
||||
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
|
||||
@@ -503,7 +503,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
|
||||
@@ -510,7 +510,8 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
|
||||
blk_op_block_all(be, blocker);
|
||||
|
||||
/* restore the VM state */
|
||||
|
|
|
@ -120,10 +120,10 @@ index e99914eaa4..6bba803f94 100644
|
|||
system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
|
||||
system_ss.add(files('block-ram-registrar.c'))
|
||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
||||
index d954bec6f1..5000c084c5 100644
|
||||
index bdf2eb50b6..439a7a14c8 100644
|
||||
--- a/block/monitor/block-hmp-cmds.c
|
||||
+++ b/block/monitor/block-hmp-cmds.c
|
||||
@@ -1008,3 +1008,42 @@ void hmp_change_medium(Monitor *mon, const char *device, const char *target,
|
||||
@@ -1009,3 +1009,42 @@ void hmp_change_medium(Monitor *mon, const char *device, const char *target,
|
||||
qmp_blockdev_change_medium(device, NULL, target, arg, true, force,
|
||||
!!read_only, read_only_mode, errp);
|
||||
}
|
||||
|
|
|
@ -82,10 +82,10 @@ index cc618e4561..12d662e9d4 100644
|
|||
return NULL;
|
||||
}
|
||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
||||
index 853e01a1eb..47b3cdd09f 100644
|
||||
index 28f6a096cd..ef4e666303 100644
|
||||
--- a/block/copy-before-write.c
|
||||
+++ b/block/copy-before-write.c
|
||||
@@ -477,7 +477,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
@@ -478,7 +478,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
|
||||
s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
|
||||
s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
|
||||
|
|
|
@ -36,10 +36,10 @@ index 1963e47ab9..fe69723ada 100644
|
|||
goto error;
|
||||
}
|
||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
||||
index 47b3cdd09f..bba58326d7 100644
|
||||
index ef4e666303..adb27649a8 100644
|
||||
--- a/block/copy-before-write.c
|
||||
+++ b/block/copy-before-write.c
|
||||
@@ -546,6 +546,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
||||
@@ -547,6 +547,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
||||
BlockDriverState *target,
|
||||
const char *filter_node_name,
|
||||
bool discard_source,
|
||||
|
@ -47,7 +47,7 @@ index 47b3cdd09f..bba58326d7 100644
|
|||
BlockCopyState **bcs,
|
||||
Error **errp)
|
||||
{
|
||||
@@ -564,6 +565,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
||||
@@ -565,6 +566,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
||||
}
|
||||
qdict_put_str(opts, "file", bdrv_get_node_name(source));
|
||||
qdict_put_str(opts, "target", bdrv_get_node_name(target));
|
||||
|
|
|
@ -68,10 +68,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
|||
3 files changed, 142 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
||||
index 5000c084c5..70b3de4c7e 100644
|
||||
index 439a7a14c8..d0e7771dcc 100644
|
||||
--- a/block/monitor/block-hmp-cmds.c
|
||||
+++ b/block/monitor/block-hmp-cmds.c
|
||||
@@ -1043,6 +1043,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
|
||||
@@ -1044,6 +1044,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
|
||||
NULL, NULL,
|
||||
devlist, qdict_haskey(qdict, "speed"), speed,
|
||||
false, 0, // BackupPerf max-workers
|
||||
|
|
|
@ -21,7 +21,7 @@ Tested-by: Friedrich Weber <f.weber@proxmox.com>
|
|||
3 files changed, 22 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
||||
index bba58326d7..50cc4c7aae 100644
|
||||
index adb27649a8..a5bb4d14f6 100644
|
||||
--- a/block/copy-before-write.c
|
||||
+++ b/block/copy-before-write.c
|
||||
@@ -27,6 +27,7 @@
|
||||
|
@ -32,7 +32,7 @@ index bba58326d7..50cc4c7aae 100644
|
|||
#include "qemu/cutils.h"
|
||||
#include "qapi/error.h"
|
||||
#include "block/block_int.h"
|
||||
@@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
|
||||
@@ -75,7 +76,8 @@ typedef struct BDRVCopyBeforeWriteState {
|
||||
* @snapshot_error is normally zero. But on first copy-before-write failure
|
||||
* when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
|
||||
* value of this error (<0). After that all in-flight and further
|
||||
|
@ -42,7 +42,7 @@ index bba58326d7..50cc4c7aae 100644
|
|||
*/
|
||||
int snapshot_error;
|
||||
} BDRVCopyBeforeWriteState;
|
||||
@@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
|
||||
@@ -115,7 +117,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,7 @@ index bba58326d7..50cc4c7aae 100644
|
|||
return 0;
|
||||
}
|
||||
|
||||
@@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
|
||||
@@ -139,9 +141,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
|
||||
WITH_QEMU_LOCK_GUARD(&s->lock) {
|
||||
if (ret < 0) {
|
||||
assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
|
||||
|
@ -62,7 +62,7 @@ index bba58326d7..50cc4c7aae 100644
|
|||
} else {
|
||||
bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
|
||||
}
|
||||
@@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
||||
@@ -215,7 +215,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
||||
|
||||
QEMU_LOCK_GUARD(&s->lock);
|
||||
|
||||
|
@ -71,7 +71,7 @@ index bba58326d7..50cc4c7aae 100644
|
|||
g_free(req);
|
||||
return NULL;
|
||||
}
|
||||
@@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
|
||||
@@ -586,6 +586,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
|
||||
bdrv_unref(bs);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Date: Thu, 7 Nov 2024 17:51:14 +0100
|
||||
Subject: [PATCH] PVE backup: fixup error handling for fleecing
|
||||
|
||||
The drained section needs to be terminated before breaking out of the
|
||||
loop in the error scenarios. Otherwise, guest IO on the drive would
|
||||
become stuck.
|
||||
|
||||
If the job is created successfully, then the job completion callback
|
||||
will clean up the snapshot access block nodes. In case failure
|
||||
happened before the job is created, there was no cleanup for the
|
||||
snapshot access block nodes yet. Add it.
|
||||
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
pve-backup.c | 38 +++++++++++++++++++++++++-------------
|
||||
1 file changed, 25 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/pve-backup.c b/pve-backup.c
|
||||
index 4e730aa3da..c4178758b3 100644
|
||||
--- a/pve-backup.c
|
||||
+++ b/pve-backup.c
|
||||
@@ -357,22 +357,23 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
|
||||
qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||||
}
|
||||
|
||||
-static void pvebackup_complete_cb(void *opaque, int ret)
|
||||
+static void cleanup_snapshot_access(PVEBackupDevInfo *di)
|
||||
{
|
||||
- PVEBackupDevInfo *di = opaque;
|
||||
- di->completed_ret = ret;
|
||||
-
|
||||
- /*
|
||||
- * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
|
||||
- * won't be done as a coroutine anyways:
|
||||
- * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
|
||||
- * just spawn a BH calling bdrv_unref().
|
||||
- * - For cbw, draining would need to spawn a BH.
|
||||
- */
|
||||
if (di->fleecing.snapshot_access) {
|
||||
bdrv_unref(di->fleecing.snapshot_access);
|
||||
di->fleecing.snapshot_access = NULL;
|
||||
}
|
||||
+ if (di->fleecing.cbw) {
|
||||
+ bdrv_cbw_drop(di->fleecing.cbw);
|
||||
+ di->fleecing.cbw = NULL;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void pvebackup_complete_cb(void *opaque, int ret)
|
||||
+{
|
||||
+ PVEBackupDevInfo *di = opaque;
|
||||
+ di->completed_ret = ret;
|
||||
+
|
||||
if (di->fleecing.cbw) {
|
||||
/*
|
||||
* With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
|
||||
@@ -383,10 +384,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
|
||||
if (di->completed_ret == -EACCES && snapshot_error) {
|
||||
di->completed_ret = snapshot_error;
|
||||
}
|
||||
- bdrv_cbw_drop(di->fleecing.cbw);
|
||||
- di->fleecing.cbw = NULL;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
|
||||
+ * won't be done as a coroutine anyways:
|
||||
+ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
|
||||
+ * just spawn a BH calling bdrv_unref().
|
||||
+ * - For cbw, draining would need to spawn a BH.
|
||||
+ */
|
||||
+ cleanup_snapshot_access(di);
|
||||
+
|
||||
/*
|
||||
* Needs to happen outside of coroutine, because it takes the graph write lock.
|
||||
*/
|
||||
@@ -587,6 +595,7 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||
if (!di->fleecing.cbw) {
|
||||
error_setg(errp, "appending cbw node for fleecing failed: %s",
|
||||
local_err ? error_get_pretty(local_err) : "unknown error");
|
||||
+ bdrv_drained_end(di->bs);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -599,6 +608,8 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||
if (!di->fleecing.snapshot_access) {
|
||||
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
||||
local_err ? error_get_pretty(local_err) : "unknown error");
|
||||
+ cleanup_snapshot_access(di);
|
||||
+ bdrv_drained_end(di->bs);
|
||||
break;
|
||||
}
|
||||
source_bs = di->fleecing.snapshot_access;
|
||||
@@ -637,6 +648,7 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||
}
|
||||
|
||||
if (!job || local_err) {
|
||||
+ cleanup_snapshot_access(di);
|
||||
error_setg(errp, "backup_job_create failed: %s",
|
||||
local_err ? error_get_pretty(local_err) : "null");
|
||||
break;
|
135
debian/patches/pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch
vendored
Normal file
135
debian/patches/pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch
vendored
Normal file
|
@ -0,0 +1,135 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Date: Thu, 7 Nov 2024 17:51:15 +0100
|
||||
Subject: [PATCH] PVE backup: factor out setting up snapshot access for
|
||||
fleecing
|
||||
|
||||
Avoids some line bloat in the create_backup_jobs_bh() function and is
|
||||
in preparation for setting up the snapshot access independently of
|
||||
fleecing, in particular that will be useful for providing access to
|
||||
the snapshot via NBD.
|
||||
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
pve-backup.c | 95 ++++++++++++++++++++++++++++++++--------------------
|
||||
1 file changed, 58 insertions(+), 37 deletions(-)
|
||||
|
||||
diff --git a/pve-backup.c b/pve-backup.c
|
||||
index c4178758b3..051ebffe48 100644
|
||||
--- a/pve-backup.c
|
||||
+++ b/pve-backup.c
|
||||
@@ -525,6 +525,62 @@ static int coroutine_fn pvebackup_co_add_config(
|
||||
goto out;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Setup a snapshot-access block node for a device with associated fleecing image.
|
||||
+ */
|
||||
+static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
|
||||
+{
|
||||
+ Error *local_err = NULL;
|
||||
+
|
||||
+ if (!di->fleecing.bs) {
|
||||
+ error_setg(errp, "no associated fleecing image");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ QDict *cbw_opts = qdict_new();
|
||||
+ qdict_put_str(cbw_opts, "driver", "copy-before-write");
|
||||
+ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
|
||||
+ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
|
||||
+
|
||||
+ if (di->bitmap) {
|
||||
+ /*
|
||||
+ * Only guest writes to parts relevant for the backup need to be intercepted with
|
||||
+ * old data being copied to the fleecing image.
|
||||
+ */
|
||||
+ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
|
||||
+ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
|
||||
+ }
|
||||
+ /*
|
||||
+ * Fleecing storage is supposed to be fast and it's better to break backup than guest
|
||||
+ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
|
||||
+ * abort a bit before that.
|
||||
+ */
|
||||
+ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
|
||||
+ qdict_put_int(cbw_opts, "cbw-timeout", 45);
|
||||
+
|
||||
+ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
|
||||
+
|
||||
+ if (!di->fleecing.cbw) {
|
||||
+ error_setg(errp, "appending cbw node for fleecing failed: %s",
|
||||
+ local_err ? error_get_pretty(local_err) : "unknown error");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ QDict *snapshot_access_opts = qdict_new();
|
||||
+ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
|
||||
+ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
|
||||
+
|
||||
+ di->fleecing.snapshot_access =
|
||||
+ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
|
||||
+ if (!di->fleecing.snapshot_access) {
|
||||
+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
||||
+ local_err ? error_get_pretty(local_err) : "unknown error");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* backup_job_create can *not* be run from a coroutine, so this can't either.
|
||||
* The caller is responsible that backup_mutex is held nonetheless.
|
||||
@@ -569,49 +625,14 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||
const char *job_id = bdrv_get_device_name(di->bs);
|
||||
bdrv_graph_co_rdunlock();
|
||||
if (di->fleecing.bs) {
|
||||
- QDict *cbw_opts = qdict_new();
|
||||
- qdict_put_str(cbw_opts, "driver", "copy-before-write");
|
||||
- qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
|
||||
- qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
|
||||
-
|
||||
- if (di->bitmap) {
|
||||
- /*
|
||||
- * Only guest writes to parts relevant for the backup need to be intercepted with
|
||||
- * old data being copied to the fleecing image.
|
||||
- */
|
||||
- qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
|
||||
- qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
|
||||
- }
|
||||
- /*
|
||||
- * Fleecing storage is supposed to be fast and it's better to break backup than guest
|
||||
- * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
|
||||
- * abort a bit before that.
|
||||
- */
|
||||
- qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
|
||||
- qdict_put_int(cbw_opts, "cbw-timeout", 45);
|
||||
-
|
||||
- di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
|
||||
-
|
||||
- if (!di->fleecing.cbw) {
|
||||
- error_setg(errp, "appending cbw node for fleecing failed: %s",
|
||||
- local_err ? error_get_pretty(local_err) : "unknown error");
|
||||
- bdrv_drained_end(di->bs);
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
- QDict *snapshot_access_opts = qdict_new();
|
||||
- qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
|
||||
- qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
|
||||
-
|
||||
- di->fleecing.snapshot_access =
|
||||
- bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
|
||||
- if (!di->fleecing.snapshot_access) {
|
||||
+ if (setup_snapshot_access(di, &local_err) < 0) {
|
||||
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
||||
local_err ? error_get_pretty(local_err) : "unknown error");
|
||||
cleanup_snapshot_access(di);
|
||||
bdrv_drained_end(di->bs);
|
||||
break;
|
||||
}
|
||||
+
|
||||
source_bs = di->fleecing.snapshot_access;
|
||||
discard_source = true;
|
||||
|
135
debian/patches/pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch
vendored
Normal file
135
debian/patches/pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch
vendored
Normal file
|
@ -0,0 +1,135 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Date: Thu, 7 Nov 2024 17:51:16 +0100
|
||||
Subject: [PATCH] PVE backup: save device name in device info structure
|
||||
|
||||
The device name needs to be queried while holding the graph read lock
|
||||
and since it doesn't change during the whole operation, just get it
|
||||
once during setup and avoid the need to query it again in different
|
||||
places.
|
||||
|
||||
Also in preparation to use it more often in error messages and for the
|
||||
upcoming external backup access API.
|
||||
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
pve-backup.c | 29 +++++++++++++++--------------
|
||||
1 file changed, 15 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/pve-backup.c b/pve-backup.c
|
||||
index 051ebffe48..33c23e53c2 100644
|
||||
--- a/pve-backup.c
|
||||
+++ b/pve-backup.c
|
||||
@@ -94,6 +94,7 @@ typedef struct PVEBackupDevInfo {
|
||||
size_t size;
|
||||
uint64_t block_size;
|
||||
uint8_t dev_id;
|
||||
+ char* device_name;
|
||||
int completed_ret; // INT_MAX if not completed
|
||||
BdrvDirtyBitmap *bitmap;
|
||||
BlockDriverState *target;
|
||||
@@ -327,6 +328,8 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
|
||||
}
|
||||
|
||||
di->bs = NULL;
|
||||
+ g_free(di->device_name);
|
||||
+ di->device_name = NULL;
|
||||
|
||||
assert(di->target == NULL);
|
||||
|
||||
@@ -621,9 +624,6 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||
|
||||
BlockDriverState *source_bs = di->bs;
|
||||
bool discard_source = false;
|
||||
- bdrv_graph_co_rdlock();
|
||||
- const char *job_id = bdrv_get_device_name(di->bs);
|
||||
- bdrv_graph_co_rdunlock();
|
||||
if (di->fleecing.bs) {
|
||||
if (setup_snapshot_access(di, &local_err) < 0) {
|
||||
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
||||
@@ -654,7 +654,7 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||
}
|
||||
|
||||
BlockJob *job = backup_job_create(
|
||||
- job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
|
||||
+ di->device_name, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
|
||||
bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
|
||||
BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
|
||||
&local_err);
|
||||
@@ -751,6 +751,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
|
||||
}
|
||||
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
|
||||
di->bs = bs;
|
||||
+ di->device_name = g_strdup(bdrv_get_device_name(bs));
|
||||
|
||||
if (fleecing && device_uses_fleecing(*d)) {
|
||||
g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
|
||||
@@ -789,6 +790,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
|
||||
|
||||
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
|
||||
di->bs = bs;
|
||||
+ di->device_name = g_strdup(bdrv_get_device_name(bs));
|
||||
di_list = g_list_append(di_list, di);
|
||||
}
|
||||
}
|
||||
@@ -956,9 +958,6 @@ UuidInfo coroutine_fn *qmp_backup(
|
||||
|
||||
di->block_size = dump_cb_block_size;
|
||||
|
||||
- bdrv_graph_co_rdlock();
|
||||
- const char *devname = bdrv_get_device_name(di->bs);
|
||||
- bdrv_graph_co_rdunlock();
|
||||
PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED;
|
||||
size_t dirty = di->size;
|
||||
|
||||
@@ -973,7 +972,8 @@ UuidInfo coroutine_fn *qmp_backup(
|
||||
}
|
||||
action = PBS_BITMAP_ACTION_NEW;
|
||||
} else {
|
||||
- expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0;
|
||||
+ expect_only_dirty =
|
||||
+ proxmox_backup_check_incremental(pbs, di->device_name, di->size) != 0;
|
||||
}
|
||||
|
||||
if (expect_only_dirty) {
|
||||
@@ -997,7 +997,8 @@ UuidInfo coroutine_fn *qmp_backup(
|
||||
}
|
||||
}
|
||||
|
||||
- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp);
|
||||
+ int dev_id = proxmox_backup_co_register_image(pbs, di->device_name, di->size,
|
||||
+ expect_only_dirty, errp);
|
||||
if (dev_id < 0) {
|
||||
goto err_mutex;
|
||||
}
|
||||
@@ -1009,7 +1010,7 @@ UuidInfo coroutine_fn *qmp_backup(
|
||||
di->dev_id = dev_id;
|
||||
|
||||
PBSBitmapInfo *info = g_malloc(sizeof(*info));
|
||||
- info->drive = g_strdup(devname);
|
||||
+ info->drive = g_strdup(di->device_name);
|
||||
info->action = action;
|
||||
info->size = di->size;
|
||||
info->dirty = dirty;
|
||||
@@ -1034,10 +1035,7 @@ UuidInfo coroutine_fn *qmp_backup(
|
||||
goto err_mutex;
|
||||
}
|
||||
|
||||
- bdrv_graph_co_rdlock();
|
||||
- const char *devname = bdrv_get_device_name(di->bs);
|
||||
- bdrv_graph_co_rdunlock();
|
||||
- di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
|
||||
+ di->dev_id = vma_writer_register_stream(vmaw, di->device_name, di->size);
|
||||
if (di->dev_id <= 0) {
|
||||
error_set(errp, ERROR_CLASS_GENERIC_ERROR,
|
||||
"register_stream failed");
|
||||
@@ -1148,6 +1146,9 @@ err:
|
||||
bdrv_co_unref(di->target);
|
||||
}
|
||||
|
||||
+ g_free(di->device_name);
|
||||
+ di->device_name = NULL;
|
||||
+
|
||||
g_free(di);
|
||||
}
|
||||
g_list_free(di_list);
|
25
debian/patches/pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch
vendored
Normal file
25
debian/patches/pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Date: Thu, 7 Nov 2024 17:51:17 +0100
|
||||
Subject: [PATCH] PVE backup: include device name in error when setting up
|
||||
snapshot access fails
|
||||
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
---
|
||||
pve-backup.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/pve-backup.c b/pve-backup.c
|
||||
index 33c23e53c2..d931746453 100644
|
||||
--- a/pve-backup.c
|
||||
+++ b/pve-backup.c
|
||||
@@ -626,7 +626,8 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||
bool discard_source = false;
|
||||
if (di->fleecing.bs) {
|
||||
if (setup_snapshot_access(di, &local_err) < 0) {
|
||||
- error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
||||
+ error_setg(errp, "%s - setting up snapshot access for fleecing failed: %s",
|
||||
+ di->device_name,
|
||||
local_err ? error_get_pretty(local_err) : "unknown error");
|
||||
cleanup_snapshot_access(di);
|
||||
bdrv_drained_end(di->bs);
|
|
@ -3,18 +3,38 @@ extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
|
|||
extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
|
||||
extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
|
||||
extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
|
||||
extra/0006-Revert-virtio-pci-fix-use-of-a-released-vector.patch
|
||||
extra/0007-block-copy-before-write-fix-permission.patch
|
||||
extra/0008-block-copy-before-write-support-unligned-snapshot-di.patch
|
||||
extra/0009-block-copy-before-write-create-block_copy-bitmap-in-.patch
|
||||
extra/0010-qapi-blockdev-backup-add-discard-source-parameter.patch
|
||||
extra/0011-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
|
||||
extra/0012-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
|
||||
extra/0013-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
|
||||
extra/0014-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
|
||||
extra/0015-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
|
||||
extra/0016-block-copy-Fix-missing-graph-lock.patch
|
||||
extra/0017-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
|
||||
extra/0006-block-copy-before-write-fix-permission.patch
|
||||
extra/0007-block-copy-before-write-support-unligned-snapshot-di.patch
|
||||
extra/0008-block-copy-before-write-create-block_copy-bitmap-in-.patch
|
||||
extra/0009-qapi-blockdev-backup-add-discard-source-parameter.patch
|
||||
extra/0010-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
|
||||
extra/0011-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
|
||||
extra/0012-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
|
||||
extra/0013-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
|
||||
extra/0014-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
|
||||
extra/0015-block-copy-Fix-missing-graph-lock.patch
|
||||
extra/0016-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
|
||||
extra/0017-virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch
|
||||
extra/0018-virtio-net-Ensure-queue-index-fits-with-RSS.patch
|
||||
extra/0019-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch
|
||||
extra/0020-net-Reinstate-net-nic-model-help-output-as-documente.patch
|
||||
extra/0021-net-Fix-net-nic-model-for-non-help-arguments.patch
|
||||
extra/0022-target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch
|
||||
extra/0023-target-arm-Fix-UMOPA-UMOPS-of-16-bit-values.patch
|
||||
extra/0024-target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch
|
||||
extra/0025-target-arm-Ignore-SMCR_EL2.LEN-and-SVCR_EL2.LEN-if-E.patch
|
||||
extra/0026-target-arm-Handle-denormals-correctly-for-FMOPA-wide.patch
|
||||
extra/0027-intel_iommu-fix-FRCD-construction-macro.patch
|
||||
extra/0028-target-i386-Do-not-apply-REX-to-MMX-operands.patch
|
||||
extra/0029-module-Prevent-crash-by-resetting-local_err-in-modul.patch
|
||||
extra/0030-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch
|
||||
extra/0031-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch
|
||||
extra/0032-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch
|
||||
extra/0033-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch
|
||||
extra/0034-vnc-fix-crash-when-no-console-attached.patch
|
||||
extra/0035-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch
|
||||
extra/0036-softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch
|
||||
extra/0037-block-reqlist-allow-adding-overlapping-requests.patch
|
||||
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
|
||||
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
|
||||
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
|
||||
|
@ -68,3 +88,7 @@ pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch
|
|||
pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
|
||||
pve/0046-PVE-backup-add-fleecing-option.patch
|
||||
pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch
|
||||
pve/0048-PVE-backup-fixup-error-handling-for-fleecing.patch
|
||||
pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch
|
||||
pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch
|
||||
pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch
|
||||
|
|
2
qemu
2
qemu
|
@ -1 +1 @@
|
|||
Subproject commit c25df57ae8f9fe1c72eee2dab37d76d904ac382e
|
||||
Subproject commit 5ebde3b5c00e15f560f73055fac4ab31c0cac6d2
|
Loading…
Reference in New Issue