pve-qemu/debian/patches/pve/0041-PVE-Backup-avoid-use-Q...

212 lines
6.5 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Fri, 17 Apr 2020 08:57:47 +0200
Subject: [PATCH] PVE Backup: avoid use QemuRecMutex inside coroutines
---
pve-backup.c | 59 +++++++++++++++++++++++++++++++++-------------------
1 file changed, 38 insertions(+), 21 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 169f0c68d0..dddf430399 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -11,6 +11,23 @@
/* PVE backup state and related function */
+/*
+ * Note: A resume from a qemu_coroutine_yield can happen in a different thread,
+ * so you may not use normal mutexes within coroutines:
+ *
+ * ---bad-example---
+ * qemu_rec_mutex_lock(lock)
+ * ...
+ * qemu_coroutine_yield() // wait for something
+ * // we are now inside a different thread
+ * qemu_rec_mutex_unlock(lock) // Crash - wrong thread!!
+ * ---end-bad-example--
+ *
+ * ==> Always use CoMutext inside coroutines.
+ * ==> Never acquire/release AioContext withing coroutines (because that use QemuRecMutex)
+ *
+ */
+
static struct PVEBackupState {
struct {
// Everithing accessed from qmp_backup_query command is protected using lock
@@ -30,12 +47,14 @@ static struct PVEBackupState {
ProxmoxBackupHandle *pbs;
GList *di_list;
QemuRecMutex backup_mutex;
+ CoMutex dump_callback_mutex;
} backup_state;
static void pvebackup_init(void)
{
qemu_rec_mutex_init(&backup_state.stat.lock);
qemu_rec_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
// initialize PVEBackupState at startup
@@ -114,16 +133,16 @@ pvebackup_co_dump_pbs_cb(
Error *local_err = NULL;
int pbs_res = -1;
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
if (pvebackup_error_or_canceled()) {
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
return -1;
}
pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
if (pbs_res < 0) {
pvebackup_propagate_error(local_err);
@@ -149,7 +168,6 @@ pvebackup_co_dump_vma_cb(
const unsigned char *buf = pbuf;
PVEBackupDevInfo *di = opaque;
-
int ret = -1;
assert(backup_state.vmaw);
@@ -167,16 +185,16 @@ pvebackup_co_dump_vma_cb(
}
while (remaining > 0) {
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
if (pvebackup_error_or_canceled()) {
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
return -1;
}
size_t zero_bytes = 0;
ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes);
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
++cluster_num;
if (buf) {
@@ -203,12 +221,11 @@ pvebackup_co_dump_vma_cb(
return size;
}
+// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_cleanup(void *unused)
{
assert(qemu_in_coroutine());
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
-
qemu_rec_mutex_lock(&backup_state.stat.lock);
backup_state.stat.end_time = time(NULL);
qemu_rec_mutex_unlock(&backup_state.stat.lock);
@@ -239,9 +256,9 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
}
+// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_complete_stream(void *opaque)
{
PVEBackupDevInfo *di = opaque;
@@ -295,6 +312,8 @@ static void pvebackup_complete_cb(void *opaque, int ret)
static void pvebackup_cancel(void)
{
+ assert(!qemu_in_coroutine());
+
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
pvebackup_propagate_error(cancel_err);
@@ -348,6 +367,7 @@ void qmp_backup_cancel(Error **errp)
pvebackup_cancel();
}
+// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config(
const char *file,
const char *name,
@@ -431,9 +451,9 @@ static void pvebackup_run_next_job(void)
}
}
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
+
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex);
}
static bool create_backup_jobs(void) {
@@ -520,6 +540,7 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;
+// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_prepare(void *opaque)
{
assert(qemu_in_coroutine());
@@ -543,11 +564,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *config_name = "qemu-server.conf";
const char *firewall_name = "qemu-server.fw";
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
-
if (backup_state.di_list) {
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
return;
}
@@ -792,8 +810,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.di_list = di_list;
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str;
@@ -836,8 +852,6 @@ err:
rmdir(backup_dir);
}
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
task->result = NULL;
return;
}
@@ -881,13 +895,16 @@ UuidInfo *qmp_backup(
.errp = errp,
};
+ qemu_rec_mutex_lock(&backup_state.backup_mutex);
+
block_on_coroutine_fn(pvebackup_co_prepare, &task);
if (*errp == NULL) {
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
create_backup_jobs();
qemu_rec_mutex_unlock(&backup_state.backup_mutex);
pvebackup_run_next_job();
+ } else {
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex);
}
return task.result;