From c53cd04e70641fdf9410aac40c617d074047b3e1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 11 May 2021 18:31:51 +0200 Subject: [PATCH 01/15] hmp: Fix loadvm to resume the VM on success instead of failure Commit f61fe11aa6f broke hmp_loadvm() by adding an incorrect negation when converting from 0/-errno return values to a bool value. The result is that loadvm resumes the VM now if it failed and keeps it stopped if it failed. Fix it to restore the old behaviour and do it the other way around. Fixes: f61fe11aa6f7f8f0ffe4ddaa56a8108f3ab57854 Cc: qemu-stable@nongnu.org Reported-by: Yanhui Ma Signed-off-by: Kevin Wolf Message-Id: <20210511163151.45167-1-kwolf@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- monitor/hmp-cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index d9bef63373..d10ee14110 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -1133,7 +1133,7 @@ void hmp_loadvm(Monitor *mon, const QDict *qdict) vm_stop(RUN_STATE_RESTORE_VM); - if (!load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { + if (load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { vm_start(); } hmp_handle_error(mon, err); From 04c9f7e04ae102edf384613df98268d59ff8fb9b Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 18 May 2021 17:35:32 -0400 Subject: [PATCH 02/15] virtiofsd: Check for EINTR in preadv() and retry We don't seem to check for EINTR and retry. There are other places in code where we check for EINTR. So lets add a check. Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Connor Kuehl Signed-off-by: Vivek Goyal Message-Id: <20210518213538.693422-2-vgoyal@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 9efdbd8ffd..755d7fb25c 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -421,6 +421,9 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, if (ret == -1) { ret = errno; + if (ret == EINTR) { + continue; + } fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", __func__, len); goto err; From b31ff389315f2745cecc0f42cca7f4383b1a2a0d Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 18 May 2021 17:35:33 -0400 Subject: [PATCH 03/15] virtiofsd: Get rid of unreachable code in read pvreadv() can return following. - error - 0 in case of EOF - short read We seem to handle all the cases already. We are retrying read in case of short read. So another check for short read seems like dead code. Get rid of it. Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Connor Kuehl Signed-off-by: Vivek Goyal Message-Id: <20210518213538.693422-3-vgoyal@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 755d7fb25c..28e2974d1a 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -446,11 +446,6 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, in_sg_left); break; } - if (ret != len) { - fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__); - ret = EIO; - goto err; - } in_sg_left -= ret; len -= ret; } while (in_sg_left); From 97dbfc5ae631724a2ae7f54de28c2f8e383b5980 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 18 May 2021 17:35:34 -0400 Subject: [PATCH 04/15] virtiofsd: Use iov_discard_front() to skip bytes There are places where we need to skip few bytes from front of the iovec array. We have our own custom code for that. Looks like iov_discard_front() can do same thing. So use that helper instead. Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Connor Kuehl Signed-off-by: Vivek Goyal Message-Id: <20210518213538.693422-4-vgoyal@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 28e2974d1a..09674f2e90 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -389,23 +389,15 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); /* These get updated as we skip */ struct iovec *in_sg_ptr = in_sg_cpy; - int in_sg_cpy_count = in_num; + unsigned int in_sg_cpy_count = in_num; /* skip over parts of in_sg that contained the header iov */ size_t skip_size = iov_len; size_t in_sg_left = 0; do { - while (skip_size != 0 && in_sg_cpy_count) { - if (skip_size >= in_sg_ptr[0].iov_len) { - skip_size -= in_sg_ptr[0].iov_len; - in_sg_ptr++; - in_sg_cpy_count--; - } else { - in_sg_ptr[0].iov_len -= skip_size; - in_sg_ptr[0].iov_base += skip_size; - break; - } + if (skip_size != 0) { + iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, skip_size); } int i; From 0106f6f234d6361bb99cabfa48a953c929a3ca90 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 18 May 2021 17:35:35 -0400 Subject: [PATCH 05/15] virtiofsd: get rid of in_sg_left variable in_sg_left seems to be being used primarly for debugging purpose. It is keeping track of how many bytes are left in the scatter list we are reading into. We already have another variable "len" which keeps track how many bytes are left to be read. And in_sg_left is greater than or equal to len. We have already ensured that in the beginning of function. if (in_len < tosend_len) { fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", __func__, elem->index, tosend_len); ret = E2BIG; goto err; } So in_sg_left seems like a redundant variable. It probably was useful for debugging when code was being developed. Get rid of it. It helps simplify this function. Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Connor Kuehl Signed-off-by: Vivek Goyal Message-Id: <20210518213538.693422-5-vgoyal@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 09674f2e90..ed5146d7a6 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -394,20 +394,16 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, /* skip over parts of in_sg that contained the header iov */ size_t skip_size = iov_len; - size_t in_sg_left = 0; do { if (skip_size != 0) { iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, skip_size); } - int i; - for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) { - in_sg_left += in_sg_ptr[i].iov_len; - } fuse_log(FUSE_LOG_DEBUG, "%s: after skip skip_size=%zd in_sg_cpy_count=%d " - "in_sg_left=%zd\n", - __func__, skip_size, in_sg_cpy_count, in_sg_left); + "len remaining=%zd\n", __func__, skip_size, in_sg_cpy_count, + len); + ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, buf->buf[0].pos); @@ -434,13 +430,12 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, } if (!ret) { /* EOF case? */ - fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__, - in_sg_left); + fuse_log(FUSE_LOG_DEBUG, "%s: !ret len remaining=%zd\n", __func__, + len); break; } - in_sg_left -= ret; len -= ret; - } while (in_sg_left); + } while (len); /* Need to fix out->len on EOF */ if (len) { From bf7a3ee04430dfe426eacf6ee587e2a069ba67ce Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 18 May 2021 17:35:36 -0400 Subject: [PATCH 06/15] virtiofsd: Simplify skip byte logic We need to skip bytes in two cases. a. Before we start reading into in_sg, we need to skip iov_len bytes in the beginning which typically will have fuse_out_header. b. If preadv() does a short read, then we need to retry preadv() with remainig bytes and skip the bytes preadv() read in short read. For case a, there is no reason that skipping logic be inside the while loop. Move it outside. And only retain logic "b" inside while loop. Also get rid of variable "skip_size". Looks like we can do without it. Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Connor Kuehl Signed-off-by: Vivek Goyal Message-Id: <20210518213538.693422-6-vgoyal@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index ed5146d7a6..49c7dd788a 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -392,17 +392,11 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, unsigned int in_sg_cpy_count = in_num; /* skip over parts of in_sg that contained the header iov */ - size_t skip_size = iov_len; + iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, iov_len); do { - if (skip_size != 0) { - iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, skip_size); - } - - fuse_log(FUSE_LOG_DEBUG, - "%s: after skip skip_size=%zd in_sg_cpy_count=%d " - "len remaining=%zd\n", __func__, skip_size, in_sg_cpy_count, - len); + fuse_log(FUSE_LOG_DEBUG, "%s: in_sg_cpy_count=%d len remaining=%zd\n", + __func__, in_sg_cpy_count, len); ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, buf->buf[0].pos); @@ -421,7 +415,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, if (ret < len && ret) { fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); /* Skip over this much next time around */ - skip_size = ret; + iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, ret); buf->buf[0].pos += ret; len -= ret; From 1a5fff8e63a5ab55ccdec4f134b2f96453bf789f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 18 May 2021 17:35:37 -0400 Subject: [PATCH 07/15] virtiofsd: Check EOF before short read In virtio_send_data_iov() we are checking first for short read and then EOF condition. Change the order. Basically check for error and EOF first and last remaining piece is short ready which will lead to retry automatically at the end of while loop. Just that it is little simpler to read to the code. There is no need to call "continue" and also one less call of "len-=ret". Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Connor Kuehl Signed-off-by: Vivek Goyal Message-Id: <20210518213538.693422-7-vgoyal@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 49c7dd788a..99f91c9d87 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -410,25 +410,24 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, __func__, len); goto err; } - fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, - ret, len); - if (ret < len && ret) { - fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); - /* Skip over this much next time around */ - iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, ret); - buf->buf[0].pos += ret; - len -= ret; - /* Lets do another read */ - continue; - } if (!ret) { /* EOF case? */ fuse_log(FUSE_LOG_DEBUG, "%s: !ret len remaining=%zd\n", __func__, len); break; } + fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, + ret, len); + len -= ret; + /* Short read. Retry reading remaining bytes */ + if (len) { + fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); + /* Skip over this much next time around */ + iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, ret); + buf->buf[0].pos += ret; + } } while (len); /* Need to fix out->len on EOF */ From b5fd59cf907df7fa2272426010c4d264682347f2 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 18 May 2021 17:35:38 -0400 Subject: [PATCH 08/15] virtiofsd: Set req->reply_sent right after sending reply There is no reason to set it in label "err". We should be able to set it right after sending reply. It is easier to read. Also got rid of label "err" because now only thing it was doing was return a code. We can return from the error location itself and no need to first jump to label "err". Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Connor Kuehl Signed-off-by: Vivek Goyal Message-Id: <20210518213538.693422-8-vgoyal@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 99f91c9d87..fa4aff9b0e 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -366,14 +366,12 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, if (in_len < sizeof(struct fuse_out_header)) { fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", __func__, elem->index); - ret = E2BIG; - goto err; + return E2BIG; } if (in_len < tosend_len) { fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", __func__, elem->index, tosend_len); - ret = E2BIG; - goto err; + return E2BIG; } /* TODO: Limit to 'len' */ @@ -408,7 +406,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, } fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", __func__, len); - goto err; + return ret; } if (!ret) { @@ -438,21 +436,14 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, out_sg->len = tosend_len; } - ret = 0; - vu_dispatch_rdlock(qi->virtio_dev); pthread_mutex_lock(&qi->vq_lock); vu_queue_push(dev, q, elem, tosend_len); vu_queue_notify(dev, q); pthread_mutex_unlock(&qi->vq_lock); vu_dispatch_unlock(qi->virtio_dev); - -err: - if (ret == 0) { - req->reply_sent = true; - } - - return ret; + req->reply_sent = true; + return 0; } static __thread bool clone_fs_called; From d14d4f4f1815dcf63fa6b90e9a34854977e42f84 Mon Sep 17 00:00:00 2001 From: Mahmoud Mandour Date: Sun, 14 Mar 2021 05:23:22 +0200 Subject: [PATCH 09/15] tools/virtiofsd/buffer.c: replaced a calloc call with GLib's g_try_new0 Replaced a call to calloc() and its respective free() call with GLib's g_try_new0() and g_free() calls. Signed-off-by: Mahmoud Mandour Message-Id: <20210314032324.45142-7-ma.mandourr@gmail.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c index 874f01c488..b5f04be356 100644 --- a/tools/virtiofsd/buffer.c +++ b/tools/virtiofsd/buffer.c @@ -37,7 +37,7 @@ static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, struct iovec *iov; int fd = out_buf->fd; - iov = calloc(iovcnt, sizeof(struct iovec)); + iov = g_try_new0(struct iovec, iovcnt); if (!iov) { return -ENOMEM; } @@ -61,7 +61,7 @@ static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, res = -errno; } - free(iov); + g_free(iov); return res; } From bf99f30bc3420f2f4b82f7f827fb93197d33c017 Mon Sep 17 00:00:00 2001 From: Mahmoud Mandour Date: Sun, 14 Mar 2021 05:23:23 +0200 Subject: [PATCH 10/15] tools/virtiofsd/fuse_opt.c: Replaced a malloc with GLib's g_try_malloc Replaced a malloc() call and its respective free() with GLib's g_try_malloc() and g_free() calls. Signed-off-by: Mahmoud Mandour Message-Id: <20210314032324.45142-8-ma.mandourr@gmail.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_opt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c index f0ab8d22f4..9d371448e9 100644 --- a/tools/virtiofsd/fuse_opt.c +++ b/tools/virtiofsd/fuse_opt.c @@ -272,7 +272,7 @@ static int process_opt_sep_arg(struct fuse_opt_context *ctx, } param = ctx->argv[ctx->argctr]; - newarg = malloc(sep + strlen(param) + 1); + newarg = g_try_malloc(sep + strlen(param) + 1); if (!newarg) { return alloc_failed(); } @@ -280,7 +280,7 @@ static int process_opt_sep_arg(struct fuse_opt_context *ctx, memcpy(newarg, arg, sep); strcpy(newarg + sep, param); res = process_opt(ctx, opt, sep, newarg, iso); - free(newarg); + g_free(newarg); return res; } From efb208dc9c3f1e881aecff21fb1c7a7b6b869480 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 19 May 2021 14:47:40 +0800 Subject: [PATCH 11/15] migration/rdma: Fix cm_event used before being initialized A segmentation fault was triggered when i try to abort a postcopy + rdma migration. since rdma_ack_cm_event releases a uninitialized cm_event in these case. like below: 2496 ret = rdma_get_cm_event(rdma->channel, &cm_event); 2497 if (ret) { 2498 perror("rdma_get_cm_event after rdma_connect"); 2499 ERROR(errp, "connecting to destination!"); 2500 rdma_ack_cm_event(cm_event); <<<< cause segmentation fault 2501 goto err_rdma_source_connect; 2502 } Refer to the rdma_get_cm_event() code, cm_event will be updated/changed only if rdma_get_cm_event() returns 0. So it's okey to remove the ack in error patch. Signed-off-by: Li Zhijian Message-Id: <20210519064740.10828-1-lizhijian@cn.fujitsu.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/rdma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/migration/rdma.c b/migration/rdma.c index 00eac34232..41726cc74a 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2497,7 +2497,6 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) if (ret) { perror("rdma_get_cm_event after rdma_connect"); ERROR(errp, "connecting to destination!"); - rdma_ack_cm_event(cm_event); goto err_rdma_source_connect; } From 4e812d2338acb354b969b59f792f413f567c0ace Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 20 May 2021 16:11:45 +0800 Subject: [PATCH 12/15] migration/rdma: cleanup rdma in rdma_start_incoming_migration error path the error path after calling qemu_rdma_dest_init() should do rdma cleanup Signed-off-by: Li Zhijian Message-Id: <20210520081148.17001-1-lizhijian@cn.fujitsu.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/rdma.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 41726cc74a..7e7595faab 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -4040,7 +4040,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) if (ret) { ERROR(errp, "listening on socket!"); - goto err; + goto cleanup_rdma; } trace_rdma_start_incoming_migration_after_rdma_listen(); @@ -4050,7 +4050,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) rdma_return_path = qemu_rdma_data_init(host_port, &local_err); if (rdma_return_path == NULL) { - goto err; + goto cleanup_rdma; } qemu_rdma_return_path_dest_init(rdma_return_path, rdma); @@ -4059,6 +4059,9 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, NULL, (void *)(intptr_t)rdma); return; + +cleanup_rdma: + qemu_rdma_cleanup(rdma); err: error_propagate(errp, local_err); if (rdma) { From f53b450ada3bf7fa1c88fbf4f13b864af7795bd3 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 25 May 2021 16:05:50 +0800 Subject: [PATCH 13/15] migration/rdma: Fix rdma_addrinfo res leaks rdma_freeaddrinfo() is the reverse operation of rdma_getaddrinfo() Signed-off-by: Li Zhijian Reviewed-by: Dr. David Alan Gilbert Message-Id: <20210525080552.28259-2-lizhijian@cn.fujitsu.com> Signed-off-by: Dr. David Alan Gilbert --- migration/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/migration/rdma.c b/migration/rdma.c index 7e7595faab..651534e825 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -987,10 +987,12 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp) } } + rdma_freeaddrinfo(res); ERROR(errp, "could not resolve address %s", rdma->host); goto err_resolve_get_addr; route: + rdma_freeaddrinfo(res); qemu_rdma_dump_gid("source_resolve_addr", rdma->cm_id); ret = rdma_get_cm_event(rdma->channel, &cm_event); @@ -2593,6 +2595,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp) break; } + rdma_freeaddrinfo(res); if (!e) { ERROR(errp, "Error: could not rdma_bind_addr!"); goto err_dest_init_bind_addr; From 44bcfd45e9806c78d9d526d69b0590227d215a78 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 25 May 2021 16:05:51 +0800 Subject: [PATCH 14/15] migration/rdma: destination: create the return patch after the first accept destination side: $ build/qemu-system-x86_64 -enable-kvm -netdev tap,id=hn0,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device e1000,netdev=hn0,mac=50:52:54:00:11:22 -boot c -drive if=none,file=./Fedora-rdma-server-migration.qcow2,id=drive-virtio-disk0 -device virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -m 2048 -smp 2 -device piix3-usb-uhci -device usb-tablet -monitor stdio -vga qxl -spice streaming-video=filter,port=5902,disable-ticketing -incoming rdma:192.168.1.10:8888 (qemu) migrate_set_capability postcopy-ram on (qemu) dest_init RDMA Device opened: kernel name rocep1s0f0 uverbs device name uverbs0, infiniband_verbs class device path /sys/class/infiniband_verbs/uverbs0, infiniband class device path /sys/class/infiniband/rocep1s0f0, transport: (2) Ethernet Segmentation fault (core dumped) (gdb) bt #0 qemu_rdma_accept (rdma=0x0) at ../migration/rdma.c:3272 #1 rdma_accept_incoming_migration (opaque=0x0) at ../migration/rdma.c:3986 #2 0x0000563c9e51f02a in aio_dispatch_handler (ctx=ctx@entry=0x563ca0606010, node=0x563ca12b2150) at ../util/aio-posix.c:329 #3 0x0000563c9e51f752 in aio_dispatch_handlers (ctx=0x563ca0606010) at ../util/aio-posix.c:372 #4 aio_dispatch (ctx=0x563ca0606010) at ../util/aio-posix.c:382 #5 0x0000563c9e4f4d9e in aio_ctx_dispatch (source=, callback=, user_data=) at ../util/async.c:306 #6 0x00007fe96ef3fa9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 #7 0x0000563c9e4ffeb8 in glib_pollfds_poll () at ../util/main-loop.c:231 #8 os_host_main_loop_wait (timeout=12188789) at ../util/main-loop.c:254 #9 main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:530 #10 0x0000563c9e3c7211 in qemu_main_loop () at ../softmmu/runstate.c:725 #11 0x0000563c9dfd46fe in main (argc=, argv=, envp=) at ../softmmu/main.c:50 The rdma return path will not be created when qemu incoming is starting since migrate_copy() is false at that moment, then a NULL return path rdma was referenced if the user enabled postcopy later. Signed-off-by: Li Zhijian Message-Id: <20210525080552.28259-3-lizhijian@cn.fujitsu.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/rdma.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 651534e825..d829d08d07 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -316,6 +316,7 @@ typedef struct RDMALocalBlocks { typedef struct RDMAContext { char *host; int port; + char *host_port; RDMAWorkRequestData wr_data[RDMA_WRID_MAX]; @@ -2392,7 +2393,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) rdma->channel = NULL; } g_free(rdma->host); + g_free(rdma->host_port); rdma->host = NULL; + rdma->host_port = NULL; } @@ -2648,6 +2651,7 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp) if (!inet_parse(addr, host_port, NULL)) { rdma->port = atoi(addr->port); rdma->host = g_strdup(addr->host); + rdma->host_port = g_strdup(host_port); } else { ERROR(errp, "bad RDMA migration address '%s'", host_port); g_free(rdma); @@ -3276,6 +3280,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) .private_data = &cap, .private_data_len = sizeof(cap), }; + RDMAContext *rdma_return_path = NULL; struct rdma_cm_event *cm_event; struct ibv_context *verbs; int ret = -EINVAL; @@ -3291,6 +3296,20 @@ static int qemu_rdma_accept(RDMAContext *rdma) goto err_rdma_dest_wait; } + /* + * initialize the RDMAContext for return path for postcopy after first + * connection request reached. + */ + if (migrate_postcopy() && !rdma->is_return_path) { + rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL); + if (rdma_return_path == NULL) { + rdma_ack_cm_event(cm_event); + goto err_rdma_dest_wait; + } + + qemu_rdma_return_path_dest_init(rdma_return_path, rdma); + } + memcpy(&cap, cm_event->param.conn.private_data, sizeof(cap)); network_to_caps(&cap); @@ -3406,6 +3425,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) err_rdma_dest_wait: rdma->error_state = ret; qemu_rdma_cleanup(rdma); + g_free(rdma_return_path); return ret; } @@ -4048,17 +4068,6 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) trace_rdma_start_incoming_migration_after_rdma_listen(); - /* initialize the RDMAContext for return path */ - if (migrate_postcopy()) { - rdma_return_path = qemu_rdma_data_init(host_port, &local_err); - - if (rdma_return_path == NULL) { - goto cleanup_rdma; - } - - qemu_rdma_return_path_dest_init(rdma_return_path, rdma); - } - qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, NULL, (void *)(intptr_t)rdma); return; @@ -4069,6 +4078,7 @@ err: error_propagate(errp, local_err); if (rdma) { g_free(rdma->host); + g_free(rdma->host_port); } g_free(rdma); g_free(rdma_return_path); From e49e49dd73b8b17f5f341b3e11c8b6878c43d3e1 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 25 May 2021 16:05:52 +0800 Subject: [PATCH 15/15] migration/rdma: source: poll cm_event from return path source side always blocks if postcopy is only enabled at source side. users are not able to cancel this migration in this case. Let source side have chance to cancel this migration Signed-off-by: Li Zhijian Message-Id: <20210525080552.28259-4-lizhijian@cn.fujitsu.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert Typo fix --- migration/rdma.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index d829d08d07..1cdb4561f3 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -36,6 +36,7 @@ #include #include "trace.h" #include "qom/object.h" +#include /* * Print and error on both the Monitor and the Log file. @@ -2460,7 +2461,36 @@ err_rdma_source_init: return -1; } -static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) +static int qemu_get_cm_event_timeout(RDMAContext *rdma, + struct rdma_cm_event **cm_event, + long msec, Error **errp) +{ + int ret; + struct pollfd poll_fd = { + .fd = rdma->channel->fd, + .events = POLLIN, + .revents = 0 + }; + + do { + ret = poll(&poll_fd, 1, msec); + } while (ret < 0 && errno == EINTR); + + if (ret == 0) { + ERROR(errp, "poll cm event timeout"); + return -1; + } else if (ret < 0) { + ERROR(errp, "failed to poll cm event, errno=%i", errno); + return -1; + } else if (poll_fd.revents & POLLIN) { + return rdma_get_cm_event(rdma->channel, cm_event); + } else { + ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents); + return -1; + } +} + +static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path) { RDMACapabilities cap = { .version = RDMA_CONTROL_VERSION_CURRENT, @@ -2498,7 +2528,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) goto err_rdma_source_connect; } - ret = rdma_get_cm_event(rdma->channel, &cm_event); + if (return_path) { + ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp); + } else { + ret = rdma_get_cm_event(rdma->channel, &cm_event); + } if (ret) { perror("rdma_get_cm_event after rdma_connect"); ERROR(errp, "connecting to destination!"); @@ -4111,7 +4145,7 @@ void rdma_start_outgoing_migration(void *opaque, } trace_rdma_start_outgoing_migration_after_rdma_source_init(); - ret = qemu_rdma_connect(rdma, errp); + ret = qemu_rdma_connect(rdma, errp, false); if (ret) { goto err; @@ -4132,7 +4166,7 @@ void rdma_start_outgoing_migration(void *opaque, goto return_path_err; } - ret = qemu_rdma_connect(rdma_return_path, errp); + ret = qemu_rdma_connect(rdma_return_path, errp, true); if (ret) { goto return_path_err;