migration pull: 2020-10-26

Another go at Peter's postcopy fixes
 
 Cleanups from Bihong Yu and Peter Maydell.
 
 Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEERfXHG0oMt/uXep+pBRYzHrxb/ecFAl+W9n8ACgkQBRYzHrxb
 /ef2uRAAqWTFLXuBF8+evEd1mMq2SM3ZYTuc7QKTY3MzAH6J/OMvJbZ112itqWOb
 iZ5NuuWH4PvzOhlR/PNNf1Yv3hTfv36HinG+OCh6s+6aqVx9yHOAfdBgmJIdYAeg
 Sk1jx43dvCyN2FwPs31ir3L6mwsrtfkRsS+2FeyrvRoEl4WE9mOoypCft3vdd9Dw
 zZea0Pw7vIs454D4n1vpJiQtq6B4eSAlQKpTLfQbglpTm4MgqLERzGvpT6hbQXJR
 eQyTOqRe08viIOZ+oN0B/+RVO6T9jc4Y1bEl2NSak1v4Tf7NNfDkFpLAjFm07V/1
 tIhL/NOOsHdzfHQtrZpzKQgwaceb1N5qo0PfxD6/tRf9HlXY54iw6yY75+5c5Y89
 UK8VSIYKnM2yXeVDLShxixIr3A1Z+zA41XydDwaLZczjeV7+nwrAXAjO8a+j6Dox
 zj4IyN2g5elEOmarC8qkvbDZ+TVvA2tookhWVwoz+D8ChYkcRDKP9eoYomfRwg+e
 NKRFuLBkyVPb0eEhyOV6HqJbMfTLpHneTM94v6HGz8tiK8IlMZfTTnC2Mr5gTXuS
 /cgOVhsY7+l+pKpxpGJmU3aUCYRk1CuK6MhXgjYEFMh5Siba8s0ZPZVaEm/BUyO1
 rD+tVup87xMiJq3xnmLX+opblYE9G+b67hH1KuPc5vZXiSwuTkQ=
 =OL0Q
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20201026a' into staging

migration pull: 2020-10-26

Another go at Peter's postcopy fixes

Cleanups from Bihong Yu and Peter Maydell.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# gpg: Signature made Mon 26 Oct 2020 16:17:03 GMT
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20201026a:
  migration-test: Only hide error if !QTEST_LOG
  migration/postcopy: Release fd before going into 'postcopy-pause'
  migration: Sync requested pages after postcopy recovery
  migration: Maintain postcopy faulted addresses
  migration: Introduce migrate_send_rp_message_req_pages()
  migration: Pass incoming state into qemu_ufd_copy_ioctl()
  migration: using trace_ to replace DPRINTF
  migration: Delete redundant spaces
  migration: Open brace '{' following function declarations go on the next line
  migration: Do not initialise statics and globals to 0 or NULL
  migration: Add braces {} for if statement
  migration: Open brace '{' following struct go on the same line
  migration: Add spaces around operator
  migration: Don't use '#' flag of printf format
  migration: Do not use C99 // comments
  migration: Drop unused VMSTATE_FLOAT64 support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
master
Peter Maydell 2020-10-27 10:25:42 +00:00
commit d55450df99
13 changed files with 213 additions and 103 deletions

View File

@ -219,7 +219,6 @@ extern const VMStateInfo vmstate_info_uint64;
#define VMS_NULLPTR_MARKER (0x30U) /* '0' */
extern const VMStateInfo vmstate_info_nullptr;
extern const VMStateInfo vmstate_info_float64;
extern const VMStateInfo vmstate_info_cpudouble;
extern const VMStateInfo vmstate_info_timer;
@ -997,12 +996,6 @@ extern const VMStateInfo vmstate_info_qlist;
VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint64, uint64_t)
#define VMSTATE_FLOAT64_V(_f, _s, _v) \
VMSTATE_SINGLE(_f, _s, _v, vmstate_info_float64, float64)
#define VMSTATE_FLOAT64(_f, _s) \
VMSTATE_FLOAT64_V(_f, _s, 0)
#define VMSTATE_TIMER_PTR_TEST(_f, _s, _test) \
VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)
@ -1114,12 +1107,6 @@ extern const VMStateInfo vmstate_info_qlist;
#define VMSTATE_INT64_ARRAY(_f, _s, _n) \
VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0)
#define VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, _v) \
VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_float64, float64)
#define VMSTATE_FLOAT64_ARRAY(_f, _s, _n) \
VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, 0)
#define VMSTATE_CPUDOUBLE_ARRAY_V(_f, _s, _n, _v) \
VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_cpudouble, CPU_DoubleU)

View File

@ -26,6 +26,7 @@
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "sysemu/block-backend.h"
#include "trace.h"
#define BLK_MIG_BLOCK_SIZE (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS)
@ -40,7 +41,7 @@
#define MAX_IO_BUFFERS 512
#define MAX_PARALLEL_IO 16
//#define DEBUG_BLK_MIGRATION
/* #define DEBUG_BLK_MIGRATION */
#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
@ -434,10 +435,9 @@ static int init_blk_migration(QEMUFile *f)
block_mig_state.total_sector_sum += sectors;
if (bmds->shared_base) {
DPRINTF("Start migration for %s with shared base image\n",
bdrv_get_device_name(bs));
trace_migration_block_init_shared(bdrv_get_device_name(bs));
} else {
DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
trace_migration_block_init_full(bdrv_get_device_name(bs));
}
QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
@ -592,7 +592,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
return (bmds->cur_dirty >= bmds->total_sectors);
error:
DPRINTF("Error reading sector %" PRId64 "\n", sector);
trace_migration_block_save_device_dirty(sector);
g_free(blk->buf);
g_free(blk);
return ret;
@ -628,9 +628,9 @@ static int flush_blks(QEMUFile *f)
BlkMigBlock *blk;
int ret = 0;
DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
__func__, block_mig_state.submitted, block_mig_state.read_done,
block_mig_state.transferred);
trace_migration_block_flush_blks("Enter", block_mig_state.submitted,
block_mig_state.read_done,
block_mig_state.transferred);
blk_mig_lock();
while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
@ -656,9 +656,9 @@ static int flush_blks(QEMUFile *f)
}
blk_mig_unlock();
DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __func__,
block_mig_state.submitted, block_mig_state.read_done,
block_mig_state.transferred);
trace_migration_block_flush_blks("Exit", block_mig_state.submitted,
block_mig_state.read_done,
block_mig_state.transferred);
return ret;
}
@ -727,8 +727,8 @@ static int block_save_setup(QEMUFile *f, void *opaque)
{
int ret;
DPRINTF("Enter save live setup submitted %d transferred %d\n",
block_mig_state.submitted, block_mig_state.transferred);
trace_migration_block_save("setup", block_mig_state.submitted,
block_mig_state.transferred);
qemu_mutex_lock_iothread();
ret = init_blk_migration(f);
@ -759,8 +759,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
int64_t last_ftell = qemu_ftell(f);
int64_t delta_ftell;
DPRINTF("Enter save live iterate submitted %d transferred %d\n",
block_mig_state.submitted, block_mig_state.transferred);
trace_migration_block_save("iterate", block_mig_state.submitted,
block_mig_state.transferred);
ret = flush_blks(f);
if (ret) {
@ -825,8 +825,8 @@ static int block_save_complete(QEMUFile *f, void *opaque)
{
int ret;
DPRINTF("Enter save live complete submitted %d transferred %d\n",
block_mig_state.submitted, block_mig_state.transferred);
trace_migration_block_save("complete", block_mig_state.submitted,
block_mig_state.transferred);
ret = flush_blks(f);
if (ret) {
@ -851,7 +851,7 @@ static int block_save_complete(QEMUFile *f, void *opaque)
/* report completion */
qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
DPRINTF("Block migration completed\n");
trace_migration_block_save_complete();
qemu_put_be64(f, BLK_MIG_FLAG_EOS);
@ -884,7 +884,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
pending = max_size + BLK_MIG_BLOCK_SIZE;
}
DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
trace_migration_block_save_pending(pending);
/* We don't do postcopy */
*res_precopy_only += pending;
}
@ -998,7 +998,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
(addr == 100) ? '\n' : '\r');
fflush(stdout);
} else if (!(flags & BLK_MIG_FLAG_EOS)) {
fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
fprintf(stderr, "Unknown block migration flags: 0x%x\n", flags);
return -EINVAL;
}
ret = qemu_file_get_error(f);

View File

@ -143,6 +143,13 @@ static int migration_maybe_pause(MigrationState *s,
int new_state);
static void migrate_fd_cancel(MigrationState *s);
/*
 * Three-way comparison of two tree keys by their numeric pointer value.
 * Returns a negative value, zero, or a positive value when @ap is
 * respectively below, equal to, or above @bp.
 */
static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t lhs = (uintptr_t) ap;
    uintptr_t rhs = (uintptr_t) bp;

    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}
void migration_object_init(void)
{
Error *err = NULL;
@ -164,6 +171,8 @@ void migration_object_init(void)
qemu_event_init(&current_incoming->main_thread_load_event, false);
qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
qemu_mutex_init(&current_incoming->page_request_mutex);
current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
if (!migration_object_check(current_migration, &err)) {
error_report_err(err);
@ -230,6 +239,11 @@ void migration_incoming_state_destroy(void)
qemu_event_reset(&mis->main_thread_load_event);
if (mis->page_requested) {
g_tree_destroy(mis->page_requested);
mis->page_requested = NULL;
}
if (mis->socket_address_list) {
qapi_free_SocketAddressList(mis->socket_address_list);
mis->socket_address_list = NULL;
@ -306,8 +320,8 @@ error:
* Start: Address offset within the RB
* Len: Length in bytes required - must be a multiple of pagesize
*/
int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
ram_addr_t start)
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
RAMBlock *rb, ram_addr_t start)
{
uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
size_t msglen = 12; /* start + len */
@ -343,6 +357,37 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
/*
 * Ask the source for a page and record the request in mis->page_requested.
 *
 * @mis:   incoming migration state; owns the request tree and its mutex
 * @rb:    RAMBlock the page belongs to
 * @start: offset of the page within @rb
 * @haddr: host address the request is made for; it is masked with the
 *         negated page size of @rb (i.e. rounded down to a page boundary,
 *         given a power-of-two page size) to form the key kept in the tree
 *
 * Returns 0 without sending anything when the receive bitmap already marks
 * the page as received.  Otherwise returns the result of
 * migrate_send_rp_message_req_pages().  Note that the message is sent for
 * every not-yet-received page, even if the address is already in the tree;
 * the tree lookup only prevents a duplicate insertion.
 */
int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb)));
    /*
     * Initialised at declaration so that the read after the guarded scope
     * never depends on the compiler proving that the scope was entered.
     */
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so
             * that things like g_tree_lookup() will return TRUE (1) when
             * found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need
     * the lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}
static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
@ -2468,8 +2513,8 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
* Since we currently insist on matching page sizes, just sanity check
* we're being asked for whole host pages.
*/
if (start & (our_host_ps-1) ||
(len & (our_host_ps-1))) {
if (start & (our_host_ps - 1) ||
(len & (our_host_ps - 1))) {
error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
" len: %zd", __func__, start, len);
mark_source_rp_bad(ms);
@ -3123,9 +3168,6 @@ static MigThrError postcopy_pause(MigrationState *s)
while (true) {
QEMUFile *file;
migrate_set_state(&s->state, s->state,
MIGRATION_STATUS_POSTCOPY_PAUSED);
/* Current channel is possibly broken. Release it. */
assert(s->to_dst_file);
qemu_mutex_lock(&s->qemu_file_lock);
@ -3136,6 +3178,9 @@ static MigThrError postcopy_pause(MigrationState *s)
qemu_file_shutdown(file);
qemu_fclose(file);
migrate_set_state(&s->state, s->state,
MIGRATION_STATUS_POSTCOPY_PAUSED);
error_report("Detected IO failure for postcopy. "
"Migration paused.");

View File

@ -104,6 +104,23 @@ struct MigrationIncomingState {
/* List of listening socket addresses */
SocketAddressList *socket_address_list;
/* A tree of pages that we requested from the source VM */
GTree *page_requested;
/* For debugging purpose only, but would be nice to keep */
int page_requested_count;
/*
 * The mutex helps to maintain the requested pages that we sent to the
 * source, IOW, to guarantee coherency between the page_requested tree and
 * the per-ramblock receivedmap.  Note! This does not guarantee consistency
 * of the real page copy procedures (using UFFDIO_[ZERO]COPY).  E.g., even
 * if one bit in receivedmap is cleared, UFFDIO_COPY could have happened
 * for that page already.  This is intended, so that the mutex won't be
 * serialized with and blocked by slow operations like UFFDIO_* ioctls.
 * However this should be enough to make sure the page_requested tree
 * always contains valid information.
 */
QemuMutex page_request_mutex;
};
MigrationIncomingState *migration_incoming_get_current(void);
@ -124,8 +141,7 @@ struct MigrationClass {
DeviceClass parent_class;
};
struct MigrationState
{
struct MigrationState {
/*< private >*/
DeviceState parent_obj;
@ -332,7 +348,9 @@ void migrate_send_rp_shut(MigrationIncomingState *mis,
void migrate_send_rp_pong(MigrationIncomingState *mis,
uint32_t value);
int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
ram_addr_t start);
ram_addr_t start, uint64_t haddr);
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
RAMBlock *rb, ram_addr_t start);
void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
char *block_name);
void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value);

View File

@ -18,14 +18,7 @@
#include "qapi/error.h"
#include "qemu/host-utils.h"
#include "page_cache.h"
#ifdef DEBUG_CACHE
#define DPRINTF(fmt, ...) \
do { fprintf(stdout, "cache: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
do { } while (0)
#endif
#include "trace.h"
/* the page in cache will not be replaced in two cycles */
#define CACHED_PAGE_LIFETIME 2
@ -75,7 +68,7 @@ PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp)
cache->num_items = 0;
cache->max_num_items = num_pages;
DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items);
trace_migration_pagecache_init(cache->max_num_items);
/* We prefer not to abort if there is no memory */
cache->page_cache = g_try_malloc((cache->max_num_items) *
@ -169,7 +162,7 @@ int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata,
if (!it->it_data) {
it->it_data = g_try_malloc(cache->page_size);
if (!it->it_data) {
DPRINTF("Error allocating page\n");
trace_migration_pagecache_insert();
return -1;
}
cache->num_items++;

View File

@ -403,7 +403,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
strerror(errno));
goto out;
}
g_assert(((size_t)testarea & (pagesize-1)) == 0);
g_assert(((size_t)testarea & (pagesize - 1)) == 0);
reg_struct.range.start = (uintptr_t)testarea;
reg_struct.range.len = pagesize;
@ -684,7 +684,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
qemu_ram_get_idstr(rb), rb_offset);
return postcopy_wake_shared(pcfd, client_addr, rb);
}
migrate_send_rp_req_pages(mis, rb, aligned_rbo);
migrate_send_rp_req_pages(mis, rb, aligned_rbo, client_addr);
return 0;
}
@ -979,7 +979,8 @@ retry:
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
ret = migrate_send_rp_req_pages(mis, rb, rb_offset);
ret = migrate_send_rp_req_pages(mis, rb, rb_offset,
msg.arg.pagefault.address);
if (ret) {
/* May be network failure, try to wait for recovery */
if (ret == -EIO && postcopy_pause_fault_thread(mis)) {
@ -1128,10 +1129,12 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
return 0;
}
static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
static int qemu_ufd_copy_ioctl(MigrationIncomingState *mis, void *host_addr,
void *from_addr, uint64_t pagesize, RAMBlock *rb)
{
int userfault_fd = mis->userfault_fd;
int ret;
if (from_addr) {
struct uffdio_copy copy_struct;
copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
@ -1147,10 +1150,20 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
}
if (!ret) {
qemu_mutex_lock(&mis->page_request_mutex);
ramblock_recv_bitmap_set_range(rb, host_addr,
pagesize / qemu_target_page_size());
/*
* If this page resolves a page fault for a previous recorded faulted
* address, take a special note to maintain the requested page list.
*/
if (g_tree_lookup(mis->page_requested, host_addr)) {
g_tree_remove(mis->page_requested, host_addr);
mis->page_requested_count--;
trace_postcopy_page_req_del(host_addr, mis->page_requested_count);
}
qemu_mutex_unlock(&mis->page_request_mutex);
mark_postcopy_blocktime_end((uintptr_t)host_addr);
}
return ret;
}
@ -1185,7 +1198,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
* which would be slightly cheaper, but we'd have to be careful
* of the order of updating our page state.
*/
if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
if (qemu_ufd_copy_ioctl(mis, host, from, pagesize, rb)) {
int e = errno;
error_report("%s: %s copy host: %p from: %p (size: %zd)",
__func__, strerror(e), host, from, pagesize);
@ -1212,7 +1225,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
* but it's not available for everything (e.g. hugetlbpages)
*/
if (qemu_ram_is_uf_zeroable(rb)) {
if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, pagesize, rb)) {
if (qemu_ufd_copy_ioctl(mis, host, NULL, pagesize, rb)) {
int e = errno;
error_report("%s: %s zero host: %p",
__func__, strerror(e), host);

View File

@ -101,14 +101,16 @@ static struct {
static void XBZRLE_cache_lock(void)
{
if (migrate_use_xbzrle())
if (migrate_use_xbzrle()) {
qemu_mutex_lock(&XBZRLE.lock);
}
}
static void XBZRLE_cache_unlock(void)
{
if (migrate_use_xbzrle())
if (migrate_use_xbzrle()) {
qemu_mutex_unlock(&XBZRLE.lock);
}
}
/**
@ -1563,7 +1565,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
rs->last_req_rb = ramblock;
}
trace_ram_save_queue_pages(ramblock->idstr, start, len);
if (start+len > ramblock->used_length) {
if (start + len > ramblock->used_length) {
error_report("%s request overrun start=" RAM_ADDR_FMT " len="
RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
__func__, start, len, ramblock->used_length);
@ -2741,7 +2743,7 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
*/
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
static RAMBlock *block = NULL;
static RAMBlock *block;
char id[256];
uint8_t len;
@ -3298,7 +3300,7 @@ static int ram_load_postcopy(QEMUFile *f)
multifd_recv_sync_main();
break;
default:
error_report("Unknown combination of migration flags: %#x"
error_report("Unknown combination of migration flags: 0x%x"
" (postcopy mode)", flags);
ret = -EINVAL;
break;
@ -3576,7 +3578,7 @@ static int ram_load_precopy(QEMUFile *f)
if (flags & RAM_SAVE_FLAG_HOOK) {
ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
} else {
error_report("Unknown combination of migration flags: %#x",
error_report("Unknown combination of migration flags: 0x%x",
flags);
ret = -EINVAL;
}

View File

@ -273,7 +273,8 @@ static uint64_t htonll(uint64_t v)
return u.llv;
}
static uint64_t ntohll(uint64_t v) {
static uint64_t ntohll(uint64_t v)
{
union { uint32_t lv[2]; uint64_t llv; } u;
u.llv = v;
return ((uint64_t)ntohl(u.lv[0]) << 32) | (uint64_t) ntohl(u.lv[1]);
@ -854,7 +855,7 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
*/
if (!verbs) {
int num_devices, x;
struct ibv_device ** dev_list = ibv_get_device_list(&num_devices);
struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
bool roce_found = false;
bool ib_found = false;
@ -1288,7 +1289,7 @@ const char *print_wrid(int wrid)
* workload information or LRU information is available, do not attempt to use
* this feature except for basic testing.
*/
//#define RDMA_UNREGISTRATION_EXAMPLE
/* #define RDMA_UNREGISTRATION_EXAMPLE */
/*
* Perform a non-optimized memory unregistration after every transfer

View File

@ -63,7 +63,7 @@
#include "qemu/bitmap.h"
#include "net/announce.h"
const unsigned int postcopy_ram_discard_version = 0;
const unsigned int postcopy_ram_discard_version;
/* Subcommands for QEMU_VM_COMMAND */
enum qemu_vm_cmd {
@ -520,7 +520,7 @@ static const VMStateDescription vmstate_configuration = {
VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
.subsections = (const VMStateDescription *[]) {
&vmstate_target_page_bits,
&vmstate_capabilites,
&vmstate_uuid,
@ -2010,6 +2010,49 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
return LOADVM_QUIT;
}
/*
 * Per-entry callback for walking the page_requested tree: re-send the page
 * request for the host address stored in @key.  @data is the
 * MigrationIncomingState; @value is not used.
 *
 * The caller must hold page_request_mutex.
 *
 * Always returns FALSE, on success and on failure alike.
 */
static gboolean postcopy_sync_page_req(gpointer key, gpointer value,
                                       gpointer data)
{
    MigrationIncomingState *mis = data;
    void *host_addr = (void *) key;
    ram_addr_t offset;
    RAMBlock *block = qemu_ram_block_from_host(host_addr, true, &offset);

    if (!block) {
        /*
         * This should _never_ happen.  Still, a migrating VM should not
         * crash/assert because of it.  Report the error (deliberately not
         * a *_once variant: every illegal address should be visible, and
         * the guest cannot trigger this) and carry on with the next entry.
         */
        error_report("%s: illegal host addr %p", __func__, host_addr);
        return FALSE;
    }

    if (migrate_send_rp_message_req_pages(mis, block, offset)) {
        /* Same policy as above: report the failure and keep going. */
        error_report("%s: send rp message failed for addr %p",
                     __func__, host_addr);
    } else {
        trace_postcopy_page_req_sync(host_addr);
    }

    return FALSE;
}
/*
 * Run postcopy_sync_page_req() over every entry of mis->page_requested,
 * holding page_request_mutex for the whole walk.
 */
static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis)
{
    /* Held until the function returns. */
    QEMU_LOCK_GUARD(&mis->page_request_mutex);

    g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis);
}
static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
{
if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
@ -2032,6 +2075,20 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
/* Tell source that "we are ready" */
migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);
/*
* After a postcopy recovery, the source should have lost the postcopy
* queue, or potentially the requested pages could have been lost during
* the network down phase. Let's re-sync with the source VM by re-sending
* all the pending pages that we eagerly need, so these threads won't get
* blocked too long due to the recovery.
*
* Without this procedure, the faulted destination VM threads (waiting for
* page requests right before the postcopy is interrupted) can keep hanging
* until the pages are sent by the source during the background copying of
* pages, or another thread faulted on the same address accidentally.
*/
migrate_send_rp_req_pages_pending(mis);
return 0;
}

View File

@ -49,6 +49,7 @@ vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
postcopy_pause_incoming(void) ""
postcopy_pause_incoming_continued(void) ""
postcopy_page_req_sync(void *host_addr) "sync page req %p"
# vmstate.c
vmstate_load_field_error(const char *field, int ret) "field \"%s\" load failed, ret = %d"
@ -162,6 +163,7 @@ postcopy_pause_return_path(void) ""
postcopy_pause_return_path_continued(void) ""
postcopy_pause_continued(void) ""
postcopy_start_set_run(void) ""
postcopy_page_req_add(void *addr, int count) "new page req %p total %d"
source_return_path_thread_bad_end(void) ""
source_return_path_thread_end(void) ""
source_return_path_thread_entry(void) ""
@ -272,6 +274,7 @@ postcopy_ram_incoming_cleanup_blocktime(uint64_t total) "total blocktime %" PRIu
postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t rb_offset) "for %s in %s offset 0x%"PRIx64
postcopy_request_shared_page_present(const char *sharer, const char *rb, uint64_t rb_offset) "%s already %s offset 0x%"PRIx64
postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" in %s"
postcopy_page_req_del(void *addr, int count) "resolved page req %p total %d"
get_mem_fault_cpu_index(int cpu, uint32_t pid) "cpu: %d, pid: %u"
@ -325,3 +328,16 @@ get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock n
calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32
skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64
find_page_matched(const char *idstr) "ramblock %s addr or size changed"
# block.c
migration_block_init_shared(const char *blk_device_name) "Start migration for %s with shared base image"
migration_block_init_full(const char *blk_device_name) "Start full migration for %s"
migration_block_save_device_dirty(int64_t sector) "Error reading sector %" PRId64
migration_block_flush_blks(const char *action, int submitted, int read_done, int transferred) "%s submitted %d read_done %d transferred %d"
migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d"
migration_block_save_complete(void) "Block migration completed"
migration_block_save_pending(uint64_t pending) "Enter save live pending %" PRIu64
# page_cache.c
migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64
migration_pagecache_insert(void) "Error allocating page"

View File

@ -420,32 +420,6 @@ const VMStateInfo vmstate_info_uint16_equal = {
.put = put_uint16,
};
/* floating point */
/*
 * VMState "get" handler for float64: read one big-endian 64-bit word from
 * @f and store it, converted with make_float64(), into the object at @pv.
 * @size and @field are not used.  Always returns 0.
 */
static int get_float64(QEMUFile *f, void *pv, size_t size,
                       const VMStateField *field)
{
    float64 *dst = pv;
    uint64_t raw = qemu_get_be64(f);

    *dst = make_float64(raw);
    return 0;
}
/*
 * VMState "put" handler for float64: write the object at @pv to @f as one
 * big-endian 64-bit word, using float64_val() to obtain its bit pattern.
 * @size, @field and @vmdesc are not used.  Always returns 0.
 */
static int put_float64(QEMUFile *f, void *pv, size_t size,
                       const VMStateField *field, QJSON *vmdesc)
{
    /*
     * The field is a float64 (see get_float64), so access it through that
     * type rather than through uint64_t before handing it to float64_val().
     */
    float64 *v = pv;

    qemu_put_be64(f, float64_val(*v));
    return 0;
}
/*
 * VMStateInfo descriptor named "float64"; loading goes through
 * get_float64() and saving through put_float64().
 */
const VMStateInfo vmstate_info_float64 = {
.name = "float64",
.get = get_float64,
.put = put_float64,
};
/* CPU_DoubleU type */
static int get_cpudouble(QEMUFile *f, void *pv, size_t size,

View File

@ -32,13 +32,13 @@ static int vmstate_n_elems(void *opaque, const VMStateField *field)
if (field->flags & VMS_ARRAY) {
n_elems = field->num;
} else if (field->flags & VMS_VARRAY_INT32) {
n_elems = *(int32_t *)(opaque+field->num_offset);
n_elems = *(int32_t *)(opaque + field->num_offset);
} else if (field->flags & VMS_VARRAY_UINT32) {
n_elems = *(uint32_t *)(opaque+field->num_offset);
n_elems = *(uint32_t *)(opaque + field->num_offset);
} else if (field->flags & VMS_VARRAY_UINT16) {
n_elems = *(uint16_t *)(opaque+field->num_offset);
n_elems = *(uint16_t *)(opaque + field->num_offset);
} else if (field->flags & VMS_VARRAY_UINT8) {
n_elems = *(uint8_t *)(opaque+field->num_offset);
n_elems = *(uint8_t *)(opaque + field->num_offset);
}
if (field->flags & VMS_MULTIPLY_ELEMENTS) {
@ -54,7 +54,7 @@ static int vmstate_size(void *opaque, const VMStateField *field)
int size = field->size;
if (field->flags & VMS_VBUFFER) {
size = *(int32_t *)(opaque+field->size_offset);
size = *(int32_t *)(opaque + field->size_offset);
if (field->flags & VMS_MULTIPLY) {
size *= field->size;
}

View File

@ -464,6 +464,10 @@ static void migrate_postcopy_start(QTestState *from, QTestState *to)
}
typedef struct {
/*
* QTEST_LOG=1 may override this. When QTEST_LOG=1, we always dump errors
* unconditionally, because it means the user would like to be verbose.
*/
bool hide_stderr;
bool use_shmem;
/* only launch the target process */
@ -557,7 +561,7 @@ static int test_migrate_start(QTestState **from, QTestState **to,
g_free(bootpath);
if (args->hide_stderr) {
if (!getenv("QTEST_LOG") && args->hide_stderr) {
ignore_stderr = "2>/dev/null";
} else {
ignore_stderr = "";