diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index d7c292fffa..b6a7491f2d 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -41,6 +41,14 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);
 int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
                                size_t length);
 
+/*
+ * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard;
+ * however, leaving it until after precopy means that most of the precopy
+ * data is still THPd.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int postcopy_ram_prepare_discard(MigrationIncomingState *mis);
 
 /*
  * Called at the start of each RAMBlock by the bitmap code.
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 4f1e329b5a..8e107fe8e9 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -226,12 +226,10 @@ static int cleanup_range(const char *block_name, void *host_addr,
      * We turned off hugepage for the precopy stage with postcopy enabled
      * we can turn it back on now.
      */
-#ifdef MADV_HUGEPAGE
-    if (madvise(host_addr, length, MADV_HUGEPAGE)) {
+    if (qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE)) {
         error_report("%s HUGEPAGE: %s", __func__, strerror(errno));
         return -1;
     }
-#endif
 
     /*
      * We can also turn off userfault now since we should have all the
@@ -307,6 +305,50 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
     return 0;
 }
 
+/*
+ * Disable huge pages on an area.
+ * Used as a callback on qemu_ram_foreach_block.
+ *
+ * Returns 0 on success, -1 if the madvise failed.
+ */
+static int nhp_range(const char *block_name, void *host_addr,
+                     ram_addr_t offset, ram_addr_t length, void *opaque)
+{
+    trace_postcopy_nhp_range(block_name, host_addr, offset, length);
+
+    /*
+     * Before we do discards we need to ensure those discards really
+     * do delete areas of the page, even if THP thinks a hugepage would
+     * be a good idea, so force hugepages off.
+     */
+    if (qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE)) {
+        error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard;
+ * however, leaving it until after precopy means that most of the precopy
+ * data is still THPd.
+ *
+ * On success the incoming postcopy state is moved to
+ * POSTCOPY_INCOMING_DISCARD and 0 is returned; -1 is returned (and the
+ * state left untouched) if qemu_ram_foreach_block() reports a failure.
+ */
+int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
+{
+    if (qemu_ram_foreach_block(nhp_range, mis)) {
+        return -1;
+    }
+
+    postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
+
+    return 0;
+}
+
 /*
  * Mark the given area of RAM as requiring notification to unwritten areas
  * Used as a callback on qemu_ram_foreach_block.
@@ -583,6 +625,12 @@ int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
     return -1;
 }
 
+int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
+{
+    assert(0);
+    return -1;
+}
+
 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 {
     assert(0);
diff --git a/migration/savevm.c b/migration/savevm.c
index eb32199888..0596f7bc61 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1316,7 +1316,7 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
     switch (ps) {
     case POSTCOPY_INCOMING_ADVISE:
         /* 1st discard */
-        tmp = 0; /* TODO: later patch postcopy_ram_prepare_discard(mis); */
+        tmp = postcopy_ram_prepare_discard(mis);
         if (tmp) {
             return tmp;
         }
@@ -1448,6 +1448,17 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
         error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
         return -1;
     }
+    if (ps == POSTCOPY_INCOMING_ADVISE) {
+        /*
+         * A rare case, we entered listen without having to do any discards,
+         * so do the setup that's normally done at the time of the 1st discard.
+         * As on the 1st-discard path, a failure here is fatal: carrying on
+         * would leave RAM without NOHUGEPAGE set.
+         */
+        if (postcopy_ram_prepare_discard(mis)) {
+            return -1;
+        }
+    }
 
     /*
      * Sensitise RAM - can now generate requests for blocks that don't exist
diff --git a/trace-events b/trace-events
index a89778f99b..452435d33f 100644
--- a/trace-events
+++ b/trace-events
@@ -1556,6 +1556,7 @@ postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned
 postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
 postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
+postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 postcopy_place_page(void *host_addr) "host=%p"
 postcopy_place_page_zero(void *host_addr) "host=%p"
 postcopy_ram_enable_notify(void) ""