diff --git a/cpus.c b/cpus.c index dddd056454..d44c0eda89 100644 --- a/cpus.c +++ b/cpus.c @@ -69,6 +69,14 @@ static CPUState *next_cpu; int64_t max_delay; int64_t max_advance; +/* vcpu throttling controls */ +static QEMUTimer *throttle_timer; +static unsigned int throttle_percentage; + +#define CPU_THROTTLE_PCT_MIN 1 +#define CPU_THROTTLE_PCT_MAX 99 +#define CPU_THROTTLE_TIMESLICE_NS 10000000 + bool cpu_is_stopped(CPUState *cpu) { return cpu->stopped || !runstate_is_running(); @@ -505,10 +513,80 @@ static const VMStateDescription vmstate_timers = { } }; +static void cpu_throttle_thread(void *opaque) +{ + CPUState *cpu = opaque; + double pct; + double throttle_ratio; + long sleeptime_ns; + + if (!cpu_throttle_get_percentage()) { + return; + } + + pct = (double)cpu_throttle_get_percentage()/100; + throttle_ratio = pct / (1 - pct); + sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS); + + qemu_mutex_unlock_iothread(); + atomic_set(&cpu->throttle_thread_scheduled, 0); + g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */ + qemu_mutex_lock_iothread(); +} + +static void cpu_throttle_timer_tick(void *opaque) +{ + CPUState *cpu; + double pct; + + /* Stop the timer if needed */ + if (!cpu_throttle_get_percentage()) { + return; + } + CPU_FOREACH(cpu) { + if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) { + async_run_on_cpu(cpu, cpu_throttle_thread, cpu); + } + } + + pct = (double)cpu_throttle_get_percentage()/100; + timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + + CPU_THROTTLE_TIMESLICE_NS / (1-pct)); +} + +void cpu_throttle_set(int new_throttle_pct) +{ + /* Ensure throttle percentage is within valid range */ + new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX); + new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN); + + atomic_set(&throttle_percentage, new_throttle_pct); + + timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + + CPU_THROTTLE_TIMESLICE_NS); +} + +void cpu_throttle_stop(void) +{ + atomic_set(&throttle_percentage, 0); +} + +bool cpu_throttle_active(void) +{ + return (cpu_throttle_get_percentage() != 0); +} + +int cpu_throttle_get_percentage(void) +{ + return atomic_read(&throttle_percentage); +} + void cpu_ticks_init(void) { seqlock_init(&timers_state.vm_clock_seqlock, NULL); vmstate_register(NULL, 0, &vmstate_timers, &timers_state); + throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, + cpu_throttle_timer_tick, NULL); } void configure_icount(QemuOpts *opts, Error **errp) diff --git a/hmp.c b/hmp.c index 3f807b75f1..5048eeeb2d 100644 --- a/hmp.c +++ b/hmp.c @@ -232,6 +232,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->xbzrle_cache->overflow); } + if (info->has_x_cpu_throttle_percentage) { + monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n", + info->x_cpu_throttle_percentage); + } + qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } @@ -272,6 +277,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, " %s: %" PRId64, MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS], params->decompress_threads); + monitor_printf(mon, " %s: %" PRId64, + MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL], + params->x_cpu_throttle_initial); + monitor_printf(mon, " %s: %" PRId64, + MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT], + params->x_cpu_throttle_increment); monitor_printf(mon, "\n"); } @@ -1221,6 +1232,8 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) bool has_compress_level = false; bool has_compress_threads = false; bool has_decompress_threads = false; + bool has_x_cpu_throttle_initial = false; + bool has_x_cpu_throttle_increment = false; int i; for (i = 0; i < MIGRATION_PARAMETER_MAX; i++) { @@ -1235,10 +1248,18 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) case MIGRATION_PARAMETER_DECOMPRESS_THREADS: has_decompress_threads = true; break; + case MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL: + has_x_cpu_throttle_initial = true; + break; + case MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT: + has_x_cpu_throttle_increment = true; + break; } qmp_migrate_set_parameters(has_compress_level, value, has_compress_threads, value, has_decompress_threads, value, + has_x_cpu_throttle_initial, value, + has_x_cpu_throttle_increment, value, &err); break; } diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 302673dbad..9405554a2b 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -321,6 +321,11 @@ struct CPUState { uint32_t can_do_io; int32_t exception_index; /* used by m68k TCG */ + /* Used to keep track of an outstanding cpu throttle thread for migration + * autoconverge + */ + bool throttle_thread_scheduled; + /* Note that this is accessed at the start of every TB via a negative offset from AREG0. Leave this field at the end so as to make the (absolute value) offset as small as possible. This reduces code @@ -565,6 +570,43 @@ CPUState *qemu_get_cpu(int index); */ bool cpu_exists(int64_t id); +/** + * cpu_throttle_set: + * @new_throttle_pct: Percent of sleep time. Valid range is 1 to 99. + * + * Throttles all vcpus by forcing them to sleep for the given percentage of + * time. A throttle_percentage of 25 corresponds to a 75% duty cycle roughly. + * (example: 10ms sleep for every 30ms awake). + * + * cpu_throttle_set can be called as needed to adjust new_throttle_pct. + * Once the throttling starts, it will remain in effect until cpu_throttle_stop + * is called. + */ +void cpu_throttle_set(int new_throttle_pct); + +/** + * cpu_throttle_stop: + * + * Stops the vcpu throttling started by cpu_throttle_set. + */ +void cpu_throttle_stop(void); + +/** + * cpu_throttle_active: + * + * Returns: %true if the vcpus are currently being throttled, %false otherwise. + */ +bool cpu_throttle_active(void); + +/** + * cpu_throttle_get_percentage: + * + * Returns the vcpu throttle percentage. See cpu_throttle_set for details. + * + * Returns: The throttle percentage in range 1 to 99. + */ +int cpu_throttle_get_percentage(void); + #ifndef CONFIG_USER_ONLY typedef void (*CPUInterruptHandler)(CPUState *, int); diff --git a/migration/migration.c b/migration/migration.c index e48dd13720..b7de9b7b3f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -29,8 +29,9 @@ #include "trace.h" #include "qapi/util.h" #include "qapi-event.h" +#include "qom/cpu.h" -#define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ +#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ /* Amount of time to allocate to each "chunk" of bandwidth-throttled * data. */ @@ -44,6 +45,9 @@ #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */ #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 +/* Define default autoconverge cpu throttle migration parameters */ +#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20 +#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10 /* Migration XBZRLE default cache size */ #define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024) @@ -71,6 +75,10 @@ MigrationState *migrate_get_current(void) DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT, .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, + .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL, + .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT, }; return ¤t_migration; @@ -372,6 +380,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; params->decompress_threads = s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; + params->x_cpu_throttle_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + params->x_cpu_throttle_increment = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; return params; } @@ -435,6 +447,11 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->disk->total = blk_mig_bytes_total(); } + if (cpu_throttle_active()) { + info->has_x_cpu_throttle_percentage = true; + info->x_cpu_throttle_percentage = cpu_throttle_get_percentage(); + } + get_xbzrle_cache_stats(info); break; case MIGRATION_STATUS_COMPLETED: @@ -494,7 +511,11 @@ void qmp_migrate_set_parameters(bool has_compress_level, bool has_compress_threads, int64_t compress_threads, bool has_decompress_threads, - int64_t decompress_threads, Error **errp) + int64_t decompress_threads, + bool has_x_cpu_throttle_initial, + int64_t x_cpu_throttle_initial, + bool has_x_cpu_throttle_increment, + int64_t x_cpu_throttle_increment, Error **errp) { MigrationState *s = migrate_get_current(); @@ -517,6 +538,18 @@ void qmp_migrate_set_parameters(bool has_compress_level, "is invalid, it should be in the range of 1 to 255"); return; } + if (has_x_cpu_throttle_initial && + (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "x_cpu_throttle_initial", + "an integer in the range of 1 to 99"); + } + if (has_x_cpu_throttle_increment && + (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "x_cpu_throttle_increment", + "an integer in the range of 1 to 99"); + } if (has_compress_level) { s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level; @@ -528,6 +561,15 @@ void qmp_migrate_set_parameters(bool has_compress_level, s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = decompress_threads; } + if (has_x_cpu_throttle_initial) { + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + x_cpu_throttle_initial; + } + + if (has_x_cpu_throttle_increment) { + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + x_cpu_throttle_increment; + } } /* shared migration helpers */ @@ -643,6 +685,10 @@ static MigrationState *migrate_init(const MigrationParams *params) s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; int decompress_thread_count = s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; + int x_cpu_throttle_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + int x_cpu_throttle_increment = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; memcpy(enabled_capabilities, s->enabled_capabilities, sizeof(enabled_capabilities)); @@ -658,6 +704,10 @@ static MigrationState *migrate_init(const MigrationParams *params) compress_thread_count; s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = decompress_thread_count; + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + x_cpu_throttle_initial; + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + x_cpu_throttle_increment; s->bandwidth_limit = bandwidth_limit; migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); @@ -1026,6 +1076,9 @@ static void *migration_thread(void *opaque) } } + /* If we enabled cpu throttling for auto-converge, turn it off. */ + cpu_throttle_stop(); + qemu_mutex_lock_iothread(); if (s->state == MIGRATION_STATUS_COMPLETED) { int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); diff --git a/migration/ram.c b/migration/ram.c index 5187637d45..2d1d0b99e4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -47,9 +47,7 @@ do { } while (0) #endif -static bool mig_throttle_on; static int dirty_rate_high_cnt; -static void check_guest_throttling(void); static uint64_t bitmap_sync_count; @@ -407,6 +405,29 @@ static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) return size; } +/* Reduce amount of guest cpu execution to hopefully slow down memory writes. + * If guest dirty memory rate is reduced below the rate at which we can + * transfer pages to the destination then we should be able to complete + * migration. Some workloads dirty memory way too fast and will not effectively + * converge, even with auto-converge. + */ +static void mig_throttle_guest_down(void) +{ + MigrationState *s = migrate_get_current(); + uint64_t pct_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + uint64_t pct_icrement = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; + + /* We have not started throttling yet. Let's start it. */ + if (!cpu_throttle_active()) { + cpu_throttle_set(pct_initial); + } else { + /* Throttling already on, just increase the rate */ + cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement); + } +} + /* Update the xbzrle cache to reflect a page that's been sent as all 0. * The important thing is that a stale (not-yet-0'd) page be replaced * by the new data. @@ -599,21 +620,21 @@ static void migration_bitmap_sync(void) /* The following detection logic can be refined later. For now: Check to see if the dirtied bytes is 50% more than the approx. amount of bytes that just got transferred since the last time we - were in this routine. If that happens >N times (for now N==4) - we turn on the throttle down logic */ + were in this routine. If that happens twice, start or increase + throttling */ bytes_xfer_now = ram_bytes_transferred(); + if (s->dirty_pages_rate && (num_dirty_pages_period * TARGET_PAGE_SIZE > (bytes_xfer_now - bytes_xfer_prev)/2) && - (dirty_rate_high_cnt++ > 4)) { + (dirty_rate_high_cnt++ >= 2)) { trace_migration_throttle(); - mig_throttle_on = true; dirty_rate_high_cnt = 0; + mig_throttle_guest_down(); } bytes_xfer_prev = bytes_xfer_now; - } else { - mig_throttle_on = false; } + if (migrate_use_xbzrle()) { if (iterations_prev != acct_info.iterations) { acct_info.xbzrle_cache_miss_rate = @@ -1146,7 +1167,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque) RAMBlock *block; int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ - mig_throttle_on = false; dirty_rate_high_cnt = 0; bitmap_sync_count = 0; migration_bitmap_sync_init(); @@ -1251,7 +1271,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } pages_sent += pages; acct_info.iterations++; - check_guest_throttling(); + /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. qemu_get_clock_ns() is a bit expensive, so we only check each some @@ -1664,52 +1684,3 @@ void ram_mig_init(void) qemu_mutex_init(&XBZRLE.lock); register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); } -/* Stub function that's gets run on the vcpu when its brought out of the - VM to run inside qemu via async_run_on_cpu()*/ - -static void mig_sleep_cpu(void *opq) -{ - qemu_mutex_unlock_iothread(); - g_usleep(30*1000); - qemu_mutex_lock_iothread(); -} - -/* To reduce the dirty rate explicitly disallow the VCPUs from spending - much time in the VM. The migration thread will try to catchup. - Workload will experience a performance drop. -*/ -static void mig_throttle_guest_down(void) -{ - CPUState *cpu; - - qemu_mutex_lock_iothread(); - CPU_FOREACH(cpu) { - async_run_on_cpu(cpu, mig_sleep_cpu, NULL); - } - qemu_mutex_unlock_iothread(); -} - -static void check_guest_throttling(void) -{ - static int64_t t0; - int64_t t1; - - if (!mig_throttle_on) { - return; - } - - if (!t0) { - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - return; - } - - t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - - /* If it has been more than 40 ms since the last time the guest - * was throttled then do it again. - */ - if (40 < (t1-t0)/1000000) { - mig_throttle_guest_down(); - t0 = t1; - } -} diff --git a/qapi-schema.json b/qapi-schema.json index 582a817215..8b0520c2d0 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -480,6 +480,10 @@ # may be expensive, but do not actually occur during the iterative # migration rounds themselves. (since 1.6) # +# @x-cpu-throttle-percentage: #optional percentage of time guest cpus are being +# throttled during auto-converge. This is only present when auto-converge +# has started throttling guest cpus. (Since 2.5) +# # Since: 0.14.0 ## { 'struct': 'MigrationInfo', @@ -489,7 +493,8 @@ '*total-time': 'int', '*expected-downtime': 'int', '*downtime': 'int', - '*setup-time': 'int'} } + '*setup-time': 'int', + '*x-cpu-throttle-percentage': 'int'} } ## # @query-migrate @@ -596,10 +601,18 @@ # compression, so set the decompress-threads to the number about 1/4 # of compress-threads is adequate. # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. (Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) # Since: 2.4 ## { 'enum': 'MigrationParameter', - 'data': ['compress-level', 'compress-threads', 'decompress-threads'] } + 'data': ['compress-level', 'compress-threads', 'decompress-threads', + 'x-cpu-throttle-initial', 'x-cpu-throttle-increment'] } # # @migrate-set-parameters @@ -612,12 +625,21 @@ # # @decompress-threads: decompression thread count # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. (Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) # Since: 2.4 ## { 'command': 'migrate-set-parameters', 'data': { '*compress-level': 'int', '*compress-threads': 'int', - '*decompress-threads': 'int'} } + '*decompress-threads': 'int', + '*x-cpu-throttle-initial': 'int', + '*x-cpu-throttle-increment': 'int'} } # # @MigrationParameters @@ -628,12 +650,22 @@ # # @decompress-threads: decompression thread count # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. (Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) +# # Since: 2.4 ## { 'struct': 'MigrationParameters', 'data': { 'compress-level': 'int', 'compress-threads': 'int', - 'decompress-threads': 'int'} } + 'decompress-threads': 'int', + 'x-cpu-throttle-initial': 'int', + 'x-cpu-throttle-increment': 'int'} } ## # @query-migrate-parameters # diff --git a/vl.c b/vl.c index e211f6aa36..8d1846c06c 100644 --- a/vl.c +++ b/vl.c @@ -580,6 +580,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED }, { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG }, { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, + { RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },