diff --git a/arch_init.c b/arch_init.c index 8daeafaf5c..32b437897c 100644 --- a/arch_init.c +++ b/arch_init.c @@ -379,6 +379,8 @@ static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, return ret; } +/* Needs iothread lock! */ + static void migration_bitmap_sync(void) { RAMBlock *block; @@ -690,7 +692,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; if (remaining_size < max_size) { + qemu_mutex_lock_iothread(); migration_bitmap_sync(); + qemu_mutex_unlock_iothread(); remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; } return remaining_size; diff --git a/block-migration.c b/block-migration.c index b726c6c002..8da5f868af 100644 --- a/block-migration.c +++ b/block-migration.c @@ -107,6 +107,10 @@ static void blk_mig_unlock(void) qemu_mutex_unlock(&block_mig_state.lock); } +/* Must run outside of the iothread lock during the bulk phase, + * or the VM will stall. + */ + static void blk_send(QEMUFile *f, BlkMigBlock * blk) { int len; @@ -226,6 +230,8 @@ static void blk_mig_read_cb(void *opaque, int ret) blk_mig_unlock(); } +/* Called with no lock taken. */ + static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) { int64_t total_sectors = bmds->total_sectors; @@ -235,11 +241,13 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) int nr_sectors; if (bmds->shared_base) { + qemu_mutex_lock_iothread(); while (cur_sector < total_sectors && !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) { cur_sector += nr_sectors; } + qemu_mutex_unlock_iothread(); } if (cur_sector >= total_sectors) { @@ -272,15 +280,19 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) block_mig_state.submitted++; blk_mig_unlock(); + qemu_mutex_lock_iothread(); blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); bdrv_reset_dirty(bs, cur_sector, nr_sectors); - bmds->cur_sector = cur_sector + nr_sectors; + qemu_mutex_unlock_iothread(); + bmds->cur_sector = cur_sector + nr_sectors; return (bmds->cur_sector >= total_sectors); } +/* Called with iothread lock taken. */ + static void set_dirty_tracking(int enable) { BlkMigDevState *bmds; @@ -336,6 +348,8 @@ static void init_blk_migration(QEMUFile *f) bdrv_iterate(init_blk_migration_it, NULL); } +/* Called with no lock taken. */ + static int blk_mig_save_bulked_block(QEMUFile *f) { int64_t completed_sector_sum = 0; @@ -382,6 +396,8 @@ static void blk_mig_reset_dirty_cursor(void) } } +/* Called with iothread lock taken. */ + static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, int is_async) { @@ -451,7 +467,9 @@ error: return ret; } -/* return value: +/* Called with iothread lock taken. + * + * return value: * 0: too much data for max_downtime * 1: few enough data for max_downtime */ @@ -470,6 +488,8 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async) return ret; } +/* Called with no locks taken. */ + static int flush_blks(QEMUFile *f) { BlkMigBlock *blk; @@ -509,6 +529,8 @@ static int flush_blks(QEMUFile *f) return ret; } +/* Called with iothread lock taken. */ + static int64_t get_remaining_dirty(void) { BlkMigDevState *bmds; @@ -521,6 +543,8 @@ static int64_t get_remaining_dirty(void) return dirty << BDRV_SECTOR_BITS; } +/* Called with iothread lock taken. */ + static void blk_mig_cleanup(void) { BlkMigDevState *bmds; @@ -600,7 +624,12 @@ static int block_save_iterate(QEMUFile *f, void *opaque) } ret = 0; } else { + /* Always called with iothread lock taken for + * simplicity, block_save_complete also calls it. + */ + qemu_mutex_lock_iothread(); ret = blk_mig_save_dirty_block(f, 1); + qemu_mutex_unlock_iothread(); } if (ret < 0) { return ret; @@ -622,6 +651,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque) return qemu_ftell(f) - last_ftell; } +/* Called with iothread lock taken. */ + static int block_save_complete(QEMUFile *f, void *opaque) { int ret; @@ -665,6 +696,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) /* Estimate pending number of bytes to send */ uint64_t pending; + qemu_mutex_lock_iothread(); blk_mig_lock(); pending = get_remaining_dirty() + block_mig_state.submitted * BLOCK_SIZE + @@ -675,6 +707,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) pending = BLOCK_SIZE; } blk_mig_unlock(); + qemu_mutex_unlock_iothread(); DPRINTF("Enter save live pending %" PRIu64 "\n", pending); return pending; diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index fdf4e651ad..a816ac3243 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -32,14 +32,25 @@ typedef void SaveStateHandler(QEMUFile *f, void *opaque); typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); typedef struct SaveVMHandlers { + /* This runs inside the iothread lock. */ void (*set_params)(const MigrationParams *params, void * opaque); SaveStateHandler *save_state; int (*save_live_setup)(QEMUFile *f, void *opaque); void (*cancel)(void *opaque); int (*save_live_complete)(QEMUFile *f, void *opaque); + + /* This runs both outside and inside the iothread lock. */ bool (*is_active)(void *opaque); + + /* This runs outside the iothread lock in the migration case, and + * within the lock in the savevm case. The callback had better only + * use data that is local to the migration thread or protected + * by other locks. + */ int (*save_live_iterate)(QEMUFile *f, void *opaque); + + /* This runs outside the iothread lock! */ uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size); LoadStateHandler *load_state; diff --git a/migration.c b/migration.c index 729578b730..92a7152d67 100644 --- a/migration.c +++ b/migration.c @@ -670,7 +670,6 @@ static void *buffered_file_thread(void *opaque) uint64_t pending_size; if (s->bytes_xfer < s->xfer_limit) { - qemu_mutex_lock_iothread(); DPRINTF("iterate\n"); pending_size = qemu_savevm_state_pending(s->file, max_size); DPRINTF("pending size %lu max %lu\n", pending_size, max_size); @@ -678,6 +677,7 @@ static void *buffered_file_thread(void *opaque) qemu_savevm_state_iterate(s->file); } else { DPRINTF("done iterating\n"); + qemu_mutex_lock_iothread(); start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); old_vm_running = runstate_is_running(); @@ -685,8 +685,8 @@ static void *buffered_file_thread(void *opaque) s->xfer_limit = INT_MAX; qemu_savevm_state_complete(s->file); last_round = true; + qemu_mutex_unlock_iothread(); } - qemu_mutex_unlock_iothread(); } current_time = qemu_get_clock_ms(rt_clock);