migration/calc-dirty-rate: replaced CRC32 with xxHash

This significantly reduces the overhead of dirty page
rate calculation in sampling mode.
Tested using 32GiB VM on E5-2690 CPU.

With CRC32:
total_pages=8388608 sampled_pages=16384 millis=71

With xxHash:
total_pages=8388608 sampled_pages=16384 millis=14

Signed-off-by: Andrei Gudkov <gudkov.andrei@huawei.com>
Message-Id: <cd115a89fc81d5f2eeb4ea7d57a98b84f794f340.1682598010.git.gudkov.andrei@huawei.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
master
Andrei Gudkov 2023-04-27 15:42:57 +03:00 committed by Juan Quintela
parent 8844bb8d89
commit 00a3f9c60a
2 changed files with 38 additions and 11 deletions

View File

@ -29,6 +29,7 @@
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "exec/memory.h"
#include "qemu/xxhash.h"
/*
* total_dirty_pages is protected by BQL and is used
@ -308,6 +309,33 @@ static void update_dirtyrate(uint64_t msec)
DirtyStat.dirty_rate = dirtyrate;
}
/*
 * Compute a 32-bit hash of a single page.
 *
 * @ptr: start of the page; TARGET_PAGE_SIZE bytes are read from it as
 *       64-bit words.
 *
 * The page is consumed four 64-bit words (32 bytes) per iteration, each
 * word feeding its own accumulator through XXH64_round().  The four
 * accumulators are then merged, the page size is added in, the result is
 * passed through XXH64_avalanche(), and the low 32 bits are returned.
 */
static uint32_t compute_page_hash(void *ptr)
{
    const uint64_t *word = ptr;
    const uint64_t *const page_end = word + TARGET_PAGE_SIZE / 8;
    uint64_t lane1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
    uint64_t lane2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
    uint64_t lane3 = QEMU_XXHASH_SEED + 0;
    uint64_t lane4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
    uint64_t digest;

    /* One round per accumulator for every group of four words. */
    while (word < page_end) {
        lane1 = XXH64_round(lane1, word[0]);
        lane2 = XXH64_round(lane2, word[1]);
        lane3 = XXH64_round(lane3, word[2]);
        lane4 = XXH64_round(lane4, word[3]);
        word += 4;
    }

    /* Fold the accumulators together and mix in the input length. */
    digest = XXH64_mergerounds(lane1, lane2, lane3, lane4);
    digest += TARGET_PAGE_SIZE;

    /* Only the low 32 bits of the final 64-bit value are kept. */
    return (uint32_t)(XXH64_avalanche(digest) & UINT32_MAX);
}
/*
* get hash result for the sampled memory with length of TARGET_PAGE_SIZE
* in ramblock, which starts from ramblock base address.
@ -315,13 +343,12 @@ static void update_dirtyrate(uint64_t msec)
static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
uint64_t vfn)
{
/*
 * NOTE(review): this span is a diff hunk shown without +/- markers.  The
 * crc/crc32() lines appear to be the removed side and the
 * hash/compute_page_hash() lines the added side -- confirm against the
 * actual file before treating this as one function body.
 */
uint32_t crc;
uint32_t hash;
/* CRC32 over TARGET_PAGE_SIZE bytes starting at page index vfn. */
crc = crc32(0, (info->ramblock_addr +
vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE);
/* Hash of the page at the same address, via compute_page_hash(). */
hash = compute_page_hash(info->ramblock_addr + vfn * TARGET_PAGE_SIZE);
/* Trace the ramblock id string, the page index and the computed value. */
trace_get_ramblock_vfn_hash(info->idstr, vfn, crc);
return crc;
trace_get_ramblock_vfn_hash(info->idstr, vfn, hash);
return hash;
}
static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
@ -454,13 +481,13 @@ out:
static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
{
uint32_t crc;
uint32_t hash;
int i;
for (i = 0; i < info->sample_pages_count; i++) {
crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
if (crc != info->hash_result[i]) {
trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]);
hash = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
if (hash != info->hash_result[i]) {
trace_calc_page_dirty_rate(info->idstr, hash, info->hash_result[i]);
info->sample_dirty_count++;
}
}

View File

@ -342,8 +342,8 @@ dirty_bitmap_load_success(void) ""
# dirtyrate.c
dirtyrate_set_state(const char *new_state) "new state %s"
query_dirty_rate_info(const char *new_state) "current state %s"
get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock name: %s, vfn: %"PRIu64 ", crc: %" PRIu32
calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32
get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t hash) "ramblock name: %s, vfn: %"PRIu64 ", hash: %" PRIu32
calc_page_dirty_rate(const char *idstr, uint32_t new_hash, uint32_t old_hash) "ramblock name: %s, new hash: %" PRIu32 ", old hash: %" PRIu32
skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64
find_page_matched(const char *idstr) "ramblock %s addr or size changed"
dirtyrate_calculate(int64_t dirtyrate) "dirty rate: %" PRIi64 " MB/s"