From 85fa38955775b977939119584cbc5ff0b99dcae1 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Thu, 14 Jul 2022 02:26:30 +0300 Subject: [PATCH] Add a test for disk-tool resize --- src/disk_tool.cpp | 63 +++++++++++++++++++++++++++------------- tests/test_resize.sh | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 19 deletions(-) create mode 100755 tests/test_resize.sh diff --git a/src/disk_tool.cpp b/src/disk_tool.cpp index 33bc0e21..543ff23d 100644 --- a/src/disk_tool.cpp +++ b/src/disk_tool.cpp @@ -64,7 +64,7 @@ struct disk_tool_t ring_loop_t *ringloop; ring_consumer_t ring_consumer; int remap_active; - uint8_t *new_buf, *new_journal_ptr, *new_journal_data; + uint8_t *new_journal_buf, *new_meta_buf, *new_journal_ptr, *new_journal_data; uint64_t new_journal_in_pos; int64_t data_idx_diff; uint64_t total_blocks, free_first, free_last; @@ -101,7 +101,9 @@ struct disk_tool_t int resize_remap_blocks(); int resize_copy_data(); int resize_rewrite_journal(); + int resize_write_new_journal(); int resize_rewrite_meta(); + int resize_write_new_meta(); }; int main(int argc, char *argv[]) @@ -696,13 +698,23 @@ int disk_tool_t::resize_data() if (r != 0) return r; // Rewrite journal - fprintf(stderr, "Writing new journal\n"); + fprintf(stderr, "Rebuilding journal\n"); r = resize_rewrite_journal(); if (r != 0) return r; // Rewrite metadata - fprintf(stderr, "Writing new metadata\n"); + fprintf(stderr, "Rebuilding metadata\n"); r = resize_rewrite_meta(); + if (r != 0) + return r; + // Write new journal + fprintf(stderr, "Writing new journal\n"); + r = resize_write_new_journal(); + if (r != 0) + return r; + // Write new metadata + fprintf(stderr, "Writing new metadata\n"); + r = resize_write_new_meta(); if (r != 0) return r; fprintf(stderr, "Done\n"); @@ -809,7 +821,7 @@ void disk_tool_t::resize_init(blockstore_meta_header_v1_t *hdr) fprintf(stderr, "New journal area overlaps with data\n"); exit(1); } - if (new_journal_device == dsk.meta_device && new_journal_offset < new_meta_offset+new_meta_len && + if (new_journal_device == new_meta_device && new_journal_offset < new_meta_offset+new_meta_len && new_journal_offset+new_journal_len > new_meta_offset) { fprintf(stderr, "New journal area overlaps with metadata\n"); @@ -949,6 +961,7 @@ int disk_tool_t::resize_copy_data() ringloop->unregister_consumer(&ring_consumer); free(moving_blocks[0].buf); delete[] moving_blocks; + moving_blocks = NULL; close(dsk.data_fd); dsk.data_fd = -1; delete ringloop; @@ -960,11 +973,11 @@ int disk_tool_t::resize_rewrite_journal() { // Simply overwriting on the fly may be impossible because old and new areas may overlap // For now, just build new journal data in memory - new_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_journal_len); - new_journal_ptr = new_buf; + new_journal_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_journal_len); + new_journal_ptr = new_journal_buf; new_journal_data = new_journal_ptr + dsk.journal_block_size; new_journal_in_pos = 0; - memset(new_buf, 0, new_journal_len); + memset(new_journal_buf, 0, new_journal_len); process_journal([this](void *buf) { return process_journal_block(buf, [this](int num, journal_entry *je) @@ -989,7 +1002,7 @@ int disk_tool_t::resize_rewrite_journal() if (dsk.journal_block_size < new_journal_in_pos+je->size) { new_journal_ptr = new_journal_data; - if (new_journal_ptr-new_buf >= new_journal_len) + if (new_journal_ptr-new_journal_buf >= new_journal_len) { fprintf(stderr, "Error: live entries don't fit to the new journal\n"); exit(1); @@ -1017,7 +1030,7 @@ int disk_tool_t::resize_rewrite_journal() } else if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT) { - ne->small_write.data_offset = new_journal_data-new_buf; + ne->small_write.data_offset = new_journal_data-new_journal_buf; if (ne->small_write.data_offset + ne->small_write.len > new_journal_len) { fprintf(stderr, "Error: live entries don't fit to the new journal\n"); @@ -1032,7 +1045,11 @@ int disk_tool_t::resize_rewrite_journal() } }); }); - // FIXME: Write new journal and metadata with journaling if they overlap with old + return 0; +} + +int disk_tool_t::resize_write_new_journal() +{ new_journal_fd = open(new_journal_device.c_str(), O_DIRECT|O_RDWR); if (new_journal_fd < 0) { @@ -1040,22 +1057,23 @@ int disk_tool_t::resize_rewrite_journal() return 1; } lseek64(new_journal_fd, new_journal_offset, 0); - write_blocking(new_journal_fd, new_buf, new_journal_len); + write_blocking(new_journal_fd, new_journal_buf, new_journal_len); fsync(new_journal_fd); close(new_journal_fd); new_journal_fd = -1; - free(new_buf); + free(new_journal_buf); + new_journal_buf = NULL; return 0; } int disk_tool_t::resize_rewrite_meta() { - new_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_meta_len); - memset(new_buf, 0, new_meta_len); + new_meta_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_meta_len); + memset(new_meta_buf, 0, new_meta_len); int r = process_meta( [this](blockstore_meta_header_v1_t *hdr) { - blockstore_meta_header_v1_t *new_hdr = (blockstore_meta_header_v1_t *)new_buf; + blockstore_meta_header_v1_t *new_hdr = (blockstore_meta_header_v1_t *)new_meta_buf; new_hdr->zero = 0; new_hdr->magic = BLOCKSTORE_META_MAGIC_V1; new_hdr->version = BLOCKSTORE_META_VERSION_V1; @@ -1074,7 +1092,7 @@ int disk_tool_t::resize_rewrite_meta() exit(1); } block_num += data_idx_diff; - clean_disk_entry *new_entry = (clean_disk_entry*)(new_buf + dsk.meta_block_size + + clean_disk_entry *new_entry = (clean_disk_entry*)(new_meta_buf + dsk.meta_block_size + dsk.meta_block_size*(block_num / new_entries_per_block) + new_clean_entry_size*(block_num % new_entries_per_block)); new_entry->oid = entry->oid; @@ -1087,9 +1105,15 @@ int disk_tool_t::resize_rewrite_meta() ); if (r != 0) { - free(new_buf); + free(new_meta_buf); + new_meta_buf = NULL; return r; } + return 0; +} + +int disk_tool_t::resize_write_new_meta() +{ new_meta_fd = open(new_meta_device.c_str(), O_DIRECT|O_RDWR); if (new_meta_fd < 0) { @@ -1097,10 +1121,11 @@ int disk_tool_t::resize_rewrite_meta() return 1; } lseek64(new_meta_fd, new_meta_offset, 0); - write_blocking(new_meta_fd, new_buf, new_meta_len); + write_blocking(new_meta_fd, new_meta_buf, new_meta_len); fsync(new_meta_fd); close(new_meta_fd); new_meta_fd = -1; - free(new_buf); + free(new_meta_buf); + new_meta_buf = NULL; return 0; } diff --git a/tests/test_resize.sh b/tests/test_resize.sh new file mode 100755 index 00000000..e4f6faf5 --- /dev/null +++ b/tests/test_resize.sh @@ -0,0 +1,68 @@ +#!/bin/bash -ex + +PG_COUNT=${PG_COUNT:-32} + +. `dirname $0`/run_3osds.sh + +LD_PRELOAD="build/src/libfio_vitastor.so" \ + fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=4 \ + -rw=write -etcd=$ETCD_URL -end_fsync=1 -pool=1 -inode=1 -size=256M -runtime=10 + +LD_PRELOAD="build/src/libfio_vitastor.so" \ + fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=32 \ + -rw=randwrite -etcd=$ETCD_URL -end_fsync=1 -pool=1 -inode=1 -size=256M -runtime=10 -number_ios=1024 + +qemu-img convert -S 4096 -p \ + -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \ + -O raw ./testdata/before.bin + +for i in $(seq 1 $OSD_COUNT); do + pid=OSD${i}_PID + pid=${!pid} + kill -9 $pid +done + +for i in $(seq 1 $OSD_COUNT); do + offsets=$(build/src/vitastor-cli simple-offsets --format json ./testdata/test_osd$i.bin) + meta_offset=$(echo $offsets | jq -r .meta_offset) + data_offset=$(echo $offsets | jq -r .data_offset) + build/src/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json + build/src/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json + build/src/vitastor-disk resize \ + $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) \ + --new_meta_offset 0 \ + --new_meta_len $((1024*1024)) \ + --new_journal_offset $((1024*1024)) \ + --new_data_offset $((128*1024*1024)) + build/src/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json + build/src/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json + if ! (cat ./testdata/meta_before_resize.json ./testdata/meta_after_resize.json | \ + jq -e -s 'map([ .entries[] | del(.block) ] | sort_by(.pool, .inode, .stripe)) | .[0] == .[1] and (.[0] | length) > 1000'); then + format_error "OSD $i metadata corrupted after resizing" + fi + if ! (cat ./testdata/journal_before_resize.json ./testdata/journal_after_resize.json | \ + jq -e -s 'map([ .[].entries[] | del(.crc32, .crc32_prev, .valid, .loc, .start) ]) | .[0] == .[1] and (.[0] | length) > 1'); then + format_error "OSD $i journal corrupted after resizing" + fi +done + +$ETCDCTL del --prefix /vitastor/osd/state/ + +for i in $(seq 1 $OSD_COUNT); do + build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \ + --data_device ./testdata/test_osd$i.bin \ + --meta_offset 0 \ + --journal_offset $((1024*1024)) \ + --data_offset $((128*1024*1024)) &>./testdata/osd$i.log & + eval OSD${i}_PID=$! +done + +qemu-img convert -S 4096 -p \ + -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \ + -O raw ./testdata/after.bin + +if ! cmp ./testdata/before.bin ./testdata/after.bin; then + format_error "Data differs after resizing" +fi + +format_green OK