// Copyright (c) Vitaliy Filippov, 2019+ // License: VNPL-1.1 (see README.md for details) #include "disk_tool.h" int disk_tool_t::dump_journal() { dump_with_blocks = options["format"] == "blocks"; dump_with_data = options["format"] == "data" || options["format"] == "blocks,data"; if (dsk.journal_block_size < DIRECT_IO_ALIGNMENT || (dsk.journal_block_size % DIRECT_IO_ALIGNMENT) || dsk.journal_block_size > 128*1024) { fprintf(stderr, "Invalid journal block size\n"); return 1; } first_block = true; if (json) printf("[\n"); if (all) { dsk.journal_fd = open(dsk.journal_device.c_str(), O_DIRECT|O_RDONLY); if (dsk.journal_fd < 0) { fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno)); return 1; } void *journal_buf = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size); journal_pos = 0; while (journal_pos < dsk.journal_len) { int r = pread(dsk.journal_fd, journal_buf, dsk.journal_block_size, dsk.journal_offset+journal_pos); assert(r == dsk.journal_block_size); uint64_t s; for (s = 0; s < dsk.journal_block_size; s += 8) { if (*((uint64_t*)((uint8_t*)journal_buf+s)) != 0) break; } if (json) { printf("%s{\"offset\":\"0x%lx\"", first_block ? "" : ",\n", journal_pos); first_block = false; } if (s == dsk.journal_block_size) { if (json) printf(",\"type\":\"zero\"}"); else printf("offset %08lx: zeroes\n", journal_pos); journal_pos += dsk.journal_block_size; } else if (((journal_entry*)journal_buf)->magic == JOURNAL_MAGIC) { if (!json) printf("offset %08lx:\n", journal_pos); else printf(",\"entries\":[\n"); first_entry = true; process_journal_block(journal_buf, [this](int num, journal_entry *je) { dump_journal_entry(num, je, json); }); if (json) printf(first_entry ? "]}" : "\n]}"); } else { if (json) printf(",\"type\":\"data\",\"pattern\":\"%08lx\"}", *((uint64_t*)journal_buf)); else printf("offset %08lx: no magic in the beginning, looks like random data (pattern=%08lx)\n", journal_pos, *((uint64_t*)journal_buf)); journal_pos += dsk.journal_block_size; } } free(journal_buf); close(dsk.journal_fd); dsk.journal_fd = -1; } else { first_entry = true; process_journal([this](void *data) { if (json && dump_with_blocks) first_entry = true; if (!json) printf("offset %08lx:\n", journal_pos); auto pos = journal_pos; int r = process_journal_block(data, [this, pos](int num, journal_entry *je) { if (json && dump_with_blocks && first_entry) printf("%s{\"offset\":\"0x%lx\",\"entries\":[\n", first_block ? "" : ",\n", pos); dump_journal_entry(num, je, json); first_block = false; }); if (json && dump_with_blocks && !first_entry) printf("\n]}"); else if (!json && r <= 0) printf("end of the journal\n"); return r; }); } if (json) printf(first_block ? "]\n" : "\n]\n"); return 0; } int disk_tool_t::process_journal(std::function block_fn) { dsk.journal_fd = open(dsk.journal_device.c_str(), O_DIRECT|O_RDONLY); if (dsk.journal_fd < 0) { fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno)); return 1; } void *data = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size); journal_pos = 0; int r = pread(dsk.journal_fd, data, dsk.journal_block_size, dsk.journal_offset+journal_pos); assert(r == dsk.journal_block_size); journal_entry *je = (journal_entry*)(data); if (je->magic != JOURNAL_MAGIC || je->type != JE_START || je_crc32(je) != je->crc32) { fprintf(stderr, "offset %08lx: journal superblock is invalid\n", journal_pos); r = 1; } else { started = false; crc32_last = 0; block_fn(data); started = false; crc32_last = 0; journal_pos = je->start.journal_start; while (1) { if (journal_pos >= dsk.journal_len) journal_pos = dsk.journal_block_size; r = pread(dsk.journal_fd, data, dsk.journal_block_size, dsk.journal_offset+journal_pos); assert(r == dsk.journal_block_size); r = block_fn(data); if (r <= 0) break; } } close(dsk.journal_fd); dsk.journal_fd = -1; free(data); return r; } int disk_tool_t::process_journal_block(void *buf, std::function iter_fn) { uint32_t pos = 0; journal_pos += dsk.journal_block_size; int entry = 0; bool wrapped = false; while (pos <= dsk.journal_block_size-JOURNAL_ENTRY_HEADER_SIZE) { journal_entry *je = (journal_entry*)((uint8_t*)buf + pos); if (je->magic != JOURNAL_MAGIC || je->type < JE_MIN || je->type > JE_MAX || !all && started && je->crc32_prev != crc32_last || pos > dsk.journal_block_size-je->size) { break; } bool crc32_valid = je_crc32(je) == je->crc32; if (!all && !crc32_valid) { break; } started = true; crc32_last = je->crc32; if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT) { journal_calc_data_pos = journal_pos; if (journal_pos + je->small_write.len > dsk.journal_len) { // data continues from the beginning of the journal journal_calc_data_pos = journal_pos = dsk.journal_block_size; wrapped = true; } journal_pos += je->small_write.len; if (journal_pos >= dsk.journal_len) { journal_pos = dsk.journal_block_size; wrapped = true; } small_write_data = memalign_or_die(MEM_ALIGNMENT, je->small_write.len); assert(pread(dsk.journal_fd, small_write_data, je->small_write.len, dsk.journal_offset+je->small_write.data_offset) == je->small_write.len); data_crc32 = crc32c(0, small_write_data, je->small_write.len); } iter_fn(entry, je); if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT) { free(small_write_data); small_write_data = NULL; } pos += je->size; entry++; } if (wrapped) { journal_pos = dsk.journal_len; } return entry; } void disk_tool_t::dump_journal_entry(int num, journal_entry *je, bool json) { if (json) { if (!first_entry) printf(",\n"); first_entry = false; printf( "{\"crc32\":\"%08x\",\"valid\":%s,\"crc32_prev\":\"%08x\"", je->crc32, (je_crc32(je) == je->crc32 ? "true" : "false"), je->crc32_prev ); } else { printf( "entry % 3d: crc32=%08x %s prev=%08x ", num, je->crc32, (je_crc32(je) == je->crc32 ? "(valid)" : "(invalid)"), je->crc32_prev ); } if (je->type == JE_START) { printf( json ? ",\"type\":\"start\",\"start\":\"0x%lx\"}" : "je_start start=%08lx\n", je->start.journal_start ); } else if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT) { printf( json ? ",\"type\":\"small_write%s\",\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"ver\":\"%lu\",\"offset\":%u,\"len\":%u,\"loc\":\"0x%lx\"" : "je_small_write%s oid=%lx:%lx ver=%lu offset=%u len=%u loc=%08lx", je->type == JE_SMALL_WRITE_INSTANT ? "_instant" : "", je->small_write.oid.inode, je->small_write.oid.stripe, je->small_write.version, je->small_write.offset, je->small_write.len, je->small_write.data_offset ); if (journal_calc_data_pos != je->small_write.data_offset) { printf(json ? ",\"bad_loc\":true,\"calc_loc\":\"0x%lx\"" : " (mismatched, calculated = %lu)", journal_pos); } if (je->small_write.size > sizeof(journal_entry_small_write)) { printf(json ? ",\"bitmap\":\"" : " (bitmap: "); for (int i = sizeof(journal_entry_small_write); i < je->small_write.size; i++) { printf("%02x", ((uint8_t*)je)[i]); } printf(json ? "\"" : ")"); } if (dump_with_data) { printf(json ? ",\"data\":\"" : " (data: "); for (int i = 0; i < je->small_write.len; i++) { printf("%02x", ((uint8_t*)small_write_data)[i]); } printf(json ? "\"" : ")"); } printf( json ? ",\"data_crc32\":\"%08x\",\"data_valid\":%s}" : " data_crc32=%08x%s\n", je->small_write.crc32_data, (data_crc32 != je->small_write.crc32_data ? (json ? "false" : " (invalid)") : (json ? "true" : " (valid)")) ); } else if (je->type == JE_BIG_WRITE || je->type == JE_BIG_WRITE_INSTANT) { printf( json ? ",\"type\":\"big_write%s\",\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"ver\":\"%lu\",\"offset\":%u,\"len\":%u,\"loc\":\"0x%lx\"" : "je_big_write%s oid=%lx:%lx ver=%lu offset=%u len=%u loc=%08lx", je->type == JE_BIG_WRITE_INSTANT ? "_instant" : "", je->big_write.oid.inode, je->big_write.oid.stripe, je->big_write.version, je->big_write.offset, je->big_write.len, je->big_write.location ); if (je->big_write.size > sizeof(journal_entry_big_write)) { printf(json ? ",\"bitmap\":\"" : " (bitmap: "); for (int i = sizeof(journal_entry_big_write); i < je->small_write.size; i++) { printf("%02x", ((uint8_t*)je)[i]); } printf(json ? "\"" : ")"); } printf(json ? "}" : "\n"); } else if (je->type == JE_STABLE) { printf( json ? ",\"type\":\"stable\",\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"ver\":\"%lu\"}" : "je_stable oid=%lx:%lx ver=%lu\n", je->stable.oid.inode, je->stable.oid.stripe, je->stable.version ); } else if (je->type == JE_ROLLBACK) { printf( json ? ",\"type\":\"rollback\",\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"ver\":\"%lu\"}" : "je_rollback oid=%lx:%lx ver=%lu\n", je->rollback.oid.inode, je->rollback.oid.stripe, je->rollback.version ); } else if (je->type == JE_DELETE) { printf( json ? ",\"type\":\"delete\",\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"ver\":\"%lu\"}" : "je_delete oid=%lx:%lx ver=%lu\n", je->del.oid.inode, je->del.oid.stripe, je->del.version ); } } int disk_tool_t::write_json_journal(json11::Json entries) { new_journal_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_journal_len); new_journal_ptr = new_journal_buf; new_journal_data = new_journal_ptr + dsk.journal_block_size; new_journal_in_pos = 0; memset(new_journal_buf, 0, new_journal_len); std::map type_by_name = { { "start", JE_START }, { "small_write", JE_SMALL_WRITE }, { "small_write_instant", JE_SMALL_WRITE_INSTANT }, { "big_write", JE_BIG_WRITE }, { "big_write_instant", JE_BIG_WRITE_INSTANT }, { "stable", JE_STABLE }, { "delete", JE_DELETE }, { "rollback", JE_ROLLBACK }, }; // Write start entry into the first block *((journal_entry_start*)new_journal_buf) = (journal_entry_start){ .magic = JOURNAL_MAGIC, .type = JE_START, .size = sizeof(journal_entry_start), .journal_start = dsk.journal_block_size, .version = JOURNAL_VERSION, }; ((journal_entry*)new_journal_buf)->crc32 = je_crc32((journal_entry*)new_journal_buf); new_journal_ptr += dsk.journal_block_size; new_journal_data = new_journal_ptr+dsk.journal_block_size; new_journal_in_pos = 0; for (const auto & rec: entries.array_items()) { auto t_it = type_by_name.find(rec["type"].string_value()); if (t_it == type_by_name.end()) { fprintf(stderr, "Unknown journal entry type \"%s\", skipping\n", rec["type"].string_value().c_str()); continue; } uint16_t type = t_it->second; if (type == JE_START) continue; uint32_t entry_size = (type == JE_START ? sizeof(journal_entry_start) : (type == JE_SMALL_WRITE || type == JE_SMALL_WRITE_INSTANT ? sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size : (type == JE_BIG_WRITE || type == JE_BIG_WRITE_INSTANT ? sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size : sizeof(journal_entry_del)))); if (dsk.journal_block_size < new_journal_in_pos + entry_size) { new_journal_ptr = new_journal_data; if (new_journal_ptr-new_journal_buf >= new_journal_len) { fprintf(stderr, "Error: entries don't fit to the new journal\n"); free(new_journal_buf); return 1; } new_journal_data = new_journal_ptr+dsk.journal_block_size; new_journal_in_pos = 0; if (dsk.journal_block_size < entry_size) { fprintf(stderr, "Error: journal entry too large (%u bytes)\n", entry_size); free(new_journal_buf); return 1; } } journal_entry *ne = (journal_entry*)(new_journal_ptr + new_journal_in_pos); if (type == JE_SMALL_WRITE || type == JE_SMALL_WRITE_INSTANT) { if (new_journal_data - new_journal_buf + ne->small_write.len > new_journal_len) { fprintf(stderr, "Error: entries don't fit to the new journal\n"); free(new_journal_buf); return 1; } *((journal_entry_small_write*)ne) = (journal_entry_small_write){ .magic = JOURNAL_MAGIC, .type = type, .size = entry_size, .crc32_prev = new_crc32_prev, .oid = { .inode = sscanf_json(NULL, rec["inode"]), .stripe = sscanf_json(NULL, rec["stripe"]), }, .version = rec["ver"].uint64_value(), .offset = (uint32_t)rec["offset"].uint64_value(), .len = (uint32_t)rec["len"].uint64_value(), .data_offset = (uint64_t)(new_journal_data-new_journal_buf), .crc32_data = (uint32_t)sscanf_json("%x", rec["data_crc32"]), }; fromhexstr(rec["bitmap"].string_value(), dsk.clean_entry_bitmap_size, ((uint8_t*)ne) + sizeof(journal_entry_small_write)); fromhexstr(rec["data"].string_value(), ne->small_write.len, new_journal_data); if (rec["data"].is_string()) ne->small_write.crc32_data = crc32c(0, new_journal_data, ne->small_write.len); new_journal_data += ne->small_write.len; } else if (type == JE_BIG_WRITE || type == JE_BIG_WRITE_INSTANT) { *((journal_entry_big_write*)ne) = (journal_entry_big_write){ .magic = JOURNAL_MAGIC, .type = type, .size = entry_size, .crc32_prev = new_crc32_prev, .oid = { .inode = sscanf_json(NULL, rec["inode"]), .stripe = sscanf_json(NULL, rec["stripe"]), }, .version = rec["ver"].uint64_value(), .offset = (uint32_t)rec["offset"].uint64_value(), .len = (uint32_t)rec["len"].uint64_value(), .location = sscanf_json(NULL, rec["loc"]), }; fromhexstr(rec["bitmap"].string_value(), dsk.clean_entry_bitmap_size, ((uint8_t*)ne) + sizeof(journal_entry_big_write)); } else if (type == JE_STABLE || type == JE_ROLLBACK || type == JE_DELETE) { *((journal_entry_del*)ne) = (journal_entry_del){ .magic = JOURNAL_MAGIC, .type = type, .size = entry_size, .crc32_prev = new_crc32_prev, .oid = { .inode = sscanf_json(NULL, rec["inode"]), .stripe = sscanf_json(NULL, rec["stripe"]), }, .version = rec["ver"].uint64_value(), }; } ne->crc32 = je_crc32(ne); new_crc32_prev = ne->crc32; new_journal_in_pos += ne->size; } int r = resize_write_new_journal(); free(new_journal_buf); return r; }