From 105a405b0a0dd6c55a684f2f6c9e89eb99585013 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Thu, 20 Apr 2023 23:36:08 +0300 Subject: [PATCH] Implement vitastor-cli fix --- src/CMakeLists.txt | 1 + src/cli.cpp | 22 +++ src/cli.h | 1 + src/cli_describe.cpp | 16 +- src/cli_fix.cpp | 313 ++++++++++++++++++++++++++++++++++++++++ src/cli_fix.h | 25 ++++ src/disk_tool_utils.cpp | 17 --- src/str_util.cpp | 19 +++ src/str_util.h | 1 + 9 files changed, 383 insertions(+), 32 deletions(-) create mode 100644 src/cli_fix.cpp create mode 100644 src/cli_fix.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c7c1d628..d109d3c5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -142,6 +142,7 @@ add_library(vitastor_client SHARED cli_alloc_osd.cpp cli_status.cpp cli_describe.cpp + cli_fix.cpp cli_df.cpp cli_ls.cpp cli_create.cpp diff --git a/src/cli.cpp b/src/cli.cpp index a83644c2..5d9bb912 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -87,6 +87,23 @@ static const char* help_text = " --min-offset, --max-offset\n" " Restrict listing to specific offsets inside inodes.\n" "\n" + "vitastor-cli fix [--objects ] [--bad-osds ] [--check no]\n" + " Fix inconsistent objects in the cluster by deleting some copies.\n" + " --objects \n" + " Objects to fix, either in plain text or JSON format. 
If not specified,\n" + " object list will be read from STDIN in one of the same formats.\n" + " Plain text format: 0x:0x 0x:0x ...\n" + " JSON format: [{\"inode\":\"0x...\",\"stripe\":\"0x...\"},...]\n" + " --bad-osds \n" + " Remove inconsistent copies/parts of objects from these OSDs,\n" + " effectively marking them bad and allowing Vitastor to recover.\n" + " --part \n" + " Only remove EC part , required for extreme edge cases\n" + " where one OSD has multiple parts of a EC object.\n" + " --check no\n" + " Do not recheck that requested objects are actually inconsistent,\n" + " delete requested copies/parts anyway.\n" + "\n" "vitastor-cli alloc-osd\n" " Allocate a new OSD number and reserve it by creating empty /osd/stats/ key.\n" "\n" @@ -296,6 +313,11 @@ static int run(cli_tool_t *p, json11::Json::object cfg) // Describe unclean objects action_cb = p->start_describe(cfg); } + else if (cmd[0] == "fix") + { + // Fix inconsistent objects (by deleting some copies) + action_cb = p->start_fix(cfg); + } else if (cmd[0] == "alloc-osd") { // Allocate a new OSD number diff --git a/src/cli.h b/src/cli.h index ee012b17..c6aecc61 100644 --- a/src/cli.h +++ b/src/cli.h @@ -57,6 +57,7 @@ public: std::function start_status(json11::Json); std::function start_describe(json11::Json); + std::function start_fix(json11::Json); std::function start_df(json11::Json); std::function start_ls(json11::Json); std::function start_create(json11::Json); diff --git a/src/cli_describe.cpp b/src/cli_describe.cpp index f3d71ad4..444f2c96 100644 --- a/src/cli_describe.cpp +++ b/src/cli_describe.cpp @@ -1,25 +1,11 @@ // Copyright (c) Vitaliy Filippov, 2019+ // License: VNPL-1.1 (see README.md for details) -#include "cli.h" +#include "cli_fix.h" #include "cluster_client.h" #include "pg_states.h" #include "str_util.h" -void remove_duplicates(std::vector & ret) -{ - if (!ret.size()) - return; - std::sort(ret.begin(), ret.end()); - int j = 0; - for (int i = 1; i < ret.size(); i++) - { - if (ret[i] != 
ret[j])
-            ret[++j] = ret[i];
-    }
-    ret.resize(j+1);
-}
-
 std::vector<uint64_t> parse_uint64_list(json11::Json val)
 {
     std::vector<uint64_t> ret;
diff --git a/src/cli_fix.cpp b/src/cli_fix.cpp
new file mode 100644
index 00000000..a8d210b3
--- /dev/null
+++ b/src/cli_fix.cpp
@@ -0,0 +1,337 @@
+// Copyright (c) Vitaliy Filippov, 2019+
+// License: VNPL-1.1 (see README.md for details)
+
+#include "cli_fix.h"
+#include "cluster_client.h"
+#include "pg_states.h"
+#include "str_util.h"
+
+// State machine of "vitastor-cli fix". For every requested object it sends
+// OSD_OP_DESCRIBE to the PG primary, OSD_OP_SEC_DELETE for matching copies/parts
+// to OSDs listed in <bad_osds> and finally OSD_OP_SCRUB to the primary again
+struct cli_fix_t
+{
+    std::vector<object_id> objects;
+    // EC part number to remove, -1 means any part
+    int part = -1;
+    // Index of the next entry of <objects> to process
+    size_t processed_count = 0;
+    std::set<osd_num_t> bad_osds;
+    // Set when the "check" option is false: delete copies even if they aren't reported as inconsistent
+    bool no_check = false;
+
+    cli_tool_t *parent = NULL;
+    // 0 = not started, 1 = processing objects, 100 = finished
+    int state = 0;
+
+    json11::Json options;
+    cli_result_t result;
+    json11::Json::array fix_result;
+
+    bool is_done()
+    {
+        return state == 100;
+    }
+
+    // Parse an object list given either as a JSON array (see parse_object_list())
+    // or as plain text "inode:stripe inode:stripe ..." and append valid entries
+    // to <objects>. Invalid entries are reported to stderr and skipped
+    void parse_objects_str(std::string str)
+    {
+        str = trim(str);
+        if (str[0] == '[')
+        {
+            std::string json_err;
+            json11::Json list = json11::Json::parse(str, json_err);
+            if (json_err != "")
+                fprintf(stderr, "Invalid JSON object list input: %s\n", json_err.c_str());
+            else
+                parse_object_list(list);
+        }
+        else
+        {
+            const char *s = str.c_str();
+            char *e = NULL;
+            int len = str.size();
+            object_id oid;
+            int p = 0;
+            while (p < len)
+            {
+                // isdigit() is only defined for values representable as unsigned char
+                if (!isdigit((unsigned char)s[p]))
+                {
+                    p++;
+                    continue;
+                }
+                int p0 = p;
+                oid.inode = strtoull(s+p, &e, 0);
+                p = e-s;
+                while (p < len && !isdigit((unsigned char)s[p]) && s[p] != ':')
+                    p++;
+                if (s[p] != ':')
+                {
+                    fprintf(stderr, "Invalid object ID in input: %s\n", std::string(s+p0, p-p0).c_str());
+                    // Do not advance here: s[p] may be the first digit of the next object ID
+                    continue;
+                }
+                p++;
+                while (p < len && !isdigit((unsigned char)s[p]))
+                    p++;
+                oid.stripe = strtoull(s+p, &e, 0) & ~STRIPE_MASK;
+                p = e-s;
+                if (oid.inode)
+                    objects.push_back(oid);
+                else
+                    fprintf(stderr, "Invalid object ID in input: %s\n", std::string(s+p0, p-p0).c_str());
+            }
+        }
+    }
+
+    // Append objects from a JSON array of {"inode":"0x...","stripe":"0x..."} items
+    // to <objects>. Items whose inode parses to 0 are reported to stderr and skipped
+    void parse_object_list(json11::Json list)
+    {
+        for (auto & obj: list.array_items())
+        {
+            object_id oid = (object_id){
+                .inode = stoull_full(obj["inode"].string_value(), 0),
+                .stripe = stoull_full(obj["stripe"].string_value(), 0) & ~STRIPE_MASK,
+            };
+            if (oid.inode)
+                objects.push_back(oid);
+            else
+                fprintf(stderr, "Invalid JSON object ID in input: %s, bad or missing \"inode\" field\n", obj.dump().c_str());
+        }
+    }
+
+    // Fill <objects>, <bad_osds>, <no_check> and <part> from CLI options.
+    // The object list is read from STDIN (fd 0) when "objects" is not specified
+    void parse_options(json11::Json cfg)
+    {
+        if (cfg["objects"].is_null())
+            parse_objects_str(read_all_fd(0));
+        else if (cfg["objects"].is_string())
+            parse_objects_str(cfg["objects"].string_value());
+        else
+            parse_object_list(cfg["objects"].array_items());
+        for (auto osd_num: parse_uint64_list(cfg["bad_osds"]))
+            bad_osds.insert(osd_num);
+        no_check = json_is_false(cfg["check"]);
+        if (cfg["part"].is_number() || cfg["part"].is_string())
+            part = cfg["part"].uint64_value();
+    }
+
+    // Resumable main routine: returns whenever it has to wait for OSD replies
+    // and is re-entered from operation callbacks until <state> becomes 100
+    void loop()
+    {
+        if (state == 1)
+            goto resume_1;
+        if (state == 100)
+            return;
+        parse_options(options);
+        if (!objects.size())
+        {
+            result = (cli_result_t){ .err = EINVAL, .text = "Object list is not specified" };
+            state = 100;
+            return;
+        }
+        if (!bad_osds.size())
+        {
+            result = (cli_result_t){ .err = EINVAL, .text = "OSDs are not specified" };
+            state = 100;
+            return;
+        }
+        remove_duplicates(objects);
+        parent->cli->init_msgr();
+    resume_1:
+        state = 1;
+        while (processed_count < objects.size())
+        {
+            // Too many operations in flight, continue when some of them complete
+            if (parent->waiting >= parent->iodepth*parent->parallel_osds)
+            {
+                return;
+            }
+            auto & obj = objects[processed_count++];
+            auto pool_cfg_it = parent->cli->st_cli.pool_config.find(INODE_POOL(obj.inode));
+            if (pool_cfg_it == parent->cli->st_cli.pool_config.end())
+            {
+                fprintf(stderr, "Object %lx:%lx is from unknown pool\n", obj.inode, obj.stripe);
+                continue;
+            }
+            auto & pool_cfg = pool_cfg_it->second;
+            pg_num_t pg_num = (obj.stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
+            auto pg_it = pool_cfg.pg_config.find(pg_num);
+            if (pg_it == pool_cfg.pg_config.end() ||
+                !pg_it->second.cur_primary || !(pg_it->second.cur_state & PG_ACTIVE))
+            {
+                fprintf(
+                    stderr, "Object %lx:%lx is from PG %u/%u which is not currently active\n",
+                    obj.inode, obj.stripe, pool_cfg_it->first, pg_num
+                );
+                continue;
+            }
+            osd_num_t primary_osd = pg_it->second.cur_primary;
+            // Describe -> Remove some copies -> Scrub again
+            osd_op_t *op = new osd_op_t;
+            op->req = (osd_any_op_t){
+                .describe = {
+                    .header = {
+                        .magic = SECONDARY_OSD_OP_MAGIC,
+                        .id = parent->cli->next_op_id(),
+                        .opcode = OSD_OP_DESCRIBE,
+                    },
+                    .min_inode = obj.inode,
+                    .min_offset = obj.stripe,
+                    .max_inode = obj.inode,
+                    .max_offset = obj.stripe,
+                },
+            };
+            op->callback = [this, primary_osd, &obj](osd_op_t *op)
+            {
+                if (op->reply.hdr.retval < 0 || op->reply.describe.result_bytes != op->reply.hdr.retval * sizeof(osd_reply_describe_item_t))
+                {
+                    fprintf(stderr, "Failed to describe objects on OSD %lu (retval=%ld)\n", primary_osd, op->reply.hdr.retval);
+                    parent->waiting--;
+                    loop();
+                }
+                else
+                {
+                    osd_reply_describe_item_t *items = (osd_reply_describe_item_t *)op->buf;
+                    int *rm_count = (int*)malloc_or_die(sizeof(int));
+                    *rm_count = 1; // just in case if anything gets called instantly
+                    for (int i = 0; i < op->reply.hdr.retval; i++)
+                    {
+                        if (((items[i].loc_bad & LOC_INCONSISTENT) || no_check) &&
+                            bad_osds.find(items[i].osd_num) != bad_osds.end() &&
+                            (part == -1 || items[i].role == part))
+                        {
+                            // Remove
+                            uint64_t rm_osd_num = items[i].osd_num;
+                            osd_op_t *rm_op = new osd_op_t;
+                            rm_op->req = (osd_any_op_t){
+                                .sec_del = {
+                                    .header = {
+                                        .magic = SECONDARY_OSD_OP_MAGIC,
+                                        .id = parent->cli->next_op_id(),
+                                        .opcode = OSD_OP_SEC_DELETE,
+                                    },
+                                    .oid = {
+                                        .inode = op->req.describe.min_inode,
+                                        .stripe = op->req.describe.min_offset | items[i].role,
+                                    },
+                                    .version = 0,
+                                },
+                            };
+                            rm_op->callback = [this, primary_osd, rm_osd_num, rm_count, &obj](osd_op_t *rm_op)
+                            {
+                                (*rm_count)--;
+                                if (rm_op->reply.hdr.retval < 0)
+                                {
+                                    fprintf(
+                                        stderr, "Failed to remove object %lx:%lx from OSD %lu (retval=%ld)\n",
+                                        rm_op->req.sec_del.oid.inode, rm_op->req.sec_del.oid.stripe,
+                                        rm_osd_num, rm_op->reply.hdr.retval
+                                    );
+                                }
+                                else if (parent->json_output)
+                                {
+                                    fix_result.push_back(json11::Json::object {
+                                        { "inode", (uint64_t)rm_op->req.sec_del.oid.inode },
+                                        { "stripe", (uint64_t)rm_op->req.sec_del.oid.stripe & ~STRIPE_MASK },
+                                        { "part", (uint64_t)rm_op->req.sec_del.oid.stripe & STRIPE_MASK },
+                                        { "osd_num", (uint64_t)rm_osd_num },
+                                    });
+                                }
+                                else
+                                {
+                                    printf(
+                                        "Removed %lx:%lx (part %lu) from OSD %lu\n",
+                                        rm_op->req.sec_del.oid.inode, rm_op->req.sec_del.oid.stripe & ~STRIPE_MASK,
+                                        rm_op->req.sec_del.oid.stripe & STRIPE_MASK, rm_osd_num
+                                    );
+                                }
+                                delete rm_op;
+                                if (!(*rm_count))
+                                {
+                                    // Scrub
+                                    free(rm_count);
+                                    osd_op_t *scrub_op = new osd_op_t;
+                                    scrub_op->req = (osd_any_op_t){
+                                        .rw = {
+                                            .header = {
+                                                .magic = SECONDARY_OSD_OP_MAGIC,
+                                                .id = parent->cli->next_op_id(),
+                                                .opcode = OSD_OP_SCRUB,
+                                            },
+                                            .inode = obj.inode,
+                                            .offset = obj.stripe,
+                                            .len = 0,
+                                        },
+                                    };
+                                    scrub_op->callback = [this, primary_osd, &obj](osd_op_t *scrub_op)
+                                    {
+                                        if (scrub_op->reply.hdr.retval < 0 && scrub_op->reply.hdr.retval != -ENOENT)
+                                        {
+                                            fprintf(
+                                                stderr, "Failed to scrub %lx:%lx on OSD %lu (retval=%ld)\n",
+                                                obj.inode, obj.stripe, primary_osd, scrub_op->reply.hdr.retval
+                                            );
+                                        }
+                                        delete scrub_op;
+                                        parent->waiting--;
+                                        loop();
+                                    };
+                                    parent->cli->execute_raw(primary_osd, scrub_op);
+                                }
+                            };
+                            (*rm_count)++;
+                            parent->cli->execute_raw(rm_osd_num, rm_op);
+                        }
+                    }
+                    // Drop the initial reference. Zero here means that no removal is pending for this object
+                    (*rm_count)--;
+                    if (!*rm_count)
+                    {
+                        free(rm_count);
+                        parent->waiting--;
+                        loop();
+                    }
+                }
+                delete op;
+            };
+            parent->waiting++;
+            parent->cli->execute_raw(primary_osd, op);
+        }
+        if (parent->waiting > 0)
+        {
+            return;
+        }
+        if (parent->json_output)
+        {
+            result.data = fix_result;
+        }
+        state = 100;
+    }
+};
+
+// Create the fixer and return an action callback that advances it on every call.
+// The callback copies the result, destroys the fixer and returns true once it's finished
+std::function<bool(cli_result_t &)> cli_tool_t::start_fix(json11::Json cfg)
+{
+    auto fixer = new cli_fix_t();
+    fixer->parent = this;
+    fixer->options = cfg;
+    return [fixer](cli_result_t & result)
+    {
+        fixer->loop();
+        if (fixer->is_done())
+        {
+            result = fixer->result;
+            delete fixer;
+            return true;
+        }
+        return false;
+    };
+}
diff --git
a/src/cli_fix.h b/src/cli_fix.h
new file mode 100644
index 00000000..96757ef2
--- /dev/null
+++ b/src/cli_fix.h
@@ -0,0 +1,23 @@
+// Copyright (c) Vitaliy Filippov, 2019+
+// License: VNPL-1.1 (see README.md for details)
+
+#pragma once
+
+#include <algorithm>
+#include "cli.h"
+
+std::vector<uint64_t> parse_uint64_list(json11::Json val);
+
+// Sort <ret> and remove repeated elements in place.
+// Only requires operator < and operator != from T
+template<class T> void remove_duplicates(std::vector<T> & ret)
+{
+    std::sort(ret.begin(), ret.end());
+    ret.erase(
+        std::unique(ret.begin(), ret.end(), [](const T & a, const T & b) { return !(a != b); }),
+        ret.end()
+    );
+}
+
+// from http_client.cpp...
+bool json_is_false(const json11::Json & val);
diff --git a/src/disk_tool_utils.cpp b/src/disk_tool_utils.cpp
index 8b051f85..706820eb 100644
--- a/src/disk_tool_utils.cpp
+++ b/src/disk_tool_utils.cpp
@@ -55,23 +55,6 @@ std::string realpath_str(std::string path, bool nofail)
     return rp;
 }
 
-std::string read_all_fd(int fd)
-{
-    int res_size = 0;
-    std::string res;
-    while (1)
-    {
-        res.resize(res_size+1024);
-        int r = read(fd, (char*)res.data()+res_size, res.size()-res_size);
-        if (r > 0)
-            res_size += r;
-        else if (!r || errno != EAGAIN && errno != EINTR)
-            break;
-    }
-    res.resize(res_size);
-    return res;
-}
-
 std::string read_file(std::string file, bool allow_enoent)
 {
     std::string res;
diff --git a/src/str_util.cpp b/src/str_util.cpp
index 42f22f22..ef0fb124 100644
--- a/src/str_util.cpp
+++ b/src/str_util.cpp
@@ -3,6 +3,8 @@
 
 #include <assert.h>
+#include <errno.h>
 #include <string.h>
+#include <unistd.h>
 #include "str_util.h"
 
 std::string base64_encode(const std::string &in)
@@ -281,3 +283,25 @@ uint64_t parse_time(std::string time_str, bool *ok)
         *ok = !(ts == 0 && time_str != "0" && (time_str != "" || mul != 1));
     return ts;
 }
+
+// Read everything from <fd> until EOF and return it as a string.
+// Reading also stops on any read() error except EAGAIN and EINTR,
+// in that case the data read so far is returned
+std::string read_all_fd(int fd)
+{
+    size_t res_size = 0;
+    std::string res;
+    while (1)
+    {
+        // Grow the buffer exponentially when it's full
+        if (res_size >= res.size())
+            res.resize(res.size() ? res.size()*2 : 1024);
+        ssize_t r = read(fd, (char*)res.data()+res_size, res.size()-res_size);
+        if (r > 0)
+            res_size += r;
+        else if (!r || (errno != EAGAIN && errno != EINTR))
+            break;
+    }
+    res.resize(res_size);
+    return res;
+}
diff --git a/src/str_util.h b/src/str_util.h
index 83704b5a..b0f871a7 100644
--- a/src/str_util.h
+++ b/src/str_util.h
@@ -16,3 +16,4 @@ uint64_t stoull_full(const std::string & str, int base = 0);
 std::string format_size(uint64_t size, bool nobytes = false);
 void print_help(const char *help_text, std::string exe_name, std::string cmd, bool all);
 uint64_t parse_time(std::string time_str, bool *ok = NULL);
+std::string read_all_fd(int fd);