From 0e5d0e02a98599cb0d2423eaa6e220a6229456be Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Tue, 18 Apr 2023 01:51:18 +0300 Subject: [PATCH] Add "vitastor-cli describe" command --- src/CMakeLists.txt | 1 + src/cli.cpp | 20 +++ src/cli.h | 2 + src/cli_describe.cpp | 270 +++++++++++++++++++++++++++++++++++++++++ src/cluster_client.cpp | 31 +++++ src/cluster_client.h | 3 + 6 files changed, 327 insertions(+) create mode 100644 src/cli_describe.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bacab75a..c7c1d628 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -141,6 +141,7 @@ add_library(vitastor_client SHARED cli_common.cpp cli_alloc_osd.cpp cli_status.cpp + cli_describe.cpp cli_df.cpp cli_ls.cpp cli_create.cpp diff --git a/src/cli.cpp b/src/cli.cpp index 27eddfa4..a83644c2 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -73,6 +73,20 @@ static const char* help_text = " must be a child of and may be one of the layers between\n" " and , including and .\n" "\n" + "vitastor-cli describe [--osds ] [--object-state ] [--pool ] [--inode ] [--min-inode ] [--max-inode ] [--min-offset ] [--max-offset ]\n" + " Describe unclean object locations in the cluster.\n" + " --osds \n" + " Only list objects from master OSD(s) .\n" + " --object-state \n" + " Only list objects in given state(s). State(s) may include:\n" + " degraded, misplaced, incomplete, corrupted, inconsistent.\n" + " --pool \n" + " Only list objects in the given pool.\n" + " --inode, --min-inode, --max-inode\n" + " Restrict listing to specific inode numbers.\n" + " --min-offset, --max-offset\n" + " Restrict listing to specific offsets inside inodes.\n" + "\n" "vitastor-cli alloc-osd\n" " Allocate a new OSD number and reserve it by creating empty /osd/stats/ key.\n" "\n" @@ -168,6 +182,7 @@ static json11::Json::object parse_args(int narg, const char *args[]) static int run(cli_tool_t *p, json11::Json::object cfg) { cli_result_t result = {}; + p->is_command_line = true; p->parse_config(cfg); json11::Json::array cmd = cfg["command"].array_items(); cfg.erase("command"); @@ -276,6 +291,11 @@ static int run(cli_tool_t *p, json11::Json::object cfg) } action_cb = p->start_rm(cfg); } + else if (cmd[0] == "describe") + { + // Describe unclean objects + action_cb = p->start_describe(cfg); + } else if (cmd[0] == "alloc-osd") { // Allocate a new OSD number diff --git a/src/cli.h b/src/cli.h index c01adaf4..ee012b17 100644 --- a/src/cli.h +++ b/src/cli.h @@ -34,6 +34,7 @@ public: bool list_first = false; bool json_output = false; int log_level = 0; + bool is_command_line = false; bool color = false; ring_loop_t *ringloop = NULL; @@ -55,6 +56,7 @@ public: friend struct snap_remover_t; std::function start_status(json11::Json); + std::function start_describe(json11::Json); std::function start_df(json11::Json); std::function start_ls(json11::Json); std::function start_create(json11::Json); diff --git a/src/cli_describe.cpp b/src/cli_describe.cpp new file mode 100644 index 00000000..f3d71ad4 --- /dev/null +++ b/src/cli_describe.cpp @@ -0,0 +1,270 @@ +// Copyright (c) Vitaliy Filippov, 2019+ +// License: VNPL-1.1 (see README.md for details) + +#include "cli.h" +#include "cluster_client.h" +#include "pg_states.h" +#include "str_util.h" + +void remove_duplicates(std::vector & ret) +{ + if (!ret.size()) + return; + std::sort(ret.begin(), ret.end()); + int j = 0; + for (int i = 1; i < ret.size(); i++) + { + if (ret[i] != ret[j]) + ret[++j] = ret[i]; + } + ret.resize(j+1); +} + +std::vector parse_uint64_list(json11::Json val) +{ + std::vector ret; + if (val.is_number()) + ret.push_back(val.uint64_value()); + else if (val.is_string()) + { + const std::string & s = val.string_value(); + for (int i = 0, p = -1; i <= s.size(); i++) + { + if (p < 0 && i < s.size() && (isdigit(s[i]) || s[i] == 'x')) + p = i; + else if (p >= 0 && (i >= s.size() || !isdigit(s[i]) && s[i] != 'x')) + { + ret.push_back(stoull_full(s.substr(p, i-p), 0)); + p = -1; + } + } + } + else if (val.is_array()) + { + for (auto & pg_num: val.array_items()) + ret.push_back(pg_num.uint64_value()); + } + return ret; +} + +struct cli_describe_t +{ + uint64_t object_state = 0; + pool_id_t only_pool = 0; + std::vector only_osds; + uint64_t min_inode = 0, max_inode = 0; + uint64_t min_offset = 0, max_offset = 0; + + cli_tool_t *parent = NULL; + int state = 0; + int count = 0; + + json11::Json options; + cli_result_t result; + json11::Json::array describe_items; + + bool is_done() + { + return state == 100; + } + + void parse_options(json11::Json cfg) + { + only_pool = cfg["pool"].uint64_value(); + if (!only_pool && cfg["pool"].is_string()) + { + for (auto & pp: parent->cli->st_cli.pool_config) + { + if (pp.second.name == cfg["pool"].string_value()) + { + only_pool = pp.first; + break; + } + } + } + min_inode = cfg["inode"].uint64_value(); + if (min_inode) + { + if (!INODE_POOL(min_inode)) + min_inode |= (uint64_t)only_pool << (64-POOL_ID_BITS); + max_inode = min_inode; + min_offset = max_offset = 0; + } + else + { + min_inode = stoull_full(cfg["min_inode"].string_value(), 0); // to support 0x... + max_inode = stoull_full(cfg["max_inode"].string_value(), 0); + min_offset = stoull_full(cfg["min_offset"].string_value(), 0); + max_offset = stoull_full(cfg["max_offset"].string_value(), 0); + if (!min_inode && !max_inode && only_pool) + { + min_inode = (uint64_t)only_pool << (64-POOL_ID_BITS); + max_inode = ((uint64_t)only_pool << (64-POOL_ID_BITS)) | + (((uint64_t)1 << (64-POOL_ID_BITS)) - 1); + } + } + only_osds = parse_uint64_list(cfg["osds"]); + object_state = stoull_full(cfg["object_state"].string_value(), 0); + if (!object_state && cfg["object_state"].is_string()) + { + if (cfg["object_state"].string_value().find("inconsistent") != std::string::npos) + object_state |= OBJ_INCONSISTENT; + if (cfg["object_state"].string_value().find("corrupted") != std::string::npos) + object_state |= OBJ_CORRUPTED; + if (cfg["object_state"].string_value().find("incomplete") != std::string::npos) + object_state |= OBJ_INCOMPLETE; + if (cfg["object_state"].string_value().find("degraded") != std::string::npos) + object_state |= OBJ_DEGRADED; + if (cfg["object_state"].string_value().find("misplaced") != std::string::npos) + object_state |= OBJ_MISPLACED; + } + } + + void loop() + { + if (state == 1) + goto resume_1; + if (state == 100) + return; + parse_options(options); + if (min_inode && !INODE_POOL(min_inode)) + { + result = (cli_result_t){ .err = EINVAL, .text = "Pool is not specified" }; + state = 100; + return; + } + if (!only_osds.size()) + { + uint64_t min_pool = min_inode >> (64-POOL_ID_BITS); + uint64_t max_pool = max_inode >> (64-POOL_ID_BITS); + for (auto & pp: parent->cli->st_cli.pool_config) + { + if (pp.first >= min_pool && (!max_pool || pp.first <= max_pool)) + { + for (auto & pgp: pp.second.pg_config) + only_osds.push_back(pgp.second.cur_primary); + } + } + } + remove_duplicates(only_osds); + parent->cli->init_msgr(); + if (parent->json_output && parent->is_command_line) + { + printf("[\n"); + } + for (int i = 0; i < only_osds.size(); i++) + { + osd_op_t *op = new osd_op_t; + op->req = (osd_any_op_t){ + .describe = { + .header = { + .magic = SECONDARY_OSD_OP_MAGIC, + .id = parent->cli->next_op_id(), + .opcode = OSD_OP_DESCRIBE, + }, + .object_state = object_state, + .min_inode = min_inode, + .min_offset = min_offset, + .max_inode = max_inode, + .max_offset = max_offset, + }, + }; + op->callback = [this, osd_num = only_osds[i]](osd_op_t *op) + { + if (op->reply.hdr.retval < 0) + { + fprintf( + stderr, "Failed to describe objects on OSD %lu (retval=%ld)\n", + osd_num, op->reply.hdr.retval + ); + } + else if (op->reply.describe.result_bytes != op->reply.hdr.retval * sizeof(osd_reply_describe_item_t)) + { + fprintf( + stderr, "Invalid response size from OSD %lu (expected %lu bytes, got %lu bytes)\n", + osd_num, op->reply.hdr.retval * sizeof(osd_reply_describe_item_t), op->reply.describe.result_bytes + ); + } + else + { + osd_reply_describe_item_t *items = (osd_reply_describe_item_t *)op->buf; + for (int i = 0; i < op->reply.hdr.retval; i++) + { + if (!parent->json_output || parent->is_command_line) + { +#define FMT "{\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"part\":%u,\"osd_num\":%u%s%s%s}" + printf( + (parent->json_output + ? (count > 0 ? ",\n " FMT : " " FMT) + : "%lx:%lx part %u on OSD %u%s%s%s\n"), +#undef FMT + items[i].inode, items[i].stripe, + items[i].role, items[i].osd_num, + (items[i].loc_bad & LOC_CORRUPTED ? (parent->json_output ? ",\"corrupted\":true" : " corrupted") : ""), + (items[i].loc_bad & LOC_INCONSISTENT ? (parent->json_output ? ",\"inconsistent\":true" : " inconsistent") : ""), + (items[i].loc_bad & LOC_OUTDATED ? (parent->json_output ? ",\"outdated\":true" : " outdated") : "") + ); + } + else + { + auto json_item = json11::Json::object { + { "inode", (uint64_t)items[i].inode }, + { "stripe", (uint64_t)items[i].stripe }, + { "part", (uint64_t)items[i].role }, + { "osd_num", (uint64_t)items[i].osd_num }, + }; + if (items[i].loc_bad & LOC_CORRUPTED) + json_item["corrupted"] = true; + if (items[i].loc_bad & LOC_INCONSISTENT) + json_item["inconsistent"] = true; + if (items[i].loc_bad & LOC_OUTDATED) + json_item["outdated"] = true; + describe_items.push_back(json_item); + } + count++; + } + } + delete op; + parent->waiting--; + if (!parent->waiting) + loop(); + }; + parent->waiting++; + parent->cli->execute_raw(only_osds[i], op); + } + resume_1: + state = 1; + if (parent->waiting > 0) + { + return; + } + if (parent->json_output && parent->is_command_line) + { + printf(count > 0 ? "\n]\n" : "]\n"); + } + else + { + result.data = describe_items; + } + state = 100; + describe_items.clear(); + } +}; + +std::function cli_tool_t::start_describe(json11::Json cfg) +{ + auto describer = new cli_describe_t(); + describer->parent = this; + describer->options = cfg; + return [describer](cli_result_t & result) + { + describer->loop(); + if (describer->is_done()) + { + result = describer->result; + delete describer; + return true; + } + return false; + }; +} diff --git a/src/cluster_client.cpp b/src/cluster_client.cpp index c2842164..2eba937b 100644 --- a/src/cluster_client.cpp +++ b/src/cluster_client.cpp @@ -35,6 +35,7 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd // peer_osd just connected continue_ops(); continue_lists(); + continue_raw_ops(peer_osd); } else if (dirty_buffers.size()) { @@ -104,6 +105,19 @@ cluster_op_t::~cluster_op_t() } } +void cluster_client_t::continue_raw_ops(osd_num_t peer_osd) +{ + auto it = raw_ops.find(peer_osd); + while (it != raw_ops.end() && it->first == peer_osd) + { + auto op = it->second; + op->op_type = OSD_OP_OUT; + op->peer_fd = msgr.osd_peer_fds.at(peer_osd); + msgr.outbox_push(op); + raw_ops.erase(it++); + } +} + void cluster_client_t::init_msgr() { if (msgr_initialized) @@ -512,6 +526,23 @@ void cluster_client_t::execute(cluster_op_t *op) } } +void cluster_client_t::execute_raw(osd_num_t osd_num, osd_op_t *op) +{ + auto fd_it = msgr.osd_peer_fds.find(osd_num); + if (fd_it != msgr.osd_peer_fds.end()) + { + op->op_type = OSD_OP_OUT; + op->peer_fd = fd_it->second; + msgr.outbox_push(op); + } + else + { + if (msgr.wanted_peers.find(osd_num) == msgr.wanted_peers.end()) + msgr.connect_peer(osd_num, st_cli.peer_states[osd_num]); + raw_ops.emplace(osd_num, op); + } +} + void cluster_client_t::copy_write(cluster_op_t *op, std::map & dirty_buffers) { // Save operation for replay when one of PGs goes out of sync diff --git a/src/cluster_client.h b/src/cluster_client.h index 73d56e5c..0edc7c7b 100644 --- a/src/cluster_client.h +++ b/src/cluster_client.h @@ -103,6 +103,7 @@ class cluster_client_t ring_consumer_t consumer; std::vector> on_ready_hooks; std::vector lists; + std::multimap raw_ops; int continuing_ops = 0; bool msgr_initialized = false; @@ -118,6 +119,7 @@ public: cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config); ~cluster_client_t(); void execute(cluster_op_t *op); + void execute_raw(osd_num_t osd_num, osd_op_t *op); bool is_ready(); void on_ready(std::function fn); @@ -153,4 +155,5 @@ protected: void continue_lists(); void continue_listing(inode_list_t *lst); void send_list(inode_list_osd_t *cur_list); + void continue_raw_ops(osd_num_t peer_osd); };