From 8603b5cb1da818ab455481f54dcf41a657b77133 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Fri, 30 Jun 2023 00:05:04 +0300 Subject: [PATCH] Do not hang on inactive OSDs during delete, report and skip them instead --- src/cli_rm_data.cpp | 22 +++++++++++++++++++++- src/cluster_client.h | 1 + src/cluster_client_list.cpp | 25 ++++++++++++++++++++----- src/osd_primary.cpp | 6 ------ 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/cli_rm_data.cpp b/src/cli_rm_data.cpp index c5eb91cd..a976aa45 100644 --- a/src/cli_rm_data.cpp +++ b/src/cli_rm_data.cpp @@ -28,6 +28,7 @@ struct rm_inode_t cli_tool_t *parent = NULL; inode_list_t *lister = NULL; std::vector lists; + std::vector inactive_osds; uint64_t total_count = 0, total_done = 0, total_prev_pct = 0; uint64_t pgs_to_list = 0; bool lists_done = false; @@ -86,6 +87,16 @@ struct rm_inode_t state = 100; return; } + inactive_osds = parent->cli->list_inode_get_inactive_osds(lister); + if (inactive_osds.size() && !parent->json_output) + { + fprintf(stderr, "Some data may remain after delete on OSDs which are currently down: "); + for (int i = 0; i < inactive_osds.size(); i++) + { + fprintf(stderr, i > 0 ? ", %lu" : "%lu", inactive_osds[i]); + } + fprintf(stderr, "\n"); + } pgs_to_list = parent->cli->list_pg_count(lister); parent->cli->list_inode_next(lister, parent->parallel_osds); } @@ -167,7 +178,7 @@ struct rm_inode_t } if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct) { - printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list); + fprintf(stderr, "\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list); total_prev_pct = total_done*1000/total_count; } if (lists_done && !lists.size()) @@ -177,8 +188,17 @@ struct rm_inode_t .text = error_count > 0 ? "Some blocks were not removed" : ( "Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+ std::to_string(pool_id)+" removed"), + .data = json11::Json::object { + { "removed_objects", total_done }, + { "total_objects", total_count }, + { "inactive_osds", inactive_osds }, + }, }; state = 100; + if (parent->progress && total_count > 0) + { + fprintf(stderr, "\n"); + } } } diff --git a/src/cluster_client.h b/src/cluster_client.h index 0edc7c7b..acc95df4 100644 --- a/src/cluster_client.h +++ b/src/cluster_client.h @@ -130,6 +130,7 @@ public: inode_list_t *list_inode_start(inode_t inode, std::function&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback); int list_pg_count(inode_list_t *lst); + const std::vector & list_inode_get_inactive_osds(inode_list_t *lst); void list_inode_next(inode_list_t *lst, int next_pgs); //inline uint32_t get_bs_bitmap_granularity() { return st_cli.global_bitmap_granularity; } //inline uint64_t get_bs_block_size() { return st_cli.global_block_size; } diff --git a/src/cluster_client_list.cpp b/src/cluster_client_list.cpp index 30f57b49..f435c726 100644 --- a/src/cluster_client_list.cpp +++ b/src/cluster_client_list.cpp @@ -36,6 +36,7 @@ struct inode_list_t inode_t inode = 0; int done_pgs = 0; int want = 0; + std::vector inactive_osds; std::vector pgs; std::function&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback; }; @@ -60,6 +61,7 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode, lst->inode = inode; lst->callback = callback; auto pool_cfg = st_cli.pool_config[pool_id]; + std::set inactive_osd_set; for (auto & pg_item: pool_cfg.pg_config) { auto & pg = pg_item.second; @@ -106,11 +108,18 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode, } for (osd_num_t peer_osd: all_peers) { - r->list_osds.push_back((inode_list_osd_t){ - .pg = r, - .osd_num = peer_osd, - .sent = false, - }); + if (st_cli.peer_states.find(peer_osd) != st_cli.peer_states.end()) + { + r->list_osds.push_back((inode_list_osd_t){ + .pg = r, + .osd_num = peer_osd, + .sent = false, + }); + } + else + { + inactive_osd_set.insert(peer_osd); + } } } else @@ -132,6 +141,7 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode, { lst->pgs[i]->pos = i; } + lst->inactive_osds.insert(lst->inactive_osds.end(), inactive_osd_set.begin(), inactive_osd_set.end()); lists.push_back(lst); return lst; } @@ -141,6 +151,11 @@ int cluster_client_t::list_pg_count(inode_list_t *lst) return lst->pgs.size(); } +const std::vector & cluster_client_t::list_inode_get_inactive_osds(inode_list_t *lst) +{ + return lst->inactive_osds; +} + void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs) { if (next_pgs >= 0) diff --git a/src/osd_primary.cpp b/src/osd_primary.cpp index 81a6b08f..fc1b00d5 100644 --- a/src/osd_primary.cpp +++ b/src/osd_primary.cpp @@ -647,12 +647,6 @@ void osd_t::continue_primary_del(osd_op_t *cur_op) else if (op_data->st == 4) goto resume_4; else if (op_data->st == 5) goto resume_5; assert(op_data->st == 0); - // Delete is forbidden even in active PGs if they're also degraded or have previous dead OSDs - if (pg.state & (PG_DEGRADED | PG_LEFT_ON_DEAD)) - { - finish_op(cur_op, -EBUSY); - return; - } if (!check_write_queue(cur_op, pg)) { return;