From 293cb5bd1df27b755bac1929a817e8d2f15908d9 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Wed, 2 Sep 2020 21:54:32 +0300 Subject: [PATCH] Parse pool configuration in etcd_state_client --- etcd_state_client.cpp | 139 +++++++++++++++++++++++++++++++++--------- etcd_state_client.h | 30 +++++++-- lp/mon.js | 15 +++-- osd_peering.cpp | 6 ++ 4 files changed, 152 insertions(+), 38 deletions(-) diff --git a/etcd_state_client.cpp b/etcd_state_client.cpp index 99e93353..c73f5e0b 100644 --- a/etcd_state_client.cpp +++ b/etcd_state_client.cpp @@ -233,6 +233,11 @@ void etcd_state_client_t::load_global_config() void etcd_state_client_t::load_pgs() { json11::Json::array txn = { + json11::Json::object { + { "request_range", json11::Json::object { + { "key", base64_encode(etcd_prefix+"/config/pools") }, + } } + }, json11::Json::object { { "request_range", json11::Json::object { { "key", base64_encode(etcd_prefix+"/config/pgs") }, @@ -293,47 +298,120 @@ void etcd_state_client_t::load_pgs() void etcd_state_client_t::parse_state(const std::string & key, const json11::Json & value) { - if (key == etcd_prefix+"/config/pgs") + if (key == etcd_prefix+"/config/pools") { - for (auto & pg_item: this->pg_config) + for (auto & pool_item: this->pool_config) { - pg_item.second.exists = false; + pool_item.second.exists = false; } - for (auto & pg_item: value["items"].object_items()) + for (auto & pool_item: value.object_items()) { - pg_num_t pg_num = stoull_full(pg_item.first); - if (!pg_num) + pool_id_t pool_id = stoull_full(pool_item.first); + if (!pool_id || pool_id >= POOL_ID_MAX) { - printf("Bad key in PG configuration: %s (must be a number), skipped\n", pg_item.first.c_str()); + printf("Pool ID %s is invalid (must be a number less than 0x%x), skipping pool\n", pool_item.first.c_str(), POOL_ID_MAX); continue; } - this->pg_config[pg_num].exists = true; - this->pg_config[pg_num].pause = pg_item.second["pause"].bool_value(); - this->pg_config[pg_num].primary = pg_item.second["primary"].uint64_value(); - this->pg_config[pg_num].target_set.clear(); - for (auto pg_osd: pg_item.second["osd_set"].array_items()) + if (pool_item.second["pg_size"].uint64_value() < 1 || + pool_item.second["scheme"] == "xor" && pool_item.second["pg_size"].uint64_value() < 3) { - this->pg_config[pg_num].target_set.push_back(pg_osd.uint64_value()); + printf("Pool %lu has invalid pg_size, skipping pool\n", pool_id); + continue; } - if (this->pg_config[pg_num].target_set.size() != 3) + if (pool_item.second["pg_minsize"].uint64_value() < 1 || + pool_item.second["pg_minsize"].uint64_value() > pool_item.second["pg_size"].uint64_value() || + pool_item.second["pg_minsize"].uint64_value() < (pool_item.second["pg_size"].uint64_value() - 1)) { - printf("Bad PG %u config format: incorrect osd_set = %s\n", pg_num, pg_item.second["osd_set"].dump().c_str()); - this->pg_config[pg_num].target_set.resize(3); - this->pg_config[pg_num].pause = true; + printf("Pool %lu has invalid pg_minsize, skipping pool\n", pool_id); + continue; + } + if (pool_item.second["pg_count"].uint64_value() < 1) + { + printf("Pool %lu has invalid pg_count, skipping pool\n", pool_id); + continue; + } + if (pool_item.second["name"].string_value() == "") + { + printf("Pool %lu has empty name, skipping pool\n", pool_id); + continue; + } + if (pool_item.second["scheme"] != "replicated" && pool_item.second["scheme"] != "xor") + { + printf("Pool %lu has invalid coding scheme (only \"xor\" and \"replicated\" are allowed), skipping pool\n", pool_id); + continue; + } + if (pool_item.second["max_osd_combinations"].uint64_value() > 0 && + pool_item.second["max_osd_combinations"].uint64_value() < 100) + { + printf("Pool %lu has invalid max_osd_combinations (must be at least 100), skipping pool\n", pool_id); + continue; + } + this->pool_config[pool_id].exists = true; + this->pool_config[pool_id].id = pool_id; + this->pool_config[pool_id].name = pool_item.second["name"].string_value(); + this->pool_config[pool_id].scheme = pool_item.second["scheme"] == "replicated" ? POOL_SCHEME_REPLICATED : POOL_SCHEME_XOR; + this->pool_config[pool_id].pg_size = pool_item.second["pg_size"].uint64_value(); + this->pool_config[pool_id].pg_minsize = pool_item.second["pg_minsize"].uint64_value(); + this->pool_config[pool_id].pg_count = pool_item.second["pg_count"].uint64_value(); + this->pool_config[pool_id].failure_domain = pool_item.second["failure_domain"].string_value(); + this->pool_config[pool_id].max_osd_combinations = pool_item.second["max_osd_combinations"].uint64_value(); + if (!this->pool_config[pool_id].max_osd_combinations) + { + this->pool_config[pool_id].max_osd_combinations = 10000; + } + } + } + else if (key == etcd_prefix+"/config/pgs") + { + for (auto & pool_item: this->pool_config) + { + for (auto & pg_item: pool_item.second.pg_config) + { + pg_item.second.exists = false; + } + } + for (auto & pool_item: value["items"].object_items()) + { + pool_id_t pool_id = stoull_full(pool_item.first); + if (!pool_id || pool_id >= POOL_ID_MAX) + { + printf("Pool ID %s is invalid in PG configuration (must be a number less than 0x%x), skipping pool\n", pool_item.first.c_str(), POOL_ID_MAX); + continue; + } + for (auto & pg_item: pool_item.second.object_items()) + { + pg_num_t pg_num = stoull_full(pg_item.first); + if (!pg_num) + { + printf("Bad key in pool %lu PG configuration: %s (must be a number), skipped\n", pool_id, pg_item.first.c_str()); + continue; + } + auto & parsed_cfg = this->pool_config[pool_id].pg_config[pg_num]; + parsed_cfg.exists = true; + parsed_cfg.pause = pg_item.second["pause"].bool_value(); + parsed_cfg.primary = pg_item.second["primary"].uint64_value(); + parsed_cfg.target_set.clear(); + for (auto & pg_osd: pg_item.second["osd_set"].array_items()) + { + parsed_cfg.target_set.push_back(pg_osd.uint64_value()); + } } } } else if (key.substr(0, etcd_prefix.length()+12) == etcd_prefix+"/pg/history/") { - // /pg/history/%d - pg_num_t pg_num = stoull_full(key.substr(etcd_prefix.length()+12)); - if (!pg_num) + // /pg/history/%d/%d + pool_id_t pool_id = 0; + pg_num_t pg_num = 0; + char null_byte = 0; + sscanf(key.c_str() + etcd_prefix.length()+12, "%lu/%u%c", &pool_id, &pg_num, &null_byte); + if (!pool_id || pool_id >= POOL_ID_MAX || !pg_num || null_byte != 0) { printf("Bad etcd key %s, ignoring\n", key.c_str()); } else { - auto & pg_cfg = this->pg_config[pg_num]; + auto & pg_cfg = this->pool_config[pool_id].pg_config[pg_num]; pg_cfg.target_history.clear(); pg_cfg.all_peers.clear(); // Refuse to start PG if any set of the has no live OSDs @@ -355,22 +433,25 @@ void etcd_state_client_t::parse_state(const std::string & key, const json11::Jso pg_cfg.epoch = value["epoch"].uint64_value(); if (on_change_pg_history_hook != NULL) { - on_change_pg_history_hook(pg_num); + on_change_pg_history_hook(pool_id, pg_num); } } } else if (key.substr(0, etcd_prefix.length()+10) == etcd_prefix+"/pg/state/") { - // /pg/state/%d - pg_num_t pg_num = stoull_full(key.substr(etcd_prefix.length()+10)); - if (!pg_num) + // /pg/state/%d/%d + pool_id_t pool_id = 0; + pg_num_t pg_num = 0; + char null_byte = 0; + sscanf(key.c_str() + etcd_prefix.length()+10, "%lu/%u%c", &pool_id, &pg_num, &null_byte); + if (!pool_id || pool_id >= POOL_ID_MAX || !pg_num || null_byte != 0) { printf("Bad etcd key %s, ignoring\n", key.c_str()); } else if (value.is_null()) { - this->pg_config[pg_num].cur_primary = 0; - this->pg_config[pg_num].cur_state = 0; + this->pool_config[pool_id].pg_config[pg_num].cur_primary = 0; + this->pool_config[pool_id].pg_config[pg_num].cur_state = 0; } else { @@ -401,8 +482,8 @@ void etcd_state_client_t::parse_state(const std::string & key, const json11::Jso printf("Unexpected PG %u state in etcd: primary=%lu, state=%s\n", pg_num, cur_primary, value["state"].dump().c_str()); return; } - this->pg_config[pg_num].cur_primary = cur_primary; - this->pg_config[pg_num].cur_state = state; + this->pool_config[pool_id].pg_config[pg_num].cur_primary = cur_primary; + this->pool_config[pool_id].pg_config[pg_num].cur_state = state; } } else if (key.substr(0, etcd_prefix.length()+11) == etcd_prefix+"/osd/state/") diff --git a/etcd_state_client.h b/etcd_state_client.h index ec312fb3..c608ea62 100644 --- a/etcd_state_client.h +++ b/etcd_state_client.h @@ -13,6 +13,17 @@ #define ETCD_SLOW_TIMEOUT 5000 #define ETCD_QUICK_TIMEOUT 1000 +#define POOL_SCHEME_REPLICATED 1 +#define POOL_SCHEME_XOR 2 +#define POOL_ID_MAX 0x10000 +#define POOL_ID_BITS 16 + +struct json_kv_t +{ + std::string key; + json11::Json value; +}; + struct pg_config_t { bool exists; @@ -26,10 +37,19 @@ struct pg_config_t uint64_t epoch; }; -struct json_kv_t +typedef uint64_t pool_id_t; + +struct pool_config_t { - std::string key; - json11::Json value; + bool exists; + pool_id_t id; + std::string name; + uint64_t scheme; + uint64_t pg_size, pg_minsize; + uint64_t pg_count; + std::string failure_domain; + uint64_t max_osd_combinations; + std::map pg_config; }; struct etcd_state_client_t @@ -42,14 +62,14 @@ struct etcd_state_client_t int etcd_watches_initialised = 0; uint64_t etcd_watch_revision = 0; websocket_t *etcd_watch_ws = NULL; - std::map pg_config; + std::map pool_config; std::map peer_states; std::function on_change_hook; std::function on_load_config_hook; std::function load_pgs_checks_hook; std::function on_load_pgs_hook; - std::function on_change_pg_history_hook; + std::function on_change_pg_history_hook; std::function on_change_osd_state_hook; json_kv_t parse_etcd_kv(const json11::Json & kv_json); diff --git a/lp/mon.js b/lp/mon.js index aee3e188..c70f181a 100644 --- a/lp/mon.js +++ b/lp/mon.js @@ -557,12 +557,14 @@ class Mon console.log('Pool ID '+pool_id+' is invalid'); return false; } - if (!pool_cfg.pg_size || pool_cfg.pg_size < 1) + if (!pool_cfg.pg_size || pool_cfg.pg_size < 1 || + pool_cfg.scheme === 'xor' && pool_cfg.pg_size < 3) { console.log('Pool '+pool_id+' has invalid pg_size'); return false; } - if (!pool_cfg.pg_minsize || pool_cfg.pg_minsize < 1 || pool_cfg.pg_minsize > pool_cfg.pg_size) + if (!pool_cfg.pg_minsize || pool_cfg.pg_minsize < 1 || pool_cfg.pg_minsize > pool_cfg.pg_size || + pool_cfg.scheme === 'xor' && pool_cfg.pg_minsize < (pool_cfg.pg_size - 1)) { console.log('Pool '+pool_id+' has invalid pg_minsize'); return false; @@ -574,12 +576,17 @@ class Mon } if (!pool_cfg.name) { - console.log('Pool '+pool_id+' has invalid pg_count'); + console.log('Pool '+pool_id+' has empty name'); + return false; + } + if (pool_cfg.scheme !== 'xor' && pool_cfg.scheme !== 'replicated') + { + console.log('Pool '+pool_id+' has invalid coding scheme (only "xor" and "replicated" are allowed)'); return false; } if (pool_cfg.max_osd_combinations < 100) { - console.log('Pool '+pool_id+' has invalid max_osd_combinations'); + console.log('Pool '+pool_id+' has invalid max_osd_combinations (must be at least 100)'); return false; } return true; diff --git a/osd_peering.cpp b/osd_peering.cpp index a98e0213..abf5e3fb 100644 --- a/osd_peering.cpp +++ b/osd_peering.cpp @@ -102,6 +102,12 @@ void osd_t::start_pg_peering(pg_num_t pg_num) pg.state = PG_PEERING; this->peering_state |= OSD_PEERING_PGS; report_pg_state(pg); + if (parsed_cfg.target_set.size() != 3) + { + printf("Bad PG %u config format: incorrect osd_set = %s\n", pg_num, pg_item.second["osd_set"].dump().c_str()); + parsed_cfg.target_set.resize(3); + parsed_cfg.pause = true; + } // Reset PG state pg.cur_peers.clear(); pg.state_dict.clear();