Compare commits
2 Commits
262c581400
...
66c9271cbd
Author | SHA1 | Date |
---|---|---|
Vitaliy Filippov | 66c9271cbd | |
Vitaliy Filippov | 7b37ba921d |
|
@ -56,6 +56,7 @@ const etcd_tree = {
|
||||||
osd_out_time: 600, // seconds. min: 0
|
osd_out_time: 600, // seconds. min: 0
|
||||||
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
|
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
|
||||||
use_old_pg_combinator: false,
|
use_old_pg_combinator: false,
|
||||||
|
osd_backfillfull_ratio: 0.99,
|
||||||
// client and osd
|
// client and osd
|
||||||
tcp_header_buffer_size: 65536,
|
tcp_header_buffer_size: 65536,
|
||||||
use_sync_send_recv: false,
|
use_sync_send_recv: false,
|
||||||
|
|
37
mon/mon.js
37
mon/mon.js
|
@ -74,6 +74,7 @@ class Mon
|
||||||
this.state = JSON.parse(JSON.stringify(etcd_tree));
|
this.state = JSON.parse(JSON.stringify(etcd_tree));
|
||||||
this.prev_stats = { osd_stats: {}, osd_diff: {} };
|
this.prev_stats = { osd_stats: {}, osd_diff: {} };
|
||||||
this.recheck_pgs_active = false;
|
this.recheck_pgs_active = false;
|
||||||
|
this.updating_total_stats = false;
|
||||||
this.watcher_active = false;
|
this.watcher_active = false;
|
||||||
this.old_pg_config = false;
|
this.old_pg_config = false;
|
||||||
this.old_pg_stats_seen = false;
|
this.old_pg_stats_seen = false;
|
||||||
|
@ -658,7 +659,13 @@ class Mon
|
||||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
|
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
|
||||||
}
|
}
|
||||||
new_pg_config.hash = tree_hash;
|
new_pg_config.hash = tree_hash;
|
||||||
return await this.save_pg_config(new_pg_config, etcd_request);
|
const { backfillfull_pools } = sum_object_counts({ ...this.state, pg: { ...this.state.pg, config: new_pg_config } }, this.config);
|
||||||
|
new_pg_config.backfillfull_pools = backfillfull_pools.length ? backfillfull_pools : undefined;
|
||||||
|
if (!await this.save_pg_config(new_pg_config, etcd_request))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
|
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
|
||||||
|
@ -730,7 +737,7 @@ class Mon
|
||||||
async update_total_stats()
|
async update_total_stats()
|
||||||
{
|
{
|
||||||
const txn = [];
|
const txn = [];
|
||||||
const { object_counts, object_bytes } = sum_object_counts(this.state, this.config);
|
const { object_counts, object_bytes, backfillfull_pools } = sum_object_counts(this.state, this.config);
|
||||||
let stats = sum_op_stats(this.state.osd, this.prev_stats);
|
let stats = sum_op_stats(this.state.osd, this.prev_stats);
|
||||||
let { inode_stats, seen_pools } = sum_inode_stats(this.state, this.prev_stats);
|
let { inode_stats, seen_pools } = sum_inode_stats(this.state, this.prev_stats);
|
||||||
stats.object_counts = object_counts;
|
stats.object_counts = object_counts;
|
||||||
|
@ -783,6 +790,16 @@ class Mon
|
||||||
{
|
{
|
||||||
await this.etcd.etcd_call('/kv/txn', { success: txn }, this.config.etcd_mon_timeout, 0);
|
await this.etcd.etcd_call('/kv/txn', { success: txn }, this.config.etcd_mon_timeout, 0);
|
||||||
}
|
}
|
||||||
|
// Re-save the PG configuration when the freshly computed list of backfillfull pools
// differs from the list stored in it. Nothing is done while recheck_pgs_active is set.
// The list is stored under the `backfillfull_pools` key (the same key that is written
// below), so that is the key the stored side of the comparison has to read.
const stored_backfillfull_pools = (this.state.pg.config||{}).backfillfull_pools||[];
if (!this.recheck_pgs_active &&
    backfillfull_pools.join(',') != stored_backfillfull_pools.join(','))
{
    console.log(
        (backfillfull_pools.length ? 'Pool(s) '+backfillfull_pools.join(', ') : 'No pools')+
        ' are backfillfull, applying rebalance configuration'
    );
    // An empty list is stored as undefined, i.e. the key is dropped from the serialized config
    const new_pg_config = { ...this.state.pg.config, backfillfull_pools: backfillfull_pools.length ? backfillfull_pools : undefined };
    await this.save_pg_config(new_pg_config);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
schedule_update_stats()
|
schedule_update_stats()
|
||||||
|
@ -794,7 +811,21 @@ class Mon
|
||||||
this.stats_timer = setTimeout(() =>
|
this.stats_timer = setTimeout(() =>
|
||||||
{
|
{
|
||||||
this.stats_timer = null;
|
this.stats_timer = null;
|
||||||
this.update_total_stats().catch(console.error);
|
if (this.updating_total_stats)
|
||||||
|
{
|
||||||
|
this.schedule_update_stats();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.updating_total_stats = true;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
this.update_total_stats().catch(console.error);
|
||||||
|
}
|
||||||
|
catch (e)
|
||||||
|
{
|
||||||
|
console.error(e);
|
||||||
|
}
|
||||||
|
this.updating_total_stats = false;
|
||||||
}, this.config.mon_stats_timeout);
|
}, this.config.mon_stats_timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
32
mon/stats.js
32
mon/stats.js
|
@ -109,6 +109,8 @@ function sum_object_counts(state, global_config)
|
||||||
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
|
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
const pool_per_osd = {};
|
||||||
|
const clean_per_osd = {};
|
||||||
for (const pool_id in pgstats)
|
for (const pool_id in pgstats)
|
||||||
{
|
{
|
||||||
let object_size = 0;
|
let object_size = 0;
|
||||||
|
@ -143,10 +145,38 @@ function sum_object_counts(state, global_config)
|
||||||
object_bytes[k] += BigInt(st[k+'_count']) * object_size;
|
object_bytes[k] += BigInt(st[k+'_count']) * object_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (st.object_count)
{
    // Attribute this PG's object count to every OSD of its configured osd_set.
    // The values of osd_set are needed here, hence for...of: for...in would yield
    // the array indices ("0", "1", ...) and the per-OSD maps would be keyed by
    // position instead of by OSD number, breaking the later state.osd.stats[osd] lookup.
    const pg_osd_set = (((state.pg.config.items||{})[pool_id]||{})[pg_num]||{}).osd_set||[];
    for (const pg_osd of pg_osd_set)
    {
        if (!(pg_osd in clean_per_osd))
        {
            clean_per_osd[pg_osd] = 0n;
        }
        clean_per_osd[pg_osd] += BigInt(st.object_count);
        // Remember which pools place data on this OSD
        pool_per_osd[pg_osd] = pool_per_osd[pg_osd]||{};
        pool_per_osd[pg_osd][pool_id] = true;
    }
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return { object_counts, object_bytes };
|
// If clean_per_osd[osd] is larger than osd capacity then it will fill up during rebalance
|
||||||
|
// Collect pools that have data on at least one OSD whose expected object count
// (clean_per_osd) exceeds osd_backfillfull_ratio of that OSD's capacity in blocks.
let backfillfull_pools = {};
for (const osd in clean_per_osd)
{
    const st = state.osd.stats[osd];
    // OSDs without usable size statistics cannot be checked
    if (!st || !st.size || !st.data_block_size)
    {
        continue;
    }
    // The ratio is applied as integer parts-per-million. It is rounded first because
    // BigInt() throws a RangeError for non-integer numbers, which a floating-point
    // product like ratio*1000000 may turn out to be.
    const ratio_ppm = BigInt(Math.round((global_config.osd_backfillfull_ratio||0.99)*1000000));
    const max_objects = BigInt(st.size)/BigInt(st.data_block_size)*ratio_ppm/1000000n;
    if (max_objects < clean_per_osd[osd])
    {
        for (const pool_id in pool_per_osd[osd])
        {
            backfillfull_pools[pool_id] = true;
        }
    }
}
// Return a sorted array of pool IDs so that lists can be compared as joined strings
backfillfull_pools = Object.keys(backfillfull_pools).sort();
|
||||||
|
return { object_counts, object_bytes, backfillfull_pools };
|
||||||
}
|
}
|
||||||
|
|
||||||
// sum_inode_stats(this.state, this.prev_stats)
|
// sum_inode_stats(this.state, this.prev_stats)
|
||||||
|
|
|
@ -785,7 +785,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
}
|
}
|
||||||
for (auto & pool_item: value.object_items())
|
for (auto & pool_item: value.object_items())
|
||||||
{
|
{
|
||||||
pool_config_t pc;
|
pool_config_t pc = {};
|
||||||
// ID
|
// ID
|
||||||
pool_id_t pool_id;
|
pool_id_t pool_id;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
|
@ -931,12 +931,28 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
// Ignore old key if the new one is present
|
// Ignore old key if the new one is present
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
for (auto & pool_id_json: value["backfillfull_pools"].array_items())
|
||||||
|
{
|
||||||
|
auto pool_id = pool_id_json.uint64_value();
|
||||||
|
auto pool_it = this->pool_config.find(pool_id);
|
||||||
|
if (pool_it != this->pool_config.end())
|
||||||
|
{
|
||||||
|
pool_it->second.backfillfull |= 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
for (auto & pool_item: this->pool_config)
|
for (auto & pool_item: this->pool_config)
|
||||||
{
|
{
|
||||||
for (auto & pg_item: pool_item.second.pg_config)
|
for (auto & pg_item: pool_item.second.pg_config)
|
||||||
{
|
{
|
||||||
pg_item.second.config_exists = false;
|
pg_item.second.config_exists = false;
|
||||||
}
|
}
|
||||||
|
// 3 = was 1 and became 1, 0 = was 0 and became 0
|
||||||
|
if (pool_item.second.backfillfull == 2 || pool_item.second.backfillfull == 1)
|
||||||
|
{
|
||||||
|
if (on_change_backfillfull_hook)
|
||||||
|
on_change_backfillfull_hook(pool_item.first);
|
||||||
|
}
|
||||||
|
pool_item.second.backfillfull = pool_item.second.backfillfull >> 1;
|
||||||
}
|
}
|
||||||
for (auto & pool_item: value["items"].object_items())
|
for (auto & pool_item: value["items"].object_items())
|
||||||
{
|
{
|
||||||
|
|
|
@ -62,6 +62,7 @@ struct pool_config_t
|
||||||
std::map<pg_num_t, pg_config_t> pg_config;
|
std::map<pg_num_t, pg_config_t> pg_config;
|
||||||
uint64_t scrub_interval;
|
uint64_t scrub_interval;
|
||||||
std::string used_for_fs;
|
std::string used_for_fs;
|
||||||
|
int backfillfull;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct inode_config_t
|
struct inode_config_t
|
||||||
|
@ -131,6 +132,7 @@ public:
|
||||||
std::function<json11::Json()> load_pgs_checks_hook;
|
std::function<json11::Json()> load_pgs_checks_hook;
|
||||||
std::function<void(bool)> on_load_pgs_hook;
|
std::function<void(bool)> on_load_pgs_hook;
|
||||||
std::function<void()> on_change_pool_config_hook;
|
std::function<void()> on_change_pool_config_hook;
|
||||||
|
std::function<void(pool_id_t)> on_change_backfillfull_hook;
|
||||||
std::function<void(pool_id_t, pg_num_t, osd_num_t)> on_change_pg_state_hook;
|
std::function<void(pool_id_t, pg_num_t, osd_num_t)> on_change_pg_state_hook;
|
||||||
std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
|
std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
|
||||||
std::function<void(osd_num_t)> on_change_osd_state_hook;
|
std::function<void(osd_num_t)> on_change_osd_state_hook;
|
||||||
|
|
|
@ -35,6 +35,7 @@ struct pool_creator_t
|
||||||
uint64_t new_pools_mod_rev;
|
uint64_t new_pools_mod_rev;
|
||||||
json11::Json state_node_tree;
|
json11::Json state_node_tree;
|
||||||
json11::Json new_pools;
|
json11::Json new_pools;
|
||||||
|
std::map<osd_num_t, json11::Json> osd_stats;
|
||||||
|
|
||||||
bool is_done() { return state == 100; }
|
bool is_done() { return state == 100; }
|
||||||
|
|
||||||
|
@ -46,8 +47,6 @@ struct pool_creator_t
|
||||||
goto resume_2;
|
goto resume_2;
|
||||||
else if (state == 3)
|
else if (state == 3)
|
||||||
goto resume_3;
|
goto resume_3;
|
||||||
else if (state == 4)
|
|
||||||
goto resume_4;
|
|
||||||
else if (state == 5)
|
else if (state == 5)
|
||||||
goto resume_5;
|
goto resume_5;
|
||||||
else if (state == 6)
|
else if (state == 6)
|
||||||
|
@ -121,15 +120,15 @@ resume_2:
|
||||||
// Get state_node_tree based on node_placement and osd stats
|
// Get state_node_tree based on node_placement and osd stats
|
||||||
{
|
{
|
||||||
auto node_placement_kv = parent->cli->st_cli.parse_etcd_kv(parent->etcd_result["responses"][0]["response_range"]["kvs"][0]);
|
auto node_placement_kv = parent->cli->st_cli.parse_etcd_kv(parent->etcd_result["responses"][0]["response_range"]["kvs"][0]);
|
||||||
std::map<osd_num_t, json11::Json> osd_stats;
|
|
||||||
timespec tv_now;
|
timespec tv_now;
|
||||||
clock_gettime(CLOCK_REALTIME, &tv_now);
|
clock_gettime(CLOCK_REALTIME, &tv_now);
|
||||||
uint64_t osd_out_time = parent->cli->config["osd_out_time"].uint64_value();
|
uint64_t osd_out_time = parent->cli->config["osd_out_time"].uint64_value();
|
||||||
if (!osd_out_time)
|
if (!osd_out_time)
|
||||||
osd_out_time = 600;
|
osd_out_time = 600;
|
||||||
|
osd_stats.clear();
|
||||||
parent->iterate_kvs_1(parent->etcd_result["responses"][1]["response_range"]["kvs"], "/osd/stats/", [&](uint64_t cur_osd, json11::Json value)
|
parent->iterate_kvs_1(parent->etcd_result["responses"][1]["response_range"]["kvs"], "/osd/stats/", [&](uint64_t cur_osd, json11::Json value)
|
||||||
{
|
{
|
||||||
if (value["time"].uint64_value()+osd_out_time >= tv_now.tv_sec)
|
if ((uint64_t)value["time"].number_value()+osd_out_time >= tv_now.tv_sec)
|
||||||
osd_stats[cur_osd] = value;
|
osd_stats[cur_osd] = value;
|
||||||
});
|
});
|
||||||
state_node_tree = get_state_node_tree(node_placement_kv.value.object_items(), osd_stats);
|
state_node_tree = get_state_node_tree(node_placement_kv.value.object_items(), osd_stats);
|
||||||
|
@ -175,42 +174,18 @@ resume_3:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get stats (for block_size, bitmap_granularity, ...) of osds in state_node_tree
|
|
||||||
{
|
|
||||||
json11::Json::array osd_stats;
|
|
||||||
|
|
||||||
for (auto osd_num: state_node_tree["osds"].array_items())
|
|
||||||
{
|
|
||||||
osd_stats.push_back(json11::Json::object {
|
|
||||||
{ "request_range", json11::Json::object {
|
|
||||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/osd/stats/"+osd_num.as_string()) },
|
|
||||||
} }
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
parent->etcd_txn(json11::Json::object{ { "success", osd_stats } });
|
|
||||||
}
|
|
||||||
|
|
||||||
state = 4;
|
|
||||||
resume_4:
|
|
||||||
if (parent->waiting > 0)
|
|
||||||
return;
|
|
||||||
if (parent->etcd_err.err)
|
|
||||||
{
|
|
||||||
result = parent->etcd_err;
|
|
||||||
state = 100;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Filter osds from state_node_tree based on pool parameters and osd stats
|
// Filter osds from state_node_tree based on pool parameters and osd stats
|
||||||
{
|
{
|
||||||
std::vector<json11::Json> osd_stats;
|
std::vector<json11::Json> filtered_osd_stats;
|
||||||
for (auto & ocr: parent->etcd_result["responses"].array_items())
|
for (auto & osd_num: state_node_tree["osds"].array_items())
|
||||||
{
|
{
|
||||||
auto kv = parent->cli->st_cli.parse_etcd_kv(ocr["response_range"]["kvs"][0]);
|
auto st_it = osd_stats.find(osd_num.uint64_value());
|
||||||
osd_stats.push_back(kv.value);
|
if (st_it != osd_stats.end())
|
||||||
|
{
|
||||||
|
filtered_osd_stats.push_back(st_it->second);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
guess_block_size(osd_stats);
|
guess_block_size(filtered_osd_stats);
|
||||||
state_node_tree = filter_state_node_tree_by_stats(state_node_tree, osd_stats);
|
state_node_tree = filter_state_node_tree_by_stats(state_node_tree, osd_stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,8 +193,7 @@ resume_4:
|
||||||
{
|
{
|
||||||
auto failure_domain = cfg["failure_domain"].string_value() == ""
|
auto failure_domain = cfg["failure_domain"].string_value() == ""
|
||||||
? "host" : cfg["failure_domain"].string_value();
|
? "host" : cfg["failure_domain"].string_value();
|
||||||
uint64_t max_pg_size = get_max_pg_size(state_node_tree["nodes"].object_items(),
|
uint64_t max_pg_size = get_max_pg_size(state_node_tree, failure_domain, cfg["root_node"].string_value());
|
||||||
failure_domain, cfg["root_node"].string_value());
|
|
||||||
|
|
||||||
if (cfg["pg_size"].uint64_value() > max_pg_size)
|
if (cfg["pg_size"].uint64_value() > max_pg_size)
|
||||||
{
|
{
|
||||||
|
@ -411,13 +385,11 @@ resume_8:
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add osd if necessary
|
// Add osd
|
||||||
if (node_placement.find(osd_num) == node_placement.end())
|
node_placement[osd_num] = json11::Json::object {
|
||||||
{
|
{ "parent", node_placement[osd_num]["parent"].is_null() ? osd_host : node_placement[osd_num]["parent"] },
|
||||||
node_placement[osd_num] = json11::Json::object {
|
{ "level", "osd" },
|
||||||
{ "parent", osd_host }
|
};
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return json11::Json::object { { "osds", existing_osds }, { "nodes", node_placement } };
|
return json11::Json::object { { "osds", existing_osds }, { "nodes", node_placement } };
|
||||||
|
@ -547,15 +519,13 @@ resume_8:
|
||||||
// filtered out by stats parameters (block_size, bitmap_granularity) in
|
// filtered out by stats parameters (block_size, bitmap_granularity) in
|
||||||
// given osd_stats and current pool config.
|
// given osd_stats and current pool config.
|
||||||
// Requires: state_node_tree["osds"] must match osd_stats 1-1
|
// OSDs listed in state_node_tree["osds"] that have no entry in osd_stats are skipped
|
||||||
json11::Json filter_state_node_tree_by_stats(const json11::Json & state_node_tree, std::vector<json11::Json> & osd_stats)
|
json11::Json filter_state_node_tree_by_stats(const json11::Json & state_node_tree, std::map<osd_num_t, json11::Json> & osd_stats)
|
||||||
{
|
{
|
||||||
auto & osds = state_node_tree["osds"].array_items();
|
|
||||||
|
|
||||||
// Accepted state_node_tree nodes
|
// Accepted state_node_tree nodes
|
||||||
auto accepted_nodes = state_node_tree["nodes"].object_items();
|
auto accepted_nodes = state_node_tree["nodes"].object_items();
|
||||||
|
|
||||||
// List of accepted osds
|
// List of accepted osds
|
||||||
std::vector<std::string> accepted_osds;
|
json11::Json::array accepted_osds;
|
||||||
|
|
||||||
block_size = cfg["block_size"].uint64_value()
|
block_size = cfg["block_size"].uint64_value()
|
||||||
? cfg["block_size"].uint64_value()
|
? cfg["block_size"].uint64_value()
|
||||||
|
@ -567,21 +537,25 @@ resume_8:
|
||||||
? etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value(), IMMEDIATE_ALL)
|
? etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value(), IMMEDIATE_ALL)
|
||||||
: parent->cli->st_cli.global_immediate_commit;
|
: parent->cli->st_cli.global_immediate_commit;
|
||||||
|
|
||||||
for (size_t i = 0; i < osd_stats.size(); i++)
|
for (auto osd_num_json: state_node_tree["osds"].array_items())
|
||||||
{
|
{
|
||||||
auto & os = osd_stats[i];
|
auto osd_num = osd_num_json.uint64_value();
|
||||||
// Get osd number
|
auto os_it = osd_stats.find(osd_num);
|
||||||
auto osd_num = osds[i].as_string();
|
if (os_it == osd_stats.end())
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto & os = os_it->second;
|
||||||
if (!os["data_block_size"].is_null() && os["data_block_size"] != block_size ||
|
if (!os["data_block_size"].is_null() && os["data_block_size"] != block_size ||
|
||||||
!os["bitmap_granularity"].is_null() && os["bitmap_granularity"] != bitmap_granularity ||
|
!os["bitmap_granularity"].is_null() && os["bitmap_granularity"] != bitmap_granularity ||
|
||||||
!os["immediate_commit"].is_null() &&
|
!os["immediate_commit"].is_null() &&
|
||||||
etcd_state_client_t::parse_immediate_commit(os["immediate_commit"].string_value(), IMMEDIATE_NONE) < immediate_commit)
|
etcd_state_client_t::parse_immediate_commit(os["immediate_commit"].string_value(), IMMEDIATE_NONE) < immediate_commit)
|
||||||
{
|
{
|
||||||
accepted_nodes.erase(osd_num);
|
accepted_nodes.erase(osd_num_json.as_string());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
accepted_osds.push_back(osd_num);
|
accepted_osds.push_back(osd_num_json);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -589,81 +563,28 @@ resume_8:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns maximum pg_size possible for given node_tree and failure_domain, starting at parent_node
|
// Returns maximum pg_size possible for given node_tree and failure_domain, starting at parent_node
|
||||||
uint64_t get_max_pg_size(json11::Json state_node_tree, const std::string & level, const std::string & root_node)
{
    // IDs of the distinct <level> nodes reached when climbing up from each OSD
    std::set<std::string> level_seen;
    const auto & nodes = state_node_tree["nodes"];
    for (auto & osd: state_node_tree["osds"].array_items())
    {
        // Follow "parent" links from the OSD until a node of the requested level
        // is found, <root_node> is reached, or the chain ends at an unknown node
        std::string node_id = osd.string_value();
        for (auto node = nodes[node_id]; !node.is_null(); node = nodes[node_id])
        {
            if (node["level"] == level)
            {
                level_seen.insert(node_id);
                break;
            }
            if (node_id == root_node)
            {
                break;
            }
            node_id = node["parent"].string_value();
        }
    }
    // The result is the number of distinct <level> nodes found
    return level_seen.size();
}
|
||||||
|
|
||||||
json11::Json create_pool(const etcd_kv_t & kv)
|
json11::Json create_pool(const etcd_kv_t & kv)
|
||||||
|
|
|
@ -226,6 +226,7 @@ class osd_t
|
||||||
void parse_config(bool init);
|
void parse_config(bool init);
|
||||||
void init_cluster();
|
void init_cluster();
|
||||||
void on_change_osd_state_hook(osd_num_t peer_osd);
|
void on_change_osd_state_hook(osd_num_t peer_osd);
|
||||||
|
void on_change_backfillfull_hook(pool_id_t pool_id);
|
||||||
void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num);
|
void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num);
|
||||||
void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes);
|
void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes);
|
||||||
void on_load_config_hook(json11::Json::object & changes);
|
void on_load_config_hook(json11::Json::object & changes);
|
||||||
|
|
|
@ -65,6 +65,7 @@ void osd_t::init_cluster()
|
||||||
st_cli.tfd = tfd;
|
st_cli.tfd = tfd;
|
||||||
st_cli.log_level = log_level;
|
st_cli.log_level = log_level;
|
||||||
st_cli.on_change_osd_state_hook = [this](osd_num_t peer_osd) { on_change_osd_state_hook(peer_osd); };
|
st_cli.on_change_osd_state_hook = [this](osd_num_t peer_osd) { on_change_osd_state_hook(peer_osd); };
|
||||||
|
st_cli.on_change_backfillfull_hook = [this](pool_id_t pool_id) { on_change_backfillfull_hook(pool_id); };
|
||||||
st_cli.on_change_pg_history_hook = [this](pool_id_t pool_id, pg_num_t pg_num) { on_change_pg_history_hook(pool_id, pg_num); };
|
st_cli.on_change_pg_history_hook = [this](pool_id_t pool_id, pg_num_t pg_num) { on_change_pg_history_hook(pool_id, pg_num); };
|
||||||
st_cli.on_change_hook = [this](std::map<std::string, etcd_kv_t> & changes) { on_change_etcd_state_hook(changes); };
|
st_cli.on_change_hook = [this](std::map<std::string, etcd_kv_t> & changes) { on_change_etcd_state_hook(changes); };
|
||||||
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
||||||
|
@ -414,6 +415,14 @@ void osd_t::on_change_osd_state_hook(osd_num_t peer_osd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Called when the backfillfull flag of a pool changes.
// Sets OSD_RECOVERING in peering_state unless OSD_RECOVERING or OSD_FLUSHING_PGS
// is already set. The pool_id argument is not used.
void osd_t::on_change_backfillfull_hook(pool_id_t pool_id)
{
    const bool recovering_or_flushing = (peering_state & (OSD_RECOVERING | OSD_FLUSHING_PGS)) != 0;
    if (recovering_or_flushing)
    {
        return;
    }
    peering_state |= OSD_RECOVERING;
}
|
||||||
|
|
||||||
void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes)
|
void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes)
|
||||||
{
|
{
|
||||||
if (changes.find(st_cli.etcd_prefix+"/config/global") != changes.end())
|
if (changes.find(st_cli.etcd_prefix+"/config/global") != changes.end())
|
||||||
|
|
|
@ -252,10 +252,18 @@ bool osd_t::pick_next_recovery(osd_recovery_op_t &op)
|
||||||
auto mask = recovery_last_degraded ? (PG_ACTIVE | PG_HAS_DEGRADED) : (PG_ACTIVE | PG_DEGRADED | PG_HAS_MISPLACED);
|
auto mask = recovery_last_degraded ? (PG_ACTIVE | PG_HAS_DEGRADED) : (PG_ACTIVE | PG_DEGRADED | PG_HAS_MISPLACED);
|
||||||
auto check = recovery_last_degraded ? (PG_ACTIVE | PG_HAS_DEGRADED) : (PG_ACTIVE | PG_HAS_MISPLACED);
|
auto check = recovery_last_degraded ? (PG_ACTIVE | PG_HAS_DEGRADED) : (PG_ACTIVE | PG_HAS_MISPLACED);
|
||||||
// Restart scanning from the same PG as the last time
|
// Restart scanning from the same PG as the last time
|
||||||
|
restart:
|
||||||
for (auto pg_it = pgs.lower_bound(recovery_last_pg); pg_it != pgs.end(); pg_it++)
|
for (auto pg_it = pgs.lower_bound(recovery_last_pg); pg_it != pgs.end(); pg_it++)
|
||||||
{
|
{
|
||||||
if ((pg_it->second.state & mask) == check)
|
if ((pg_it->second.state & mask) == check)
|
||||||
{
|
{
|
||||||
|
auto pool_it = st_cli.pool_config.find(pg_it->first.pool_id);
|
||||||
|
if (pool_it != st_cli.pool_config.end() && pool_it->second.backfillfull)
|
||||||
|
{
|
||||||
|
// Skip the pool
|
||||||
|
recovery_last_pg.pool_id++;
|
||||||
|
goto restart;
|
||||||
|
}
|
||||||
auto & src = recovery_last_degraded ? pg_it->second.degraded_objects : pg_it->second.misplaced_objects;
|
auto & src = recovery_last_degraded ? pg_it->second.degraded_objects : pg_it->second.misplaced_objects;
|
||||||
assert(src.size() > 0);
|
assert(src.size() > 0);
|
||||||
// Restart scanning from the next object
|
// Restart scanning from the next object
|
||||||
|
|
Loading…
Reference in New Issue