Compare commits

...

1 Commits

Author SHA1 Message Date
Vitaliy Filippov e0e515865a Pause pool rebalance when monitor detects that it can lead to any OSD becoming full
Test / test_rebalance_verify_ec (push) Successful in 1m38s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 1m41s Details
Test / test_write_no_same (push) Successful in 8s Details
Test / test_switch_primary (push) Successful in 33s Details
Test / test_write (push) Successful in 31s Details
Test / test_write_xor (push) Successful in 36s Details
Test / test_heal_pg_size_2 (push) Successful in 2m17s Details
Test / test_heal_ec (push) Successful in 2m18s Details
Test / test_heal_antietcd (push) Successful in 2m17s Details
Test / test_heal_csum_32k_dmj (push) Successful in 2m18s Details
Test / test_heal_csum_32k_dj (push) Successful in 2m21s Details
Test / test_heal_csum_4k_dmj (push) Successful in 2m15s Details
Test / test_heal_csum_32k (push) Successful in 2m18s Details
Test / test_heal_csum_4k_dj (push) Successful in 2m19s Details
Test / test_resize_auto (push) Successful in 9s Details
Test / test_resize (push) Successful in 14s Details
Test / test_osd_tags (push) Successful in 8s Details
Test / test_enospc (push) Successful in 10s Details
Test / test_snapshot_pool2 (push) Successful in 16s Details
Test / test_enospc_xor (push) Successful in 14s Details
Test / test_enospc_imm (push) Successful in 12s Details
Test / test_enospc_imm_xor (push) Successful in 13s Details
Test / test_scrub (push) Successful in 16s Details
Test / test_scrub_zero_osd_2 (push) Successful in 13s Details
Test / test_scrub_xor (push) Successful in 14s Details
Test / test_scrub_pg_size_3 (push) Successful in 17s Details
Test / test_scrub_ec (push) Successful in 14s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 18s Details
Test / test_nfs (push) Successful in 12s Details
Test / test_heal_csum_4k (push) Successful in 2m17s Details
2024-11-21 23:56:04 +03:00
8 changed files with 103 additions and 5 deletions

View File

@ -56,6 +56,7 @@ const etcd_tree = {
osd_out_time: 600, // seconds. min: 0 osd_out_time: 600, // seconds. min: 0
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... }, placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
use_old_pg_combinator: false, use_old_pg_combinator: false,
osd_backfillfull_ratio: 0.99,
// client and osd // client and osd
tcp_header_buffer_size: 65536, tcp_header_buffer_size: 65536,
use_sync_send_recv: false, use_sync_send_recv: false,

View File

@ -74,6 +74,7 @@ class Mon
this.state = JSON.parse(JSON.stringify(etcd_tree)); this.state = JSON.parse(JSON.stringify(etcd_tree));
this.prev_stats = { osd_stats: {}, osd_diff: {} }; this.prev_stats = { osd_stats: {}, osd_diff: {} };
this.recheck_pgs_active = false; this.recheck_pgs_active = false;
this.updating_total_stats = false;
this.watcher_active = false; this.watcher_active = false;
this.old_pg_config = false; this.old_pg_config = false;
this.old_pg_stats_seen = false; this.old_pg_stats_seen = false;
@ -658,7 +659,13 @@ class Mon
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history); this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
} }
new_pg_config.hash = tree_hash; new_pg_config.hash = tree_hash;
return await this.save_pg_config(new_pg_config, etcd_request); const { backfillfull_pools } = sum_object_counts({ ...this.state, pg: { ...this.state.pg, config: new_pg_config } }, this.config);
new_pg_config.backfillfull_pools = backfillfull_pools.length ? backfillfull_pools : undefined;
if (!await this.save_pg_config(new_pg_config, etcd_request))
{
return false;
}
return true;
} }
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] }) async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
@ -730,7 +737,7 @@ class Mon
async update_total_stats() async update_total_stats()
{ {
const txn = []; const txn = [];
const { object_counts, object_bytes } = sum_object_counts(this.state, this.config); const { object_counts, object_bytes, backfillfull_pools } = sum_object_counts(this.state, this.config);
let stats = sum_op_stats(this.state.osd, this.prev_stats); let stats = sum_op_stats(this.state.osd, this.prev_stats);
let { inode_stats, seen_pools } = sum_inode_stats(this.state, this.prev_stats); let { inode_stats, seen_pools } = sum_inode_stats(this.state, this.prev_stats);
stats.object_counts = object_counts; stats.object_counts = object_counts;
@ -783,6 +790,16 @@ class Mon
{ {
await this.etcd.etcd_call('/kv/txn', { success: txn }, this.config.etcd_mon_timeout, 0); await this.etcd.etcd_call('/kv/txn', { success: txn }, this.config.etcd_mon_timeout, 0);
} }
// Re-save the PG configuration when the list of backfillfull pools computed from the
// current statistics differs from the list stored in the PG configuration.
// Nothing is done while recheck_pgs_active is set.
// The stored list lives under the `backfillfull_pools` key (the same key that is written
// below), so this is the key the comparison has to read. Comparing against any other key
// makes the condition true on every call while at least one pool is backfillfull.
if (!this.recheck_pgs_active &&
    backfillfull_pools.join(',') != ((this.state.pg.config||{}).backfillfull_pools||[]).join(','))
{
    console.log(
        (backfillfull_pools.length ? 'Pool(s) '+backfillfull_pools.join(', ') : 'No pools')+
        ' are backfillfull, applying rebalance configuration'
    );
    // An empty list is stored as an absent key
    const new_pg_config = { ...this.state.pg.config, backfillfull_pools: backfillfull_pools.length ? backfillfull_pools : undefined };
    await this.save_pg_config(new_pg_config);
}
} }
schedule_update_stats() schedule_update_stats()
@ -794,7 +811,21 @@ class Mon
this.stats_timer = setTimeout(() => this.stats_timer = setTimeout(() =>
{ {
this.stats_timer = null; this.stats_timer = null;
if (this.updating_total_stats)
{
this.schedule_update_stats();
return;
}
this.updating_total_stats = true;
try
{
this.update_total_stats().catch(console.error); this.update_total_stats().catch(console.error);
}
catch (e)
{
console.error(e);
}
this.updating_total_stats = false;
}, this.config.mon_stats_timeout); }, this.config.mon_stats_timeout);
} }

View File

@ -109,6 +109,8 @@ function sum_object_counts(state, global_config)
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) }; pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
} }
} }
const pool_per_osd = {};
const clean_per_osd = {};
for (const pool_id in pgstats) for (const pool_id in pgstats)
{ {
let object_size = 0; let object_size = 0;
@ -143,10 +145,38 @@ function sum_object_counts(state, global_config)
object_bytes[k] += BigInt(st[k+'_count']) * object_size; object_bytes[k] += BigInt(st[k+'_count']) * object_size;
} }
} }
// Attribute the objects of this PG to every OSD of its configured OSD set and remember
// which pools place data on each OSD.
if (st.object_count)
{
    // osd_set is an array, so iterate over its values (OSD numbers) with for...of;
    // for...in would yield the array indices "0", "1", ... instead
    for (const pg_osd of (((state.pg.config.items||{})[pool_id]||{})[pg_num]||{}).osd_set||[])
    {
        if (!(pg_osd in clean_per_osd))
        {
            clean_per_osd[pg_osd] = 0n;
        }
        clean_per_osd[pg_osd] += BigInt(st.object_count);
        pool_per_osd[pg_osd] = pool_per_osd[pg_osd]||{};
        pool_per_osd[pg_osd][pool_id] = true;
    }
}
} }
return { object_counts, object_bytes }; }
}
// If clean_per_osd[osd] is larger than osd capacity then it will fill up during rebalance
let backfillfull_pools = {};
for (const osd in clean_per_osd)
{
const st = state.osd.stats[osd];
if (st && st.size && st.data_block_size && (BigInt(st.size)/BigInt(st.data_block_size)*
BigInt((global_config.osd_backfillfull_ratio||0.99)*1000000)/1000000n) < clean_per_osd[osd])
{
for (const pool_id in pool_per_osd[osd])
{
backfillfull_pools[pool_id] = true;
}
}
}
backfillfull_pools = Object.keys(backfillfull_pools).sort();
return { object_counts, object_bytes, backfillfull_pools };
} }
// sum_inode_stats(this.state, this.prev_stats) // sum_inode_stats(this.state, this.prev_stats)

View File

@ -785,7 +785,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
} }
for (auto & pool_item: value.object_items()) for (auto & pool_item: value.object_items())
{ {
pool_config_t pc; pool_config_t pc = {};
// ID // ID
pool_id_t pool_id; pool_id_t pool_id;
char null_byte = 0; char null_byte = 0;
@ -931,12 +931,28 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
// Ignore old key if the new one is present // Ignore old key if the new one is present
return; return;
} }
// Mark every pool listed in "backfillfull_pools" by setting bit 1 (value 2) of its
// backfillfull field. Pool IDs that are not present in pool_config are ignored.
for (auto & pool_id_json: value["backfillfull_pools"].array_items())
{
auto pool_id = pool_id_json.uint64_value();
auto pool_it = this->pool_config.find(pool_id);
if (pool_it != this->pool_config.end())
{
pool_it->second.backfillfull |= 2;
}
}
for (auto & pool_item: this->pool_config) for (auto & pool_item: this->pool_config)
{ {
for (auto & pg_item: pool_item.second.pg_config) for (auto & pg_item: pool_item.second.pg_config)
{ {
pg_item.second.config_exists = false; pg_item.second.config_exists = false;
} }
// backfillfull is used as two bits here: bit 0 holds the value kept from before this
// update, bit 1 (value 2) is set for pools listed in "backfillfull_pools".
// 3 = was 1 and became 1, 0 = was 0 and became 0
// 1 or 2 = the flag changed, so the listener is notified (when one is set)
if (pool_item.second.backfillfull == 2 || pool_item.second.backfillfull == 1)
{
if (on_change_backfillfull_hook)
on_change_backfillfull_hook(pool_item.first);
}
// Keep only the new value: move bit 1 down to bit 0
pool_item.second.backfillfull = pool_item.second.backfillfull >> 1;
} }
for (auto & pool_item: value["items"].object_items()) for (auto & pool_item: value["items"].object_items())
{ {

View File

@ -62,6 +62,7 @@ struct pool_config_t
std::map<pg_num_t, pg_config_t> pg_config; std::map<pg_num_t, pg_config_t> pg_config;
uint64_t scrub_interval; uint64_t scrub_interval;
std::string used_for_fs; std::string used_for_fs;
int backfillfull;
}; };
struct inode_config_t struct inode_config_t
@ -131,6 +132,7 @@ public:
std::function<json11::Json()> load_pgs_checks_hook; std::function<json11::Json()> load_pgs_checks_hook;
std::function<void(bool)> on_load_pgs_hook; std::function<void(bool)> on_load_pgs_hook;
std::function<void()> on_change_pool_config_hook; std::function<void()> on_change_pool_config_hook;
std::function<void(pool_id_t)> on_change_backfillfull_hook;
std::function<void(pool_id_t, pg_num_t, osd_num_t)> on_change_pg_state_hook; std::function<void(pool_id_t, pg_num_t, osd_num_t)> on_change_pg_state_hook;
std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook; std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
std::function<void(osd_num_t)> on_change_osd_state_hook; std::function<void(osd_num_t)> on_change_osd_state_hook;

View File

@ -226,6 +226,7 @@ class osd_t
void parse_config(bool init); void parse_config(bool init);
void init_cluster(); void init_cluster();
void on_change_osd_state_hook(osd_num_t peer_osd); void on_change_osd_state_hook(osd_num_t peer_osd);
void on_change_backfillfull_hook(pool_id_t pool_id);
void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num); void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num);
void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes); void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes);
void on_load_config_hook(json11::Json::object & changes); void on_load_config_hook(json11::Json::object & changes);

View File

@ -65,6 +65,7 @@ void osd_t::init_cluster()
st_cli.tfd = tfd; st_cli.tfd = tfd;
st_cli.log_level = log_level; st_cli.log_level = log_level;
st_cli.on_change_osd_state_hook = [this](osd_num_t peer_osd) { on_change_osd_state_hook(peer_osd); }; st_cli.on_change_osd_state_hook = [this](osd_num_t peer_osd) { on_change_osd_state_hook(peer_osd); };
st_cli.on_change_backfillfull_hook = [this](pool_id_t pool_id) { on_change_backfillfull_hook(pool_id); };
st_cli.on_change_pg_history_hook = [this](pool_id_t pool_id, pg_num_t pg_num) { on_change_pg_history_hook(pool_id, pg_num); }; st_cli.on_change_pg_history_hook = [this](pool_id_t pool_id, pg_num_t pg_num) { on_change_pg_history_hook(pool_id, pg_num); };
st_cli.on_change_hook = [this](std::map<std::string, etcd_kv_t> & changes) { on_change_etcd_state_hook(changes); }; st_cli.on_change_hook = [this](std::map<std::string, etcd_kv_t> & changes) { on_change_etcd_state_hook(changes); };
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); }; st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
@ -414,6 +415,14 @@ void osd_t::on_change_osd_state_hook(osd_num_t peer_osd)
} }
} }
// Handler for st_cli.on_change_backfillfull_hook.
// Raises the OSD_RECOVERING flag in peering_state unless OSD_RECOVERING or
// OSD_FLUSHING_PGS is already set. pool_id is currently not used.
void osd_t::on_change_backfillfull_hook(pool_id_t pool_id)
{
    const bool already_flagged = (peering_state & (OSD_RECOVERING | OSD_FLUSHING_PGS)) != 0;
    if (already_flagged)
    {
        return;
    }
    peering_state |= OSD_RECOVERING;
}
void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes) void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes)
{ {
if (changes.find(st_cli.etcd_prefix+"/config/global") != changes.end()) if (changes.find(st_cli.etcd_prefix+"/config/global") != changes.end())

View File

@ -252,10 +252,18 @@ bool osd_t::pick_next_recovery(osd_recovery_op_t &op)
auto mask = recovery_last_degraded ? (PG_ACTIVE | PG_HAS_DEGRADED) : (PG_ACTIVE | PG_DEGRADED | PG_HAS_MISPLACED); auto mask = recovery_last_degraded ? (PG_ACTIVE | PG_HAS_DEGRADED) : (PG_ACTIVE | PG_DEGRADED | PG_HAS_MISPLACED);
auto check = recovery_last_degraded ? (PG_ACTIVE | PG_HAS_DEGRADED) : (PG_ACTIVE | PG_HAS_MISPLACED); auto check = recovery_last_degraded ? (PG_ACTIVE | PG_HAS_DEGRADED) : (PG_ACTIVE | PG_HAS_MISPLACED);
// Restart scanning from the same PG as the last time // Restart scanning from the same PG as the last time
restart:
for (auto pg_it = pgs.lower_bound(recovery_last_pg); pg_it != pgs.end(); pg_it++) for (auto pg_it = pgs.lower_bound(recovery_last_pg); pg_it != pgs.end(); pg_it++)
{ {
if ((pg_it->second.state & mask) == check) if ((pg_it->second.state & mask) == check)
{ {
// Look up the pool of the candidate PG; PGs whose pool is not found in pool_config are not skipped
auto pool_it = st_cli.pool_config.find(pg_it->first.pool_id);
if (pool_it != st_cli.pool_config.end() && pool_it->second.backfillfull)
{
// Skip the pool
// The pool is flagged backfillfull: advance the scan position by one pool ID and rescan.
// NOTE(review): this increments recovery_last_pg.pool_id instead of jumping past
// pg_it->first.pool_id, and leaves the other fields of recovery_last_pg untouched.
// Confirm that PGs ordered before the new position in the following pool are picked up
// by a later pass.
recovery_last_pg.pool_id++;
goto restart;
}
auto & src = recovery_last_degraded ? pg_it->second.degraded_objects : pg_it->second.misplaced_objects; auto & src = recovery_last_degraded ? pg_it->second.degraded_objects : pg_it->second.misplaced_objects;
assert(src.size() > 0); assert(src.size() > 0);
// Restart scanning from the next object // Restart scanning from the next object