Allow to disable per-inode stats for VitastorFS pools

master
Vitaliy Filippov 2024-03-01 01:42:46 +03:00
parent ecfc753e93
commit 3aee37eadd
14 changed files with 162 additions and 11 deletions

View File

@ -267,6 +267,7 @@ Optional parameters:
| `--immediate_commit none` | Put pool only on OSDs with this or larger immediate_commit (none < small < all) |
| `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags |
| `--scrub_interval <time>` | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y |
| `--no_inode_stats 1` | Disable per-inode statistics for this pool (use for VitastorFS pools) |
| `--pg_stripe_size <number>` | Increase object grouping stripe |
| `--max_osd_combinations 10000` | Maximum number of random combinations for LP solver input |
| `--wait` | Wait for the new pool to come online |
@ -288,7 +289,7 @@ Modify an existing pool. Modifiable parameters:
```
[-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]
[--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>]
[--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--no_inode_stats 0|1]
[--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]
```

View File

@ -1737,8 +1737,11 @@ class Mon
for (const inode_num in this.state.osd.space[osd_num][pool_id])
{
const u = BigInt(this.state.osd.space[osd_num][pool_id][inode_num]||0);
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
inode_stats[pool_id][inode_num].raw_used += u;
if (inode_num)
{
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
inode_stats[pool_id][inode_num].raw_used += u;
}
this.state.pool.stats[pool_id].used_raw_tb += u;
}
}

View File

@ -82,3 +82,8 @@ uint32_t blockstore_t::get_bitmap_granularity()
{
return impl->get_bitmap_granularity();
}
void blockstore_t::set_no_inode_stats(const std::vector<uint64_t> & pool_ids)
{
impl->set_no_inode_stats(pool_ids);
}

View File

@ -216,6 +216,9 @@ public:
// Get per-inode space usage statistics
std::map<uint64_t, uint64_t> & get_inode_space_stats();
// Set per-pool no_inode_stats
void set_no_inode_stats(const std::vector<uint64_t> & pool_ids);
// Print diagnostics to stdout
void dump_diagnostics();

View File

@ -733,3 +733,86 @@ void blockstore_impl_t::disk_error_abort(const char *op, int retval, int expecte
fprintf(stderr, "Disk %s failed: result is %d, expected %d. Can't continue, sorry :-(\n", op, retval, expected);
exit(1);
}
void blockstore_impl_t::set_no_inode_stats(const std::vector<uint64_t> & pool_ids)
{
for (auto & np: no_inode_stats)
{
np.second = 2;
}
for (auto pool_id: pool_ids)
{
if (!no_inode_stats[pool_id])
recalc_inode_space_stats(pool_id, false);
no_inode_stats[pool_id] = 1;
}
for (auto np_it = no_inode_stats.begin(); np_it != no_inode_stats.end(); )
{
if (np_it->second == 2)
{
recalc_inode_space_stats(np_it->first, true);
no_inode_stats.erase(np_it++);
}
else
np_it++;
}
}
void blockstore_impl_t::recalc_inode_space_stats(uint64_t pool_id, bool per_inode)
{
auto sp_begin = inode_space_stats.lower_bound((pool_id << (64-POOL_ID_BITS)));
auto sp_end = inode_space_stats.lower_bound(((pool_id+1) << (64-POOL_ID_BITS)));
inode_space_stats.erase(sp_begin, sp_end);
auto sh_it = clean_db_shards.lower_bound((pool_id << (64-POOL_ID_BITS)));
while (sh_it != clean_db_shards.end() &&
(sh_it->first >> (64-POOL_ID_BITS)) == pool_id)
{
for (auto & pair: sh_it->second)
{
uint64_t space_id = per_inode ? pair.first.inode : (pool_id << (64-POOL_ID_BITS));
inode_space_stats[space_id] += dsk.data_block_size;
}
sh_it++;
}
object_id last_oid = {};
bool last_exists = false;
auto dirty_it = dirty_db.lower_bound((obj_ver_id){ .oid = { .inode = (pool_id << (64-POOL_ID_BITS)) } });
while (dirty_it != dirty_db.end() && (dirty_it->first.oid.inode >> (64-POOL_ID_BITS)) == pool_id)
{
if (IS_STABLE(dirty_it->second.state) && (IS_BIG_WRITE(dirty_it->second.state) || IS_DELETE(dirty_it->second.state)))
{
bool exists = false;
if (last_oid == dirty_it->first.oid)
{
exists = last_exists;
}
else
{
auto & clean_db = clean_db_shard(dirty_it->first.oid);
auto clean_it = clean_db.find(dirty_it->first.oid);
exists = clean_it != clean_db.end();
}
uint64_t space_id = per_inode ? dirty_it->first.oid.inode : (pool_id << (64-POOL_ID_BITS));
if (IS_BIG_WRITE(dirty_it->second.state))
{
if (!exists)
inode_space_stats[space_id] += dsk.data_block_size;
last_exists = true;
}
else
{
if (exists)
{
auto & sp = inode_space_stats[space_id];
if (sp > dsk.data_block_size)
sp -= dsk.data_block_size;
else
inode_space_stats.erase(space_id);
}
last_exists = false;
}
last_oid = dirty_it->first.oid;
}
dirty_it++;
}
}

View File

@ -272,6 +272,7 @@ class blockstore_impl_t
std::map<pool_id_t, pool_shard_settings_t> clean_db_settings;
std::map<pool_pg_id_t, blockstore_clean_db_t> clean_db_shards;
std::map<uint64_t, int> no_inode_stats;
uint8_t *clean_bitmaps = NULL;
blockstore_dirty_db_t dirty_db;
std::vector<blockstore_op_t*> submit_queue;
@ -318,6 +319,7 @@ class blockstore_impl_t
blockstore_clean_db_t& clean_db_shard(object_id oid);
void reshard_clean_db(pool_id_t pool_id, uint32_t pg_count, uint32_t pg_stripe_size);
void recalc_inode_space_stats(uint64_t pool_id, bool per_inode);
// Journaling
void prepare_journal_sector_write(int sector, blockstore_op_t *op);
@ -428,6 +430,9 @@ public:
// Space usage statistics
std::map<uint64_t, uint64_t> inode_space_stats;
// Set per-pool no_inode_stats
void set_no_inode_stats(const std::vector<uint64_t> & pool_ids);
// Print diagnostics to stdout
void dump_diagnostics();

View File

@ -487,18 +487,24 @@ void blockstore_impl_t::mark_stable(obj_ver_id v, bool forget_dirty)
}
if (!exists)
{
inode_space_stats[dirty_it->first.oid.inode] += dsk.data_block_size;
uint64_t space_id = dirty_it->first.oid.inode;
if (no_inode_stats[dirty_it->first.oid.inode >> (64-POOL_ID_BITS)])
space_id = space_id & ~(((uint64_t)1 << (64-POOL_ID_BITS)) - 1);
inode_space_stats[space_id] += dsk.data_block_size;
used_blocks++;
}
big_to_flush++;
}
else if (IS_DELETE(dirty_it->second.state))
{
auto & sp = inode_space_stats[dirty_it->first.oid.inode];
uint64_t space_id = dirty_it->first.oid.inode;
if (no_inode_stats[dirty_it->first.oid.inode >> (64-POOL_ID_BITS)])
space_id = space_id & ~(((uint64_t)1 << (64-POOL_ID_BITS)) - 1);
auto & sp = inode_space_stats[space_id];
if (sp > dsk.data_block_size)
sp -= dsk.data_block_size;
else
inode_space_stats.erase(dirty_it->first.oid.inode);
inode_space_stats.erase(space_id);
used_blocks--;
big_to_flush++;
}

View File

@ -131,6 +131,7 @@ static const char* help_text =
" --immediate_commit none Put pool only on OSDs with this or larger immediate_commit (none < small < all)\n"
" --primary_affinity_tags tags Prefer to put primary copies on OSDs with all specified tags\n"
" --scrub_interval <time> Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y\n"
" --no_inode_stats 1 Disable per-inode statistics for this pool (use for VitastorFS pools)\n"
" --pg_stripe_size <number> Increase object grouping stripe\n"
" --max_osd_combinations 10000 Maximum number of random combinations for LP solver input\n"
" --wait Wait for the new pool to come online\n"
@ -142,7 +143,7 @@ static const char* help_text =
"vitastor-cli modify-pool|pool-modify <id|name> [--name <new_name>] [PARAMETERS...]\n"
" Modify an existing pool. Modifiable parameters:\n"
" [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]\n"
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>]\n"
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--no_inode_stats 0|1]\n"
" [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]\n"
" Non-modifiable parameters (changing them WILL lead to data loss):\n"
" [--block_size <size>] [--bitmap_granularity <size>]\n"

View File

@ -81,6 +81,11 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
}
value = value.uint64_value();
}
else if (key == "no_inode_stats" && value.bool_value())
{
// Leave true, remove false
value = true;
}
else if (key == "name" || key == "scheme" || key == "immediate_commit" ||
key == "failure_domain" || key == "root_node" || key == "scrub_interval")
{
@ -248,7 +253,7 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
// immediate_commit
if (!cfg["immediate_commit"].is_null() && !etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value()))
{
return "immediate_commit must be one of \"all\", \"small\", or \"none\", but it is "+cfg["scrub_interval"].as_string();
return "immediate_commit must be one of \"all\", \"small\", or \"none\", but it is "+cfg["immediate_commit"].as_string();
}
// scrub_interval

View File

@ -529,6 +529,8 @@ resume_3:
st["block_size_fmt"] = format_size(st["block_size"].uint64_value());
if (st["bitmap_granularity"].uint64_value())
st["bitmap_granularity_fmt"] = format_size(st["bitmap_granularity"].uint64_value());
if (st["no_inode_stats"].bool_value())
st["inode_stats_fmt"] = "disabled";
}
// All pool parameters are only displayed in the "detailed" mode
// because there's too many of them to show them in table
@ -547,6 +549,7 @@ resume_3:
{ "bitmap_granularity_fmt", "Bitmap granularity" },
{ "immediate_commit", "Immediate commit" },
{ "scrub_interval", "Scrub interval" },
{ "inode_stats_fmt", "Per-inode stats" },
{ "pg_stripe_size", "PG stripe size" },
{ "max_osd_combinations", "Max OSD combinations" },
{ "total_fmt", "Total" },

View File

@ -863,6 +863,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
pc.scrub_interval = parse_time(pool_item.second["scrub_interval"].string_value());
if (!pc.scrub_interval)
pc.scrub_interval = 0;
// Disable per-inode stats
pc.no_inode_stats = pool_item.second["no_inode_stats"].bool_value();
// Immediate Commit Mode
pc.immediate_commit = pool_item.second["immediate_commit"].is_string()
? parse_immediate_commit(pool_item.second["immediate_commit"].string_value())

View File

@ -60,6 +60,7 @@ struct pool_config_t
uint64_t pg_stripe_size;
std::map<pg_num_t, pg_config_t> pg_config;
uint64_t scrub_interval;
bool no_inode_stats;
};
struct inode_config_t

View File

@ -239,6 +239,7 @@ class osd_t
void report_statistics();
void report_pg_state(pg_t & pg);
void report_pg_states();
void apply_no_inode_stats();
void apply_pg_count();
void apply_pg_config();

View File

@ -388,9 +388,18 @@ void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes
etcd_global_config = changes[st_cli.etcd_prefix+"/config/global"].value.object_items();
parse_config(false);
}
bool pools = changes.find(st_cli.etcd_prefix+"/config/pools") != changes.end();
if (pools)
{
apply_no_inode_stats();
}
if (run_primary)
{
apply_pg_count();
bool pgs = changes.find(st_cli.etcd_prefix+"/config/pgs") != changes.end();
if (pools || pgs)
{
apply_pg_count();
}
apply_pg_config();
}
}
@ -414,6 +423,8 @@ void osd_t::on_reload_config_hook(json11::Json::object & global_config)
// Acquire lease
void osd_t::acquire_lease()
{
// Apply no_inode_stats before the first statistics report
apply_no_inode_stats();
// Maximum lease TTL is (report interval) + retries * (timeout + repeat interval)
st_cli.etcd_call("/lease/grant", json11::Json::object {
{ "TTL", etcd_report_interval+(st_cli.max_etcd_attempts*(2*st_cli.etcd_quick_timeout)+999)/1000 }
@ -602,11 +613,32 @@ void osd_t::on_load_pgs_hook(bool success)
else
{
peering_state &= ~OSD_LOADING_PGS;
apply_pg_count();
apply_pg_config();
apply_no_inode_stats();
if (run_primary)
{
apply_pg_count();
apply_pg_config();
}
}
}
void osd_t::apply_no_inode_stats()
{
if (!bs)
{
return;
}
std::vector<uint64_t> no_inode_stats;
for (auto & pool_item: st_cli.pool_config)
{
if (pool_item.second.no_inode_stats)
{
no_inode_stats.push_back(pool_item.first);
}
}
bs->set_no_inode_stats(no_inode_stats);
}
void osd_t::apply_pg_count()
{
for (auto & pool_item: st_cli.pool_config)