Allow disabling per-inode stats for VitastorFS pools
parent
ecfc753e93
commit
3aee37eadd
|
@ -267,6 +267,7 @@ Optional parameters:
|
|||
| `--immediate_commit none` | Put pool only on OSDs with this or larger immediate_commit (none < small < all) |
|
||||
| `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags |
|
||||
| `--scrub_interval <time>` | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y |
|
||||
| `--no_inode_stats 1` | Disable per-inode statistics for this pool (use for VitastorFS pools) |
|
||||
| `--pg_stripe_size <number>` | Increase object grouping stripe |
|
||||
| `--max_osd_combinations 10000` | Maximum number of random combinations for LP solver input |
|
||||
| `--wait` | Wait for the new pool to come online |
|
||||
|
@ -288,7 +289,7 @@ Modify an existing pool. Modifiable parameters:
|
|||
|
||||
```
|
||||
[-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]
|
||||
[--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>]
|
||||
[--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--no_inode_stats 0|1]
|
||||
[--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]
|
||||
```
|
||||
|
||||
|
|
|
@ -1737,8 +1737,11 @@ class Mon
|
|||
for (const inode_num in this.state.osd.space[osd_num][pool_id])
|
||||
{
|
||||
const u = BigInt(this.state.osd.space[osd_num][pool_id][inode_num]||0);
|
||||
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
|
||||
inode_stats[pool_id][inode_num].raw_used += u;
|
||||
if (inode_num)
|
||||
{
|
||||
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
|
||||
inode_stats[pool_id][inode_num].raw_used += u;
|
||||
}
|
||||
this.state.pool.stats[pool_id].used_raw_tb += u;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -82,3 +82,8 @@ uint32_t blockstore_t::get_bitmap_granularity()
|
|||
{
|
||||
return impl->get_bitmap_granularity();
|
||||
}
|
||||
|
||||
// Set per-pool no_inode_stats: pool_ids is the list of pool IDs passed on to the blockstore.
// This wrapper does no work of its own, it only forwards pool_ids to the implementation object (impl).
void blockstore_t::set_no_inode_stats(const std::vector<uint64_t> & pool_ids)
|
||||
{
|
||||
impl->set_no_inode_stats(pool_ids);
|
||||
}
|
||||
|
|
|
@ -216,6 +216,9 @@ public:
|
|||
// Get per-inode space usage statistics
|
||||
std::map<uint64_t, uint64_t> & get_inode_space_stats();
|
||||
|
||||
// Set per-pool no_inode_stats
|
||||
void set_no_inode_stats(const std::vector<uint64_t> & pool_ids);
|
||||
|
||||
// Print diagnostics to stdout
|
||||
void dump_diagnostics();
|
||||
|
||||
|
|
|
@ -733,3 +733,86 @@ void blockstore_impl_t::disk_error_abort(const char *op, int retval, int expecte
|
|||
fprintf(stderr, "Disk %s failed: result is %d, expected %d. Can't continue, sorry :-(\n", op, retval, expected);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void blockstore_impl_t::set_no_inode_stats(const std::vector<uint64_t> & pool_ids)
|
||||
{
|
||||
for (auto & np: no_inode_stats)
|
||||
{
|
||||
np.second = 2;
|
||||
}
|
||||
for (auto pool_id: pool_ids)
|
||||
{
|
||||
if (!no_inode_stats[pool_id])
|
||||
recalc_inode_space_stats(pool_id, false);
|
||||
no_inode_stats[pool_id] = 1;
|
||||
}
|
||||
for (auto np_it = no_inode_stats.begin(); np_it != no_inode_stats.end(); )
|
||||
{
|
||||
if (np_it->second == 2)
|
||||
{
|
||||
recalc_inode_space_stats(np_it->first, true);
|
||||
no_inode_stats.erase(np_it++);
|
||||
}
|
||||
else
|
||||
np_it++;
|
||||
}
|
||||
}
|
||||
|
||||
void blockstore_impl_t::recalc_inode_space_stats(uint64_t pool_id, bool per_inode)
|
||||
{
|
||||
auto sp_begin = inode_space_stats.lower_bound((pool_id << (64-POOL_ID_BITS)));
|
||||
auto sp_end = inode_space_stats.lower_bound(((pool_id+1) << (64-POOL_ID_BITS)));
|
||||
inode_space_stats.erase(sp_begin, sp_end);
|
||||
auto sh_it = clean_db_shards.lower_bound((pool_id << (64-POOL_ID_BITS)));
|
||||
while (sh_it != clean_db_shards.end() &&
|
||||
(sh_it->first >> (64-POOL_ID_BITS)) == pool_id)
|
||||
{
|
||||
for (auto & pair: sh_it->second)
|
||||
{
|
||||
uint64_t space_id = per_inode ? pair.first.inode : (pool_id << (64-POOL_ID_BITS));
|
||||
inode_space_stats[space_id] += dsk.data_block_size;
|
||||
}
|
||||
sh_it++;
|
||||
}
|
||||
object_id last_oid = {};
|
||||
bool last_exists = false;
|
||||
auto dirty_it = dirty_db.lower_bound((obj_ver_id){ .oid = { .inode = (pool_id << (64-POOL_ID_BITS)) } });
|
||||
while (dirty_it != dirty_db.end() && (dirty_it->first.oid.inode >> (64-POOL_ID_BITS)) == pool_id)
|
||||
{
|
||||
if (IS_STABLE(dirty_it->second.state) && (IS_BIG_WRITE(dirty_it->second.state) || IS_DELETE(dirty_it->second.state)))
|
||||
{
|
||||
bool exists = false;
|
||||
if (last_oid == dirty_it->first.oid)
|
||||
{
|
||||
exists = last_exists;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & clean_db = clean_db_shard(dirty_it->first.oid);
|
||||
auto clean_it = clean_db.find(dirty_it->first.oid);
|
||||
exists = clean_it != clean_db.end();
|
||||
}
|
||||
uint64_t space_id = per_inode ? dirty_it->first.oid.inode : (pool_id << (64-POOL_ID_BITS));
|
||||
if (IS_BIG_WRITE(dirty_it->second.state))
|
||||
{
|
||||
if (!exists)
|
||||
inode_space_stats[space_id] += dsk.data_block_size;
|
||||
last_exists = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (exists)
|
||||
{
|
||||
auto & sp = inode_space_stats[space_id];
|
||||
if (sp > dsk.data_block_size)
|
||||
sp -= dsk.data_block_size;
|
||||
else
|
||||
inode_space_stats.erase(space_id);
|
||||
}
|
||||
last_exists = false;
|
||||
}
|
||||
last_oid = dirty_it->first.oid;
|
||||
}
|
||||
dirty_it++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -272,6 +272,7 @@ class blockstore_impl_t
|
|||
|
||||
std::map<pool_id_t, pool_shard_settings_t> clean_db_settings;
|
||||
std::map<pool_pg_id_t, blockstore_clean_db_t> clean_db_shards;
|
||||
std::map<uint64_t, int> no_inode_stats;
|
||||
uint8_t *clean_bitmaps = NULL;
|
||||
blockstore_dirty_db_t dirty_db;
|
||||
std::vector<blockstore_op_t*> submit_queue;
|
||||
|
@ -318,6 +319,7 @@ class blockstore_impl_t
|
|||
|
||||
blockstore_clean_db_t& clean_db_shard(object_id oid);
|
||||
void reshard_clean_db(pool_id_t pool_id, uint32_t pg_count, uint32_t pg_stripe_size);
|
||||
void recalc_inode_space_stats(uint64_t pool_id, bool per_inode);
|
||||
|
||||
// Journaling
|
||||
void prepare_journal_sector_write(int sector, blockstore_op_t *op);
|
||||
|
@ -428,6 +430,9 @@ public:
|
|||
// Space usage statistics
|
||||
std::map<uint64_t, uint64_t> inode_space_stats;
|
||||
|
||||
// Set per-pool no_inode_stats
|
||||
void set_no_inode_stats(const std::vector<uint64_t> & pool_ids);
|
||||
|
||||
// Print diagnostics to stdout
|
||||
void dump_diagnostics();
|
||||
|
||||
|
|
|
@ -487,18 +487,24 @@ void blockstore_impl_t::mark_stable(obj_ver_id v, bool forget_dirty)
|
|||
}
|
||||
if (!exists)
|
||||
{
|
||||
inode_space_stats[dirty_it->first.oid.inode] += dsk.data_block_size;
|
||||
uint64_t space_id = dirty_it->first.oid.inode;
|
||||
if (no_inode_stats[dirty_it->first.oid.inode >> (64-POOL_ID_BITS)])
|
||||
space_id = space_id & ~(((uint64_t)1 << (64-POOL_ID_BITS)) - 1);
|
||||
inode_space_stats[space_id] += dsk.data_block_size;
|
||||
used_blocks++;
|
||||
}
|
||||
big_to_flush++;
|
||||
}
|
||||
else if (IS_DELETE(dirty_it->second.state))
|
||||
{
|
||||
auto & sp = inode_space_stats[dirty_it->first.oid.inode];
|
||||
uint64_t space_id = dirty_it->first.oid.inode;
|
||||
if (no_inode_stats[dirty_it->first.oid.inode >> (64-POOL_ID_BITS)])
|
||||
space_id = space_id & ~(((uint64_t)1 << (64-POOL_ID_BITS)) - 1);
|
||||
auto & sp = inode_space_stats[space_id];
|
||||
if (sp > dsk.data_block_size)
|
||||
sp -= dsk.data_block_size;
|
||||
else
|
||||
inode_space_stats.erase(dirty_it->first.oid.inode);
|
||||
inode_space_stats.erase(space_id);
|
||||
used_blocks--;
|
||||
big_to_flush++;
|
||||
}
|
||||
|
|
|
@ -131,6 +131,7 @@ static const char* help_text =
|
|||
" --immediate_commit none Put pool only on OSDs with this or larger immediate_commit (none < small < all)\n"
|
||||
" --primary_affinity_tags tags Prefer to put primary copies on OSDs with all specified tags\n"
|
||||
" --scrub_interval <time> Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y\n"
|
||||
" --no_inode_stats 1 Disable per-inode statistics for this pool (use for VitastorFS pools)\n"
|
||||
" --pg_stripe_size <number> Increase object grouping stripe\n"
|
||||
" --max_osd_combinations 10000 Maximum number of random combinations for LP solver input\n"
|
||||
" --wait Wait for the new pool to come online\n"
|
||||
|
@ -142,7 +143,7 @@ static const char* help_text =
|
|||
"vitastor-cli modify-pool|pool-modify <id|name> [--name <new_name>] [PARAMETERS...]\n"
|
||||
" Modify an existing pool. Modifiable parameters:\n"
|
||||
" [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]\n"
|
||||
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>]\n"
|
||||
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--no_inode_stats 0|1]\n"
|
||||
" [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]\n"
|
||||
" Non-modifiable parameters (changing them WILL lead to data loss):\n"
|
||||
" [--block_size <size>] [--bitmap_granularity <size>]\n"
|
||||
|
|
|
@ -81,6 +81,11 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
|||
}
|
||||
value = value.uint64_value();
|
||||
}
|
||||
else if (key == "no_inode_stats" && value.bool_value())
|
||||
{
|
||||
// Leave true, remove false
|
||||
value = true;
|
||||
}
|
||||
else if (key == "name" || key == "scheme" || key == "immediate_commit" ||
|
||||
key == "failure_domain" || key == "root_node" || key == "scrub_interval")
|
||||
{
|
||||
|
@ -248,7 +253,7 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
|||
// immediate_commit
|
||||
if (!cfg["immediate_commit"].is_null() && !etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value()))
|
||||
{
|
||||
return "immediate_commit must be one of \"all\", \"small\", or \"none\", but it is "+cfg["scrub_interval"].as_string();
|
||||
return "immediate_commit must be one of \"all\", \"small\", or \"none\", but it is "+cfg["immediate_commit"].as_string();
|
||||
}
|
||||
|
||||
// scrub_interval
|
||||
|
|
|
@ -529,6 +529,8 @@ resume_3:
|
|||
st["block_size_fmt"] = format_size(st["block_size"].uint64_value());
|
||||
if (st["bitmap_granularity"].uint64_value())
|
||||
st["bitmap_granularity_fmt"] = format_size(st["bitmap_granularity"].uint64_value());
|
||||
if (st["no_inode_stats"].bool_value())
|
||||
st["inode_stats_fmt"] = "disabled";
|
||||
}
|
||||
// All pool parameters are only displayed in the "detailed" mode
|
||||
// because there's too many of them to show them in table
|
||||
|
@ -547,6 +549,7 @@ resume_3:
|
|||
{ "bitmap_granularity_fmt", "Bitmap granularity" },
|
||||
{ "immediate_commit", "Immediate commit" },
|
||||
{ "scrub_interval", "Scrub interval" },
|
||||
{ "inode_stats_fmt", "Per-inode stats" },
|
||||
{ "pg_stripe_size", "PG stripe size" },
|
||||
{ "max_osd_combinations", "Max OSD combinations" },
|
||||
{ "total_fmt", "Total" },
|
||||
|
|
|
@ -863,6 +863,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
|||
pc.scrub_interval = parse_time(pool_item.second["scrub_interval"].string_value());
|
||||
if (!pc.scrub_interval)
|
||||
pc.scrub_interval = 0;
|
||||
// Disable per-inode stats
|
||||
pc.no_inode_stats = pool_item.second["no_inode_stats"].bool_value();
|
||||
// Immediate Commit Mode
|
||||
pc.immediate_commit = pool_item.second["immediate_commit"].is_string()
|
||||
? parse_immediate_commit(pool_item.second["immediate_commit"].string_value())
|
||||
|
|
|
@ -60,6 +60,7 @@ struct pool_config_t
|
|||
uint64_t pg_stripe_size;
|
||||
std::map<pg_num_t, pg_config_t> pg_config;
|
||||
uint64_t scrub_interval;
|
||||
bool no_inode_stats;
|
||||
};
|
||||
|
||||
struct inode_config_t
|
||||
|
|
|
@ -239,6 +239,7 @@ class osd_t
|
|||
void report_statistics();
|
||||
void report_pg_state(pg_t & pg);
|
||||
void report_pg_states();
|
||||
void apply_no_inode_stats();
|
||||
void apply_pg_count();
|
||||
void apply_pg_config();
|
||||
|
||||
|
|
|
@ -388,9 +388,18 @@ void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes
|
|||
etcd_global_config = changes[st_cli.etcd_prefix+"/config/global"].value.object_items();
|
||||
parse_config(false);
|
||||
}
|
||||
bool pools = changes.find(st_cli.etcd_prefix+"/config/pools") != changes.end();
|
||||
if (pools)
|
||||
{
|
||||
apply_no_inode_stats();
|
||||
}
|
||||
if (run_primary)
|
||||
{
|
||||
apply_pg_count();
|
||||
bool pgs = changes.find(st_cli.etcd_prefix+"/config/pgs") != changes.end();
|
||||
if (pools || pgs)
|
||||
{
|
||||
apply_pg_count();
|
||||
}
|
||||
apply_pg_config();
|
||||
}
|
||||
}
|
||||
|
@ -414,6 +423,8 @@ void osd_t::on_reload_config_hook(json11::Json::object & global_config)
|
|||
// Acquire lease
|
||||
void osd_t::acquire_lease()
|
||||
{
|
||||
// Apply no_inode_stats before the first statistics report
|
||||
apply_no_inode_stats();
|
||||
// Maximum lease TTL is (report interval) + retries * (timeout + repeat interval)
|
||||
st_cli.etcd_call("/lease/grant", json11::Json::object {
|
||||
{ "TTL", etcd_report_interval+(st_cli.max_etcd_attempts*(2*st_cli.etcd_quick_timeout)+999)/1000 }
|
||||
|
@ -602,11 +613,32 @@ void osd_t::on_load_pgs_hook(bool success)
|
|||
else
|
||||
{
|
||||
peering_state &= ~OSD_LOADING_PGS;
|
||||
apply_pg_count();
|
||||
apply_pg_config();
|
||||
apply_no_inode_stats();
|
||||
if (run_primary)
|
||||
{
|
||||
apply_pg_count();
|
||||
apply_pg_config();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void osd_t::apply_no_inode_stats()
|
||||
{
|
||||
if (!bs)
|
||||
{
|
||||
return;
|
||||
}
|
||||
std::vector<uint64_t> no_inode_stats;
|
||||
for (auto & pool_item: st_cli.pool_config)
|
||||
{
|
||||
if (pool_item.second.no_inode_stats)
|
||||
{
|
||||
no_inode_stats.push_back(pool_item.first);
|
||||
}
|
||||
}
|
||||
bs->set_no_inode_stats(no_inode_stats);
|
||||
}
|
||||
|
||||
void osd_t::apply_pg_count()
|
||||
{
|
||||
for (auto & pool_item: st_cli.pool_config)
|
||||
|
|
Loading…
Reference in New Issue