Rework & fix pool-create / pool-modify / pool-ls

master
Vitaliy Filippov 2024-02-23 02:41:10 +03:00
parent dc92851322
commit 86243b7101
13 changed files with 885 additions and 1368 deletions

View File

@ -113,104 +113,53 @@ static const char* help_text =
" With --dry-run only checks if deletion is possible without data loss and\n"
" redundancy degradation.\n"
"\n"
"vitastor-cli create-pool <name> --scheme <scheme> -s <pg_size> --pg_minsize <pg_minsize> -n <pg_count> --parity_chunks <number> [OPTIONS]\n"
" Create a pool.\n"
" --scheme <scheme>\n"
" Redundancy scheme used for data in this pool. One of: \"replicated\", \"xor\", \"ec\" or \"jerasure\".\n"
" It's \"replicated\" by default.\n"
" --ec <N>+<K>\n"
" Shortcut for 'ec' scheme. scheme = ec, pg_size = N+K, parity_chunks = K.\n"
" -s|--pg_size <size>\n"
" Total number of disks for PGs of this pool - i.e., number of replicas for replicated pools and number of data plus parity disks for EC/XOR pools.\n"
" --pg_minsize <size>\n"
" Number of available live OSDs for PGs of this pool to remain active.\n"
" -n|--pg_count <count>\n"
" Number of PGs for this pool.\n"
" --parity_chunks <number>\n"
" Number of parity chunks for EC/XOR pools\n"
" -f|--force\n"
" Proceed without checking pool/OSD params (pg_size, block_size, bitmap_granularity, and immediate_commit).\n"
" --failure_domain <failure_domain>\n"
" Failure domain specification. Must be \"host\" or \"osd\" or refer to one of the placement tree levels, defined in placement_levels.\n"
" --max_osd_combinations <number>\n"
" This parameter specifies the maximum number of combinations to generate when optimising PG placement.\n"
" --block_size <size>\n"
" Block size for this pool.\n"
" --bitmap_granularity <granularity>\n"
" \"Sector\" size of virtual disks in this pool.\n"
" --immediate_commit <all|small|none>\n"
" Immediate commit setting for this pool. One of \"all\", \"small\" and \"none\".\n"
" --pg_stripe_size <size>\n"
" Specifies the stripe size for this pool according to which images are split into different PGs.\n"
" --root_node <node>\n"
" Specifies the root node of the OSD tree to restrict this pool OSDs to.\n"
" --osd_tags <tags>\n"
" Specifies OSD tags to restrict this pool to.\n"
" Example: --osd_tags tag0 or --osd_tags tag0,tag1\n"
" --primary_affinity_tags <tags>\n"
" Specifies OSD tags to prefer putting primary OSDs in this pool to.\n"
" Example: --primary_affinity_tags tag0 or --primary_affinity_tags tag0,tag1\n"
" --scrub_interval <time_interval>\n"
" Automatic scrubbing interval for this pool. Format: number + unit s/m/h/d/M/y.\n"
"vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]\n"
" Create a pool. Required parameters:\n"
" -s|--pg_size R Number of replicas for replicated pools\n"
" --ec N+K Number of data (N) and parity (K) chunks for erasure-coded pools\n"
" -n|--pg_count N PG count for the new pool (start with 10*<OSD count>/pg_size rounded to a power of 2)\n"
" Optional parameters:\n"
" --pg_minsize <number> R or N+K minus number of failures to tolerate without downtime\n"
" --failure_domain host Failure domain: host, osd or a level from placement_levels. Default: host\n"
" --root_node <node> Put pool on child OSDs of this placement tree node\n"
" --osd_tags <tag>[,<tag>]... Put pool on OSDs tagged with all specified tags\n"
" --block_size 128k Put pool on OSDs with this data block size\n"
" --bitmap_granularity 4k Put pool on OSDs with this logical sector size\n"
" --immediate_commit none Put pool on OSDs with this or larger immediate_commit (none < small < all)\n"
" --primary_affinity_tags tags Prefer to put primary copies on OSDs with all specified tags\n"
" --scrub_interval <time> Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y\n"
" --pg_stripe_size <number> Increase object grouping stripe. Default: block_size*data_parts\n"
" --max_osd_combinations 10000 Maximum number of random combinations for LP solver input. Default: 10000\n"
" --wait Wait for the new pool to come online\n"
" -f|--force Do not check that cluster has enough OSDs to create the pool\n"
" Examples:\n"
" vitastor-cli create-pool test_x4 -s 4 -n 32\n"
" vitastor-cli create-pool test_ec42 --ec 4+2 -n 32\n"
" vitastor-cli create-pool test_x4 -s 4 -n 32\n"
" vitastor-cli create-pool test_ec42 --ec 4+2 -n 32\n"
"\n"
"vitastor-cli modify-pool <id|name> [--name <new_name>] [-s <pg_size>] [--pg_minsize <pg_minsize>] [-n <pg_count>] [OPTIONS]\n"
" Modify an existing pool.\n"
" --name <new_name>\n"
" Change name of this pool.\n"
" -s|--pg_size <size>\n"
" Total number of disks for PGs of this pool - i.e., number of replicas for replicated pools and number of data plus parity disks for EC/XOR pools.\n"
" --pg_minsize <size>\n"
" Number of available live OSDs for PGs of this pool to remain active.\n"
" -n|--pg_count <count>\n"
" Number of PGs for this pool.\n"
" -f|--force\n"
" Proceed without checking pool/OSD params (block_size, bitmap_granularity and immediate_commit).\n"
" --failure_domain <failure_domain>\n"
" Failure domain specification. Must be \"host\" or \"osd\" or refer to one of the placement tree levels, defined in placement_levels.\n"
" --max_osd_combinations <number>\n"
" This parameter specifies the maximum number of combinations to generate when optimising PG placement.\n"
" --block_size <size>\n"
" Block size for this pool.\n"
" --immediate_commit <all|small|none>\n"
" Immediate commit setting for this pool. One of \"all\", \"small\" and \"none\".\n"
" --pg_stripe_size <size>\n"
" Specifies the stripe size for this pool according to which images are split into different PGs.\n"
" --root_node <node>\n"
" Specifies the root node of the OSD tree to restrict this pool OSDs to.\n"
" --osd_tags <tags>\n"
" Specifies OSD tags to restrict this pool to.\n"
" Example: --osd_tags tag0 or --osd_tags tag0,tag1\n"
" --primary_affinity_tags <tags>\n"
" Specifies OSD tags to prefer putting primary OSDs in this pool to.\n"
" Example: --primary_affinity_tags tag0 or --primary_affinity_tags tag0,tag1\n"
" --scrub_interval <time_interval>\n"
" Automatic scrubbing interval for this pool. Format: number + unit s/m/h/d/M/y.\n"
"vitastor-cli modify-pool|pool-modify <id|name> [--name <new_name>] [PARAMETERS...]\n"
" Modify an existing pool. Modifiable parameters:\n"
" [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]\n"
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>]\n"
" [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]\n"
" Non-modifiable parameters (changing them WILL lead to data loss):\n"
" [--block_size <size>] [--bitmap_granularity <size>]\n"
" [--immediate_commit <all|small|none>] [--pg_stripe_size <size>]\n"
" These, however, can still be modified with -f|--force.\n"
" See create-pool for parameter descriptions.\n"
" Examples:\n"
" vitastor-cli modify-pool pool_A -name pool_B\n"
" vitastor-cli modify-pool 2 -s 4 -n 128 --block_size 262144\n"
" vitastor-cli modify-pool pool_A --name pool_B\n"
" vitastor-cli modify-pool 2 --pg_size 4 -n 128\n"
"\n"
"vitastor-cli rm-pool [--force] <id|name>\n"
" Remove existing pool from cluster.\n"
" Refuses to remove pools with related Image and/or Snapshot data without --force.\n"
" Examples:\n"
" vitastor-cli rm-pool test_pool\n"
" vitastor-cli rm-pool --force 2\n"
"vitastor-cli rm-pool|pool-rm [--force] <id|name>\n"
" Remove existing pool. Refuses to remove pools with data without --force.\n"
"\n"
"vitastor-cli ls-pool [-l] [-p POOL] [--sort FIELD] [-r] [-n N] [--stats] [<glob> ...]\n"
" List pool (only matching <glob> patterns if passed).\n"
" -p|--pool POOL Show in detail pool ID or name\n"
" -l|--long Show all available field\n"
" --sort FIELD Sort by specified field (id, name, pg_count, scheme_name, used_byte, total, max_available, used_pct, space_efficiency, status, restore, root_node, failure_domain, osd_tags, primary_affinity_tags)\n"
"vitastor-cli ls-pools|pool-ls|ls-pool|pools [-l] [--detail] [--sort FIELD] [-r] [-n N] [--stats] [<glob> ...]\n"
" List pools (only matching <glob> patterns if passed).\n"
" -l|--long Also report PG states and I/O statistics\n"
" --detail Use list format (not table), show all details\n"
" --sort FIELD Sort by specified field\n"
" -r|--reverse Sort in descending order\n"
" -n|--count N Only list first N items\n"
" --stats Performance statistics\n"
" Examples:\n"
" vitastor-cli ls-pool -l\n"
" vitastor-cli ls-pool -l --sort pool_name\n"
" vitastor-cli ls-pool -p 2\n"
"\n"
"Use vitastor-cli --help <command> for command details or vitastor-cli --help --all for all details.\n"
"\n"
@ -265,15 +214,15 @@ static json11::Json::object parse_args(int narg, const char *args[])
else if (args[i][0] == '-' && args[i][1] == '-')
{
const char *opt = args[i]+2;
if (!strcmp(opt, "json") ||
if (!strcmp(opt, "json") || !strcmp(opt, "wait") ||
!strcmp(opt, "wait-list") || !strcmp(opt, "wait_list") ||
!strcmp(opt, "long") || !strcmp(opt, "del") ||
!strcmp(opt, "long") || !strcmp(opt, "detail") || !strcmp(opt, "del") ||
!strcmp(opt, "no-color") || !strcmp(opt, "no_color") ||
!strcmp(opt, "readonly") || !strcmp(opt, "readwrite") ||
!strcmp(opt, "force") || !strcmp(opt, "reverse") ||
!strcmp(opt, "allow-data-loss") || !strcmp(opt, "allow_data_loss") ||
!strcmp(opt, "dry-run") || !strcmp(opt, "dry_run") ||
!strcmp(opt, "help") || !strcmp(opt, "all") || !strcmp(opt, "stats") ||
!strcmp(opt, "help") || !strcmp(opt, "all") ||
!strcmp(opt, "writers-stopped") || !strcmp(opt, "writers_stopped"))
{
cfg[opt] = "1";
@ -324,7 +273,6 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
else if (cmd[0] == "df")
{
// Show pool space stats
cfg["dfformat"] = "1";
action_cb = p->start_pool_ls(cfg);
}
else if (cmd[0] == "ls")
@ -432,7 +380,7 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
// Allocate a new OSD number
action_cb = p->start_alloc_osd(cfg);
}
else if (cmd[0] == "create-pool")
else if (cmd[0] == "create-pool" || cmd[0] == "pool-create")
{
// Create a new pool
if (cmd.size() > 1 && cfg["name"].is_null())
@ -441,16 +389,16 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
}
action_cb = p->start_pool_create(cfg);
}
else if (cmd[0] == "modify-pool")
else if (cmd[0] == "modify-pool" || cmd[0] == "pool-modify")
{
// Modify existing pool
if (cmd.size() > 1)
{
cfg["pool"] = cmd[1];
cfg["old_name"] = cmd[1];
}
action_cb = p->start_pool_modify(cfg);
}
else if (cmd[0] == "rm-pool")
else if (cmd[0] == "rm-pool" || cmd[0] == "pool-rm")
{
// Remove existing pool
if (cmd.size() > 1)
@ -459,9 +407,10 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
}
action_cb = p->start_pool_rm(cfg);
}
else if (cmd[0] == "ls-pool")
else if (cmd[0] == "ls-pool" || cmd[0] == "pool-ls" || cmd[0] == "ls-pools" || cmd[0] == "pools")
{
// Show pool list
cfg["show_recovery"] = 1;
if (cmd.size() > 1)
{
cmd.erase(cmd.begin(), cmd.begin()+1);

View File

@ -81,8 +81,12 @@ public:
std::string print_table(json11::Json items, json11::Json header, bool use_esc);
std::string print_detail(json11::Json item, std::vector<std::pair<std::string, std::string>> names, bool use_esc);
std::string format_lat(uint64_t lat);
std::string format_q(double depth);
bool stupid_glob(const std::string str, const std::string glob);
std::string implode(const std::string & sep, json11::Json array);

View File

@ -126,32 +126,6 @@ void cli_tool_t::parse_config(json11::Json::object & cfg)
list_first = cfg["wait_list"].uint64_value() ? true : false;
}
json11::Json cli_tool_t::parse_tags(std::string tags)
{
    // Accepts either a single tag ("tag0") or a comma-separated list
    // ("tag0,tag1,tag2"). A single tag is returned as a plain JSON string;
    // a list is returned as a JSON array with empty entries dropped.
    if (tags.find(',') == std::string::npos)
    {
        // No comma - treat the whole value as one tag
        return tags;
    }
    json11::Json::array result;
    size_t begin = 0;
    while (begin <= tags.size())
    {
        size_t end = tags.find(',', begin);
        if (end == std::string::npos)
        {
            end = tags.size();
        }
        if (end > begin)
        {
            // Skip zero-length segments produced by ",," or leading/trailing commas
            result.push_back(tags.substr(begin, end-begin));
        }
        begin = end+1;
    }
    return result;
}
struct cli_result_looper_t
{
ring_consumer_t consumer;

View File

@ -1,473 +1,263 @@
/*
=========================================================================
Copyright (c) 2023 MIND Software LLC. All Rights Reserved.
This file is part of the Software-Defined Storage MIND UStor Project.
For more information about this product, please visit https://mindsw.io
or contact us directly at info@mindsw.io
=========================================================================
*/
// Copyright (c) Vitaliy Filippov, 2024
// License: VNPL-1.1 (see README.md for details)
#include "cli_pool_cfg.h"
#include "etcd_state_client.h"
#include "str_util.h"
bool pool_configurator_t::is_valid_scheme_string(std::string scheme_str)
std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json old_cfg,
uint64_t global_block_size, uint64_t global_bitmap_granularity, bool force)
{
if (scheme_str != "replicated" && scheme_str != "xor" && scheme_str != "ec")
// short option names
if (new_cfg.find("count") != new_cfg.end())
{
error = "Coding scheme should be one of \"xor\", \"replicated\", \"ec\" or \"jerasure\"";
return false;
new_cfg["pg_count"] = new_cfg["count"];
new_cfg.erase("count");
}
return true;
}
bool pool_configurator_t::is_valid_immediate_commit_string(std::string immediate_commit_str)
{
if (immediate_commit != "" && immediate_commit != "all" && immediate_commit != "small" && immediate_commit != "none")
if (new_cfg.find("size") != new_cfg.end())
{
error = "Immediate Commit should be one of \"all\", \"small\", or \"none\"";
return false;
new_cfg["pg_size"] = new_cfg["size"];
new_cfg.erase("size");
}
return true;
}
std::string pool_configurator_t::get_error_string()
{
return error;
}
bool pool_configurator_t::parse(json11::Json cfg, bool new_pool)
{
if (new_pool) // New pool configuration
// --ec shortcut
if (new_cfg.find("ec") != new_cfg.end())
{
// Pool name (req)
name = cfg["name"].string_value();
if (name == "")
if (new_cfg.find("scheme") != new_cfg.end() ||
new_cfg.find("pg_size") != new_cfg.end() ||
new_cfg.find("parity_chunks") != new_cfg.end())
{
error = "Pool name must be given";
return false;
return "--ec can't be used with --pg_size, --parity_chunks or --scheme";
}
// Exclusive ec shortcut check
if (!cfg["ec"].is_null() &&
(!cfg["scheme"].is_null() || !cfg["size"].is_null() || !cfg["pg_size"].is_null() || !cfg["parity_chunks"].is_null()))
// pg_size = N+K
// parity_chunks = K
uint64_t data_chunks = 0, parity_chunks = 0;
char null_byte = 0;
int ret = sscanf(new_cfg["ec"].string_value().c_str(), "%ju+%ju%c", &data_chunks, &parity_chunks, &null_byte);
if (ret != 2 || !data_chunks || !parity_chunks)
{
error = "You cannot use 'ec' shortcut together with PG size, parity chunks and scheme arguments";
return false;
return "--ec should be <N>+<K> format (<N>, <K> - numbers)";
}
new_cfg.erase("ec");
new_cfg["scheme"] = "ec";
new_cfg["pg_size"] = data_chunks+parity_chunks;
new_cfg["parity_chunks"] = parity_chunks;
}
// ec = N+K (opt)
if (cfg["ec"].is_string())
if (old_cfg.is_null() && new_cfg["scheme"].string_value() == "")
{
// Default scheme
new_cfg["scheme"] = "replicated";
}
if (old_cfg.is_null() && !new_cfg["pg_minsize"].uint64_value())
{
// Default pg_minsize
if (new_cfg["scheme"] == "replicated")
{
scheme = "ec";
// pg_size = N+K
// parity_chunks = K
int ret = sscanf(cfg["ec"].string_value().c_str(), "%lu+%lu", &pg_size, &parity_chunks);
if (ret != 2)
{
error = "Shortcut for 'ec' scheme has an invalid value. Format: --ec <N>+<K>";
return false;
}
if (!pg_size || !parity_chunks)
{
error = "<N>+<K> values for 'ec' scheme cannot be 0";
return false;
}
pg_size += parity_chunks;
// pg_minsize = (N+K > 2) ? 2 : 1
new_cfg["pg_minsize"] = new_cfg["pg_size"].uint64_value() > 2 ? 2 : 1;
}
// scheme (opt) + pg_size (req) + parity_chunks (req)
else
else // ec or xor
{
scheme = cfg["scheme"].is_string() ?
(cfg["scheme"].string_value() == "jerasure" ? "ec" : cfg["scheme"].string_value()) : "replicated";
// pg_minsize = (K > 1) ? N + 1 : N
new_cfg["pg_minsize"] = new_cfg["pg_size"].uint64_value() - new_cfg["parity_chunks"].uint64_value() +
(new_cfg["parity_chunks"].uint64_value() > 1 ? 1 : 0);
}
}
if (new_cfg["scheme"] != "ec")
{
new_cfg.erase("parity_chunks");
}
if (!is_valid_scheme_string(scheme))
// Check integer values and unknown keys
for (auto kv_it = new_cfg.begin(); kv_it != new_cfg.end(); )
{
auto & key = kv_it->first;
auto & value = kv_it->second;
if (key == "pg_size" || key == "parity_chunks" || key == "pg_minsize" ||
key == "pg_count" || key == "max_osd_combinations" || key == "block_size" ||
key == "bitmap_granularity" || key == "pg_stripe_size")
{
if (value.is_number() && value.uint64_value() != value.number_value() ||
value.is_string() && !value.uint64_value() && value.string_value() != "0")
{
return false;
}
if (!cfg["size"].is_null() && !cfg["pg_size"].is_null() ||
!cfg["size"].is_null() && !cfg["size"].uint64_value() ||
!cfg["pg_size"].is_null() && !cfg["pg_size"].uint64_value())
{
error = "PG size has an invalid value";
return false;
}
pg_size = !cfg["size"].is_null() ? cfg["size"].uint64_value() : cfg["pg_size"].uint64_value();
if (!pg_size)
{
error = "PG size must be given with value >= 1";
return false;
}
if (!cfg["parity_chunks"].is_null() && !cfg["parity_chunks"].uint64_value())
{
error = "Parity chunks has an invalid value";
return false;
}
parity_chunks = cfg["parity_chunks"].uint64_value();
if (scheme == "xor" && !parity_chunks)
{
parity_chunks = 1;
}
if (!parity_chunks)
{
error = "Parity Chunks must be given with value >= 1";
return false;
return key+" must be a non-negative integer";
}
}
// pg_minsize (opt)
if (cfg["pg_minsize"].uint64_value())
else if (key == "name" || key == "scheme" || key == "immediate_commit" ||
key == "failure_domain" || key == "root_node" || key == "scrub_interval")
{
pg_minsize = cfg["pg_minsize"].uint64_value();
// OK
}
else if (key == "osd_tags" || key == "primary_affinity_tags")
{
if (value.is_string())
{
value = explode(",", value.string_value(), true);
}
}
else
{
if (!cfg["pg_minsize"].is_null())
{
error = "PG minsize has an invalid value";
return false;
}
if (scheme == "replicated")
{
// pg_minsize = (N+K > 2) ? 2 : 1
pg_minsize = pg_size > 2 ? 2 : 1;
}
else // ec or xor
{
// pg_minsize = (K > 1) ? N + 1 : N
pg_minsize = pg_size - parity_chunks + (parity_chunks > 1 ? 1 : 0);
}
// Unknown parameter
new_cfg.erase(kv_it++);
continue;
}
if (!pg_minsize)
{
error = "PG minsize must be given with value >= 1";
return false;
}
// pg_count (req)
if (!cfg["count"].is_null() && !cfg["pg_count"].is_null() ||
!cfg["count"].is_null() && !cfg["count"].uint64_value() ||
!cfg["pg_count"].is_null() && !cfg["pg_count"].uint64_value())
{
error = "PG count has an invalid value";
return false;
}
pg_count = !cfg["count"].is_null() ? cfg["count"].uint64_value() : cfg["pg_count"].uint64_value();
if (!pg_count)
{
error = "PG count must be given with value >= 1";
return false;
}
// Optional params
failure_domain = cfg["failure_domain"].string_value();
if (!cfg["max_osd_combinations"].is_null() && !cfg["max_osd_combinations"].uint64_value())
{
error = "Max OSD combinations has an invalid value";
return false;
}
max_osd_combinations = cfg["max_osd_combinations"].uint64_value();
if (!cfg["block_size"].is_null() && !cfg["block_size"].uint64_value())
{
error = "Block size has an invalid value";
return false;
}
block_size = cfg["block_size"].uint64_value();
if (!cfg["bitmap_granularity"].is_null() && !cfg["bitmap_granularity"].uint64_value())
{
error = "Bitmap granularity has an invalid value";
return false;
}
bitmap_granularity = cfg["bitmap_granularity"].uint64_value();
if (!is_valid_immediate_commit_string(cfg["immediate_commit"].string_value()))
{
return false;
}
immediate_commit = cfg["immediate_commit"].string_value();
if (!cfg["pg_stripe_size"].is_null() && !cfg["pg_stripe_size"].uint64_value())
{
error = "PG stripe size has an invalid value";
return false;
}
pg_stripe_size = cfg["pg_stripe_size"].uint64_value();
root_node = cfg["root_node"].string_value();
osd_tags = cfg["osd_tags"].string_value();
primary_affinity_tags = cfg["primary_affinity_tags"].string_value();
scrub_interval = cfg["scrub_interval"].string_value();
kv_it++;
}
else // Modified pool configuration
// Merge with the old config
if (!old_cfg.is_null())
{
bool has_changes = false;
// Unsupported parameters
if (!cfg["scheme"].is_null() || !cfg["parity_chunks"].is_null() || !cfg["ec"].is_null() || !cfg["bitmap_granularity"].is_null())
for (auto & kv: old_cfg.object_items())
{
error = "Scheme, parity_chunks and bitmap_granularity parameters cannot be modified";
return false;
}
// Supported parameters
if (!cfg["name"].is_null())
{
name = cfg["name"].string_value();
has_changes = true;
}
if (!cfg["size"].is_null() || !cfg["pg_size"].is_null())
{
if (!cfg["size"].is_null() && !cfg["pg_size"].is_null())
if (new_cfg.find(kv.first) == new_cfg.end())
{
error = "Cannot use both size and pg_size parameters at the same time.";
return false;
new_cfg[kv.first] = kv.second;
}
else if (!cfg["size"].is_null() && !cfg["size"].uint64_value() ||
!cfg["pg_size"].is_null() && !cfg["pg_size"].uint64_value())
{
error = "PG size has an invalid value";
return false;
}
pg_size = !cfg["size"].is_null() ? cfg["size"].uint64_value() : cfg["pg_size"].uint64_value();
has_changes = true;
}
if (!cfg["pg_minsize"].is_null())
{
if (!cfg["pg_minsize"].uint64_value())
{
error = "PG minsize has an invalid value";
return false;
}
pg_minsize = cfg["pg_minsize"].uint64_value();
has_changes = true;
}
if (!cfg["count"].is_null() || !cfg["pg_count"].is_null())
{
if (!cfg["count"].is_null() && !cfg["pg_count"].is_null())
{
error = "Cannot use both count and pg_count parameters at the same time.";
return false;
}
else if (!cfg["count"].is_null() && !cfg["count"].uint64_value() ||
!cfg["pg_count"].is_null() && !cfg["pg_count"].uint64_value())
{
error = "PG count has an invalid value";
return false;
}
pg_count = !cfg["count"].is_null() ? cfg["count"].uint64_value() : cfg["pg_count"].uint64_value();
has_changes = true;
}
if (!cfg["failure_domain"].is_null())
{
failure_domain = cfg["failure_domain"].string_value();
has_changes = true;
}
if (!cfg["max_osd_combinations"].is_null())
{
if (!cfg["max_osd_combinations"].uint64_value())
{
error = "Max OSD combinations has an invalid value";
return false;
}
max_osd_combinations = cfg["max_osd_combinations"].uint64_value();
has_changes = true;
}
if (!cfg["block_size"].is_null())
{
if (!cfg["block_size"].uint64_value())
{
error = "Block size has an invalid value";
return false;
}
block_size = cfg["block_size"].uint64_value();
has_changes = true;
}
if (!cfg["immediate_commit"].is_null())
{
if (!is_valid_immediate_commit_string(cfg["immediate_commit"].string_value()))
{
return false;
}
immediate_commit = cfg["immediate_commit"].string_value();
has_changes = true;
}
if (!cfg["pg_stripe_size"].is_null())
{
if (!cfg["pg_stripe_size"].uint64_value())
{
error = "PG stripe size has an invalid value";
return false;
}
pg_stripe_size = cfg["pg_stripe_size"].uint64_value();
has_changes = true;
}
if (!cfg["root_node"].is_null())
{
root_node = cfg["root_node"].string_value();
has_changes = true;
}
if (!cfg["osd_tags"].is_null())
{
osd_tags = cfg["osd_tags"].string_value();
has_changes = true;
}
if (!cfg["primary_affinity_tags"].is_null())
{
primary_affinity_tags = cfg["primary_affinity_tags"].string_value();
has_changes = true;
}
if (!cfg["scrub_interval"].is_null())
{
scrub_interval = cfg["scrub_interval"].string_value();
has_changes = true;
}
if (!has_changes)
{
error = "No changes were provided to modify pool";
return false;
}
}
return true;
}
// Prevent autovivification of object keys. Now we don't modify the config, we just check it
json11::Json cfg = new_cfg;
bool pool_configurator_t::validate(etcd_state_client_t &st_cli, pool_config_t *pool_config, bool strict)
{
// Validate pool parameters
// Scheme
uint64_t p_scheme = (scheme != "" ?
(scheme == "xor" ? POOL_SCHEME_XOR : (scheme == "ec" ? POOL_SCHEME_EC : POOL_SCHEME_REPLICATED)) :
(pool_config ? pool_config->scheme : 0));
// PG size
uint64_t p_pg_size = (pg_size ? pg_size : (pool_config ? pool_config->pg_size : 0));
if (p_pg_size)
// Validate changes
if (!old_cfg.is_null() && !force)
{
// Min PG size
if ((p_scheme == POOL_SCHEME_XOR || p_scheme == POOL_SCHEME_EC) && p_pg_size < 3)
if (old_cfg["scheme"] != cfg["scheme"])
{
error = "PG size cannot be less than 3 for XOR/EC pool";
return false;
return "Changing scheme for an existing pool will lead to data loss. Use --force to proceed";
}
// Max PG size
else if (p_pg_size > 256)
if (etcd_state_client_t::parse_scheme(old_cfg["scheme"].string_value()) == POOL_SCHEME_EC)
{
error = "PG size cannot be greater than 256";
return false;
uint64_t old_data_chunks = old_cfg["pg_size"].uint64_value() - old_cfg["parity_chunks"].uint64_value();
uint64_t new_data_chunks = cfg["pg_size"].uint64_value() - cfg["parity_chunks"].uint64_value();
if (old_data_chunks != new_data_chunks)
{
return "Changing EC data chunk count for an existing pool will lead to data loss. Use --force to proceed";
}
}
if (old_cfg["block_size"] != cfg["block_size"] ||
old_cfg["bitmap_granularity"] != cfg["bitmap_granularity"] ||
old_cfg["immediate_commit"] != cfg["immediate_commit"])
{
return "Changing block_size, bitmap_granularity or immediate_commit"
" for an existing pool will lead to incomplete PGs. Use --force to proceed";
}
if (old_cfg["pg_stripe_size"] != cfg["pg_stripe_size"])
{
return "Changing pg_stripe_size for an existing pool will lead to data loss. Use --force to proceed";
}
}
// Parity Chunks
uint64_t p_parity_chunks = (parity_chunks ? parity_chunks : (pool_config ? pool_config->parity_chunks : 0));
if (p_parity_chunks)
// Validate values
if (cfg["name"].string_value() == "")
{
if (p_scheme == POOL_SCHEME_XOR && p_parity_chunks > 1)
{
error = "Parity Chunks must be 1 for XOR pool";
return false;
}
if (p_scheme == POOL_SCHEME_EC && (p_parity_chunks < 1 || p_parity_chunks > p_pg_size-2))
{
error = "Parity Chunks must be between 1 and pg_size-2 for EC pool";
return false;
}
return "Non-empty pool name is required";
}
// PG minsize
uint64_t p_pg_minsize = (pg_minsize ? pg_minsize : (pool_config ? pool_config->pg_minsize : 0));
if (p_pg_minsize)
// scheme
auto scheme = etcd_state_client_t::parse_scheme(cfg["scheme"].string_value());
if (!scheme)
{
// Max PG minsize relative to PG size
if (p_pg_minsize > p_pg_size)
{
error = "PG minsize cannot be greater than "+std::to_string(p_pg_size)+" (PG size)";
return false;
}
// PG minsize relative to PG size and Parity Chunks
else if ((p_scheme == POOL_SCHEME_XOR || p_scheme == POOL_SCHEME_EC) && p_pg_minsize < (p_pg_size - p_parity_chunks))
{
error =
"PG minsize cannot be less than "+std::to_string(p_pg_size - p_parity_chunks)+" "
"(PG size - Parity Chunks) for XOR/EC pool";
return false;
}
return "Scheme must be one of \"replicated\", \"ec\" or \"xor\"";
}
// Max OSD Combinations (optional)
if (max_osd_combinations > 0 && max_osd_combinations < 100)
// pg_size
auto pg_size = cfg["pg_size"].uint64_value();
if (!pg_size)
{
error = "Max OSD Combinations must be at least 100";
return false;
return "Non-zero PG size is required";
}
if (scheme != POOL_SCHEME_REPLICATED && pg_size < 3)
{
return "PG size can't be smaller than 3 for EC/XOR pools";
}
if (pg_size > 256)
{
return "PG size can't be greater than 256";
}
// Scrub interval (optional)
if (scrub_interval != "")
// parity_chunks
uint64_t parity_chunks = 1;
if (scheme == POOL_SCHEME_EC)
{
parity_chunks = cfg["parity_chunks"].uint64_value();
if (!parity_chunks)
{
return "Non-zero parity_chunks is required";
}
if (parity_chunks > pg_size-2)
{
return "parity_chunks can't be greater than "+std::to_string(pg_size-2)+" (PG size - 2)";
}
}
// pg_minsize
auto pg_minsize = cfg["pg_minsize"].uint64_value();
if (!pg_minsize)
{
return "Non-zero pg_minsize is required";
}
else if (pg_minsize > pg_size)
{
return "pg_minsize can't be greater than "+std::to_string(pg_size)+" (PG size)";
}
else if (scheme != POOL_SCHEME_REPLICATED && pg_minsize < pg_size-parity_chunks)
{
return "pg_minsize can't be smaller than "+std::to_string(pg_size-parity_chunks)+
" (pg_size - parity_chunks) for XOR/EC pool";
}
// pg_count
if (!cfg["pg_count"].uint64_value())
{
return "Non-zero pg_count is required";
}
// max_osd_combinations
if (!cfg["max_osd_combinations"].is_null() && cfg["max_osd_combinations"].uint64_value() < 100)
{
return "max_osd_combinations must be at least 100, but it is "+cfg["max_osd_combinations"].as_string();
}
// block_size
auto block_size = cfg["block_size"].uint64_value();
if (!cfg["block_size"].is_null() && ((block_size & (block_size-1)) ||
block_size < MIN_DATA_BLOCK_SIZE || block_size > MAX_DATA_BLOCK_SIZE))
{
return "block_size must be a power of two between "+std::to_string(MIN_DATA_BLOCK_SIZE)+
" and "+std::to_string(MAX_DATA_BLOCK_SIZE)+", but it is "+std::to_string(block_size);
}
block_size = (block_size ? block_size : global_block_size);
// bitmap_granularity
auto bitmap_granularity = cfg["bitmap_granularity"].uint64_value();
if (!cfg["bitmap_granularity"].is_null() && (!bitmap_granularity || (bitmap_granularity % 512)))
{
return "bitmap_granularity must be a multiple of 512, but it is "+std::to_string(bitmap_granularity);
}
bitmap_granularity = (bitmap_granularity ? bitmap_granularity : global_bitmap_granularity);
if (block_size % bitmap_granularity)
{
return "bitmap_granularity must divide data block size ("+std::to_string(block_size)+"), but it is "+std::to_string(bitmap_granularity);
}
// immediate_commit
if (!cfg["immediate_commit"].is_null() && !etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value()))
{
return "immediate_commit must be one of \"all\", \"small\", or \"none\", but it is "+cfg["immediate_commit"].as_string();
}
// scrub_interval
if (!cfg["scrub_interval"].is_null())
{
bool ok;
parse_time(scrub_interval, &ok);
parse_time(cfg["scrub_interval"].string_value(), &ok);
if (!ok)
{
error = "Failed to parse scrub interval. Format: number + unit s/m/h/d/M/y";
return false;
return "scrub_interval must be a time interval (number + unit s/m/h/d/M/y), but it is "+cfg["scrub_interval"].as_string();
}
}
// Additional checks (only if strict)
if (strict)
{
uint64_t p_block_size = block_size ? block_size :
(pool_config ? pool_config->data_block_size : st_cli.global_block_size);
uint64_t p_bitmap_granularity = bitmap_granularity ? bitmap_granularity :
(pool_config ? pool_config->bitmap_granularity : st_cli.global_bitmap_granularity);
// Block size value and range
if ((p_block_size & (p_block_size-1)) || p_block_size < MIN_DATA_BLOCK_SIZE || p_block_size > MAX_DATA_BLOCK_SIZE)
{
error =
"Data block size must be a power of two between "+std::to_string(MIN_DATA_BLOCK_SIZE)+" "
"and "+std::to_string(MAX_DATA_BLOCK_SIZE);
return false;
}
// Block size relative to bitmap granularity
if (p_block_size % p_bitmap_granularity)
{
error = "Data block size must be devisible by "+std::to_string(p_bitmap_granularity)+" (Bitmap Granularity)";
return false;
}
}
return true;
return "";
}

View File

@ -1,48 +1,10 @@
/*
=========================================================================
Copyright (c) 2023 MIND Software LLC. All Rights Reserved.
This file is part of the Software-Defined Storage MIND UStor Project.
For more information about this product, please visit https://mindsw.io
or contact us directly at info@mindsw.io
=========================================================================
*/
// Copyright (c) Vitaliy Filippov, 2024
// License: VNPL-1.1 (see README.md for details)
#pragma once
#include "json11/json11.hpp"
#include <stdint.h>
#include "etcd_state_client.h"
#include "str_util.h"
// Parses and validates pool configuration parameters for
// create-pool / modify-pool CLI commands.
struct pool_configurator_t
{
protected:
    // Human-readable description of the last parse/validation failure
    std::string error;
    // Checks that scheme_str is "replicated", "xor" or "ec";
    // on failure sets `error` and returns false
    bool is_valid_scheme_string(std::string scheme_str);
    // Checks the immediate_commit setting ("" / "all" / "small" / "none");
    // on failure sets `error` and returns false
    bool is_valid_immediate_commit_string(std::string immediate_commit_str);
public:
    // Pool name (required when creating a new pool)
    std::string name;
    // Redundancy scheme: "replicated", "xor" or "ec"
    std::string scheme;
    // Total chunks per PG, minimum live OSDs for a PG to stay active, and PG count
    uint64_t pg_size, pg_minsize, pg_count;
    // Number of parity chunks for EC/XOR pools
    uint64_t parity_chunks;
    // Immediate commit mode: "all", "small", "none", or "" if unset
    std::string immediate_commit;
    // Failure domain: "host", "osd" or a level from placement_levels
    std::string failure_domain;
    // Restrict pool OSDs to children of this placement tree node ("" = no restriction)
    std::string root_node;
    // Maximum number of OSD combinations generated for PG placement optimisation
    uint64_t max_osd_combinations;
    // Data block size and bitmap granularity; 0 = use the global defaults
    uint64_t block_size, bitmap_granularity;
    // Stripe size used to split images into PGs; 0 = default
    uint64_t pg_stripe_size;
    // Comma-separated OSD tag list restricting pool placement
    std::string osd_tags;
    // Comma-separated tags preferred when choosing primary OSDs
    std::string primary_affinity_tags;
    // Automatic scrubbing interval (number + unit s/m/h/d/M/y), "" = disabled
    std::string scrub_interval;
    // Returns the message stored by the last failed parse()/validate() call
    std::string get_error_string();
    // Fills the fields from `cfg`; new_pool selects create vs modify semantics.
    // Returns false and sets `error` on invalid input.
    bool parse(json11::Json cfg, bool new_pool);
    // Validates the parsed parameters against cluster state; when `strict`,
    // additionally checks block size range and divisibility by bitmap granularity.
    bool validate(etcd_state_client_t &st_cli, pool_config_t *pool_config, bool strict);
};
std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json old_cfg,
uint64_t global_block_size, uint64_t global_bitmap_granularity, bool force);

View File

@ -1,11 +1,7 @@
/*
=========================================================================
Copyright (c) 2023 MIND Software LLC. All Rights Reserved.
This file is part of the Software-Defined Storage MIND UStor Project.
For more information about this product, please visit https://mindsw.io
or contact us directly at info@mindsw.io
=========================================================================
*/
// Copyright (c) MIND Software LLC, 2023 (info@mindsw.io)
// I accept Vitastor CLA: see CLA-en.md for details
// Copyright (c) Vitaliy Filippov, 2024
// License: VNPL-1.1 (see README.md for details)
#include <ctype.h>
#include "cli.h"
@ -18,9 +14,10 @@
struct pool_creator_t
{
cli_tool_t *parent;
json11::Json::object cfg;
bool force;
pool_configurator_t *cfg;
bool force = false;
bool wait = false;
int state = 0;
cli_result_t result;
@ -35,8 +32,6 @@ struct pool_creator_t
uint64_t new_pools_mod_rev;
json11::Json state_node_tree;
json11::Json new_pools;
json11::Json osd_tags_json;
json11::Json primary_affinity_tags_json;
bool is_done() { return state == 100; }
@ -60,22 +55,14 @@ struct pool_creator_t
goto resume_8;
// Validate pool parameters
if (!cfg->validate(parent->cli->st_cli, NULL, !force))
result.text = validate_pool_config(cfg, json11::Json(), parent->cli->st_cli.global_block_size,
parent->cli->st_cli.global_bitmap_granularity, force);
if (result.text != "")
{
result = (cli_result_t){ .err = EINVAL, .text = cfg->get_error_string() + "\n" };
result.err = EINVAL;
state = 100;
return;
}
// OSD tags
if (cfg->osd_tags != "")
{
osd_tags_json = parent->parse_tags(cfg->osd_tags);
}
// Primary affinity tags
if (cfg->primary_affinity_tags != "")
{
primary_affinity_tags_json = parent->parse_tags(cfg->primary_affinity_tags);
}
state = 1;
resume_1:
// If not forced, check that we have enough osds for pg_size
@ -110,7 +97,7 @@ resume_2:
}
// Skip tag checks, if pool has none
if (!osd_tags_json.is_null())
if (cfg["osd_tags"].array_items().size())
{
// Get osd configs (for tags) of osds in state_node_tree
{
@ -189,18 +176,20 @@ resume_4:
// Check that pg_size <= max_pg_size
{
auto failure_domain = cfg["failure_domain"].string_value() == ""
? "host" : cfg["failure_domain"].string_value();
uint64_t max_pg_size = get_max_pg_size(state_node_tree["nodes"].object_items(),
cfg->failure_domain, cfg->root_node);
failure_domain, cfg["root_node"].string_value());
if (cfg->pg_size > max_pg_size)
if (cfg["pg_size"].uint64_value() > max_pg_size)
{
result = (cli_result_t){
.err = EINVAL,
.text =
"There are "+std::to_string(max_pg_size)+" failure domains with OSDs matching tags and "
"block_size/bitmap_granularity/immediate_commit parameters, but you want to create a "
"pool with "+std::to_string(cfg->pg_size)+" OSDs from different failure domains in a PG. "
"Change parameters or add --force if you want to create a degraded pool and add OSDs later."
"There are "+std::to_string(max_pg_size)+" \""+failure_domain+"\" failure domains with OSDs matching tags and"
" block_size/bitmap_granularity/immediate_commit parameters, but you want to create a"
" pool with "+cfg["pg_size"].as_string()+" OSDs from different failure domains in a PG."
" Change parameters or add --force if you want to create a degraded pool and add OSDs later."
};
state = 100;
return;
@ -283,20 +272,19 @@ resume_8:
return;
// Unless forced, check that pool was created and is active
if (force)
if (!wait)
{
create_check.passed = true;
}
else if (create_check.retries)
{
create_check.retries--;
parent->waiting++;
parent->epmgr->tfd->set_timer(create_check.interval, false, [this](int timer_id)
{
if (parent->cli->st_cli.pool_config.find(new_id) != parent->cli->st_cli.pool_config.end())
{
auto & pool_cfg = parent->cli->st_cli.pool_config[new_id];
create_check.passed = pool_cfg.real_pg_count > 0;
for (auto pg_it = pool_cfg.pg_config.begin(); pg_it != pool_cfg.pg_config.end(); pg_it++)
{
@ -306,15 +294,12 @@ resume_8:
break;
}
}
if (create_check.passed)
create_check.retries = 0;
}
parent->waiting--;
parent->ringloop->wakeup();
});
return;
}
@ -322,18 +307,17 @@ resume_8:
{
result = (cli_result_t) {
.err = EAGAIN,
.text =
"Pool "+cfg->name+" was created, but failed to become active. This may indicate that cluster "
"state has changed while the pool was being created. Please check the current state and "
"correct the pool's configuration if necessary.\n"
.text = "Pool "+cfg["name"].string_value()+" was created, but failed to become active."
" This may indicate that cluster state has changed while the pool was being created."
" Please check the current state and adjust the pool configuration if necessary.",
};
}
else
{
result = (cli_result_t){
.err = 0,
.text = "Pool "+cfg->name+" created",
.data = new_pools[std::to_string(new_id)]
.text = "Pool "+cfg["name"].string_value()+" created",
.data = new_pools[std::to_string(new_id)],
};
}
state = 100;
@ -423,7 +407,7 @@ resume_8:
else
{
// If all pool tags are in osd tags, accept osd
if (all_in_tags(osd_configs[i]["tags"], osd_tags_json))
if (all_in_tags(osd_configs[i]["tags"], cfg["osd_tags"]))
{
accepted_osds.push_back(osd_num);
}
@ -453,72 +437,45 @@ resume_8:
// List of accepted osds
std::vector<std::string> accepted_osds;
uint64_t p_block_size = cfg["block_size"].uint64_value()
? cfg["block_size"].uint64_value()
: parent->cli->st_cli.global_block_size;
uint64_t p_bitmap_granularity = cfg["bitmap_granularity"].uint64_value()
? cfg["bitmap_granularity"].uint64_value()
: parent->cli->st_cli.global_bitmap_granularity;
uint32_t p_immediate_commit = cfg["immediate_commit"].is_string()
? etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value())
: parent->cli->st_cli.global_immediate_commit;
for (size_t i = 0; i < osd_stats.size(); i++)
{
auto & os = osd_stats[i].object_items();
auto & os = osd_stats[i];
// Get osd number
auto osd_num = osds[i].as_string();
// Check data_block_size
if (os.find("data_block_size") != os.end())
if (!os["data_block_size"].is_null() && os["data_block_size"] != p_block_size ||
!os["bitmap_granularity"].is_null() && os["bitmap_granularity"] != p_bitmap_granularity ||
!os["immediate_commit"].is_null() &&
etcd_state_client_t::parse_immediate_commit(os["immediate_commit"].string_value()) < p_immediate_commit)
{
uint64_t p_block_size = cfg->block_size ? cfg->block_size : parent->cli->st_cli.global_block_size;
uint64_t o_block_size = osd_stats[i]["data_block_size"].int64_value();
if (p_block_size != o_block_size)
{
accepted_nodes.erase(osd_num);
continue;
}
accepted_nodes.erase(osd_num);
}
// Check bitmap_granularity
if (os.find("bitmap_granularity") != os.end())
else
{
uint64_t p_bitmap_granularity = cfg->bitmap_granularity ?
cfg->bitmap_granularity : parent->cli->st_cli.global_bitmap_granularity;
uint64_t o_bitmap_granularity = osd_stats[i]["bitmap_granularity"].int64_value();
if (p_bitmap_granularity != o_bitmap_granularity)
{
accepted_nodes.erase(osd_num);
continue;
}
accepted_osds.push_back(osd_num);
}
// Check immediate_commit
if (os.find("immediate_commit") != os.end())
{
uint32_t p_immediate_commit = (cfg->immediate_commit != "") ?
parent->cli->st_cli.parse_immediate_commit_string(cfg->immediate_commit) : parent->cli->st_cli.global_immediate_commit;
uint32_t o_immediate_commit = parent->cli->st_cli.parse_immediate_commit_string(osd_stats[i]["immediate_commit"].string_value());
if (o_immediate_commit < p_immediate_commit)
{
accepted_nodes.erase(osd_num);
continue;
}
}
// Accept osd if all checks passed
accepted_osds.push_back(osd_num);
}
return json11::Json::object { { "osds", accepted_osds }, { "nodes", accepted_nodes } };
}
// Returns maximum pg_size possible for given node_tree and failure_domain, starting at parent_node
uint64_t get_max_pg_size(json11::Json::object node_tree, const std::string & failure_domain = "", const std::string & parent_node = "")
uint64_t get_max_pg_size(json11::Json::object node_tree, const std::string & level, const std::string & parent_node)
{
uint64_t max_pg_sz = 0;
std::vector<std::string> nodes;
const std::string level = (failure_domain != "") ? failure_domain : "osd";
// Check if parnet node is an osd (numeric)
// Check if parent node is an osd (numeric)
if (parent_node != "" && stoull_full(parent_node))
{
// Add it to node list if osd is in node tree
@ -600,48 +557,16 @@ resume_8:
for (auto & p: kv.value.object_items())
{
// ID
uint64_t pool_id;
char null_byte = 0;
sscanf(p.first.c_str(), "%lu%c", &pool_id, &null_byte);
uint64_t pool_id = stoull_full(p.first);
new_id = std::max(pool_id+1, new_id);
// Name
if (p.second["name"].string_value() == cfg->name)
if (p.second["name"].string_value() == cfg["name"].string_value())
{
return json11::Json("Pool "+std::to_string(pool_id)+" has the same name\n");
return "Pool with name \""+cfg["name"].string_value()+"\" already exists (ID "+std::to_string(pool_id)+")";
}
}
json11::Json::object new_pool = json11::Json::object {
{ "name", cfg->name },
{ "scheme", cfg->scheme },
{ "pg_size", cfg->pg_size },
{ "pg_minsize", cfg->pg_minsize },
{ "pg_count", cfg->pg_count },
{ "parity_chunks", cfg->parity_chunks },
};
if (cfg->failure_domain != "")
new_pool["failure_domain"] = cfg->failure_domain;
if (cfg->max_osd_combinations)
new_pool["max_osd_combinations"] = cfg->max_osd_combinations;
if (cfg->block_size)
new_pool["block_size"] = cfg->block_size;
if (cfg->bitmap_granularity)
new_pool["bitmap_granularity"] = cfg->bitmap_granularity;
if (cfg->immediate_commit != "")
new_pool["immediate_commit"] = cfg->immediate_commit;
if (cfg->pg_stripe_size)
new_pool["pg_stripe_size"] = cfg->pg_stripe_size;
if (cfg->root_node != "")
new_pool["root_node"] = cfg->root_node;
if (cfg->scrub_interval != "")
new_pool["scrub_interval"] = cfg->scrub_interval;
if (cfg->osd_tags != "")
new_pool["osd_tags"] = osd_tags_json;
if (cfg->primary_affinity_tags != "")
new_pool["primary_affinity_tags"] = primary_affinity_tags_json;
auto res = kv.value.object_items();
res[std::to_string(new_id)] = new_pool;
res[std::to_string(new_id)] = cfg;
return res;
}
@ -680,20 +605,9 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_pool_create(json11::Json c
{
auto pool_creator = new pool_creator_t();
pool_creator->parent = this;
pool_creator->cfg = new pool_configurator_t();
if (!pool_creator->cfg->parse(cfg, true))
{
std::string err = pool_creator->cfg->get_error_string();
return [err](cli_result_t & result)
{
result = (cli_result_t){ .err = EINVAL, .text = err + "\n" };
return true;
};
}