Implement fool protection for FS pools
parent
dcbe1afac3
commit
6783d4a13c
|
@ -41,6 +41,7 @@ Parameters:
|
|||
- [osd_tags](#osd_tags)
|
||||
- [primary_affinity_tags](#primary_affinity_tags)
|
||||
- [scrub_interval](#scrub_interval)
|
||||
- [used_for_fs](#used_for_fs)
|
||||
|
||||
Examples:
|
||||
|
||||
|
@ -299,6 +300,25 @@ of the OSDs containing a data chunk for a PG.
|
|||
Automatic scrubbing interval for this pool. Overrides
|
||||
[global scrub_interval setting](osd.en.md#scrub_interval).
|
||||
|
||||
## used_for_fs
|
||||
|
||||
- Type: string
|
||||
|
||||
If non-empty, the pool is marked as used for VitastorFS with metadata stored
|
||||
in a block image (regular Vitastor volume) whose name is the value of this pool parameter.
|
||||
|
||||
When a pool is marked as used for VitastorFS, regular block volume creation in it
|
||||
is disabled (vitastor-cli refuses to create images without --force) to protect
|
||||
the user from block volume and FS file ID collisions and data loss.
|
||||
|
||||
[vitastor-nfs](../usage/nfs.en.md), in its turn, refuses to use pools not marked
|
||||
for the corresponding FS when starting. This also implies that you can use one
|
||||
pool only for one VitastorFS.
|
||||
|
||||
The second thing that is disabled for VitastorFS pools is reporting per-inode space
|
||||
usage statistics in etcd because a FS pool may store a very large number of files
|
||||
and statistics for them all would take a lot of space in etcd.
|
||||
|
||||
# Examples
|
||||
|
||||
## Replicated pool
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
- [osd_tags](#osd_tags)
|
||||
- [primary_affinity_tags](#primary_affinity_tags)
|
||||
- [scrub_interval](#scrub_interval)
|
||||
- [used_for_fs](#used_for_fs)
|
||||
|
||||
Примеры:
|
||||
|
||||
|
@ -306,6 +307,27 @@ OSD с "all".
|
|||
Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
|
||||
Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).
|
||||
|
||||
## used_for_fs
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Если непусто, пул помечается как используемый для файловой системы VitastorFS с
|
||||
метаданными, хранимыми в блочном образе Vitastor с именем, равным значению
|
||||
этого параметра.
|
||||
|
||||
Когда пул помечается как используемый для VitastorFS, создание обычных блочных
|
||||
образов в нём отключается (vitastor-cli отказывается создавать образы без --force),
|
||||
чтобы защитить пользователя от коллизий ID файлов и блочных образов и, таким
|
||||
образом, от потери данных.
|
||||
|
||||
[vitastor-nfs](../usage/nfs.ru.md), в свою очередь, при запуске отказывается
|
||||
использовать для ФС пулы, не выделенные для неё. Это также означает, что один
|
||||
пул может использоваться только для одной VitastorFS.
|
||||
|
||||
Также для ФС-пулов отключается передача статистики в etcd по отдельным инодам,
|
||||
так как ФС-пул может содержать очень много файлов и статистика по ним всем
|
||||
заняла бы очень много места в etcd.
|
||||
|
||||
# Примеры
|
||||
|
||||
## Реплицированный пул
|
||||
|
|
|
@ -267,7 +267,7 @@ Optional parameters:
|
|||
| `--immediate_commit none` | Put pool only on OSDs with this or larger immediate_commit (none < small < all) |
|
||||
| `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags |
|
||||
| `--scrub_interval <time>` | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y |
|
||||
| `--no_inode_stats 1` | Disable per-inode statistics for this pool (use for VitastorFS pools) |
|
||||
| `--used_for_fs <name>` | Mark pool as used for VitastorFS with metadata in image <name> |
|
||||
| `--pg_stripe_size <number>` | Increase object grouping stripe |
|
||||
| `--max_osd_combinations 10000` | Maximum number of random combinations for LP solver input |
|
||||
| `--wait` | Wait for the new pool to come online |
|
||||
|
|
|
@ -131,7 +131,7 @@ static const char* help_text =
|
|||
" --immediate_commit none Put pool only on OSDs with this or larger immediate_commit (none < small < all)\n"
|
||||
" --primary_affinity_tags tags Prefer to put primary copies on OSDs with all specified tags\n"
|
||||
" --scrub_interval <time> Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y\n"
|
||||
" --no_inode_stats 1 Disable per-inode statistics for this pool (use for VitastorFS pools)\n"
|
||||
" --used_for_fs <name> Mark pool as used for VitastorFS with metadata in image <name>\n"
|
||||
" --pg_stripe_size <number> Increase object grouping stripe\n"
|
||||
" --max_osd_combinations 10000 Maximum number of random combinations for LP solver input\n"
|
||||
" --wait Wait for the new pool to come online\n"
|
||||
|
@ -143,7 +143,7 @@ static const char* help_text =
|
|||
"vitastor-cli modify-pool|pool-modify <id|name> [--name <new_name>] [PARAMETERS...]\n"
|
||||
" Modify an existing pool. Modifiable parameters:\n"
|
||||
" [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]\n"
|
||||
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--no_inode_stats 0|1]\n"
|
||||
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--used_for_fs <name>]\n"
|
||||
" [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]\n"
|
||||
" Non-modifiable parameters (changing them WILL lead to data loss):\n"
|
||||
" [--block_size <size>] [--bitmap_granularity <size>]\n"
|
||||
|
@ -186,7 +186,6 @@ static json11::Json::object parse_args(int narg, const char *args[])
|
|||
for (int i = 1; i < narg; i++)
|
||||
{
|
||||
bool argHasValue = (!(i == narg-1) && (args[i+1][0] != '-'));
|
||||
|
||||
if (args[i][0] == '-' && args[i][1] == 'h' && args[i][2] == 0)
|
||||
{
|
||||
cfg["help"] = "1";
|
||||
|
|
|
@ -27,6 +27,7 @@ struct image_creator_t
|
|||
std::string image_name, new_snap, new_parent;
|
||||
json11::Json new_meta;
|
||||
uint64_t size;
|
||||
bool force = false;
|
||||
bool force_size = false;
|
||||
|
||||
pool_id_t old_pool_id = 0;
|
||||
|
@ -45,6 +46,7 @@ struct image_creator_t
|
|||
|
||||
void loop()
|
||||
{
|
||||
auto & pools = parent->cli->st_cli.pool_config;
|
||||
if (state >= 1)
|
||||
goto resume_1;
|
||||
if (image_name == "")
|
||||
|
@ -62,7 +64,6 @@ struct image_creator_t
|
|||
}
|
||||
if (new_pool_id)
|
||||
{
|
||||
auto & pools = parent->cli->st_cli.pool_config;
|
||||
if (pools.find(new_pool_id) == pools.end())
|
||||
{
|
||||
result = (cli_result_t){ .err = ENOENT, .text = "Pool "+std::to_string(new_pool_id)+" does not exist" };
|
||||
|
@ -72,7 +73,7 @@ struct image_creator_t
|
|||
}
|
||||
else if (new_pool_name != "")
|
||||
{
|
||||
for (auto & ic: parent->cli->st_cli.pool_config)
|
||||
for (auto & ic: pools)
|
||||
{
|
||||
if (ic.second.name == new_pool_name)
|
||||
{
|
||||
|
@ -87,10 +88,20 @@ struct image_creator_t
|
|||
return;
|
||||
}
|
||||
}
|
||||
else if (parent->cli->st_cli.pool_config.size() == 1)
|
||||
else if (pools.size() == 1)
|
||||
{
|
||||
auto it = parent->cli->st_cli.pool_config.begin();
|
||||
new_pool_id = it->first;
|
||||
new_pool_id = pools.begin()->first;
|
||||
}
|
||||
if (new_pool_id && !pools.at(new_pool_id).used_for_fs.empty() && !force)
|
||||
{
|
||||
result = (cli_result_t){
|
||||
.err = EINVAL,
|
||||
.text = "Pool "+pools.at(new_pool_id).name+
|
||||
" is used for VitastorFS "+pools.at(new_pool_id).used_for_fs+
|
||||
". Use --force if you really know what you are doing",
|
||||
};
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
state = 1;
|
||||
resume_1:
|
||||
|
@ -532,6 +543,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_create(json11::Json cfg)
|
|||
image_creator->image_name = cfg["image"].string_value();
|
||||
image_creator->new_pool_id = cfg["pool"].uint64_value();
|
||||
image_creator->new_pool_name = cfg["pool"].string_value();
|
||||
image_creator->force = cfg["force"].bool_value();
|
||||
image_creator->force_size = cfg["force_size"].bool_value();
|
||||
if (cfg["image_meta"].is_object())
|
||||
{
|
||||
|
|
|
@ -81,13 +81,8 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
|||
}
|
||||
value = value.uint64_value();
|
||||
}
|
||||
else if (key == "no_inode_stats" && value.bool_value())
|
||||
{
|
||||
// Leave true, remove false
|
||||
value = true;
|
||||
}
|
||||
else if (key == "name" || key == "scheme" || key == "immediate_commit" ||
|
||||
key == "failure_domain" || key == "root_node" || key == "scrub_interval")
|
||||
key == "failure_domain" || key == "root_node" || key == "scrub_interval" || key == "used_for_fs")
|
||||
{
|
||||
// OK
|
||||
}
|
||||
|
@ -124,6 +119,10 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
|||
{
|
||||
new_cfg.erase("parity_chunks");
|
||||
}
|
||||
if (new_cfg.find("used_for_fs") != new_cfg.end() && new_cfg["used_for_fs"].string_value() == "")
|
||||
{
|
||||
new_cfg.erase("used_for_fs");
|
||||
}
|
||||
|
||||
// Prevent autovivification of object keys. Now we don't modify the config, we just check it
|
||||
json11::Json cfg = new_cfg;
|
||||
|
|
|
@ -529,8 +529,6 @@ resume_3:
|
|||
st["block_size_fmt"] = format_size(st["block_size"].uint64_value());
|
||||
if (st["bitmap_granularity"].uint64_value())
|
||||
st["bitmap_granularity_fmt"] = format_size(st["bitmap_granularity"].uint64_value());
|
||||
if (st["no_inode_stats"].bool_value())
|
||||
st["inode_stats_fmt"] = "disabled";
|
||||
}
|
||||
// All pool parameters are only displayed in the "detailed" mode
|
||||
// because there's too many of them to show them in table
|
||||
|
@ -538,6 +536,7 @@ resume_3:
|
|||
{ "name", "Name" },
|
||||
{ "id", "ID" },
|
||||
{ "scheme_name", "Scheme" },
|
||||
{ "used_for_fs", "Used for VitastorFS" },
|
||||
{ "status", "Status" },
|
||||
{ "pg_count_fmt", "PGs" },
|
||||
{ "pg_minsize", "PG minsize" },
|
||||
|
|
|
@ -112,6 +112,24 @@ resume_1:
|
|||
return;
|
||||
}
|
||||
|
||||
if (new_cfg.find("used_for_fs") != new_cfg.end() && !force)
|
||||
{
|
||||
// Check that pool doesn't have images
|
||||
auto img_it = parent->cli->st_cli.inode_config.lower_bound(INODE_WITH_POOL(pool_id, 0));
|
||||
if (img_it != parent->cli->st_cli.inode_config.end() && INODE_POOL(img_it->first) == pool_id &&
|
||||
img_it->second.name == new_cfg["used_for_fs"].string_value())
|
||||
{
|
||||
// Only allow metadata image to exist in the FS pool
|
||||
img_it++;
|
||||
}
|
||||
if (img_it != parent->cli->st_cli.inode_config.end() && INODE_POOL(img_it->first) == pool_id)
|
||||
{
|
||||
result = (cli_result_t){ .err = ENOENT, .text = "Pool "+pool_name+" has block images, delete them before using it for VitastorFS" };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Update pool
|
||||
auto pls = kv.value.object_items();
|
||||
pls[std::to_string(pool_id)] = new_cfg;
|
||||
|
|
|
@ -863,8 +863,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
|||
pc.scrub_interval = parse_time(pool_item.second["scrub_interval"].string_value());
|
||||
if (!pc.scrub_interval)
|
||||
pc.scrub_interval = 0;
|
||||
// Disable per-inode stats
|
||||
pc.no_inode_stats = pool_item.second["no_inode_stats"].bool_value();
|
||||
// Mark pool as VitastorFS pool (disable per-inode stats and block volume creation)
|
||||
pc.used_for_fs = pool_item.second["used_for_fs"].as_string();
|
||||
// Immediate Commit Mode
|
||||
pc.immediate_commit = pool_item.second["immediate_commit"].is_string()
|
||||
? parse_immediate_commit(pool_item.second["immediate_commit"].string_value())
|
||||
|
|
|
@ -60,7 +60,7 @@ struct pool_config_t
|
|||
uint64_t pg_stripe_size;
|
||||
std::map<pg_num_t, pg_config_t> pg_config;
|
||||
uint64_t scrub_interval;
|
||||
bool no_inode_stats;
|
||||
std::string used_for_fs;
|
||||
};
|
||||
|
||||
struct inode_config_t
|
||||
|
|
|
@ -196,6 +196,7 @@ void nfs_kv_procs(nfs_client_t *self)
|
|||
void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
|
||||
{
|
||||
this->proxy = proxy;
|
||||
auto & pool_cfg = proxy->cli->st_cli.pool_config.at(proxy->default_pool_id);
|
||||
fs_kv_inode = cfg["fs"].uint64_value();
|
||||
if (fs_kv_inode)
|
||||
{
|
||||
|
@ -221,6 +222,25 @@ void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
|
|||
exit(1);
|
||||
}
|
||||
}
|
||||
if (proxy->cli->st_cli.inode_config.find(fs_kv_inode) != proxy->cli->st_cli.inode_config.end())
|
||||
{
|
||||
auto & name = proxy->cli->st_cli.inode_config.at(fs_kv_inode).name;
|
||||
if (pool_cfg.used_for_fs != name)
|
||||
{
|
||||
fprintf(stderr, "Please mark pool as used for this file system with `vitastor-cli modify-pool --used-for-fs %s %s`\n",
|
||||
name.c_str(), cfg["fs"].string_value().c_str());
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
auto img_it = proxy->cli->st_cli.inode_config.lower_bound(INODE_WITH_POOL(proxy->default_pool_id+1, 0));
|
||||
if (img_it != proxy->cli->st_cli.inode_config.begin())
|
||||
{
|
||||
img_it--;
|
||||
if (img_it != proxy->cli->st_cli.inode_config.begin() && INODE_POOL(img_it->first) == proxy->default_pool_id)
|
||||
{
|
||||
idgen[proxy->default_pool_id].min_id = INODE_NO_POOL(img_it->first) + 1;
|
||||
}
|
||||
}
|
||||
readdir_getattr_parallel = cfg["readdir_getattr_parallel"].uint64_value();
|
||||
if (!readdir_getattr_parallel)
|
||||
readdir_getattr_parallel = 8;
|
||||
|
@ -230,7 +250,6 @@ void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
|
|||
touch_interval = cfg["touch_interval"].uint64_value();
|
||||
if (touch_interval < 100) // ms
|
||||
touch_interval = 100;
|
||||
auto & pool_cfg = proxy->cli->st_cli.pool_config.at(proxy->default_pool_id);
|
||||
pool_block_size = pool_cfg.pg_stripe_size;
|
||||
pool_alignment = pool_cfg.bitmap_granularity;
|
||||
// Open DB and wait
|
||||
|
|
|
@ -44,6 +44,7 @@ struct kv_inode_extend_t
|
|||
// State of the inode ID generator used by allocate_new_id().
// The owner keeps one instance per pool (looked up as idgen[pool_id]).
struct kv_idgen_t
|
||||
{
|
||||
// next_id: allocate_new_id() hands out (next_id - 1) as the inode number and
// reports -ENOSPC once next_id reaches 2^(64-POOL_ID_BITS).
// allocated_id: NOTE(review): presumably the upper bound of the ID batch
// already reserved in the KV database - confirm against allocate_new_id().
uint64_t next_id = 1, allocated_id = 0;
|
||||
// Lowest ID the generator may start from: a stored counter value below min_id
// is raised to min_id before a new batch is reserved. kv_fs_state_t::init()
// sets it to the highest existing image ID in the pool plus one.
uint64_t min_id = 1;
|
||||
// NOTE(review): presumably IDs that were reserved but not used and can be
// handed out again - not visible in this excerpt, confirm.
std::vector<uint64_t> unallocated_ids;
|
||||
};
|
||||
|
||||
|
|
|
@ -25,8 +25,7 @@ void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function<void(i
|
|||
cb(0, INODE_WITH_POOL(pool_id, idgen.next_id-1));
|
||||
return;
|
||||
}
|
||||
// FIXME: Partial per-pool max ID limits
|
||||
// FIXME: Fool protection from block volume and FS file ID overlap
|
||||
// FIXME: Maybe allow FS and block volumes to cohabitate in the same pool, but with different ID ranges
|
||||
else if (idgen.next_id >= ((uint64_t)1 << (64-POOL_ID_BITS)))
|
||||
{
|
||||
cb(-ENOSPC, 0);
|
||||
|
@ -34,6 +33,7 @@ void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function<void(i
|
|||
}
|
||||
self->parent->db->get((pool_id ? "id"+std::to_string(pool_id) : "id"), [=](int res, const std::string & prev_str)
|
||||
{
|
||||
auto & idgen = self->parent->kvfs->idgen[pool_id];
|
||||
if (res < 0 && res != -ENOENT)
|
||||
{
|
||||
cb(res, 0);
|
||||
|
@ -45,9 +45,9 @@ void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function<void(i
|
|||
cb(-ENOSPC, 0);
|
||||
return;
|
||||
}
|
||||
if (prev_val < 1)
|
||||
if (prev_val < idgen.min_id)
|
||||
{
|
||||
prev_val = 1;
|
||||
prev_val = idgen.min_id;
|
||||
}
|
||||
uint64_t new_val = prev_val + self->parent->kvfs->id_alloc_batch_size;
|
||||
if (new_val >= self->parent->kvfs->fs_inode_count)
|
||||
|
|
|
@ -631,7 +631,7 @@ void osd_t::apply_no_inode_stats()
|
|||
std::vector<uint64_t> no_inode_stats;
|
||||
for (auto & pool_item: st_cli.pool_config)
|
||||
{
|
||||
if (pool_item.second.no_inode_stats)
|
||||
if (!pool_item.second.used_for_fs.empty())
|
||||
{
|
||||
no_inode_stats.push_back(pool_item.first);
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ PG_COUNT=16
|
|||
. `dirname $0`/run_3osds.sh
|
||||
|
||||
build/src/vitastor-cli --etcd_address $ETCD_URL create -s 10G fsmeta
|
||||
build/src/vitastor-cli --etcd_address $ETCD_URL modify-pool --used-for-fs fsmeta testpool
|
||||
build/src/vitastor-nfs start --fs fsmeta --etcd_address $ETCD_URL --portmap 0 --port 2050 --foreground 1 --trace 1 >>./testdata/nfs.log 2>&1 &
|
||||
NFS_PID=$!
|
||||
|
||||
|
|
Loading…
Reference in New Issue