WIP FS defragmentation
Test / buildenv (push) Successful in 12s Details
Test / build (push) Successful in 3m51s Details
Test / npm_lint (push) Successful in 10s Details
Test / test_cas (push) Successful in 11s Details
Test / make_test (push) Successful in 40s Details
Test / test_change_pg_size (push) Successful in 10s Details
Test / test_change_pg_count (push) Successful in 40s Details
Test / test_change_pg_count_ec (push) Successful in 40s Details
Test / test_create_nomaxid (push) Successful in 10s Details
Test / test_etcd_fail (push) Successful in 1m0s Details
Test / test_add_osd (push) Successful in 2m46s Details
Test / test_interrupted_rebalance_imm (push) Successful in 1m55s Details
Test / test_interrupted_rebalance (push) Successful in 2m37s Details
Test / test_failure_domain (push) Successful in 38s Details
Test / test_interrupted_rebalance_ec (push) Successful in 1m56s Details
Test / test_snapshot (push) Successful in 24s Details
Test / test_snapshot_ec (push) Successful in 25s Details
Test / test_minsize_1 (push) Successful in 19s Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m22s Details
Test / test_rm (push) Successful in 15s Details
Test / test_move_reappear (push) Successful in 21s Details
Test / test_snapshot_down (push) Successful in 31s Details
Test / test_snapshot_down_ec (push) Successful in 32s Details
Test / test_splitbrain (push) Successful in 27s Details
Test / test_snapshot_chain (push) Successful in 2m38s Details
Test / test_snapshot_chain_ec (push) Successful in 3m7s Details
Test / test_rebalance_verify_imm (push) Successful in 5m47s Details
Test / test_root_node (push) Successful in 20s Details
Test / test_rebalance_verify (push) Successful in 6m46s Details
Test / test_switch_primary (push) Successful in 41s Details
Test / test_write (push) Successful in 45s Details
Test / test_write_no_same (push) Successful in 21s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 5m38s Details
Test / test_write_xor (push) Successful in 52s Details
Test / test_rebalance_verify_ec (push) Successful in 8m58s Details
Test / test_heal_pg_size_2 (push) Successful in 4m8s Details
Test / test_heal_csum_32k_dmj (push) Successful in 4m29s Details
Test / test_heal_ec (push) Successful in 5m26s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m10s Details
Test / test_heal_csum_32k (push) Successful in 6m11s Details
Test / test_osd_tags (push) Successful in 31s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m33s Details
Test / test_enospc (push) Successful in 1m13s Details
Test / test_enospc_xor (push) Successful in 59s Details
Test / test_enospc_imm (push) Successful in 1m27s Details
Test / test_enospc_imm_xor (push) Successful in 1m47s Details
Test / test_scrub (push) Successful in 32s Details
Test / test_heal_csum_4k (push) Successful in 4m54s Details
Test / test_scrub_zero_osd_2 (push) Successful in 29s Details
Test / test_scrub_xor (push) Successful in 30s Details
Test / test_scrub_pg_size_3 (push) Successful in 1m0s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 35s Details
Test / test_scrub_ec (push) Successful in 33s Details
Test / test_nfs (push) Successful in 16s Details
Test / test_heal_csum_4k_dj (push) Failing after 10m17s Details

Vitaliy Filippov 2024-07-11 02:16:37 +03:00
parent d88ab76636
commit bb2af834cb
4 changed files with 151 additions and 27 deletions

View File

@ -116,17 +116,37 @@ std::string kv_direntry_filename(const std::string & key)
return key;
}
std::string kv_inode_key(uint64_t ino)
std::string kv_inode_prefix_key(uint64_t ino, const char *prefix)
{
char key[32] = { 0 };
snprintf(key, sizeof(key), "i%x", INODE_POOL(ino));
int n = strnlen(key, sizeof(key)-1);
snprintf(key+n+1, sizeof(key)-n-1, "%jx", INODE_NO_POOL(ino));
int m = strnlen(key+n+1, sizeof(key)-n-2);
int max = 32+strlen(prefix);
char key[max] = { 0 };
snprintf(key, max, "%s%x", prefix, INODE_POOL(ino));
int n = strnlen(key, max-1);
snprintf(key+n+1, max-n-1, "%jx", INODE_NO_POOL(ino));
int m = strnlen(key+n+1, max-n-2);
key[n] = 'G'+m;
return std::string(key);
}
std::string kv_inode_key(uint64_t ino)
{
return kv_inode_prefix_key(ino, "i");
}
// Parse an inode key back into an inode number.
//
// The first <prefix_len> bytes of <key> are skipped, then the rest is scanned
// as <pool id in hex><one character><inode number in hex> with nothing after it.
// The single character is read but its value is not checked.
//
// @param key         key to parse
// @param prefix_len  number of leading bytes to skip
// @return            inode number combined with the pool id, or 0 when the key
//                    is shorter than the prefix, does not scan as exactly three
//                    fields, has a zero pool id or zero inode number, or the
//                    inode number already has pool bits set
uint64_t kv_key_inode(const std::string & key, int prefix_len)
{
    uint64_t result = 0;
    if (key.size() >= prefix_len)
    {
        uint32_t pool = 0;
        uint64_t ino_no = 0;
        char len_marker = 0, trailing = 0;
        const char *tail = key.c_str()+prefix_len;
        // The 4th conversion must fail: anything after the inode number makes the key invalid
        const int fields = sscanf(tail, "%x%c%jx%c", &pool, &len_marker, &ino_no, &trailing);
        if (fields == 3 && pool && ino_no && INODE_POOL(ino_no) == 0)
        {
            result = INODE_WITH_POOL(pool, ino_no);
        }
    }
    return result;
}
std::string kv_fh(uint64_t ino)
{
char key[32] = { 0 };
@ -261,6 +281,7 @@ void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
{
kv_cfg[kv.first] = kv.second.as_string();
}
// Open K/V DB
proxy->db->open(fs_kv_inode, kv_cfg, [&](int res)
{
open_done = true;
@ -279,6 +300,27 @@ void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
strerror(-open_res), open_res);
exit(1);
}
// Run metadata updates (migrations)
open_done = false;
upgrade_db([&](int res)
{
open_done = true;
open_res = res;
});
while (!open_done)
{
proxy->ringloop->loop();
if (open_done)
break;
proxy->ringloop->wait();
}
if (open_res < 0)
{
fprintf(stderr, "Failed to check/apply filesystem metadata updates: %s (code %d)\n",
strerror(-open_res), open_res);
exit(1);
}
// Proceed
fs_inode_count = ((uint64_t)1 << (64-POOL_ID_BITS)) - 1;
shared_inode_threshold = pool_block_size;
if (!cfg["shared_inode_threshold"].is_null())
@ -335,3 +377,60 @@ void kv_fs_state_t::touch_inodes()
touch_inode(proxy, ino, true);
}
}
// Check and apply filesystem metadata upgrades, then report the result through <cb>.
//
// The only upgrade implemented: if the DB contains no key starting with "shared",
// every key starting with "i" is listed, and for each inode entry whose JSON
// "type" is "shared" an empty ("{}") key with the "shared" prefix is written.
// Inode keys which can't be parsed and values which aren't valid JSON are
// reported to stderr and skipped. A failed write is reported to stderr and the
// scan continues.
//
// @param cb  called exactly once: with 0 when the check/upgrade is finished, or
//            with the negative error code when listing keys fails with anything
//            other than -ENOENT
void kv_fs_state_t::upgrade_db(std::function<void(int)> cb)
{
    // In the future, FS metadata format upgrades should be added here
    // Currently we only do one thing: we create missing shared inode list keys ("sharedXXX")
    void *list_shared = proxy->db->list_start("shared");
    proxy->db->list_next(list_shared, [=](int res, const std::string & key, const std::string & value)
    {
        if (res < 0 && res != -ENOENT)
        {
            // Listing failed - don't treat it as "no shared keys", report the error
            proxy->db->list_close(list_shared);
            cb(res);
            return;
        }
        // Look at the key before closing the listing
        const bool has_shared_keys = res != -ENOENT && key.substr(0, 6) == "shared";
        proxy->db->list_close(list_shared);
        if (has_shared_keys)
        {
            // At least one "shared" key already exists - nothing to upgrade
            cb(0);
            return;
        }
        auto list_inodes = proxy->db->list_start("i");
        proxy->db->list_next(list_inodes, [=](int res, const std::string & key, const std::string & value)
        {
            if (res < 0 && res != -ENOENT)
            {
                // Listing failed in the middle - report it instead of claiming success
                proxy->db->list_close(list_inodes);
                cb(res);
                return;
            }
            if (res == -ENOENT || key.substr(0, 1) != "i")
            {
                // End of the inode key range
                proxy->db->list_close(list_inodes);
                cb(0);
                return;
            }
            uint64_t inode_id = kv_key_inode(key, 1);
            if (!inode_id)
            {
                fprintf(stderr, "Invalid inode key %s, skipping\n", key.c_str());
            }
            else
            {
                std::string err;
                auto ientry = json11::Json::parse(value, err);
                if (err != "")
                {
                    fprintf(stderr, "Invalid JSON in key %s (inode %ju), skipping\n", key.c_str(), inode_id);
                }
                else if (ientry["type"] == "shared")
                {
                    proxy->db->set(kv_inode_prefix_key(inode_id, "shared"), "{}", [=](int res)
                    {
                        if (res < 0)
                        {
                            fprintf(stderr, "Error writing key %s: %s (code %d)\n",
                                kv_inode_prefix_key(inode_id, "shared").c_str(), strerror(-res), res);
                        }
                        // Continue the listing only after the write has completed
                        proxy->db->list_next(list_inodes, NULL);
                    });
                    return;
                }
            }
            proxy->db->list_next(list_inodes, NULL);
        });
    });
}

View File

@ -67,12 +67,14 @@ struct kv_fs_state_t
uint64_t cur_shared_inode = 0, cur_shared_offset = 0;
std::map<inode_t, kv_inode_extend_t> extends;
std::set<inode_t> touch_queue;
std::map<inode_t, uint64_t> shared_removed;
std::vector<uint8_t> zero_block;
std::vector<uint8_t> scrap_block;
void init(nfs_proxy_t *proxy, json11::Json cfg);
void touch_inodes();
void upgrade_db(std::function<void(int)> cb);
~kv_fs_state_t();
};
@ -105,7 +107,9 @@ int kv_map_type(const std::string & type);
fattr3 get_kv_attributes(nfs_client_t *self, uint64_t ino, json11::Json attrs);
std::string kv_direntry_key(uint64_t dir_ino, const std::string & filename);
std::string kv_direntry_filename(const std::string & key);
// Build the key of inode <ino> under an arbitrary key prefix:
// <prefix><pool id in hex><'G' + number of hex digits of the inode number><inode number in hex>
std::string kv_inode_prefix_key(uint64_t ino, const char *prefix);
// Same as kv_inode_prefix_key(ino, "i")
std::string kv_inode_key(uint64_t ino);
// Parse a key built by the functions above back into the inode number, skipping the
// first <prefix_len> bytes of the key. Returns 0 if the key can't be parsed
uint64_t kv_key_inode(const std::string & key, int prefix_len = 1);
std::string kv_fh(uint64_t ino);
uint64_t kv_fh_inode(const std::string & fh);
bool kv_fh_valid(const std::string & fh);

View File

@ -228,28 +228,35 @@ resume_6:
return;
}
// (6) If regular file and inode is deleted: delete data
if ((!st->type || st->type == NF3REG) && st->ientry["nlink"].uint64_value() <= 1 &&
!st->ientry["shared_ino"].uint64_value())
if ((!st->type || st->type == NF3REG) && st->ientry["nlink"].uint64_value() <= 1)
{
// Remove data
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
{ "inode", INODE_NO_POOL(st->ino) },
{ "pool", (uint64_t)INODE_POOL(st->ino) },
}), [st](const cli_result_t & r)
if (!st->ientry["shared_ino"].uint64_value())
{
if (r.err)
// Remove data
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
{ "inode", INODE_NO_POOL(st->ino) },
{ "pool", (uint64_t)INODE_POOL(st->ino) },
}), [st](const cli_result_t & r)
{
fprintf(stderr, "Failed to remove inode %jx data: %s (code %d)\n",
st->ino, r.text.c_str(), r.err);
}
st->res = r.err;
nfs_kv_continue_delete(st, 7);
});
return;
if (r.err)
{
fprintf(stderr, "Failed to remove inode %jx data: %s (code %d)\n",
st->ino, r.text.c_str(), r.err);
}
st->res = r.err;
nfs_kv_continue_delete(st, 7);
});
return;
resume_7:
auto cb = std::move(st->cb);
cb(st->res);
return;
auto cb = std::move(st->cb);
cb(st->res);
return;
}
else
{
// Record removed part of the shared inode as obsolete in statistics
st->self->parent->kvfs->shared_removed[st->ientry["shared_ino"].uint64_value()] += st->ientry["shared_alloc"].uint64_value();
}
}
if (!st->res)
{

View File

@ -111,8 +111,20 @@ static void allocate_shared_inode(nfs_kv_write_state *st, int state)
if (res < 0)
{
st->self->parent->kvfs->cur_shared_inode = 0;
finish_allocate_shared(st->self, res);
}
else
{
st->self->parent->db->set(
kv_inode_prefix_key(st->self->parent->kvfs->cur_shared_inode, "shared"),
"{}", [st](int res)
{
if (res < 0)
st->self->parent->kvfs->cur_shared_inode = 0;
finish_allocate_shared(st->self, res);
}
);
}
finish_allocate_shared(st->self, res);
},
[](int res, const std::string & old_value)
{
@ -318,7 +330,7 @@ static void nfs_do_fsync(nfs_kv_write_state *st, int state)
st->self->parent->cli->execute(op);
}
static bool nfs_do_shared_readmodify(nfs_kv_write_state *st, int base_state, int state, bool unshare)
static bool nfs_do_shared_readmodify(nfs_kv_write_state *st, int base_state, int state)
{
assert(state <= base_state);
if (state < base_state) goto resume_0;
@ -785,7 +797,7 @@ resume_2:
return;
}
resume_3:
if (!nfs_do_shared_readmodify(st, 3, state, false))
if (!nfs_do_shared_readmodify(st, 3, state))
{
return;
}
@ -910,6 +922,8 @@ resume_12:
cb(st->res);
return;
}
// Record removed part of the shared inode as obsolete in statistics
st->self->parent->kvfs->shared_removed[st->ientry["shared_ino"].uint64_value()] += st->ientry["shared_alloc"].uint64_value();
st->ientry_text = new_unshared_ientry(st);
}
// Non-shared write