Store pool ID in inode metadata

master
Vitaliy Filippov 2024-03-10 16:04:56 +03:00
parent 0bde28c24a
commit dcbe1afac3
10 changed files with 73 additions and 51 deletions

View File

@ -118,41 +118,41 @@ std::string kv_direntry_filename(const std::string & key)
// Builds the KV database key under which the metadata of inode `ino` is stored.
// NOTE(review): this span looks like a diff hunk rendered without +/- markers, so two
// revisions of the body appear back to back (two `char key[...]` declarations, two
// `int n`, two returns) - confirm against the real file before compiling.
std::string kv_inode_key(uint64_t ino)
{
// Variant 1: "i-" followed by ino in hex; the zero-filled buffer keeps the text NUL-terminated
char key[24] = { 0 };
snprintf(key, sizeof(key), "i-%jx", ino);
// n = number of hex digits (string length minus the 2-character "i-" prefix)
int n = strnlen(key, sizeof(key)-1) - 2;
// Overwrite the '-' placeholder with the digit count: '0'..'9' for n < 10, 'A' and up for n >= 10
if (n < 10)
key[1] = '0'+n;
else
key[1] = 'A'+(n-10);
return std::string(key, n+2);
// Variant 2: "i" + pool ID in hex + one marker character + inode number without pool bits in hex
char key[32] = { 0 };
snprintf(key, sizeof(key), "i%x", INODE_POOL(ino));
// n = length of the "i<pool>" prefix
int n = strnlen(key, sizeof(key)-1);
// Leave one byte (key[n]) free for the marker and print the pool-less inode number right after it
snprintf(key+n+1, sizeof(key)-n-1, "%jx", INODE_NO_POOL(ino));
// m = number of hex digits of the pool-less inode number
int m = strnlen(key+n+1, sizeof(key)-n-2);
// Fill the reserved byte with 'G'+m; this also replaces the NUL that separated the two parts
// NOTE(review): presumably the digit-count marker makes shorter numbers sort first - confirm
key[n] = 'G'+m;
return std::string(key);
}
// Builds the NFS file handle string for inode `ino`.
// NOTE(review): this span looks like a diff hunk rendered without +/- markers - two
// revisions of the body follow each other, and everything after the first return is
// unreachable as written. Confirm against the real file.
std::string kv_fh(uint64_t ino)
{
// Variant 1: 'S' followed by the 8 raw in-memory bytes of ino (9 bytes total)
return "S"+std::string((char*)&ino, 8);
// Variant 2: 'S' followed by ino printed as hexadecimal text
char key[32] = { 0 };
snprintf(key, sizeof(key), "S%jx", ino);
return key;
}
// Extracts the inode number from a file handle string; returns 0 when the handle
// matches none of the recognized forms.
// NOTE(review): this span looks like a diff hunk rendered without +/- markers - old and
// new conditions are interleaved (e.g. two consecutive `else if` lines), so it is not
// compilable as shown. Confirm against the real file.
uint64_t kv_fh_inode(const std::string & fh)
{
// Root handle check in two revisions (literal "R" vs NFS_ROOT_HANDLE); the root handle yields inode 1
if (fh.size() == 1 && fh[0] == 'R')
if (fh == NFS_ROOT_HANDLE)
{
return 1;
}
// 'S' handles, condition in two revisions: exactly 9 bytes vs any string starting with 'S'
else if (fh.size() == 9 && fh[0] == 'S')
else if (fh[0] == 'S')
{
// Raw-byte form: reinterpret the 8 bytes following 'S' as a uint64_t
return *(uint64_t*)&fh[1];
}
// Handles longer than 17 bytes starting with 'I' carry the inode number in their last 8 bytes
// NOTE(review): this branch appears to belong to the older revision only - confirm
else if (fh.size() > 17 && fh[0] == 'I')
{
return *(uint64_t*)&fh[fh.size()-8];
// Text form: parse everything after the first character as a hexadecimal number;
// if nothing could be parsed, fall through to the final `return 0`
uint64_t ino = 0;
int r = sscanf(fh.c_str()+1, "%jx", &ino);
if (r == 1)
return ino;
}
return 0;
}
// Tells whether `fh` has the shape of a file handle that kv_fh_inode can decode.
// NOTE(review): this span looks like a diff hunk rendered without +/- markers - two
// return statements follow each other and the second is unreachable as written.
bool kv_fh_valid(const std::string & fh)
{
// Variant 1: the root handle, or 'S' plus 8 bytes (size 9), or an 'I' handle longer than 17 bytes
return fh == NFS_ROOT_HANDLE || fh.size() == 9 && fh[0] == 'S' || fh.size() > 17 && fh[0] == 'I';
// Variant 2: the root handle, or any string starting with 'S' (the remainder is not inspected here)
return fh == NFS_ROOT_HANDLE || fh[0] == 'S';
}
void nfs_kv_procs(nfs_client_t *self)
@ -255,7 +255,6 @@ void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
strerror(-open_res), open_res);
exit(1);
}
fs_base_inode = ((uint64_t)proxy->default_pool_id << (64-POOL_ID_BITS));
fs_inode_count = ((uint64_t)1 << (64-POOL_ID_BITS)) - 1;
shared_inode_threshold = pool_block_size;
if (!cfg["shared_inode_threshold"].is_null())

View File

@ -8,7 +8,6 @@
#include "nfs/nfs.h"
// NOTE(review): presumably the inode number of the filesystem root - confirm at its use sites
#define KV_ROOT_INODE 1
// Database key under which allocate_new_id reads and stores the inode ID allocation counter
#define KV_NEXT_ID_KEY "id"
// NOTE(review): 64-bit magic constant; by its name it presumably tags version 1 of the
// shared-file data header - confirm at its use sites
#define SHARED_FILE_MAGIC_V1 0x711A5158A6EDF17E
// Forward declaration only; the definition is not part of this excerpt
struct nfs_kv_write_state;
@ -42,13 +41,18 @@ struct kv_inode_extend_t
std::vector<std::function<void()>> waiters;
};
// Inode ID generator state, kept separately for every pool
// (stored as the mapped value of kv_fs_state_t::idgen, keyed by pool ID).
struct kv_idgen_t
{
    // Next ID to hand out locally; allocate_new_id serves it while next_id <= allocated_id
    uint64_t next_id = 1;
    // Upper bound of the ID range most recently reserved in the database
    uint64_t allocated_id = 0;
    // IDs (without pool bits) pushed back by kv_continue_create; allocate_new_id reuses these first
    std::vector<uint64_t> unallocated_ids;
};
struct kv_fs_state_t
{
nfs_proxy_t *proxy = NULL;
int touch_timer_id = -1;
uint64_t fs_kv_inode = 0;
uint64_t fs_base_inode = 0;
uint64_t fs_inode_count = 0;
int readdir_getattr_parallel = 8, id_alloc_batch_size = 200;
uint64_t pool_block_size = 0;
@ -57,8 +61,7 @@ struct kv_fs_state_t
uint64_t touch_interval = 1000;
std::map<list_cookie_t, list_cookie_val_t> list_cookies;
uint64_t fs_next_id = 1, fs_allocated_id = 0;
std::vector<uint64_t> unallocated_ids;
std::map<pool_id_t, kv_idgen_t> idgen;
std::vector<shared_alloc_queue_t> allocating_shared;
uint64_t cur_shared_inode = 0, cur_shared_offset = 0;
std::map<inode_t, kv_inode_extend_t> extends;
@ -105,7 +108,7 @@ std::string kv_inode_key(uint64_t ino);
// Returns the NFS file handle string for inode `ino`
std::string kv_fh(uint64_t ino);
// Returns the inode number encoded in file handle `fh`, or 0 if it cannot be decoded
uint64_t kv_fh_inode(const std::string & fh);
// Returns true if `fh` has the shape of a file handle produced by this module
bool kv_fh_valid(const std::string & fh);
// NOTE(review): two declarations of allocate_new_id follow; this excerpt looks like a diff
// rendered without +/- markers, so the first is presumably the removed signature - confirm
void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t new_id)> cb);
// Allocates a new inode ID inside pool `pool_id`. On success cb gets res == 0 and the new ID
// already combined with the pool ID; on failure cb gets a negative error code (e.g. -ENOSPC) and 0
void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function<void(int res, uint64_t new_id)> cb);
// Reads the metadata entry of inode `ino` and reports it through cb.
// NOTE(review): judging by the parameter names, cb receives a result code, the raw stored
// value and its parsed JSON form; allow_cache (default false) presumably permits serving
// a cached copy - confirm against the definition
void kv_read_inode(nfs_proxy_t *proxy, uint64_t ino,
std::function<void(int res, const std::string & value, json11::Json ientry)> cb,
bool allow_cache = false);

View File

@ -9,19 +9,30 @@
#include "nfs_proxy.h"
#include "nfs_kv.h"
void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t new_id)> cb)
void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function<void(int res, uint64_t new_id)> cb)
{
if (self->parent->kvfs->fs_next_id <= self->parent->kvfs->fs_allocated_id)
auto & idgen = self->parent->kvfs->idgen[pool_id];
if (idgen.unallocated_ids.size())
{
cb(0, self->parent->kvfs->fs_next_id++);
auto new_id = idgen.unallocated_ids.back();
idgen.unallocated_ids.pop_back();
cb(0, INODE_WITH_POOL(pool_id, new_id));
return;
}
else if (self->parent->kvfs->fs_next_id > self->parent->kvfs->fs_inode_count)
else if (idgen.next_id <= idgen.allocated_id)
{
idgen.next_id++;
cb(0, INODE_WITH_POOL(pool_id, idgen.next_id-1));
return;
}
// FIXME: Partial per-pool max ID limits
// FIXME: Add a safeguard against overlap between block volume IDs and FS file IDs
else if (idgen.next_id >= ((uint64_t)1 << (64-POOL_ID_BITS)))
{
cb(-ENOSPC, 0);
return;
}
self->parent->db->get(KV_NEXT_ID_KEY, [=](int res, const std::string & prev_str)
self->parent->db->get((pool_id ? "id"+std::to_string(pool_id) : "id"), [=](int res, const std::string & prev_str)
{
if (res < 0 && res != -ENOENT)
{
@ -29,7 +40,7 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
return;
}
uint64_t prev_val = stoull_full(prev_str);
if (prev_val >= self->parent->kvfs->fs_inode_count)
if (prev_val >= ((uint64_t)1 << (64-POOL_ID_BITS)))
{
cb(-ENOSPC, 0);
return;
@ -43,12 +54,12 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
{
new_val = self->parent->kvfs->fs_inode_count;
}
self->parent->db->set(KV_NEXT_ID_KEY, std::to_string(new_val), [=](int res)
self->parent->db->set((pool_id ? "id"+std::to_string(pool_id) : "id"), std::to_string(new_val), [=](int res)
{
if (res == -EAGAIN)
{
// CAS failure - retry
allocate_new_id(self, cb);
allocate_new_id(self, pool_id, cb);
}
else if (res < 0)
{
@ -56,9 +67,10 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
}
else
{
self->parent->kvfs->fs_next_id = prev_val+2;
self->parent->kvfs->fs_allocated_id = new_val;
cb(0, prev_val+1);
auto & idgen = self->parent->kvfs->idgen[pool_id];
idgen.next_id = prev_val+2;
idgen.allocated_id = new_val;
cb(0, INODE_WITH_POOL(pool_id, prev_val+1));
}
}, [prev_val](int res, const std::string & value)
{
@ -76,7 +88,9 @@ struct kv_create_state
uint64_t verf = 0;
uint64_t dir_ino = 0;
std::string filename;
// state
int res = 0;
pool_id_t pool_id = 0;
uint64_t new_id = 0;
json11::Json::object attrobj;
json11::Json attrs;
@ -107,7 +121,11 @@ static void kv_continue_create(kv_create_state *st, int state)
st->attrs = std::move(st->attrobj);
resume_1:
// Generate inode ID
allocate_new_id(st->self, [st](int res, uint64_t new_id)
// Directories and special files don't need a pool
st->pool_id = kv_map_type(st->attrs["type"].string_value()) == NF3REG
? st->self->parent->default_pool_id
: 0;
allocate_new_id(st->self, st->pool_id, [st](int res, uint64_t new_id)
{
st->res = res;
st->new_id = new_id;
@ -195,7 +213,8 @@ resume_5:
}
else
{
st->self->parent->kvfs->unallocated_ids.push_back(st->new_id);
auto & idgen = st->self->parent->kvfs->idgen[INODE_POOL(st->new_id)];
idgen.unallocated_ids.push_back(INODE_NO_POOL(st->new_id));
}
if (st->dup_ino)
{

View File

@ -70,7 +70,7 @@ resume_1:
st->op = new cluster_op_t;
{
st->op->opcode = OSD_OP_READ;
st->op->inode = st->self->parent->kvfs->fs_base_inode + st->ientry["shared_ino"].uint64_value();
st->op->inode = st->ientry["shared_ino"].uint64_value();
// Always read including header to react if the file was possibly moved away
auto read_offset = st->ientry["shared_offset"].uint64_value();
st->op->offset = align_down(read_offset);
@ -136,7 +136,7 @@ resume_2:
st->buf = st->aligned_buf + st->offset - st->aligned_offset;
st->op = new cluster_op_t;
st->op->opcode = OSD_OP_READ;
st->op->inode = st->self->parent->kvfs->fs_base_inode + st->ino;
st->op->inode = st->ino;
st->op->offset = st->aligned_offset;
st->op->len = st->aligned_size;
st->op->iov.push_back(st->aligned_buf, st->aligned_size);

View File

@ -233,8 +233,8 @@ resume_6:
{
// Remove data
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
{ "inode", INODE_NO_POOL(st->ino) },
{ "pool", (uint64_t)INODE_POOL(st->ino) },
}), [st](const cli_result_t & r)
{
if (r.err)

View File

@ -280,8 +280,8 @@ resume_8:
if (st->rm_dest_data)
{
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->new_direntry["ino"].uint64_value()) },
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->new_direntry["ino"].uint64_value()) },
{ "inode", INODE_NO_POOL(st->new_direntry["ino"].uint64_value()) },
{ "pool", (uint64_t)INODE_POOL(st->new_direntry["ino"].uint64_value()) },
}), [st](const cli_result_t & r)
{
if (r.err)

View File

@ -118,8 +118,8 @@ resume_2:
{
// Delete extra data when downsizing
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
{ "inode", INODE_NO_POOL(st->ino) },
{ "pool", (uint64_t)INODE_POOL(st->ino) },
{ "min_offset", st->set_attrs["size"].uint64_value() },
}), [st](const cli_result_t & r)
{

View File

@ -95,7 +95,7 @@ static void allocate_shared_inode(nfs_kv_write_state *st, int state)
{
return;
}
allocate_new_id(st->self, [st](int res, uint64_t new_id)
allocate_new_id(st->self, st->self->parent->default_pool_id, [st](int res, uint64_t new_id)
{
if (res < 0)
{
@ -133,7 +133,7 @@ static void nfs_do_write(uint64_t ino, uint64_t offset, uint64_t size, std::func
{
auto op = new cluster_op_t;
op->opcode = OSD_OP_WRITE;
op->inode = st->self->parent->kvfs->fs_base_inode + ino;
op->inode = ino;
op->offset = offset;
op->len = size;
prepare(op);
@ -178,7 +178,7 @@ void nfs_do_rmw(nfs_rmw_t *rmw)
}
auto op = new cluster_op_t;
op->opcode = OSD_OP_READ;
op->inode = parent->kvfs->fs_base_inode + rmw->ino;
op->inode = rmw->ino;
op->offset = rmw->offset & ~(align-1);
op->len = align;
op->iov.push_back(rmw->part_buf, op->len);
@ -209,7 +209,7 @@ void nfs_do_rmw(nfs_rmw_t *rmw)
bool is_end = ((rmw->offset+rmw->size) % align);
auto op = new cluster_op_t;
op->opcode = OSD_OP_WRITE;
op->inode = parent->kvfs->fs_base_inode + rmw->ino;
op->inode = rmw->ino;
op->offset = rmw->offset & ~(align-1);
op->len = align;
op->version = rmw->version;
@ -263,7 +263,7 @@ static void nfs_do_shared_read(nfs_kv_write_state *st, int state)
uint64_t shared_offset = st->ientry["shared_offset"].uint64_value();
auto op = new cluster_op_t;
op->opcode = OSD_OP_READ;
op->inode = st->self->parent->kvfs->fs_base_inode + st->ientry["shared_ino"].uint64_value();
op->inode = st->ientry["shared_ino"].uint64_value();
op->offset = align_down(shared_offset);
// Allow unaligned shared reads
auto pre = shared_offset-align_down(shared_offset);
@ -759,7 +759,7 @@ resume_1:
cb(st->res == 0 ? -EINVAL : st->res);
return;
}
st->was_immediate = st->self->parent->cli->get_immediate_commit(st->self->parent->kvfs->fs_base_inode + st->ino);
st->was_immediate = st->self->parent->cli->get_immediate_commit(st->ino);
st->new_size = st->ientry["size"].uint64_value();
if (st->new_size < st->offset + st->size)
{

View File

@ -23,6 +23,7 @@ public:
std::string bind_address;
uint64_t fsid = 1;
uint64_t server_id = 0;
// FIXME: Maybe allow creating files in different pools?
std::string default_pool;
std::string export_root;
bool portmap_enabled;

View File

@ -11,7 +11,7 @@
// 0x10000 == 1 << POOL_ID_BITS
// NOTE(review): presumably the exclusive upper bound of valid pool IDs - confirm at its use sites
#define POOL_ID_MAX 0x10000
// Width, in bits, of the pool ID stored in the top bits of a 64-bit inode number
#define POOL_ID_BITS 16
// Extracts the pool ID: the top POOL_ID_BITS bits of the inode number
#define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
// NOTE(review): INODE_NO_POOL is defined twice below; this excerpt looks like a diff rendered
// without +/- markers, so the first definition is presumably the removed one - confirm.
// The first form does not parenthesize its argument, so an argument containing an operator
// of lower precedence than `&` (e.g. INODE_NO_POOL(a | b)) would be masked incorrectly.
#define INODE_NO_POOL(inode) (inode_t)(inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1))
// Strips the pool ID, keeping only the low (64 - POOL_ID_BITS) bits; the argument is parenthesized
#define INODE_NO_POOL(inode) (inode_t)((inode) & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1))
// Combines a pool ID (placed in the top POOL_ID_BITS bits) with the pool-less part of an inode number
#define INODE_WITH_POOL(pool_id, inode) (((inode_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode))
// Pool ID is 16 bits long