Store pool ID in inode metadata
parent
0bde28c24a
commit
dcbe1afac3
|
@ -118,41 +118,41 @@ std::string kv_direntry_filename(const std::string & key)
|
|||
|
||||
std::string kv_inode_key(uint64_t ino)
|
||||
{
|
||||
char key[24] = { 0 };
|
||||
snprintf(key, sizeof(key), "i-%jx", ino);
|
||||
int n = strnlen(key, sizeof(key)-1) - 2;
|
||||
if (n < 10)
|
||||
key[1] = '0'+n;
|
||||
else
|
||||
key[1] = 'A'+(n-10);
|
||||
return std::string(key, n+2);
|
||||
char key[32] = { 0 };
|
||||
snprintf(key, sizeof(key), "i%x", INODE_POOL(ino));
|
||||
int n = strnlen(key, sizeof(key)-1);
|
||||
snprintf(key+n+1, sizeof(key)-n-1, "%jx", INODE_NO_POOL(ino));
|
||||
int m = strnlen(key+n+1, sizeof(key)-n-2);
|
||||
key[n] = 'G'+m;
|
||||
return std::string(key);
|
||||
}
|
||||
|
||||
std::string kv_fh(uint64_t ino)
|
||||
{
|
||||
return "S"+std::string((char*)&ino, 8);
|
||||
char key[32] = { 0 };
|
||||
snprintf(key, sizeof(key), "S%jx", ino);
|
||||
return key;
|
||||
}
|
||||
|
||||
uint64_t kv_fh_inode(const std::string & fh)
|
||||
{
|
||||
if (fh.size() == 1 && fh[0] == 'R')
|
||||
if (fh == NFS_ROOT_HANDLE)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else if (fh.size() == 9 && fh[0] == 'S')
|
||||
else if (fh[0] == 'S')
|
||||
{
|
||||
return *(uint64_t*)&fh[1];
|
||||
}
|
||||
else if (fh.size() > 17 && fh[0] == 'I')
|
||||
{
|
||||
return *(uint64_t*)&fh[fh.size()-8];
|
||||
uint64_t ino = 0;
|
||||
int r = sscanf(fh.c_str()+1, "%jx", &ino);
|
||||
if (r == 1)
|
||||
return ino;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool kv_fh_valid(const std::string & fh)
|
||||
{
|
||||
return fh == NFS_ROOT_HANDLE || fh.size() == 9 && fh[0] == 'S' || fh.size() > 17 && fh[0] == 'I';
|
||||
return fh == NFS_ROOT_HANDLE || fh[0] == 'S';
|
||||
}
|
||||
|
||||
void nfs_kv_procs(nfs_client_t *self)
|
||||
|
@ -255,7 +255,6 @@ void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
|
|||
strerror(-open_res), open_res);
|
||||
exit(1);
|
||||
}
|
||||
fs_base_inode = ((uint64_t)proxy->default_pool_id << (64-POOL_ID_BITS));
|
||||
fs_inode_count = ((uint64_t)1 << (64-POOL_ID_BITS)) - 1;
|
||||
shared_inode_threshold = pool_block_size;
|
||||
if (!cfg["shared_inode_threshold"].is_null())
|
||||
|
|
13
src/nfs_kv.h
13
src/nfs_kv.h
|
@ -8,7 +8,6 @@
|
|||
#include "nfs/nfs.h"
|
||||
|
||||
#define KV_ROOT_INODE 1
|
||||
#define KV_NEXT_ID_KEY "id"
|
||||
#define SHARED_FILE_MAGIC_V1 0x711A5158A6EDF17E
|
||||
|
||||
struct nfs_kv_write_state;
|
||||
|
@ -42,13 +41,18 @@ struct kv_inode_extend_t
|
|||
std::vector<std::function<void()>> waiters;
|
||||
};
|
||||
|
||||
struct kv_idgen_t
|
||||
{
|
||||
uint64_t next_id = 1, allocated_id = 0;
|
||||
std::vector<uint64_t> unallocated_ids;
|
||||
};
|
||||
|
||||
struct kv_fs_state_t
|
||||
{
|
||||
nfs_proxy_t *proxy = NULL;
|
||||
int touch_timer_id = -1;
|
||||
|
||||
uint64_t fs_kv_inode = 0;
|
||||
uint64_t fs_base_inode = 0;
|
||||
uint64_t fs_inode_count = 0;
|
||||
int readdir_getattr_parallel = 8, id_alloc_batch_size = 200;
|
||||
uint64_t pool_block_size = 0;
|
||||
|
@ -57,8 +61,7 @@ struct kv_fs_state_t
|
|||
uint64_t touch_interval = 1000;
|
||||
|
||||
std::map<list_cookie_t, list_cookie_val_t> list_cookies;
|
||||
uint64_t fs_next_id = 1, fs_allocated_id = 0;
|
||||
std::vector<uint64_t> unallocated_ids;
|
||||
std::map<pool_id_t, kv_idgen_t> idgen;
|
||||
std::vector<shared_alloc_queue_t> allocating_shared;
|
||||
uint64_t cur_shared_inode = 0, cur_shared_offset = 0;
|
||||
std::map<inode_t, kv_inode_extend_t> extends;
|
||||
|
@ -105,7 +108,7 @@ std::string kv_inode_key(uint64_t ino);
|
|||
std::string kv_fh(uint64_t ino);
|
||||
uint64_t kv_fh_inode(const std::string & fh);
|
||||
bool kv_fh_valid(const std::string & fh);
|
||||
void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t new_id)> cb);
|
||||
void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function<void(int res, uint64_t new_id)> cb);
|
||||
void kv_read_inode(nfs_proxy_t *proxy, uint64_t ino,
|
||||
std::function<void(int res, const std::string & value, json11::Json ientry)> cb,
|
||||
bool allow_cache = false);
|
||||
|
|
|
@ -9,19 +9,30 @@
|
|||
#include "nfs_proxy.h"
|
||||
#include "nfs_kv.h"
|
||||
|
||||
void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t new_id)> cb)
|
||||
void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function<void(int res, uint64_t new_id)> cb)
|
||||
{
|
||||
if (self->parent->kvfs->fs_next_id <= self->parent->kvfs->fs_allocated_id)
|
||||
auto & idgen = self->parent->kvfs->idgen[pool_id];
|
||||
if (idgen.unallocated_ids.size())
|
||||
{
|
||||
cb(0, self->parent->kvfs->fs_next_id++);
|
||||
auto new_id = idgen.unallocated_ids.back();
|
||||
idgen.unallocated_ids.pop_back();
|
||||
cb(0, INODE_WITH_POOL(pool_id, new_id));
|
||||
return;
|
||||
}
|
||||
else if (self->parent->kvfs->fs_next_id > self->parent->kvfs->fs_inode_count)
|
||||
else if (idgen.next_id <= idgen.allocated_id)
|
||||
{
|
||||
idgen.next_id++;
|
||||
cb(0, INODE_WITH_POOL(pool_id, idgen.next_id-1));
|
||||
return;
|
||||
}
|
||||
// FIXME: Partial per-pool max ID limits
|
||||
// FIXME: Fool protection from block volume and FS file ID overlap
|
||||
else if (idgen.next_id >= ((uint64_t)1 << (64-POOL_ID_BITS)))
|
||||
{
|
||||
cb(-ENOSPC, 0);
|
||||
return;
|
||||
}
|
||||
self->parent->db->get(KV_NEXT_ID_KEY, [=](int res, const std::string & prev_str)
|
||||
self->parent->db->get((pool_id ? "id"+std::to_string(pool_id) : "id"), [=](int res, const std::string & prev_str)
|
||||
{
|
||||
if (res < 0 && res != -ENOENT)
|
||||
{
|
||||
|
@ -29,7 +40,7 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
|
|||
return;
|
||||
}
|
||||
uint64_t prev_val = stoull_full(prev_str);
|
||||
if (prev_val >= self->parent->kvfs->fs_inode_count)
|
||||
if (prev_val >= ((uint64_t)1 << (64-POOL_ID_BITS)))
|
||||
{
|
||||
cb(-ENOSPC, 0);
|
||||
return;
|
||||
|
@ -43,12 +54,12 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
|
|||
{
|
||||
new_val = self->parent->kvfs->fs_inode_count;
|
||||
}
|
||||
self->parent->db->set(KV_NEXT_ID_KEY, std::to_string(new_val), [=](int res)
|
||||
self->parent->db->set((pool_id ? "id"+std::to_string(pool_id) : "id"), std::to_string(new_val), [=](int res)
|
||||
{
|
||||
if (res == -EAGAIN)
|
||||
{
|
||||
// CAS failure - retry
|
||||
allocate_new_id(self, cb);
|
||||
allocate_new_id(self, pool_id, cb);
|
||||
}
|
||||
else if (res < 0)
|
||||
{
|
||||
|
@ -56,9 +67,10 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
|
|||
}
|
||||
else
|
||||
{
|
||||
self->parent->kvfs->fs_next_id = prev_val+2;
|
||||
self->parent->kvfs->fs_allocated_id = new_val;
|
||||
cb(0, prev_val+1);
|
||||
auto & idgen = self->parent->kvfs->idgen[pool_id];
|
||||
idgen.next_id = prev_val+2;
|
||||
idgen.allocated_id = new_val;
|
||||
cb(0, INODE_WITH_POOL(pool_id, prev_val+1));
|
||||
}
|
||||
}, [prev_val](int res, const std::string & value)
|
||||
{
|
||||
|
@ -76,7 +88,9 @@ struct kv_create_state
|
|||
uint64_t verf = 0;
|
||||
uint64_t dir_ino = 0;
|
||||
std::string filename;
|
||||
// state
|
||||
int res = 0;
|
||||
pool_id_t pool_id = 0;
|
||||
uint64_t new_id = 0;
|
||||
json11::Json::object attrobj;
|
||||
json11::Json attrs;
|
||||
|
@ -107,7 +121,11 @@ static void kv_continue_create(kv_create_state *st, int state)
|
|||
st->attrs = std::move(st->attrobj);
|
||||
resume_1:
|
||||
// Generate inode ID
|
||||
allocate_new_id(st->self, [st](int res, uint64_t new_id)
|
||||
// Directories and special files don't need pool
|
||||
st->pool_id = kv_map_type(st->attrs["type"].string_value()) == NF3REG
|
||||
? st->self->parent->default_pool_id
|
||||
: 0;
|
||||
allocate_new_id(st->self, st->pool_id, [st](int res, uint64_t new_id)
|
||||
{
|
||||
st->res = res;
|
||||
st->new_id = new_id;
|
||||
|
@ -195,7 +213,8 @@ resume_5:
|
|||
}
|
||||
else
|
||||
{
|
||||
st->self->parent->kvfs->unallocated_ids.push_back(st->new_id);
|
||||
auto & idgen = st->self->parent->kvfs->idgen[INODE_POOL(st->new_id)];
|
||||
idgen.unallocated_ids.push_back(INODE_NO_POOL(st->new_id));
|
||||
}
|
||||
if (st->dup_ino)
|
||||
{
|
||||
|
|
|
@ -70,7 +70,7 @@ resume_1:
|
|||
st->op = new cluster_op_t;
|
||||
{
|
||||
st->op->opcode = OSD_OP_READ;
|
||||
st->op->inode = st->self->parent->kvfs->fs_base_inode + st->ientry["shared_ino"].uint64_value();
|
||||
st->op->inode = st->ientry["shared_ino"].uint64_value();
|
||||
// Always read including header to react if the file was possibly moved away
|
||||
auto read_offset = st->ientry["shared_offset"].uint64_value();
|
||||
st->op->offset = align_down(read_offset);
|
||||
|
@ -136,7 +136,7 @@ resume_2:
|
|||
st->buf = st->aligned_buf + st->offset - st->aligned_offset;
|
||||
st->op = new cluster_op_t;
|
||||
st->op->opcode = OSD_OP_READ;
|
||||
st->op->inode = st->self->parent->kvfs->fs_base_inode + st->ino;
|
||||
st->op->inode = st->ino;
|
||||
st->op->offset = st->aligned_offset;
|
||||
st->op->len = st->aligned_size;
|
||||
st->op->iov.push_back(st->aligned_buf, st->aligned_size);
|
||||
|
|
|
@ -233,8 +233,8 @@ resume_6:
|
|||
{
|
||||
// Remove data
|
||||
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
|
||||
{ "inode", INODE_NO_POOL(st->ino) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->ino) },
|
||||
}), [st](const cli_result_t & r)
|
||||
{
|
||||
if (r.err)
|
||||
|
|
|
@ -280,8 +280,8 @@ resume_8:
|
|||
if (st->rm_dest_data)
|
||||
{
|
||||
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->new_direntry["ino"].uint64_value()) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->new_direntry["ino"].uint64_value()) },
|
||||
{ "inode", INODE_NO_POOL(st->new_direntry["ino"].uint64_value()) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->new_direntry["ino"].uint64_value()) },
|
||||
}), [st](const cli_result_t & r)
|
||||
{
|
||||
if (r.err)
|
||||
|
|
|
@ -118,8 +118,8 @@ resume_2:
|
|||
{
|
||||
// Delete extra data when downsizing
|
||||
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
|
||||
{ "inode", INODE_NO_POOL(st->ino) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->ino) },
|
||||
{ "min_offset", st->set_attrs["size"].uint64_value() },
|
||||
}), [st](const cli_result_t & r)
|
||||
{
|
||||
|
|
|
@ -95,7 +95,7 @@ static void allocate_shared_inode(nfs_kv_write_state *st, int state)
|
|||
{
|
||||
return;
|
||||
}
|
||||
allocate_new_id(st->self, [st](int res, uint64_t new_id)
|
||||
allocate_new_id(st->self, st->self->parent->default_pool_id, [st](int res, uint64_t new_id)
|
||||
{
|
||||
if (res < 0)
|
||||
{
|
||||
|
@ -133,7 +133,7 @@ static void nfs_do_write(uint64_t ino, uint64_t offset, uint64_t size, std::func
|
|||
{
|
||||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_WRITE;
|
||||
op->inode = st->self->parent->kvfs->fs_base_inode + ino;
|
||||
op->inode = ino;
|
||||
op->offset = offset;
|
||||
op->len = size;
|
||||
prepare(op);
|
||||
|
@ -178,7 +178,7 @@ void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
}
|
||||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_READ;
|
||||
op->inode = parent->kvfs->fs_base_inode + rmw->ino;
|
||||
op->inode = rmw->ino;
|
||||
op->offset = rmw->offset & ~(align-1);
|
||||
op->len = align;
|
||||
op->iov.push_back(rmw->part_buf, op->len);
|
||||
|
@ -209,7 +209,7 @@ void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
bool is_end = ((rmw->offset+rmw->size) % align);
|
||||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_WRITE;
|
||||
op->inode = parent->kvfs->fs_base_inode + rmw->ino;
|
||||
op->inode = rmw->ino;
|
||||
op->offset = rmw->offset & ~(align-1);
|
||||
op->len = align;
|
||||
op->version = rmw->version;
|
||||
|
@ -263,7 +263,7 @@ static void nfs_do_shared_read(nfs_kv_write_state *st, int state)
|
|||
uint64_t shared_offset = st->ientry["shared_offset"].uint64_value();
|
||||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_READ;
|
||||
op->inode = st->self->parent->kvfs->fs_base_inode + st->ientry["shared_ino"].uint64_value();
|
||||
op->inode = st->ientry["shared_ino"].uint64_value();
|
||||
op->offset = align_down(shared_offset);
|
||||
// Allow unaligned shared reads
|
||||
auto pre = shared_offset-align_down(shared_offset);
|
||||
|
@ -759,7 +759,7 @@ resume_1:
|
|||
cb(st->res == 0 ? -EINVAL : st->res);
|
||||
return;
|
||||
}
|
||||
st->was_immediate = st->self->parent->cli->get_immediate_commit(st->self->parent->kvfs->fs_base_inode + st->ino);
|
||||
st->was_immediate = st->self->parent->cli->get_immediate_commit(st->ino);
|
||||
st->new_size = st->ientry["size"].uint64_value();
|
||||
if (st->new_size < st->offset + st->size)
|
||||
{
|
||||
|
|
|
@ -23,6 +23,7 @@ public:
|
|||
std::string bind_address;
|
||||
uint64_t fsid = 1;
|
||||
uint64_t server_id = 0;
|
||||
// FIXME: Maybe allow to create files in different pools?
|
||||
std::string default_pool;
|
||||
std::string export_root;
|
||||
bool portmap_enabled;
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
#define POOL_ID_MAX 0x10000
|
||||
#define POOL_ID_BITS 16
|
||||
#define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
|
||||
#define INODE_NO_POOL(inode) (inode_t)(inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1))
|
||||
#define INODE_NO_POOL(inode) (inode_t)((inode) & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1))
|
||||
#define INODE_WITH_POOL(pool_id, inode) (((inode_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode))
|
||||
|
||||
// Pool ID is 16 bits long
|
||||
|
|
Loading…
Reference in New Issue