diff --git a/src/nfs_kv.cpp b/src/nfs_kv.cpp index 4b58eb54..f749e1a1 100644 --- a/src/nfs_kv.cpp +++ b/src/nfs_kv.cpp @@ -118,41 +118,41 @@ std::string kv_direntry_filename(const std::string & key) std::string kv_inode_key(uint64_t ino) { - char key[24] = { 0 }; - snprintf(key, sizeof(key), "i-%jx", ino); - int n = strnlen(key, sizeof(key)-1) - 2; - if (n < 10) - key[1] = '0'+n; - else - key[1] = 'A'+(n-10); - return std::string(key, n+2); + char key[32] = { 0 }; + snprintf(key, sizeof(key), "i%x", INODE_POOL(ino)); + int n = strnlen(key, sizeof(key)-1); + snprintf(key+n+1, sizeof(key)-n-1, "%jx", INODE_NO_POOL(ino)); + int m = strnlen(key+n+1, sizeof(key)-n-2); + key[n] = 'G'+m; + return std::string(key); } std::string kv_fh(uint64_t ino) { - return "S"+std::string((char*)&ino, 8); + char key[32] = { 0 }; + snprintf(key, sizeof(key), "S%jx", ino); + return key; } uint64_t kv_fh_inode(const std::string & fh) { - if (fh.size() == 1 && fh[0] == 'R') + if (fh == NFS_ROOT_HANDLE) { return 1; } - else if (fh.size() == 9 && fh[0] == 'S') + else if (fh[0] == 'S') { - return *(uint64_t*)&fh[1]; - } - else if (fh.size() > 17 && fh[0] == 'I') - { - return *(uint64_t*)&fh[fh.size()-8]; + uint64_t ino = 0; + int r = sscanf(fh.c_str()+1, "%jx", &ino); + if (r == 1) + return ino; } return 0; } bool kv_fh_valid(const std::string & fh) { - return fh == NFS_ROOT_HANDLE || fh.size() == 9 && fh[0] == 'S' || fh.size() > 17 && fh[0] == 'I'; + return fh == NFS_ROOT_HANDLE || fh[0] == 'S'; } void nfs_kv_procs(nfs_client_t *self) @@ -255,7 +255,6 @@ void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg) strerror(-open_res), open_res); exit(1); } - fs_base_inode = ((uint64_t)proxy->default_pool_id << (64-POOL_ID_BITS)); fs_inode_count = ((uint64_t)1 << (64-POOL_ID_BITS)) - 1; shared_inode_threshold = pool_block_size; if (!cfg["shared_inode_threshold"].is_null()) diff --git a/src/nfs_kv.h b/src/nfs_kv.h index 7b28d6de..839d36b0 100644 --- a/src/nfs_kv.h +++ b/src/nfs_kv.h @@ -8,7 +8,6 @@ #include "nfs/nfs.h" #define KV_ROOT_INODE 1 -#define KV_NEXT_ID_KEY "id" #define SHARED_FILE_MAGIC_V1 0x711A5158A6EDF17E struct nfs_kv_write_state; @@ -42,13 +41,18 @@ struct kv_inode_extend_t std::vector> waiters; }; +struct kv_idgen_t +{ + uint64_t next_id = 1, allocated_id = 0; + std::vector unallocated_ids; +}; + struct kv_fs_state_t { nfs_proxy_t *proxy = NULL; int touch_timer_id = -1; uint64_t fs_kv_inode = 0; - uint64_t fs_base_inode = 0; uint64_t fs_inode_count = 0; int readdir_getattr_parallel = 8, id_alloc_batch_size = 200; uint64_t pool_block_size = 0; @@ -57,8 +61,7 @@ struct kv_fs_state_t uint64_t touch_interval = 1000; std::map list_cookies; - uint64_t fs_next_id = 1, fs_allocated_id = 0; - std::vector unallocated_ids; + std::map idgen; std::vector allocating_shared; uint64_t cur_shared_inode = 0, cur_shared_offset = 0; std::map extends; @@ -105,7 +108,7 @@ std::string kv_inode_key(uint64_t ino); std::string kv_fh(uint64_t ino); uint64_t kv_fh_inode(const std::string & fh); bool kv_fh_valid(const std::string & fh); -void allocate_new_id(nfs_client_t *self, std::function cb); +void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function cb); void kv_read_inode(nfs_proxy_t *proxy, uint64_t ino, std::function cb, bool allow_cache = false); diff --git a/src/nfs_kv_create.cpp b/src/nfs_kv_create.cpp index 2fdd01a9..0375c0cf 100644 --- a/src/nfs_kv_create.cpp +++ b/src/nfs_kv_create.cpp @@ -9,19 +9,30 @@ #include "nfs_proxy.h" #include "nfs_kv.h" -void allocate_new_id(nfs_client_t *self, std::function cb) +void allocate_new_id(nfs_client_t *self, pool_id_t pool_id, std::function cb) { - if (self->parent->kvfs->fs_next_id <= self->parent->kvfs->fs_allocated_id) + auto & idgen = self->parent->kvfs->idgen[pool_id]; + if (idgen.unallocated_ids.size()) { - cb(0, self->parent->kvfs->fs_next_id++); + auto new_id = idgen.unallocated_ids.back(); + idgen.unallocated_ids.pop_back(); + cb(0, INODE_WITH_POOL(pool_id, new_id)); return; } - else if (self->parent->kvfs->fs_next_id > self->parent->kvfs->fs_inode_count) + else if (idgen.next_id <= idgen.allocated_id) + { + idgen.next_id++; + cb(0, INODE_WITH_POOL(pool_id, idgen.next_id-1)); + return; + } + // FIXME: Partial per-pool max ID limits + // FIXME: Fool protection from block volume and FS file ID overlap + else if (idgen.next_id >= ((uint64_t)1 << (64-POOL_ID_BITS))) { cb(-ENOSPC, 0); return; } - self->parent->db->get(KV_NEXT_ID_KEY, [=](int res, const std::string & prev_str) + self->parent->db->get((pool_id ? "id"+std::to_string(pool_id) : "id"), [=](int res, const std::string & prev_str) { if (res < 0 && res != -ENOENT) { @@ -29,7 +40,7 @@ void allocate_new_id(nfs_client_t *self, std::function= self->parent->kvfs->fs_inode_count) + if (prev_val >= ((uint64_t)1 << (64-POOL_ID_BITS))) { cb(-ENOSPC, 0); return; @@ -43,12 +54,12 @@ void allocate_new_id(nfs_client_t *self, std::functionparent->kvfs->fs_inode_count; } - self->parent->db->set(KV_NEXT_ID_KEY, std::to_string(new_val), [=](int res) + self->parent->db->set((pool_id ? "id"+std::to_string(pool_id) : "id"), std::to_string(new_val), [=](int res) { if (res == -EAGAIN) { // CAS failure - retry - allocate_new_id(self, cb); + allocate_new_id(self, pool_id, cb); } else if (res < 0) { @@ -56,9 +67,10 @@ void allocate_new_id(nfs_client_t *self, std::functionparent->kvfs->fs_next_id = prev_val+2; - self->parent->kvfs->fs_allocated_id = new_val; - cb(0, prev_val+1); + auto & idgen = self->parent->kvfs->idgen[pool_id]; + idgen.next_id = prev_val+2; + idgen.allocated_id = new_val; + cb(0, INODE_WITH_POOL(pool_id, prev_val+1)); } }, [prev_val](int res, const std::string & value) { @@ -76,7 +88,9 @@ struct kv_create_state uint64_t verf = 0; uint64_t dir_ino = 0; std::string filename; + // state int res = 0; + pool_id_t pool_id = 0; uint64_t new_id = 0; json11::Json::object attrobj; json11::Json attrs; @@ -107,7 +121,11 @@ static void kv_continue_create(kv_create_state *st, int state) st->attrs = std::move(st->attrobj); resume_1: // Generate inode ID - allocate_new_id(st->self, [st](int res, uint64_t new_id) + // Directories and special files don't need pool + st->pool_id = kv_map_type(st->attrs["type"].string_value()) == NF3REG + ? st->self->parent->default_pool_id + : 0; + allocate_new_id(st->self, st->pool_id, [st](int res, uint64_t new_id) { st->res = res; st->new_id = new_id; @@ -195,7 +213,8 @@ resume_5: } else { - st->self->parent->kvfs->unallocated_ids.push_back(st->new_id); + auto & idgen = st->self->parent->kvfs->idgen[INODE_POOL(st->new_id)]; + idgen.unallocated_ids.push_back(INODE_NO_POOL(st->new_id)); } if (st->dup_ino) { diff --git a/src/nfs_kv_read.cpp b/src/nfs_kv_read.cpp index ccc9be9d..1e9a7629 100644 --- a/src/nfs_kv_read.cpp +++ b/src/nfs_kv_read.cpp @@ -70,7 +70,7 @@ resume_1: st->op = new cluster_op_t; { st->op->opcode = OSD_OP_READ; - st->op->inode = st->self->parent->kvfs->fs_base_inode + st->ientry["shared_ino"].uint64_value(); + st->op->inode = st->ientry["shared_ino"].uint64_value(); // Always read including header to react if the file was possibly moved away auto read_offset = st->ientry["shared_offset"].uint64_value(); st->op->offset = align_down(read_offset); @@ -136,7 +136,7 @@ resume_2: st->buf = st->aligned_buf + st->offset - st->aligned_offset; st->op = new cluster_op_t; st->op->opcode = OSD_OP_READ; - st->op->inode = st->self->parent->kvfs->fs_base_inode + st->ino; + st->op->inode = st->ino; st->op->offset = st->aligned_offset; st->op->len = st->aligned_size; st->op->iov.push_back(st->aligned_buf, st->aligned_size); diff --git a/src/nfs_kv_remove.cpp b/src/nfs_kv_remove.cpp index 27dacf96..c6a55ad9 100644 --- a/src/nfs_kv_remove.cpp +++ b/src/nfs_kv_remove.cpp @@ -233,8 +233,8 @@ resume_6: { // Remove data st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object { - { "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) }, - { "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) }, + { "inode", INODE_NO_POOL(st->ino) }, + { "pool", (uint64_t)INODE_POOL(st->ino) }, }), [st](const cli_result_t & r) { if (r.err) diff --git a/src/nfs_kv_rename.cpp b/src/nfs_kv_rename.cpp index 24f30846..7bd27c11 100644 --- a/src/nfs_kv_rename.cpp +++ b/src/nfs_kv_rename.cpp @@ -280,8 +280,8 @@ resume_8: if (st->rm_dest_data) { st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object { - { "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->new_direntry["ino"].uint64_value()) }, - { "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->new_direntry["ino"].uint64_value()) }, + { "inode", INODE_NO_POOL(st->new_direntry["ino"].uint64_value()) }, + { "pool", (uint64_t)INODE_POOL(st->new_direntry["ino"].uint64_value()) }, }), [st](const cli_result_t & r) { if (r.err) diff --git a/src/nfs_kv_setattr.cpp b/src/nfs_kv_setattr.cpp index 90666958..5135b3a2 100644 --- a/src/nfs_kv_setattr.cpp +++ b/src/nfs_kv_setattr.cpp @@ -118,8 +118,8 @@ resume_2: { // Delete extra data when downsizing st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object { - { "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) }, - { "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) }, + { "inode", INODE_NO_POOL(st->ino) }, + { "pool", (uint64_t)INODE_POOL(st->ino) }, { "min_offset", st->set_attrs["size"].uint64_value() }, }), [st](const cli_result_t & r) { diff --git a/src/nfs_kv_write.cpp b/src/nfs_kv_write.cpp index 8b094349..e69d1130 100644 --- a/src/nfs_kv_write.cpp +++ b/src/nfs_kv_write.cpp @@ -95,7 +95,7 @@ static void allocate_shared_inode(nfs_kv_write_state *st, int state) { return; } - allocate_new_id(st->self, [st](int res, uint64_t new_id) + allocate_new_id(st->self, st->self->parent->default_pool_id, [st](int res, uint64_t new_id) { if (res < 0) { @@ -133,7 +133,7 @@ static void nfs_do_write(uint64_t ino, uint64_t offset, uint64_t size, std::func { auto op = new cluster_op_t; op->opcode = OSD_OP_WRITE; - op->inode = st->self->parent->kvfs->fs_base_inode + ino; + op->inode = ino; op->offset = offset; op->len = size; prepare(op); @@ -178,7 +178,7 @@ void nfs_do_rmw(nfs_rmw_t *rmw) } auto op = new cluster_op_t; op->opcode = OSD_OP_READ; - op->inode = parent->kvfs->fs_base_inode + rmw->ino; + op->inode = rmw->ino; op->offset = rmw->offset & ~(align-1); op->len = align; op->iov.push_back(rmw->part_buf, op->len); @@ -209,7 +209,7 @@ void nfs_do_rmw(nfs_rmw_t *rmw) bool is_end = ((rmw->offset+rmw->size) % align); auto op = new cluster_op_t; op->opcode = OSD_OP_WRITE; - op->inode = parent->kvfs->fs_base_inode + rmw->ino; + op->inode = rmw->ino; op->offset = rmw->offset & ~(align-1); op->len = align; op->version = rmw->version; @@ -263,7 +263,7 @@ static void nfs_do_shared_read(nfs_kv_write_state *st, int state) uint64_t shared_offset = st->ientry["shared_offset"].uint64_value(); auto op = new cluster_op_t; op->opcode = OSD_OP_READ; - op->inode = st->self->parent->kvfs->fs_base_inode + st->ientry["shared_ino"].uint64_value(); + op->inode = st->ientry["shared_ino"].uint64_value(); op->offset = align_down(shared_offset); // Allow unaligned shared reads auto pre = shared_offset-align_down(shared_offset); @@ -759,7 +759,7 @@ resume_1: cb(st->res == 0 ? -EINVAL : st->res); return; } - st->was_immediate = st->self->parent->cli->get_immediate_commit(st->self->parent->kvfs->fs_base_inode + st->ino); + st->was_immediate = st->self->parent->cli->get_immediate_commit(st->ino); st->new_size = st->ientry["size"].uint64_value(); if (st->new_size < st->offset + st->size) { diff --git a/src/nfs_proxy.h b/src/nfs_proxy.h index bc19b2b3..816a73a8 100644 --- a/src/nfs_proxy.h +++ b/src/nfs_proxy.h @@ -23,6 +23,7 @@ public: std::string bind_address; uint64_t fsid = 1; uint64_t server_id = 0; + // FIXME: Maybe allow to create files in different pools? std::string default_pool; std::string export_root; bool portmap_enabled; diff --git a/src/osd_id.h b/src/osd_id.h index e2855d7e..9688bfd1 100644 --- a/src/osd_id.h +++ b/src/osd_id.h @@ -11,7 +11,7 @@ #define POOL_ID_MAX 0x10000 #define POOL_ID_BITS 16 #define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS)) -#define INODE_NO_POOL(inode) (inode_t)(inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) +#define INODE_NO_POOL(inode) (inode_t)((inode) & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) #define INODE_WITH_POOL(pool_id, inode) (((inode_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode)) // Pool ID is 16 bits long