WIP VitastorFS with metadata storage in VitastorKV
parent
0cd455d17f
commit
8cdc38805b
|
@ -185,23 +185,29 @@ target_link_libraries(vitastor-nbd
|
||||||
vitastor_client
|
vitastor_client
|
||||||
)
|
)
|
||||||
|
|
||||||
# vitastor-kv
|
# libvitastor_kv.so
|
||||||
add_executable(vitastor-kv
|
add_library(vitastor_kv SHARED
|
||||||
kv_cli.cpp
|
|
||||||
kv_db.cpp
|
kv_db.cpp
|
||||||
kv_db.h
|
kv_db.h
|
||||||
)
|
)
|
||||||
target_link_libraries(vitastor-kv
|
target_link_libraries(vitastor_kv
|
||||||
vitastor_client
|
vitastor_client
|
||||||
)
|
)
|
||||||
|
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||||
|
|
||||||
|
# vitastor-kv
|
||||||
|
add_executable(vitastor-kv
|
||||||
|
kv_cli.cpp
|
||||||
|
)
|
||||||
|
target_link_libraries(vitastor-kv
|
||||||
|
vitastor_kv
|
||||||
|
)
|
||||||
|
|
||||||
add_executable(vitastor-kv-stress
|
add_executable(vitastor-kv-stress
|
||||||
kv_stress.cpp
|
kv_stress.cpp
|
||||||
kv_db.cpp
|
|
||||||
kv_db.h
|
|
||||||
)
|
)
|
||||||
target_link_libraries(vitastor-kv-stress
|
target_link_libraries(vitastor-kv-stress
|
||||||
vitastor_client
|
vitastor_kv
|
||||||
)
|
)
|
||||||
|
|
||||||
# vitastor-nfs
|
# vitastor-nfs
|
||||||
|
@ -217,6 +223,7 @@ add_executable(vitastor-nfs
|
||||||
)
|
)
|
||||||
target_link_libraries(vitastor-nfs
|
target_link_libraries(vitastor-nfs
|
||||||
vitastor_client
|
vitastor_client
|
||||||
|
vitastor_kv
|
||||||
)
|
)
|
||||||
|
|
||||||
# vitastor-cli
|
# vitastor-cli
|
||||||
|
|
2534
src/nfs_conn.cpp
2534
src/nfs_conn.cpp
File diff suppressed because it is too large
Load Diff
|
@ -31,6 +31,8 @@ const char *exe_name = NULL;
|
||||||
|
|
||||||
nfs_proxy_t::~nfs_proxy_t()
|
nfs_proxy_t::~nfs_proxy_t()
|
||||||
{
|
{
|
||||||
|
if (db)
|
||||||
|
delete db;
|
||||||
if (cmd)
|
if (cmd)
|
||||||
delete cmd;
|
delete cmd;
|
||||||
if (cli)
|
if (cli)
|
||||||
|
@ -57,6 +59,7 @@ json11::Json::object nfs_proxy_t::parse_args(int narg, const char *args[])
|
||||||
"\n"
|
"\n"
|
||||||
"USAGE:\n"
|
"USAGE:\n"
|
||||||
" %s [STANDARD OPTIONS] [OTHER OPTIONS]\n"
|
" %s [STANDARD OPTIONS] [OTHER OPTIONS]\n"
|
||||||
|
" --fs <META> mount VitastorFS with metadata in image <META>\n"
|
||||||
" --subdir <DIR> export images prefixed <DIR>/ (default empty - export all images)\n"
|
" --subdir <DIR> export images prefixed <DIR>/ (default empty - export all images)\n"
|
||||||
" --portmap 0 do not listen on port 111 (portmap/rpcbind, requires root)\n"
|
" --portmap 0 do not listen on port 111 (portmap/rpcbind, requires root)\n"
|
||||||
" --bind <IP> bind service to <IP> address (default 0.0.0.0)\n"
|
" --bind <IP> bind service to <IP> address (default 0.0.0.0)\n"
|
||||||
|
@ -92,6 +95,7 @@ void nfs_proxy_t::run(json11::Json cfg)
|
||||||
srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
|
srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
|
||||||
server_id = (uint64_t)lrand48() | ((uint64_t)lrand48() << 31) | ((uint64_t)lrand48() << 62);
|
server_id = (uint64_t)lrand48() | ((uint64_t)lrand48() << 31) | ((uint64_t)lrand48() << 62);
|
||||||
// Parse options
|
// Parse options
|
||||||
|
trace = cfg["log_level"].uint64_value() > 5;
|
||||||
bind_address = cfg["bind"].string_value();
|
bind_address = cfg["bind"].string_value();
|
||||||
if (bind_address == "")
|
if (bind_address == "")
|
||||||
bind_address = "0.0.0.0";
|
bind_address = "0.0.0.0";
|
||||||
|
@ -131,67 +135,7 @@ void nfs_proxy_t::run(json11::Json cfg)
|
||||||
cmd->ringloop = ringloop;
|
cmd->ringloop = ringloop;
|
||||||
cmd->epmgr = epmgr;
|
cmd->epmgr = epmgr;
|
||||||
cmd->cli = cli;
|
cmd->cli = cli;
|
||||||
// We need inode name hashes for NFS handles to remain stateless and <= 64 bytes long
|
|
||||||
dir_info[""] = (nfs_dir_t){
|
|
||||||
.id = 1,
|
|
||||||
.mod_rev = 0,
|
|
||||||
};
|
|
||||||
clock_gettime(CLOCK_REALTIME, &dir_info[""].mtime);
|
|
||||||
watch_stats();
|
watch_stats();
|
||||||
assert(cli->st_cli.on_inode_change_hook == NULL);
|
|
||||||
cli->st_cli.on_inode_change_hook = [this](inode_t changed_inode, bool removed)
|
|
||||||
{
|
|
||||||
auto inode_cfg_it = cli->st_cli.inode_config.find(changed_inode);
|
|
||||||
if (inode_cfg_it == cli->st_cli.inode_config.end())
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
auto & inode_cfg = inode_cfg_it->second;
|
|
||||||
std::string full_name = inode_cfg.name;
|
|
||||||
if (name_prefix != "" && full_name.substr(0, name_prefix.size()) != name_prefix)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Calculate directory modification time and revision (used as "cookie verifier")
|
|
||||||
timespec now;
|
|
||||||
clock_gettime(CLOCK_REALTIME, &now);
|
|
||||||
dir_info[""].mod_rev = dir_info[""].mod_rev < inode_cfg.mod_revision ? inode_cfg.mod_revision : dir_info[""].mod_rev;
|
|
||||||
dir_info[""].mtime = now;
|
|
||||||
int pos = full_name.find('/', name_prefix.size());
|
|
||||||
while (pos >= 0)
|
|
||||||
{
|
|
||||||
std::string dir = full_name.substr(0, pos);
|
|
||||||
auto & dinf = dir_info[dir];
|
|
||||||
if (!dinf.id)
|
|
||||||
dinf.id = next_dir_id++;
|
|
||||||
dinf.mod_rev = dinf.mod_rev < inode_cfg.mod_revision ? inode_cfg.mod_revision : dinf.mod_rev;
|
|
||||||
dinf.mtime = now;
|
|
||||||
dir_by_hash["S"+base64_encode(sha256(dir))] = dir;
|
|
||||||
pos = full_name.find('/', pos+1);
|
|
||||||
}
|
|
||||||
// Alter inode_by_hash
|
|
||||||
if (removed)
|
|
||||||
{
|
|
||||||
auto ino_it = hash_by_inode.find(changed_inode);
|
|
||||||
if (ino_it != hash_by_inode.end())
|
|
||||||
{
|
|
||||||
inode_by_hash.erase(ino_it->second);
|
|
||||||
hash_by_inode.erase(ino_it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::string hash = "S"+base64_encode(sha256(full_name));
|
|
||||||
auto hbi_it = hash_by_inode.find(changed_inode);
|
|
||||||
if (hbi_it != hash_by_inode.end() && hbi_it->second != hash)
|
|
||||||
{
|
|
||||||
// inode had a different name, remove old hash=>inode pointer
|
|
||||||
inode_by_hash.erase(hbi_it->second);
|
|
||||||
}
|
|
||||||
inode_by_hash[hash] = changed_inode;
|
|
||||||
hash_by_inode[changed_inode] = hash;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
// Load image metadata
|
// Load image metadata
|
||||||
while (!cli->is_ready())
|
while (!cli->is_ready())
|
||||||
{
|
{
|
||||||
|
@ -202,6 +146,52 @@ void nfs_proxy_t::run(json11::Json cfg)
|
||||||
}
|
}
|
||||||
// Check default pool
|
// Check default pool
|
||||||
check_default_pool();
|
check_default_pool();
|
||||||
|
// Check if we're using VitastorFS
|
||||||
|
fs_kv_inode = cfg["fs"].uint64_value();
|
||||||
|
if (!fs_kv_inode && cfg["fs"].is_string())
|
||||||
|
{
|
||||||
|
for (auto & ic: cli->st_cli.inode_config)
|
||||||
|
{
|
||||||
|
if (ic.second.name == cfg["fs"].string_value())
|
||||||
|
{
|
||||||
|
fs_kv_inode = ic.first;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
readdir_getattr_parallel = cfg["readdir_getattr_parallel"].uint64_value();
|
||||||
|
if (!readdir_getattr_parallel)
|
||||||
|
readdir_getattr_parallel = 8;
|
||||||
|
id_alloc_batch_size = cfg["id_alloc_batch_size"].uint64_value();
|
||||||
|
if (!id_alloc_batch_size)
|
||||||
|
id_alloc_batch_size = 200;
|
||||||
|
if (fs_kv_inode)
|
||||||
|
{
|
||||||
|
// Open DB and wait
|
||||||
|
int open_res = 0;
|
||||||
|
bool open_done = false;
|
||||||
|
db = new kv_dbw_t(cli);
|
||||||
|
db->open(fs_kv_inode, cfg, [&](int res)
|
||||||
|
{
|
||||||
|
open_done = true;
|
||||||
|
open_res = res;
|
||||||
|
});
|
||||||
|
while (!open_done)
|
||||||
|
{
|
||||||
|
ringloop->loop();
|
||||||
|
if (open_done)
|
||||||
|
break;
|
||||||
|
ringloop->wait();
|
||||||
|
}
|
||||||
|
if (open_res < 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Failed to open key/value filesystem metadata index: %s (code %d)\n",
|
||||||
|
strerror(-open_res), open_res);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
fs_base_inode = ((uint64_t)default_pool_id << (64-POOL_ID_BITS));
|
||||||
|
fs_inode_count = ((uint64_t)1 << (64-POOL_ID_BITS)) - 1;
|
||||||
|
}
|
||||||
// Self-register portmap and NFS
|
// Self-register portmap and NFS
|
||||||
pmap.reg_ports.insert((portmap_id_t){
|
pmap.reg_ports.insert((portmap_id_t){
|
||||||
.prog = PMAP_PROGRAM,
|
.prog = PMAP_PROGRAM,
|
||||||
|
|
|
@ -4,16 +4,26 @@
|
||||||
#include "epoll_manager.h"
|
#include "epoll_manager.h"
|
||||||
#include "nfs_portmap.h"
|
#include "nfs_portmap.h"
|
||||||
#include "nfs/xdr_impl.h"
|
#include "nfs/xdr_impl.h"
|
||||||
|
#include "kv_db.h"
|
||||||
|
|
||||||
#define RPC_INIT_BUF_SIZE 32768
|
#define RPC_INIT_BUF_SIZE 32768
|
||||||
|
|
||||||
class cli_tool_t;
|
class cli_tool_t;
|
||||||
|
|
||||||
struct nfs_dir_t
|
struct list_cookie_t
|
||||||
{
|
{
|
||||||
uint64_t id;
|
uint64_t dir_ino, cookieverf, cookie;
|
||||||
uint64_t mod_rev;
|
};
|
||||||
timespec mtime;
|
|
||||||
|
inline bool operator < (const list_cookie_t & a, const list_cookie_t & b)
|
||||||
|
{
|
||||||
|
return a.dir_ino < b.dir_ino || a.dir_ino == b.dir_ino &&
|
||||||
|
(a.cookieverf < b.cookieverf || a.cookieverf == b.cookieverf && a.cookie < b.cookie);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct list_cookie_val_t
|
||||||
|
{
|
||||||
|
std::string key;
|
||||||
};
|
};
|
||||||
|
|
||||||
class nfs_proxy_t
|
class nfs_proxy_t
|
||||||
|
@ -27,6 +37,11 @@ public:
|
||||||
std::string export_root;
|
std::string export_root;
|
||||||
bool portmap_enabled;
|
bool portmap_enabled;
|
||||||
unsigned nfs_port;
|
unsigned nfs_port;
|
||||||
|
uint64_t fs_kv_inode = 0;
|
||||||
|
uint64_t fs_base_inode = 0;
|
||||||
|
uint64_t fs_inode_count = 0;
|
||||||
|
int readdir_getattr_parallel = 8, id_alloc_batch_size = 200;
|
||||||
|
int trace = 0;
|
||||||
|
|
||||||
pool_id_t default_pool_id;
|
pool_id_t default_pool_id;
|
||||||
|
|
||||||
|
@ -35,20 +50,13 @@ public:
|
||||||
epoll_manager_t *epmgr = NULL;
|
epoll_manager_t *epmgr = NULL;
|
||||||
cluster_client_t *cli = NULL;
|
cluster_client_t *cli = NULL;
|
||||||
cli_tool_t *cmd = NULL;
|
cli_tool_t *cmd = NULL;
|
||||||
|
kv_dbw_t *db = NULL;
|
||||||
|
std::map<list_cookie_t, list_cookie_val_t> list_cookies;
|
||||||
|
uint64_t fs_next_id = 0, fs_allocated_id = 0;
|
||||||
|
std::vector<uint64_t> unallocated_ids;
|
||||||
|
|
||||||
std::vector<XDR*> xdr_pool;
|
std::vector<XDR*> xdr_pool;
|
||||||
|
|
||||||
// filehandle = "S"+base64(sha256(full name with prefix)) or "roothandle" for mount root)
|
|
||||||
|
|
||||||
uint64_t next_dir_id = 2;
|
|
||||||
// filehandle => dir with name_prefix
|
|
||||||
std::map<std::string, std::string> dir_by_hash;
|
|
||||||
// dir with name_prefix => dir info
|
|
||||||
std::map<std::string, nfs_dir_t> dir_info;
|
|
||||||
// filehandle => inode ID
|
|
||||||
std::map<std::string, inode_t> inode_by_hash;
|
|
||||||
// inode ID => filehandle
|
|
||||||
std::map<inode_t, std::string> hash_by_inode;
|
|
||||||
// inode ID => statistics
|
// inode ID => statistics
|
||||||
std::map<inode_t, json11::Json> inode_stats;
|
std::map<inode_t, json11::Json> inode_stats;
|
||||||
// pool ID => statistics
|
// pool ID => statistics
|
||||||
|
@ -106,6 +114,8 @@ struct extend_write_t
|
||||||
struct extend_inode_t
|
struct extend_inode_t
|
||||||
{
|
{
|
||||||
uint64_t cur_extend = 0, next_extend = 0;
|
uint64_t cur_extend = 0, next_extend = 0;
|
||||||
|
std::string old_ientry;
|
||||||
|
json11::Json::object attrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
class nfs_client_t
|
class nfs_client_t
|
||||||
|
|
Loading…
Reference in New Issue