forked from vitalif/vitastor
Implement auto-unmount local NFS server mode for vitastor-nfs
parent
57605a5c13
commit
f600ce98e2
|
@ -133,8 +133,6 @@ void disk_tool_simple_offsets(json11::Json cfg, bool json_output);
|
|||
uint64_t sscanf_json(const char *fmt, const json11::Json & str);
|
||||
void fromhexstr(const std::string & from, int bytes, uint8_t *to);
|
||||
std::string realpath_str(std::string path, bool nofail = true);
|
||||
std::string read_all_fd(int fd);
|
||||
std::string read_file(std::string file, bool allow_enoent = false);
|
||||
int disable_cache(std::string dev);
|
||||
std::string get_parent_device(std::string dev);
|
||||
bool json_is_true(const json11::Json & val);
|
||||
|
|
|
@ -55,23 +55,6 @@ std::string realpath_str(std::string path, bool nofail)
|
|||
return rp;
|
||||
}
|
||||
|
||||
std::string read_file(std::string file, bool allow_enoent)
|
||||
{
|
||||
std::string res;
|
||||
int fd = open(file.c_str(), O_RDONLY);
|
||||
if (fd < 0 || (res = read_all_fd(fd)) == "")
|
||||
{
|
||||
int err = errno;
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
if (!allow_enoent || err != ENOENT)
|
||||
fprintf(stderr, "Can't read %s: %s\n", file.c_str(), strerror(err));
|
||||
return "";
|
||||
}
|
||||
close(fd);
|
||||
return res;
|
||||
}
|
||||
|
||||
// returns 1 = check error, 0 = write through, -1 = write back
|
||||
// (similar to 1 = warning, -1 = error, 0 = success in disable_cache)
|
||||
static int check_queue_cache(std::string dev, std::string parent_dev)
|
||||
|
|
|
@ -34,7 +34,7 @@ static std::string get_inode_name(nfs_client_t *self, diropargs3 & what)
|
|||
std::string name = what.name;
|
||||
return (dir.size()
|
||||
? dir+"/"+name
|
||||
: self->parent->name_prefix+name);
|
||||
: self->parent->blockfs->name_prefix+name);
|
||||
}
|
||||
|
||||
static fattr3 get_dir_attributes(nfs_client_t *self, std::string dir)
|
||||
|
@ -985,7 +985,7 @@ static void block_nfs3_readdir_common(void *opaque, rpc_op_t *rop, bool is_plus)
|
|||
if (dir_it != self->parent->blockfs->dir_by_hash.end())
|
||||
dir = dir_it->second;
|
||||
}
|
||||
std::string prefix = dir.size() ? dir+"/" : self->parent->name_prefix;
|
||||
std::string prefix = dir.size() ? dir+"/" : self->parent->blockfs->name_prefix;
|
||||
std::map<std::string, struct entryplus3> entries;
|
||||
for (auto & ic: self->parent->cli->st_cli.inode_config)
|
||||
{
|
||||
|
@ -1154,8 +1154,20 @@ static int block_nfs3_readdirplus_proc(void *opaque, rpc_op_t *rop)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void block_fs_state_t::init(nfs_proxy_t *proxy)
|
||||
void block_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
|
||||
{
|
||||
name_prefix = cfg["subdir"].string_value();
|
||||
{
|
||||
int e = name_prefix.size();
|
||||
while (e > 0 && name_prefix[e-1] == '/')
|
||||
e--;
|
||||
int s = 0;
|
||||
while (s < e && name_prefix[s] == '/')
|
||||
s++;
|
||||
name_prefix = name_prefix.substr(s, e-s);
|
||||
if (name_prefix.size())
|
||||
name_prefix += "/";
|
||||
}
|
||||
// We need inode name hashes for NFS handles to remain stateless and <= 64 bytes long
|
||||
dir_info[""] = (nfs_dir_t){
|
||||
.id = 1,
|
||||
|
@ -1172,7 +1184,7 @@ void block_fs_state_t::init(nfs_proxy_t *proxy)
|
|||
}
|
||||
auto & inode_cfg = inode_cfg_it->second;
|
||||
std::string full_name = inode_cfg.name;
|
||||
if (proxy->name_prefix != "" && full_name.substr(0, proxy->name_prefix.size()) != proxy->name_prefix)
|
||||
if (proxy->blockfs->name_prefix != "" && full_name.substr(0, proxy->blockfs->name_prefix.size()) != proxy->blockfs->name_prefix)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -1181,7 +1193,7 @@ void block_fs_state_t::init(nfs_proxy_t *proxy)
|
|||
clock_gettime(CLOCK_REALTIME, &now);
|
||||
dir_info[""].mod_rev = dir_info[""].mod_rev < inode_cfg.mod_revision ? inode_cfg.mod_revision : dir_info[""].mod_rev;
|
||||
dir_info[""].mtime = now;
|
||||
int pos = full_name.find('/', proxy->name_prefix.size());
|
||||
int pos = full_name.find('/', proxy->blockfs->name_prefix.size());
|
||||
while (pos >= 0)
|
||||
{
|
||||
std::string dir = full_name.substr(0, pos);
|
||||
|
|
|
@ -36,6 +36,8 @@ struct extend_inode_t
|
|||
|
||||
struct block_fs_state_t
|
||||
{
|
||||
std::string name_prefix;
|
||||
|
||||
// filehandle = "S"+base64(sha256(full name with prefix)) or "roothandle" for mount root
|
||||
uint64_t next_dir_id = 2;
|
||||
// filehandle => dir with name_prefix
|
||||
|
@ -51,7 +53,7 @@ struct block_fs_state_t
|
|||
std::map<inode_t, extend_inode_t> extends;
|
||||
std::multimap<extend_size_t, extend_write_t> extend_writes;
|
||||
|
||||
void init(nfs_proxy_t *proxy);
|
||||
void init(nfs_proxy_t *proxy, json11::Json cfg);
|
||||
};
|
||||
|
||||
nfsstat3 vitastor_nfs_map_err(int err);
|
||||
|
|
|
@ -190,3 +190,72 @@ void nfs_kv_procs(nfs_client_t *self)
|
|||
self->proc_table.insert(pt[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Set up the VitastorFS (key/value) state for <proxy> using options from <cfg>:
// resolve the metadata inode, read tunables, remember the default pool geometry,
// open the metadata DB and derive the inode numbering limits.
// Terminates the process with exit(1) if the metadata image can't be resolved or opened.
void kv_fs_state_t::init(nfs_proxy_t *proxy, json11::Json cfg)
{
    // "fs" is either an inode number (which must include the pool) or an image name
    fs_kv_inode = cfg["fs"].uint64_value();
    if (!fs_kv_inode)
    {
        const std::string & fs_name = cfg["fs"].string_value();
        for (auto & ic: proxy->cli->st_cli.inode_config)
        {
            if (ic.second.name == fs_name)
            {
                fs_kv_inode = ic.first;
                break;
            }
        }
        if (!fs_kv_inode)
        {
            fprintf(stderr, "FS metadata image \"%s\" does not exist\n", fs_name.c_str());
            exit(1);
        }
    }
    else if (!INODE_POOL(fs_kv_inode))
    {
        fprintf(stderr, "FS metadata inode number must include pool\n");
        exit(1);
    }
    // Tunables: zero (or absent) means "use the default"
    readdir_getattr_parallel = cfg["readdir_getattr_parallel"].uint64_value();
    if (!readdir_getattr_parallel)
    {
        readdir_getattr_parallel = 8;
    }
    id_alloc_batch_size = cfg["id_alloc_batch_size"].uint64_value();
    if (!id_alloc_batch_size)
    {
        id_alloc_batch_size = 200;
    }
    // Geometry of the default pool
    auto & default_pool_cfg = proxy->cli->st_cli.pool_config.at(proxy->default_pool_id);
    pool_block_size = default_pool_cfg.pg_stripe_size;
    pool_alignment = default_pool_cfg.bitmap_granularity;
    // Open the metadata DB and run the event loop until the callback fires
    bool db_opened = false;
    int db_open_res = 0;
    proxy->db = new kv_dbw_t(proxy->cli);
    proxy->db->open(fs_kv_inode, cfg, [&](int res)
    {
        db_opened = true;
        db_open_res = res;
    });
    while (!db_opened)
    {
        proxy->ringloop->loop();
        // Only block in wait() if the loop iteration didn't complete the open
        if (!db_opened)
            proxy->ringloop->wait();
    }
    if (db_open_res < 0)
    {
        fprintf(stderr, "Failed to open key/value filesystem metadata index: %s (code %d)\n",
            strerror(-db_open_res), db_open_res);
        exit(1);
    }
    // File inode numbers live in the default pool: pool ID in the top POOL_ID_BITS bits
    fs_base_inode = ((uint64_t)proxy->default_pool_id << (64-POOL_ID_BITS));
    fs_inode_count = ((uint64_t)1 << (64-POOL_ID_BITS)) - 1;
    // Threshold defaults to the pool block size unless set explicitly
    shared_inode_threshold = cfg["shared_inode_threshold"].is_null()
        ? pool_block_size
        : cfg["shared_inode_threshold"].uint64_value();
    zero_block.resize(pool_block_size);
}
|
||||
|
|
10
src/nfs_kv.h
10
src/nfs_kv.h
|
@ -45,6 +45,14 @@ struct kv_inode_extend_t
|
|||
|
||||
struct kv_fs_state_t
|
||||
{
|
||||
uint64_t fs_kv_inode = 0;
|
||||
uint64_t fs_base_inode = 0;
|
||||
uint64_t fs_inode_count = 0;
|
||||
int readdir_getattr_parallel = 8, id_alloc_batch_size = 200;
|
||||
uint64_t pool_block_size = 0;
|
||||
uint64_t pool_alignment = 0;
|
||||
uint64_t shared_inode_threshold = 0;
|
||||
|
||||
std::map<list_cookie_t, list_cookie_val_t> list_cookies;
|
||||
uint64_t fs_next_id = 1, fs_allocated_id = 0;
|
||||
std::vector<uint64_t> unallocated_ids;
|
||||
|
@ -52,6 +60,8 @@ struct kv_fs_state_t
|
|||
uint64_t cur_shared_inode = 0, cur_shared_offset = 0;
|
||||
std::map<inode_t, kv_inode_extend_t> extends;
|
||||
std::vector<uint8_t> zero_block;
|
||||
|
||||
void init(nfs_proxy_t *proxy, json11::Json cfg);
|
||||
};
|
||||
|
||||
struct shared_file_header_t
|
||||
|
|
|
@ -16,7 +16,7 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
|
|||
cb(0, self->parent->kvfs->fs_next_id++);
|
||||
return;
|
||||
}
|
||||
else if (self->parent->kvfs->fs_next_id > self->parent->fs_inode_count)
|
||||
else if (self->parent->kvfs->fs_next_id > self->parent->kvfs->fs_inode_count)
|
||||
{
|
||||
cb(-ENOSPC, 0);
|
||||
return;
|
||||
|
@ -29,7 +29,7 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
|
|||
return;
|
||||
}
|
||||
uint64_t prev_val = stoull_full(prev_str);
|
||||
if (prev_val >= self->parent->fs_inode_count)
|
||||
if (prev_val >= self->parent->kvfs->fs_inode_count)
|
||||
{
|
||||
cb(-ENOSPC, 0);
|
||||
return;
|
||||
|
@ -38,10 +38,10 @@ void allocate_new_id(nfs_client_t *self, std::function<void(int res, uint64_t ne
|
|||
{
|
||||
prev_val = 1;
|
||||
}
|
||||
uint64_t new_val = prev_val + self->parent->id_alloc_batch_size;
|
||||
if (new_val >= self->parent->fs_inode_count)
|
||||
uint64_t new_val = prev_val + self->parent->kvfs->id_alloc_batch_size;
|
||||
if (new_val >= self->parent->kvfs->fs_inode_count)
|
||||
{
|
||||
new_val = self->parent->fs_inode_count;
|
||||
new_val = self->parent->kvfs->fs_inode_count;
|
||||
}
|
||||
self->parent->db->set(KV_NEXT_ID_KEY, std::to_string(new_val), [=](int res)
|
||||
{
|
||||
|
|
|
@ -36,7 +36,7 @@ static void nfs_kv_continue_read(nfs_kv_read_state *st, int state)
|
|||
fprintf(stderr, "BUG: invalid state in nfs_kv_continue_read()");
|
||||
abort();
|
||||
}
|
||||
if (st->offset + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold)
|
||||
if (st->offset + sizeof(shared_file_header_t) < st->self->parent->kvfs->shared_inode_threshold)
|
||||
{
|
||||
kv_read_inode(st->self, st->ino, [st](int res, const std::string & value, json11::Json attrs)
|
||||
{
|
||||
|
@ -59,7 +59,7 @@ resume_1:
|
|||
st->buf = st->aligned_buf + sizeof(shared_file_header_t) + st->offset;
|
||||
st->op = new cluster_op_t;
|
||||
st->op->opcode = OSD_OP_READ;
|
||||
st->op->inode = st->self->parent->fs_base_inode + st->ientry["shared_ino"].uint64_value();
|
||||
st->op->inode = st->self->parent->kvfs->fs_base_inode + st->ientry["shared_ino"].uint64_value();
|
||||
st->op->offset = st->ientry["shared_offset"].uint64_value();
|
||||
if (st->offset+st->size > st->ientry["size"].uint64_value())
|
||||
{
|
||||
|
@ -99,14 +99,14 @@ resume_2:
|
|||
return;
|
||||
}
|
||||
}
|
||||
st->aligned_offset = (st->offset & ~(st->self->parent->pool_alignment-1));
|
||||
st->aligned_size = ((st->offset + st->size + st->self->parent->pool_alignment-1) &
|
||||
~(st->self->parent->pool_alignment-1)) - st->aligned_offset;
|
||||
st->aligned_offset = (st->offset & ~(st->self->parent->kvfs->pool_alignment-1));
|
||||
st->aligned_size = ((st->offset + st->size + st->self->parent->kvfs->pool_alignment-1) &
|
||||
~(st->self->parent->kvfs->pool_alignment-1)) - st->aligned_offset;
|
||||
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
||||
st->buf = st->aligned_buf + st->offset - st->aligned_offset;
|
||||
st->op = new cluster_op_t;
|
||||
st->op->opcode = OSD_OP_READ;
|
||||
st->op->inode = st->self->parent->fs_base_inode + st->ino;
|
||||
st->op->inode = st->self->parent->kvfs->fs_base_inode + st->ino;
|
||||
st->op->offset = st->aligned_offset;
|
||||
st->op->len = st->aligned_size;
|
||||
st->op->iov.push_back(st->aligned_buf, st->aligned_size);
|
||||
|
|
|
@ -46,7 +46,7 @@ static void nfs_kv_continue_readdir(nfs_kv_readdir_state *st, int state);
|
|||
|
||||
static void kv_getattr_next(nfs_kv_readdir_state *st)
|
||||
{
|
||||
while (st->is_plus && st->getattr_cur < st->entries.size() && st->getattr_running < st->self->parent->readdir_getattr_parallel)
|
||||
while (st->is_plus && st->getattr_cur < st->entries.size() && st->getattr_running < st->self->parent->kvfs->readdir_getattr_parallel)
|
||||
{
|
||||
auto idx = st->getattr_cur++;
|
||||
st->getattr_running++;
|
||||
|
|
|
@ -231,8 +231,8 @@ resume_6:
|
|||
{
|
||||
// Remove data
|
||||
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->fs_base_inode + st->ino) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->fs_base_inode + st->ino) },
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
|
||||
}), [st](const cli_result_t & r)
|
||||
{
|
||||
if (r.err)
|
||||
|
|
|
@ -278,8 +278,8 @@ resume_8:
|
|||
if (st->rm_dest_data)
|
||||
{
|
||||
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->fs_base_inode + st->new_direntry["ino"].uint64_value()) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->fs_base_inode + st->new_direntry["ino"].uint64_value()) },
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->new_direntry["ino"].uint64_value()) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->new_direntry["ino"].uint64_value()) },
|
||||
}), [st](const cli_result_t & r)
|
||||
{
|
||||
if (r.err)
|
||||
|
|
|
@ -104,8 +104,8 @@ resume_2:
|
|||
{
|
||||
// Delete extra data when downsizing
|
||||
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->fs_base_inode + st->ino) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->fs_base_inode + st->ino) },
|
||||
{ "inode", INODE_NO_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
|
||||
{ "pool", (uint64_t)INODE_POOL(st->self->parent->kvfs->fs_base_inode + st->ino) },
|
||||
{ "min_offset", st->set_attrs["size"].uint64_value() },
|
||||
}), [st](const cli_result_t & r)
|
||||
{
|
||||
|
|
|
@ -8,6 +8,9 @@
|
|||
#include "nfs_proxy.h"
|
||||
#include "nfs_kv.h"
|
||||
|
||||
// FIXME: Implement shared inode defragmentator
|
||||
// FIXME: Implement fsck for vitastor-fs and for vitastor-kv
|
||||
|
||||
struct nfs_rmw_t
|
||||
{
|
||||
nfs_kv_write_state *st = NULL;
|
||||
|
@ -67,7 +70,7 @@ static void finish_allocate_shared(nfs_client_t *self, int res)
|
|||
{
|
||||
w.st->shared_inode = self->parent->kvfs->cur_shared_inode;
|
||||
w.st->shared_offset = self->parent->kvfs->cur_shared_offset;
|
||||
self->parent->kvfs->cur_shared_offset += (w.size + self->parent->pool_alignment-1) & ~(self->parent->pool_alignment-1);
|
||||
self->parent->kvfs->cur_shared_offset += (w.size + self->parent->kvfs->pool_alignment-1) & ~(self->parent->kvfs->pool_alignment-1);
|
||||
}
|
||||
nfs_kv_continue_write(w.st, w.state);
|
||||
}
|
||||
|
@ -113,22 +116,22 @@ static void allocate_shared_inode(nfs_kv_write_state *st, int state, uint64_t si
|
|||
st->res = 0;
|
||||
st->shared_inode = st->self->parent->kvfs->cur_shared_inode;
|
||||
st->shared_offset = st->self->parent->kvfs->cur_shared_offset;
|
||||
st->self->parent->kvfs->cur_shared_offset += (size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1);
|
||||
st->self->parent->kvfs->cur_shared_offset += (size + st->self->parent->kvfs->pool_alignment-1) & ~(st->self->parent->kvfs->pool_alignment-1);
|
||||
nfs_kv_continue_write(st, state);
|
||||
}
|
||||
}
|
||||
|
||||
// Size occupied in a shared inode by a file of <size> bytes: the data plus
// shared_file_header_t, rounded up to a multiple of the FS pool alignment.
// NOTE(review): the mask arithmetic assumes pool_alignment is a power of two - confirm
uint64_t align_shared_size(nfs_client_t *self, uint64_t size)
{
    // Alignment is taken from the key/value FS state, like in the other users of pool_alignment
    const uint64_t align = self->parent->kvfs->pool_alignment;
    return (size + sizeof(shared_file_header_t) + align-1) & ~(align-1);
}
|
||||
|
||||
static void nfs_do_write(uint64_t ino, uint64_t offset, uint64_t size, std::function<void(cluster_op_t *op)> prepare, nfs_kv_write_state *st, int state)
|
||||
{
|
||||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_WRITE;
|
||||
op->inode = st->self->parent->fs_base_inode + ino;
|
||||
op->inode = st->self->parent->kvfs->fs_base_inode + ino;
|
||||
op->offset = offset;
|
||||
op->len = size;
|
||||
prepare(op);
|
||||
|
@ -151,8 +154,8 @@ static void nfs_do_write(uint64_t ino, uint64_t offset, uint64_t size, std::func
|
|||
|
||||
static void nfs_do_unshare_write(nfs_kv_write_state *st, int state)
|
||||
{
|
||||
uint64_t unshare_size = (st->ientry["size"].uint64_value() + st->self->parent->pool_alignment-1)
|
||||
& ~(st->self->parent->pool_alignment-1);
|
||||
uint64_t unshare_size = (st->ientry["size"].uint64_value() + st->self->parent->kvfs->pool_alignment-1)
|
||||
& ~(st->self->parent->kvfs->pool_alignment-1);
|
||||
nfs_do_write(st->ino, 0, unshare_size, [&](cluster_op_t *op)
|
||||
{
|
||||
op->iov.push_back(st->aligned_buf + sizeof(shared_file_header_t), unshare_size);
|
||||
|
@ -162,16 +165,16 @@ static void nfs_do_unshare_write(nfs_kv_write_state *st, int state)
|
|||
static void nfs_do_rmw(nfs_rmw_t *rmw)
|
||||
{
|
||||
auto parent = rmw->st->self->parent;
|
||||
auto align = parent->pool_alignment;
|
||||
auto align = parent->kvfs->pool_alignment;
|
||||
assert(rmw->size < align);
|
||||
assert((rmw->offset/parent->pool_block_size) == ((rmw->offset+rmw->size-1)/parent->pool_block_size));
|
||||
assert((rmw->offset/parent->kvfs->pool_block_size) == ((rmw->offset+rmw->size-1)/parent->kvfs->pool_block_size));
|
||||
if (!rmw->part_buf)
|
||||
{
|
||||
rmw->part_buf = (uint8_t*)malloc_or_die(align);
|
||||
}
|
||||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_READ;
|
||||
op->inode = parent->fs_base_inode + rmw->ino;
|
||||
op->inode = parent->kvfs->fs_base_inode + rmw->ino;
|
||||
op->offset = rmw->offset & ~(align-1);
|
||||
op->len = align;
|
||||
op->iov.push_back(rmw->part_buf, op->len);
|
||||
|
@ -196,7 +199,7 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
auto st = rmw->st;
|
||||
rmw->version = rd_op->version+1;
|
||||
if (st->rmw[0].st && st->rmw[1].st &&
|
||||
st->rmw[0].offset/st->self->parent->pool_block_size == st->rmw[1].offset/st->self->parent->pool_block_size)
|
||||
st->rmw[0].offset/st->self->parent->kvfs->pool_block_size == st->rmw[1].offset/st->self->parent->kvfs->pool_block_size)
|
||||
{
|
||||
// Same block... RMWs should be sequential
|
||||
int other = rmw == &st->rmw[0] ? 1 : 0;
|
||||
|
@ -204,12 +207,12 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
}
|
||||
}
|
||||
auto parent = rmw->st->self->parent;
|
||||
auto align = parent->pool_alignment;
|
||||
auto align = parent->kvfs->pool_alignment;
|
||||
bool is_begin = (rmw->offset % align);
|
||||
bool is_end = ((rmw->offset+rmw->size) % align);
|
||||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_WRITE;
|
||||
op->inode = rmw->st->self->parent->fs_base_inode + rmw->ino;
|
||||
op->inode = rmw->st->self->parent->kvfs->fs_base_inode + rmw->ino;
|
||||
op->offset = rmw->offset & ~(align-1);
|
||||
op->len = align;
|
||||
op->version = rmw->version;
|
||||
|
@ -258,7 +261,7 @@ static void nfs_do_shared_read(nfs_kv_write_state *st, int state)
|
|||
{
|
||||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_READ;
|
||||
op->inode = st->self->parent->fs_base_inode + st->ientry["shared_ino"].uint64_value();
|
||||
op->inode = st->self->parent->kvfs->fs_base_inode + st->ientry["shared_ino"].uint64_value();
|
||||
op->offset = st->ientry["shared_offset"].uint64_value();
|
||||
op->len = align_shared_size(st->self, st->ientry["size"].uint64_value());
|
||||
op->iov.push_back(st->aligned_buf, op->len);
|
||||
|
@ -291,7 +294,7 @@ static bool nfs_do_shared_readmodify(nfs_kv_write_state *st, int base_state, int
|
|||
else if (state == base_state) goto resume_0;
|
||||
assert(!st->aligned_buf);
|
||||
st->aligned_size = unshare
|
||||
? sizeof(shared_file_header_t) + ((st->new_size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1))
|
||||
? sizeof(shared_file_header_t) + ((st->new_size + st->self->parent->kvfs->pool_alignment-1) & ~(st->self->parent->kvfs->pool_alignment-1))
|
||||
: align_shared_size(st->self, st->new_size);
|
||||
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
||||
// FIXME do not allocate zeroes if we only need zeroes
|
||||
|
@ -351,7 +354,7 @@ static void nfs_do_shared_write(nfs_kv_write_state *st, int state, bool only_ali
|
|||
|
||||
static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t offset, uint64_t shared_alloc, int state)
|
||||
{
|
||||
auto alignment = st->self->parent->pool_alignment;
|
||||
auto alignment = st->self->parent->kvfs->pool_alignment;
|
||||
uint64_t end = (offset+st->size);
|
||||
uint8_t *good_buf = st->buf;
|
||||
uint64_t good_offset = offset;
|
||||
|
@ -667,18 +670,18 @@ resume_1:
|
|||
cb(st->res == 0 ? -EINVAL : st->res);
|
||||
return;
|
||||
}
|
||||
st->was_immediate = st->self->parent->cli->get_immediate_commit(st->self->parent->fs_base_inode + st->ino);
|
||||
st->was_immediate = st->self->parent->cli->get_immediate_commit(st->self->parent->kvfs->fs_base_inode + st->ino);
|
||||
st->new_size = st->ientry["size"].uint64_value();
|
||||
if (st->new_size < st->offset + st->size)
|
||||
{
|
||||
st->new_size = st->offset + st->size;
|
||||
}
|
||||
if (st->offset + st->size + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold)
|
||||
if (st->offset + st->size + sizeof(shared_file_header_t) < st->self->parent->kvfs->shared_inode_threshold)
|
||||
{
|
||||
if (st->ientry["size"].uint64_value() == 0 &&
|
||||
st->ientry["shared_ino"].uint64_value() == 0 ||
|
||||
st->ientry["empty"].bool_value() &&
|
||||
(st->ientry["size"].uint64_value() + sizeof(shared_file_header_t)) < st->self->parent->shared_inode_threshold ||
|
||||
(st->ientry["size"].uint64_value() + sizeof(shared_file_header_t)) < st->self->parent->kvfs->shared_inode_threshold ||
|
||||
st->ientry["shared_ino"].uint64_value() != 0 &&
|
||||
st->ientry["shared_alloc"].uint64_value() < sizeof(shared_file_header_t)+st->offset+st->size)
|
||||
{
|
||||
|
|
|
@ -10,9 +10,10 @@
|
|||
|
||||
#include <netinet/tcp.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
//#include <signal.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "nfs/nfs.h"
|
||||
#include "nfs/rpc.h"
|
||||
|
@ -34,6 +35,10 @@ const char *exe_name = NULL;
|
|||
|
||||
nfs_proxy_t::~nfs_proxy_t()
|
||||
{
|
||||
if (kvfs)
|
||||
delete kvfs;
|
||||
if (blockfs)
|
||||
delete blockfs;
|
||||
if (db)
|
||||
delete db;
|
||||
if (cmd)
|
||||
|
@ -49,45 +54,79 @@ nfs_proxy_t::~nfs_proxy_t()
|
|||
delete ringloop;
|
||||
}
|
||||
|
||||
static const char* help_text =
|
||||
"Vitastor NFS 3.0 proxy " VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2021+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"vitastor-nfs (--fs <NAME> | --block) mount <MOUNTPOINT>\n"
|
||||
" Start local filesystem server and mount file system to <MOUNTPOINT>.\n"
|
||||
" Use regular `umount <MOUNTPOINT>` to unmount the FS.\n"
|
||||
" The server will be automatically stopped when the FS is unmounted.\n"
|
||||
"\n"
|
||||
"vitastor-nfs (--fs <NAME> | --block) start\n"
|
||||
" Start network NFS server. Options:\n"
|
||||
" --bind <IP> bind service to <IP> address (default 0.0.0.0)\n"
|
||||
" --port <PORT> use port <PORT> for NFS services (default is 2049)\n"
|
||||
" --portmap 0 do not listen on port 111 (portmap/rpcbind, requires root)\n"
|
||||
"\n"
|
||||
"OPTIONS:\n"
|
||||
" --fs <NAME> use VitastorFS with metadata in image <NAME>\n"
|
||||
" --block use pseudo-FS presenting images as files\n"
|
||||
" --pool <POOL> use <POOL> as default pool for new files\n"
|
||||
" --subdir <DIR> export <DIR> instead of root directory\n"
|
||||
" --nfspath <PATH> set NFS export path to <PATH> (default is /)\n"
|
||||
" --pidfile <FILE> write process ID to the specified file\n"
|
||||
" --logfile <FILE> log to the specified file\n"
|
||||
" --foreground 1 stay in foreground, do not daemonize\n"
|
||||
"\n"
|
||||
"NFS proxy is stateless if you use immediate_commit=all in your cluster and if\n"
|
||||
"you do not use client_enable_writeback=true, so you can freely use multiple\n"
|
||||
"NFS proxies with L3 load balancing in this case.\n"
|
||||
"\n"
|
||||
"Example start and mount commands for a custom NFS port:\n"
|
||||
" vitastor-nfs start --block --etcd_address 192.168.5.10:2379 --portmap 0 --port 2050 --pool testpool\n"
|
||||
" mount localhost:/ /mnt/ -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp\n"
|
||||
"Or just:\n"
|
||||
" vitastor-nfs mount --block --pool testpool /mnt/\n"
|
||||
;
|
||||
|
||||
// Parse the command line into a configuration object.
//
// "--name value" pairs become cfg["name"] = "value". "--json", "--block" and a
// trailing option without a value are flags and get the value "1".
// Non-option words form the command: "mount <MOUNTPOINT>" (stored in cfg["mount"])
// or "start".
//
// Exits the process: with code 0 after printing help on -h/--help, with code 1
// when neither --block nor --fs is given or when the command is missing or unknown.
json11::Json::object nfs_proxy_t::parse_args(int narg, const char *args[])
{
    json11::Json::object cfg;
    std::vector<std::string> cmd;
    for (int i = 1; i < narg; i++)
    {
        if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
        {
            // Help is printed exactly once, from the shared help_text
            printf("%s", help_text);
            exit(0);
        }
        else if (args[i][0] == '-' && args[i][1] == '-')
        {
            const char *opt = args[i]+2;
            // Assign exactly once per option: the value is consumed with ++i,
            // so a second assignment would swallow the following argument too
            cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "block") || i == narg-1 ? "1" : args[++i];
        }
        else
        {
            cmd.push_back(args[i]);
        }
    }
    if (cfg.find("block") == cfg.end() && cfg.find("fs") == cfg.end())
    {
        fprintf(stderr, "Specify one of --block or --fs NAME. Use vitastor-nfs --help for details\n");
        exit(1);
    }
    if (cmd.size() >= 2 && cmd[0] == "mount")
    {
        cfg["mount"] = cmd[1];
    }
    else if (cmd.empty() || cmd[0] != "start")
    {
        // No command, unknown command, or "mount" without a mountpoint
        printf("%s", help_text);
        exit(1);
    }
    return cfg;
}
|
||||
|
@ -101,6 +140,7 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
// Parse options
|
||||
if (cfg["logfile"].string_value() != "")
|
||||
logfile = cfg["logfile"].string_value();
|
||||
pidfile = cfg["pidfile"].string_value();
|
||||
trace = cfg["log_level"].uint64_value() > 5 || cfg["trace"].uint64_value() > 0;
|
||||
bind_address = cfg["bind"].string_value();
|
||||
if (bind_address == "")
|
||||
|
@ -113,18 +153,6 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
export_root = cfg["nfspath"].string_value();
|
||||
if (!export_root.size())
|
||||
export_root = "/";
|
||||
name_prefix = cfg["subdir"].string_value();
|
||||
{
|
||||
int e = name_prefix.size();
|
||||
while (e > 0 && name_prefix[e-1] == '/')
|
||||
e--;
|
||||
int s = 0;
|
||||
while (s < e && name_prefix[s] == '/')
|
||||
s++;
|
||||
name_prefix = name_prefix.substr(s, e-s);
|
||||
if (name_prefix.size())
|
||||
name_prefix += "/";
|
||||
}
|
||||
if (cfg["client_writeback_allowed"].is_null())
|
||||
{
|
||||
// NFS is always aware of fsync, so we allow write-back cache
|
||||
|
@ -133,6 +161,15 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
obj["client_writeback_allowed"] = true;
|
||||
cfg = obj;
|
||||
}
|
||||
mountpoint = cfg["mount"].string_value();
|
||||
if (mountpoint != "")
|
||||
{
|
||||
bind_address = "127.0.0.1";
|
||||
nfs_port = 0;
|
||||
portmap_enabled = false;
|
||||
exit_on_umount = true;
|
||||
}
|
||||
fsname = cfg["fs"].string_value();
|
||||
// Create client
|
||||
ringloop = new ring_loop_t(RINGLOOP_DEFAULT_SIZE);
|
||||
epmgr = new epoll_manager_t(ringloop);
|
||||
|
@ -142,11 +179,6 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
cmd->epmgr = epmgr;
|
||||
cmd->cli = cli;
|
||||
watch_stats();
|
||||
if (!fs_kv_inode)
|
||||
{
|
||||
blockfs = new block_fs_state_t();
|
||||
blockfs->init(this);
|
||||
}
|
||||
// Load image metadata
|
||||
while (!cli->is_ready())
|
||||
{
|
||||
|
@ -158,70 +190,15 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
// Check default pool
|
||||
check_default_pool();
|
||||
// Check if we're using VitastorFS
|
||||
fs_kv_inode = cfg["fs"].uint64_value();
|
||||
if (fs_kv_inode)
|
||||
if (fsname == "")
|
||||
{
|
||||
if (!INODE_POOL(fs_kv_inode))
|
||||
{
|
||||
fprintf(stderr, "FS metadata inode number must include pool\n");
|
||||
exit(1);
|
||||
}
|
||||
blockfs = new block_fs_state_t();
|
||||
blockfs->init(this, cfg);
|
||||
}
|
||||
else if (cfg["fs"].is_string())
|
||||
else
|
||||
{
|
||||
for (auto & ic: cli->st_cli.inode_config)
|
||||
{
|
||||
if (ic.second.name == cfg["fs"].string_value())
|
||||
{
|
||||
fs_kv_inode = ic.first;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!fs_kv_inode)
|
||||
{
|
||||
fprintf(stderr, "FS metadata image \"%s\" does not exist\n", cfg["fs"].string_value().c_str());
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
readdir_getattr_parallel = cfg["readdir_getattr_parallel"].uint64_value();
|
||||
if (!readdir_getattr_parallel)
|
||||
readdir_getattr_parallel = 8;
|
||||
id_alloc_batch_size = cfg["id_alloc_batch_size"].uint64_value();
|
||||
if (!id_alloc_batch_size)
|
||||
id_alloc_batch_size = 200;
|
||||
if (fs_kv_inode)
|
||||
{
|
||||
// Open DB and wait
|
||||
int open_res = 0;
|
||||
bool open_done = false;
|
||||
db = new kv_dbw_t(cli);
|
||||
db->open(fs_kv_inode, cfg, [&](int res)
|
||||
{
|
||||
open_done = true;
|
||||
open_res = res;
|
||||
});
|
||||
while (!open_done)
|
||||
{
|
||||
ringloop->loop();
|
||||
if (open_done)
|
||||
break;
|
||||
ringloop->wait();
|
||||
}
|
||||
if (open_res < 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to open key/value filesystem metadata index: %s (code %d)\n",
|
||||
strerror(-open_res), open_res);
|
||||
exit(1);
|
||||
}
|
||||
fs_base_inode = ((uint64_t)default_pool_id << (64-POOL_ID_BITS));
|
||||
fs_inode_count = ((uint64_t)1 << (64-POOL_ID_BITS)) - 1;
|
||||
shared_inode_threshold = pool_block_size;
|
||||
if (!cfg["shared_inode_threshold"].is_null())
|
||||
{
|
||||
shared_inode_threshold = cfg["shared_inode_threshold"].uint64_value();
|
||||
}
|
||||
kvfs = new kv_fs_state_t;
|
||||
kvfs->zero_block.resize(pool_block_size);
|
||||
kvfs = new kv_fs_state_t();
|
||||
kvfs->init(this, cfg);
|
||||
}
|
||||
// Self-register portmap and NFS
|
||||
pmap.reg_ports.insert((portmap_id_t){
|
||||
|
@ -253,7 +230,7 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
.addr = "0.0.0.0.0."+std::to_string(nfs_port),
|
||||
});
|
||||
// Create NFS socket and add it to epoll
|
||||
int nfs_socket = create_and_bind_socket(bind_address, nfs_port, 128, NULL);
|
||||
int nfs_socket = create_and_bind_socket(bind_address, nfs_port, 128, &listening_port);
|
||||
fcntl(nfs_socket, F_SETFL, fcntl(nfs_socket, F_GETFL, 0) | O_NONBLOCK);
|
||||
epmgr->tfd->set_fd_handler(nfs_socket, false, [this](int nfs_socket, int epoll_events)
|
||||
{
|
||||
|
@ -285,24 +262,43 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
}
|
||||
});
|
||||
}
|
||||
if (mountpoint != "")
|
||||
{
|
||||
mount_fs();
|
||||
}
|
||||
if (cfg["foreground"].is_null())
|
||||
{
|
||||
daemonize();
|
||||
}
|
||||
while (true)
|
||||
if (pidfile != "")
|
||||
{
|
||||
write_pid();
|
||||
}
|
||||
while (!finished)
|
||||
{
|
||||
ringloop->loop();
|
||||
ringloop->wait();
|
||||
}
|
||||
// Destroy the client
|
||||
cli->flush();
|
||||
delete kvfs;
|
||||
delete db;
|
||||
if (kvfs)
|
||||
{
|
||||
delete kvfs;
|
||||
kvfs = NULL;
|
||||
}
|
||||
if (blockfs)
|
||||
{
|
||||
delete blockfs;
|
||||
blockfs = NULL;
|
||||
}
|
||||
if (db)
|
||||
{
|
||||
delete db;
|
||||
db = NULL;
|
||||
}
|
||||
delete cli;
|
||||
delete epmgr;
|
||||
delete ringloop;
|
||||
kvfs = NULL;
|
||||
db = NULL;
|
||||
cli = NULL;
|
||||
epmgr = NULL;
|
||||
ringloop = NULL;
|
||||
|
@ -376,7 +372,7 @@ void nfs_proxy_t::parse_stats(etcd_kv_t & kv)
|
|||
inode_t inode_num = 0;
|
||||
char null_byte = 0;
|
||||
int scanned = sscanf(key.c_str() + cli->st_cli.etcd_prefix.length()+13, "%u/%ju%c", &pool_id, &inode_num, &null_byte);
|
||||
if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || !inode_num)
|
||||
if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX)
|
||||
{
|
||||
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
||||
}
|
||||
|
@ -410,8 +406,6 @@ void nfs_proxy_t::check_default_pool()
|
|||
auto pool_it = cli->st_cli.pool_config.begin();
|
||||
default_pool_id = pool_it->first;
|
||||
default_pool = pool_it->second.name;
|
||||
pool_block_size = pool_it->second.pg_stripe_size;
|
||||
pool_alignment = pool_it->second.bitmap_granularity;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -426,8 +420,6 @@ void nfs_proxy_t::check_default_pool()
|
|||
if (p.second.name == default_pool)
|
||||
{
|
||||
default_pool_id = p.first;
|
||||
pool_block_size = p.second.pg_stripe_size;
|
||||
pool_alignment = p.second.bitmap_granularity;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -446,12 +438,14 @@ void nfs_proxy_t::do_accept(int listen_fd)
|
|||
int nfs_fd = 0;
|
||||
while ((nfs_fd = accept(listen_fd, (struct sockaddr *)&addr, &addr_size)) >= 0)
|
||||
{
|
||||
fprintf(stderr, "New client %d: connection from %s\n", nfs_fd, addr_to_string(addr).c_str());
|
||||
if (trace)
|
||||
fprintf(stderr, "New client %d: connection from %s\n", nfs_fd, addr_to_string(addr).c_str());
|
||||
active_connections++;
|
||||
fcntl(nfs_fd, F_SETFL, fcntl(nfs_fd, F_GETFL, 0) | O_NONBLOCK);
|
||||
int one = 1;
|
||||
setsockopt(nfs_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
|
||||
auto cli = new nfs_client_t();
|
||||
if (fs_kv_inode)
|
||||
if (kvfs)
|
||||
nfs_kv_procs(cli);
|
||||
else
|
||||
nfs_block_procs(cli);
|
||||
|
@ -466,8 +460,12 @@ void nfs_proxy_t::do_accept(int listen_fd)
|
|||
// Handle incoming event
|
||||
if (epoll_events & EPOLLRDHUP)
|
||||
{
|
||||
fprintf(stderr, "Client %d disconnected\n", nfs_fd);
|
||||
auto parent = cli->parent;
|
||||
if (parent->trace)
|
||||
fprintf(stderr, "Client %d disconnected\n", nfs_fd);
|
||||
cli->stop();
|
||||
parent->active_connections--;
|
||||
parent->check_exit();
|
||||
return;
|
||||
}
|
||||
cli->epoll_events |= epoll_events;
|
||||
|
@ -1006,6 +1004,113 @@ void nfs_proxy_t::daemonize()
|
|||
open(logfile.c_str(), O_WRONLY|O_APPEND|O_CREAT, 0666);
|
||||
}
|
||||
|
||||
void nfs_proxy_t::write_pid()
|
||||
{
|
||||
int fd = open(pidfile.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
|
||||
if (fd < 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to create pid file %s: %s (code %d)\n", pidfile.c_str(), strerror(errno), errno);
|
||||
return;
|
||||
}
|
||||
auto pid = std::to_string(getpid());
|
||||
if (write(fd, pid.c_str(), pid.size()) < 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to write pid to %s: %s (code %d)\n", pidfile.c_str(), strerror(errno), errno);
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
|
||||
// PID passed to waitpid() by single_child_handler(); 0 until a child PID is stored here
static pid_t wanted_pid = 0;
// Set to non-zero by single_child_handler(). It is written from a signal handler
// and polled from normal code, hence volatile sig_atomic_t instead of plain bool.
static volatile sig_atomic_t child_finished = 0;
// Wait status filled in by waitpid() inside the handler; stays -1 if nothing was reaped
static int child_status = -1;

// SIGCHLD handler: tries to reap `wanted_pid` without blocking, stores its wait
// status in `child_status` and raises the `child_finished` flag.
void single_child_handler(int signal)
{
    // waitpid() may overwrite errno; restore it so that the code interrupted
    // by this signal does not observe a changed errno.
    const int saved_errno = errno;
    waitpid(wanted_pid, &child_status, WNOHANG);
    // Raise the flag only after the status has been stored
    child_finished = 1;
    errno = saved_errno;
}
|
||||
|
||||
void nfs_proxy_t::mount_fs()
|
||||
{
|
||||
signal(SIGCHLD, single_child_handler);
|
||||
auto pid = fork();
|
||||
if (pid < 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to fork: %s (code %d)\n", strerror(errno), errno);
|
||||
exit(1);
|
||||
}
|
||||
if (pid > 0)
|
||||
{
|
||||
// Parent - loop and wait until child finishes
|
||||
wanted_pid = pid;
|
||||
while (!child_finished)
|
||||
{
|
||||
ringloop->loop();
|
||||
ringloop->wait();
|
||||
}
|
||||
if (!WIFEXITED(child_status) || WEXITSTATUS(child_status) != 0)
|
||||
{
|
||||
// Mounting failed
|
||||
exit(1);
|
||||
}
|
||||
if (fsname != "")
|
||||
fprintf(stderr, "Successfully mounted VitastorFS %s at %s\n", fsname.c_str(), mountpoint.c_str());
|
||||
else
|
||||
fprintf(stderr, "Successfully mounted Vitastor pseudo-FS at %s\n", mountpoint.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
// Child
|
||||
std::string src = ("localhost:"+export_root);
|
||||
std::string opts = ("port="+std::to_string(listening_port)+",mountport="+std::to_string(listening_port)+",nfsvers=3,soft,nolock,tcp");
|
||||
const char *args[] = { "mount", src.c_str(), mountpoint.c_str(), "-o", opts.c_str(), NULL };
|
||||
execvp("mount", (char* const*)args);
|
||||
fprintf(stderr, "Failed to run mount %s %s -o %s: %s (code %d)\n",
|
||||
src.c_str(), mountpoint.c_str(), opts.c_str(), strerror(errno), errno);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void nfs_proxy_t::check_exit()
|
||||
{
|
||||
if (active_connections || !exit_on_umount)
|
||||
{
|
||||
return;
|
||||
}
|
||||
std::string mountstr = read_file("/proc/mounts");
|
||||
if (mountstr == "")
|
||||
{
|
||||
return;
|
||||
}
|
||||
auto port_opt = "port="+std::to_string(listening_port);
|
||||
auto mountport_opt = "port="+std::to_string(listening_port);
|
||||
auto mounts = explode("\n", mountstr, true);
|
||||
for (auto & str: mounts)
|
||||
{
|
||||
auto opts = explode(" ", str, true);
|
||||
if (opts[2].size() >= 3 && opts[2].substr(0, 3) == "nfs" && opts.size() >= 4)
|
||||
{
|
||||
opts = explode(",", opts[3], true);
|
||||
bool port_found = false;
|
||||
bool addr_found = false;
|
||||
for (auto & opt: opts)
|
||||
{
|
||||
if (opt == port_opt || opt == mountport_opt)
|
||||
port_found = true;
|
||||
if (opt == "addr=127.0.0.1" || opt == "mountaddr=127.0.0.1")
|
||||
addr_found = true;
|
||||
}
|
||||
if (port_found && addr_found)
|
||||
{
|
||||
// OK, do not unmount
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Not found, unmount
|
||||
finished = true;
|
||||
}
|
||||
|
||||
int main(int narg, const char *args[])
|
||||
{
|
||||
setvbuf(stdout, NULL, _IONBF, 0);
|
||||
|
|
|
@ -21,24 +21,23 @@ class nfs_proxy_t
|
|||
{
|
||||
public:
|
||||
std::string bind_address;
|
||||
std::string name_prefix;
|
||||
uint64_t fsid = 1;
|
||||
uint64_t server_id = 0;
|
||||
std::string default_pool;
|
||||
std::string export_root;
|
||||
bool portmap_enabled;
|
||||
unsigned nfs_port;
|
||||
uint64_t fs_kv_inode = 0;
|
||||
uint64_t fs_base_inode = 0;
|
||||
uint64_t fs_inode_count = 0;
|
||||
int readdir_getattr_parallel = 8, id_alloc_batch_size = 200;
|
||||
int trace = 0;
|
||||
std::string logfile = "/dev/null";
|
||||
std::string pidfile;
|
||||
bool exit_on_umount = false;
|
||||
std::string mountpoint;
|
||||
std::string fsname;
|
||||
|
||||
pool_id_t default_pool_id;
|
||||
uint64_t pool_block_size = 0;
|
||||
uint64_t pool_alignment = 0;
|
||||
uint64_t shared_inode_threshold = 0;
|
||||
int active_connections = 0;
|
||||
bool finished = false;
|
||||
int listening_port = 0;
|
||||
pool_id_t default_pool_id = 0;
|
||||
|
||||
portmap_service_t pmap;
|
||||
ring_loop_t *ringloop = NULL;
|
||||
|
@ -65,6 +64,9 @@ public:
|
|||
void check_default_pool();
|
||||
void do_accept(int listen_fd);
|
||||
void daemonize();
|
||||
void write_pid();
|
||||
void mount_fs();
|
||||
void check_exit();
|
||||
};
|
||||
|
||||
struct rpc_cur_buffer_t
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include "str_util.h"
|
||||
|
||||
std::string base64_encode(const std::string &in)
|
||||
|
@ -304,6 +305,23 @@ std::string read_all_fd(int fd)
|
|||
return res;
|
||||
}
|
||||
|
||||
std::string read_file(std::string file, bool allow_enoent)
|
||||
{
|
||||
std::string res;
|
||||
int fd = open(file.c_str(), O_RDONLY);
|
||||
if (fd < 0 || (res = read_all_fd(fd)) == "")
|
||||
{
|
||||
int err = errno;
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
if (!allow_enoent || err != ENOENT)
|
||||
fprintf(stderr, "Failed to read %s: %s (code %d)\n", file.c_str(), strerror(err), err);
|
||||
return "";
|
||||
}
|
||||
close(fd);
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string str_repeat(const std::string & str, int times)
|
||||
{
|
||||
std::string r;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
@ -18,6 +18,7 @@ std::string format_size(uint64_t size, bool nobytes = false);
|
|||
void print_help(const char *help_text, std::string exe_name, std::string cmd, bool all);
|
||||
uint64_t parse_time(std::string time_str, bool *ok = NULL);
|
||||
std::string read_all_fd(int fd);
|
||||
std::string read_file(std::string file, bool allow_enoent = false);
|
||||
std::string str_repeat(const std::string & str, int times);
|
||||
size_t utf8_length(const std::string & s);
|
||||
size_t utf8_length(const char *s);
|
||||
|
|
|
@ -4,7 +4,7 @@ PG_COUNT=16
|
|||
. `dirname $0`/run_3osds.sh
|
||||
|
||||
build/src/vitastor-cli --etcd_address $ETCD_URL create -s 10G fsmeta
|
||||
build/src/vitastor-nfs --fs fsmeta --etcd_address $ETCD_URL --portmap 0 --port 2050 --foreground 1 --trace 1 >>./testdata/nfs.log 2>&1 &
|
||||
build/src/vitastor-nfs start --fs fsmeta --etcd_address $ETCD_URL --portmap 0 --port 2050 --foreground 1 --trace 1 >>./testdata/nfs.log 2>&1 &
|
||||
NFS_PID=$!
|
||||
|
||||
mkdir -p testdata/nfs
|
||||
|
|
Loading…
Reference in New Issue