forked from vitalif/vitastor
Add basic NFS tests, fix bugs
parent
a574f9ad71
commit
ecfc753e93
|
@ -22,7 +22,7 @@ RUN apt-get update
|
||||||
RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
|
RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
|
||||||
liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
|
liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
|
||||||
RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
|
RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
|
||||||
RUN apt-get -y install jq lp-solve sudo
|
RUN apt-get -y install jq lp-solve sudo nfs-common
|
||||||
RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
|
RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
|
||||||
|
|
||||||
RUN set -ex; \
|
RUN set -ex; \
|
||||||
|
|
|
@ -856,3 +856,21 @@ jobs:
|
||||||
echo ""
|
echo ""
|
||||||
done
|
done
|
||||||
|
|
||||||
|
test_nfs:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||||
|
steps:
|
||||||
|
- name: Run test
|
||||||
|
id: test
|
||||||
|
timeout-minutes: 3
|
||||||
|
run: /root/vitastor/tests/test_nfs.sh
|
||||||
|
- name: Print logs
|
||||||
|
if: always() && steps.test.outcome == 'failure'
|
||||||
|
run: |
|
||||||
|
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||||
|
echo "-------- $i --------"
|
||||||
|
cat $i
|
||||||
|
echo ""
|
||||||
|
done
|
||||||
|
|
||||||
|
|
|
@ -29,11 +29,29 @@ nfstime3 nfstime_from_str(const std::string & s)
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::string timespec_to_str(timespec t)
|
||||||
|
{
|
||||||
|
char buf[64];
|
||||||
|
snprintf(buf, sizeof(buf), "%ju.%09ju", t.tv_sec, t.tv_nsec);
|
||||||
|
int l = strlen(buf);
|
||||||
|
while (l > 0 && buf[l-1] == '0')
|
||||||
|
l--;
|
||||||
|
if (l > 0 && buf[l-1] == '.')
|
||||||
|
l--;
|
||||||
|
buf[l] = 0;
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
std::string nfstime_to_str(nfstime3 t)
|
std::string nfstime_to_str(nfstime3 t)
|
||||||
{
|
{
|
||||||
char buf[32];
|
return timespec_to_str((timespec){ .tv_sec = t.seconds, .tv_nsec = t.nseconds });
|
||||||
snprintf(buf, sizeof(buf), "%u.%09u", t.seconds, t.nseconds);
|
}
|
||||||
return buf;
|
|
||||||
|
std::string nfstime_now_str()
|
||||||
|
{
|
||||||
|
timespec t;
|
||||||
|
clock_gettime(CLOCK_REALTIME, &t);
|
||||||
|
return timespec_to_str(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kv_map_type(const std::string & type)
|
int kv_map_type(const std::string & type)
|
||||||
|
|
|
@ -46,7 +46,7 @@ struct kv_inode_extend_t
|
||||||
struct kv_fs_state_t
|
struct kv_fs_state_t
|
||||||
{
|
{
|
||||||
std::map<list_cookie_t, list_cookie_val_t> list_cookies;
|
std::map<list_cookie_t, list_cookie_val_t> list_cookies;
|
||||||
uint64_t fs_next_id = 0, fs_allocated_id = 0;
|
uint64_t fs_next_id = 1, fs_allocated_id = 0;
|
||||||
std::vector<uint64_t> unallocated_ids;
|
std::vector<uint64_t> unallocated_ids;
|
||||||
std::vector<shared_alloc_queue_t> allocating_shared;
|
std::vector<shared_alloc_queue_t> allocating_shared;
|
||||||
uint64_t cur_shared_inode = 0, cur_shared_offset = 0;
|
uint64_t cur_shared_inode = 0, cur_shared_offset = 0;
|
||||||
|
@ -57,12 +57,13 @@ struct shared_file_header_t
|
||||||
{
|
{
|
||||||
uint64_t magic = 0;
|
uint64_t magic = 0;
|
||||||
uint64_t inode = 0;
|
uint64_t inode = 0;
|
||||||
uint64_t size = 0;
|
uint64_t alloc = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
nfsstat3 vitastor_nfs_map_err(int err);
|
nfsstat3 vitastor_nfs_map_err(int err);
|
||||||
nfstime3 nfstime_from_str(const std::string & s);
|
nfstime3 nfstime_from_str(const std::string & s);
|
||||||
std::string nfstime_to_str(nfstime3 t);
|
std::string nfstime_to_str(nfstime3 t);
|
||||||
|
std::string nfstime_now_str();
|
||||||
int kv_map_type(const std::string & type);
|
int kv_map_type(const std::string & type);
|
||||||
fattr3 get_kv_attributes(nfs_client_t *self, uint64_t ino, json11::Json attrs);
|
fattr3 get_kv_attributes(nfs_client_t *self, uint64_t ino, json11::Json attrs);
|
||||||
std::string kv_direntry_key(uint64_t dir_ino, const std::string & filename);
|
std::string kv_direntry_key(uint64_t dir_ino, const std::string & filename);
|
||||||
|
|
|
@ -94,6 +94,10 @@ static void kv_do_create(kv_create_state *st)
|
||||||
cb(-EINVAL);
|
cb(-EINVAL);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (st->attrobj.find("mtime") == st->attrobj.end())
|
||||||
|
st->attrobj["mtime"] = nfstime_now_str();
|
||||||
|
if (st->attrobj.find("atime") == st->attrobj.end())
|
||||||
|
st->attrobj["atime"] = st->attrobj["mtime"];
|
||||||
// Generate inode ID
|
// Generate inode ID
|
||||||
allocate_new_id(st->self, [st](int res, uint64_t new_id)
|
allocate_new_id(st->self, [st](int res, uint64_t new_id)
|
||||||
{
|
{
|
||||||
|
@ -105,7 +109,8 @@ static void kv_do_create(kv_create_state *st)
|
||||||
}
|
}
|
||||||
st->new_id = new_id;
|
st->new_id = new_id;
|
||||||
auto direntry = json11::Json::object{ { "ino", st->new_id } };
|
auto direntry = json11::Json::object{ { "ino", st->new_id } };
|
||||||
if (st->attrobj["type"].string_value() == "dir")
|
if (st->attrobj.find("type") != st->attrobj.end() &&
|
||||||
|
st->attrobj["type"].string_value() == "dir")
|
||||||
{
|
{
|
||||||
direntry["type"] = "dir";
|
direntry["type"] = "dir";
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,6 +57,8 @@ int kv_nfs3_getattr_proc(void *opaque, rpc_op_t *rop)
|
||||||
}
|
}
|
||||||
kv_read_inode(self, ino, [=](int res, const std::string & value, json11::Json attrs)
|
kv_read_inode(self, ino, [=](int res, const std::string & value, json11::Json attrs)
|
||||||
{
|
{
|
||||||
|
if (self->parent->trace)
|
||||||
|
fprintf(stderr, "[%d] GETATTR %ju -> %s\n", self->nfs_fd, ino, value.c_str());
|
||||||
if (res < 0)
|
if (res < 0)
|
||||||
{
|
{
|
||||||
*reply = (GETATTR3res){ .status = vitastor_nfs_map_err(-res) };
|
*reply = (GETATTR3res){ .status = vitastor_nfs_map_err(-res) };
|
||||||
|
|
|
@ -85,8 +85,7 @@ resume_2:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto hdr = ((shared_file_header_t*)st->aligned_buf);
|
auto hdr = ((shared_file_header_t*)st->aligned_buf);
|
||||||
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino ||
|
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino)
|
||||||
align_shared_size(st->self, hdr->size) > align_shared_size(st->self, st->ientry["size"].uint64_value()))
|
|
||||||
{
|
{
|
||||||
// Got unrelated data - retry from the beginning
|
// Got unrelated data - retry from the beginning
|
||||||
free(st->aligned_buf);
|
free(st->aligned_buf);
|
||||||
|
@ -101,7 +100,7 @@ resume_2:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
st->aligned_offset = (st->offset & ~(st->self->parent->pool_alignment-1));
|
st->aligned_offset = (st->offset & ~(st->self->parent->pool_alignment-1));
|
||||||
st->aligned_size = ((st->offset + st->size + st->self->parent->pool_alignment) &
|
st->aligned_size = ((st->offset + st->size + st->self->parent->pool_alignment-1) &
|
||||||
~(st->self->parent->pool_alignment-1)) - st->aligned_offset;
|
~(st->self->parent->pool_alignment-1)) - st->aligned_offset;
|
||||||
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
||||||
st->buf = st->aligned_buf + st->offset - st->aligned_offset;
|
st->buf = st->aligned_buf + st->offset - st->aligned_offset;
|
||||||
|
@ -121,7 +120,7 @@ resume_2:
|
||||||
return;
|
return;
|
||||||
resume_3:
|
resume_3:
|
||||||
auto cb = std::move(st->cb);
|
auto cb = std::move(st->cb);
|
||||||
cb(st->res);
|
cb(st->res < 0 ? st->res : 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,6 +156,8 @@ int kv_nfs3_read_proc(void *opaque, rpc_op_t *rop)
|
||||||
rpc_queue_reply(st->rop);
|
rpc_queue_reply(st->rop);
|
||||||
delete st;
|
delete st;
|
||||||
};
|
};
|
||||||
|
if (st->self->parent->trace)
|
||||||
|
fprintf(stderr, "[%d] READ %ju %ju+%ju\n", st->self->nfs_fd, st->ino, st->offset, st->size);
|
||||||
nfs_kv_continue_read(st, 0);
|
nfs_kv_continue_read(st, 0);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -188,7 +188,7 @@ resume_2:
|
||||||
auto lc_it = st->self->parent->kvfs->list_cookies.find((list_cookie_t){ st->dir_ino, st->cookieverf, st->cookie });
|
auto lc_it = st->self->parent->kvfs->list_cookies.find((list_cookie_t){ st->dir_ino, st->cookieverf, st->cookie });
|
||||||
if (lc_it != st->self->parent->kvfs->list_cookies.end())
|
if (lc_it != st->self->parent->kvfs->list_cookies.end())
|
||||||
{
|
{
|
||||||
st->start = lc_it->second.key;
|
st->start = st->prefix+lc_it->second.key;
|
||||||
st->to_skip = 1;
|
st->to_skip = 1;
|
||||||
st->offset = st->cookie;
|
st->offset = st->cookie;
|
||||||
}
|
}
|
||||||
|
@ -235,7 +235,7 @@ resume_2:
|
||||||
st->self->parent->db->list_next(st->list_handle, NULL);
|
st->self->parent->db->list_next(st->list_handle, NULL);
|
||||||
return;
|
return;
|
||||||
resume_3:
|
resume_3:
|
||||||
if (st->res == -ENOENT || st->cur_key.size() > st->prefix.size() || st->cur_key.substr(0, st->prefix.size()) != st->prefix)
|
if (st->res == -ENOENT || st->cur_key.size() < st->prefix.size() || st->cur_key.substr(0, st->prefix.size()) != st->prefix)
|
||||||
{
|
{
|
||||||
st->self->parent->db->list_close(st->list_handle);
|
st->self->parent->db->list_close(st->list_handle);
|
||||||
st->list_handle = NULL;
|
st->list_handle = NULL;
|
||||||
|
@ -256,6 +256,11 @@ resume_3:
|
||||||
}
|
}
|
||||||
auto ino = direntry["ino"].uint64_value();
|
auto ino = direntry["ino"].uint64_value();
|
||||||
auto name = kv_direntry_filename(st->cur_key);
|
auto name = kv_direntry_filename(st->cur_key);
|
||||||
|
if (st->self->parent->trace)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "[%d] READDIR %ju %lu %s\n",
|
||||||
|
st->self->nfs_fd, st->dir_ino, st->offset, name.c_str());
|
||||||
|
}
|
||||||
auto fh = kv_fh(ino);
|
auto fh = kv_fh(ino);
|
||||||
// 1 entry3 is (8+4+(filename_len+3)/4*4+8) bytes
|
// 1 entry3 is (8+4+(filename_len+3)/4*4+8) bytes
|
||||||
// 1 entryplus3 is (8+4+(filename_len+3)/4*4+8
|
// 1 entryplus3 is (8+4+(filename_len+3)/4*4+8
|
||||||
|
@ -276,7 +281,7 @@ resume_3:
|
||||||
entry->name = xdr_copy_string(st->rop->xdrs, name);
|
entry->name = xdr_copy_string(st->rop->xdrs, name);
|
||||||
entry->fileid = ino;
|
entry->fileid = ino;
|
||||||
entry->cookie = st->offset++;
|
entry->cookie = st->offset++;
|
||||||
st->self->parent->kvfs->list_cookies[(list_cookie_t){ st->dir_ino, st->cookieverf, entry->cookie }] = { .key = entry->name };
|
st->self->parent->kvfs->list_cookies[(list_cookie_t){ st->dir_ino, st->cookieverf, entry->cookie }] = { .key = name };
|
||||||
if (st->is_plus)
|
if (st->is_plus)
|
||||||
{
|
{
|
||||||
entry->name_handle = (post_op_fh3){
|
entry->name_handle = (post_op_fh3){
|
||||||
|
@ -285,7 +290,6 @@ resume_3:
|
||||||
};
|
};
|
||||||
kv_getattr_next(st);
|
kv_getattr_next(st);
|
||||||
}
|
}
|
||||||
st->self->parent->db->list_next(st->list_handle, NULL);
|
|
||||||
}
|
}
|
||||||
resume_4:
|
resume_4:
|
||||||
while (st->getattr_running > 0)
|
while (st->getattr_running > 0)
|
||||||
|
|
|
@ -225,7 +225,7 @@ resume_6:
|
||||||
}
|
}
|
||||||
// (6) If regular file and inode is deleted: delete data
|
// (6) If regular file and inode is deleted: delete data
|
||||||
if ((!st->type || st->type == NF3REG) && st->ientry["nlink"].uint64_value() <= 1 &&
|
if ((!st->type || st->type == NF3REG) && st->ientry["nlink"].uint64_value() <= 1 &&
|
||||||
!st->ientry["shared_inode"].uint64_value())
|
!st->ientry["shared_ino"].uint64_value())
|
||||||
{
|
{
|
||||||
// Remove data
|
// Remove data
|
||||||
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
||||||
|
|
|
@ -99,7 +99,8 @@ resume_2:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!st->set_attrs["size"].is_null() &&
|
if (!st->set_attrs["size"].is_null() &&
|
||||||
st->ientry["size"].uint64_value() > st->set_attrs["size"].uint64_value())
|
st->ientry["size"].uint64_value() > st->set_attrs["size"].uint64_value() &&
|
||||||
|
!st->ientry["shared_ino"].uint64_value())
|
||||||
{
|
{
|
||||||
// Delete extra data when downsizing
|
// Delete extra data when downsizing
|
||||||
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object {
|
||||||
|
@ -147,11 +148,16 @@ int kv_nfs3_setattr_proc(void *opaque, rpc_op_t *rop)
|
||||||
st->set_attrs["uid"] = (uint64_t)args->new_attributes.uid.uid;
|
st->set_attrs["uid"] = (uint64_t)args->new_attributes.uid.uid;
|
||||||
if (args->new_attributes.gid.set_it)
|
if (args->new_attributes.gid.set_it)
|
||||||
st->set_attrs["gid"] = (uint64_t)args->new_attributes.gid.gid;
|
st->set_attrs["gid"] = (uint64_t)args->new_attributes.gid.gid;
|
||||||
if (args->new_attributes.atime.set_it)
|
if (args->new_attributes.atime.set_it == SET_TO_SERVER_TIME)
|
||||||
|
st->set_attrs["atime"] = nfstime_now_str();
|
||||||
|
else if (args->new_attributes.atime.set_it == SET_TO_CLIENT_TIME)
|
||||||
st->set_attrs["atime"] = nfstime_to_str(args->new_attributes.atime.atime);
|
st->set_attrs["atime"] = nfstime_to_str(args->new_attributes.atime.atime);
|
||||||
if (args->new_attributes.mtime.set_it)
|
if (args->new_attributes.mtime.set_it == SET_TO_SERVER_TIME)
|
||||||
|
st->set_attrs["mtime"] = nfstime_now_str();
|
||||||
|
else if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
|
||||||
st->set_attrs["mtime"] = nfstime_to_str(args->new_attributes.mtime.mtime);
|
st->set_attrs["mtime"] = nfstime_to_str(args->new_attributes.mtime.mtime);
|
||||||
fprintf(stderr, "SETATTR %ju ATTRS %s\n", st->ino, json11::Json(st->set_attrs).dump().c_str());
|
if (st->self->parent->trace)
|
||||||
|
fprintf(stderr, "[%d] SETATTR %ju ATTRS %s\n", st->self->nfs_fd, st->ino, json11::Json(st->set_attrs).dump().c_str());
|
||||||
st->cb = [st](int res)
|
st->cb = [st](int res)
|
||||||
{
|
{
|
||||||
auto reply = (SETATTR3res*)st->rop->reply;
|
auto reply = (SETATTR3res*)st->rop->reply;
|
||||||
|
|
|
@ -17,6 +17,7 @@ struct nfs_rmw_t
|
||||||
uint8_t *buf = NULL;
|
uint8_t *buf = NULL;
|
||||||
uint64_t size = 0;
|
uint64_t size = 0;
|
||||||
uint8_t *part_buf = NULL;
|
uint8_t *part_buf = NULL;
|
||||||
|
uint64_t version = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct nfs_kv_write_state
|
struct nfs_kv_write_state
|
||||||
|
@ -106,6 +107,14 @@ static void allocate_shared_inode(nfs_kv_write_state *st, int state, uint64_t si
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
st->res = 0;
|
||||||
|
st->shared_inode = st->self->parent->kvfs->cur_shared_inode;
|
||||||
|
st->shared_offset = st->self->parent->kvfs->cur_shared_offset;
|
||||||
|
st->self->parent->kvfs->cur_shared_offset += (size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1);
|
||||||
|
nfs_kv_continue_write(st, state);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t align_shared_size(nfs_client_t *self, uint64_t size)
|
uint64_t align_shared_size(nfs_client_t *self, uint64_t size)
|
||||||
|
@ -154,10 +163,7 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
||||||
{
|
{
|
||||||
auto parent = rmw->st->self->parent;
|
auto parent = rmw->st->self->parent;
|
||||||
auto align = parent->pool_alignment;
|
auto align = parent->pool_alignment;
|
||||||
bool is_begin = (rmw->offset % align);
|
assert(rmw->size < align);
|
||||||
bool is_end = ((rmw->offset+rmw->size) % align);
|
|
||||||
// RMW either only at beginning or only at end and within a single block
|
|
||||||
assert(is_begin != is_end);
|
|
||||||
assert((rmw->offset/parent->pool_block_size) == ((rmw->offset+rmw->size-1)/parent->pool_block_size));
|
assert((rmw->offset/parent->pool_block_size) == ((rmw->offset+rmw->size-1)/parent->pool_block_size));
|
||||||
if (!rmw->part_buf)
|
if (!rmw->part_buf)
|
||||||
{
|
{
|
||||||
|
@ -166,7 +172,7 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
||||||
auto op = new cluster_op_t;
|
auto op = new cluster_op_t;
|
||||||
op->opcode = OSD_OP_READ;
|
op->opcode = OSD_OP_READ;
|
||||||
op->inode = parent->fs_base_inode + rmw->ino;
|
op->inode = parent->fs_base_inode + rmw->ino;
|
||||||
op->offset = (rmw->offset + (is_begin ? 0 : rmw->size)) & ~(align-1);
|
op->offset = rmw->offset & ~(align-1);
|
||||||
op->len = align;
|
op->len = align;
|
||||||
op->iov.push_back(rmw->part_buf, op->len);
|
op->iov.push_back(rmw->part_buf, op->len);
|
||||||
rmw->st->waiting++;
|
rmw->st->waiting++;
|
||||||
|
@ -185,30 +191,43 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (!rmw->version)
|
||||||
|
{
|
||||||
|
auto st = rmw->st;
|
||||||
|
rmw->version = rd_op->version+1;
|
||||||
|
if (st->rmw[0].st && st->rmw[1].st &&
|
||||||
|
st->rmw[0].offset/st->self->parent->pool_block_size == st->rmw[1].offset/st->self->parent->pool_block_size)
|
||||||
|
{
|
||||||
|
// Same block... RMWs should be sequential
|
||||||
|
int other = rmw == &st->rmw[0] ? 1 : 0;
|
||||||
|
st->rmw[other].version = rmw->version+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
auto parent = rmw->st->self->parent;
|
auto parent = rmw->st->self->parent;
|
||||||
auto align = parent->pool_alignment;
|
auto align = parent->pool_alignment;
|
||||||
bool is_begin = (rmw->offset % align);
|
bool is_begin = (rmw->offset % align);
|
||||||
|
bool is_end = ((rmw->offset+rmw->size) % align);
|
||||||
auto op = new cluster_op_t;
|
auto op = new cluster_op_t;
|
||||||
op->opcode = OSD_OP_WRITE;
|
op->opcode = OSD_OP_WRITE;
|
||||||
op->inode = rmw->st->self->parent->fs_base_inode + rmw->ino;
|
op->inode = rmw->st->self->parent->fs_base_inode + rmw->ino;
|
||||||
op->offset = rmw->offset & ~(align-1);
|
op->offset = rmw->offset & ~(align-1);
|
||||||
op->len = (rmw->size + align-1) & ~(align-1);
|
op->len = align;
|
||||||
op->version = rd_op->version+1;
|
op->version = rmw->version;
|
||||||
if (is_begin)
|
if (is_begin)
|
||||||
{
|
{
|
||||||
op->iov.push_back(rmw->part_buf, rmw->offset % align);
|
op->iov.push_back(rmw->part_buf, rmw->offset % align);
|
||||||
}
|
}
|
||||||
op->iov.push_back(rmw->buf, rmw->size);
|
op->iov.push_back(rmw->buf, rmw->size);
|
||||||
if (!is_begin)
|
if (is_end)
|
||||||
{
|
{
|
||||||
auto tail = ((rmw->offset+rmw->size) % align);
|
op->iov.push_back(rmw->part_buf + (rmw->offset % align) + rmw->size, align - (rmw->offset % align) - rmw->size);
|
||||||
op->iov.push_back(rmw->part_buf + tail, align - tail);
|
|
||||||
}
|
}
|
||||||
op->callback = [rmw](cluster_op_t *op)
|
op->callback = [rmw](cluster_op_t *op)
|
||||||
{
|
{
|
||||||
if (op->retval == -EAGAIN)
|
if (op->retval == -EINTR)
|
||||||
{
|
{
|
||||||
// CAS failure - retry
|
// CAS failure - retry
|
||||||
|
rmw->version = 0;
|
||||||
rmw->st->waiting--;
|
rmw->st->waiting--;
|
||||||
nfs_do_rmw(rmw);
|
nfs_do_rmw(rmw);
|
||||||
}
|
}
|
||||||
|
@ -272,11 +291,12 @@ static bool nfs_do_shared_readmodify(nfs_kv_write_state *st, int base_state, int
|
||||||
else if (state == base_state) goto resume_0;
|
else if (state == base_state) goto resume_0;
|
||||||
assert(!st->aligned_buf);
|
assert(!st->aligned_buf);
|
||||||
st->aligned_size = unshare
|
st->aligned_size = unshare
|
||||||
? sizeof(shared_file_header_t) + (st->new_size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1)
|
? sizeof(shared_file_header_t) + ((st->new_size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1))
|
||||||
: align_shared_size(st->self, st->new_size);
|
: align_shared_size(st->self, st->new_size);
|
||||||
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
||||||
memset(st->aligned_buf + sizeof(shared_file_header_t), 0, st->offset);
|
memset(st->aligned_buf + sizeof(shared_file_header_t), 0, st->offset);
|
||||||
if (st->ientry["shared_ino"].uint64_value() != 0)
|
if (st->ientry["shared_ino"].uint64_value() != 0 &&
|
||||||
|
st->ientry["size"].uint64_value() != 0)
|
||||||
{
|
{
|
||||||
// Read old data if shared non-empty
|
// Read old data if shared non-empty
|
||||||
nfs_do_shared_read(st, base_state);
|
nfs_do_shared_read(st, base_state);
|
||||||
|
@ -286,14 +306,15 @@ resume_0:
|
||||||
{
|
{
|
||||||
auto cb = std::move(st->cb);
|
auto cb = std::move(st->cb);
|
||||||
cb(st->res);
|
cb(st->res);
|
||||||
return true;
|
return false;
|
||||||
}
|
}
|
||||||
auto hdr = ((shared_file_header_t*)st->aligned_buf);
|
auto hdr = ((shared_file_header_t*)st->aligned_buf);
|
||||||
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino ||
|
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino)
|
||||||
align_shared_size(st->self, hdr->size) > align_shared_size(st->self, st->ientry["size"].uint64_value()))
|
|
||||||
{
|
{
|
||||||
// Got unrelated data - retry from the beginning
|
// Got unrelated data - retry from the beginning
|
||||||
st->allow_cache = false;
|
st->allow_cache = false;
|
||||||
|
free(st->aligned_buf);
|
||||||
|
st->aligned_buf = NULL;
|
||||||
nfs_kv_continue_write(st, 0);
|
nfs_kv_continue_write(st, 0);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -301,7 +322,7 @@ resume_0:
|
||||||
*((shared_file_header_t*)st->aligned_buf) = {
|
*((shared_file_header_t*)st->aligned_buf) = {
|
||||||
.magic = SHARED_FILE_MAGIC_V1,
|
.magic = SHARED_FILE_MAGIC_V1,
|
||||||
.inode = st->ino,
|
.inode = st->ino,
|
||||||
.size = st->new_size,
|
.alloc = st->aligned_size,
|
||||||
};
|
};
|
||||||
memcpy(st->aligned_buf + sizeof(shared_file_header_t) + st->offset, st->buf, st->size);
|
memcpy(st->aligned_buf + sizeof(shared_file_header_t) + st->offset, st->buf, st->size);
|
||||||
memset(st->aligned_buf + sizeof(shared_file_header_t) + st->offset + st->size, 0,
|
memset(st->aligned_buf + sizeof(shared_file_header_t) + st->offset + st->size, 0,
|
||||||
|
@ -316,6 +337,8 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
|
||||||
uint64_t good_offset = offset;
|
uint64_t good_offset = offset;
|
||||||
uint64_t good_size = st->size;
|
uint64_t good_size = st->size;
|
||||||
st->waiting++;
|
st->waiting++;
|
||||||
|
st->rmw[0].st = NULL;
|
||||||
|
st->rmw[1].st = NULL;
|
||||||
if (offset % alignment)
|
if (offset % alignment)
|
||||||
{
|
{
|
||||||
// Requires read-modify-write in the beginning
|
// Requires read-modify-write in the beginning
|
||||||
|
@ -337,17 +360,18 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
|
||||||
.buf = st->buf,
|
.buf = st->buf,
|
||||||
.size = s,
|
.size = s,
|
||||||
};
|
};
|
||||||
|
// FIXME: skip rmw at shared beginning
|
||||||
nfs_do_rmw(&st->rmw[0]);
|
nfs_do_rmw(&st->rmw[0]);
|
||||||
}
|
}
|
||||||
if ((offset+st->size-1) % alignment)
|
if ((offset+st->size) % alignment)
|
||||||
{
|
{
|
||||||
// Requires read-modify-write in the end
|
// Requires read-modify-write in the end
|
||||||
auto s = ((offset+st->size-1) % alignment);
|
auto s = ((offset+st->size) % alignment);
|
||||||
if (good_size > s)
|
if (good_size > s)
|
||||||
good_size -= s;
|
good_size -= s;
|
||||||
else
|
else
|
||||||
good_size = 0;
|
good_size = 0;
|
||||||
if (((offset+st->size-1) / alignment) > (offset / alignment))
|
if ((offset+st->size)/alignment > offset/alignment)
|
||||||
{
|
{
|
||||||
st->rmw[1] = {
|
st->rmw[1] = {
|
||||||
.st = st,
|
.st = st,
|
||||||
|
@ -357,6 +381,7 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
|
||||||
.buf = st->buf + st->size-s,
|
.buf = st->buf + st->size-s,
|
||||||
.size = s,
|
.size = s,
|
||||||
};
|
};
|
||||||
|
// FIXME: skip rmw at end
|
||||||
nfs_do_rmw(&st->rmw[1]);
|
nfs_do_rmw(&st->rmw[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -405,19 +430,28 @@ static std::string new_shared_ientry(nfs_kv_write_state *st)
|
||||||
return json11::Json(ni).dump();
|
return json11::Json(ni).dump();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state)
|
static std::string new_unshared_ientry(nfs_kv_write_state *st)
|
||||||
{
|
{
|
||||||
if (state == 1)
|
auto ni = st->ientry.object_items();
|
||||||
{
|
ni.erase("empty");
|
||||||
|
ni.erase("shared_ino");
|
||||||
|
ni.erase("shared_offset");
|
||||||
|
ni.erase("shared_alloc");
|
||||||
|
ni.erase("shared_ver");
|
||||||
|
return json11::Json(ni).dump();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state, int base_state)
|
||||||
|
{
|
||||||
|
if (state == base_state+1)
|
||||||
goto resume_1;
|
goto resume_1;
|
||||||
}
|
|
||||||
st->ext->cur_extend = st->ext->next_extend;
|
st->ext->cur_extend = st->ext->next_extend;
|
||||||
st->ext->next_extend = 0;
|
st->ext->next_extend = 0;
|
||||||
st->res2 = -EAGAIN;
|
st->res2 = -EAGAIN;
|
||||||
st->self->parent->db->set(kv_inode_key(st->ino), new_normal_ientry(st), [st](int res)
|
st->self->parent->db->set(kv_inode_key(st->ino), new_normal_ientry(st), [st, base_state](int res)
|
||||||
{
|
{
|
||||||
st->res = res;
|
st->res = res;
|
||||||
nfs_kv_continue_write(st, 13);
|
nfs_kv_continue_write(st, base_state+1);
|
||||||
}, [st](int res, const std::string & old_value)
|
}, [st](int res, const std::string & old_value)
|
||||||
{
|
{
|
||||||
if (res != 0)
|
if (res != 0)
|
||||||
|
@ -432,6 +466,7 @@ static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state)
|
||||||
auto ientry = json11::Json::parse(old_value, err).object_items();
|
auto ientry = json11::Json::parse(old_value, err).object_items();
|
||||||
if (err != "")
|
if (err != "")
|
||||||
{
|
{
|
||||||
|
fprintf(stderr, "Invalid JSON in inode %lu = %s: %s\n", st->ino, old_value.c_str(), err.c_str());
|
||||||
st->res2 = -EINVAL;
|
st->res2 = -EINVAL;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -501,24 +536,20 @@ resume_1:
|
||||||
// - In parallel: check if data fits into inode size and extend if it doesn't
|
// - In parallel: check if data fits into inode size and extend if it doesn't
|
||||||
// - If CAS failure: re-read inode and retry to extend the size
|
// - If CAS failure: re-read inode and retry to extend the size
|
||||||
// - If shared:
|
// - If shared:
|
||||||
// - Read whole file from shared inode
|
|
||||||
// - If the file header in data doesn't match: re-read inode and restart
|
|
||||||
// - If data doesn't fit into the same shared inode:
|
// - If data doesn't fit into the same shared inode:
|
||||||
// - Allocate space in a new shared inode
|
// - Allocate space in a new shared inode
|
||||||
|
// - Read whole file from shared inode
|
||||||
// - Write data into the new shared inode
|
// - Write data into the new shared inode
|
||||||
// - If CAS failure: allocate another shared inode and retry
|
// - If CAS failure: allocate another shared inode and retry
|
||||||
// - Update inode metadata (set new size and new shared inode)
|
// - Update inode metadata (set new size and new shared inode)
|
||||||
// - If CAS failure: free allocated shared space, re-read inode and restart
|
// - If CAS failure: free allocated shared space, re-read inode and restart
|
||||||
// - If it fits:
|
// - If it fits:
|
||||||
// - Write updated data into the shared inode
|
// - Update shared inode data in-place
|
||||||
// - Update inode entry in any case to block parallel non-shared writes
|
// - Update inode entry in any case to block parallel non-shared writes
|
||||||
// - If CAS failure: re-read inode and restart
|
// - If CAS failure: re-read inode and restart
|
||||||
// - Otherwise:
|
// - Otherwise:
|
||||||
// - Write data into non-shared inode
|
// - Read inode
|
||||||
// - Read inode in parallel
|
// - If not a regular file - stop with -EINVAL
|
||||||
// - If not a regular file:
|
|
||||||
// - Remove data
|
|
||||||
// - Stop with -EINVAL
|
|
||||||
// - If shared:
|
// - If shared:
|
||||||
// - Read whole file from shared inode
|
// - Read whole file from shared inode
|
||||||
// - Write data into non-shared inode
|
// - Write data into non-shared inode
|
||||||
|
@ -526,7 +557,7 @@ resume_1:
|
||||||
// - Update inode metadata (make non-shared, update size)
|
// - Update inode metadata (make non-shared, update size)
|
||||||
// - If CAS failure: restart
|
// - If CAS failure: restart
|
||||||
// - Zero out the shared inode header
|
// - Zero out the shared inode header
|
||||||
// - If CAS failure: restart
|
// - Write data into non-shared inode
|
||||||
// - Check if size fits
|
// - Check if size fits
|
||||||
// - Extend if it doesn't
|
// - Extend if it doesn't
|
||||||
// Read:
|
// Read:
|
||||||
|
@ -557,6 +588,9 @@ static void nfs_kv_continue_write(nfs_kv_write_state *st, int state)
|
||||||
else if (state == 11) goto resume_11;
|
else if (state == 11) goto resume_11;
|
||||||
else if (state == 12) goto resume_12;
|
else if (state == 12) goto resume_12;
|
||||||
else if (state == 13) goto resume_13;
|
else if (state == 13) goto resume_13;
|
||||||
|
else if (state == 14) goto resume_14;
|
||||||
|
else if (state == 15) goto resume_15;
|
||||||
|
else if (state == 16) goto resume_16;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fprintf(stderr, "BUG: invalid state in nfs_kv_continue_write()");
|
fprintf(stderr, "BUG: invalid state in nfs_kv_continue_write()");
|
||||||
|
@ -578,9 +612,7 @@ resume_0:
|
||||||
}, st->allow_cache);
|
}, st->allow_cache);
|
||||||
return;
|
return;
|
||||||
resume_1:
|
resume_1:
|
||||||
if (st->res < 0 ||
|
if (st->res < 0 || kv_map_type(st->ientry["type"].string_value()) != NF3REG)
|
||||||
st->ientry["type"].uint64_value() != 0 &&
|
|
||||||
st->ientry["type"].uint64_value() != NF3REG)
|
|
||||||
{
|
{
|
||||||
auto cb = std::move(st->cb);
|
auto cb = std::move(st->cb);
|
||||||
cb(st->res == 0 ? -EINVAL : st->res);
|
cb(st->res == 0 ? -EINVAL : st->res);
|
||||||
|
@ -594,11 +626,11 @@ resume_1:
|
||||||
}
|
}
|
||||||
if (st->offset + st->size + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold)
|
if (st->offset + st->size + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold)
|
||||||
{
|
{
|
||||||
if (st->ientry["size"].uint64_value() == 0 ||
|
if (st->ientry["size"].uint64_value() == 0 &&
|
||||||
|
st->ientry["shared_ino"].uint64_value() == 0 ||
|
||||||
st->ientry["empty"].bool_value() &&
|
st->ientry["empty"].bool_value() &&
|
||||||
st->ientry["size"].uint64_value() + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold ||
|
(st->ientry["size"].uint64_value() + sizeof(shared_file_header_t)) < st->self->parent->shared_inode_threshold ||
|
||||||
st->ientry["shared_ino"].uint64_value() != 0 &&
|
st->ientry["shared_ino"].uint64_value() != 0 &&
|
||||||
st->ientry["size"].uint64_value() < st->offset+st->size &&
|
|
||||||
st->ientry["shared_alloc"].uint64_value() < align_shared_size(st->self, st->offset+st->size))
|
st->ientry["shared_alloc"].uint64_value() < align_shared_size(st->self, st->offset+st->size))
|
||||||
{
|
{
|
||||||
// Either empty, or shared and requires moving into a larger place (redirect-write)
|
// Either empty, or shared and requires moving into a larger place (redirect-write)
|
||||||
|
@ -669,17 +701,18 @@ resume_7:
|
||||||
nfs_do_fsync(st, 8);
|
nfs_do_fsync(st, 8);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
resume_8:
|
||||||
// We always have to change inode entry on shared writes
|
// We always have to change inode entry on shared writes
|
||||||
st->self->parent->db->set(kv_inode_key(st->ino), new_shared_ientry(st), [st](int res)
|
st->self->parent->db->set(kv_inode_key(st->ino), new_shared_ientry(st), [st](int res)
|
||||||
{
|
{
|
||||||
st->res = res;
|
st->res = res;
|
||||||
nfs_kv_continue_write(st, 8);
|
nfs_kv_continue_write(st, 9);
|
||||||
}, [st](int res, const std::string & old_value)
|
}, [st](int res, const std::string & old_value)
|
||||||
{
|
{
|
||||||
return res == 0 && old_value == st->ientry_text;
|
return res == 0 && old_value == st->ientry_text;
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
resume_8:
|
resume_9:
|
||||||
if (st->res == -EAGAIN)
|
if (st->res == -EAGAIN)
|
||||||
{
|
{
|
||||||
goto resume_0;
|
goto resume_0;
|
||||||
|
@ -690,28 +723,55 @@ resume_8:
|
||||||
}
|
}
|
||||||
// Fall through for non-shared
|
// Fall through for non-shared
|
||||||
}
|
}
|
||||||
// Non-shared write
|
// Unshare?
|
||||||
if (st->ientry["shared_ino"].uint64_value() != 0)
|
if (st->ientry["shared_ino"].uint64_value() != 0)
|
||||||
{
|
{
|
||||||
// Unshare
|
if (st->ientry["size"].uint64_value() != 0)
|
||||||
resume_9:
|
|
||||||
if (!nfs_do_shared_readmodify(st, 9, state, true))
|
|
||||||
return;
|
|
||||||
nfs_do_unshare_write(st, 10);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
// Just write
|
assert(!st->aligned_buf);
|
||||||
nfs_do_align_write(st, st->ino, st->offset, 10);
|
st->aligned_size = align_shared_size(st->self, st->ientry["size"].uint64_value());
|
||||||
}
|
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
||||||
|
nfs_do_shared_read(st, 10);
|
||||||
|
return;
|
||||||
resume_10:
|
resume_10:
|
||||||
|
nfs_do_unshare_write(st, 11);
|
||||||
|
return;
|
||||||
|
resume_11:
|
||||||
|
;
|
||||||
|
}
|
||||||
|
st->self->parent->db->set(kv_inode_key(st->ino), new_unshared_ientry(st), [st](int res)
|
||||||
|
{
|
||||||
|
st->res = res;
|
||||||
|
nfs_kv_continue_write(st, 12);
|
||||||
|
}, [st](int res, const std::string & old_value)
|
||||||
|
{
|
||||||
|
return res == 0 && old_value == st->ientry_text;
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
resume_12:
|
||||||
|
if (st->res == -EAGAIN)
|
||||||
|
{
|
||||||
|
// Restart
|
||||||
|
goto resume_0;
|
||||||
|
}
|
||||||
|
if (st->res < 0)
|
||||||
|
{
|
||||||
|
auto cb = std::move(st->cb);
|
||||||
|
cb(st->res);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
st->ientry_text = new_unshared_ientry(st);
|
||||||
|
}
|
||||||
|
// Non-shared write
|
||||||
|
nfs_do_align_write(st, st->ino, st->offset, 13);
|
||||||
|
return;
|
||||||
|
resume_13:
|
||||||
if (st->res == 0 && st->stable && !st->was_immediate)
|
if (st->res == 0 && st->stable && !st->was_immediate)
|
||||||
{
|
{
|
||||||
nfs_do_fsync(st, 11);
|
nfs_do_fsync(st, 14);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
resume_11:
|
resume_14:
|
||||||
if (st->res < 0)
|
if (st->res < 0)
|
||||||
{
|
{
|
||||||
auto cb = std::move(st->cb);
|
auto cb = std::move(st->cb);
|
||||||
|
@ -724,7 +784,7 @@ resume_11:
|
||||||
{
|
{
|
||||||
st->ext = &st->self->parent->kvfs->extends[st->ino];
|
st->ext = &st->self->parent->kvfs->extends[st->ino];
|
||||||
st->ext->refcnt++;
|
st->ext->refcnt++;
|
||||||
resume_12:
|
resume_15:
|
||||||
if (st->ext->next_extend < st->new_size)
|
if (st->ext->next_extend < st->new_size)
|
||||||
{
|
{
|
||||||
// Aggregate inode extension requests
|
// Aggregate inode extension requests
|
||||||
|
@ -733,15 +793,15 @@ resume_12:
|
||||||
if (st->ext->cur_extend > 0)
|
if (st->ext->cur_extend > 0)
|
||||||
{
|
{
|
||||||
// Wait for current extend which is already in progress
|
// Wait for current extend which is already in progress
|
||||||
st->ext->waiters.push_back([st](){ nfs_kv_continue_write(st, 12); });
|
st->ext->waiters.push_back([st](){ nfs_kv_continue_write(st, 15); });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (st->ext->done_extend < st->new_size)
|
if (st->ext->done_extend < st->new_size)
|
||||||
{
|
{
|
||||||
nfs_kv_extend_inode(st, 0);
|
nfs_kv_extend_inode(st, 15, 15);
|
||||||
return;
|
return;
|
||||||
resume_13:
|
resume_16:
|
||||||
nfs_kv_extend_inode(st, 1);
|
nfs_kv_extend_inode(st, 16, 15);
|
||||||
}
|
}
|
||||||
st->ext->refcnt--;
|
st->ext->refcnt--;
|
||||||
assert(st->ext->refcnt >= 0);
|
assert(st->ext->refcnt >= 0);
|
||||||
|
@ -769,6 +829,8 @@ int kv_nfs3_write_proc(void *opaque, rpc_op_t *rop)
|
||||||
st->ino = kv_fh_inode(args->file);
|
st->ino = kv_fh_inode(args->file);
|
||||||
st->offset = args->offset;
|
st->offset = args->offset;
|
||||||
st->size = (args->count > args->data.size ? args->data.size : args->count);
|
st->size = (args->count > args->data.size ? args->data.size : args->count);
|
||||||
|
if (st->self->parent->trace)
|
||||||
|
fprintf(stderr, "[%d] WRITE %ju %ju+%ju\n", st->self->nfs_fd, st->ino, st->offset, st->size);
|
||||||
if (!st->ino || st->size > MAX_REQUEST_SIZE)
|
if (!st->ino || st->size > MAX_REQUEST_SIZE)
|
||||||
{
|
{
|
||||||
*reply = (WRITE3res){ .status = NFS3ERR_INVAL };
|
*reply = (WRITE3res){ .status = NFS3ERR_INVAL };
|
||||||
|
|
|
@ -98,7 +98,7 @@ void nfs_proxy_t::run(json11::Json cfg)
|
||||||
srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
|
srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
|
||||||
server_id = (uint64_t)lrand48() | ((uint64_t)lrand48() << 31) | ((uint64_t)lrand48() << 62);
|
server_id = (uint64_t)lrand48() | ((uint64_t)lrand48() << 31) | ((uint64_t)lrand48() << 62);
|
||||||
// Parse options
|
// Parse options
|
||||||
trace = cfg["log_level"].uint64_value() > 5;
|
trace = cfg["log_level"].uint64_value() > 5 || cfg["trace"].uint64_value() > 0;
|
||||||
bind_address = cfg["bind"].string_value();
|
bind_address = cfg["bind"].string_value();
|
||||||
if (bind_address == "")
|
if (bind_address == "")
|
||||||
bind_address = "0.0.0.0";
|
bind_address = "0.0.0.0";
|
||||||
|
@ -156,7 +156,15 @@ void nfs_proxy_t::run(json11::Json cfg)
|
||||||
check_default_pool();
|
check_default_pool();
|
||||||
// Check if we're using VitastorFS
|
// Check if we're using VitastorFS
|
||||||
fs_kv_inode = cfg["fs"].uint64_value();
|
fs_kv_inode = cfg["fs"].uint64_value();
|
||||||
if (!fs_kv_inode && cfg["fs"].is_string())
|
if (fs_kv_inode)
|
||||||
|
{
|
||||||
|
if (!INODE_POOL(fs_kv_inode))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "FS metadata inode number must include pool\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (cfg["fs"].is_string())
|
||||||
{
|
{
|
||||||
for (auto & ic: cli->st_cli.inode_config)
|
for (auto & ic: cli->st_cli.inode_config)
|
||||||
{
|
{
|
||||||
|
@ -166,6 +174,11 @@ void nfs_proxy_t::run(json11::Json cfg)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!fs_kv_inode)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "FS metadata image \"%s\" does not exist\n", cfg["fs"].string_value().c_str());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
readdir_getattr_parallel = cfg["readdir_getattr_parallel"].uint64_value();
|
readdir_getattr_parallel = cfg["readdir_getattr_parallel"].uint64_value();
|
||||||
if (!readdir_getattr_parallel)
|
if (!readdir_getattr_parallel)
|
||||||
|
|
|
@ -68,3 +68,5 @@ SCHEME=xor ./test_scrub.sh
|
||||||
PG_SIZE=3 ./test_scrub.sh
|
PG_SIZE=3 ./test_scrub.sh
|
||||||
PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec ./test_scrub.sh
|
PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec ./test_scrub.sh
|
||||||
SCHEME=ec ./test_scrub.sh
|
SCHEME=ec ./test_scrub.sh
|
||||||
|
|
||||||
|
./test_nfs.sh
|
||||||
|
|
|
@ -0,0 +1,149 @@
|
||||||
|
#!/bin/bash -ex
|
||||||
|
|
||||||
|
PG_COUNT=16
|
||||||
|
. `dirname $0`/run_3osds.sh
|
||||||
|
|
||||||
|
build/src/vitastor-cli --etcd_address $ETCD_URL create -s 10G fsmeta
|
||||||
|
build/src/vitastor-nfs --fs fsmeta --etcd_address $ETCD_URL --portmap 0 --port 2050 --foreground 1 --trace 1 >>./testdata/nfs.log 2>&1 &
|
||||||
|
NFS_PID=$!
|
||||||
|
|
||||||
|
mkdir -p testdata/nfs
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
MNT=$(pwd)/testdata/nfs
|
||||||
|
trap "sudo umount -f $MNT"' || true; kill -9 $(jobs -p)' EXIT
|
||||||
|
|
||||||
|
# write small file
|
||||||
|
ls -l ./testdata/nfs
|
||||||
|
dd if=/dev/urandom of=./testdata/f1 bs=100k count=1
|
||||||
|
cp testdata/f1 ./testdata/nfs/
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
ls -l ./testdata/nfs | grep f1
|
||||||
|
diff ./testdata/f1 ./testdata/nfs/f1
|
||||||
|
format_green "100K file ok"
|
||||||
|
|
||||||
|
# overwrite it inplace
|
||||||
|
dd if=/dev/urandom of=./testdata/f1_90k bs=90k count=1
|
||||||
|
cp testdata/f1_90k ./testdata/nfs/f1
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
format_green "inplace overwrite 90K ok"
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
ls -l ./testdata/nfs | grep f1
|
||||||
|
# create another copy
|
||||||
|
dd if=./testdata/f1_90k of=./testdata/nfs/f1_nfs bs=1M
|
||||||
|
diff ./testdata/f1_90k ./testdata/nfs/f1_nfs
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
format_green "another copy 90K ok"
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
ls -l ./testdata/nfs | grep f1
|
||||||
|
cp ./testdata/nfs/f1 ./testdata/f1_nfs
|
||||||
|
diff ./testdata/f1_90k ./testdata/nfs/f1
|
||||||
|
format_green "90K data ok"
|
||||||
|
|
||||||
|
# move it inplace
|
||||||
|
dd if=/dev/urandom of=./testdata/f1_110k bs=110k count=1
|
||||||
|
cp testdata/f1_110k ./testdata/nfs/f1
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
ls -l ./testdata/nfs | grep f1
|
||||||
|
diff ./testdata/f1_110k ./testdata/nfs/f1
|
||||||
|
format_green "move shared 90K -> 110K ok"
|
||||||
|
|
||||||
|
# extend it to large file + rm
|
||||||
|
dd if=/dev/urandom of=./testdata/f1_2M bs=2M count=1
|
||||||
|
cp ./testdata/f1_2M ./testdata/nfs/f1
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
ls -l ./testdata/nfs | grep f1
|
||||||
|
cp ./testdata/nfs/f1 ./testdata/f1_nfs
|
||||||
|
diff ./testdata/f1_2M ./testdata/nfs/f1
|
||||||
|
rm ./testdata/nfs/f1
|
||||||
|
format_green "extend to 2M + rm ok"
|
||||||
|
|
||||||
|
# mkdir
|
||||||
|
mkdir -p ./testdata/nfs/dir1/dir2
|
||||||
|
echo abcdef > ./testdata/nfs/dir1/dir2/hnpfls
|
||||||
|
# rename dir
|
||||||
|
mv ./testdata/nfs/dir1 ./testdata/nfs/dir3
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
ls -l ./testdata/nfs | grep dir3
|
||||||
|
ls -l ./testdata/nfs/dir3 | grep dir2
|
||||||
|
ls -l ./testdata/nfs/dir3/dir2 | grep hnpfls
|
||||||
|
echo abcdef > ./testdata/hnpfls
|
||||||
|
diff ./testdata/hnpfls ./testdata/nfs/dir3/dir2/hnpfls
|
||||||
|
format_green "rename dir with file ok"
|
||||||
|
|
||||||
|
# touch
|
||||||
|
touch -t 202401011404 ./testdata/nfs/dir3/dir2/hnpfls
|
||||||
|
sudo chown 65534:65534 ./testdata/nfs/dir3/dir2/hnpfls
|
||||||
|
sudo chmod 755 ./testdata/nfs/dir3/dir2/hnpfls
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
T=`stat -c '%a %u %g %y' ./testdata/nfs/dir3/dir2/hnpfls | perl -pe 's/(:\d+)(.*)/$1/'`
|
||||||
|
[[ "$T" = "755 65534 65534 2024-01-01 14:04" ]]
|
||||||
|
format_green "set attrs ok"
|
||||||
|
|
||||||
|
# move dir
|
||||||
|
mv ./testdata/nfs/dir3/dir2 ./testdata/nfs/
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
ls -l ./testdata/nfs | grep dir3
|
||||||
|
ls -l ./testdata/nfs | grep dir2
|
||||||
|
format_green "move dir ok"
|
||||||
|
|
||||||
|
# symlink, readlink
|
||||||
|
ln -s dir2 ./testdata/nfs/sym2
|
||||||
|
[[ "`stat -c '%A' ./testdata/nfs/sym2`" = "lrwxrwxrwx" ]]
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
[[ "`stat -c '%A' ./testdata/nfs/sym2`" = "lrwxrwxrwx" ]]
|
||||||
|
[[ "`readlink ./testdata/nfs/sym2`" = "dir2" ]]
|
||||||
|
format_green "symlink, readlink ok"
|
||||||
|
|
||||||
|
# mknod: chr, blk, sock, fifo + remove
|
||||||
|
sudo mknod ./testdata/nfs/nod_chr c 1 5
|
||||||
|
sudo mknod ./testdata/nfs/nod_blk b 2 6
|
||||||
|
mkfifo ./testdata/nfs/nod_fifo
|
||||||
|
perl -e 'use Socket; socket($sock, PF_UNIX, SOCK_STREAM, undef) || die $!; bind($sock, sockaddr_un("./testdata/nfs/nod_sock")) || die $!;'
|
||||||
|
chmod 777 ./testdata/nfs/nod_*
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
[[ "`ls testdata|wc -l`" -ge 4 ]]
|
||||||
|
[[ "`stat -c '%A' ./testdata/nfs/nod_blk`" = "brwxrwxrwx" ]]
|
||||||
|
[[ "`stat -c '%A' ./testdata/nfs/nod_chr`" = "crwxrwxrwx" ]]
|
||||||
|
[[ "`stat -c '%A' ./testdata/nfs/nod_fifo`" = "prwxrwxrwx" ]]
|
||||||
|
[[ "`stat -c '%A' ./testdata/nfs/nod_sock`" = "srwxrwxrwx" ]]
|
||||||
|
sudo rm ./testdata/nfs/nod_*
|
||||||
|
format_green "mknod + rm ok"
|
||||||
|
|
||||||
|
# hardlink
|
||||||
|
echo ABCDEF > ./testdata/nfs/linked1
|
||||||
|
i=`stat -c '%i' ./testdata/nfs/linked1`
|
||||||
|
ln ./testdata/nfs/linked1 ./testdata/nfs/linked2
|
||||||
|
[[ "`stat -c '%i' ./testdata/nfs/linked2`" -eq $i ]]
|
||||||
|
echo BABABA > ./testdata/nfs/linked2
|
||||||
|
diff ./testdata/nfs/linked2 ./testdata/nfs/linked1
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
diff ./testdata/nfs/linked2 ./testdata/nfs/linked1
|
||||||
|
[[ "`cat ./testdata/nfs/linked2`" = "BABABA" ]]
|
||||||
|
rm ./testdata/nfs/linked2
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
[[ "`cat ./testdata/nfs/linked1`" = "BABABA" ]]
|
||||||
|
format_green "hardlink ok"
|
||||||
|
|
||||||
|
# rm small
|
||||||
|
ls -l ./testdata/nfs
|
||||||
|
dd if=/dev/urandom of=./testdata/nfs/smallfile bs=100k count=1
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
rm ./testdata/nfs/smallfile
|
||||||
|
if ls ./testdata/nfs | grep smallfile; then false; fi
|
||||||
|
sudo umount ./testdata/nfs/
|
||||||
|
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
|
||||||
|
if ls ./testdata/nfs | grep smallfile; then false; fi
|
||||||
|
format_green "rm small ok"
|
||||||
|
|
||||||
|
format_green OK
|
Loading…
Reference in New Issue