Add basic NFS tests, fix bugs
Test / buildenv (push) Successful in 12s
Details
Test / build (push) Successful in 2m58s
Details
Test / test_cas (push) Successful in 12s
Details
Test / make_test (push) Successful in 37s
Details
Test / test_change_pg_count (push) Successful in 35s
Details
Test / test_change_pg_size (push) Successful in 10s
Details
Test / test_change_pg_count_ec (push) Successful in 34s
Details
Test / test_create_nomaxid (push) Successful in 9s
Details
Test / test_etcd_fail (push) Successful in 1m8s
Details
Test / test_add_osd (push) Successful in 2m46s
Details
Test / test_interrupted_rebalance_imm (push) Successful in 1m55s
Details
Test / test_interrupted_rebalance (push) Successful in 2m2s
Details
Test / test_failure_domain (push) Successful in 41s
Details
Test / test_snapshot (push) Successful in 57s
Details
Test / test_interrupted_rebalance_ec (push) Successful in 1m55s
Details
Test / test_snapshot_ec (push) Successful in 26s
Details
Test / test_minsize_1 (push) Successful in 17s
Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m23s
Details
Test / test_rm (push) Successful in 15s
Details
Test / test_move_reappear (push) Successful in 22s
Details
Test / test_snapshot_down (push) Successful in 30s
Details
Test / test_snapshot_down_ec (push) Successful in 33s
Details
Test / test_splitbrain (push) Successful in 29s
Details
Test / test_snapshot_chain (push) Successful in 2m7s
Details
Test / test_snapshot_chain_ec (push) Successful in 3m2s
Details
Test / test_rebalance_verify_imm (push) Successful in 3m45s
Details
Test / test_rebalance_verify (push) Successful in 4m25s
Details
Test / test_switch_primary (push) Successful in 44s
Details
Test / test_write (push) Successful in 45s
Details
Test / test_write_no_same (push) Successful in 20s
Details
Test / test_write_xor (push) Successful in 1m1s
Details
Test / test_rebalance_verify_ec (push) Successful in 4m49s
Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m17s
Details
Test / test_heal_pg_size_2 (push) Successful in 3m22s
Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m44s
Details
Test / test_heal_ec (push) Successful in 6m19s
Details
Test / test_heal_csum_32k_dj (push) Successful in 5m43s
Details
Test / test_heal_csum_32k (push) Successful in 6m42s
Details
Test / test_scrub (push) Successful in 1m38s
Details
Test / test_heal_csum_4k_dj (push) Successful in 6m29s
Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m47s
Details
Test / test_heal_csum_4k (push) Successful in 6m21s
Details
Test / test_scrub_zero_osd_2 (push) Successful in 1m39s
Details
Test / test_scrub_xor (push) Successful in 34s
Details
Test / test_scrub_ec (push) Successful in 37s
Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 39s
Details
Test / test_nfs (push) Failing after 12s
Details
Test / test_scrub_pg_size_3 (push) Successful in 48s
Details
Test / buildenv (push) Successful in 12s
Details
Test / build (push) Successful in 2m58s
Details
Test / test_cas (push) Successful in 12s
Details
Test / make_test (push) Successful in 37s
Details
Test / test_change_pg_count (push) Successful in 35s
Details
Test / test_change_pg_size (push) Successful in 10s
Details
Test / test_change_pg_count_ec (push) Successful in 34s
Details
Test / test_create_nomaxid (push) Successful in 9s
Details
Test / test_etcd_fail (push) Successful in 1m8s
Details
Test / test_add_osd (push) Successful in 2m46s
Details
Test / test_interrupted_rebalance_imm (push) Successful in 1m55s
Details
Test / test_interrupted_rebalance (push) Successful in 2m2s
Details
Test / test_failure_domain (push) Successful in 41s
Details
Test / test_snapshot (push) Successful in 57s
Details
Test / test_interrupted_rebalance_ec (push) Successful in 1m55s
Details
Test / test_snapshot_ec (push) Successful in 26s
Details
Test / test_minsize_1 (push) Successful in 17s
Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m23s
Details
Test / test_rm (push) Successful in 15s
Details
Test / test_move_reappear (push) Successful in 22s
Details
Test / test_snapshot_down (push) Successful in 30s
Details
Test / test_snapshot_down_ec (push) Successful in 33s
Details
Test / test_splitbrain (push) Successful in 29s
Details
Test / test_snapshot_chain (push) Successful in 2m7s
Details
Test / test_snapshot_chain_ec (push) Successful in 3m2s
Details
Test / test_rebalance_verify_imm (push) Successful in 3m45s
Details
Test / test_rebalance_verify (push) Successful in 4m25s
Details
Test / test_switch_primary (push) Successful in 44s
Details
Test / test_write (push) Successful in 45s
Details
Test / test_write_no_same (push) Successful in 20s
Details
Test / test_write_xor (push) Successful in 1m1s
Details
Test / test_rebalance_verify_ec (push) Successful in 4m49s
Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m17s
Details
Test / test_heal_pg_size_2 (push) Successful in 3m22s
Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m44s
Details
Test / test_heal_ec (push) Successful in 6m19s
Details
Test / test_heal_csum_32k_dj (push) Successful in 5m43s
Details
Test / test_heal_csum_32k (push) Successful in 6m42s
Details
Test / test_scrub (push) Successful in 1m38s
Details
Test / test_heal_csum_4k_dj (push) Successful in 6m29s
Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m47s
Details
Test / test_heal_csum_4k (push) Successful in 6m21s
Details
Test / test_scrub_zero_osd_2 (push) Successful in 1m39s
Details
Test / test_scrub_xor (push) Successful in 34s
Details
Test / test_scrub_ec (push) Successful in 37s
Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 39s
Details
Test / test_nfs (push) Failing after 12s
Details
Test / test_scrub_pg_size_3 (push) Successful in 48s
Details
parent
1b1e199496
commit
75d2d92861
|
@ -856,3 +856,21 @@ jobs:
|
|||
echo ""
|
||||
done
|
||||
|
||||
# CI job: runs the NFS smoke test inside the image produced by the `build` job
# and, only when the test step fails, dumps every log/text file from testdata.
test_nfs:
  runs-on: ubuntu-latest
  needs: build
  container: ${{env.TEST_IMAGE}}:${{github.sha}}
  steps:
  - name: Run test
    id: test
    timeout-minutes: 3
    run: /root/vitastor/tests/test_nfs.sh
  - name: Print logs
    if: always() && steps.test.outcome == 'failure'
    run: |
      for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
        # An unmatched glob stays literal; skip it instead of failing in cat
        [ -e "$i" ] || continue
        echo "-------- $i --------"
        cat "$i"
        echo ""
      done
|
||||
|
||||
|
|
|
@ -29,11 +29,29 @@ nfstime3 nfstime_from_str(const std::string & s)
|
|||
return t;
|
||||
}
|
||||
|
||||
// Format a timespec as decimal "<seconds>.<nanoseconds>" with the fractional
// part zero-padded to 9 digits, then trim trailing zeros of the fraction and
// a dangling '.' (so {10, 500000000} -> "10.5" and {10, 0} -> "10").
static std::string timespec_to_str(timespec t)
{
    char buf[64];
    // tv_sec/tv_nsec are time_t/long, not uintmax_t: cast explicitly so that
    // the argument types match the conversion on every ABI (incl. 32-bit)
    int l = snprintf(buf, sizeof(buf), "%llu.%09llu",
        (unsigned long long)t.tv_sec, (unsigned long long)t.tv_nsec);
    if (l < 0)
        l = 0;
    else if (l >= (int)sizeof(buf))
        l = sizeof(buf)-1;
    // Only the fraction can be trimmed here: the loop stops at '.' at the latest
    while (l > 0 && buf[l-1] == '0')
        l--;
    if (l > 0 && buf[l-1] == '.')
        l--;
    return std::string(buf, l);
}
|
||||
|
||||
std::string nfstime_to_str(nfstime3 t)
|
||||
{
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "%u.%09u", t.seconds, t.nseconds);
|
||||
return buf;
|
||||
return timespec_to_str((timespec){ .tv_sec = t.seconds, .tv_nsec = t.nseconds });
|
||||
}
|
||||
|
||||
std::string nfstime_now_str()
|
||||
{
|
||||
timespec t;
|
||||
clock_gettime(CLOCK_REALTIME, &t);
|
||||
return timespec_to_str(t);
|
||||
}
|
||||
|
||||
int kv_map_type(const std::string & type)
|
||||
|
|
|
@ -46,7 +46,7 @@ struct kv_inode_extend_t
|
|||
struct kv_fs_state_t
|
||||
{
|
||||
std::map<list_cookie_t, list_cookie_val_t> list_cookies;
|
||||
uint64_t fs_next_id = 0, fs_allocated_id = 0;
|
||||
uint64_t fs_next_id = 1, fs_allocated_id = 0;
|
||||
std::vector<uint64_t> unallocated_ids;
|
||||
std::vector<shared_alloc_queue_t> allocating_shared;
|
||||
uint64_t cur_shared_inode = 0, cur_shared_offset = 0;
|
||||
|
@ -57,12 +57,13 @@ struct shared_file_header_t
|
|||
{
|
||||
uint64_t magic = 0;
|
||||
uint64_t inode = 0;
|
||||
uint64_t size = 0;
|
||||
uint64_t alloc = 0;
|
||||
};
|
||||
|
||||
nfsstat3 vitastor_nfs_map_err(int err);
|
||||
nfstime3 nfstime_from_str(const std::string & s);
|
||||
std::string nfstime_to_str(nfstime3 t);
|
||||
std::string nfstime_now_str();
|
||||
int kv_map_type(const std::string & type);
|
||||
fattr3 get_kv_attributes(nfs_client_t *self, uint64_t ino, json11::Json attrs);
|
||||
std::string kv_direntry_key(uint64_t dir_ino, const std::string & filename);
|
||||
|
|
|
@ -105,7 +105,8 @@ static void kv_do_create(kv_create_state *st)
|
|||
}
|
||||
st->new_id = new_id;
|
||||
auto direntry = json11::Json::object{ { "ino", st->new_id } };
|
||||
if (st->attrobj["type"].string_value() == "dir")
|
||||
if (st->attrobj.find("type") != st->attrobj.end() &&
|
||||
st->attrobj["type"].string_value() == "dir")
|
||||
{
|
||||
direntry["type"] = "dir";
|
||||
}
|
||||
|
|
|
@ -57,6 +57,8 @@ int kv_nfs3_getattr_proc(void *opaque, rpc_op_t *rop)
|
|||
}
|
||||
kv_read_inode(self, ino, [=](int res, const std::string & value, json11::Json attrs)
|
||||
{
|
||||
if (self->parent->trace)
|
||||
fprintf(stderr, "[%d] GETATTR %ju -> %s\n", self->nfs_fd, ino, value.c_str());
|
||||
if (res < 0)
|
||||
{
|
||||
*reply = (GETATTR3res){ .status = vitastor_nfs_map_err(-res) };
|
||||
|
|
|
@ -85,8 +85,7 @@ resume_2:
|
|||
return;
|
||||
}
|
||||
auto hdr = ((shared_file_header_t*)st->aligned_buf);
|
||||
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino ||
|
||||
align_shared_size(st->self, hdr->size) > align_shared_size(st->self, st->ientry["size"].uint64_value()))
|
||||
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino)
|
||||
{
|
||||
// Got unrelated data - retry from the beginning
|
||||
free(st->aligned_buf);
|
||||
|
@ -101,7 +100,7 @@ resume_2:
|
|||
}
|
||||
}
|
||||
st->aligned_offset = (st->offset & ~(st->self->parent->pool_alignment-1));
|
||||
st->aligned_size = ((st->offset + st->size + st->self->parent->pool_alignment) &
|
||||
st->aligned_size = ((st->offset + st->size + st->self->parent->pool_alignment-1) &
|
||||
~(st->self->parent->pool_alignment-1)) - st->aligned_offset;
|
||||
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
||||
st->buf = st->aligned_buf + st->offset - st->aligned_offset;
|
||||
|
@ -121,7 +120,7 @@ resume_2:
|
|||
return;
|
||||
resume_3:
|
||||
auto cb = std::move(st->cb);
|
||||
cb(st->res);
|
||||
cb(st->res < 0 ? st->res : 0);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -157,6 +156,8 @@ int kv_nfs3_read_proc(void *opaque, rpc_op_t *rop)
|
|||
rpc_queue_reply(st->rop);
|
||||
delete st;
|
||||
};
|
||||
if (st->self->parent->trace)
|
||||
fprintf(stderr, "[%d] READ %ju %ju+%ju\n", st->self->nfs_fd, st->ino, st->offset, st->size);
|
||||
nfs_kv_continue_read(st, 0);
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -235,7 +235,7 @@ resume_2:
|
|||
st->self->parent->db->list_next(st->list_handle, NULL);
|
||||
return;
|
||||
resume_3:
|
||||
if (st->res == -ENOENT || st->cur_key.size() > st->prefix.size() || st->cur_key.substr(0, st->prefix.size()) != st->prefix)
|
||||
if (st->res == -ENOENT || st->cur_key.size() < st->prefix.size() || st->cur_key.substr(0, st->prefix.size()) != st->prefix)
|
||||
{
|
||||
st->self->parent->db->list_close(st->list_handle);
|
||||
st->list_handle = NULL;
|
||||
|
|
|
@ -147,11 +147,16 @@ int kv_nfs3_setattr_proc(void *opaque, rpc_op_t *rop)
|
|||
st->set_attrs["uid"] = (uint64_t)args->new_attributes.uid.uid;
|
||||
if (args->new_attributes.gid.set_it)
|
||||
st->set_attrs["gid"] = (uint64_t)args->new_attributes.gid.gid;
|
||||
if (args->new_attributes.atime.set_it)
|
||||
if (args->new_attributes.atime.set_it == SET_TO_SERVER_TIME)
|
||||
st->set_attrs["atime"] = nfstime_now_str();
|
||||
else if (args->new_attributes.atime.set_it == SET_TO_CLIENT_TIME)
|
||||
st->set_attrs["atime"] = nfstime_to_str(args->new_attributes.atime.atime);
|
||||
if (args->new_attributes.mtime.set_it)
|
||||
if (args->new_attributes.mtime.set_it == SET_TO_SERVER_TIME)
|
||||
st->set_attrs["mtime"] = nfstime_now_str();
|
||||
else if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
|
||||
st->set_attrs["mtime"] = nfstime_to_str(args->new_attributes.mtime.mtime);
|
||||
fprintf(stderr, "SETATTR %ju ATTRS %s\n", st->ino, json11::Json(st->set_attrs).dump().c_str());
|
||||
if (st->self->parent->trace)
|
||||
fprintf(stderr, "[%d] SETATTR %ju ATTRS %s\n", st->self->nfs_fd, st->ino, json11::Json(st->set_attrs).dump().c_str());
|
||||
st->cb = [st](int res)
|
||||
{
|
||||
auto reply = (SETATTR3res*)st->rop->reply;
|
||||
|
|
|
@ -17,6 +17,7 @@ struct nfs_rmw_t
|
|||
uint8_t *buf = NULL;
|
||||
uint64_t size = 0;
|
||||
uint8_t *part_buf = NULL;
|
||||
uint64_t version = 0;
|
||||
};
|
||||
|
||||
struct nfs_kv_write_state
|
||||
|
@ -106,6 +107,14 @@ static void allocate_shared_inode(nfs_kv_write_state *st, int state, uint64_t si
|
|||
);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
st->res = 0;
|
||||
st->shared_inode = st->self->parent->kvfs->cur_shared_inode;
|
||||
st->shared_offset = st->self->parent->kvfs->cur_shared_offset;
|
||||
st->self->parent->kvfs->cur_shared_offset += (size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1);
|
||||
nfs_kv_continue_write(st, state);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t align_shared_size(nfs_client_t *self, uint64_t size)
|
||||
|
@ -157,6 +166,7 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
bool is_begin = (rmw->offset % align);
|
||||
bool is_end = ((rmw->offset+rmw->size) % align);
|
||||
// RMW either only at beginning or only at end and within a single block
|
||||
assert(rmw->size < align);
|
||||
assert(is_begin != is_end);
|
||||
assert((rmw->offset/parent->pool_block_size) == ((rmw->offset+rmw->size-1)/parent->pool_block_size));
|
||||
if (!rmw->part_buf)
|
||||
|
@ -166,7 +176,7 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
auto op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_READ;
|
||||
op->inode = parent->fs_base_inode + rmw->ino;
|
||||
op->offset = (rmw->offset + (is_begin ? 0 : rmw->size)) & ~(align-1);
|
||||
op->offset = rmw->offset & ~(align-1);
|
||||
op->len = align;
|
||||
op->iov.push_back(rmw->part_buf, op->len);
|
||||
rmw->st->waiting++;
|
||||
|
@ -185,6 +195,18 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (!rmw->version)
|
||||
{
|
||||
auto st = rmw->st;
|
||||
rmw->version = rd_op->version+1;
|
||||
if (st->rmw[0].st && st->rmw[1].st &&
|
||||
st->rmw[0].offset/st->self->parent->pool_block_size == st->rmw[1].offset/st->self->parent->pool_block_size)
|
||||
{
|
||||
// Same block... RMWs should be sequential
|
||||
int other = rmw == &st->rmw[0] ? 1 : 0;
|
||||
st->rmw[other].version = rmw->version+1;
|
||||
}
|
||||
}
|
||||
auto parent = rmw->st->self->parent;
|
||||
auto align = parent->pool_alignment;
|
||||
bool is_begin = (rmw->offset % align);
|
||||
|
@ -192,8 +214,8 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
op->opcode = OSD_OP_WRITE;
|
||||
op->inode = rmw->st->self->parent->fs_base_inode + rmw->ino;
|
||||
op->offset = rmw->offset & ~(align-1);
|
||||
op->len = (rmw->size + align-1) & ~(align-1);
|
||||
op->version = rd_op->version+1;
|
||||
op->len = align;
|
||||
op->version = rmw->version;
|
||||
if (is_begin)
|
||||
{
|
||||
op->iov.push_back(rmw->part_buf, rmw->offset % align);
|
||||
|
@ -201,12 +223,11 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
|
|||
op->iov.push_back(rmw->buf, rmw->size);
|
||||
if (!is_begin)
|
||||
{
|
||||
auto tail = ((rmw->offset+rmw->size) % align);
|
||||
op->iov.push_back(rmw->part_buf + tail, align - tail);
|
||||
op->iov.push_back(rmw->part_buf + rmw->size, align - rmw->size);
|
||||
}
|
||||
op->callback = [rmw](cluster_op_t *op)
|
||||
{
|
||||
if (op->retval == -EAGAIN)
|
||||
if (op->retval == -EINTR)
|
||||
{
|
||||
// CAS failure - retry
|
||||
rmw->st->waiting--;
|
||||
|
@ -272,11 +293,12 @@ static bool nfs_do_shared_readmodify(nfs_kv_write_state *st, int base_state, int
|
|||
else if (state == base_state) goto resume_0;
|
||||
assert(!st->aligned_buf);
|
||||
st->aligned_size = unshare
|
||||
? sizeof(shared_file_header_t) + (st->new_size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1)
|
||||
? sizeof(shared_file_header_t) + ((st->new_size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1))
|
||||
: align_shared_size(st->self, st->new_size);
|
||||
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
||||
memset(st->aligned_buf + sizeof(shared_file_header_t), 0, st->offset);
|
||||
if (st->ientry["shared_ino"].uint64_value() != 0)
|
||||
if (st->ientry["shared_ino"].uint64_value() != 0 &&
|
||||
st->ientry["size"].uint64_value() != 0)
|
||||
{
|
||||
// Read old data if shared non-empty
|
||||
nfs_do_shared_read(st, base_state);
|
||||
|
@ -286,14 +308,15 @@ resume_0:
|
|||
{
|
||||
auto cb = std::move(st->cb);
|
||||
cb(st->res);
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
auto hdr = ((shared_file_header_t*)st->aligned_buf);
|
||||
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino ||
|
||||
align_shared_size(st->self, hdr->size) > align_shared_size(st->self, st->ientry["size"].uint64_value()))
|
||||
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino)
|
||||
{
|
||||
// Got unrelated data - retry from the beginning
|
||||
st->allow_cache = false;
|
||||
free(st->aligned_buf);
|
||||
st->aligned_buf = NULL;
|
||||
nfs_kv_continue_write(st, 0);
|
||||
return false;
|
||||
}
|
||||
|
@ -301,7 +324,7 @@ resume_0:
|
|||
*((shared_file_header_t*)st->aligned_buf) = {
|
||||
.magic = SHARED_FILE_MAGIC_V1,
|
||||
.inode = st->ino,
|
||||
.size = st->new_size,
|
||||
.alloc = st->aligned_size,
|
||||
};
|
||||
memcpy(st->aligned_buf + sizeof(shared_file_header_t) + st->offset, st->buf, st->size);
|
||||
memset(st->aligned_buf + sizeof(shared_file_header_t) + st->offset + st->size, 0,
|
||||
|
@ -316,6 +339,8 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
|
|||
uint64_t good_offset = offset;
|
||||
uint64_t good_size = st->size;
|
||||
st->waiting++;
|
||||
st->rmw[0].st = NULL;
|
||||
st->rmw[1].st = NULL;
|
||||
if (offset % alignment)
|
||||
{
|
||||
// Requires read-modify-write in the beginning
|
||||
|
@ -337,17 +362,18 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
|
|||
.buf = st->buf,
|
||||
.size = s,
|
||||
};
|
||||
// FIXME: skip rmw at shared beginning
|
||||
nfs_do_rmw(&st->rmw[0]);
|
||||
}
|
||||
if ((offset+st->size-1) % alignment)
|
||||
if ((offset+st->size) % alignment)
|
||||
{
|
||||
// Requires read-modify-write in the end
|
||||
auto s = ((offset+st->size-1) % alignment);
|
||||
auto s = ((offset+st->size) % alignment);
|
||||
if (good_size > s)
|
||||
good_size -= s;
|
||||
else
|
||||
good_size = 0;
|
||||
if (((offset+st->size-1) / alignment) > (offset / alignment))
|
||||
if ((offset+st->size)/alignment > offset/alignment)
|
||||
{
|
||||
st->rmw[1] = {
|
||||
.st = st,
|
||||
|
@ -357,6 +383,7 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
|
|||
.buf = st->buf + st->size-s,
|
||||
.size = s,
|
||||
};
|
||||
// FIXME: skip rmw at end
|
||||
nfs_do_rmw(&st->rmw[1]);
|
||||
}
|
||||
}
|
||||
|
@ -405,19 +432,28 @@ static std::string new_shared_ientry(nfs_kv_write_state *st)
|
|||
return json11::Json(ni).dump();
|
||||
}
|
||||
|
||||
static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state)
|
||||
static std::string new_unshared_ientry(nfs_kv_write_state *st)
|
||||
{
|
||||
if (state == 1)
|
||||
{
|
||||
auto ni = st->ientry.object_items();
|
||||
ni.erase("empty");
|
||||
ni.erase("shared_ino");
|
||||
ni.erase("shared_offset");
|
||||
ni.erase("shared_alloc");
|
||||
ni.erase("shared_ver");
|
||||
return json11::Json(ni).dump();
|
||||
}
|
||||
|
||||
static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state, int base_state)
|
||||
{
|
||||
if (state == base_state+1)
|
||||
goto resume_1;
|
||||
}
|
||||
st->ext->cur_extend = st->ext->next_extend;
|
||||
st->ext->next_extend = 0;
|
||||
st->res2 = -EAGAIN;
|
||||
st->self->parent->db->set(kv_inode_key(st->ino), new_normal_ientry(st), [st](int res)
|
||||
st->self->parent->db->set(kv_inode_key(st->ino), new_normal_ientry(st), [st, base_state](int res)
|
||||
{
|
||||
st->res = res;
|
||||
nfs_kv_continue_write(st, 13);
|
||||
nfs_kv_continue_write(st, base_state+1);
|
||||
}, [st](int res, const std::string & old_value)
|
||||
{
|
||||
if (res != 0)
|
||||
|
@ -432,6 +468,7 @@ static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state)
|
|||
auto ientry = json11::Json::parse(old_value, err).object_items();
|
||||
if (err != "")
|
||||
{
|
||||
fprintf(stderr, "Invalid JSON in inode %lu = %s: %s\n", st->ino, old_value.c_str(), err.c_str());
|
||||
st->res2 = -EINVAL;
|
||||
return false;
|
||||
}
|
||||
|
@ -501,24 +538,20 @@ resume_1:
|
|||
// - In parallel: check if data fits into inode size and extend if it doesn't
|
||||
// - If CAS failure: re-read inode and retry to extend the size
|
||||
// - If shared:
|
||||
// - Read whole file from shared inode
|
||||
// - If the file header in data doesn't match: re-read inode and restart
|
||||
// - If data doesn't fit into the same shared inode:
|
||||
// - Allocate space in a new shared inode
|
||||
// - Read whole file from shared inode
|
||||
// - Write data into the new shared inode
|
||||
// - If CAS failure: allocate another shared inode and retry
|
||||
// - Update inode metadata (set new size and new shared inode)
|
||||
// - If CAS failure: free allocated shared space, re-read inode and restart
|
||||
// - If it fits:
|
||||
// - Write updated data into the shared inode
|
||||
// - Update shared inode data in-place
|
||||
// - Update inode entry in any case to block parallel non-shared writes
|
||||
// - If CAS failure: re-read inode and restart
|
||||
// - Otherwise:
|
||||
// - Write data into non-shared inode
|
||||
// - Read inode in parallel
|
||||
// - If not a regular file:
|
||||
// - Remove data
|
||||
// - Stop with -EINVAL
|
||||
// - Read inode
|
||||
// - If not a regular file - stop with -EINVAL
|
||||
// - If shared:
|
||||
// - Read whole file from shared inode
|
||||
// - Write data into non-shared inode
|
||||
|
@ -526,9 +559,9 @@ resume_1:
|
|||
// - Update inode metadata (make non-shared, update size)
|
||||
// - If CAS failure: restart
|
||||
// - Zero out the shared inode header
|
||||
// - If CAS failure: restart
|
||||
// - Check if size fits
|
||||
// - Extend if it doesn't
|
||||
// - Write data into non-shared inode
|
||||
// - Check if size fits
|
||||
// - Extend if it doesn't
|
||||
// Read:
|
||||
// - If (offset+size <= threshold):
|
||||
// - Read inode from cache
|
||||
|
@ -557,6 +590,9 @@ static void nfs_kv_continue_write(nfs_kv_write_state *st, int state)
|
|||
else if (state == 11) goto resume_11;
|
||||
else if (state == 12) goto resume_12;
|
||||
else if (state == 13) goto resume_13;
|
||||
else if (state == 14) goto resume_14;
|
||||
else if (state == 15) goto resume_15;
|
||||
else if (state == 16) goto resume_16;
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "BUG: invalid state in nfs_kv_continue_write()");
|
||||
|
@ -578,9 +614,7 @@ resume_0:
|
|||
}, st->allow_cache);
|
||||
return;
|
||||
resume_1:
|
||||
if (st->res < 0 ||
|
||||
st->ientry["type"].uint64_value() != 0 &&
|
||||
st->ientry["type"].uint64_value() != NF3REG)
|
||||
if (st->res < 0 || kv_map_type(st->ientry["type"].string_value()) != NF3REG)
|
||||
{
|
||||
auto cb = std::move(st->cb);
|
||||
cb(st->res == 0 ? -EINVAL : st->res);
|
||||
|
@ -594,11 +628,11 @@ resume_1:
|
|||
}
|
||||
if (st->offset + st->size + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold)
|
||||
{
|
||||
if (st->ientry["size"].uint64_value() == 0 ||
|
||||
if (st->ientry["size"].uint64_value() == 0 &&
|
||||
st->ientry["shared_ino"].uint64_value() == 0 ||
|
||||
st->ientry["empty"].bool_value() &&
|
||||
st->ientry["size"].uint64_value() + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold ||
|
||||
(st->ientry["size"].uint64_value() + sizeof(shared_file_header_t)) < st->self->parent->shared_inode_threshold ||
|
||||
st->ientry["shared_ino"].uint64_value() != 0 &&
|
||||
st->ientry["size"].uint64_value() < st->offset+st->size &&
|
||||
st->ientry["shared_alloc"].uint64_value() < align_shared_size(st->self, st->offset+st->size))
|
||||
{
|
||||
// Either empty, or shared and requires moving into a larger place (redirect-write)
|
||||
|
@ -669,17 +703,18 @@ resume_7:
|
|||
nfs_do_fsync(st, 8);
|
||||
return;
|
||||
}
|
||||
resume_8:
|
||||
// We always have to change inode entry on shared writes
|
||||
st->self->parent->db->set(kv_inode_key(st->ino), new_shared_ientry(st), [st](int res)
|
||||
{
|
||||
st->res = res;
|
||||
nfs_kv_continue_write(st, 8);
|
||||
nfs_kv_continue_write(st, 9);
|
||||
}, [st](int res, const std::string & old_value)
|
||||
{
|
||||
return res == 0 && old_value == st->ientry_text;
|
||||
});
|
||||
return;
|
||||
resume_8:
|
||||
resume_9:
|
||||
if (st->res == -EAGAIN)
|
||||
{
|
||||
goto resume_0;
|
||||
|
@ -690,28 +725,55 @@ resume_8:
|
|||
}
|
||||
// Fall through for non-shared
|
||||
}
|
||||
// Non-shared write
|
||||
// Unshare?
|
||||
if (st->ientry["shared_ino"].uint64_value() != 0)
|
||||
{
|
||||
// Unshare
|
||||
resume_9:
|
||||
if (!nfs_do_shared_readmodify(st, 9, state, true))
|
||||
if (st->ientry["size"].uint64_value() != 0)
|
||||
{
|
||||
assert(!st->aligned_buf);
|
||||
st->aligned_size = align_shared_size(st->self, st->ientry["size"].uint64_value());
|
||||
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
|
||||
nfs_do_shared_read(st, 10);
|
||||
return;
|
||||
nfs_do_unshare_write(st, 10);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Just write
|
||||
nfs_do_align_write(st, st->ino, st->offset, 10);
|
||||
}
|
||||
resume_10:
|
||||
nfs_do_unshare_write(st, 11);
|
||||
return;
|
||||
resume_11:
|
||||
;
|
||||
}
|
||||
st->self->parent->db->set(kv_inode_key(st->ino), new_unshared_ientry(st), [st](int res)
|
||||
{
|
||||
st->res = res;
|
||||
nfs_kv_continue_write(st, 12);
|
||||
}, [st](int res, const std::string & old_value)
|
||||
{
|
||||
return res == 0 && old_value == st->ientry_text;
|
||||
});
|
||||
return;
|
||||
resume_12:
|
||||
if (st->res == -EAGAIN)
|
||||
{
|
||||
// Restart
|
||||
goto resume_0;
|
||||
}
|
||||
if (st->res < 0)
|
||||
{
|
||||
auto cb = std::move(st->cb);
|
||||
cb(st->res);
|
||||
return;
|
||||
}
|
||||
st->ientry_text = new_unshared_ientry(st);
|
||||
}
|
||||
// Non-shared write
|
||||
nfs_do_align_write(st, st->ino, st->offset, 13);
|
||||
return;
|
||||
resume_13:
|
||||
if (st->res == 0 && st->stable && !st->was_immediate)
|
||||
{
|
||||
nfs_do_fsync(st, 11);
|
||||
nfs_do_fsync(st, 14);
|
||||
return;
|
||||
}
|
||||
resume_11:
|
||||
resume_14:
|
||||
if (st->res < 0)
|
||||
{
|
||||
auto cb = std::move(st->cb);
|
||||
|
@ -724,7 +786,7 @@ resume_11:
|
|||
{
|
||||
st->ext = &st->self->parent->kvfs->extends[st->ino];
|
||||
st->ext->refcnt++;
|
||||
resume_12:
|
||||
resume_15:
|
||||
if (st->ext->next_extend < st->new_size)
|
||||
{
|
||||
// Aggregate inode extension requests
|
||||
|
@ -733,15 +795,15 @@ resume_12:
|
|||
if (st->ext->cur_extend > 0)
|
||||
{
|
||||
// Wait for current extend which is already in progress
|
||||
st->ext->waiters.push_back([st](){ nfs_kv_continue_write(st, 12); });
|
||||
st->ext->waiters.push_back([st](){ nfs_kv_continue_write(st, 15); });
|
||||
return;
|
||||
}
|
||||
if (st->ext->done_extend < st->new_size)
|
||||
{
|
||||
nfs_kv_extend_inode(st, 0);
|
||||
nfs_kv_extend_inode(st, 15, 15);
|
||||
return;
|
||||
resume_13:
|
||||
nfs_kv_extend_inode(st, 1);
|
||||
resume_16:
|
||||
nfs_kv_extend_inode(st, 16, 15);
|
||||
}
|
||||
st->ext->refcnt--;
|
||||
assert(st->ext->refcnt >= 0);
|
||||
|
@ -769,6 +831,8 @@ int kv_nfs3_write_proc(void *opaque, rpc_op_t *rop)
|
|||
st->ino = kv_fh_inode(args->file);
|
||||
st->offset = args->offset;
|
||||
st->size = (args->count > args->data.size ? args->data.size : args->count);
|
||||
if (st->self->parent->trace)
|
||||
fprintf(stderr, "[%d] WRITE %ju %ju+%ju\n", st->self->nfs_fd, st->ino, st->offset, st->size);
|
||||
if (!st->ino || st->size > MAX_REQUEST_SIZE)
|
||||
{
|
||||
*reply = (WRITE3res){ .status = NFS3ERR_INVAL };
|
||||
|
|
|
@ -98,7 +98,7 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
|
||||
server_id = (uint64_t)lrand48() | ((uint64_t)lrand48() << 31) | ((uint64_t)lrand48() << 62);
|
||||
// Parse options
|
||||
trace = cfg["log_level"].uint64_value() > 5;
|
||||
trace = cfg["log_level"].uint64_value() > 5 || cfg["trace"].uint64_value() > 0;
|
||||
bind_address = cfg["bind"].string_value();
|
||||
if (bind_address == "")
|
||||
bind_address = "0.0.0.0";
|
||||
|
@ -156,7 +156,15 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
check_default_pool();
|
||||
// Check if we're using VitastorFS
|
||||
fs_kv_inode = cfg["fs"].uint64_value();
|
||||
if (!fs_kv_inode && cfg["fs"].is_string())
|
||||
if (fs_kv_inode)
|
||||
{
|
||||
if (!INODE_POOL(fs_kv_inode))
|
||||
{
|
||||
fprintf(stderr, "FS metadata inode number must include pool\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
else if (cfg["fs"].is_string())
|
||||
{
|
||||
for (auto & ic: cli->st_cli.inode_config)
|
||||
{
|
||||
|
@ -166,6 +174,11 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (!fs_kv_inode)
|
||||
{
|
||||
fprintf(stderr, "FS metadata image \"%s\" does not exist\n", cfg["fs"].string_value().c_str());
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
readdir_getattr_parallel = cfg["readdir_getattr_parallel"].uint64_value();
|
||||
if (!readdir_getattr_parallel)
|
||||
|
|
|
@ -68,3 +68,5 @@ SCHEME=xor ./test_scrub.sh
|
|||
PG_SIZE=3 ./test_scrub.sh
|
||||
PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec ./test_scrub.sh
|
||||
SCHEME=ec ./test_scrub.sh
|
||||
|
||||
./test_nfs.sh
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
#!/bin/bash
# Basic VitastorFS-over-NFS smoke test.
#
# Starts a 3-OSD test cluster (run_3osds.sh), creates the "fsmeta" image,
# runs vitastor-nfs on port 2050 and checks through a kernel NFSv3 mount that
# small files survive a remount after being written, overwritten in place,
# grown (90K -> 110K) and extended to a large 2M file.
#
# Requires: sudo (mount/umount), an NFS client; ETCD_URL and format_green are
# expected to come from run_3osds.sh.

# Options are set here rather than in the shebang so they also apply when the
# script is started as `bash test_nfs.sh`
set -ex

PG_COUNT=16
. "$(dirname "$0")"/run_3osds.sh

MNT=$(pwd)/testdata/nfs

# Unmount the test mountpoint (if mounted) and kill all background jobs.
cleanup() {
  sudo umount -f "$MNT" || true
  # Word splitting of the PID list is intended
  # shellcheck disable=SC2046
  kill -9 $(jobs -p) || true
}
# Installed before the server is started and before the first mount so that a
# failure in either of them is still cleaned up
trap cleanup EXIT

# (Re)mount the filesystem; remounting drops the client cache so that the
# following reads are served by the NFS server.
mount_nfs() {
  sudo mount localhost:/ "$MNT" -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
}

# Unmount and mount again.
remount_nfs() {
  sudo umount "$MNT"
  mount_nfs
}

build/src/vitastor-cli --etcd_address "$ETCD_URL" create -s 10G fsmeta
build/src/vitastor-nfs --fs fsmeta --etcd_address "$ETCD_URL" --portmap 0 --port 2050 --foreground 1 --trace 1 >>./testdata/nfs.log 2>&1 &
NFS_PID=$!

mkdir -p "$MNT"
mount_nfs

# write small file
# (ls is used on purpose: it exercises directory listing over NFS)
ls -l "$MNT"
dd if=/dev/urandom of=./testdata/f1 bs=100k count=1
cp ./testdata/f1 "$MNT"/
remount_nfs
ls -l "$MNT" | grep f1
diff ./testdata/f1 "$MNT"/f1
format_green "100K file ok"

# overwrite it inplace
dd if=/dev/urandom of=./testdata/f1_90k bs=90k count=1
cp ./testdata/f1_90k "$MNT"/f1
sudo umount "$MNT"
format_green "inplace overwrite 90K ok"
mount_nfs
ls -l "$MNT" | grep f1
# create another copy
dd if=./testdata/f1_90k of="$MNT"/f1_nfs bs=1M
diff ./testdata/f1_90k "$MNT"/f1_nfs
sudo umount "$MNT"
format_green "another copy 90K ok"
mount_nfs
ls -l "$MNT" | grep f1
# the first file must be intact after the second one was written
cp "$MNT"/f1 ./testdata/f1_nfs
diff ./testdata/f1_90k "$MNT"/f1
diff ./testdata/f1_90k ./testdata/f1_nfs
format_green "90K data ok"

# move it inplace
dd if=/dev/urandom of=./testdata/f1_110k bs=110k count=1
cp ./testdata/f1_110k "$MNT"/f1
remount_nfs
ls -l "$MNT" | grep f1
diff ./testdata/f1_110k "$MNT"/f1
format_green "move shared 90K -> 110K ok"

# extend it to large file
dd if=/dev/urandom of=./testdata/f1_2M bs=2M count=1
cp ./testdata/f1_2M "$MNT"/f1
remount_nfs
ls -l "$MNT" | grep f1
cp "$MNT"/f1 ./testdata/f1_nfs
diff ./testdata/f1_2M "$MNT"/f1
diff ./testdata/f1_2M ./testdata/f1_nfs
format_green "extend to 2M ok"

format_green OK
|
Loading…
Reference in New Issue