Add basic NFS tests, fix bugs
Test / buildenv (push) Successful in 12s Details
Test / build (push) Successful in 2m58s Details
Test / test_cas (push) Successful in 12s Details
Test / make_test (push) Successful in 37s Details
Test / test_change_pg_count (push) Successful in 35s Details
Test / test_change_pg_size (push) Successful in 10s Details
Test / test_change_pg_count_ec (push) Successful in 34s Details
Test / test_create_nomaxid (push) Successful in 9s Details
Test / test_etcd_fail (push) Successful in 1m8s Details
Test / test_add_osd (push) Successful in 2m46s Details
Test / test_interrupted_rebalance_imm (push) Successful in 1m55s Details
Test / test_interrupted_rebalance (push) Successful in 2m2s Details
Test / test_failure_domain (push) Successful in 41s Details
Test / test_snapshot (push) Successful in 57s Details
Test / test_interrupted_rebalance_ec (push) Successful in 1m55s Details
Test / test_snapshot_ec (push) Successful in 26s Details
Test / test_minsize_1 (push) Successful in 17s Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m23s Details
Test / test_rm (push) Successful in 15s Details
Test / test_move_reappear (push) Successful in 22s Details
Test / test_snapshot_down (push) Successful in 30s Details
Test / test_snapshot_down_ec (push) Successful in 33s Details
Test / test_splitbrain (push) Successful in 29s Details
Test / test_snapshot_chain (push) Successful in 2m7s Details
Test / test_snapshot_chain_ec (push) Successful in 3m2s Details
Test / test_rebalance_verify_imm (push) Successful in 3m45s Details
Test / test_rebalance_verify (push) Successful in 4m25s Details
Test / test_switch_primary (push) Successful in 44s Details
Test / test_write (push) Successful in 45s Details
Test / test_write_no_same (push) Successful in 20s Details
Test / test_write_xor (push) Successful in 1m1s Details
Test / test_rebalance_verify_ec (push) Successful in 4m49s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m17s Details
Test / test_heal_pg_size_2 (push) Successful in 3m22s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m44s Details
Test / test_heal_ec (push) Successful in 6m19s Details
Test / test_heal_csum_32k_dj (push) Successful in 5m43s Details
Test / test_heal_csum_32k (push) Successful in 6m42s Details
Test / test_scrub (push) Successful in 1m38s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m29s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m47s Details
Test / test_heal_csum_4k (push) Successful in 6m21s Details
Test / test_scrub_zero_osd_2 (push) Successful in 1m39s Details
Test / test_scrub_xor (push) Successful in 34s Details
Test / test_scrub_ec (push) Successful in 37s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 39s Details
Test / test_nfs (push) Failing after 12s Details
Test / test_scrub_pg_size_3 (push) Successful in 48s Details

Vitaliy Filippov 2024-02-25 02:27:10 +03:00
parent 1b1e199496
commit 75d2d92861
12 changed files with 262 additions and 75 deletions

View File

@ -856,3 +856,21 @@ jobs:
echo ""
done
test_nfs:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_nfs.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done

View File

@ -29,11 +29,29 @@ nfstime3 nfstime_from_str(const std::string & s)
return t;
}
static std::string timespec_to_str(timespec t)
{
char buf[64];
snprintf(buf, sizeof(buf), "%ju.%09ju", t.tv_sec, t.tv_nsec);
int l = strlen(buf);
while (l > 0 && buf[l-1] == '0')
l--;
if (l > 0 && buf[l-1] == '.')
l--;
buf[l] = 0;
return buf;
}
std::string nfstime_to_str(nfstime3 t)
{
char buf[32];
snprintf(buf, sizeof(buf), "%u.%09u", t.seconds, t.nseconds);
return buf;
return timespec_to_str((timespec){ .tv_sec = t.seconds, .tv_nsec = t.nseconds });
}
std::string nfstime_now_str()
{
timespec t;
clock_gettime(CLOCK_REALTIME, &t);
return timespec_to_str(t);
}
int kv_map_type(const std::string & type)

View File

@ -46,7 +46,7 @@ struct kv_inode_extend_t
struct kv_fs_state_t
{
std::map<list_cookie_t, list_cookie_val_t> list_cookies;
uint64_t fs_next_id = 0, fs_allocated_id = 0;
uint64_t fs_next_id = 1, fs_allocated_id = 0;
std::vector<uint64_t> unallocated_ids;
std::vector<shared_alloc_queue_t> allocating_shared;
uint64_t cur_shared_inode = 0, cur_shared_offset = 0;
@ -57,12 +57,13 @@ struct shared_file_header_t
{
uint64_t magic = 0;
uint64_t inode = 0;
uint64_t size = 0;
uint64_t alloc = 0;
};
nfsstat3 vitastor_nfs_map_err(int err);
nfstime3 nfstime_from_str(const std::string & s);
std::string nfstime_to_str(nfstime3 t);
std::string nfstime_now_str();
int kv_map_type(const std::string & type);
fattr3 get_kv_attributes(nfs_client_t *self, uint64_t ino, json11::Json attrs);
std::string kv_direntry_key(uint64_t dir_ino, const std::string & filename);

View File

@ -105,7 +105,8 @@ static void kv_do_create(kv_create_state *st)
}
st->new_id = new_id;
auto direntry = json11::Json::object{ { "ino", st->new_id } };
if (st->attrobj["type"].string_value() == "dir")
if (st->attrobj.find("type") != st->attrobj.end() &&
st->attrobj["type"].string_value() == "dir")
{
direntry["type"] = "dir";
}

View File

@ -57,6 +57,8 @@ int kv_nfs3_getattr_proc(void *opaque, rpc_op_t *rop)
}
kv_read_inode(self, ino, [=](int res, const std::string & value, json11::Json attrs)
{
if (self->parent->trace)
fprintf(stderr, "[%d] GETATTR %ju -> %s\n", self->nfs_fd, ino, value.c_str());
if (res < 0)
{
*reply = (GETATTR3res){ .status = vitastor_nfs_map_err(-res) };

View File

@ -85,8 +85,7 @@ resume_2:
return;
}
auto hdr = ((shared_file_header_t*)st->aligned_buf);
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino ||
align_shared_size(st->self, hdr->size) > align_shared_size(st->self, st->ientry["size"].uint64_value()))
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino)
{
// Got unrelated data - retry from the beginning
free(st->aligned_buf);
@ -101,7 +100,7 @@ resume_2:
}
}
st->aligned_offset = (st->offset & ~(st->self->parent->pool_alignment-1));
st->aligned_size = ((st->offset + st->size + st->self->parent->pool_alignment) &
st->aligned_size = ((st->offset + st->size + st->self->parent->pool_alignment-1) &
~(st->self->parent->pool_alignment-1)) - st->aligned_offset;
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
st->buf = st->aligned_buf + st->offset - st->aligned_offset;
@ -121,7 +120,7 @@ resume_2:
return;
resume_3:
auto cb = std::move(st->cb);
cb(st->res);
cb(st->res < 0 ? st->res : 0);
return;
}
@ -157,6 +156,8 @@ int kv_nfs3_read_proc(void *opaque, rpc_op_t *rop)
rpc_queue_reply(st->rop);
delete st;
};
if (st->self->parent->trace)
fprintf(stderr, "[%d] READ %ju %ju+%ju\n", st->self->nfs_fd, st->ino, st->offset, st->size);
nfs_kv_continue_read(st, 0);
return 1;
}

View File

@ -235,7 +235,7 @@ resume_2:
st->self->parent->db->list_next(st->list_handle, NULL);
return;
resume_3:
if (st->res == -ENOENT || st->cur_key.size() > st->prefix.size() || st->cur_key.substr(0, st->prefix.size()) != st->prefix)
if (st->res == -ENOENT || st->cur_key.size() < st->prefix.size() || st->cur_key.substr(0, st->prefix.size()) != st->prefix)
{
st->self->parent->db->list_close(st->list_handle);
st->list_handle = NULL;

View File

@ -147,11 +147,16 @@ int kv_nfs3_setattr_proc(void *opaque, rpc_op_t *rop)
st->set_attrs["uid"] = (uint64_t)args->new_attributes.uid.uid;
if (args->new_attributes.gid.set_it)
st->set_attrs["gid"] = (uint64_t)args->new_attributes.gid.gid;
if (args->new_attributes.atime.set_it)
if (args->new_attributes.atime.set_it == SET_TO_SERVER_TIME)
st->set_attrs["atime"] = nfstime_now_str();
else if (args->new_attributes.atime.set_it == SET_TO_CLIENT_TIME)
st->set_attrs["atime"] = nfstime_to_str(args->new_attributes.atime.atime);
if (args->new_attributes.mtime.set_it)
if (args->new_attributes.mtime.set_it == SET_TO_SERVER_TIME)
st->set_attrs["mtime"] = nfstime_now_str();
else if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
st->set_attrs["mtime"] = nfstime_to_str(args->new_attributes.mtime.mtime);
fprintf(stderr, "SETATTR %ju ATTRS %s\n", st->ino, json11::Json(st->set_attrs).dump().c_str());
if (st->self->parent->trace)
fprintf(stderr, "[%d] SETATTR %ju ATTRS %s\n", st->self->nfs_fd, st->ino, json11::Json(st->set_attrs).dump().c_str());
st->cb = [st](int res)
{
auto reply = (SETATTR3res*)st->rop->reply;

View File

@ -17,6 +17,7 @@ struct nfs_rmw_t
uint8_t *buf = NULL;
uint64_t size = 0;
uint8_t *part_buf = NULL;
uint64_t version = 0;
};
struct nfs_kv_write_state
@ -106,6 +107,14 @@ static void allocate_shared_inode(nfs_kv_write_state *st, int state, uint64_t si
);
});
}
else
{
st->res = 0;
st->shared_inode = st->self->parent->kvfs->cur_shared_inode;
st->shared_offset = st->self->parent->kvfs->cur_shared_offset;
st->self->parent->kvfs->cur_shared_offset += (size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1);
nfs_kv_continue_write(st, state);
}
}
uint64_t align_shared_size(nfs_client_t *self, uint64_t size)
@ -157,6 +166,7 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
bool is_begin = (rmw->offset % align);
bool is_end = ((rmw->offset+rmw->size) % align);
// RMW either only at beginning or only at end and within a single block
assert(rmw->size < align);
assert(is_begin != is_end);
assert((rmw->offset/parent->pool_block_size) == ((rmw->offset+rmw->size-1)/parent->pool_block_size));
if (!rmw->part_buf)
@ -166,7 +176,7 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
auto op = new cluster_op_t;
op->opcode = OSD_OP_READ;
op->inode = parent->fs_base_inode + rmw->ino;
op->offset = (rmw->offset + (is_begin ? 0 : rmw->size)) & ~(align-1);
op->offset = rmw->offset & ~(align-1);
op->len = align;
op->iov.push_back(rmw->part_buf, op->len);
rmw->st->waiting++;
@ -185,6 +195,18 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
}
else
{
if (!rmw->version)
{
auto st = rmw->st;
rmw->version = rd_op->version+1;
if (st->rmw[0].st && st->rmw[1].st &&
st->rmw[0].offset/st->self->parent->pool_block_size == st->rmw[1].offset/st->self->parent->pool_block_size)
{
// Same block... RMWs should be sequential
int other = rmw == &st->rmw[0] ? 1 : 0;
st->rmw[other].version = rmw->version+1;
}
}
auto parent = rmw->st->self->parent;
auto align = parent->pool_alignment;
bool is_begin = (rmw->offset % align);
@ -192,8 +214,8 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
op->opcode = OSD_OP_WRITE;
op->inode = rmw->st->self->parent->fs_base_inode + rmw->ino;
op->offset = rmw->offset & ~(align-1);
op->len = (rmw->size + align-1) & ~(align-1);
op->version = rd_op->version+1;
op->len = align;
op->version = rmw->version;
if (is_begin)
{
op->iov.push_back(rmw->part_buf, rmw->offset % align);
@ -201,12 +223,11 @@ static void nfs_do_rmw(nfs_rmw_t *rmw)
op->iov.push_back(rmw->buf, rmw->size);
if (!is_begin)
{
auto tail = ((rmw->offset+rmw->size) % align);
op->iov.push_back(rmw->part_buf + tail, align - tail);
op->iov.push_back(rmw->part_buf + rmw->size, align - rmw->size);
}
op->callback = [rmw](cluster_op_t *op)
{
if (op->retval == -EAGAIN)
if (op->retval == -EINTR)
{
// CAS failure - retry
rmw->st->waiting--;
@ -272,11 +293,12 @@ static bool nfs_do_shared_readmodify(nfs_kv_write_state *st, int base_state, int
else if (state == base_state) goto resume_0;
assert(!st->aligned_buf);
st->aligned_size = unshare
? sizeof(shared_file_header_t) + (st->new_size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1)
? sizeof(shared_file_header_t) + ((st->new_size + st->self->parent->pool_alignment-1) & ~(st->self->parent->pool_alignment-1))
: align_shared_size(st->self, st->new_size);
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
memset(st->aligned_buf + sizeof(shared_file_header_t), 0, st->offset);
if (st->ientry["shared_ino"].uint64_value() != 0)
if (st->ientry["shared_ino"].uint64_value() != 0 &&
st->ientry["size"].uint64_value() != 0)
{
// Read old data if shared non-empty
nfs_do_shared_read(st, base_state);
@ -286,14 +308,15 @@ resume_0:
{
auto cb = std::move(st->cb);
cb(st->res);
return true;
return false;
}
auto hdr = ((shared_file_header_t*)st->aligned_buf);
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino ||
align_shared_size(st->self, hdr->size) > align_shared_size(st->self, st->ientry["size"].uint64_value()))
if (hdr->magic != SHARED_FILE_MAGIC_V1 || hdr->inode != st->ino)
{
// Got unrelated data - retry from the beginning
st->allow_cache = false;
free(st->aligned_buf);
st->aligned_buf = NULL;
nfs_kv_continue_write(st, 0);
return false;
}
@ -301,7 +324,7 @@ resume_0:
*((shared_file_header_t*)st->aligned_buf) = {
.magic = SHARED_FILE_MAGIC_V1,
.inode = st->ino,
.size = st->new_size,
.alloc = st->aligned_size,
};
memcpy(st->aligned_buf + sizeof(shared_file_header_t) + st->offset, st->buf, st->size);
memset(st->aligned_buf + sizeof(shared_file_header_t) + st->offset + st->size, 0,
@ -316,6 +339,8 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
uint64_t good_offset = offset;
uint64_t good_size = st->size;
st->waiting++;
st->rmw[0].st = NULL;
st->rmw[1].st = NULL;
if (offset % alignment)
{
// Requires read-modify-write in the beginning
@ -337,17 +362,18 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
.buf = st->buf,
.size = s,
};
// FIXME: skip rmw at shared beginning
nfs_do_rmw(&st->rmw[0]);
}
if ((offset+st->size-1) % alignment)
if ((offset+st->size) % alignment)
{
// Requires read-modify-write in the end
auto s = ((offset+st->size-1) % alignment);
auto s = ((offset+st->size) % alignment);
if (good_size > s)
good_size -= s;
else
good_size = 0;
if (((offset+st->size-1) / alignment) > (offset / alignment))
if ((offset+st->size)/alignment > offset/alignment)
{
st->rmw[1] = {
.st = st,
@ -357,6 +383,7 @@ static void nfs_do_align_write(nfs_kv_write_state *st, uint64_t ino, uint64_t of
.buf = st->buf + st->size-s,
.size = s,
};
// FIXME: skip rmw at end
nfs_do_rmw(&st->rmw[1]);
}
}
@ -405,19 +432,28 @@ static std::string new_shared_ientry(nfs_kv_write_state *st)
return json11::Json(ni).dump();
}
static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state)
static std::string new_unshared_ientry(nfs_kv_write_state *st)
{
if (state == 1)
{
auto ni = st->ientry.object_items();
ni.erase("empty");
ni.erase("shared_ino");
ni.erase("shared_offset");
ni.erase("shared_alloc");
ni.erase("shared_ver");
return json11::Json(ni).dump();
}
static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state, int base_state)
{
if (state == base_state+1)
goto resume_1;
}
st->ext->cur_extend = st->ext->next_extend;
st->ext->next_extend = 0;
st->res2 = -EAGAIN;
st->self->parent->db->set(kv_inode_key(st->ino), new_normal_ientry(st), [st](int res)
st->self->parent->db->set(kv_inode_key(st->ino), new_normal_ientry(st), [st, base_state](int res)
{
st->res = res;
nfs_kv_continue_write(st, 13);
nfs_kv_continue_write(st, base_state+1);
}, [st](int res, const std::string & old_value)
{
if (res != 0)
@ -432,6 +468,7 @@ static void nfs_kv_extend_inode(nfs_kv_write_state *st, int state)
auto ientry = json11::Json::parse(old_value, err).object_items();
if (err != "")
{
fprintf(stderr, "Invalid JSON in inode %lu = %s: %s\n", st->ino, old_value.c_str(), err.c_str());
st->res2 = -EINVAL;
return false;
}
@ -501,24 +538,20 @@ resume_1:
// - In parallel: check if data fits into inode size and extend if it doesn't
// - If CAS failure: re-read inode and retry to extend the size
// - If shared:
// - Read whole file from shared inode
// - If the file header in data doesn't match: re-read inode and restart
// - If data doesn't fit into the same shared inode:
// - Allocate space in a new shared inode
// - Read whole file from shared inode
// - Write data into the new shared inode
// - If CAS failure: allocate another shared inode and retry
// - Update inode metadata (set new size and new shared inode)
// - If CAS failure: free allocated shared space, re-read inode and restart
// - If it fits:
// - Write updated data into the shared inode
// - Update shared inode data in-place
// - Update inode entry in any case to block parallel non-shared writes
// - If CAS failure: re-read inode and restart
// - Otherwise:
// - Write data into non-shared inode
// - Read inode in parallel
// - If not a regular file:
// - Remove data
// - Stop with -EINVAL
// - Read inode
// - If not a regular file - stop with -EINVAL
// - If shared:
// - Read whole file from shared inode
// - Write data into non-shared inode
@ -526,9 +559,9 @@ resume_1:
// - Update inode metadata (make non-shared, update size)
// - If CAS failure: restart
// - Zero out the shared inode header
// - If CAS failure: restart
// - Check if size fits
// - Extend if it doesn't
// - Write data into non-shared inode
// - Check if size fits
// - Extend if it doesn't
// Read:
// - If (offset+size <= threshold):
// - Read inode from cache
@ -557,6 +590,9 @@ static void nfs_kv_continue_write(nfs_kv_write_state *st, int state)
else if (state == 11) goto resume_11;
else if (state == 12) goto resume_12;
else if (state == 13) goto resume_13;
else if (state == 14) goto resume_14;
else if (state == 15) goto resume_15;
else if (state == 16) goto resume_16;
else
{
fprintf(stderr, "BUG: invalid state in nfs_kv_continue_write()");
@ -578,9 +614,7 @@ resume_0:
}, st->allow_cache);
return;
resume_1:
if (st->res < 0 ||
st->ientry["type"].uint64_value() != 0 &&
st->ientry["type"].uint64_value() != NF3REG)
if (st->res < 0 || kv_map_type(st->ientry["type"].string_value()) != NF3REG)
{
auto cb = std::move(st->cb);
cb(st->res == 0 ? -EINVAL : st->res);
@ -594,11 +628,11 @@ resume_1:
}
if (st->offset + st->size + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold)
{
if (st->ientry["size"].uint64_value() == 0 ||
if (st->ientry["size"].uint64_value() == 0 &&
st->ientry["shared_ino"].uint64_value() == 0 ||
st->ientry["empty"].bool_value() &&
st->ientry["size"].uint64_value() + sizeof(shared_file_header_t) < st->self->parent->shared_inode_threshold ||
(st->ientry["size"].uint64_value() + sizeof(shared_file_header_t)) < st->self->parent->shared_inode_threshold ||
st->ientry["shared_ino"].uint64_value() != 0 &&
st->ientry["size"].uint64_value() < st->offset+st->size &&
st->ientry["shared_alloc"].uint64_value() < align_shared_size(st->self, st->offset+st->size))
{
// Either empty, or shared and requires moving into a larger place (redirect-write)
@ -669,17 +703,18 @@ resume_7:
nfs_do_fsync(st, 8);
return;
}
resume_8:
// We always have to change inode entry on shared writes
st->self->parent->db->set(kv_inode_key(st->ino), new_shared_ientry(st), [st](int res)
{
st->res = res;
nfs_kv_continue_write(st, 8);
nfs_kv_continue_write(st, 9);
}, [st](int res, const std::string & old_value)
{
return res == 0 && old_value == st->ientry_text;
});
return;
resume_8:
resume_9:
if (st->res == -EAGAIN)
{
goto resume_0;
@ -690,28 +725,55 @@ resume_8:
}
// Fall through for non-shared
}
// Non-shared write
// Unshare?
if (st->ientry["shared_ino"].uint64_value() != 0)
{
// Unshare
resume_9:
if (!nfs_do_shared_readmodify(st, 9, state, true))
if (st->ientry["size"].uint64_value() != 0)
{
assert(!st->aligned_buf);
st->aligned_size = align_shared_size(st->self, st->ientry["size"].uint64_value());
st->aligned_buf = (uint8_t*)malloc_or_die(st->aligned_size);
nfs_do_shared_read(st, 10);
return;
nfs_do_unshare_write(st, 10);
return;
}
else
{
// Just write
nfs_do_align_write(st, st->ino, st->offset, 10);
}
resume_10:
nfs_do_unshare_write(st, 11);
return;
resume_11:
;
}
st->self->parent->db->set(kv_inode_key(st->ino), new_unshared_ientry(st), [st](int res)
{
st->res = res;
nfs_kv_continue_write(st, 12);
}, [st](int res, const std::string & old_value)
{
return res == 0 && old_value == st->ientry_text;
});
return;
resume_12:
if (st->res == -EAGAIN)
{
// Restart
goto resume_0;
}
if (st->res < 0)
{
auto cb = std::move(st->cb);
cb(st->res);
return;
}
st->ientry_text = new_unshared_ientry(st);
}
// Non-shared write
nfs_do_align_write(st, st->ino, st->offset, 13);
return;
resume_13:
if (st->res == 0 && st->stable && !st->was_immediate)
{
nfs_do_fsync(st, 11);
nfs_do_fsync(st, 14);
return;
}
resume_11:
resume_14:
if (st->res < 0)
{
auto cb = std::move(st->cb);
@ -724,7 +786,7 @@ resume_11:
{
st->ext = &st->self->parent->kvfs->extends[st->ino];
st->ext->refcnt++;
resume_12:
resume_15:
if (st->ext->next_extend < st->new_size)
{
// Aggregate inode extension requests
@ -733,15 +795,15 @@ resume_12:
if (st->ext->cur_extend > 0)
{
// Wait for current extend which is already in progress
st->ext->waiters.push_back([st](){ nfs_kv_continue_write(st, 12); });
st->ext->waiters.push_back([st](){ nfs_kv_continue_write(st, 15); });
return;
}
if (st->ext->done_extend < st->new_size)
{
nfs_kv_extend_inode(st, 0);
nfs_kv_extend_inode(st, 15, 15);
return;
resume_13:
nfs_kv_extend_inode(st, 1);
resume_16:
nfs_kv_extend_inode(st, 16, 15);
}
st->ext->refcnt--;
assert(st->ext->refcnt >= 0);
@ -769,6 +831,8 @@ int kv_nfs3_write_proc(void *opaque, rpc_op_t *rop)
st->ino = kv_fh_inode(args->file);
st->offset = args->offset;
st->size = (args->count > args->data.size ? args->data.size : args->count);
if (st->self->parent->trace)
fprintf(stderr, "[%d] WRITE %ju %ju+%ju\n", st->self->nfs_fd, st->ino, st->offset, st->size);
if (!st->ino || st->size > MAX_REQUEST_SIZE)
{
*reply = (WRITE3res){ .status = NFS3ERR_INVAL };

View File

@ -98,7 +98,7 @@ void nfs_proxy_t::run(json11::Json cfg)
srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
server_id = (uint64_t)lrand48() | ((uint64_t)lrand48() << 31) | ((uint64_t)lrand48() << 62);
// Parse options
trace = cfg["log_level"].uint64_value() > 5;
trace = cfg["log_level"].uint64_value() > 5 || cfg["trace"].uint64_value() > 0;
bind_address = cfg["bind"].string_value();
if (bind_address == "")
bind_address = "0.0.0.0";
@ -156,7 +156,15 @@ void nfs_proxy_t::run(json11::Json cfg)
check_default_pool();
// Check if we're using VitastorFS
fs_kv_inode = cfg["fs"].uint64_value();
if (!fs_kv_inode && cfg["fs"].is_string())
if (fs_kv_inode)
{
if (!INODE_POOL(fs_kv_inode))
{
fprintf(stderr, "FS metadata inode number must include pool\n");
exit(1);
}
}
else if (cfg["fs"].is_string())
{
for (auto & ic: cli->st_cli.inode_config)
{
@ -166,6 +174,11 @@ void nfs_proxy_t::run(json11::Json cfg)
break;
}
}
if (!fs_kv_inode)
{
fprintf(stderr, "FS metadata image \"%s\" does not exist\n", cfg["fs"].string_value().c_str());
exit(1);
}
}
readdir_getattr_parallel = cfg["readdir_getattr_parallel"].uint64_value();
if (!readdir_getattr_parallel)

View File

@ -68,3 +68,5 @@ SCHEME=xor ./test_scrub.sh
PG_SIZE=3 ./test_scrub.sh
PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec ./test_scrub.sh
SCHEME=ec ./test_scrub.sh
./test_nfs.sh

62
tests/test_nfs.sh Executable file
View File

@ -0,0 +1,62 @@
#!/bin/bash -ex
PG_COUNT=16
. `dirname $0`/run_3osds.sh
build/src/vitastor-cli --etcd_address $ETCD_URL create -s 10G fsmeta
build/src/vitastor-nfs --fs fsmeta --etcd_address $ETCD_URL --portmap 0 --port 2050 --foreground 1 --trace 1 >>./testdata/nfs.log 2>&1 &
NFS_PID=$!
mkdir -p testdata/nfs
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
MNT=$(pwd)/testdata/nfs
trap "sudo umount -f $MNT"' || true; kill -9 $(jobs -p)' EXIT
# write small file
ls -l ./testdata/nfs
dd if=/dev/urandom of=./testdata/f1 bs=100k count=1
cp testdata/f1 ./testdata/nfs/
sudo umount ./testdata/nfs/
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
ls -l ./testdata/nfs | grep f1
diff ./testdata/f1 ./testdata/nfs/f1
format_green "100K file ok"
# overwrite it inplace
dd if=/dev/urandom of=./testdata/f1_90k bs=90k count=1
cp testdata/f1_90k ./testdata/nfs/f1
sudo umount ./testdata/nfs/
format_green "inplace overwrite 90K ok"
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
ls -l ./testdata/nfs | grep f1
# create another copy
dd if=./testdata/f1_90k of=./testdata/nfs/f1_nfs bs=1M
diff ./testdata/f1_90k ./testdata/nfs/f1_nfs
sudo umount ./testdata/nfs/
format_green "another copy 90K ok"
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
ls -l ./testdata/nfs | grep f1
cp ./testdata/nfs/f1 ./testdata/f1_nfs
diff ./testdata/f1_90k ./testdata/nfs/f1
format_green "90K data ok"
# move it inplace
dd if=/dev/urandom of=./testdata/f1_110k bs=110k count=1
cp testdata/f1_110k ./testdata/nfs/f1
sudo umount ./testdata/nfs/
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
ls -l ./testdata/nfs | grep f1
diff ./testdata/f1_110k ./testdata/nfs/f1
format_green "move shared 90K -> 110K ok"
# extend it to large file
dd if=/dev/urandom of=./testdata/f1_2M bs=2M count=1
cp testdata/f1_2M ./testdata/nfs/f1
sudo umount ./testdata/nfs/
sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
ls -l ./testdata/nfs | grep f1
cp ./testdata/nfs/f1 ./testdata/f1_nfs
diff ./testdata/f1_2M ./testdata/nfs/f1
format_green "extend to 2M ok"
format_green OK