diff --git a/src/nfs_kv_remove.cpp b/src/nfs_kv_remove.cpp index f515e2f4..c2a0bf14 100644 --- a/src/nfs_kv_remove.cpp +++ b/src/nfs_kv_remove.cpp @@ -22,6 +22,7 @@ struct kv_del_state int type = 0; bool is_rmdir = false; bool rm_data = false; + bool allow_cache = true; int res = 0, res2 = 0; std::function cb; }; @@ -48,12 +49,13 @@ static void nfs_kv_continue_delete(kv_del_state *st, int state) fprintf(stderr, "BUG: invalid state in nfs_kv_continue_delete()"); abort(); } +resume_0: st->self->parent->db->get(kv_direntry_key(st->dir_ino, st->filename), [st](int res, const std::string & value) { st->res = res; st->direntry_text = value; nfs_kv_continue_delete(st, 1); - }); + }, st->allow_cache); return; resume_1: if (st->res < 0) @@ -82,7 +84,7 @@ resume_1: st->res = res; st->ientry_text = value; nfs_kv_continue_delete(st, 2); - }); + }, st->allow_cache); return; resume_2: if (st->res < 0) @@ -124,8 +126,8 @@ resume_3: if (st->res == -EAGAIN) { // CAS failure, restart from the beginning - nfs_kv_continue_delete(st, 0); - return; + st->allow_cache = false; + goto resume_0; } else if (st->res < 0 && st->res != -ENOENT) { diff --git a/src/nfs_kv_rename.cpp b/src/nfs_kv_rename.cpp index 2dce1f12..1f0da052 100644 --- a/src/nfs_kv_rename.cpp +++ b/src/nfs_kv_rename.cpp @@ -7,28 +7,46 @@ #include "nfs_proxy.h" #include "nfs_kv.h" +#include "cli.h" struct nfs_kv_rename_state { nfs_client_t *self = NULL; rpc_op_t *rop = NULL; + // params: uint64_t old_dir_ino = 0, new_dir_ino = 0; std::string old_name, new_name; - std::string old_direntry_text; - std::string ientry_text; - json11::Json direntry, ientry; + // state: + bool allow_cache = true; + std::string old_direntry_text, old_ientry_text, new_direntry_text, new_ientry_text; + json11::Json old_direntry, old_ientry, new_direntry, new_ientry; + std::string new_dir_prefix; + void *list_handle = NULL; + bool new_exists = false; + bool rm_dest_data = false; int res = 0, res2 = 0; std::function cb; }; static void nfs_kv_continue_rename(nfs_kv_rename_state *st, int state) { - // Simplified algorithm (non-atomic and without ENOTDIR/EISDIR): - // 1) Check if the target directory exists - // 2) Delete & save (using CAS) the source direntry - // 3) Write the target direntry using CAS, fail if it already exists - // 4) Restore the source direntry on failure - // Atomic version would require something like a journal + // Algorithm (non-atomic of course): + // 1) Read source direntry + // 2) Read destination direntry + // 3) If destination exists: + // 3.1) Check file/folder compatibility (EISDIR/ENOTDIR) + // 3.2) Check if destination is empty if it's a folder + // 4) If not: + // 4.1) Check that the destination directory is actually a directory + // 5) Overwrite destination direntry, restart from beginning if CAS failure + // 6) Delete source direntry, restart from beginning if CAS failure + // 7) If the moved direntry was a regular file: + // 7.1) Read inode + // 7.2) Delete inode if its link count <= 1 + // 7.3) Delete inode data if its link count <= 1 and it's a regular non-shared file + // 7.4) Reduce link count by 1 if it's > 1 + // 8) If the moved direntry is a directory: + // 8.1) Change parent_ino reference in its inode if (state == 0) {} else if (state == 1) goto resume_1; else if (state == 2) goto resume_2; @@ -36,37 +54,27 @@ static void nfs_kv_continue_rename(nfs_kv_rename_state *st, int state) else if (state == 4) goto resume_4; else if (state == 5) goto resume_5; else if (state == 6) goto resume_6; + else if (state == 7) goto resume_7; + else if (state == 8) goto resume_8; + else if (state == 9) goto resume_9; + else if (state == 10) goto resume_10; + else if (state == 11) goto resume_11; + else if (state == 12) goto resume_12; else { fprintf(stderr, "BUG: invalid state in nfs_kv_continue_rename()"); abort(); } - kv_read_inode(st->self, st->new_dir_ino, [st](int res, const std::string & value, json11::Json attrs) +resume_0: + // Read the old direntry + st->self->parent->db->get(kv_direntry_key(st->old_dir_ino, st->old_name), [=](int res, const std::string & value) { - st->res = res == 0 ? (attrs["type"].string_value() == "dir" ? 0 : -ENOTDIR) : res; + st->res = res; + st->old_direntry_text = value; nfs_kv_continue_rename(st, 1); - }); + }, st->allow_cache); return; resume_1: - if (st->res < 0) - { - auto cb = std::move(st->cb); - cb(st->res); - return; - } - // Read & delete the old direntry - st->self->parent->db->del(kv_direntry_key(st->old_dir_ino, st->old_name), [st](int res) - { - st->res = res; - nfs_kv_continue_rename(st, 2); - }, [=](int res, const std::string & old_value) - { - st->res = res; - st->old_direntry_text = old_value; - return true; - }); - return; -resume_2: if (st->res < 0) { auto cb = std::move(st->cb); @@ -75,47 +83,85 @@ resume_2: } { std::string err; - st->direntry = json11::Json::parse(st->old_direntry_text, err); + st->old_direntry = json11::Json::parse(st->old_direntry_text, err); if (err != "") { fprintf(stderr, "Invalid JSON in direntry %s = %s: %s\n", kv_direntry_key(st->old_dir_ino, st->old_name).c_str(), st->old_direntry_text.c_str(), err.c_str()); - } - } - if (st->direntry["type"].string_value() == "dir" && - st->direntry["ino"].uint64_value() != 0 && - st->new_dir_ino != st->old_dir_ino) - { - // Read & check inode - kv_read_inode(st->self, st->direntry["ino"].uint64_value(), [st](int res, const std::string & value, json11::Json ientry) - { - st->res = res; - st->ientry_text = value; - st->ientry = ientry; - nfs_kv_continue_rename(st, 3); - }); - return; -resume_3: - if (st->res < 0) - { auto cb = std::move(st->cb); - cb(st->res); + cb(-EIO); return; } - // Change parent reference + } + // Read the new direntry + st->self->parent->db->get(kv_direntry_key(st->new_dir_ino, st->new_name), [=](int res, const std::string & value) + { + st->res = res; + st->new_direntry_text = value; + nfs_kv_continue_rename(st, 2); + }, st->allow_cache); + return; +resume_2: + if (st->res < 0 && st->res != -ENOENT) + { + auto cb = std::move(st->cb); + cb(st->res); + return; + } + if (st->res == 0) + { + std::string err; + st->new_direntry = json11::Json::parse(st->new_direntry_text, err); + if (err != "") { - auto ientry_new = st->ientry.object_items(); - ientry_new["parent_ino"] = st->new_dir_ino; - st->self->parent->db->set(kv_inode_key(st->direntry["ino"].uint64_value()), json11::Json(ientry_new).dump(), [st](int res) + fprintf(stderr, "Invalid JSON in direntry %s = %s: %s\n", + kv_direntry_key(st->new_dir_ino, st->new_name).c_str(), + st->new_direntry_text.c_str(), err.c_str()); + auto cb = std::move(st->cb); + cb(-EIO); + return; + } + } + st->new_exists = st->res == 0; + if (st->new_exists) + { + // Check file/folder compatibility (EISDIR/ENOTDIR) + if ((st->old_direntry["type"] == "dir") != (st->new_direntry["type"] == "dir")) + { + auto cb = std::move(st->cb); + cb((st->new_direntry["type"] == "dir") ? -ENOTDIR : -EISDIR); + return; + } + if (st->new_direntry["type"] == "dir") + { + // Check that the destination directory is empty + st->new_dir_prefix = kv_direntry_key(st->new_direntry["ino"].uint64_value(), ""); + st->list_handle = st->self->parent->db->list_start(st->new_dir_prefix); + st->self->parent->db->list_next(st->list_handle, [st](int res, const std::string & key, const std::string & value) { st->res = res; - nfs_kv_continue_rename(st, 4); - }, [st](int res, const std::string & old_value) - { - return old_value == st->ientry_text; + nfs_kv_continue_rename(st, 3); }); + return; +resume_3: + st->self->parent->db->list_close(st->list_handle); + if (st->res != -ENOENT) + { + auto cb = std::move(st->cb); + cb(-ENOTEMPTY); + return; + } } + } + else + { + // Check that the new directory is actually a directory + kv_read_inode(st->self, st->new_dir_ino, [st](int res, const std::string & value, json11::Json attrs) + { + st->res = res == 0 ? (attrs["type"].string_value() == "dir" ? 0 : -ENOTDIR) : res; + nfs_kv_continue_rename(st, 4); + }); return; resume_4: if (st->res < 0) @@ -125,42 +171,182 @@ resume_4: return; } } + // Write the new direntry st->self->parent->db->set(kv_direntry_key(st->new_dir_ino, st->new_name), st->old_direntry_text, [st](int res) { st->res = res; nfs_kv_continue_rename(st, 5); }, [st](int res, const std::string & old_value) { - return res == -ENOENT; + return st->new_exists ? (old_value == st->new_direntry_text) : (res == -ENOENT); }); return; resume_5: + if (st->res == -EAGAIN) + { + // CAS failure + st->allow_cache = false; + goto resume_0; + } if (st->res < 0) { - if (st->res == -EAGAIN) - st->res = -EEXIST; - st->res2 = st->res; - st->self->parent->db->set(kv_direntry_key(st->old_dir_ino, st->old_name), st->old_direntry_text, [st](int res) + auto cb = std::move(st->cb); + cb(st->res); + return; + } + // Delete the old direntry + st->self->parent->db->del(kv_direntry_key(st->old_dir_ino, st->old_name), [st](int res) + { + st->res = res; + nfs_kv_continue_rename(st, 6); + }, [=](int res, const std::string & old_value) + { + return res == 0 && old_value == st->old_direntry_text; + }); + return; +resume_6: + if (st->res == -EAGAIN) + { + // CAS failure + st->allow_cache = false; + goto resume_0; + } + if (st->res < 0) + { + auto cb = std::move(st->cb); + cb(st->res); + return; + } + st->allow_cache = true; +resume_7again: + if (st->new_exists && st->new_direntry["type"].string_value() != "dir") + { + // (Maybe) delete old destination file data + kv_read_inode(st->self, st->new_direntry["ino"].uint64_value(), [st](int res, const std::string & value, json11::Json attrs) { st->res = res; - nfs_kv_continue_rename(st, 6); - }, [st](int res, const std::string & old_value) - { - return res == -ENOENT; - }); + st->new_ientry_text = value; + st->new_ientry = attrs; + nfs_kv_continue_rename(st, 7); + }, st->allow_cache); return; -resume_6: +resume_7: + if (st->res == 0) + { + // (5) Reduce inode refcount by 1 or delete inode + if (st->new_ientry["nlink"].uint64_value() > 1) + { + auto copy = st->new_ientry.object_items(); + copy["nlink"] = st->new_ientry["nlink"].uint64_value()-1; + st->self->parent->db->set(kv_inode_key(st->new_direntry["ino"].uint64_value()), json11::Json(copy).dump(), [st](int res) + { + st->res = res; + nfs_kv_continue_rename(st, 8); + }, [st](int res, const std::string & old_value) + { + return old_value == st->new_ientry_text; + }); + } + else + { + st->rm_dest_data = kv_map_type(st->new_ientry["type"].string_value()) == NF3REG + && !st->new_ientry["shared_ino"].uint64_value(); + st->self->parent->db->del(kv_inode_key(st->new_direntry["ino"].uint64_value()), [st](int res) + { + st->res = res; + nfs_kv_continue_rename(st, 8); + }, [st](int res, const std::string & old_value) + { + return old_value == st->new_ientry_text; + }); + } + return; +resume_8: + if (st->res == -EAGAIN) + { + // CAS failure - re-read inode + st->allow_cache = false; + goto resume_7again; + } + if (st->res < 0) + { + auto cb = std::move(st->cb); + cb(st->res); + return; + } + // Delete inode data if required + if (st->rm_dest_data) + { + st->self->parent->cmd->loop_and_wait(st->self->parent->cmd->start_rm_data(json11::Json::object { + { "inode", INODE_NO_POOL(st->self->parent->fs_base_inode + st->new_direntry["ino"].uint64_value()) }, + { "pool", (uint64_t)INODE_POOL(st->self->parent->fs_base_inode + st->new_direntry["ino"].uint64_value()) }, + }), [st](const cli_result_t & r) + { + if (r.err) + { + fprintf(stderr, "Failed to remove inode %jx data: %s (code %d)\n", + st->new_direntry["ino"].uint64_value(), r.text.c_str(), r.err); + } + st->res = r.err; + nfs_kv_continue_rename(st, 9); + }); + return; +resume_9: + if (st->res < 0) + { + auto cb = std::move(st->cb); + cb(st->res); + return; + } + } + } + } + if (st->old_direntry["type"].string_value() == "dir" && st->new_dir_ino != st->old_dir_ino) + { + // Change parent_ino in old ientry + st->allow_cache = true; +resume_10: + kv_read_inode(st->self, st->old_direntry["ino"].uint64_value(), [st](int res, const std::string & value, json11::Json ientry) + { + st->res = res; + st->old_ientry_text = value; + st->old_ientry = ientry; + nfs_kv_continue_rename(st, 11); + }, st->allow_cache); + return; +resume_11: if (st->res < 0) { - if (st->res == -EAGAIN) - st->res = -EEXIST; - fprintf(stderr, "error restoring %s = %s after failed rename: %s (code %d)\n", - kv_direntry_key(st->old_dir_ino, st->old_name).c_str(), st->old_direntry_text.c_str(), - strerror(-st->res), st->res); + auto cb = std::move(st->cb); + cb(st->res); + return; + } + { + auto ientry_new = st->old_ientry.object_items(); + ientry_new["parent_ino"] = st->new_dir_ino; + st->self->parent->db->set(kv_inode_key(st->old_direntry["ino"].uint64_value()), json11::Json(ientry_new).dump(), [st](int res) + { + st->res = res; + nfs_kv_continue_rename(st, 12); + }, [st](int res, const std::string & old_value) + { + return old_value == st->old_ientry_text; + }); } - auto cb = std::move(st->cb); - cb(st->res2); return; +resume_12: + if (st->res == -EAGAIN) + { + // CAS failure - try again + st->allow_cache = false; + goto resume_10; + } + if (st->res < 0) + { + auto cb = std::move(st->cb); + cb(st->res); + return; + } } auto cb = std::move(st->cb); cb(st->res); diff --git a/src/nfs_mount.cpp b/src/nfs_mount.cpp index 2965bee7..eae5865d 100644 --- a/src/nfs_mount.cpp +++ b/src/nfs_mount.cpp @@ -10,11 +10,18 @@ nfsstat3 vitastor_nfs_map_err(int err) { + if (err < 0) + { + err = -err; + } return (err == EINVAL ? NFS3ERR_INVAL : (err == ENOENT ? NFS3ERR_NOENT : (err == ENOSPC ? NFS3ERR_NOSPC : (err == EEXIST ? NFS3ERR_EXIST - : (err == EIO ? NFS3ERR_IO : (err ? NFS3ERR_IO : NFS3_OK)))))); + : (err == EISDIR ? NFS3ERR_ISDIR + : (err == ENOTDIR ? NFS3ERR_NOTDIR + : (err == ENOTEMPTY ? NFS3ERR_NOTEMPTY + : (err == EIO ? NFS3ERR_IO : (err ? NFS3ERR_IO : NFS3_OK))))))))); } int nfs3_null_proc(void *opaque, rpc_op_t *rop) diff --git a/tests/test_nfs.sh b/tests/test_nfs.sh index 962876d5..8864b68f 100755 --- a/tests/test_nfs.sh +++ b/tests/test_nfs.sh @@ -154,4 +154,14 @@ sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft if ls ./testdata/nfs | grep smallfile; then false; fi format_green "rm small ok" +# rename over existing +echo ZXCVBN > ./testdata/nfs/over1 +mv ./testdata/nfs/over1 ./testdata/nfs/linked2 +sudo umount ./testdata/nfs/ +sudo mount localhost:/ ./testdata/nfs -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp +if ls ./testdata/nfs | grep over1; then false; fi +[[ "`cat ./testdata/nfs/linked2`" = "ZXCVBN" ]] +[[ "`cat ./testdata/nfs/linked1`" = "BABABA" ]] +format_green "rename over existing file ok" + format_green OK