WIP VitastorFS with metadata storage in VitastorKV

Fix eviction when random_pos selects the end
Implement min/max list_count to make listings during performance test reasonable
2024-01-13 01:31:45 +03:00 · 2024-01-13 01:30:29 +03:00 · 2024-01-13 01:30:29 +03:00 · 2024-01-13 01:30:29 +03:00 · 2024-01-13 01:30:29 +03:00 · 2024-01-13 01:30:29 +03:00
25 changed files with 525 additions and 350 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)

 project(vitastor)

-set(VERSION "1.3.1")
+set(VERSION "1.4.0")

 add_subdirectory(src)
--- a/csi/Makefile
+++ b/csi/Makefile
@ -1,4 +1,4 @@
-VERSION ?= v1.3.1
+VERSION ?= v1.4.0

 all: build push

--- a/csi/deploy/004-csi-nodeplugin.yaml
+++ b/csi/deploy/004-csi-nodeplugin.yaml
@ -49,7 +49,7 @@ spec:
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
-          image: vitalif/vitastor-csi:v1.3.1
+          image: vitalif/vitastor-csi:v1.4.0
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/007-csi-provisioner.yaml
+++ b/csi/deploy/007-csi-provisioner.yaml
@ -121,7 +121,7 @@ spec:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
-          image: vitalif/vitastor-csi:v1.3.1
+          image: vitalif/vitastor-csi:v1.4.0
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/src/config.go
+++ b/csi/src/config.go
@ -5,7 +5,7 @@ package vitastor

 const (
    vitastorCSIDriverName    = "csi.vitastor.io"
-    vitastorCSIDriverVersion = "1.3.1"
+    vitastorCSIDriverVersion = "1.4.0"
 )

 // Config struct fills the parameters of request or user input
--- a/csi/src/nodeserver.go
+++ b/csi/src/nodeserver.go
@ -188,7 +188,6 @@ func (ns *NodeServer) unmapNbd(devicePath string)

 func findByPidFile(pidFile string) (*os.Process, error)
 {
-    klog.Infof("killing process with PID from file %s", pidFile)
    pidBuf, err := os.ReadFile(pidFile)
    if (err != nil)
    {
@ -209,6 +208,7 @@ func findByPidFile(pidFile string) (*os.Process, error)

 func killByPidFile(pidFile string) error
 {
+    klog.Infof("killing process with PID from file %s", pidFile)
    proc, err := findByPidFile(pidFile)
    if (err != nil)
    {
--- a/debian/changelog
+++ b/debian/changelog
@ -1,4 +1,4 @@
-vitastor (1.3.1-1) unstable; urgency=medium
+vitastor (1.4.0-1) unstable; urgency=medium

  * Bugfixes

--- a/debian/vitastor.Dockerfile
+++ b/debian/vitastor.Dockerfile
@ -35,8 +35,8 @@ RUN set -e -x; \
    mkdir -p /root/packages/vitastor-$REL; \
    rm -rf /root/packages/vitastor-$REL/*; \
    cd /root/packages/vitastor-$REL; \
-    cp -r /root/vitastor vitastor-1.3.1; \
-    cd vitastor-1.3.1; \
+    cp -r /root/vitastor vitastor-1.4.0; \
+    cd vitastor-1.4.0; \
    ln -s /root/fio-build/fio-*/ ./fio; \
    FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@ -49,8 +49,8 @@ RUN set -e -x; \
    rm -rf a b; \
    echo "dep:fio=$FIO" > debian/fio_version; \
    cd /root/packages/vitastor-$REL; \
-    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.3.1.orig.tar.xz vitastor-1.3.1; \
-    cd vitastor-1.3.1; \
+    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.0.orig.tar.xz vitastor-1.4.0; \
+    cd vitastor-1.4.0; \
    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
--- a/docs/installation/kubernetes.en.md
+++ b/docs/installation/kubernetes.en.md
@ -37,6 +37,7 @@ Vitastor CSI supports:
 - Volume snapshots. Example: [snapshot class](../../csi/deploy/example-snapshot-class.yaml), [snapshot](../../csi/deploy/example-snapshot.yaml), [clone](../../csi/deploy/example-snapshot-clone.yaml)
 - [VDUSE](../usage/qemu.en.md#vduse) (preferred) and [NBD](../usage/nbd.en.md) device mapping methods
 - Upgrades with VDUSE - new handler processes are restarted when CSI pods are restarted themselves
+- VDUSE daemon auto-restart - handler processes are automatically restarted if they crash due to a bug in Vitastor client code
 - Multiple clusters by using multiple configuration files in ConfigMap.

 Remember that to use snapshots with CSI you also have to install [Snapshot Controller and CRDs](https://kubernetes-csi.github.io/docs/snapshot-controller.html#deployment).
--- a/docs/installation/kubernetes.ru.md
+++ b/docs/installation/kubernetes.ru.md
@ -37,6 +37,7 @@ CSI-плагин Vitastor поддерживает:
 - Снимки томов. Пример: [класс снимков](../../csi/deploy/example-snapshot-class.yaml), [снимок](../../csi/deploy/example-snapshot.yaml), [клон снимка](../../csi/deploy/example-snapshot-clone.yaml)
 - Способы подключения устройств [VDUSE](../usage/qemu.ru.md#vduse) (предпочитаемый) и [NBD](../usage/nbd.ru.md)
 - Обновление при использовании VDUSE - новые процессы-обработчики устройств успешно перезапускаются вместе с самими подами CSI
+- Автоперезапуск демонов VDUSE - процесс-обработчик автоматически перезапустится, если он внезапно упадёт из-за бага в коде клиента Vitastor
 - Несколько кластеров через задание нескольких файлов конфигурации в ConfigMap.

 Не забывайте, что для использования снимков нужно сначала установить [контроллер снимков и CRD](https://kubernetes-csi.github.io/docs/snapshot-controller.html#deployment).
--- a/mon/package.json
+++ b/mon/package.json
@ -1,6 +1,6 @@
 {
  "name": "vitastor-mon",
-  "version": "1.3.1",
+  "version": "1.4.0",
  "description": "Vitastor SDS monitor service",
  "main": "mon-main.js",
  "scripts": {
--- a/patches/cinder-vitastor.py
+++ b/patches/cinder-vitastor.py
@ -50,7 +50,7 @@ from cinder.volume import configuration
 from cinder.volume import driver
 from cinder.volume import volume_utils

-VERSION = '1.3.1'
+VERSION = '1.4.0'

 LOG = logging.getLogger(__name__)

--- a/rpm/build-tarball.sh
+++ b/rpm/build-tarball.sh
@ -24,4 +24,4 @@ rm fio
 mv fio-copy fio
 FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
 perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
-tar --transform 's#^#vitastor-1.3.1/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.3.1$(rpm --eval '%dist').tar.gz *
+tar --transform 's#^#vitastor-1.4.0/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.0$(rpm --eval '%dist').tar.gz *
--- a/rpm/vitastor-el7.Dockerfile
+++ b/rpm/vitastor-el7.Dockerfile
@ -36,7 +36,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.3.1.el7.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-1.4.0.el7.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el7.spec
+++ b/rpm/vitastor-el7.spec
@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.3.1
+Version:        1.4.0
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.3.1.el7.tar.gz
+Source0:        vitastor-1.4.0.el7.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/rpm/vitastor-el8.Dockerfile
+++ b/rpm/vitastor-el8.Dockerfile
@ -35,7 +35,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.3.1.el8.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-1.4.0.el8.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el8.spec
+++ b/rpm/vitastor-el8.spec
@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.3.1
+Version:        1.4.0
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.3.1.el8.tar.gz
+Source0:        vitastor-1.4.0.el8.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/rpm/vitastor-el9.Dockerfile
+++ b/rpm/vitastor-el9.Dockerfile
@ -18,7 +18,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.3.1.el9.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-1.4.0.el9.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el9.spec
+++ b/rpm/vitastor-el9.spec
@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.3.1
+Version:        1.4.0
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.3.1.el9.tar.gz
+Source0:        vitastor-1.4.0.el9.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
 	set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
 endif()

-add_definitions(-DVERSION="1.3.1")
+add_definitions(-DVERSION="1.4.0")
 add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
 add_link_options(-fno-omit-frame-pointer)
 if (${WITH_ASAN})
@ -181,23 +181,29 @@ target_link_libraries(vitastor-nbd
 	vitastor_client
 )

-# vitastor-kv
-add_executable(vitastor-kv
-	kv_cli.cpp
+# libvitastor_kv.so
+add_library(vitastor_kv SHARED
 	kv_db.cpp
 	kv_db.h
 )
-target_link_libraries(vitastor-kv
+target_link_libraries(vitastor_kv
 	vitastor_client
 )
+set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
+
+# vitastor-kv
+add_executable(vitastor-kv
+	kv_cli.cpp
+)
+target_link_libraries(vitastor-kv
+	vitastor_kv
+)

 add_executable(vitastor-kv-stress
 	kv_stress.cpp
-	kv_db.cpp
-	kv_db.h
 )
 target_link_libraries(vitastor-kv-stress
-	vitastor_client
+	vitastor_kv
 )

 # vitastor-nfs
@ -213,6 +219,7 @@ add_executable(vitastor-nfs
 )
 target_link_libraries(vitastor-nfs
 	vitastor_client
+	vitastor_kv
 )

 # vitastor-cli
--- a/src/blockstore_impl.cpp
+++ b/src/blockstore_impl.cpp
@ -558,13 +558,14 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
            if (stable_count >= stable_alloc)
            {
                stable_alloc *= 2;
-                stable = (obj_ver_id*)realloc(stable, sizeof(obj_ver_id) * stable_alloc);
-                if (!stable)
+                obj_ver_id* nst = (obj_ver_id*)realloc(stable, sizeof(obj_ver_id) * stable_alloc);
+                if (!nst)
                {
                    op->retval = -ENOMEM;
                    FINISH_OP(op);
                    return;
                }
+                stable = nst;
            }
            stable[stable_count++] = {
                .oid = clean_it->first,
@ -642,8 +643,8 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
                            if (stable_count >= stable_alloc)
                            {
                                stable_alloc += 32768;
-                                stable = (obj_ver_id*)realloc(stable, sizeof(obj_ver_id) * stable_alloc);
-                                if (!stable)
+                                obj_ver_id *nst = (obj_ver_id*)realloc(stable, sizeof(obj_ver_id) * stable_alloc);
+                                if (!nst)
                                {
                                    if (unstable)
                                        free(unstable);
@ -651,6 +652,7 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
                                    FINISH_OP(op);
                                    return;
                                }
+                                stable = nst;
                            }
                            stable[stable_count++] = dirty_it->first;
                        }
@ -666,8 +668,8 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
                    if (unstable_count >= unstable_alloc)
                    {
                        unstable_alloc += 32768;
-                        unstable = (obj_ver_id*)realloc(unstable, sizeof(obj_ver_id) * unstable_alloc);
-                        if (!unstable)
+                        obj_ver_id *nst = (obj_ver_id*)realloc(unstable, sizeof(obj_ver_id) * unstable_alloc);
+                        if (!nst)
                        {
                            if (stable)
                                free(stable);
@ -675,6 +677,7 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
                            FINISH_OP(op);
                            return;
                        }
+                        unstable = nst;
                    }
                    unstable[unstable_count++] = dirty_it->first;
                }
@ -694,8 +697,8 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
    if (stable_count+unstable_count > stable_alloc)
    {
        stable_alloc = stable_count+unstable_count;
-        stable = (obj_ver_id*)realloc(stable, sizeof(obj_ver_id) * stable_alloc);
-        if (!stable)
+        obj_ver_id *nst = (obj_ver_id*)realloc(stable, sizeof(obj_ver_id) * stable_alloc);
+        if (!nst)
        {
            if (unstable)
                free(unstable);
@ -703,6 +706,7 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
            FINISH_OP(op);
            return;
        }
+        stable = nst;
    }
    // Copy unstable entries
    for (int i = 0; i < unstable_count; i++)
--- a/src/etcd_state_client.h
+++ b/src/etcd_state_client.h
@ -28,7 +28,7 @@ struct etcd_kv_t
 {
    std::string key;
    json11::Json value;
-    uint64_t mod_revision;
+    uint64_t mod_revision = 0;
 };

 struct pg_config_t
--- a/src/nfs_conn.cpp
+++ b/src/nfs_conn.cpp
--- a/src/nfs_proxy.h
+++ b/src/nfs_proxy.h
@ -4,6 +4,7 @@
 #include "epoll_manager.h"
 #include "nfs_portmap.h"
 #include "nfs/xdr_impl.h"
+#include "kv_db.h"

 #define RPC_INIT_BUF_SIZE 32768

@ -16,6 +17,22 @@ struct nfs_dir_t
    timespec mtime;
 };

+struct list_cookie_t
+{
+    uint64_t dir_ino, cookieverf, cookie;
+};
+
+inline bool operator < (const list_cookie_t & a, const list_cookie_t & b)
+{
+    return a.dir_ino < b.dir_ino || a.dir_ino == b.dir_ino &&
+        (a.cookieverf < b.cookieverf || a.cookieverf == b.cookieverf && a.cookie < b.cookie);
+};
+
+struct list_cookie_val_t
+{
+    std::string key;
+};
+
 class nfs_proxy_t
 {
 public:
@ -27,6 +44,7 @@ public:
    std::string export_root;
    bool portmap_enabled;
    unsigned nfs_port;
+    int readdir_getattr_parallel = 8, id_alloc_batch_size = 200;

    pool_id_t default_pool_id;

@ -35,6 +53,12 @@ public:
    epoll_manager_t *epmgr = NULL;
    cluster_client_t *cli = NULL;
    cli_tool_t *cmd = NULL;
+    kv_dbw_t *db = NULL;
+    uint64_t root_uid = 0, root_gid = 0;
+    std::map<list_cookie_t, list_cookie_val_t> list_cookies;
+    uint64_t fs_min_id = 0, fs_max_id = 0;
+    uint64_t fs_next_id = 0, fs_allocated_id = 0;
+    std::vector<uint64_t> unallocated_ids;

    std::vector<XDR*> xdr_pool;

@ -106,6 +130,8 @@ struct extend_write_t
 struct extend_inode_t
 {
    uint64_t cur_extend = 0, next_extend = 0;
+    std::string old_ientry;
+    json11::Json::object attrs;
 };

 class nfs_client_t
--- a/src/vitastor.pc.in
+++ b/src/vitastor.pc.in
@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@

 Name: Vitastor
 Description: Vitastor client library
-Version: 1.3.1
+Version: 1.4.0
 Libs: -L${libdir} -lvitastor_client
 Cflags: -I${includedir}
Author	SHA1	Message	Date
Vitaliy Filippov	576694c780	WIP VitastorFS with metadata storage in VitastorKV Test / test_rm (push) Successful in 14s Details Test / test_interrupted_rebalance_ec (push) Successful in 3m26s Details Test / test_etcd_fail (push) Successful in 5m51s Details Test / test_snapshot_down (push) Successful in 25s Details Test / test_snapshot_chain (push) Successful in 1m38s Details Test / test_snapshot_down_ec (push) Successful in 23s Details Test / test_splitbrain (push) Successful in 21s Details Test / test_snapshot_chain_ec (push) Successful in 3m0s Details Test / test_rebalance_verify_imm (push) Successful in 2m28s Details Test / test_interrupted_rebalance (push) Failing after 10m7s Details Test / test_write (push) Successful in 39s Details Test / test_rebalance_verify_ec (push) Successful in 3m52s Details Test / test_rebalance_verify_ec_imm (push) Successful in 2m35s Details Test / test_write_no_same (push) Successful in 15s Details Test / test_rebalance_verify (push) Successful in 6m48s Details Test / test_write_xor (push) Failing after 3m10s Details Test / test_heal_ec (push) Successful in 4m21s Details Test / test_heal_pg_size_2 (push) Successful in 4m33s Details Test / test_heal_csum_32k_dmj (push) Successful in 5m6s Details Test / test_heal_csum_32k_dj (push) Successful in 5m52s Details Test / test_heal_csum_32k (push) Successful in 6m22s Details Test / test_heal_csum_4k_dmj (push) Successful in 6m20s Details Test / test_scrub_zero_osd_2 (push) Successful in 37s Details Test / test_scrub (push) Successful in 40s Details Test / test_scrub_xor (push) Successful in 53s Details Test / test_heal_csum_4k_dj (push) Successful in 6m12s Details Test / test_scrub_pg_size_3 (push) Failing after 1m54s Details Test / test_scrub_ec (push) Successful in 1m12s Details Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m46s Details Test / test_heal_csum_4k (push) Successful in 6m16s Details	2024-01-13 01:31:45 +03:00
Vitaliy Filippov	37915d8315	Fix eviction when random_pos selects the end Test / test_rm (push) Successful in 13s Details Test / test_move_reappear (push) Successful in 18s Details Test / test_interrupted_rebalance (push) Successful in 4m51s Details Test / test_snapshot_chain (push) Successful in 1m27s Details Test / test_snapshot_down (push) Successful in 24s Details Test / test_snapshot_down_ec (push) Successful in 26s Details Test / test_splitbrain (push) Successful in 16s Details Test / test_snapshot_chain_ec (push) Successful in 2m26s Details Test / test_rebalance_verify_imm (push) Successful in 2m13s Details Test / test_rebalance_verify_ec (push) Successful in 3m1s Details Test / test_write (push) Successful in 40s Details Test / test_write_xor (push) Successful in 36s Details Test / test_write_no_same (push) Successful in 16s Details Test / test_rebalance_verify_ec_imm (push) Successful in 2m58s Details Test / test_interrupted_rebalance_ec (push) Failing after 10m10s Details Test / test_rebalance_verify (push) Successful in 7m26s Details Test / test_heal_pg_size_2 (push) Failing after 4m17s Details Test / test_heal_csum_32k_dmj (push) Successful in 4m45s Details Test / test_heal_ec (push) Successful in 5m35s Details Test / test_heal_csum_32k_dj (push) Successful in 5m19s Details Test / test_heal_csum_32k (push) Successful in 5m53s Details Test / test_scrub (push) Successful in 56s Details Test / test_heal_csum_4k_dj (push) Successful in 6m10s Details Test / test_scrub_zero_osd_2 (push) Successful in 50s Details Test / test_heal_csum_4k_dmj (push) Successful in 6m47s Details Test / test_scrub_xor (push) Successful in 38s Details Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m7s Details Test / test_scrub_ec (push) Successful in 47s Details Test / test_scrub_pg_size_3 (push) Successful in 1m49s Details Test / test_heal_csum_4k (push) Successful in 6m18s Details	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	496d6122d0	Implement min/max list_count to make listings during performance test reasonable	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	ec5b761903	Fix and improve parallel allocation - Do not try to allocate more DB blocks in an inode block until it's "confirmed" and "locked" by the first write - Do not recheck for new zero DB blocks on first write into an inode block - a CAS failure means someone else is already writing into it - Throw new allocation blocks away regardless of whether the known_version is 0 on a CAS failure	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	2d7069abdb	Implement key_prefix for K/V stress test	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	b032cb767a	More fixes - do not overwrite a block with older version if known version is newer (read may start before update and end after update) - invalidated block versions can't be remembered and trusted - right boundary for split blocks is right_half when diving down, not key_lt - restart update also when block is "invalidated", not just on version mismatch - copy callback in listings to avoid closure destruction bugs too	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	6203b12203	Add logging and one more assert	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	c51644727b	Make get_block() wait for updating when unrelated block is found along the path	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	af651aef24	Fix a race condition where changed blocks were parsed over existing cached blocks and getting a mix of data	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	b4962e636d	Simplify code by removing an unneeded "optimisation"	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	2aaa7f13c4	Add kv_log_level, print warnings on level 1, trace ops on level 10	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	ba3eb5c0f5	Fix duplicate keys in listings on parallel updates -- do not rewind key "iterator position"	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	cac938e5b4	Implement key suffix to avoid collisions of multiple test workers	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	6771f766af	Do not complain on empty first block	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	9d354ae4e5	Add JSON output for stress-tester	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	a6067124fc	Print total stats	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	58b04953e3	Do not send more than op_count operations (fix segfault on finish)	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	da4f1def94	Add some more resiliency to serialize()	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	14a6ddcf3f	Invalidate blocks being updated too	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	5e61f34f1e	Change new block allocation method: make each writer choose multiple empty PG blocks and place blocks in them	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	d362602c17	Remove blocks from cache on unsuccessful updates	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	4b285437fe	Allow to track multiple updates per block (it should never happen though)	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	b9d21348b3	Do not call stop_updating after failed write_new_block and after clear_block (both delete the item)	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	6865ffa9e7	Track versions of parent blocks and recheck if changed during update	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	e5cc6cb30e	Fix resume_split condition (key_lt can also be "")	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	ca2a8fe464	Experiment: transform offsets for better sharding	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	1d2a6c20b4	More post-stress-test fixes - Prevent _split types of new blocks - Stop updating new blocks only after the whole update, otherwise pointers may become invalid - Use recheck_none for updates initially - Use UINT64_MAX as initial block version when postponing ops, otherwise the check fails when the block is initially empty. This for example leads to writing both leaf items & block pointers (which is incorrect) into the root block when starting stress-test with --parallelism 32 - Fix -EINTR comparison	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	c50b256cef	Print operation statistics	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	1db4d8668d	K/V fixes after stress-test :-) - track block versions correctly - per inode block (128kb) instead of tree block (4kb) - prevent multiple parallel CAS writes of the same inode block - add logging for EILSEQ which means invalid data in the tree - fix get_block updated flag which was true for blocks already in cache and was leading to infinite loops on "unrelated block" errors - apply changes to blocks in cache only after successful writes (using "virtual changes") - do not replace cached block with an older version from disk - recheck "unrelated blocks" (read/update collisions) until data stops changing - track tree path correctly - do not treat split block as parent of its right half - correctly move blocks when finding new empty place on disk - restart updates from the beginning when one of blocks is changed by a parallel update - fix delete using SET opcode and setting key to the empty value instead - prevent changing the same key more than 1 time in parallel - fix listing verification - resume continue_updates in update_find (required because it uses continue_update itself) - add allow_old_cached parameter to get()	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	447569cae6	Implement K/V DB stress tester	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	811d6c71f8	Evict blocks based on memory limit & block usage	2024-01-13 01:30:29 +03:00
Vitaliy Filippov	fca151336f	Track blocks per level	2024-01-13 01:30:28 +03:00
Vitaliy Filippov	0ff803138a	Track block level	2024-01-13 01:30:28 +03:00
Vitaliy Filippov	938991b102	Experimental B-Tree Vitastor embedded K/V database implementation!	2024-01-13 01:30:28 +03:00
Vitaliy Filippov	d00d4dbac0	Initialize mod_revision field in etcd_state_client Test / test_interrupted_rebalance_ec (push) Successful in 2m28s Details Test / test_rm (push) Successful in 17s Details Test / test_move_reappear (push) Successful in 29s Details Test / test_snapshot_down (push) Successful in 26s Details Test / test_snapshot_down_ec (push) Successful in 26s Details Test / test_splitbrain (push) Successful in 16s Details Test / test_snapshot_chain (push) Successful in 2m0s Details Test / test_rebalance_verify_imm (push) Successful in 2m28s Details Test / test_rebalance_verify (push) Successful in 3m0s Details Test / test_rebalance_verify_ec (push) Successful in 3m14s Details Test / test_write_no_same (push) Successful in 13s Details Test / test_rebalance_verify_ec_imm (push) Successful in 3m7s Details Test / test_heal_pg_size_2 (push) Successful in 3m33s Details Test / test_heal_ec (push) Successful in 4m40s Details Test / test_heal_csum_32k_dj (push) Successful in 5m40s Details Test / test_heal_csum_32k (push) Successful in 6m8s Details Test / test_scrub (push) Successful in 1m4s Details Test / test_scrub_zero_osd_2 (push) Successful in 47s Details Test / test_heal_csum_4k_dmj (push) Successful in 6m33s Details Test / test_heal_csum_4k_dj (push) Successful in 6m28s Details Test / test_scrub_xor (push) Successful in 44s Details Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m2s Details Test / test_scrub_ec (push) Successful in 42s Details Test / test_scrub_pg_size_3 (push) Successful in 1m38s Details Test / test_heal_csum_4k (push) Successful in 5m56s Details Test / test_interrupted_rebalance (push) Successful in 1m53s Details Test / test_snapshot_chain_ec (push) Failing after 3m17s Details Test / test_write (push) Failing after 3m15s Details Test / test_heal_csum_32k_dmj (push) Successful in 4m6s Details Test / test_write_xor (push) Failing after 3m11s Details	2024-01-13 01:30:28 +03:00
Vitaliy Filippov	5d9d6f32a0	Fix common realloc memory leak mistakes found by cppcheck	2024-01-13 01:30:28 +03:00
Vitaliy Filippov	5280d1d561	Release 1.4.0 Test / test_snapshot (push) Successful in 26s Details Test / test_snapshot_ec (push) Successful in 26s Details Test / test_rm (push) Successful in 16s Details Test / test_move_reappear (push) Successful in 24s Details Test / test_snapshot_down (push) Successful in 26s Details Test / test_snapshot_down_ec (push) Successful in 30s Details Test / test_splitbrain (push) Successful in 28s Details Test / test_snapshot_chain (push) Successful in 2m41s Details Test / test_rebalance_verify_imm (push) Successful in 2m48s Details Test / test_rebalance_verify (push) Successful in 3m28s Details Test / test_write (push) Successful in 47s Details Test / test_write_no_same (push) Successful in 14s Details Test / test_rebalance_verify_ec_imm (push) Successful in 3m5s Details Test / test_rebalance_verify_ec (push) Successful in 3m41s Details Test / test_heal_pg_size_2 (push) Successful in 3m45s Details Test / test_heal_csum_32k_dmj (push) Successful in 4m52s Details Test / test_heal_ec (push) Successful in 5m11s Details Test / test_heal_csum_32k_dj (push) Successful in 5m42s Details Test / test_heal_csum_32k (push) Successful in 5m56s Details Test / test_scrub (push) Successful in 1m25s Details Test / test_scrub_zero_osd_2 (push) Successful in 1m18s Details Test / test_scrub_xor (push) Successful in 42s Details Test / test_heal_csum_4k_dmj (push) Successful in 6m49s Details Test / test_heal_csum_4k_dj (push) Successful in 6m32s Details Test / test_heal_csum_4k (push) Successful in 5m31s Details Test / test_scrub_ec (push) Successful in 50s Details Test / test_scrub_pg_size_3 (push) Successful in 1m2s Details Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m5s Details Test / test_snapshot_chain_ec (push) Successful in 1m21s Details Test / test_write_xor (push) Successful in 36s Details New features: - Intelligent recovery/rebalance speed auto-tuning to reduce its impact on clients (see README -> Features) - 
Auto-restoration of dead VDUSE daemons in CSI plugin - Add vitastor-disk update-sb command - Update QEMU for Debian Bookworm to 8.1 and use it for CSI plugin Bug fixes: - Fix pools SOMETIMES staying inactive after stopping a node due to OSDs not reacting to PG state changes caused by incorrect full reload of state from etcd on reconnection - Make monitors retry pool configuration changes more quickly which fixes them being unable to apply changes when an ongoing rebalance is quickly making a lot of PGs clean - Fix CSI plugin not accepting array of strings as etcd address in /etc/vitastor/vitastor.conf - Allow multiple interfaces with the same IP address, for "simple routed" full mesh network - Do not ignore loopback addresses for OSD network (to make ECMP setups with frr possible) - Fix a rare client crash during OSD reconnections - Only treat data partitions as existing OSDs in vitastor-disk prepare - Remove etcd parameter from default command examples - Fix reported free space sometimes not changing immediately after deletion of data from OSDs - Fix a possible OSD crash on print_slow when bs_op is NULL - Use the same etcd_ws_keepalive_interval in mon as in OSD - Fix mon not using values from config when /config/global is not present - Remove pve-storage-portal-dns-list format for vitastor_etcd_address - Parse log_level in cluster_client - Fix vitastor-nbd image existence check not working because of non-zeroed inode_watch fields - Do not warn on EPIPE in client unless log_level is raised explicitly - Fix incorrect error in CSI when searching for the device in /sys - Remove 2 last prints to stdout in etcd_state_client - Fix a possible OSD crash when checking corrupted journal entries	2024-01-12 01:28:33 +03:00
Vitaliy Filippov	317b0feb0a	Add a note about VDUSE daemon auto-restart	2024-01-12 01:27:36 +03:00
Vitaliy Filippov	247f0552db	Fix debug log "killing..." in CSI	2024-01-10 01:19:34 +03:00