Release 2.1.0

New features:

- Support separate OSD cluster network - [osd_cluster_network](https://vitastor.io/docs/config/network.html#osd_cluster_network) and, in general, multiple OSD networks, including RDMA
- Add an alternative RDMA implementation via RDMA-CM - [use_rdmacm](https://vitastor.io/docs/config/network.html#use_rdmacm), required for iWARP and, maybe, for some IB setups (but not for RoCE)
- Change default PG behaviour to wait for all "up" OSDs to be connected before starting the PG. The old behaviour can be restored by enabling the new [allow_net_split](https://vitastor.io/docs/config/osd.html#allow_net_split) option.
- Add a patch for QEMU 9.2

Bug fixes:

- Fix incorrect "has_xxx" PG state names in ls-pgs
- Fix possible QEMU crashes after detaching Vitastor disks (and update all QEMU builds in Vitastor repos)
- Fix clients sometimes spamming OSDs with infinite reconnections when some PGs are offline
- Fall back to TCP on RDMA connection failures
- Add missing logging of RDMA ibv_modify_qp() errors
- Add a minimum interval for etcd_state_client to reload state
Update QEMU version in vitastor-csi Dockerfile
2025-04-01 20:16:27 +03:00 · 2025-04-01 20:16:27 +03:00 · 2025-04-01 16:16:03 +03:00 · 2025-04-01 12:07:15 +03:00 · 2025-03-31 21:12:09 +03:00 · 2025-03-31 21:01:25 +03:00
255 changed files with 11263 additions and 2756 deletions
--- a/.gitea/workflows/buildenv.Dockerfile
+++ b/.gitea/workflows/buildenv.Dockerfile
@@ -22,7 +22,7 @@ RUN apt-get update
 RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
    liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
 RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
-RUN apt-get -y install jq lp-solve sudo nfs-common
+RUN apt-get update && apt-get -y install jq lp-solve sudo nfs-common fdisk parted
 RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
 RUN set -ex; \
--- a/.gitea/workflows/test.yml
+++ b/.gitea/workflows/test.yml
@@ -288,6 +288,24 @@ jobs:
          echo ""
        done
  test_create_halfhost:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_create_halfhost.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          echo "-------- $i --------"
          cat $i
          echo ""
        done
  test_failure_domain:
    runs-on: ubuntu-latest
    needs: build
@@ -396,6 +414,24 @@ jobs:
          echo ""
        done
  test_rm_degraded:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_rm_degraded.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          echo "-------- $i --------"
          cat $i
          echo ""
        done
  test_snapshot_chain:
    runs-on: ubuntu-latest
    needs: build
@@ -828,6 +864,42 @@ jobs:
          echo ""
        done
  test_resize:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_resize.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          echo "-------- $i --------"
          cat $i
          echo ""
        done
  test_resize_auto:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_resize_auto.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          echo "-------- $i --------"
          cat $i
          echo ""
        done
  test_snapshot_pool2:
    runs-on: ubuntu-latest
    needs: build
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
 project(vitastor)
-set(VITASTOR_VERSION "1.9.2")
+set(VITASTOR_VERSION "2.1.0")
 add_subdirectory(src)
--- a/README-ru.md
+++ b/README-ru.md
@@ -6,7 +6,7 @@
 Вернём былую скорость кластерному блочному хранилищу!
-Vitastor - распределённая блочная и файловая SDS (программная СХД), прямой аналог Ceph RBD и CephFS,
+Vitastor - распределённая блочная, файловая и объектная SDS (программная СХД), прямой аналог Ceph RBD, CephFS и RGW,
 а также внутренних СХД популярных облачных провайдеров. Однако, в отличие от них, Vitastor
 быстрый и при этом простой. Только пока маленький :-).
@@ -41,10 +41,12 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
  - [Автор и лицензия](docs/intro/author.ru.md)
 - Установка
  - [Пакеты](docs/installation/packages.ru.md)
  - [Docker](docs/installation/docker.ru.md)
  - [Proxmox](docs/installation/proxmox.ru.md)
  - [OpenNebula](docs/installation/opennebula.ru.md)
  - [OpenStack](docs/installation/openstack.ru.md)
  - [Kubernetes CSI](docs/installation/kubernetes.ru.md)
  - [S3](docs/installation/s3.ru.md)
  - [Сборка из исходных кодов](docs/installation/source.ru.md)
 - Конфигурация
  - [Обзор](docs/config.ru.md)
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 Make Clustered Block Storage Fast Again.
-Vitastor is a distributed block and file SDS, direct replacement of Ceph RBD and CephFS,
+Vitastor is a distributed block, file and object SDS, direct replacement of Ceph RBD, CephFS and RGW,
 and also internal SDS's of public clouds. However, in contrast to them, Vitastor is fast
 and simple at the same time. The only thing is it's slightly young :-).
@@ -41,10 +41,12 @@ Read more details in the documentation. You can start from here: [Quick Start](d
  - [Author and license](docs/intro/author.en.md)
 - Installation
  - [Packages](docs/installation/packages.en.md)
  - [Docker](docs/installation/docker.en.md)
  - [Proxmox](docs/installation/proxmox.en.md)
  - [OpenNebula](docs/installation/opennebula.en.md)
  - [OpenStack](docs/installation/openstack.en.md)
  - [Kubernetes CSI](docs/installation/kubernetes.en.md)
  - [S3](docs/installation/s3.en.md)
  - [Building from Source](docs/installation/source.en.md)
 - Configuration
  - [Overview](docs/config.en.md)
--- a/csi/Dockerfile
+++ b/csi/Dockerfile
@@ -22,6 +22,8 @@ RUN apt-get update && \
    (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
    apt-get update && \
    apt-get install -y e2fsprogs xfsprogs kmod iproute2 \
        # NFS mount dependencies
        nfs-common netbase \
        # dependencies of qemu-storage-daemon
        libnuma1 liburing2 libglib2.0-0 libfuse3-3 libaio1 libzstd1 libnettle8 \
        libgmp10 libhogweed6 libp11-kit0 libidn2-0 libunistring2 libtasn1-6 libpcre2-8-0 libffi8 && \
@@ -35,8 +37,8 @@ RUN (echo deb http://vitastor.io/debian bookworm main > /etc/apt/sources.list.d/
    wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg && \
    apt-get update && \
    apt-get install -y vitastor-client && \
-    wget https://vitastor.io/archive/qemu/qemu-bookworm-8.1.2%2Bds-1%2Bvitastor1/qemu-utils_8.1.2%2Bds-1%2Bvitastor1_amd64.deb && \
+    wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-utils_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
-    wget https://vitastor.io/archive/qemu/qemu-bookworm-8.1.2%2Bds-1%2Bvitastor1/qemu-block-extra_8.1.2%2Bds-1%2Bvitastor1_amd64.deb && \
+    wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-block-extra_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
    dpkg -x qemu-utils*.deb tmp1 && \
    dpkg -x qemu-block-extra*.deb tmp1 && \
    cp -a tmp1/usr/bin/qemu-storage-daemon /usr/bin/ && \
--- a/csi/Makefile
+++ b/csi/Makefile
@@ -1,4 +1,4 @@
-VITASTOR_VERSION ?= v1.9.2
+VITASTOR_VERSION ?= v2.1.0
 all: build push
--- a/csi/deploy/004-csi-nodeplugin.yaml
+++ b/csi/deploy/004-csi-nodeplugin.yaml
@@ -49,7 +49,7 @@ spec:
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
-          image: vitalif/vitastor-csi:v1.9.2
+          image: vitalif/vitastor-csi:v2.1.0
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/007-csi-provisioner.yaml
+++ b/csi/deploy/007-csi-provisioner.yaml
@@ -121,7 +121,7 @@ spec:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
-          image: vitalif/vitastor-csi:v1.9.2
+          image: vitalif/vitastor-csi:v2.1.0
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/009-storage-class.yaml
+++ b/csi/deploy/009-storage-class.yaml
@@ -9,8 +9,16 @@ metadata:
 provisioner: csi.vitastor.io
 volumeBindingMode: Immediate
 parameters:
-  etcdVolumePrefix: ""
+  # CSI driver can create block-based volumes and VitastorFS-based volumes
-  poolId: "1"
+  # only VitastorFS-based volumes and raw block volumes (without FS) support ReadWriteMany mode
  # set this parameter to VitastorFS metadata volume name to use VitastorFS
  # if unset, block-based volumes will be created
  vitastorfs: ""
  # for block-based storage classes, pool ID may be either a string (name) or a number (ID)
  # for vitastorFS-based storage classes it must be a string - name of the default pool for FS data
  poolId: "testpool"
  # volume name prefix for block-based storage classes or NFS subdirectory (including /) for FS-based volumes
  volumePrefix: ""
  # you can choose other configuration file if you have it in the config map
  # different etcd URLs and prefixes should also be put in the config
  #configPath: "/etc/vitastor/vitastor.conf"
--- a/csi/deploy/example-storage-class-fs.yaml
+++ b/csi/deploy/example-storage-class-fs.yaml
@@ -0,0 +1,25 @@
 ---
 apiVersion: storage.k8s.io/v1
 kind: StorageClass
 metadata:
  namespace: vitastor-system
  name: vitastor
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
 provisioner: csi.vitastor.io
 volumeBindingMode: Immediate
 parameters:
  # CSI driver can create block-based volumes and VitastorFS-based volumes
  # only VitastorFS-based volumes and raw block volumes (without FS) support ReadWriteMany mode
  # set this parameter to VitastorFS metadata volume name to use VitastorFS
  # if unset, block-based volumes will be created
  vitastorfs: "testfs"
  # for block-based storage classes, pool ID may be either a string (name) or a number (ID)
  # for vitastorFS-based storage classes it must be a string - name of the default pool for FS data
  poolId: "testpool"
  # volume name prefix for block-based storage classes or NFS subdirectory (including /) for FS-based volumes
  volumePrefix: "k8s/"
  # you can choose other configuration file if you have it in the config map
  # different etcd URLs and prefixes should also be put in the config
  #configPath: "/etc/vitastor/vitastor.conf"
 allowVolumeExpansion: true
--- a/csi/src/config.go
+++ b/csi/src/config.go
@@ -5,7 +5,7 @@ package vitastor
 const (
    vitastorCSIDriverName    = "csi.vitastor.io"
-    vitastorCSIDriverVersion = "1.9.2"
+    vitastorCSIDriverVersion = "2.1.0"
 )
 // Config struct fills the parameters of request or user input
--- a/csi/src/controllerserver.go
+++ b/csi/src/controllerserver.go
@@ -8,11 +8,8 @@ import (
    "encoding/json"
    "fmt"
    "strings"
    "bytes"
    "strconv"
    "time"
    "os"
    "os/exec"
    "io/ioutil"
    "github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
@@ -70,9 +67,10 @@ func GetConnectionParams(params map[string]string) (map[string]string, error)
    {
        configPath = "/etc/vitastor/vitastor.conf"
    }
-    else
+    ctxVars["configPath"] = configPath
    if (params["vitastorfs"] != "")
    {
-        ctxVars["configPath"] = configPath
+        ctxVars["vitastorfs"] = params["vitastorfs"]
    }
    config := make(map[string]interface{})
    configFD, err := os.Open(configPath)
@@ -114,22 +112,6 @@ func GetConnectionParams(params map[string]string) (map[string]string, error)
    return ctxVars, nil
 }
 // system runs program with the given args, capturing stdout and stderr into
 // separate in-memory buffers.
 //
 // Returns (stdout, stderr, nil) when c.Run() succeeds. When c.Run() returns an
 // error, the captured output is logged and (nil, nil, err) is returned, where err
 // is a codes.Internal status whose message is stdout+stderr followed by the
 // original error text.
 func system(program string, args ...string) ([]byte, []byte, error)
 {
    // NOTE(review): the command line is concatenated into the klog format string here
    // and in the Errorf call below, so a '%' inside program/args would be interpreted
    // as a format verb.
    klog.Infof("Running "+program+" "+strings.Join(args, " "))
    c := exec.Command(program, args...)
    var stdout, stderr bytes.Buffer
    c.Stdout, c.Stderr = &stdout, &stderr
    err := c.Run()
    if (err != nil)
    {
        // Both streams are appended (stdout first) for the log line and the returned status
        stdoutStr, stderrStr := string(stdout.Bytes()), string(stderr.Bytes())
        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s, status %s\n", stdoutStr+stderrStr, err)
        return nil, nil, status.Error(codes.Internal, stdoutStr+stderrStr+" (status "+err.Error()+")")
    }
    return stdout.Bytes(), stderr.Bytes(), nil
 }
 func invokeCLI(ctxVars map[string]string, args []string) ([]byte, error)
 {
    if (ctxVars["configPath"] != "")
@@ -158,33 +140,57 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
        return nil, status.Error(codes.InvalidArgument, "volume capabilities is a required field")
    }
    err := cs.checkCaps(volumeCapabilities)
    if (err != nil)
    {
        return nil, err
    }
    etcdVolumePrefix := req.Parameters["etcdVolumePrefix"]
    poolId, _ := strconv.ParseUint(req.Parameters["poolId"], 10, 64)
    if (poolId == 0)
    {
        return nil, status.Error(codes.InvalidArgument, "poolId is missing in storage class configuration")
    }
    volName := etcdVolumePrefix + req.GetName()
    volSize := 1 * GB
    if capRange := req.GetCapacityRange(); capRange != nil
    {
        volSize = ((capRange.GetRequiredBytes() + MB - 1) / MB) * MB
    }
    ctxVars, err := GetConnectionParams(req.Parameters)
    if (err != nil)
    {
        return nil, err
    }
-    args := []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", fmt.Sprintf("%v", poolId) }
+    err = cs.checkCaps(volumeCapabilities, ctxVars["vitastorfs"] != "")
    if (err != nil)
    {
        return nil, err
    }
    pool := req.Parameters["poolId"]
    if (pool == "")
    {
        return nil, status.Error(codes.InvalidArgument, "poolId is missing in storage class configuration")
    }
    volumePrefix := req.Parameters["volumePrefix"]
    if (volumePrefix == "")
    {
        // Old name
        volumePrefix = req.Parameters["etcdVolumePrefix"]
    }
    volName := volumePrefix + req.GetName()
    volSize := 1 * GB
    if capRange := req.GetCapacityRange(); capRange != nil
    {
        volSize = ((capRange.GetRequiredBytes() + MB - 1) / MB) * MB
    }
    if (ctxVars["vitastorfs"] != "")
    {
        // Nothing to create, subdirectories are created during mounting
        // FIXME: It would be cool to support quotas some day and set it here
        if (req.VolumeContentSource.GetSnapshot() != nil)
        {
            return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
        }
        ctxVars["name"] = volName
        ctxVars["pool"] = pool
        volumeIdJson, _ := json.Marshal(ctxVars)
        return &csi.CreateVolumeResponse{
            Volume: &csi.Volume{
                // Ugly, but VolumeContext isn't passed to DeleteVolume :-(
                VolumeId: string(volumeIdJson),
                CapacityBytes: volSize,
            },
        }, nil
    }
    args := []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", pool }
    // Support creation from snapshot
    var src *csi.VolumeContentSource
@@ -267,6 +273,12 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
        return nil, err
    }
    if (ctxVars["vitastorfs"] != "")
    {
        // FIXME: Delete FS subdirectory
        return &csi.DeleteVolumeResponse{}, nil
    }
    _, err = invokeCLI(ctxVars, []string{ "rm", volName })
    if (err != nil)
    {
@@ -301,13 +313,25 @@ func (cs *ControllerServer) ValidateVolumeCapabilities(ctx context.Context, req
    {
        return nil, status.Error(codes.InvalidArgument, "volumeId is nil")
    }
    volVars := make(map[string]string)
    err := json.Unmarshal([]byte(volumeID), &volVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    ctxVars, err := GetConnectionParams(volVars)
    if (err != nil)
    {
        return nil, err
    }
    volumeCapabilities := req.GetVolumeCapabilities()
    if (volumeCapabilities == nil)
    {
        return nil, status.Error(codes.InvalidArgument, "volumeCapabilities is nil")
    }
-    err := cs.checkCaps(volumeCapabilities)
+    err = cs.checkCaps(volumeCapabilities, ctxVars["vitastorfs"] != "")
    if (err != nil)
    {
        return nil, err
@@ -320,7 +344,7 @@ func (cs *ControllerServer) ValidateVolumeCapabilities(ctx context.Context, req
    }, nil
 }
-func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability) error
+func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability, fs bool) error
 {
    var volumeCapabilityAccessModes []*csi.VolumeCapability_AccessMode
    for _, mode := range []csi.VolumeCapability_AccessMode_Mode{
@@ -336,6 +360,10 @@ func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability
    {
        if (capability.GetBlock() != nil)
        {
            if (fs)
            {
                return status.Errorf(codes.InvalidArgument, "%v not supported with FS-based volumes", capability)
            }
            for _, mode := range []csi.VolumeCapability_AccessMode_Mode{
                csi.VolumeCapability_AccessMode_MULTI_NODE_SINGLE_WRITER,
                csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
@@ -346,6 +374,12 @@ func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability
        }
    }
    if (fs)
    {
        // All access modes including RWX are supported with FS-based volumes
        return nil
    }
    capabilitySupport := false
    for _, capability := range volumeCapabilities
    {
@@ -360,7 +394,7 @@ func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability
    if (!capabilitySupport)
    {
-        return status.Errorf(codes.NotFound, "%v not supported", volumeCapabilities)
+        return status.Errorf(codes.InvalidArgument, "%v not supported", volumeCapabilities)
    }
    return nil
@@ -452,6 +486,12 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    if (ctxVars["vitastorfs"] != "")
    {
        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
    }
    volName := ctxVars["name"]
    // Create image using vitastor-cli
@@ -510,6 +550,11 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS
        return nil, err
    }
    if (ctxVars["vitastorfs"] != "")
    {
        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
    }
    _, err = invokeCLI(ctxVars, []string{ "rm", volName+"@"+snapName })
    if (err != nil)
    {
@@ -541,6 +586,11 @@ func (cs *ControllerServer) ListSnapshots(ctx context.Context, req *csi.ListSnap
        return nil, err
    }
    if (ctxVars["vitastorfs"] != "")
    {
        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
    }
    inodeCfg, err := invokeList(ctxVars, volName+"@*", false)
    if (err != nil)
    {
@@ -604,6 +654,16 @@ func (cs *ControllerServer) ControllerExpandVolume(ctx context.Context, req *csi
        return nil, err
    }
    if (ctxVars["vitastorfs"] != "")
    {
        // Nothing to change
        // FIXME: Support quotas and change quota here
        return &csi.ControllerExpandVolumeResponse{
            CapacityBytes: req.CapacityRange.RequiredBytes,
            NodeExpansionRequired: false,
        }, nil
    }
    inodeCfg, err := invokeList(ctxVars, volName, true)
    if (err != nil)
    {
--- a/csi/src/nodeserver.go
+++ b/csi/src/nodeserver.go
@@ -5,11 +5,15 @@ package vitastor
 import (
    "context"
    "crypto/sha1"
    "encoding/hex"
    "encoding/json"
    "fmt"
    "os"
    "os/exec"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"
    "sync"
    "syscall"
@@ -29,13 +33,14 @@ import (
 type NodeServer struct
 {
    *Driver
-    useVduse bool
+    useVduse        bool
-    stateDir string
+    stateDir        string
-    mounter mount.Interface
+    nfsStageDir     string
    mounter         mount.Interface
    restartInterval time.Duration
-    mu sync.Mutex
+    mu              sync.Mutex
-    cond *sync.Cond
+    cond            *sync.Cond
-    volumeLocks map[string]bool
+    volumeLocks     map[string]bool
 }
 type DeviceState struct
@@ -48,6 +53,15 @@ type DeviceState struct
    PidFile    string `json:"pidFile"`
 }
 // NfsState is the JSON-serialized description of one vitastor-nfs server/mount.
 // It is decoded from "vitastor-nfs-*.json" state files by readNfsState.
 type NfsState struct
 {
    // Configuration path; mountNFS fills it from ctxVars["configPath"]
    ConfigPath string `json:"configPath"`
    // VitastorFS name, passed to vitastor-nfs as --fs
    FsName     string `json:"fsName"`
    // Pool, passed to vitastor-nfs as --pool
    Pool       string `json:"pool"`
    // Local mount path; mountNFS sets it to a directory under nfsStageDir
    Path       string `json:"path"`
    // Port of the NFS server; used as --port on restart and as the key into the active NFS mount map
    Port       int    `json:"port"`
 }
 // NewNodeServer create new instance node
 func NewNodeServer(driver *Driver) *NodeServer
 {
@@ -60,11 +74,17 @@ func NewNodeServer(driver *Driver) *NodeServer
    {
        stateDir += "/"
    }
    nfsStageDir := os.Getenv("NFS_STAGE_DIR")
    if (nfsStageDir == "")
    {
        nfsStageDir = "/var/lib/kubelet/plugins/csi.vitastor.io/nfs"
    }
    ns := &NodeServer{
-        Driver: driver,
+        Driver:      driver,
-        useVduse: checkVduseSupport(),
+        useVduse:    checkVduseSupport(),
-        stateDir: stateDir,
+        stateDir:    stateDir,
-        mounter: mount.New(""),
+        nfsStageDir: nfsStageDir,
        mounter:     mount.New(""),
        volumeLocks: make(map[string]bool),
    }
    ns.cond = sync.NewCond(&ns.mu)
@@ -123,12 +143,12 @@ func (ns *NodeServer) restarter()
 func (ns *NodeServer) restoreVduseDaemons()
 {
    pattern := ns.stateDir+"vitastor-vduse-*.json"
-    matches, err := filepath.Glob(pattern)
+    stateFiles, err := filepath.Glob(pattern)
    if (err != nil)
    {
        klog.Errorf("failed to list %s: %v", pattern, err)
    }
-    if (len(matches) == 0)
+    if (len(stateFiles) == 0)
    {
        return
    }
@@ -146,59 +166,162 @@ func (ns *NodeServer) restoreVduseDaemons()
        klog.Errorf("/sbin/vdpa -j dev list returned bad JSON (error %v): %v", err, string(devListJSON))
        return
    }
-    for _, stateFile := range matches
+    for _, stateFile := range stateFiles
    {
-        vdpaId := filepath.Base(stateFile)
+        ns.checkVduseState(stateFile, devs)
-        vdpaId = vdpaId[0:len(vdpaId)-5]
+    }
-        // Check if VDPA device is still added to the bus
+}
        if (devs[vdpaId] == nil)
        {
            // Unused, clean it up
            unmapVduseById(ns.stateDir, vdpaId)
            continue
        }
-        stateJSON, err := os.ReadFile(stateFile)
+func (ns *NodeServer) checkVduseState(stateFile string, devs map[string]interface{})
 {
    // Check if VDPA device is still added to the bus
    vdpaId := filepath.Base(stateFile)
    vdpaId = vdpaId[0:len(vdpaId)-5]
    if (devs[vdpaId] == nil)
    {
        // Unused, clean it up
        unmapVduseById(ns.stateDir, vdpaId)
        return
    }
    // Read state file
    stateJSON, err := os.ReadFile(stateFile)
    if (err != nil)
    {
        klog.Warningf("error reading state file %v: %v", stateFile, err)
        return
    }
    var state DeviceState
    err = json.Unmarshal(stateJSON, &state)
    if (err != nil)
    {
        klog.Warningf("state file %v contains invalid JSON (error %v): %v", stateFile, err, string(stateJSON))
        return
    }
    // Lock volume
    ns.lockVolume(state.ConfigPath+":block:"+state.Image)
    defer ns.unlockVolume(state.ConfigPath+":block:"+state.Image)
    // Recheck state file after locking
    _, err = os.ReadFile(stateFile)
    if (err != nil)
    {
        klog.Warningf("state file %v disappeared, skipping volume", stateFile)
        return
    }
    // Check if the storage daemon is still active
    pidFile := ns.stateDir + vdpaId + ".pid"
    exists := false
    proc, err := findByPidFile(pidFile)
    if (err == nil)
    {
        exists = proc.Signal(syscall.Signal(0)) == nil
    }
    if (!exists)
    {
        // Restart daemon
        klog.Warningf("restarting storage daemon for volume %v (VDPA ID %v)", state.Image, vdpaId)
        err = startStorageDaemon(vdpaId, state.Image, pidFile, state.ConfigPath, state.Readonly)
        if (err != nil)
        {
-            klog.Warningf("error reading state file %v: %v", stateFile, err)
+            klog.Warningf("failed to restart storage daemon for volume %v: %v", state.Image, err)
            continue
        }
-        var state DeviceState
+    }
-        err = json.Unmarshal(stateJSON, &state)
+}
 // restoreNfsDaemons scans ns.stateDir for "vitastor-nfs-*.json" state files and
 // passes each of them to checkNfsState together with the result of listActiveNFS.
 // It returns early when no state files are found or when listActiveNFS fails.
 func (ns *NodeServer) restoreNfsDaemons()
 {
    pattern := ns.stateDir+"vitastor-nfs-*.json"
    stateFiles, err := filepath.Glob(pattern)
    if (err != nil)
    {
        // NOTE(review): a Glob error is only logged; execution falls through to the length check below
        klog.Errorf("failed to list %s: %v", pattern, err)
    }
    if (len(stateFiles) == 0)
    {
        return
    }
    activeNFS, err := ns.listActiveNFS()
    if (err != nil)
    {
        // NOTE(review): nothing is logged here - presumably listActiveNFS reports its own errors; confirm
        return
    }
    // Check all state files and try to restore active mounts
    for _, stateFile := range stateFiles
    {
        ns.checkNfsState(stateFile, activeNFS)
    }
 }
 // readNfsState reads the file at stateFile and decodes it as a JSON NfsState.
 //
 // When allowNotExists is true and the file does not exist, it returns (nil, nil)
 // without logging. Any other read error, or a JSON decoding error, is logged as a
 // warning and returned with a nil state.
 func (ns *NodeServer) readNfsState(stateFile string, allowNotExists bool) (*NfsState, error)
 {
    stateJSON, err := os.ReadFile(stateFile)
    if (err != nil)
    {
        if (allowNotExists && os.IsNotExist(err))
        {
            // A missing file is not an error for callers that asked for it
            return nil, nil
        }
        klog.Warningf("error reading state file %v: %v", stateFile, err)
        return nil, err
    }
    var state NfsState
    err = json.Unmarshal(stateJSON, &state)
    if (err != nil)
    {
        // The raw file content is included in the warning
        klog.Warningf("state file %v contains invalid JSON (error %v): %v", stateFile, err, string(stateJSON))
        return nil, err
    }
    return &state, nil
 }
 func (ns *NodeServer) checkNfsState(stateFile string, activeNfs map[int][]string)
 {
    // Read state file
    state, err := ns.readNfsState(stateFile, false)
    if (err != nil)
    {
        return
    }
    // Lock FS
    ns.lockVolume(state.ConfigPath+":fs:"+state.FsName)
    defer ns.unlockVolume(state.ConfigPath+":fs:"+state.FsName)
    // Check if NFS at this port is still mounted
    pidFile := ns.stateDir + filepath.Base(stateFile)
    pidFile = pidFile[0:len(pidFile)-5] + ".pid"
    if (len(activeNfs[state.Port]) == 0)
    {
        // this is a stale state file, remove it
        klog.Warningf("state file %v contains stale mount at port %d, removing it", stateFile, state.Port)
        ns.stopNFS(stateFile, pidFile)
        return
    }
    // Check PID file
    exists := false
    proc, err := findByPidFile(pidFile)
    if (err == nil)
    {
        exists = proc.Signal(syscall.Signal(0)) == nil
    }
    if (!exists)
    {
        // Restart vitastor-nfs server
        klog.Warningf("restarting NFS server for FS %v at port %v", state.FsName, state.Port)
        _, _, err := system(
            "/usr/bin/vitastor-nfs", "start",
            "--pidfile", pidFile,
            "--bind", "127.0.0.1",
            "--port", fmt.Sprintf("%d", state.Port),
            "--fs", state.FsName,
            "--pool", state.Pool,
            "--portmap", "0",
        )
        if (err != nil)
        {
-            klog.Warningf("state file %v contains invalid JSON (error %v): %v", stateFile, err, string(stateJSON))
+            klog.Warningf("failed to restart NFS server for FS %v: %v", state.FsName, err)
            continue
        }
        ns.lockVolume(state.ConfigPath+":"+state.Image)
        // Recheck state file after locking
        _, err = os.ReadFile(stateFile)
        if (err != nil)
        {
            klog.Warningf("state file %v disappeared, skipping volume", stateFile)
            ns.unlockVolume(state.ConfigPath+":"+state.Image)
            continue
        }
        // Check if the storage daemon is still active
        pidFile := ns.stateDir + vdpaId + ".pid"
        exists := false
        proc, err := findByPidFile(pidFile)
        if (err == nil)
        {
            exists = proc.Signal(syscall.Signal(0)) == nil
        }
        if (!exists)
        {
            // Restart daemon
            klog.Warningf("restarting storage daemon for volume %v (VDPA ID %v)", state.Image, vdpaId)
            _ = startStorageDaemon(vdpaId, state.Image, pidFile, state.ConfigPath, state.Readonly)
        }
        ns.unlockVolume(state.ConfigPath+":"+state.Image)
    }
 }
@@ -220,16 +343,26 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
    }
    volName := ctxVars["name"]
-    ns.lockVolume(ctxVars["configPath"]+":"+volName)
+    if (ctxVars["vitastorfs"] != "")
-    defer ns.unlockVolume(ctxVars["configPath"]+":"+volName)
+    {
        return &csi.NodeStageVolumeResponse{}, nil
    }
    ns.lockVolume(ctxVars["configPath"]+":block:"+volName)
    defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
    targetPath := req.GetStagingTargetPath()
    isBlock := req.GetVolumeCapability().GetBlock() != nil
    // Check that it's not already mounted
-    _, err = mount.IsNotMountPoint(ns.mounter, targetPath)
+    notmnt, err := mount.IsNotMountPoint(ns.mounter, targetPath)
    if (err == nil)
    {
        if (!notmnt)
        {
            klog.Errorf("target path %s is already mounted", targetPath)
            return nil, fmt.Errorf("target path %s is already mounted", targetPath)
        }
        var finfo os.FileInfo
        finfo, err = os.Stat(targetPath)
        if (err != nil)
@@ -300,6 +433,7 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
    diskMounter := &mount.SafeFormatAndMount{Interface: ns.mounter, Exec: utilexec.New()}
    if (isBlock)
    {
        klog.Infof("bind-mounting %s to %s", devicePath, targetPath)
        err = diskMounter.Mount(devicePath, targetPath, "", []string{"bind"})
    }
    else
@@ -329,39 +463,40 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
        readOnly := Contains(opt, "ro")
        if (existingFormat == "" && !readOnly)
        {
            var cmdOut []byte
            switch fsType
            {
                case "ext4":
                    args := []string{"-m0", "-Enodiscard,lazy_itable_init=1,lazy_journal_init=1", devicePath}
-                    cmdOut, err = diskMounter.Exec.Command("mkfs.ext4", args...).CombinedOutput()
+                    _, err = systemCombined("mkfs.ext4", args...)
                case "xfs":
-                    cmdOut, err = diskMounter.Exec.Command("mkfs.xfs", "-K", devicePath).CombinedOutput()
+                    _, err = systemCombined("mkfs.xfs", "-K", devicePath)
            }
            if (err != nil)
            {
                klog.Errorf("failed to run mkfs error: %v, output: %v", err, string(cmdOut))
                goto unmap
            }
        }
        klog.Infof("formatting and mounting %s to %s with FS %s, options: %v", devicePath, targetPath, fsType, opt)
        err = diskMounter.FormatAndMount(devicePath, targetPath, fsType, opt)
        if (err == nil)
        {
            klog.Infof("successfully mounted %s to %s", devicePath, targetPath)
        }
        // Try to run online resize on mount.
        // FIXME: Implement online resize. It requires online resize support in vitastor-nbd.
        if (err == nil && existingFormat != "" && !readOnly)
        {
            var cmdOut []byte
            switch (fsType)
            {
                case "ext4":
-                    cmdOut, err = diskMounter.Exec.Command("resize2fs", devicePath).CombinedOutput()
+                    _, err = systemCombined("resize2fs", devicePath)
                case "xfs":
-                    cmdOut, err = diskMounter.Exec.Command("xfs_growfs", devicePath).CombinedOutput()
+                    _, err = systemCombined("xfs_growfs", devicePath)
            }
            if (err != nil)
            {
                klog.Errorf("failed to run resizefs error: %v, output: %v", err, string(cmdOut))
                goto unmap
            }
        }
@@ -401,8 +536,13 @@ func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstag
    }
    volName := ctxVars["name"]
-    ns.lockVolume(ctxVars["configPath"]+":"+volName)
+    if (ctxVars["vitastorfs"] != "")
-    defer ns.unlockVolume(ctxVars["configPath"]+":"+volName)
+    {
        return &csi.NodeUnstageVolumeResponse{}, nil
    }
    ns.lockVolume(ctxVars["configPath"]+":block:"+volName)
    defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
    targetPath := req.GetStagingTargetPath()
    devicePath, _, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
@@ -455,6 +595,153 @@ func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstag
    return &csi.NodeUnstageVolumeResponse{}, nil
 }
 // Mount or check if NFS is already mounted
 func (ns *NodeServer) mountNFS(ctxVars map[string]string) (string, error)
 {
    sum := sha1.Sum([]byte(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"]))
    nfsHash := hex.EncodeToString(sum[:])
    stateFile := ns.stateDir+"vitastor-nfs-"+nfsHash+".json"
    pidFile := ns.stateDir+"vitastor-nfs-"+nfsHash+".pid"
    mountPath := ns.nfsStageDir+"/"+nfsHash
    state, err := ns.readNfsState(stateFile, true)
    if (state != nil)
    {
        return state.Path, nil
    }
    if (err != nil)
    {
        return "", err
    }
    err = os.MkdirAll(mountPath, 0777)
    if (err != nil)
    {
        return "", err
    }
    // Create a new mount
    state = &NfsState{
        ConfigPath: ctxVars["configPath"],
        FsName:     ctxVars["vitastorfs"],
        Pool:       ctxVars["pool"],
        Path:       mountPath,
    }
    klog.Infof("starting new NFS server for FS %v", state.FsName)
    stdout, _, err := system(
        "/usr/bin/vitastor-nfs", "start",
        "--pidfile", pidFile,
        "--bind", "127.0.0.1",
        "--port", "auto",
        "--fs", state.FsName,
        "--pool", state.Pool,
        "--portmap", "0",
    )
    if (err != nil)
    {
        return "", err
    }
    match := regexp.MustCompile("Port: (\\d+)").FindStringSubmatch(string(stdout))
    if (match == nil)
    {
        klog.Errorf("failed to find port in vitastor-nfs output: %v", string(stdout))
        ns.stopNFS(stateFile, pidFile)
        return "", fmt.Errorf("failed to find port in vitastor-nfs output (bad vitastor-nfs version?)")
    }
    port, _ := strconv.ParseUint(match[1], 0, 16)
    state.Port = int(port)
    // Write state file
    stateJSON, _ := json.Marshal(state)
    err = os.WriteFile(stateFile, stateJSON, 0600)
    if (err != nil)
    {
        klog.Errorf("failed to write state file %v", stateFile)
        ns.stopNFS(stateFile, pidFile)
        return "", err
    }
    // Mount NFS
    _, _, err = system(
        "mount", "-t", "nfs", "127.0.0.1:/", state.Path,
        "-o", fmt.Sprintf("port=%d,mountport=%d,nfsvers=3,soft,nolock,tcp", port, port),
    )
    if (err != nil)
    {
        ns.stopNFS(stateFile, pidFile)
        return "", err
    }
    return state.Path, nil
 }
 // Mount or check if NFS is already mounted
 func (ns *NodeServer) checkStopNFS(ctxVars map[string]string)
 {
    sum := sha1.Sum([]byte(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"]))
    nfsHash := hex.EncodeToString(sum[:])
    stateFile := ns.stateDir+"vitastor-nfs-"+nfsHash+".json"
    pidFile := ns.stateDir+"vitastor-nfs-"+nfsHash+".pid"
    mountPath := ns.nfsStageDir+"/"+nfsHash
    state, err := ns.readNfsState(stateFile, true)
    if (state == nil)
    {
        return
    }
    activeNFS, err := ns.listActiveNFS()
    if (err != nil)
    {
        return
    }
    if (len(activeNFS[state.Port]) > 0)
    {
        return
    }
    // All volume mounts are detached, unmount the root mount and kill the server
    err = mount.CleanupMountPoint(mountPath, ns.mounter, false)
    if (err != nil)
    {
        klog.Errorf("failed to unmount %v: %v", mountPath, err)
        return
    }
    ns.stopNFS(stateFile, pidFile)
 }
 func (ns *NodeServer) stopNFS(stateFile, pidFile string)
 {
    err := killByPidFile(pidFile)
    if (err != nil)
    {
        klog.Errorf("failed to kill process with pid from %v: %v", pidFile, err)
    }
    os.Remove(pidFile)
    os.Remove(stateFile)
 }
 func (ns *NodeServer) listActiveNFS() (map[int][]string, error)
 {
    mounts, err := mount.ParseMountInfo("/proc/self/mountinfo")
    if (err != nil)
    {
        klog.Errorf("failed to list mounts: %v", err)
        return nil, err
    }
    activeNFS := make(map[int][]string)
    for _, mount := range mounts
    {
        // Volume mounts always refer to subpaths
        if (mount.FsType == "nfs" && mount.Root != "/")
        {
            for _, opt := range mount.MountOptions
            {
                if (strings.HasPrefix(opt, "port="))
                {
                    port64, err := strconv.ParseUint(opt[5:], 10, 16)
                    if (err == nil)
                    {
                        activeNFS[int(port64)] = append(activeNFS[int(port64)], mount.MountPoint)
                    }
                }
            }
        }
    }
    return activeNFS, nil
 }
 // NodePublishVolume mounts the volume mounted to the staging path to the target path
 func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error)
 {
@@ -473,23 +760,39 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
    }
    volName := ctxVars["name"]
-    ns.lockVolume(ctxVars["configPath"]+":"+volName)
+    if (ctxVars["vitastorfs"] != "")
-    defer ns.unlockVolume(ctxVars["configPath"]+":"+volName)
+    {
        ns.lockVolume(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"])
        defer ns.unlockVolume(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"])
    }
    else
    {
        ns.lockVolume(ctxVars["configPath"]+":block:"+volName)
        defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
    }
    stagingTargetPath := req.GetStagingTargetPath()
    targetPath := req.GetTargetPath()
    isBlock := req.GetVolumeCapability().GetBlock() != nil
-    // Check that stagingTargetPath is mounted
+    if (ctxVars["vitastorfs"] == "")
    _, err = mount.IsNotMountPoint(ns.mounter, stagingTargetPath)
    if (err != nil)
    {
-        klog.Errorf("staging path %v is not mounted: %v", stagingTargetPath, err)
+        // Check that stagingTargetPath is mounted
-        return nil, fmt.Errorf("staging path %v is not mounted: %v", stagingTargetPath, err)
+        notmnt, err := mount.IsNotMountPoint(ns.mounter, stagingTargetPath)
        if (err != nil)
        {
            klog.Errorf("staging path %v is not mounted: %w", stagingTargetPath, err)
            return nil, fmt.Errorf("staging path %v is not mounted: %w", stagingTargetPath, err)
        }
        else if (notmnt)
        {
            klog.Errorf("staging path %v is not mounted", stagingTargetPath)
            return nil, fmt.Errorf("staging path %v is not mounted", stagingTargetPath)
        }
    }
    // Check that targetPath is not already mounted
-    _, err = mount.IsNotMountPoint(ns.mounter, targetPath)
+    notmnt, err := mount.IsNotMountPoint(ns.mounter, targetPath)
    if (err != nil)
    {
        if (os.IsNotExist(err))
@@ -524,6 +827,29 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
            return nil, err
        }
    }
    else if (!notmnt)
    {
        klog.Errorf("target path %s is already mounted", targetPath)
        return nil, fmt.Errorf("target path %s is already mounted", targetPath)
    }
    if (ctxVars["vitastorfs"] != "")
    {
        nfspath, err := ns.mountNFS(ctxVars)
        if (err != nil)
        {
            ns.checkStopNFS(ctxVars)
            return nil, err
        }
        // volName should include prefix
        stagingTargetPath = nfspath+"/"+volName
        err = os.MkdirAll(stagingTargetPath, 0777)
        if (err != nil && !os.IsExist(err))
        {
            ns.checkStopNFS(ctxVars)
            return nil, err
        }
    }
    execArgs := []string{"--bind", stagingTargetPath, targetPath}
    if (req.GetReadonly())
@@ -536,6 +862,10 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
    out, err := cmd.Output()
    if (err != nil)
    {
        if (ctxVars["vitastorfs"] != "")
        {
            ns.checkStopNFS(ctxVars)
        }
        return nil, fmt.Errorf("Error running mount %v: %s", strings.Join(execArgs, " "), out)
    }
@@ -555,8 +885,16 @@ func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpu
    }
    volName := ctxVars["name"]
-    ns.lockVolume(ctxVars["configPath"]+":"+volName)
+    if (ctxVars["vitastorfs"] != "")
-    defer ns.unlockVolume(ctxVars["configPath"]+":"+volName)
+    {
        ns.lockVolume(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"])
        defer ns.unlockVolume(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"])
    }
    else
    {
        ns.lockVolume(ctxVars["configPath"]+":block:"+volName)
        defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
    }
    targetPath := req.GetTargetPath()
    devicePath, _, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
@@ -583,6 +921,11 @@ func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpu
        return nil, err
    }
    if (ctxVars["vitastorfs"] != "")
    {
        ns.checkStopNFS(ctxVars)
    }
    return &csi.NodeUnpublishVolumeResponse{}, nil
 }
--- a/csi/src/utils.go
+++ b/csi/src/utils.go
@@ -4,6 +4,7 @@
 package vitastor
 import (
    "bytes"
    "errors"
    "encoding/json"
    "fmt"
@@ -15,6 +16,8 @@ import (
    "syscall"
    "k8s.io/klog"
    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/status"
 )
 func Contains(list []string, s string) bool
@@ -73,6 +76,10 @@ func checkVduseSupport() bool
            " For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
        )
    }
    else
    {
        klog.Infof("VDUSE support enabled successfully")
    }
    return vduse
 }
@@ -97,6 +104,7 @@ func mapNbd(volName string, ctxVars map[string]string, readonly bool) (string, e
    {
        return "", fmt.Errorf("vitastor-nbd did not return the name of NBD device. output: %s", stderr)
    }
    klog.Infof("Attached volume %s via NBD as %s", volName, dev)
    return dev, err
 }
@@ -217,6 +225,7 @@ func mapVduse(stateDir string, volName string, ctxVars map[string]string, readon
                    err = os.WriteFile(stateFile, stateJSON, 0600)
                    if (err == nil)
                    {
                        klog.Infof("Attached volume %s via VDUSE as %s (VDPA ID %s)", volName, blockdev, vdpaId)
                        return blockdev, vdpaId, nil
                    }
                }
@@ -299,3 +308,35 @@ func unmapVduseById(stateDir, vdpaId string)
        os.Remove(pidFile)
    }
 }
 func system(program string, args ...string) ([]byte, []byte, error)
 {
    klog.Infof("Running "+program+" "+strings.Join(args, " "))
    c := exec.Command(program, args...)
    var stdout, stderr bytes.Buffer
    c.Stdout, c.Stderr = &stdout, &stderr
    err := c.Run()
    if (err != nil)
    {
        stdoutStr, stderrStr := string(stdout.Bytes()), string(stderr.Bytes())
        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s\nOutput:\n%s", err, stdoutStr+stderrStr)
        return nil, nil, status.Error(codes.Internal, stdoutStr+stderrStr+" (status "+err.Error()+")")
    }
    return stdout.Bytes(), stderr.Bytes(), nil
 }
 func systemCombined(program string, args ...string) ([]byte, error)
 {
    klog.Infof("Running "+program+" "+strings.Join(args, " "))
    c := exec.Command(program, args...)
    var out bytes.Buffer
    c.Stdout, c.Stderr = &out, &out
    err := c.Run()
    if (err != nil)
    {
        outStr := string(out.Bytes())
        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s, status %s\n", outStr, err)
        return nil, status.Error(codes.Internal, outStr+" (status "+err.Error()+")")
    }
    return out.Bytes(), nil
 }
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-vitastor (1.9.2-1) unstable; urgency=medium
+vitastor (2.1.0-1) unstable; urgency=medium
  * Bugfixes
--- a/debian/control
+++ b/debian/control
@@ -2,7 +2,10 @@ Source: vitastor
 Section: admin
 Priority: optional
 Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
-Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev
+Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8),
  linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev,
  libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev,
  node-bindings <!nocheck>, node-gyp, node-nan
 Standards-Version: 4.5.0
 Homepage: https://vitastor.io/
 Rules-Requires-Root: no
@@ -59,3 +62,9 @@ Architecture: amd64
 Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client, patch, python3, jq
 Description: Vitastor OpenNebula storage plugin
 Vitastor storage plugin for OpenNebula.
 Package: node-vitastor
 Architecture: amd64
 Depends: ${shlibs:Depends}, ${misc:Depends}, node-bindings
 Description: Node.js bindings for Vitastor client
 Node.js native bindings for the Vitastor client library (vitastor-client).
--- a/debian/node-vitastor.install
+++ b/debian/node-vitastor.install
@@ -0,0 +1 @@
 usr/lib/x86_64-linux-gnu/nodejs/vitastor
--- a/debian/patched-qemu.Dockerfile
+++ b/debian/patched-qemu.Dockerfile
@@ -1,17 +1,23 @@
 # Build patched QEMU for Debian inside a container
 # cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/patched-qemu.Dockerfile .
 ARG DISTRO=debian
 ARG REL=
-FROM debian:$REL
+FROM $DISTRO:$REL
 ARG DISTRO=debian
 ARG REL=
 WORKDIR /root
 RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" -o "$REL" = "bookworm" ]; then \
-        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
+        if [ "$REL" = "buster" ]; then \
            echo "deb http://archive.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
        else \
            echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
        fi; \
        echo >> /etc/apt/preferences; \
        echo 'Package: *' >> /etc/apt/preferences; \
-        echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
+        echo "Pin: release n=$REL-backports" >> /etc/apt/preferences; \
        echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
    fi; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
@@ -20,8 +26,8 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" -o "$REL" = "bookworm" ]; then
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
 RUN apt-get update
-RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
+RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
-RUN apt-get -y build-dep qemu
+RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y build-dep qemu
 # To build a custom version
 #RUN cp /root/packages/qemu-orig/* /root
 RUN apt-get --download-only source qemu
@@ -38,9 +44,9 @@ ADD src/client/qemu_driver.c /root/qemu_driver.c
 #    apt-get install -y vitastor-client vitastor-client-dev quilt
 RUN set -e; \
-    dpkg -i /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
+    DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
    apt-get update; \
-    apt-get install -y quilt; \
+    DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install quilt; \
    mkdir -p /root/packages/qemu-$REL; \
    rm -rf /root/packages/qemu-$REL/*; \
    cd /root/packages/qemu-$REL; \
@@ -54,7 +60,7 @@ RUN set -e; \
    quilt add block/vitastor.c; \
    cp /root/qemu_driver.c block/vitastor.c; \
    quilt refresh; \
-    V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor4; \
+    V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(\+deb\d+u\d+)?(~bpo[\d\+]*)?\).*$/$1/')+vitastor5; \
    if [ "$REL" = bullseye ]; then V=${V}bullseye; fi; \
    DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
--- a/debian/rules
+++ b/debian/rules
@@ -4,6 +4,14 @@ export DH_VERBOSE = 1
 %:
 	dh $@
 override_dh_install:
 	perl -pe 's!prefix=/usr!prefix='`pwd`'/debian/tmp/usr!' < obj-x86_64-linux-gnu/src/client/vitastor.pc > node-binding/vitastor.pc
 	cd node-binding && PKG_CONFIG_PATH=./ PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1 npm install --unsafe-perm || exit 1
 	mkdir -p debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor/build/Release
 	cp -v node-binding/package.json node-binding/index.js node-binding/addon.cc node-binding/addon.h node-binding/client.cc node-binding/client.h debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor
 	cp -v node-binding/build/Release/addon.node debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor/build/Release
 	dh_install
 override_dh_installdeb:
 	cat debian/fio_version >> debian/vitastor-fio.substvars
 	[ -f debian/qemu_version ] && (cat debian/qemu_version >> debian/vitastor-qemu.substvars) || true
--- a/debian/vitastor.Dockerfile
+++ b/debian/vitastor.Dockerfile
@@ -21,10 +21,11 @@ RUN set -e -x; \
    echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
-RUN apt-get update
+RUN apt-get update && \
-RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl
+    apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake \
-RUN apt-get -y build-dep fio
+        libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl nodejs npm node-nan node-bindings && \
-RUN apt-get --download-only source fio
+    apt-get -y build-dep fio && \
    apt-get --download-only source fio
 ADD . /root/vitastor
 RUN set -e -x; \
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,9 +1,11 @@
 # Build Docker image with Vitastor packages
-FROM debian:bullseye
+FROM debian:bookworm
-ADD vitastor.list /etc/apt/sources.list.d
+ADD etc/apt /etc/apt/
-ADD vitastor.gpg /etc/apt/trusted.gpg.d
+RUN apt-get update && apt-get -y install vitastor udev systemd qemu-system-x86 qemu-system-common qemu-block-extra qemu-utils jq nfs-common && apt-get clean
-ADD vitastor.pref /etc/apt/preferences.d
+ADD sleep.sh /usr/bin/
-ADD apt.conf /etc/apt/
+ADD install.sh /usr/bin/
-RUN apt-get update && apt-get -y install vitastor qemu-system-x86 qemu-system-common && apt-get clean
+ADD scripts /opt/scripts/
 ADD etc /etc/
 RUN ln -s /usr/lib/vitastor/mon/make-etcd /usr/bin/make-etcd
--- a/docker/Makefile
+++ b/docker/Makefile
@@ -0,0 +1,9 @@
 VITASTOR_VERSION ?= v2.1.0
 all: build push
 build:
 	@docker build --rm -t vitalif/vitastor:$(VITASTOR_VERSION) .
 push:
 	@docker push vitalif/vitastor:$(VITASTOR_VERSION)
--- a/docker/etc/apt/apt.conf
+++ b/docker/etc/apt/apt.conf
--- a/docker/etc/apt/preferences.d/vitastor.pref
+++ b/docker/etc/apt/preferences.d/vitastor.pref
--- a/docker/etc/apt/sources.list.d/vitastor.list
+++ b/docker/etc/apt/sources.list.d/vitastor.list
@@ -0,0 +1 @@
 deb http://vitastor.io/debian bookworm main
--- a/docker/etc/apt/trusted.gpg.d/vitastor.gpg
+++ b/docker/etc/apt/trusted.gpg.d/vitastor.gpg
--- a/docker/etc/systemd/system/vitastor-etcd.service
+++ b/docker/etc/systemd/system/vitastor-etcd.service
@@ -0,0 +1,27 @@
 [Unit]
 Description=Containerized etcd for Vitastor
 After=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
 Wants=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
 PartOf=vitastor.target
 [Service]
 Restart=always
 Environment=GOGC=50
 EnvironmentFile=/etc/vitastor/docker.conf
 EnvironmentFile=/etc/vitastor/etcd.conf
 SyslogIdentifier=etcd
 ExecStart=bash -c 'docker run --rm -i -v /var/lib/vitastor/etcd:/data \
    --log-driver none --network host $CONTAINER_OPTIONS --name vitastor-etcd \
    $ETCD_IMAGE /usr/local/bin/etcd --name "$ETCD_NAME" --data-dir /data \
    --snapshot-count 10000 --advertise-client-urls http://$ETCD_IP:2379 --listen-client-urls http://$ETCD_IP:2379 \
    --initial-advertise-peer-urls http://$ETCD_IP:2380 --listen-peer-urls http://$ETCD_IP:2380 \
    --initial-cluster-token vitastor-etcd-1 --initial-cluster "$ETCD_INITIAL_CLUSTER" \
    --initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \
    --auto-compaction-retention=10 --auto-compaction-mode=revision'
 ExecStop=docker stop vitastor-etcd
 Restart=always
 StartLimitInterval=0
 RestartSec=10
 [Install]
 WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-host.service
+++ b/docker/etc/systemd/system/vitastor-host.service
@@ -0,0 +1,23 @@
 [Unit]
 Description=Empty container for running Vitastor commands
 After=network-online.target local-fs.target time-sync.target docker.service
 Wants=network-online.target local-fs.target time-sync.target docker.service
 PartOf=vitastor.target
 [Service]
 Restart=always
 EnvironmentFile=/etc/vitastor/docker.conf
 ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /dev:/dev -v /run:/run \
    --security-opt seccomp=unconfined --privileged --pid=host --log-driver none --network host --name vitastor vitastor:$VITASTOR_VERSION \
    sleep.sh'
 ExecStartPost=udevadm trigger
 ExecStop=docker stop vitastor
 WorkingDirectory=/
 PrivateTmp=false
 TasksMax=infinity
 Restart=always
 StartLimitInterval=0
 RestartSec=10
 [Install]
 WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-mon.service
+++ b/docker/etc/systemd/system/vitastor-mon.service
@@ -0,0 +1,23 @@
 [Unit]
 Description=Containerized Vitastor monitor
 After=network-online.target local-fs.target time-sync.target docker.service
 Wants=network-online.target local-fs.target time-sync.target docker.service
 PartOf=vitastor.target
 [Service]
 Restart=always
 EnvironmentFile=/etc/vitastor/docker.conf
 SyslogIdentifier=vitastor-mon
 ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /var/lib/vitastor:/var/lib/vitastor -v /dev:/dev \
    --log-driver none --network host $CONTAINER_OPTIONS --name vitastor-mon vitastor:$VITASTOR_VERSION \
    node /usr/lib/vitastor/mon/mon-main.js'
 ExecStop=docker stop vitastor-mon
 WorkingDirectory=/
 PrivateTmp=false
 TasksMax=infinity
 Restart=always
 StartLimitInterval=0
 RestartSec=10
 [Install]
 WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-osd@.service
+++ b/docker/etc/systemd/system/vitastor-osd@.service
@@ -0,0 +1,28 @@
 [Unit]
 Description=Containerized Vitastor object storage daemon osd.%i
 After=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
 Wants=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
 PartOf=vitastor.target
 [Service]
 LimitNOFILE=1048576
 LimitNPROC=1048576
 LimitMEMLOCK=infinity
 EnvironmentFile=/etc/vitastor/docker.conf
 SyslogIdentifier=vitastor-osd%i
 ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /dev:/dev \
    $(for i in $(ls /dev/vitastor/osd%i-*); do echo --device $i:$i; done) \
    --log-driver none --network host --ulimit nofile=1048576 --ulimit memlock=-1 \
    --security-opt seccomp=unconfined $CONTAINER_OPTIONS --name vitastor-osd%i \
    vitastor:$VITASTOR_VERSION vitastor-disk exec-osd /dev/vitastor/osd%i-data'
 ExecStartPre=+docker exec vitastor vitastor-disk pre-exec /dev/vitastor/osd%i-data
 # Stop the container started by ExecStart (it is named vitastor-osd%i)
 ExecStop=docker stop vitastor-osd%i
 WorkingDirectory=/
 PrivateTmp=false
 TasksMax=infinity
 Restart=always
 StartLimitInterval=0
 RestartSec=10
 [Install]
 WantedBy=vitastor.target
--- a/docker/etc/systemd/system/vitastor.target
+++ b/docker/etc/systemd/system/vitastor.target
@@ -0,0 +1,4 @@
 [Unit]
 Description=vitastor target
 [Install]
 WantedBy=multi-user.target
--- a/docker/etc/udev/rules.d/90-vitastor.rules
+++ b/docker/etc/udev/rules.d/90-vitastor.rules
@@ -0,0 +1,7 @@
 SUBSYSTEM=="block", ENV{ID_PART_ENTRY_TYPE}=="e7009fac-a5a1-4d72-af72-53de13059903", \
    OWNER="vitastor", GROUP="vitastor", \
    IMPORT{program}="/usr/bin/docker exec vitastor vitastor-disk udev $devnode", \
    SYMLINK+="vitastor/$env{VITASTOR_ALIAS}"
 ENV{VITASTOR_OSD_NUM}!="", ACTION=="add", RUN{program}+="/usr/bin/systemctl enable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
 ENV{VITASTOR_OSD_NUM}!="", ACTION=="remove", RUN{program}+="/usr/bin/systemctl disable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
--- a/docker/etc/vitastor/docker.conf
+++ b/docker/etc/vitastor/docker.conf
@@ -0,0 +1,11 @@
 #
 # Configuration file for containerized Vitastor installation
 # (non-Kubernetes, with systemd and udev-based orchestration)
 #
 # Desired Vitastor version
 VITASTOR_VERSION=v2.1.0
 # Additional arguments for all containers
 # For example, you may want to specify a custom logging driver here
 CONTAINER_OPTIONS=""
--- a/docker/etc/vitastor/etcd.conf
+++ b/docker/etc/vitastor/etcd.conf
@@ -0,0 +1,4 @@
 ETCD_IMAGE=quay.io/coreos/etcd:v3.5.18
 ETCD_NAME=""
 ETCD_IP=""
 ETCD_INITIAL_CLUSTER=""
--- a/docker/etc/vitastor/vitastor.conf
+++ b/docker/etc/vitastor/vitastor.conf
@@ -0,0 +1,2 @@
 {
 }
--- a/docker/install.sh
+++ b/docker/install.sh
@@ -0,0 +1,9 @@
 #!/bin/bash
 set -e
 cp -urv /etc/default /host-etc/
 cp -urv /etc/systemd /host-etc/
 cp -urv /etc/udev /host-etc/
 cp -urnv /etc/vitastor /host-etc/
 cp -urnv /opt/scripts/* /host-bin/
--- a/docker/scripts/vitastor-cli
+++ b/docker/scripts/vitastor-cli
@@ -0,0 +1,3 @@
 #!/bin/bash
 docker exec -it vitastor vitastor-cli "$@"
--- a/docker/scripts/vitastor-disk
+++ b/docker/scripts/vitastor-disk
@@ -0,0 +1,3 @@
 #!/bin/bash
 docker exec -it vitastor vitastor-disk "$@"
--- a/docker/scripts/vitastor-fio
+++ b/docker/scripts/vitastor-fio
@@ -0,0 +1,3 @@
 #!/bin/bash
 docker exec -it vitastor fio "$@"
--- a/docker/scripts/vitastor-nbd
+++ b/docker/scripts/vitastor-nbd
@@ -0,0 +1,3 @@
 #!/bin/bash
 docker exec -it vitastor vitastor-nbd "$@"
--- a/docker/sleep.sh
+++ b/docker/sleep.sh
@@ -0,0 +1,3 @@
 #!/bin/bash
 while :; do sleep infinity; done
--- a/docker/vitastor.list
+++ b/docker/vitastor.list
@@ -1 +0,0 @@
 deb http://vitastor.io/debian bullseye main
--- a/docs/config.en.md
+++ b/docs/config.en.md
@@ -13,7 +13,7 @@ Vitastor configuration consists of:
 - [Separate OSD settings](config/pool.en.md#osd-settings)
 - [Inode configuration](config/inode.en.md) i.e. image metadata like name, size and parent reference
-Configuration parameters can be set in 3 places:
+Configuration parameters can be set in 4 places:
 - Configuration file (`/etc/vitastor/vitastor.conf` or other path)
 - etcd key `/vitastor/config/global`. Most variables can be set there, but etcd
  connection parameters should obviously be set in the configuration file.
--- a/docs/config.ru.md
+++ b/docs/config.ru.md
@@ -14,7 +14,7 @@
 - [Настроек инодов](config/inode.ru.md), т.е. метаданных образов, таких, как имя, размер и ссылки на
  родительский образ
-Параметры конфигурации могут задаваться в 3 местах:
+Параметры конфигурации могут задаваться в 4 местах:
 - Файле конфигурации (`/etc/vitastor/vitastor.conf` или по другому пути)
 - Ключе в etcd `/vitastor/config/global`. Большая часть параметров может
  задаваться там, кроме, естественно, самих параметров соединения с etcd,
--- a/docs/config/client.en.md
+++ b/docs/config/client.en.md
@@ -13,6 +13,7 @@ affect their interaction with the cluster.
 - [client_retry_interval](#client_retry_interval)
 - [client_eio_retry_interval](#client_eio_retry_interval)
 - [client_retry_enospc](#client_retry_enospc)
 - [client_wait_up_timeout](#client_wait_up_timeout)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
@@ -70,6 +71,19 @@ and clients are not blocked and just get EIO error code instead.
 Retry writes on out of space errors to wait until some space is freed on
 OSDs.
 ## client_wait_up_timeout
 - Type: seconds
 - Default: 16
 - Can be changed online: yes
 Wait for this number of seconds until PGs are up when doing operations
 which require all PGs to be up. Currently only used by object listings
 in delete and merge-based commands ([vitastor-cli rm](../usage/cli.en.md#rm), merge and so on).
 The default value is calculated as `1 + OSD lease timeout`, which is
 `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
 ## client_max_dirty_bytes
 - Type: integer
--- a/docs/config/client.ru.md
+++ b/docs/config/client.ru.md
@@ -13,6 +13,7 @@
 - [client_retry_interval](#client_retry_interval)
 - [client_eio_retry_interval](#client_eio_retry_interval)
 - [client_retry_enospc](#client_retry_enospc)
 - [client_wait_up_timeout](#client_wait_up_timeout)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
@@ -72,6 +73,19 @@ RDMA и хотите повысить пиковую производитель
 Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
 ожидать, пока на OSD не освободится место.
 ## client_wait_up_timeout
 - Тип: секунды
 - Значение по умолчанию: 16
 - Можно менять на лету: да
 Время ожидания поднятия PG при операциях, требующих активности всех PG.
 В данный момент используется листингами объектов в командах, использующих
 удаление и слияние ([vitastor-cli rm](../usage/cli.ru.md#rm), merge и подобные).
 Значение по умолчанию вычисляется как `1 + время lease OSD`, равное
 `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
 ## client_max_dirty_bytes
 - Тип: целое число
--- a/docs/config/layout-osd.en.md
+++ b/docs/config/layout-osd.en.md
@@ -118,12 +118,13 @@ Physical block size of the journal device. Must be a multiple of
 - Type: boolean
 - Default: false
-Do not issue fsyncs to the data device, i.e. do not flush its cache.
+Do not issue fsyncs to the data device, i.e. do not force it to flush cache.
-Safe ONLY if your data device has write-through cache. If you disable
+Safe ONLY if your data device has write-through cache or if write-back
-the cache yourself using `hdparm` or `scsi_disk/cache_type` then make sure
+cache is disabled. If you disable drive cache manually with `hdparm` or
-that the cache disable command is run every time before starting Vitastor
+writing to `/sys/.../scsi_disk/cache_type` then make sure that you do it
-OSD, for example, in the systemd unit. See also `immediate_commit` option
+every time before starting Vitastor OSD (vitastor-disk does it automatically).
-for the instructions to disable cache and how to benefit from it.
+See also [immediate_commit](layout-cluster.en.md#immediate_commit)
 for information about how to benefit from disabled cache.
 ## disable_meta_fsync
@@ -171,8 +172,7 @@ size, it actually has to write the whole 4 KB sector.
 Because of this it can actually be beneficial to use SSDs which work well
 with 512 byte sectors and use 512 byte disk_alignment, journal_block_size
-and meta_block_size. But the only SSD that may fit into this category is
+and meta_block_size. But at the moment, no such SSDs are known...
 Intel Optane (probably, not tested yet).
 Clients don't need to be aware of disk_alignment, so it's not required to
 put a modified value into etcd key /vitastor/config/global.
--- a/docs/config/layout-osd.ru.md
+++ b/docs/config/layout-osd.ru.md
@@ -122,13 +122,14 @@ SSD-диске, иначе производительность пострада
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
-Не отправлять fsync-и устройству данных, т.е. не сбрасывать его кэш.
+Не отправлять fsync-и устройству данных, т.е. не заставлять его сбрасывать кэш.
 Безопасно, ТОЛЬКО если ваше устройство данных имеет кэш со сквозной
-записью (write-through). Если вы отключаете кэш через `hdparm` или
+записью (write-through) или если кэш с отложенной записью (write-back) отключён.
-`scsi_disk/cache_type`, то удостоверьтесь, что команда отключения кэша
+Если вы отключаете кэш вручную через `hdparm` или запись в `/sys/.../scsi_disk/cache_type`,
-выполняется перед каждым запуском Vitastor OSD, например, в systemd unit-е.
+то удостоверьтесь, что вы делаете это каждый раз перед запуском Vitastor OSD
-Смотрите также опцию `immediate_commit` для инструкций по отключению кэша
+(vitastor-disk делает это автоматически). Смотрите также опцию
-и о том, как из этого извлечь выгоду.
+[immediate_commit](layout-cluster.ru.md#immediate_commit) для информации о том,
 как извлечь выгоду из отключённого кэша.
 ## disable_meta_fsync
@@ -179,9 +180,8 @@ SSD и HDD диски используют 4 КБ физические сект
 Поэтому, на самом деле, может быть выгодно найти SSD, хорошо работающие с
 меньшими, 512-байтными, блоками и использовать 512-байтные disk_alignment,
-journal_block_size и meta_block_size. Однако единственные SSD, которые
+journal_block_size и meta_block_size. Однако на данный момент такие SSD
-теоретически могут попасть в эту категорию - это Intel Optane (но и это
+не известны...
 пока не проверялось автором).
 Клиентам не обязательно знать про disk_alignment, так что помещать значение
 этого параметра в etcd в /vitastor/config/global не нужно.
--- a/docs/config/monitor.en.md
+++ b/docs/config/monitor.en.md
@@ -24,6 +24,7 @@ These parameters only apply to Monitors.
 - [osd_out_time](#osd_out_time)
 - [placement_levels](#placement_levels)
 - [use_old_pg_combinator](#use_old_pg_combinator)
 - [osd_backfillfull_ratio](#osd_backfillfull_ratio)
 ## use_antietcd
@@ -73,13 +74,13 @@ Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json]
 - Type: integer
 - Default: 8060
-HTTP port for monitors to listen on (including metrics exporter)
+HTTP port for monitors to listen to (including metrics exporter)
 ## mon_http_ip
 - Type: string
-IP address for monitors to listen on (all addresses by default)
+IP address for monitors to listen to (all addresses by default)
 ## mon_https_cert
@@ -175,3 +176,18 @@ present in the configuration, then it is defined with the default priority
 Use the old PG combination generator which doesn't support [level_placement](pool.en.md#level_placement)
 and [raw_placement](pool.en.md#raw_placement) for pools which don't use these features.
 ## osd_backfillfull_ratio
 - Type: number
 - Default: 0.99
 Monitors try to prevent OSDs becoming 100% full during rebalance or recovery by
 calculating how much space will be occupied on every OSD after all rebalance
 and recovery operations finish, and pausing rebalance and recovery if that
 amount of space exceeds OSD capacity multiplied by the value of this
 configuration parameter.
 Future used space is calculated by summing space used by all user data blocks
 (objects) in all PGs placed on a specific OSD, even if some of these objects
 currently reside on a different set of OSDs.
--- a/docs/config/monitor.ru.md
+++ b/docs/config/monitor.ru.md
@@ -24,6 +24,7 @@
 - [osd_out_time](#osd_out_time)
 - [placement_levels](#placement_levels)
 - [use_old_pg_combinator](#use_old_pg_combinator)
 - [osd_backfillfull_ratio](#osd_backfillfull_ratio)
 ## use_antietcd
@@ -178,3 +179,19 @@ OSD перед обновлением агрегированной статис
 Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
 и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
 ## osd_backfillfull_ratio
 - Тип: число
 - Значение по умолчанию: 0.99
 Мониторы стараются предотвратить 100% заполнение OSD в процессе ребаланса
 или восстановления, рассчитывая, сколько места будет занято на каждом OSD после
 завершения всех операций ребаланса и восстановления, и приостанавливая
 ребаланс и восстановление, если рассчитанный объём превышает ёмкость OSD,
 умноженную на значение данного параметра.
 Будущее занятое место рассчитывается сложением места, занятого всеми
 пользовательскими блоками данных (объектами) во всех PG, расположенных
 на конкретном OSD, даже если часть этих объектов в данный момент находится
 на другом наборе OSD.
--- a/docs/config/network.en.md
+++ b/docs/config/network.en.md
@@ -9,9 +9,11 @@
 These parameters apply to clients and OSDs and affect network connection logic
 between clients, OSDs and etcd.
- [tcp_header_buffer_size](#tcp_header_buffer_size)
+- [osd_network](#osd_network)
- [use_sync_send_recv](#use_sync_send_recv)
+- [osd_cluster_network](#osd_cluster_network)
 - [use_rdma](#use_rdma)
 - [use_rdmacm](#use_rdmacm)
 - [disable_tcp](#disable_tcp)
 - [rdma_device](#rdma_device)
 - [rdma_port_num](#rdma_port_num)
 - [rdma_gid_index](#rdma_gid_index)
@@ -30,49 +32,79 @@ between clients, OSDs and etcd.
 - [etcd_slow_timeout](#etcd_slow_timeout)
 - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
 - [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
 - [etcd_min_reload_interval](#etcd_min_reload_interval)
 - [tcp_header_buffer_size](#tcp_header_buffer_size)
 - [use_sync_send_recv](#use_sync_send_recv)
-## tcp_header_buffer_size
+## osd_network
- Type: integer
+- Type: string or array of strings
 - Default: 65536
-Size of the buffer used to read data using an additional copy. Vitastor
+Network mask of public OSD network(s) (IPv4 or IPv6). Each OSD listens to all
-packet headers are 128 bytes, payload is always at least 4 KB, so it is
+addresses of UP + RUNNING interfaces matching one of these networks, on the
-usually beneficial to try to read multiple packets at once even though
+same port. Port is auto-selected except if [bind_port](osd.en.md#bind_port) is
-it requires to copy the data an additional time. The rest of each packet
+explicitly specified. Bind address(es) may also be overridden manually by
-is received without an additional copy. You can try to play with this
+specifying [bind_address](osd.en.md#bind_address). If OSD networks are not specified
-parameter and see how it affects random iops and linear bandwidth if you
+at all, OSD just listens to a wildcard address (0.0.0.0).
 want.
-## use_sync_send_recv
+## osd_cluster_network
- Type: boolean
+- Type: string or array of strings
 - Default: false
-If true, synchronous send/recv syscalls are used instead of io_uring for
+Network mask of separate network(s) (IPv4 or IPv6) to use for OSD
-socket communication. Useless for OSDs because they require io_uring anyway,
+cluster connections. I.e. OSDs will always attempt to use these networks
-but may be required for clients with old kernel versions.
+to connect to other OSDs, while clients will attempt to use networks from
 [osd_network](#osd_network).
 ## use_rdma
 - Type: boolean
 - Default: true
-Try to use RDMA for communication if it's available. Disable if you don't
+Try to use RDMA through libibverbs for communication if it's available.
-want Vitastor to use RDMA. TCP-only clients can also talk to an RDMA-enabled
+Disable if you don't want Vitastor to use RDMA. TCP-only clients can also
-cluster, so disabling RDMA may be needed if clients have RDMA devices,
+talk to an RDMA-enabled cluster, so disabling RDMA may be needed if clients
-but they are not connected to the cluster.
+have RDMA devices, but they are not connected to the cluster.
 `use_rdma` works with RoCEv1/RoCEv2 networks, but not with iWARP and,
 maybe, with some Infiniband configurations which require RDMA-CM.
 Consider `use_rdmacm` for such networks.
 ## use_rdmacm
 - Type: boolean
 - Default: false
 Use an alternative implementation of RDMA through RDMA-CM (Connection
 Manager). Works with all RDMA networks: Infiniband, iWARP and
 RoCEv1/RoCEv2, and even allows disabling TCP and running only with RDMA.
 OSDs always use random port numbers for RDMA-CM listeners, different
 from their TCP ports. `use_rdma` is automatically disabled when
 `use_rdmacm` is enabled.
 ## disable_tcp
 - Type: boolean
 - Default: false
 Fully disable TCP and only use RDMA-CM for OSD communication.
 ## rdma_device
 - Type: string
 RDMA device name to use for Vitastor OSD communications (for example,
-"rocep5s0f0"). Now Vitastor supports all adapters, even ones without
+"rocep5s0f0"). If not specified, Vitastor will try to find an RoCE
-ODP support, like Mellanox ConnectX-3 and non-Mellanox cards.
+device matching [osd_network](#osd_network), preferring RoCEv2,
 or choose the first available RDMA device if no RoCE devices are
 found or if `osd_network` is not specified. Auto-selection is also
 unsupported with old libibverbs < v32, like in Debian 10 Buster or
 CentOS 7.
-Versions up to Vitastor 1.2.0 required ODP which is only present in
+Vitastor supports all adapters, even ones without ODP support, like
-Mellanox ConnectX >= 4. See also [rdma_odp](#rdma_odp).
+Mellanox ConnectX-3 and non-Mellanox cards. Versions up to Vitastor
 1.2.0 required ODP which is only present in Mellanox ConnectX >= 4.
 See also [rdma_odp](#rdma_odp).
 Run `ibv_devinfo -v` as root to list available RDMA devices and their
 features.
@@ -86,32 +118,36 @@ PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
 ## rdma_port_num
 - Type: integer
 - Default: 1
 RDMA device port number to use. Only for devices that have more than 1 port.
 See `phys_port_cnt` in `ibv_devinfo -v` output to determine how many ports
 your device has.
 Not relevant for RDMA-CM (use_rdmacm).
 ## rdma_gid_index
 - Type: integer
 - Default: 0
 Global address identifier index of the RDMA device to use. Different GID
 indexes may correspond to different protocols like RoCEv1, RoCEv2 and iWARP.
 Search for "GID" in `ibv_devinfo -v` output to determine which GID index
 you need.
-**IMPORTANT:** If you want to use RoCEv2 (as recommended) then the correct
+If not specified, Vitastor will try to auto-select a RoCEv2 IPv4 GID, then
-rdma_gid_index is usually 1 (IPv6) or 3 (IPv4).
+RoCEv2 IPv6 GID, then RoCEv1 IPv4 GID, then RoCEv1 IPv6 GID, then IB GID.
 GID auto-selection is unsupported with libibverbs < v32.
 A correct rdma_gid_index for RoCEv2 is usually 1 (IPv6) or 3 (IPv4).
 Not relevant for RDMA-CM (use_rdmacm).
 ## rdma_mtu
 - Type: integer
 - Default: 4096
-RDMA Path MTU to use. Must be 1024, 2048 or 4096. There is usually no
+RDMA Path MTU to use. Must be 1024, 2048 or 4096. Default is to use the
-sense to change it from the default 4096.
+RDMA device's MTU.
 ## rdma_max_sge
@@ -253,3 +289,35 @@ etcd_report_interval to guarantee that keepalive actually works.
 etcd websocket ping interval required to keep the connection alive and
 detect disconnections quickly.
 ## etcd_min_reload_interval
 - Type: milliseconds
 - Default: 1000
 - Can be changed online: yes
 Minimum interval for full etcd state reload. Introduced to prevent
 excessive load on etcd during outages when etcd can't keep up with event
 streams and cancels them.
 ## tcp_header_buffer_size
 - Type: integer
 - Default: 65536
 Size of the buffer used to read data using an additional copy. Vitastor
 packet headers are 128 bytes, payload is always at least 4 KB, so it is
 usually beneficial to try to read multiple packets at once even though
 it requires copying the data an additional time. The rest of each packet
 is received without an additional copy. You can try to play with this
 parameter and see how it affects random iops and linear bandwidth if you
 want.
 ## use_sync_send_recv
 - Type: boolean
 - Default: false
 If true, synchronous send/recv syscalls are used instead of io_uring for
 socket communication. Useless for OSDs because they require io_uring anyway,
 but may be required for clients with old kernel versions.
--- a/docs/config/network.ru.md
+++ b/docs/config/network.ru.md
@@ -9,9 +9,11 @@
 Данные параметры используются клиентами и OSD и влияют на логику сетевого
 взаимодействия между клиентами, OSD, а также etcd.
- [tcp_header_buffer_size](#tcp_header_buffer_size)
+- [osd_network](#osd_network)
- [use_sync_send_recv](#use_sync_send_recv)
+- [osd_cluster_network](#osd_cluster_network)
 - [use_rdma](#use_rdma)
 - [use_rdmacm](#use_rdmacm)
 - [disable_tcp](#disable_tcp)
 - [rdma_device](#rdma_device)
 - [rdma_port_num](#rdma_port_num)
 - [rdma_gid_index](#rdma_gid_index)
@@ -30,53 +32,79 @@
 - [etcd_slow_timeout](#etcd_slow_timeout)
 - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
 - [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
 - [etcd_min_reload_interval](#etcd_min_reload_interval)
 - [tcp_header_buffer_size](#tcp_header_buffer_size)
 - [use_sync_send_recv](#use_sync_send_recv)
-## tcp_header_buffer_size
+## osd_network
- Тип: целое число
+- Тип: строка или массив строк
 - Значение по умолчанию: 65536
-Размер буфера для чтения данных с дополнительным копированием. Пакеты
+Маски подсетей (IPv4 или IPv6) публичной сети или сетей OSD. Каждый OSD слушает
-Vitastor содержат 128-байтные заголовки, за которыми следуют данные размером
+один и тот же порт на всех адресах поднятых (UP + RUNNING) сетевых интерфейсов,
-от 4 КБ и для мелких операций ввода-вывода обычно выгодно за 1 вызов читать
+соответствующих одной из указанных сетей. Порт выбирается автоматически, если
-сразу несколько пакетов, даже не смотря на то, что это требует лишний раз
+только [bind_port](osd.ru.md#bind_port) не задан явно. Адреса для подключений можно
-скопировать данные. Часть каждого пакета за пределами значения данного
+также переопределить явно, задав [bind_address](osd.ru.md#bind_address). Если сети OSD
-параметра читается без дополнительного копирования. Вы можете попробовать
+не заданы вообще, OSD слушает все адреса (0.0.0.0).
 поменять этот параметр и посмотреть, как он влияет на производительность
 случайного и линейного доступа.
-## use_sync_send_recv
+## osd_cluster_network
- Тип: булево (да/нет)
+- Тип: строка или массив строк
 - Значение по умолчанию: false
-Если установлено в истину, то вместо io_uring для передачи данных по сети
+Маски подсетей (IPv4 или IPv6) отдельной кластерной сети или сетей OSD.
-будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
+То есть, OSD будут всегда стараться использовать эти сети для соединений
-это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
+с другими OSD, а клиенты будут стараться использовать сети из [osd_network](#osd_network).
 принципе, это может применяться для клиентов со старыми версиями ядра.
 ## use_rdma
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
-Пытаться использовать RDMA для связи при наличии доступных устройств.
+Попробовать использовать RDMA через libibverbs для связи при наличии
-Отключите, если вы не хотите, чтобы Vitastor использовал RDMA.
+доступных устройств. Отключите, если вы не хотите, чтобы Vitastor
-TCP-клиенты также могут работать с RDMA-кластером, так что отключать
+использовал RDMA. TCP-клиенты также могут работать с RDMA-кластером,
-RDMA может быть нужно только если у клиентов есть RDMA-устройства,
+так что отключать RDMA может быть нужно, только если у клиентов есть
-но они не имеют соединения с кластером Vitastor.
+RDMA-устройства, но они не имеют соединения с кластером Vitastor.
 `use_rdma` работает с RoCEv1/RoCEv2 сетями, но не работает с iWARP и
 может не работать с частью конфигураций Infiniband, требующих RDMA-CM.
 Рассмотрите включение `use_rdmacm` для таких сетей.
 ## use_rdmacm
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Использовать альтернативную реализацию RDMA на основе RDMA-CM (Connection
 Manager). Работает со всеми типами RDMA-сетей: Infiniband, iWARP и
 RoCEv1/RoCEv2, и даже позволяет полностью отключить TCP и работать
 только на RDMA. OSD используют случайные номера портов для ожидания
 соединений через RDMA-CM, отличающиеся от их TCP-портов. Также при
 включении `use_rdmacm` автоматически отключается опция `use_rdma`.
 ## disable_tcp
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Полностью отключить TCP и использовать только RDMA-CM для соединений с OSD.
 ## rdma_device
 - Тип: строка
 Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
-Сейчас Vitastor поддерживает все модели адаптеров, включая те, у которых
+Если не указано, Vitastor попробует найти RoCE-устройство, соответствующее
-нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
+[osd_network](#osd_network), предпочитая RoCEv2, или выбрать первое
-картами производства не Mellanox.
+попавшееся RDMA-устройство, если RoCE-устройств нет или если сеть `osd_network`
 не задана. Также автовыбор не поддерживается со старыми версиями библиотеки
 libibverbs < v32, например в Debian 10 Buster или CentOS 7.
-Версии Vitastor до 1.2.0 включительно требовали ODP, который есть только
+Vitastor поддерживает все модели адаптеров, включая те, у которых
-на Mellanox ConnectX 4 и более новых. См. также [rdma_odp](#rdma_odp).
+нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
 картами производства не Mellanox. Версии Vitastor до 1.2.0 включительно
 требовали ODP, который есть только на Mellanox ConnectX 4 и более новых.
 См. также [rdma_odp](#rdma_odp).
 Запустите `ibv_devinfo -v` от имени суперпользователя, чтобы посмотреть
 список доступных RDMA-устройств, их параметры и возможности.
@@ -91,33 +119,38 @@ Control) и ECN (Explicit Congestion Notification).
 ## rdma_port_num
 - Тип: целое число
 - Значение по умолчанию: 1
 Номер порта RDMA-устройства, который следует использовать. Имеет смысл
 только для устройств, у которых более 1 порта. Чтобы узнать, сколько портов
 у вашего адаптера, посмотрите `phys_port_cnt` в выводе команды
 `ibv_devinfo -v`.
 Опция неприменима к RDMA-CM (use_rdmacm).
 ## rdma_gid_index
 - Тип: целое число
 - Значение по умолчанию: 0
 Номер глобального идентификатора адреса RDMA-устройства, который следует
 использовать. Разным gid_index могут соответствовать разные протоколы связи:
 RoCEv1, RoCEv2, iWARP. Чтобы понять, какой нужен вам - смотрите строчки со
 словом "GID" в выводе команды `ibv_devinfo -v`.
-**ВАЖНО:** Если вы хотите использовать RoCEv2 (как мы и рекомендуем), то
+Если не указан, Vitastor попробует автоматически выбрать сначала GID,
-правильный rdma_gid_index, как правило, 1 (IPv6) или 3 (IPv4).
+соответствующий RoCEv2 IPv4, потом RoCEv2 IPv6, потом RoCEv1 IPv4, потом
 RoCEv1 IPv6, потом IB. Авто-выбор GID не поддерживается со старыми версиями
 libibverbs < v32.
 Правильный rdma_gid_index для RoCEv2, как правило, 1 (IPv6) или 3 (IPv4).
 Опция неприменима к RDMA-CM (use_rdmacm).
 ## rdma_mtu
 - Тип: целое число
 - Значение по умолчанию: 4096
 Максимальная единица передачи (Path MTU) для RDMA. Должно быть равно 1024,
-2048 или 4096. Обычно нет смысла менять значение по умолчанию, равное 4096.
+2048 или 4096. По умолчанию используется значение MTU RDMA-устройства.
 ## rdma_max_sge
@@ -263,3 +296,37 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
 - Можно менять на лету: да
 Интервал проверки живости вебсокет-подключений к etcd.
 ## etcd_min_reload_interval
 - Тип: миллисекунды
 - Значение по умолчанию: 1000
 - Можно менять на лету: да
 Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
 предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
 успевает рассылать потоки событий и отменяет их.
 ## tcp_header_buffer_size
 - Тип: целое число
 - Значение по умолчанию: 65536
 Размер буфера для чтения данных с дополнительным копированием. Пакеты
 Vitastor содержат 128-байтные заголовки, за которыми следуют данные размером
 от 4 КБ и для мелких операций ввода-вывода обычно выгодно за 1 вызов читать
 сразу несколько пакетов, даже несмотря на то, что это требует лишний раз
 скопировать данные. Часть каждого пакета за пределами значения данного
 параметра читается без дополнительного копирования. Вы можете попробовать
 поменять этот параметр и посмотреть, как он влияет на производительность
 случайного и линейного доступа.
 ## use_sync_send_recv
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Если установлено в истину, то вместо io_uring для передачи данных по сети
 будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
 это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
 принципе, это может применяться для клиентов со старыми версиями ядра.
--- a/docs/config/osd.en.md
+++ b/docs/config/osd.en.md
@@ -7,16 +7,15 @@
 # Runtime OSD Parameters
 These parameters only apply to OSDs, are not fixed at the moment of OSD drive
-initialization and can be changed - either with an OSD restart or, for some of
+initialization and can be changed - in /etc/vitastor/vitastor.conf or [vitastor-disk update-sb](../usage/disk.en.md#update-sb)
-them, even without restarting by updating configuration in etcd.
+with an OSD restart or, for some of them, even without restarting by updating configuration in etcd.
 - [bind_address](#bind_address)
 - [bind_port](#bind_port)
 - [osd_iothread_count](#osd_iothread_count)
 - [etcd_report_interval](#etcd_report_interval)
 - [etcd_stats_interval](#etcd_stats_interval)
 - [run_primary](#run_primary)
 - [osd_network](#osd_network)
 - [bind_address](#bind_address)
 - [bind_port](#bind_port)
 - [autosync_interval](#autosync_interval)
 - [autosync_writes](#autosync_writes)
 - [recovery_queue_depth](#recovery_queue_depth)
@@ -61,6 +60,26 @@ them, even without restarting by updating configuration in etcd.
 - [recovery_tune_agg_interval](#recovery_tune_agg_interval)
 - [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
 - [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
 - [discard_on_start](#discard_on_start)
 - [min_discard_size](#min_discard_size)
 - [allow_net_split](#allow_net_split)
 ## bind_address
 - Type: string or array of strings
 Instead of the network masks ([osd_network](network.en.md#osd_network) and
 [osd_cluster_network](network.en.md#osd_cluster_network)), you can also set
 OSD listen addresses explicitly using this parameter. May be useful if you
 want to start OSDs on interfaces that are not UP + RUNNING.
 ## bind_port
 - Type: integer
 By default, OSDs pick random ports to use for incoming connections
 automatically. With this option you can set a specific port for a specific
 OSD by hand.
 ## osd_iothread_count
@@ -104,34 +123,6 @@ debugging purposes. It's possible to implement additional feature for the
 monitor which may allow to separate primary and secondary OSDs, but it's
 unclear why anyone could need it, so it's not implemented.
 ## osd_network
 - Type: string or array of strings
 Network mask of the network (IPv4 or IPv6) to use for OSDs. Note that
 although it's possible to specify multiple networks here, this does not
 mean that OSDs will create multiple listening sockets - they'll only
 pick the first matching address of an UP + RUNNING interface. Separate
 networks for cluster and client connections are also not implemented, but
 they are mostly useless anyway, so it's not a big deal.
 ## bind_address
 - Type: string
 - Default: 0.0.0.0
 Instead of the network mask, you can also set OSD listen address explicitly
 using this parameter. May be useful if you want to start OSDs on interfaces
 that are not UP + RUNNING.
 ## bind_port
 - Type: integer
 By default, OSDs pick random ports to use for incoming connections
 automatically. With this option you can set a specific port for a specific
 OSD by hand.
 ## autosync_interval
 - Type: seconds
@@ -316,7 +307,7 @@ for hot data and slower disks - HDDs and maybe SATA SSDs - but will slightly
 decrease write performance for fast disks because page cache is an overhead
 itself.
-Choose "directsync" to use [immediate_commit](layout-cluster.ru.md#immediate_commit)
+Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
 (which requires disable_data_fsync) with drives having write-back cache
 which can't be turned off, for example, Intel Optane. Also note that *some*
 desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
@@ -629,3 +620,30 @@ are changed to 0.
 Maximum possible value for auto-tuned recovery_sleep_us. Higher values
 are treated as outliers and ignored in aggregation.
 ## discard_on_start
 - Type: boolean
 Discard (SSD TRIM) unused data device blocks on every OSD startup.
 ## min_discard_size
 - Type: integer
 - Default: 1048576
 Minimum size of a consecutive block for it to be discarded with TRIM.
 ## allow_net_split
 - Type: boolean
 - Default: false
 Allow "safe" cases of network splits/partitions - allow starting PGs without
 connections to some OSDs currently registered as alive in etcd, if the number
 of actually connected PG OSDs is at least pg_minsize. That is, allow some OSDs to lose
 connectivity with some other OSDs as long as it doesn't break pg_minsize guarantees.
 The downside is that it increases the probability of writing data into just pg_minsize
 OSDs during failover which can lead to PGs becoming incomplete after additional outages.
 The old behaviour in versions up to 2.0.0 was equivalent to allow_net_split being enabled.
--- a/docs/config/osd.ru.md
+++ b/docs/config/osd.ru.md
@@ -8,16 +8,15 @@
 Данные параметры используются только OSD, но, в отличие от дисковых параметров,
 не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
-момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
+момент с перезапуском OSD в /etc/vitastor/vitastor.conf или [vitastor-disk update-sb](../usage/disk.ru.md#update-sb),
-изменения конфигурации в etcd.
+а некоторые и без перезапуска, с помощью изменения конфигурации в etcd.
 - [bind_address](#bind_address)
 - [bind_port](#bind_port)
 - [osd_iothread_count](#osd_iothread_count)
 - [etcd_report_interval](#etcd_report_interval)
 - [etcd_stats_interval](#etcd_stats_interval)
 - [run_primary](#run_primary)
 - [osd_network](#osd_network)
 - [bind_address](#bind_address)
 - [bind_port](#bind_port)
 - [autosync_interval](#autosync_interval)
 - [autosync_writes](#autosync_writes)
 - [recovery_queue_depth](#recovery_queue_depth)
@@ -62,6 +61,26 @@
 - [recovery_tune_agg_interval](#recovery_tune_agg_interval)
 - [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
 - [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
 - [discard_on_start](#discard_on_start)
 - [min_discard_size](#min_discard_size)
 - [allow_net_split](#allow_net_split)
 ## bind_address
 - Тип: строка или массив строк
 Вместо использования масок подсети ([osd_network](network.ru.md#osd_network) и
 [osd_cluster_network](network.ru.md#osd_cluster_network)), вы также можете явно
 задать адрес(а), на которых будут ожидать соединений OSD, с помощью данного
 параметра. Это может быть полезно, например, чтобы запускать OSD на неподнятых
 интерфейсах (не UP + RUNNING).
 ## bind_port
 - Тип: целое число
 По умолчанию OSD сами выбирают случайные порты для входящих подключений.
 С помощью данной опции вы можете задать порт для отдельного OSD вручную.
 ## osd_iothread_count
@@ -107,34 +126,6 @@ max_etcd_attempts * etcd_quick_timeout.
 первичные OSD от вторичных, но пока не понятно, зачем это может кому-то
 понадобиться, поэтому это не реализовано.
 ## osd_network
 - Тип: строка или массив строк
 Маска подсети (IPv4 или IPv6) для использования для соединений с OSD.
 Имейте в виду, что хотя сейчас и можно передать в этот параметр несколько
 подсетей, это не означает, что OSD будут создавать несколько слушающих
 сокетов - они лишь будут выбирать адрес первого поднятого (состояние UP +
 RUNNING), подходящий под заданную маску. Также не реализовано разделение
 кластерной и публичной сетей OSD. Правда, от него обычно всё равно довольно
 мало толку, так что особенной проблемы в этом нет.
 ## bind_address
 - Тип: строка
 - Значение по умолчанию: 0.0.0.0
 Этим параметром можно явным образом задать адрес, на котором будет ожидать
 соединений OSD (вместо использования маски подсети). Может быть полезно,
 например, чтобы запускать OSD на неподнятых интерфейсах (не UP + RUNNING).
 ## bind_port
 - Тип: целое число
 По умолчанию OSD сами выбирают случайные порты для входящих подключений.
 С помощью данной опции вы можете задать порт для отдельного OSD вручную.
 ## autosync_interval
 - Тип: секунды
@@ -660,3 +651,31 @@ EC (кодов коррекции ошибок) с более, чем 1 диск
 Максимальное возможное значение авто-подстроенного recovery_sleep_us.
 Большие значения считаются случайными выбросами и игнорируются в
 усреднении.
 ## discard_on_start
 - Тип: булево (да/нет)
 Освобождать (SSD TRIM) неиспользуемые блоки диска данных при каждом запуске OSD.
 ## min_discard_size
 - Тип: целое число
 - Значение по умолчанию: 1048576
 Минимальный размер последовательного блока данных, чтобы освобождать его через TRIM.
 ## allow_net_split
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Разрешить "безопасные" случаи разделений сети - разрешить активировать PG без
 соединений к некоторым OSD, помеченным активными в etcd, если общее число активных
 OSD в PG составляет как минимум pg_minsize. То есть, разрешать некоторым OSD терять
 соединения с некоторыми другими OSD, если это не нарушает гарантий pg_minsize.
 Минус такого разрешения в том, что оно повышает вероятность записи данных ровно в
 pg_minsize OSD во время переключений, что может потом привести к тому, что PG станут
 неполными (incomplete), если упадут ещё какие-то OSD.
 Старое поведение в версиях до 2.0.0 было идентично включённому allow_net_split.
--- a/docs/config/pool.en.md
+++ b/docs/config/pool.en.md
@@ -43,7 +43,7 @@ Parameters:
 - [osd_tags](#osd_tags)
 - [primary_affinity_tags](#primary_affinity_tags)
 - [scrub_interval](#scrub_interval)
- [used_for_fs](#used_for_fs)
+- [used_for_app](#used_for_app)
 Examples:
@@ -189,6 +189,9 @@ So, pg_minsize regulates the number of failures that a pool can tolerate
 without temporary downtime for [osd_out_time](monitor.en.md#osd_out_time),
 but at a cost of slightly reduced storage reliability.
 See also [allow_net_split](osd.en.md#allow_net_split) and
 [PG state descriptions](../usage/admin.en.md#pg-states).
 FIXME: pg_minsize behaviour may be changed in the future to only make PGs
 read-only instead of deactivating them.
@@ -377,24 +380,37 @@ of the OSDs containing a data chunk for a PG.
 Automatic scrubbing interval for this pool. Overrides
 [global scrub_interval setting](osd.en.md#scrub_interval).
-## used_for_fs
+## used_for_app
 - Type: string
-If non-empty, the pool is marked as used for VitastorFS with metadata stored
+If non-empty, the pool is marked as used for a separate application, for example,
-in block image (regular Vitastor volume) named as the value of this pool parameter.
+VitastorFS or S3, which allocates Vitastor volume IDs by itself and does not use
 image/inode metadata in etcd.
-When a pool is marked as used for VitastorFS, regular block volume creation in it
+When a pool is marked as used for such an app, regular block volume creation in it
 is disabled (vitastor-cli refuses to create images without --force) to protect
-the user from block volume and FS file ID collisions and data loss.
+the user from block volume and FS/S3 volume ID collisions and data loss.
-[vitastor-nfs](../usage/nfs.ru.md), in its turn, refuses to use pools not marked
+Also such pools do not calculate per-inode space usage statistics in etcd because
 using it for an external application implies that it may contain a very large
 number of volumes and their statistics may take too much space in etcd.
 Setting used_for_app to `fs:<name>` tells Vitastor that the pool is used for VitastorFS
 with VitastorKV metadata base stored in a block image (regular Vitastor volume) named
 `<name>`.
 [vitastor-nfs](../usage/nfs.en.md), in its turn, refuses to use pools not marked
 for the corresponding FS when starting. This also implies that you can use one
 pool only for one VitastorFS.
-The second thing that is disabled for VitastorFS pools is reporting per-inode space
+If you plan to use the pool for S3, set its used_for_app to `s3:<name>`. `<name>` may
-usage statistics in etcd because a FS pool may store a very large number of files
+be basically anything you want (for example, `s3:standard`) - it's not validated
-and statistics for them all would take a lot of space in etcd.
+by Vitastor S3 components in any way.
 All other values except those prefixed with `fs:` or `s3:` may be used freely and don't
 mean anything special for Vitastor core components. For now, you can use them as
 you wish.
 # Examples
--- a/docs/config/pool.ru.md
+++ b/docs/config/pool.ru.md
@@ -42,7 +42,7 @@
 - [osd_tags](#osd_tags)
 - [primary_affinity_tags](#primary_affinity_tags)
 - [scrub_interval](#scrub_interval)
- [used_for_fs](#used_for_fs)
+- [used_for_app](#used_for_app)
 Примеры:
@@ -256,7 +256,7 @@ PG в Vitastor эфемерны, то есть вы можете менят
 ## raw_placement
- Type: string
+- Тип: строка
 Низкоуровневые правила генерации PG в форме DSL (доменно-специфичного языка).
 Используйте, только если действительно знаете, зачем вам это надо :)
@@ -383,26 +383,42 @@ OSD с "all".
 Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
 Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).
-## used_for_fs
+## used_for_app
- Type: string
+- Тип: строка
-Если непусто, пул помечается как используемый для файловой системы VitastorFS с
+Если непусто, пул помечается как используемый для отдельного приложения, например,
-метаданными, хранимыми в блочном образе Vitastor с именем, равным значению
+для VitastorFS или S3, которое распределяет ID образов в пуле само и не использует
-этого параметра.
+метаданные образов/инодов в etcd.
-Когда пул помечается как используемый для VitastorFS, создание обычных блочных
+Когда пул помечается используемым для такого приложения, создание обычных блочных
-образов в нём отключается (vitastor-cli отказывается создавать образы без --force),
+образов в нём запрещается (vitastor-cli отказывается создавать образы без --force),
-чтобы защитить пользователя от коллизий ID файлов и блочных образов и, таким
+чтобы защитить пользователя от коллизий ID блочных образов и томов ФС/S3, и,
-образом, от потери данных.
+таким образом, от потери данных.
 Также для таких пулов отключается передача статистики в etcd по отдельным инодам,
 так как использование для внешнего приложения подразумевает, что пул может содержать
 очень много томов и их статистика может занять слишком много места в etcd.
 Установка used_for_app в значение `fs:<name>` сообщает о том, что пул используется
 для VitastorFS с базой метаданных VitastorKV, хранимой в блочном образе с именем
 `<name>`.
 [vitastor-nfs](../usage/nfs.ru.md), в свою очередь, при запуске отказывается
-использовать для ФС пулы, не выделенные для неё. Это также означает, что один
+использовать для ФС пулы, не помеченные, как используемые для неё. Это также
-пул может использоваться только для одной VitastorFS.
+означает, что один пул может использоваться только для одной VitastorFS.
-Также для ФС-пулов отключается передача статистики в etcd по отдельным инодам,
+Если же вы планируете использовать пул для данных S3, установите его used_for_app
-так как ФС-пул может содержать очень много файлов и статистика по ним всем
+в значение `s3:<name>`, где `<name>` - любое название по вашему усмотрению
-заняла бы очень много места в etcd.
+(например, `s3:standard`) - конкретное содержимое `<name>` пока никак не проверяется
 компонентами Vitastor S3.
 Смотрите также [allow_net_split](osd.ru.md#allow_net_split) и
 [документацию по состояниям PG](../usage/admin.ru.md#состояния-pg).
 Все остальные значения used_for_app, кроме начинающихся на `fs:` или `s3:`, не
 означают ничего особенного для основных компонентов Vitastor. Поэтому сейчас вы
 можете использовать их свободно любым желаемым способом.
 # Примеры
--- a/docs/config/src/client.yml
+++ b/docs/config/src/client.yml
@@ -61,6 +61,24 @@
  info_ru: |
    Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
    ожидать, пока на OSD не освободится место.
 - name: client_wait_up_timeout
  type: sec
  default: 16
  online: true
  info: |
    Wait for this number of seconds until PGs are up when doing operations
    which require all PGs to be up. Currently only used by object listings
    in delete and merge-based commands ([vitastor-cli rm](../usage/cli.en.md#rm), merge and so on).
    The default value is calculated as `1 + OSD lease timeout`, which is
    `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
  info_ru: |
    Время ожидания поднятия PG при операциях, требующих активности всех PG.
    В данный момент используется листингами объектов в командах, использующих
    удаление и слияние ([vitastor-cli rm](../usage/cli.ru.md#rm), merge и подобные).
    Значение по умолчанию вычисляется как `1 + время lease OSD`, равное
    `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
 - name: client_max_dirty_bytes
  type: int
  default: 33554432
--- a/docs/config/src/included.en.md
+++ b/docs/config/src/included.en.md
@@ -14,8 +14,12 @@
 {{../../installation/packages.en.md}}
 {{../../installation/docker.en.md}}
 {{../../installation/proxmox.en.md}}
 {{../../installation/opennebula.en.md}}
 {{../../installation/openstack.en.md}}
 {{../../installation/kubernetes.en.md}}
--- a/docs/config/src/included.ru.md
+++ b/docs/config/src/included.ru.md
@@ -14,8 +14,12 @@
 {{../../installation/packages.ru.md}}
 {{../../installation/docker.ru.md}}
 {{../../installation/proxmox.ru.md}}
 {{../../installation/opennebula.ru.md}}
 {{../../installation/openstack.ru.md}}
 {{../../installation/kubernetes.ru.md}}
--- a/docs/config/src/layout-osd.yml
+++ b/docs/config/src/layout-osd.yml
@@ -110,20 +110,22 @@
  type: bool
  default: false
  info: |
-    Do not issue fsyncs to the data device, i.e. do not flush its cache.
+    Do not issue fsyncs to the data device, i.e. do not force it to flush cache.
-    Safe ONLY if your data device has write-through cache. If you disable
+    Safe ONLY if your data device has write-through cache or if write-back
-    the cache yourself using `hdparm` or `scsi_disk/cache_type` then make sure
+    cache is disabled. If you disable drive cache manually with `hdparm` or
-    that the cache disable command is run every time before starting Vitastor
+    writing to `/sys/.../scsi_disk/cache_type` then make sure that you do it
-    OSD, for example, in the systemd unit. See also `immediate_commit` option
+    every time before starting Vitastor OSD (vitastor-disk does it automatically).
-    for the instructions to disable cache and how to benefit from it.
+    See also [immediate_commit](layout-cluster.en.md#immediate_commit)
    for information about how to benefit from disabled cache.
  info_ru: |
-    Не отправлять fsync-и устройству данных, т.е. не сбрасывать его кэш.
+    Не отправлять fsync-и устройству данных, т.е. не заставлять его сбрасывать кэш.
    Безопасно, ТОЛЬКО если ваше устройство данных имеет кэш со сквозной
-    записью (write-through). Если вы отключаете кэш через `hdparm` или
+    записью (write-through) или если кэш с отложенной записью (write-back) отключён.
-    `scsi_disk/cache_type`, то удостоверьтесь, что команда отключения кэша
+    Если вы отключаете кэш вручную через `hdparm` или запись в `/sys/.../scsi_disk/cache_type`,
-    выполняется перед каждым запуском Vitastor OSD, например, в systemd unit-е.
+    то удостоверьтесь, что вы делаете это каждый раз перед запуском Vitastor OSD
-    Смотрите также опцию `immediate_commit` для инструкций по отключению кэша
+    (vitastor-disk делает это автоматически). Смотрите также опцию
-    и о том, как из этого извлечь выгоду.
+    [immediate_commit](layout-cluster.ru.md#immediate_commit) для информации о том,
    как извлечь выгоду из отключённого кэша.
 - name: disable_meta_fsync
  type: bool
  default: false
@@ -179,8 +181,7 @@
    Because of this it can actually be beneficial to use SSDs which work well
    with 512 byte sectors and use 512 byte disk_alignment, journal_block_size
-    and meta_block_size. But the only SSD that may fit into this category is
+    and meta_block_size. But at the moment, no such SSDs are known...
-    Intel Optane (probably, not tested yet).
    Clients don't need to be aware of disk_alignment, so it's not required to
    put a modified value into etcd key /vitastor/config/global.
@@ -198,9 +199,8 @@
    Поэтому, на самом деле, может быть выгодно найти SSD, хорошо работающие с
    меньшими, 512-байтными, блоками и использовать 512-байтные disk_alignment,
-    journal_block_size и meta_block_size. Однако единственные SSD, которые
+    journal_block_size и meta_block_size. Однако на данный момент такие SSD
-    теоретически могут попасть в эту категорию - это Intel Optane (но и это
+    не известны...
-    пока не проверялось автором).
    Клиентам не обязательно знать про disk_alignment, так что помещать значение
    этого параметра в etcd в /vitastor/config/global не нужно.
--- a/docs/config/src/monitor.yml
+++ b/docs/config/src/monitor.yml
@@ -75,11 +75,11 @@
 - name: mon_http_port
  type: int
  default: 8060
-  info: HTTP port for monitors to listen on (including metrics exporter)
+  info: HTTP port for monitors to listen to (including metrics exporter)
  info_ru: Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
 - name: mon_http_ip
  type: string
-  info: IP address for monitors to listen on (all addresses by default)
+  info: IP address for monitors to listen to (all addresses by default)
  info_ru: IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
 - name: mon_https_cert
  type: string
@@ -172,3 +172,27 @@
  info_ru: |
    Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
    и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
 - name: osd_backfillfull_ratio
  type: float
  default: 0.99
  info: |
    Monitors try to prevent OSDs becoming 100% full during rebalance or recovery by
    calculating how much space will be occupied on every OSD after all rebalance
    and recovery operations finish, and pausing rebalance and recovery if that
    amount of space exceeds OSD capacity multiplied by the value of this
    configuration parameter.
    Future used space is calculated by summing space used by all user data blocks
    (objects) in all PGs placed on a specific OSD, even if some of these objects
    currently reside on a different set of OSDs.
  info_ru: |
    Мониторы стараются предотвратить 100% заполнение OSD в процессе ребаланса
    или восстановления, рассчитывая, сколько места будет занято на каждом OSD после
    завершения всех операций ребаланса и восстановления, и приостанавливая
    ребаланс и восстановление, если рассчитанный объём превышает ёмкость OSD,
    умноженную на значение данного параметра.
    Будущее занятое место рассчитывается сложением места, занятого всеми
    пользовательскими блоками данных (объектами) во всех PG, расположенных
    на конкретном OSD, даже если часть этих объектов в данный момент находится
    на другом наборе OSD.
--- a/docs/config/src/network.yml
+++ b/docs/config/src/network.yml
@@ -1,58 +1,93 @@
- name: tcp_header_buffer_size
+- name: osd_network
-  type: int
+  type: string or array of strings
-  default: 65536
+  type_ru: строка или массив строк
  info: |
-    Size of the buffer used to read data using an additional copy. Vitastor
+    Network mask of public OSD network(s) (IPv4 or IPv6). Each OSD listens to all
-    packet headers are 128 bytes, payload is always at least 4 KB, so it is
+    addresses of UP + RUNNING interfaces matching one of these networks, on the
-    usually beneficial to try to read multiple packets at once even though
+    same port. Port is auto-selected except if [bind_port](osd.en.md#bind_port) is
-    it requires to copy the data an additional time. The rest of each packet
+    explicitly specified. Bind address(es) may also be overridden manually by
-    is received without an additional copy. You can try to play with this
+    specifying [bind_address](osd.en.md#bind_address). If OSD networks are not specified
-    parameter and see how it affects random iops and linear bandwidth if you
+    at all, OSD just listens to a wildcard address (0.0.0.0).
-    want.
  info_ru: |
-    Размер буфера для чтения данных с дополнительным копированием. Пакеты
+    Маски подсетей (IPv4 или IPv6) публичной сети или сетей OSD. Каждый OSD слушает
-    Vitastor содержат 128-байтные заголовки, за которыми следуют данные размером
+    один и тот же порт на всех адресах поднятых (UP + RUNNING) сетевых интерфейсов,
-    от 4 КБ и для мелких операций ввода-вывода обычно выгодно за 1 вызов читать
+    соответствующих одной из указанных сетей. Порт выбирается автоматически, если
-    сразу несколько пакетов, даже не смотря на то, что это требует лишний раз
+    только [bind_port](osd.ru.md#bind_port) не задан явно. Адреса для подключений можно
-    скопировать данные. Часть каждого пакета за пределами значения данного
+    также переопределить явно, задав [bind_address](osd.ru.md#bind_address). Если сети OSD
-    параметра читается без дополнительного копирования. Вы можете попробовать
+    не заданы вообще, OSD слушает все адреса (0.0.0.0).
-    поменять этот параметр и посмотреть, как он влияет на производительность
+- name: osd_cluster_network
-    случайного и линейного доступа.
+  type: string or array of strings
- name: use_sync_send_recv
+  type_ru: строка или массив строк
-  type: bool
-  default: false
  info: |
-    If true, synchronous send/recv syscalls are used instead of io_uring for
+    Network mask of separate network(s) (IPv4 or IPv6) to use for OSD
-    socket communication. Useless for OSDs because they require io_uring anyway,
+    cluster connections. I.e. OSDs will always attempt to use these networks
-    but may be required for clients with old kernel versions.
+    to connect to other OSDs, while clients will attempt to use networks from
    [osd_network](#osd_network).
  info_ru: |
-    Если установлено в истину, то вместо io_uring для передачи данных по сети
+    Маски подсетей (IPv4 или IPv6) отдельной кластерной сети или сетей OSD.
-    будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
+    То есть, OSD будут всегда стараться использовать эти сети для соединений
-    это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
+    с другими OSD, а клиенты будут стараться использовать сети из [osd_network](#osd_network).
-    принципе, это может применяться для клиентов со старыми версиями ядра.
 - name: use_rdma
  type: bool
  default: true
  info: |
-    Try to use RDMA for communication if it's available. Disable if you don't
+    Try to use RDMA through libibverbs for communication if it's available.
-    want Vitastor to use RDMA. TCP-only clients can also talk to an RDMA-enabled
+    Disable if you don't want Vitastor to use RDMA. TCP-only clients can also
-    cluster, so disabling RDMA may be needed if clients have RDMA devices,
+    talk to an RDMA-enabled cluster, so disabling RDMA may be needed if clients
-    but they are not connected to the cluster.
+    have RDMA devices, but they are not connected to the cluster.
    `use_rdma` works with RoCEv1/RoCEv2 networks, but not with iWARP and,
    maybe, with some Infiniband configurations which require RDMA-CM.
    Consider `use_rdmacm` for such networks.
  info_ru: |
-    Пытаться использовать RDMA для связи при наличии доступных устройств.
+    Попробовать использовать RDMA через libibverbs для связи при наличии
-    Отключите, если вы не хотите, чтобы Vitastor использовал RDMA.
+    доступных устройств. Отключите, если вы не хотите, чтобы Vitastor
-    TCP-клиенты также могут работать с RDMA-кластером, так что отключать
+    использовал RDMA. TCP-клиенты также могут работать с RDMA-кластером,
-    RDMA может быть нужно только если у клиентов есть RDMA-устройства,
+    так что отключать RDMA может быть нужно, только если у клиентов есть
-    но они не имеют соединения с кластером Vitastor.
+    RDMA-устройства, но они не имеют соединения с кластером Vitastor.
    `use_rdma` работает с RoCEv1/RoCEv2 сетями, но не работает с iWARP и
    может не работать с частью конфигураций Infiniband, требующих RDMA-CM.
    Рассмотрите включение `use_rdmacm` для таких сетей.
 - name: use_rdmacm
  type: bool
  default: false
  info: |
    Use an alternative implementation of RDMA through RDMA-CM (Connection
    Manager). Works with all RDMA networks: Infiniband, iWARP and
    RoCEv1/RoCEv2, and even allows disabling TCP and running only with RDMA.
    OSDs always use random port numbers for RDMA-CM listeners, different
    from their TCP ports. `use_rdma` is automatically disabled when
    `use_rdmacm` is enabled.
  info_ru: |
    Использовать альтернативную реализацию RDMA на основе RDMA-CM (Connection
    Manager). Работает со всеми типами RDMA-сетей: Infiniband, iWARP и
    RoCEv1/RoCEv2, и даже позволяет полностью отключить TCP и работать
    только на RDMA. OSD используют случайные номера портов для ожидания
    соединений через RDMA-CM, отличающиеся от их TCP-портов. Также при
    включении `use_rdmacm` автоматически отключается опция `use_rdma`.
 - name: disable_tcp
  type: bool
  default: false
  info: |
    Fully disable TCP and only use RDMA-CM for OSD communication.
  info_ru: |
    Полностью отключить TCP и использовать только RDMA-CM для соединений с OSD.
 - name: rdma_device
  type: string
  info: |
    RDMA device name to use for Vitastor OSD communications (for example,
-    "rocep5s0f0"). Now Vitastor supports all adapters, even ones without
+    "rocep5s0f0"). If not specified, Vitastor will try to find an RoCE
-    ODP support, like Mellanox ConnectX-3 and non-Mellanox cards.
+    device matching [osd_network](#osd_network), preferring RoCEv2,
    or choose the first available RDMA device if no RoCE devices are
    found or if `osd_network` is not specified. Auto-selection is also
    unsupported with old libibverbs < v32, like in Debian 10 Buster or
    CentOS 7.
-    Versions up to Vitastor 1.2.0 required ODP which is only present in
+    Vitastor supports all adapters, even ones without ODP support, like
-    Mellanox ConnectX >= 4. See also [rdma_odp](#rdma_odp).
+    Mellanox ConnectX-3 and non-Mellanox cards. Versions up to Vitastor
    1.2.0 required ODP which is only present in Mellanox ConnectX >= 4.
    See also [rdma_odp](#rdma_odp).
    Run `ibv_devinfo -v` as root to list available RDMA devices and their
    features.
@@ -64,12 +99,17 @@
    PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
  info_ru: |
    Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
-    Сейчас Vitastor поддерживает все модели адаптеров, включая те, у которых
+    Если не указано, Vitastor попробует найти RoCE-устройство, соответствующее
-    нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
+    [osd_network](#osd_network), предпочитая RoCEv2, или выбрать первое
-    картами производства не Mellanox.
+    попавшееся RDMA-устройство, если RoCE-устройств нет или если сеть `osd_network`
    не задана. Также автовыбор не поддерживается со старыми версиями библиотеки
    libibverbs < v32, например в Debian 10 Buster или CentOS 7.
-    Версии Vitastor до 1.2.0 включительно требовали ODP, который есть только
+    Vitastor поддерживает все модели адаптеров, включая те, у которых
-    на Mellanox ConnectX 4 и более новых. См. также [rdma_odp](#rdma_odp).
+    нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
    картами производства не Mellanox. Версии Vitastor до 1.2.0 включительно
    требовали ODP, который есть только на Mellanox ConnectX 4 и более новых.
    См. также [rdma_odp](#rdma_odp).
    Запустите `ibv_devinfo -v` от имени суперпользователя, чтобы посмотреть
    список доступных RDMA-устройств, их параметры и возможности.
@@ -82,44 +122,56 @@
    Control) и ECN (Explicit Congestion Notification).
 - name: rdma_port_num
  type: int
  default: 1
  info: |
    RDMA device port number to use. Only for devices that have more than 1 port.
    See `phys_port_cnt` in `ibv_devinfo -v` output to determine how many ports
    your device has.
    Not relevant for RDMA-CM (use_rdmacm).
  info_ru: |
    Номер порта RDMA-устройства, который следует использовать. Имеет смысл
    только для устройств, у которых более 1 порта. Чтобы узнать, сколько портов
    у вашего адаптера, посмотрите `phys_port_cnt` в выводе команды
    `ibv_devinfo -v`.
    Опция неприменима к RDMA-CM (use_rdmacm).
 - name: rdma_gid_index
  type: int
  default: 0
  info: |
    Global address identifier index of the RDMA device to use. Different GID
    indexes may correspond to different protocols like RoCEv1, RoCEv2 and iWARP.
    Search for "GID" in `ibv_devinfo -v` output to determine which GID index
    you need.
-    **IMPORTANT:** If you want to use RoCEv2 (as recommended) then the correct
+    If not specified, Vitastor will try to auto-select a RoCEv2 IPv4 GID, then
-    rdma_gid_index is usually 1 (IPv6) or 3 (IPv4).
+    RoCEv2 IPv6 GID, then RoCEv1 IPv4 GID, then RoCEv1 IPv6 GID, then IB GID.
    GID auto-selection is unsupported with libibverbs < v32.
    A correct rdma_gid_index for RoCEv2 is usually 1 (IPv6) or 3 (IPv4).
    Not relevant for RDMA-CM (use_rdmacm).
  info_ru: |
    Номер глобального идентификатора адреса RDMA-устройства, который следует
    использовать. Разным gid_index могут соответствовать разные протоколы связи:
    RoCEv1, RoCEv2, iWARP. Чтобы понять, какой нужен вам - смотрите строчки со
    словом "GID" в выводе команды `ibv_devinfo -v`.
-    **ВАЖНО:** Если вы хотите использовать RoCEv2 (как мы и рекомендуем), то
+    Если не указан, Vitastor попробует автоматически выбрать сначала GID,
-    правильный rdma_gid_index, как правило, 1 (IPv6) или 3 (IPv4).
+    соответствующий RoCEv2 IPv4, потом RoCEv2 IPv6, потом RoCEv1 IPv4, потом
    RoCEv1 IPv6, потом IB. Авто-выбор GID не поддерживается со старыми версиями
    libibverbs < v32.
    Правильный rdma_gid_index для RoCEv2, как правило, 1 (IPv6) или 3 (IPv4).
    Опция неприменима к RDMA-CM (use_rdmacm).
 - name: rdma_mtu
  type: int
  default: 4096
  info: |
-    RDMA Path MTU to use. Must be 1024, 2048 or 4096. There is usually no
+    RDMA Path MTU to use. Must be 1024, 2048 or 4096. Default is to use the
-    sense to change it from the default 4096.
+    RDMA device's MTU.
  info_ru: |
    Максимальная единица передачи (Path MTU) для RDMA. Должно быть равно 1024,
-    2048 или 4096. Обычно нет смысла менять значение по умолчанию, равное 4096.
+    2048 или 4096. По умолчанию используется значение MTU RDMA-устройства.
 - name: rdma_max_sge
  type: int
  default: 128
@@ -289,3 +341,47 @@
    detect disconnections quickly.
  info_ru: |
    Интервал проверки живости вебсокет-подключений к etcd.
 - name: etcd_min_reload_interval
  type: ms
  default: 1000
  online: true
  info: |
    Minimum interval for full etcd state reload. Introduced to prevent
    excessive load on etcd during outages when etcd can't keep up with event
    streams and cancels them.
  info_ru: |
    Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
    предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
    успевает рассылать потоки событий и отменяет их.
 - name: tcp_header_buffer_size
  type: int
  default: 65536
  info: |
    Size of the buffer used to read data using an additional copy. Vitastor
    packet headers are 128 bytes, payload is always at least 4 KB, so it is
    usually beneficial to try to read multiple packets at once even though
    it requires to copy the data an additional time. The rest of each packet
    is received without an additional copy. You can try to play with this
    parameter and see how it affects random iops and linear bandwidth if you
    want.
  info_ru: |
    Размер буфера для чтения данных с дополнительным копированием. Пакеты
    Vitastor содержат 128-байтные заголовки, за которыми следуют данные размером
    от 4 КБ и для мелких операций ввода-вывода обычно выгодно за 1 вызов читать
    сразу несколько пакетов, даже несмотря на то, что это требует лишний раз
    скопировать данные. Часть каждого пакета за пределами значения данного
    параметра читается без дополнительного копирования. Вы можете попробовать
    поменять этот параметр и посмотреть, как он влияет на производительность
    случайного и линейного доступа.
 - name: use_sync_send_recv
  type: bool
  default: false
  info: |
    If true, synchronous send/recv syscalls are used instead of io_uring for
    socket communication. Useless for OSDs because they require io_uring anyway,
    but may be required for clients with old kernel versions.
  info_ru: |
    Если установлено в истину, то вместо io_uring для передачи данных по сети
    будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
    это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
    принципе, это может применяться для клиентов со старыми версиями ядра.
--- a/docs/config/src/osd.en.md
+++ b/docs/config/src/osd.en.md
@@ -1,5 +1,5 @@
 # Runtime OSD Parameters
 These parameters only apply to OSDs, are not fixed at the moment of OSD drive
-initialization and can be changed - either with an OSD restart or, for some of
+initialization and can be changed - in /etc/vitastor/vitastor.conf or [vitastor-disk update-sb](../usage/disk.en.md#update-sb)
-them, even without restarting by updating configuration in etcd.
+with an OSD restart or, for some of them, even without restarting by updating configuration in etcd.
--- a/docs/config/src/osd.ru.md
+++ b/docs/config/src/osd.ru.md
@@ -2,5 +2,5 @@
 Данные параметры используются только OSD, но, в отличие от дисковых параметров,
 не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
-момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
+момент с перезапуском OSD в /etc/vitastor/vitastor.conf или [vitastor-disk update-sb](../usage/disk.ru.md#update-sb),
-изменения конфигурации в etcd.
+а некоторые и без перезапуска, с помощью изменения конфигурации в etcd.
--- a/docs/config/src/osd.yml
+++ b/docs/config/src/osd.yml
@@ -1,3 +1,26 @@
 - name: bind_address
  type: string or array of strings
  type_ru: строка или массив строк
  info: |
    Instead of the network masks ([osd_network](network.en.md#osd_network) and
    [osd_cluster_network](network.en.md#osd_cluster_network)), you can also set
    OSD listen addresses explicitly using this parameter. May be useful if you
    want to start OSDs on interfaces that are not UP + RUNNING.
  info_ru: |
    Вместо использования масок подсети ([osd_network](network.ru.md#osd_network) и
    [osd_cluster_network](network.ru.md#osd_cluster_network)), вы также можете явно
    задать адрес(а), на которых будут ожидать соединений OSD, с помощью данного
    параметра. Это может быть полезно, например, чтобы запускать OSD на неподнятых
    интерфейсах (не UP + RUNNING).
 - name: bind_port
  type: int
  info: |
    By default, OSDs pick random ports to use for incoming connections
    automatically. With this option you can set a specific port for a specific
    OSD by hand.
  info_ru: |
    По умолчанию OSD сами выбирают случайные порты для входящих подключений.
    С помощью данной опции вы можете задать порт для отдельного OSD вручную.
 - name: osd_iothread_count
  type: int
  default: 0
@@ -56,44 +79,6 @@
    реализовать дополнительный режим для монитора, который позволит отделять
    первичные OSD от вторичных, но пока не понятно, зачем это может кому-то
    понадобиться, поэтому это не реализовано.
 - name: osd_network
  type: string or array of strings
  type_ru: строка или массив строк
  info: |
    Network mask of the network (IPv4 or IPv6) to use for OSDs. Note that
    although it's possible to specify multiple networks here, this does not
    mean that OSDs will create multiple listening sockets - they'll only
    pick the first matching address of an UP + RUNNING interface. Separate
    networks for cluster and client connections are also not implemented, but
    they are mostly useless anyway, so it's not a big deal.
  info_ru: |
    Маска подсети (IPv4 или IPv6) для использования для соединений с OSD.
    Имейте в виду, что хотя сейчас и можно передать в этот параметр несколько
    подсетей, это не означает, что OSD будут создавать несколько слушающих
    сокетов - они лишь будут выбирать адрес первого поднятого (состояние UP +
    RUNNING), подходящий под заданную маску. Также не реализовано разделение
    кластерной и публичной сетей OSD. Правда, от него обычно всё равно довольно
    мало толку, так что особенной проблемы в этом нет.
 - name: bind_address
  type: string
  default: "0.0.0.0"
  info: |
    Instead of the network mask, you can also set OSD listen address explicitly
    using this parameter. May be useful if you want to start OSDs on interfaces
    that are not UP + RUNNING.
  info_ru: |
    Этим параметром можно явным образом задать адрес, на котором будет ожидать
    соединений OSD (вместо использования маски подсети). Может быть полезно,
    например, чтобы запускать OSD на неподнятых интерфейсах (не UP + RUNNING).
 - name: bind_port
  type: int
  info: |
    By default, OSDs pick random ports to use for incoming connections
    automatically. With this option you can set a specific port for a specific
    OSD by hand.
  info_ru: |
    По умолчанию OSD сами выбирают случайные порты для входящих подключений.
    С помощью данной опции вы можете задать порт для отдельного OSD вручную.
 - name: autosync_interval
  type: sec
  default: 5
@@ -315,7 +300,7 @@
    decrease write performance for fast disks because page cache is an overhead
    itself.
-    Choose "directsync" to use [immediate_commit](layout-cluster.ru.md#immediate_commit)
+    Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
    (which requires disable_data_fsync) with drives having write-back cache
    which can't be turned off, for example, Intel Optane. Also note that *some*
    desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
@@ -765,3 +750,34 @@
    Максимальное возможное значение авто-подстроенного recovery_sleep_us.
    Большие значения считаются случайными выбросами и игнорируются в
    усреднении.
 - name: discard_on_start
  type: bool
  info: Discard (SSD TRIM) unused data device blocks on every OSD startup.
  info_ru: Освобождать (SSD TRIM) неиспользуемые блоки диска данных при каждом запуске OSD.
 - name: min_discard_size
  type: int
  default: 1048576
  info: Minimum size of a consecutive unused block for it to be TRIMmed.
  info_ru: Минимальный размер последовательного блока данных, чтобы освобождать его через TRIM.
 - name: allow_net_split
  type: bool
  default: false
  info: |
    Allow "safe" cases of network splits/partitions - allow to start PGs without
    connections to some OSDs currently registered as alive in etcd, if the number
    of actually connected PG OSDs is at least pg_minsize. That is, allow some OSDs to lose
    connectivity with some other OSDs as long as it doesn't break pg_minsize guarantees.
    The downside is that it increases the probability of writing data into just pg_minsize
    OSDs during failover which can lead to PGs becoming incomplete after additional outages.
    The old behaviour in versions up to 2.0.0 was equal to enabled allow_net_split.
  info_ru: |
    Разрешить "безопасные" случаи разделений сети - разрешить активировать PG без
    соединений к некоторым OSD, помеченным активными в etcd, если общее число активных
    OSD в PG составляет как минимум pg_minsize. То есть, разрешать некоторым OSD терять
    соединения с некоторыми другими OSD, если это не нарушает гарантий pg_minsize.
    Минус такого разрешения в том, что оно повышает вероятность записи данных ровно в
    pg_minsize OSD во время переключений, что может потом привести к тому, что PG станут
    неполными (incomplete), если упадут ещё какие-то OSD.
    Старое поведение в версиях до 2.0.0 было идентично включённому allow_net_split.
--- a/docs/installation/docker.en.md
+++ b/docs/installation/docker.en.md
@@ -0,0 +1,60 @@
 [Documentation](../../README.md#documentation) → Installation → Dockerized Installation
 -----
 [Читать на русском](docker.ru.md)
 # Dockerized Installation
 Vitastor may be installed in Docker/Podman. In such setups etcd, monitors and OSD
 all run in containers, but everything else looks as close as possible to a usual
 setup with packages:
 - host network is used
 - auto-start is implemented through udev and systemd
 - logs are written to journald (not docker json log files)
 - command-line wrapper scripts are installed to the host system to call vitastor-disk,
  vitastor-cli and others through the container
 Such installations may be useful when it's impossible or inconvenient to install
 Vitastor from packages, for example, in exotic Linux distributions.
 If you don't want just a simple containerized installation, you can also take a look
 at Vitastor Kubernetes operator: https://github.com/Antilles7227/vitastor-operator
 ## Installing Containers
 The instructions are very simple.
 1. Download a Docker image of the desired version: \
   `docker pull vitastor:2.1.0`
 2. Install scripts to the host system: \
   `docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:2.1.0 install.sh`
 3. Reload udev rules: \
   `udevadm control --reload-rules`
 And you can return to [Quick Start](../intro/quickstart.en.md).
 ## Upgrading Containers
 First make sure to check the topic [Upgrading Vitastor](../usage/admin.en.md#upgrading-vitastor)
 to figure out if you need any additional steps.
 Then, to upgrade a containerized installation, you just need to change the `VITASTOR_VERSION`
 option in `/etc/vitastor/docker.conf` and restart all Vitastor services:
 `systemctl restart vitastor.target`
 ## QEMU
 Vitastor Docker image also contains QEMU, qemu-img and qemu-storage-daemon built with Vitastor support.
 However, running QEMU in Docker is harder to set up and it depends on the used virtualization UI
 (OpenNebula, Proxmox and so on). Some of them also require a patched Libvirt.
 That's why containerized installation of Vitastor doesn't contain a ready-made QEMU setup and it's
 recommended to install QEMU from packages or build it manually.
 ## fio
 Vitastor Docker image also contains fio and installs a wrapper called `vitastor-fio` to use it from
 the host system.
--- a/docs/installation/docker.ru.md
+++ b/docs/installation/docker.ru.md
@@ -0,0 +1,60 @@
 [Документация](../../README-ru.md#документация) → Установка → Установка в Docker
 -----
 [Read in English](docker.en.md)
 # Установка в Docker
 Vitastor можно установить в Docker/Podman. При этом etcd, мониторы и OSD запускаются
 в контейнерах, но всё остальное выглядит максимально приближенно к установке из пакетов:
 - используется сеть хост-системы
 - для автозапуска используются udev и systemd
 - журналы записываются в journald (не в json-файлы журналов docker)
 - в хост-систему устанавливаются обёртки для вызова консольных инструментов vitastor-disk,
  vitastor-cli и других через контейнер
 Такая установка полезна тогда, когда установка из пакетов невозможна или неудобна,
 например, в нестандартных Linux-дистрибутивах.
 Если вам нужна не просто контейнеризованная инсталляция, вы также можете обратить внимание
 на Vitastor Kubernetes-оператор: https://github.com/Antilles7227/vitastor-operator
 ## Установка контейнеров
 Инструкция по установке максимально простая.
 1. Скачайте Docker-образ желаемой версии: \
   `docker pull vitastor:2.1.0`
 2. Установите скрипты в хост-систему командой: \
   `docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:2.1.0 install.sh`
 3. Перезагрузите правила udev: \
   `udevadm control --reload-rules`
 После этого вы можете возвращаться к разделу [Быстрый старт](../intro/quickstart.ru.md).
 ## Обновление контейнеров
 Сначала обязательно проверьте раздел [Обновление Vitastor](../usage/admin.ru.md#обновление-vitastor),
 чтобы понять, не требуются ли вам какие-то дополнительные действия.
 После этого для обновления Docker-инсталляции вам нужно просто поменять опцию `VITASTOR_VERSION`
 в файле `/etc/vitastor/docker.conf` и перезапустить все сервисы Vitastor командой:
 `systemctl restart vitastor.target`
 ## QEMU
 В Docker-образ также входят QEMU, qemu-img и qemu-storage-daemon, собранные с поддержкой Vitastor.
 Однако настроить запуск QEMU в Docker сложнее и способ запуска зависит от используемого интерфейса
 виртуализации (OpenNebula, Proxmox и т.п.). Также для OpenNebula, например, требуется патченый
 Libvirt.
 Поэтому по умолчанию Docker-сборка пока что не включает в себя готового способа запуска QEMU
 и QEMU рекомендуется устанавливать из пакетов или собирать самостоятельно.
 ## fio
 fio также входит в Docker-контейнер vitastor, и в хост-систему устанавливается обёртка `vitastor-fio`
 для запуска fio в контейнере.
--- a/docs/installation/kubernetes.en.md
+++ b/docs/installation/kubernetes.en.md
@@ -6,9 +6,18 @@
 # Kubernetes CSI
-Vitastor has a CSI plugin for Kubernetes which supports RWO (and block RWX) volumes.
+Vitastor has a CSI plugin for Kubernetes which supports block-based and VitastorFS-based volumes.
-To deploy it, take manifests from [csi/deploy/](../../csi/deploy/) directory, put your
+Block-based volumes may be formatted and mounted with a normal FS (ext4 or xfs). Such volumes
 only support RWO (ReadWriteOnce) mode.
 Block-based volumes may also be left without FS and attached into the container as a block
 device. Such volumes also support RWX (ReadWriteMany) mode.
 VitastorFS-based volumes use a clustered file system and support FS-based RWX (ReadWriteMany)
 mode. However, such volumes don't support quotas and snapshots.
 To deploy the CSI plugin, take manifests from [csi/deploy/](../../csi/deploy/) directory, put your
 Vitastor configuration in [001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
 configure storage class in [009-storage-class.yaml](../../csi/deploy/009-storage-class.yaml)
 and apply all `NNN-*.yaml` manifests to your Kubernetes installation:
@@ -23,16 +32,16 @@ After that you'll be able to create PersistentVolumes.
 kernel modules enabled (vdpa, vduse, virtio-vdpa). If your distribution doesn't
 have them pre-built - build them yourself ([instructions](../usage/qemu.en.md#vduse)),
 I promise it's worth it :-). When VDUSE is unavailable, CSI driver uses [NBD](../usage/nbd.en.md)
-to map Vitastor devices. NBD is slower and prone to timeout issues: if Vitastor
+to map Vitastor devices. NBD is slower and, with kernels older than 5.19, unmountable
-cluster becomes unresponsible for more than [nbd_timeout](../config/client.en.md#nbd_timeout),
+if the cluster becomes unresponsive.
 the NBD device detaches and breaks pods using it.
 ## Features
 Vitastor CSI supports:
 - Kubernetes starting with 1.20 (or 1.17 for older vitastor-csi <= 1.1.0)
- Filesystem RWO (ReadWriteOnce) volumes. Example: [PVC](../../csi/deploy/example-pvc.yaml), [pod](../../csi/deploy/example-test-pod.yaml)
+- Block-based FS-formatted RWO (ReadWriteOnce) volumes. Example: [PVC](../../csi/deploy/example-pvc.yaml), [pod](../../csi/deploy/example-test-pod.yaml)
 - Raw block RWX (ReadWriteMany) volumes. Example: [PVC](../../csi/deploy/example-pvc-block.yaml), [pod](../../csi/deploy/example-test-pod-block.yaml)
 - VitastorFS-based RWX (ReadWriteMany) volumes. Example: [storage class](../../csi/deploy/example-storage-class-fs.yaml)
 - Volume expansion
 - Volume snapshots. Example: [snapshot class](../../csi/deploy/example-snapshot-class.yaml), [snapshot](../../csi/deploy/example-snapshot.yaml), [clone](../../csi/deploy/example-snapshot-clone.yaml)
 - [VDUSE](../usage/qemu.en.md#vduse) (preferred) and [NBD](../usage/nbd.en.md) device mapping methods
--- a/docs/installation/kubernetes.ru.md
+++ b/docs/installation/kubernetes.ru.md
@@ -6,7 +6,17 @@
 # Kubernetes CSI
-У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.
+У Vitastor есть CSI-плагин для Kubernetes, поддерживающий блочные тома и тома на основе
 кластерной ФС VitastorFS.
 Блочные тома могут быть отформатированы и примонтированы со стандартной ФС (ext4 или xfs).
 Такие тома поддерживают только режим RWO (ReadWriteOnce, одновременный доступ с одного узла).
 Блочные тома также могут не форматироваться и подключаться в контейнер в виде блочного устройства.
 В таком случае их можно подключать в режиме RWX (ReadWriteMany, одновременный доступ с многих узлов).
 Тома на основе VitastorFS используют кластерную ФС и поэтому также поддерживают режим RWX
 (ReadWriteMany). Однако, такие тома не поддерживают ограничение размера и снимки.
 Для установки возьмите манифесты из директории [csi/deploy/](../../csi/deploy/), поместите
 вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
@@ -33,6 +43,7 @@ CSI-плагин Vitastor поддерживает:
 - Версии Kubernetes, начиная с 1.20 (или с 1.17 для более старых vitastor-csi <= 1.1.0)
 - Файловые RWO (ReadWriteOnce) тома. Пример: [PVC](../../csi/deploy/example-pvc.yaml), [под](../../csi/deploy/example-test-pod.yaml)
 - Сырые блочные RWX (ReadWriteMany) тома. Пример: [PVC](../../csi/deploy/example-pvc-block.yaml), [под](../../csi/deploy/example-test-pod-block.yaml)
 - Основанные на VitastorFS RWX (ReadWriteMany) тома. Пример: [класс хранения](../../csi/deploy/example-storage-class-fs.yaml)
 - Расширение размера томов
 - Снимки томов. Пример: [класс снимков](../../csi/deploy/example-snapshot-class.yaml), [снимок](../../csi/deploy/example-snapshot.yaml), [клон снимка](../../csi/deploy/example-snapshot-clone.yaml)
 - Способы подключения устройств [VDUSE](../usage/qemu.ru.md#vduse) (предпочитаемый) и [NBD](../usage/nbd.ru.md)
--- a/docs/installation/packages.en.md
+++ b/docs/installation/packages.en.md
@@ -14,6 +14,7 @@
  - Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
  - Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
  - Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
  - Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
  - Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
    stable version from 0.9.x branch instead of 1.x
 - Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
--- a/docs/installation/packages.ru.md
+++ b/docs/installation/packages.ru.md
@@ -14,6 +14,7 @@
  - Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
  - Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
  - Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
  - Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
  - Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
    установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
 - Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
--- a/docs/installation/s3.en.md
+++ b/docs/installation/s3.en.md
@@ -0,0 +1,191 @@
 [Documentation](../../README.md#documentation) → Installation → S3 for Vitastor
 -----
 [Читать на русском](s3.ru.md)
 # S3 for Vitastor
 The moment has come - Vitastor S3 implementation based on Zenko CloudServer is released.
 ## Highlights
 - Zenko CloudServer is implemented in node.js.
 - Object metadata is stored in MongoDB.
 - Modified Zenko CloudServer version is used for Vitastor. It is slightly different from
  the original, has an optimised build and unneeded dependencies are stripped off.
 - Object data is stored in Vitastor block volumes, but the volume metadata is stored in
  the same MongoDB, not in Vitastor etcd.
 - Objects are written to volumes sequentially one after another. The space is allocated
  with rounding to the sector size (4 KB), so each object takes at least 4 KB.
 - An important property of such a storage scheme is that small objects aren't chunked into
  parts in Vitastor EC N+K pools and thus don't require reads from all N disks when
  downloading.
 - Deleted objects are marked as deleted, but the space is only actually freed during
  asynchronously executed "defragmentation" process. Defragmentation runs automatically
  in the background when a volume reaches configured amount of "garbage" (20% by default).
  Defragmentation copies actual objects to new volume(s) and then removes the old volume.
  Defragmentation can be configured in locationConfig.json.
 ## Plans for future development
 - User account storage in the DB instead of a static file. Original Zenko uses
  a separate closed-source "Scality Vault" service for it, that's why we use
  a static file for now.
 - More detailed documentation.
 - Support for other (and faster) key-value DBMS for object metadata storage.
 - Other performance optimisations, for example, related to the used hash function -
  MD5 used for Amazon compatibility purposes is relatively slow.
 - Object Lifecycle support. There is a Lifecycle implementation for Zenko called
  [Backbeat](https://github.com/scality/backbeat) but it's not adapted for Vitastor yet.
 - Quota support. Original Zenko uses a separate "SCUBA" service for quotas, but
  it's also proprietary and not available publicly.
 ## Installation
 In a few words:
 - Install MongoDB, create a user for S3 metadata DB.
 - Create a Vitastor pool for S3 data.
 - Download and setup the Docker container `vitalif/vitastor-zenko`.
 ### Setup MongoDB
 You can setup MongoDB yourself, following the [MongoDB manual](https://www.mongodb.com/docs/manual/installation/).
 Or you can follow the instructions below - they describe a simple example of MongoDB setup
 in Docker (through docker-compose) with 3 replicas.
 1. On each host, create a file `docker-compose.yml` with the content listed below.
   Replace `<YOUR_PASSWORD>` with your future mongodb administrator password, and optionally
   replace `0.0.0.0` with `localhost,<server_IP>`. It's recommended to either use a private IP
   or [setup TLS](https://www.mongodb.com/docs/manual/tutorial/configure-ssl/) afterwards.
 ```
 version: '3.1'
 services:
  mongo:
    container_name: mongo
    image: mongo:7-jammy
    restart: always
    environment:
      MONGO_INITDB_ROOT_USERNAME: root
      MONGO_INITDB_ROOT_PASSWORD: <YOUR_PASSWORD>
    network_mode: host
    volumes:
      - ./keyfile:/opt/keyfile
      - ./mongo-data/db:/data/db
      - ./mongo-data/configdb:/data/configdb
    entrypoint: /bin/bash -c
    command: [ "chown mongodb /opt/keyfile && chmod 600 /opt/keyfile && . /usr/local/bin/docker-entrypoint.sh mongod --replSet rs0 --keyFile /opt/keyfile --bind_ip 0.0.0.0" ]
 ```
 2. Generate a shared cluster key using `openssl rand -base64 756 > ./keyfile` and copy
   that `keyfile` to all hosts.
 3. Start MongoDB on all hosts with `docker compose up -d mongo`.
 4. Enter Mongo Shell with `docker exec -it mongo mongosh -u root -p <YOUR_PASSWORD> localhost/admin`
   and execute the following command (replace IP addresses `10.10.10.{1,2,3}` with your host IPs):
 `rs.initiate({ _id: 'rs0', members: [
  { _id: 1, host: '10.10.10.1:27017' },
  { _id: 2, host: '10.10.10.2:27017' },
  { _id: 3, host: '10.10.10.3:27017' }
 ] })`
 5. Stay in Mongo Shell and create a user for the future S3 database:
 `db.createUser({ user: 's3', pwd: '<YOUR_S3_PASSWORD>', roles: [
  { role: 'readWrite', db: 's3' },
  { role: 'dbAdmin', db: 's3' },
  { role: 'readWrite', db: 'vitastor' },
  { role: 'dbAdmin', db: 'vitastor' }
 ] })`
 ### Setup Vitastor
 Create a pool in Vitastor for S3 object data, for example:
 `vitastor-cli create-pool --ec 2+1 -n 512 s3-data --used_for_app s3:standard`
 The `--used_for_app` option works as fool-proofing and prevents you from
 accidentally creating a regular block volume in the S3 pool and overwriting some S3 data.
 Also it hides inode space statistics from Vitastor etcd.
 Retrieve the ID of your pool with `vitastor-cli ls-pools s3-data --detail`.
 ### Setup Vitastor S3
 1. Add the following lines to `docker-compose.yml` (instead of `network_mode: host`,
   you can use `ports: [ "8000:8000", "8002:8002" ]`):
 ```
  zenko:
    container_name: zenko
    image: vitalif/vitastor-zenko
    restart: always
    security_opt:
      - seccomp:unconfined
    ulimits:
      memlock: -1
    network_mode: host
    volumes:
      - /etc/vitastor:/etc/vitastor
      - /etc/vitastor/s3:/conf
 ```
 2. Download Docker image: `docker pull vitalif/vitastor-zenko`
 3. Extract configuration file examples from the Docker image:
   ```
   docker run --rm -it -v /etc/vitastor:/etc/vitastor -v /etc/vitastor/s3:/conf vitalif/vitastor-zenko configure.sh
   ```
 4. Edit configuration files in `/etc/vitastor/s3/`:
   - `config.json` - common settings.
   - `authdata.json` - user accounts and access keys.
   - `locationConfig.json` - S3 storage class list with placement settings.
     Note: it actually contains storage classes (like STANDARD, COLD, etc)
     instead of "locations" (zones like us-east-1) as in the original Zenko CloudServer.
   - Put your MongoDB connection data into `config.json` and `locationConfig.json`.
   - Put your Vitastor pool ID into `locationConfig.json`.
   - For now, the complete list of Vitastor backend settings is only available [in the code](https://git.yourcmc.ru/vitalif/zenko-arsenal/src/branch/master/lib/storage/data/vitastor/VitastorBackend.ts#L94).
 ### Start Zenko
 Start the S3 server with:
 ```
 docker run --restart always --security-opt seccomp:unconfined --ulimit memlock=-1 --network=host \
    -v /etc/vitastor:/etc/vitastor -v /etc/vitastor/s3:/conf --name zenko vitalif/vitastor-zenko
 ```
 If you use default settings, Zenko CloudServer starts on port 8000.
 The default access key is `accessKey1` with a secret key of `verySecretKey1`.
 Now you can access your S3 with, for example, [s3cmd](https://s3tools.org/s3cmd):
 ```
 s3cmd --access_key=accessKey1 --secret_key=verySecretKey1 --host=http://localhost:8000 mb s3://testbucket
 ```
 Or even mount it with [GeeseFS](https://github.com/yandex-cloud/geesefs):
 ```
 AWS_ACCESS_KEY_ID=accessKey1 \
    AWS_SECRET_ACCESS_KEY=verySecretKey1 \
    geesefs --endpoint http://localhost:8000 testbucket mountdir
 ```
 ## Author & License
 - [Zenko CloudServer](https://s3-server.readthedocs.io/en/latest/) author is Scality,
  licensed under [Apache License, version 2.0](https://www.apache.org/licenses/LICENSE-2.0)
 - [Vitastor](https://git.yourcmc.ru/vitalif/vitastor/) and Zenko Vitastor backend author is
  Vitaliy Filippov, licensed under [VNPL-1.1](https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/VNPL-1.1.txt)
  (a "network copyleft" license based on AGPL/SSPL, but worded in a better way)
 - Vitastor S3 repository: https://git.yourcmc.ru/vitalif/zenko-cloudserver-vitastor
 - Vitastor S3 backend code: https://git.yourcmc.ru/vitalif/zenko-arsenal/src/branch/master/lib/storage/data/vitastor/VitastorBackend.ts
--- a/docs/installation/s3.ru.md
+++ b/docs/installation/s3.ru.md
@@ -0,0 +1,171 @@
 [Документация](../../README-ru.md#документация) → Установка → S3 на базе Vitastor
 -----
 [Read in English](s3.en.md)
 # S3 на базе Vitastor
 Итак, свершилось - реализация Vitastor S3 на базе Zenko CloudServer достигла
 состояния готовности к публикации и использованию.
 ## Ключевые особенности
 - Zenko CloudServer реализован на node.js.
 - Метаданные объектов хранятся в MongoDB.
 - Поставляется модифицированная версия Zenko CloudServer, отвязанная от лишних зависимостей,
  с оптимизированной сборкой и немного отличающаяся от оригинала.
 - Данные объектов хранятся в блочных томах Vitastor, однако информация о самих томах
  сохраняется не в etcd Vitastor, а тоже в БД на основе MongoDB.
 - Объекты записываются в тома последовательно друг за другом. Место выделяется с округлением
  до размера сектора (до 4 килобайт), поэтому каждый объект занимает как минимум 4 КБ.
 - Благодаря такой схеме записи объектов мелкие объекты не нарезаются на части и поэтому не
  требуют чтения с N дисков данных в EC N+K пулах Vitastor.
 - При удалении объекты помечаются удалёнными, но место освобождается не сразу, а при
  запускаемой асинхронно "дефрагментации". Дефрагментация запускается автоматически в фоне
  при достижении заданного объёма "мусора" в томе (по умолчанию 20%), копирует актуальные
  объекты в новые тома, после чего очищает старый том полностью. Дефрагментацию можно
  настраивать в locationConfig.json.
 ## Планы развития
 - Хранение учётных записей в БД, а не в статическом файле (в оригинальном Zenko для
  этого используется отдельный закрытый сервис "Scality Vault").
 - Более подробная документация.
 - Поддержка других (и более производительных) key-value СУБД для хранения метаданных.
 - Другие оптимизации производительности, например, в области используемой хеш-функции
  (хеш MD5, используемый в целях совместимости, относительно медленный).
 - Поддержка Object Lifecycle. Реализация Lifecycle для Zenko существует и называется
  [Backbeat](https://github.com/scality/backbeat), но она ещё не адаптирована для Vitastor.
 - Квоты. В оригинальном Zenko для этого используется отдельный сервис "SCUBA", однако
  он тоже является закрытым и недоступен для публичного использования.
 ## Установка
 Кратко:
 - Установите MongoDB, создайте пользователя для БД метаданных S3.
 - Создайте в Vitastor пул для хранения данных объектов.
 - Скачайте и настройте Docker-контейнер `vitalif/vitastor-zenko`.
 ### Установка MongoDB
 Вы можете установить MongoDB сами, следуя [официальному руководству MongoDB](https://www.mongodb.com/docs/manual/installation/).
 Либо вы можете последовать инструкции, приведённой ниже - здесь описан простейший пример
 установки MongoDB в Docker (docker-compose) в конфигурации с 3 репликами.
 1. На всех 3 серверах создайте файл `docker-compose.yml`, заменив `<ВАШ_ПАРОЛЬ>`
   на собственный будущий пароль администратора mongodb, а `0.0.0.0` по желанию
   заменив на `localhost,<IP_сервера>` - желательно либо использовать публично не доступный IP,
   либо потом [настроить TLS](https://www.mongodb.com/docs/manual/tutorial/configure-ssl/).
 ```
 version: '3.1'
 services:
  mongo:
    container_name: mongo
    image: mongo:7-jammy
    restart: always
    environment:
      MONGO_INITDB_ROOT_USERNAME: root
      MONGO_INITDB_ROOT_PASSWORD: <ВАШ_ПАРОЛЬ>
    network_mode: host
    volumes:
      - ./keyfile:/opt/keyfile
      - ./mongo-data/db:/data/db
      - ./mongo-data/configdb:/data/configdb
    entrypoint: /bin/bash -c
    command: [ "chown mongodb /opt/keyfile && chmod 600 /opt/keyfile && . /usr/local/bin/docker-entrypoint.sh mongod --replSet rs0 --keyFile /opt/keyfile --bind_ip 0.0.0.0" ]
 ```
 2. В той же директории сгенерируйте общий ключ кластера командой `openssl rand -base64 756 > ./keyfile`
   и скопируйте этот файл на все 3 сервера.
 3. На всех 3 серверах запустите MongoDB командой `docker compose up -d mongo`.
 4. Зайдите в Mongo Shell с помощью команды `docker exec -it mongo mongosh -u root -p <ВАШ_ПАРОЛЬ> localhost/admin`
   и там выполните команду (заменив IP-адреса `10.10.10.{1,2,3}` на адреса своих серверов):
 `rs.initiate({ _id: 'rs0', members: [
  { _id: 1, host: '10.10.10.1:27017' },
  { _id: 2, host: '10.10.10.2:27017' },
  { _id: 3, host: '10.10.10.3:27017' }
 ] })`
 5. Находясь там же, в Mongo Shell, создайте пользователя с доступом к будущей базе данных S3:
 `db.createUser({ user: 's3', pwd: '<ВАШ_ПАРОЛЬ_S3>', roles: [
  { role: 'readWrite', db: 's3' },
  { role: 'dbAdmin', db: 's3' },
  { role: 'readWrite', db: 'vitastor' },
  { role: 'dbAdmin', db: 'vitastor' }
 ] })`
 ### Настройка Vitastor
 Создайте в Vitastor отдельный пул для данных объектов S3, например:
 `vitastor-cli create-pool --ec 2+1 -n 512 s3-data --used_for_app s3:standard`
 Опция `--used_for_app` работает как "защита от дурака" и не даёт вам случайно создать
 в этом пуле обычный блочный том и перезаписать им какие-то данные S3, а также скрывает
 статистику занятого места по томам S3 из etcd.
 Получите ID своего пула с помощью команды `vitastor-cli ls-pools --detail`.
 ### Установка Vitastor S3
 1. Добавьте в `docker-compose.yml` строки (альтернативно вместо `network_mode: host`
   можно использовать `ports: [ "8000:8000", "8002:8002" ]`):
 ```
  zenko:
    container_name: zenko
    image: vitalif/vitastor-zenko
    restart: always
    security_opt:
      - seccomp:unconfined
    ulimits:
      memlock: -1
    network_mode: host
    volumes:
      - /etc/vitastor:/etc/vitastor
      - /etc/vitastor/s3:/conf
 ```
 2. Извлеките из Docker-образа Vitastor примеры файлов конфигурации:
   `docker run --rm -it -v /etc/vitastor:/etc/vitastor -v /etc/vitastor/s3:/conf vitalif/vitastor-zenko configure.sh`
 3. Отредактируйте файлы конфигурации в `/etc/vitastor/s3/`:
   - `config.json` - общие настройки.
   - `authdata.json` - учётные записи и ключи доступа.
   - `locationConfig.json` - список классов хранения S3 с настройками расположения.
     Внимание: в данной версии это именно список S3 storage class-ов (STANDARD, COLD и т.п.),
     а не зон (подобных us-east-1), как в оригинальном Zenko CloudServer.
   - В `config.json` и в `locationConfig.json` пропишите свои данные подключения к MongoDB.
   - В `locationConfig.json` укажите ID пула Vitastor для хранения данных.
   - Полный перечень настроек Vitastor-бэкенда пока можно посмотреть [в коде](https://git.yourcmc.ru/vitalif/zenko-arsenal/src/branch/master/lib/storage/data/vitastor/VitastorBackend.ts#L94).
 ### Запуск
 Запустите S3-сервер: `docker-compose up -d zenko`
 Готово! Вы получили S3-сервер, работающий на порту 8000.
 Можете попробовать обратиться к нему с помощью, например, [s3cmd](https://s3tools.org/s3cmd):
 `s3cmd --host-bucket= --no-ssl --access_key=accessKey1 --secret_key=verySecretKey1 --host=http://localhost:8000 mb s3://testbucket`
 Или смонтировать его с помощью [GeeseFS](https://github.com/yandex-cloud/geesefs):
 `AWS_ACCESS_KEY_ID=accessKey1 AWS_SECRET_ACCESS_KEY=verySecretKey1 geesefs --endpoint http://localhost:8000 testbucket /mnt/geesefs`
 ## Лицензия
 - Автор [Zenko CloudServer](https://s3-server.readthedocs.io/en/latest/) - Scality, лицензия [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
 - Vitastor-бэкенд для S3, как и сам Vitastor, лицензируется на условиях [VNPL 1.1](https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/VNPL-1.1.txt)
 - Репозиторий сборки: https://git.yourcmc.ru/vitalif/zenko-cloudserver-vitastor
 - Бэкенд хранения данных: https://git.yourcmc.ru/vitalif/zenko-arsenal/src/branch/master/lib/storage/data/vitastor/VitastorBackend.ts
--- a/docs/installation/source.en.md
+++ b/docs/installation/source.en.md
@@ -16,7 +16,7 @@
  designated initializers support from C++20
 - CMake
 - liburing, jerasure headers and libraries
- ISA-L, libibverbs headers and libraries (optional)
+- ISA-L, libibverbs and librdmacm headers and libraries (optional)
 - tcmalloc (google-perftools-dev)
 ## Basic instructions
--- a/docs/installation/source.ru.md
+++ b/docs/installation/source.ru.md
@@ -16,7 +16,7 @@
  назначенных инициализаторов (designated initializers) из C++20
 - CMake
 - Заголовки и библиотеки liburing, jerasure
- Опционально - заголовки и библиотеки ISA-L, libibverbs
+- Опционально - заголовки и библиотеки ISA-L, libibverbs, librdmacm
 - tcmalloc (google-perftools-dev)
 ## Базовая инструкция
--- a/docs/intro/architecture.en.md
+++ b/docs/intro/architecture.en.md
@@ -6,19 +6,151 @@
 # Architecture
 - [Server-side components](#server-side-components)
 - [Basic concepts](#basic-concepts)
 - [Client-side components](#client-side-components)
 - [Additional utilities](#additional-utilities)
 - [Overall read/write process](#overall-read-write-process)
  - [Nuances of request handling](#nuances-of-request-handling)
 - [Similarities to Ceph](#similarities-to-ceph)
 - [Differences from Ceph](#differences-from-ceph)
 - [Implementation Principles](#implementation-principles)
 ## Server-side components
 - **OSD** (Object Storage Daemon) is a process that directly works with the disk, stores data
  and serves read/write requests. One OSD serves one disk (or one partition). OSDs talk to etcd
  and to each other — they receive cluster state from etcd, and send read/write requests for
  secondary copies of data to other OSDs.
 - **etcd** — clustered key/value database, used as a reliable storage for configuration
  and high-level cluster state. Etcd is the component that prevents splitbrain in the cluster.
  Data blocks are not stored in etcd, etcd doesn't participate in data write or read path.
 - **Monitor** — a separate node.js based daemon which monitors the cluster, calculates
  required configuration changes and saves them to etcd, thus commanding OSDs to apply these
  changes. Monitor also aggregates cluster statistics. OSDs don't talk to the monitor, the monitor
  only sends data to and receives data from etcd.
 ## Basic concepts
- OSD (Object Storage Daemon) is a process that stores data and serves read/write requests.
+- **Pool** is a container for data that has equal redundancy scheme and disk placement rules.
- PG (Placement Group) is a "shard" of the cluster, group of data stored on one set of replicas.
+- **PG (Placement Group)** is a "shard" of the cluster, subdivision unit that has its own
- Pool is a container for data that has equal redundancy scheme and placement rules.
+  set of OSDs for data storage.
- Monitor is a separate daemon that watches cluster state and handles failures.
+- **Failure Domain** is a group of OSDs, from the simultaneous failure of which you are
- Failure Domain is a group of OSDs that you allow to fail. It's "host" by default.
+  protected by Vitastor. Default failure domain is "host" (server), but you can choose a
- Placement Tree groups OSDs in a hierarchy to later split them into Failure Domains.
+  larger (for example, a rack of servers) or smaller (a single drive) failure domain
  for every pool.
 - **Placement Tree** (similar to Ceph CRUSH Tree) groups OSDs in a hierarchy to later
  split them into Failure Domains.
 ## Client-side components
 - **Client library** encapsulates client I/O logic. Client library connects to etcd and to all OSDs,
  receives cluster state from etcd, sends read and write requests directly to all OSDs. Due
  to the symmetric distributed architecture, all data blocks (each 128 KB by default) are placed
  to different OSDs, but clients always know where each data block is stored and connect directly
  to the right OSD.
 All other client-side components are based on the client library:
 - **[vitastor-cli](../usage/cli.en.md)** — command-line utility for cluster management.
  Allows to view cluster state, manage pools and images, i.e. create, modify and remove
  virtual disks, their snapshots and clones.
 - **[QEMU driver](../usage/qemu.en.md)** — pluggable QEMU module allowing QEMU/KVM virtual
  machines to work with virtual Vitastor disks directly from userspace through the client library,
  without the need to attach disks as kernel block devices. However, if you want to attach
  disks, you can also do that with the same driver and [VDUSE](../usage/qemu.en.md#vduse).
 - **[vitastor-nbd](../usage/nbd.en.md)** — utility that allows to attach Vitastor disks as
  kernel block devices using NBD (Network Block Device), which works more like "BUSE"
  (Block Device In Userspace). Vitastor doesn't have Linux kernel modules for the same task
  (at least by now). NBD is an older, non-recommended way to attach disks — you should use
  VDUSE whenever you can.
 - **[CSI driver](../installation/kubernetes.en.md)** — driver for attaching Vitastor images
  and VitastorFS subdirectories as Kubernetes persistent volumes. Block-based CSI uses
  VDUSE (when available) or NBD — images are attached as kernel block devices and mounted
  into containers. FS-based CSI uses **[vitastor-nfs](../usage/nfs.en.md)**.
 - **Drivers for Proxmox, OpenStack and so on** — pluggable modules for corresponding systems,
  allowing to use Vitastor as storage in them.
 - **[vitastor-nfs](../usage/nfs.en.md)** — NFS 3.0 server allowing export of two file system variants:
  the first is a simplified pseudo-FS for file-based access to Vitastor block images (for non-QEMU
  hypervisors with NFS support), the second is **VitastorFS**, full-featured clustered POSIX FS.
  Both variants support parallel access from multiple vitastor-nfs servers. In fact, you are
  not required to setup separate NFS servers at all and use vitastor-nfs mount command on every
  client node — it starts the NFS server and mounts the FS locally.
 - **[fio driver](../usage/fio.en.md)** — pluggable module for fio disk benchmarking tool for
  running performance tests on your Vitastor cluster.
 - **vitastor-kv** — client for a key-value DB working over shared block volumes (usual
  vitastor images). VitastorFS metadata is stored in vitastor-kv.
 ## Additional utilities
 - **vitastor-disk** — a Vitastor OSD disk management tool. You can create, remove,
  resize and move OSD partitions with it.
 ## Overall read/write process
 - Vitastor stores virtual disks, also named "images" or "inodes".
 - Each image is stored in some pool. Pool specifies storage parameters such as redundancy
  scheme (replication or EC — erasure codes, i.e. error correction codes), failure domain
  and restrictions on OSD selection for image data placement. See [Pool configuration](../config/pool.en.md) for details.
 - Each image is split into objects/blocks of fixed size, equal to [block_size](../config/layout-cluster.en.md#block_size)
  (128 KB by default), multiplied by data part count for EC or 1 for replicas. That is,
  if a pool uses EC 4+2 coding scheme (4 data parts + 2 parity parts), then, with the
  default block_size, images are split into 512 KB objects.
 - Client read/write requests are split into parts at object boundaries.
 - Each object is mapped to a PG number it belongs to, by simply taking a remainder of
  division of its offset by PG count of the image's pool.
 - Client reads primary OSD for all PGs from etcd. Primary OSD for each PG is assigned
  by the monitor during cluster operation, along with the full PG OSD set.
 - If not already connected, client connects to primary OSDs of all PGs involved in a
  read/write request and sends parts of the request to them.
 - If a primary OSD is unavailable, client retries connection attempts indefinitely
  either until it becomes available or until the monitor assigns another OSD as primary
  for that PG.
 - Client also retries requests if the primary OSD replies with error code EPIPE, meaning
  that the PG is inactive at this OSD at the moment - for example, when the primary OSD
  is switched, or if the primary OSD itself loses connection to replicas during request
  handling.
 - Primary OSD determines where the parts of the object are stored. By default, all objects
  are assumed to be stored at the target OSD set of a PG, but some of them may be present
  at a different OSD set if they are degraded or moved, or if the data rebalancing process
  is active. The primary OSD doesn't do any network requests to check this, it calculates locations
  of all objects during PG activation and stores them in memory.
 - Primary OSD handles the request locally when it can - for example, when it's a read
  from a replicated pool or when it's a read from an EC pool involving only one data part
  stored on the OSD's local disk.
 - When a request requires reads or writes to additional OSDs, primary OSD uses already
  established connections to secondary OSDs of the PG to execute these requests. This happens
  in parallel to local disk operations. All such connections are guaranteed to be already
  established when the PG is active, and if any of them is dropped, PG is restarted and
  all current read/write operations to it fail with EPIPE error and are retried by clients.
 - After completing all secondary read/write requests, primary OSD sends the response to
  the client.
 ### Nuances of request handling
 - If a pool uses erasure codes and some of the OSDs are unavailable, primary OSDs recover
  data from the remaining parts during read.
 - Each object has a version number. During write, primary OSD first determines the current
  version of the object. As primary OSD usually stores the object or its part itself, most
  of the time version is read from the memory of the OSD itself. However, if primary OSD
  doesn't contain parts of the object, it requests the version number from a secondary OSD
  which has that part. Such request still doesn't involve reading from the disk though,
  because object metadata, including version number, is always stored in OSD memory.
 - If a pool uses erasure codes, partial writes of an object require reading other parts of
  it from secondary OSDs or from the local disk of the primary OSD itself. This is called
  "read-modify-write" process.
 - If a pool uses erasure codes, two-phase write process is used to get rid of the Write Hole
  problem: first a new version of object parts is written to all secondary OSDs without
  removing the previous version, and then, after receiving successful write confirmations
  from all OSDs, new version is committed and the old one is allowed to be removed.
 - If a pool doesn't use immediate_commit mode, then write requests sent by clients aren't
  treated as committed to physical media instantly. Clients have to send a separate type of
  request (SYNC) to commit changes, and until it is sent, new versions of data are
  allowed to be lost if some OSDs die. Thus, when immediate_commit is disabled, clients
  store copies of all write requests in memory and repeat them from there when the
  connection to primary OSD is lost. This in-memory copy is removed after a successful
  SYNC, and to prevent excessive memory usage, clients also do an automatic SYNC
  every [client_dirty_limit](../config/network.en.md#client_dirty_limit) written bytes.
 ## Similarities to Ceph
@@ -87,5 +219,5 @@
 - Deleting images in a degraded cluster may currently lead to objects reappearing
  after dead OSDs come back, and in case of erasure-coded pools, they may even
  reappear as incomplete. Just repeat the removal request again in this case.
-  This problem will be fixed in the nearest future, the fix is already implemented
+  This problem will be fixed in the future, along with the metadata disk storage
-  in the "epoch-deletions" branch.
+  format update.
--- a/docs/intro/architecture.ru.md
+++ b/docs/intro/architecture.ru.md
@@ -11,6 +11,7 @@
 - [Серверные компоненты](#серверные-компоненты)
 - [Базовые понятия](#базовые-понятия)
 - [Клиентские компоненты](#клиентские-компоненты)
 - [Дополнительные утилиты](#дополнительные-утилиты)
 - [Общий процесс записи и чтения](#общий-процесс-записи-и-чтения)
  - [Особенности обработки запросов](#особенности-обработки-запросов)
 - [Схожесть с Ceph](#схожесть-с-ceph)
@@ -23,8 +24,8 @@
  Один OSD управляет одним диском (или разделом). OSD общаются с etcd и друг с другом — от etcd они
  получают состояние кластера, а друг другу передают запросы записи и чтения вторичных копий данных.
 - **etcd** — кластерная key/value база данных, используется для хранения настроек и верхнеуровневого
-  состояния кластера, а также предотвращения разделения сознания. Блоки данных в etcd не хранятся,
+  состояния кластера, а также предотвращения разделения сознания (splitbrain). Блоки данных в etcd не
-  в обработке клиентских запросов чтения и записи etcd не участвует.
+  хранятся, в обработке клиентских запросов чтения и записи etcd не участвует.
 - **Монитор** — отдельный демон на node.js, рассчитывающий необходимые изменения в конфигурацию
  кластера, сохраняющий эту информацию в etcd и таким образом командующий OSD применить эти изменения.
  Также агрегирует статистику. Контактирует только с etcd, OSD с монитором не общаются.
@@ -34,40 +35,56 @@
 - **Пул (Pool)** — контейнер для данных, имеющих одну и ту же схему избыточности и правила распределения по OSD.
 - **PG (Placement Group)** — "шард", единица деления пулов в кластере, которой назначается свой набор
  OSD для хранения данных (копий или частей объектов).
- **Домен отказа (Failure Domain)** — группа OSD, одновременное падение которых рассматривается
+- **Домен отказа (Failure Domain)** — группа OSD, от одновременного падения которых должен защищать
-  как вероятное. По умолчанию это "host" (сервер).
+  Vitastor. По умолчанию домен отказа — "host" (сервер), но вы можете установить для пула как больший
  домен отказа (например, стойку серверов), так и меньший (например, отдельный диск).
 - **Дерево распределения** (Placement Tree, в Ceph CRUSH Tree) — иерархическая группировка OSD
  в узлы, которые далее можно использовать как домены отказа.
 ## Клиентские компоненты
- **Клиентская библиотека** — инкапсулирует логику на стороне клиента. Соединяются с etcd и со всеми OSD,
+- **Клиентская библиотека** — инкапсулирует логику на стороне клиента. Соединяется с etcd и со всеми OSD,
-  от etcd получают состояние кластера, команды чтения и записи отправляют на все OSD напрямую.
+  от etcd получает состояние кластера, команды чтения и записи отправляет на все OSD напрямую.
  В силу архитектуры все отдельные блоки данных (по умолчанию по 128 КБ) располагаются на разных
  OSD, но клиент устроен так, что всегда точно знает, к какому OSD обращаться, и подключается
  к нему напрямую.
 На базе клиентской библиотеки реализованы все остальные клиенты:
- **vitastor-cli** — утилита командной строки для управления кластером. В данный момент позволяет
+- **[vitastor-cli](../usage/cli.ru.md)** — утилита командной строки для управления кластером.
-  просматривать общее состояние кластера и управлять образами — т.е. создавать, менять и удалять
+  Позволяет просматривать общее состояние кластера, управлять пулами и образами — то есть
-  виртуальные диски, их снимки и клоны.
+  создавать, менять и удалять виртуальные диски, их снимки и клоны.
- **Драйвер QEMU** — подключаемый модуль QEMU, позволяющий QEMU/KVM виртуальным машинам работать
+- **[Драйвер QEMU](../usage/qemu.ru.md)** — подключаемый модуль QEMU, позволяющий QEMU/KVM
-  с виртуальными дисками Vitastor напрямую из пространства пользователя с помощью клиентской
+  виртуальным машинам работать с виртуальными дисками Vitastor напрямую из пространства пользователя
-  библиотеки, без необходимости отображения дисков в виде блочных устройств. Тот же драйвер
+  с помощью клиентской библиотеки, без необходимости подключения дисков в виде блочных устройств
-  позволяет подключать диски в систему через [VDUSE](../usage/qemu.ru.md#vduse).
+  Linux. Если, однако, вы хотите подключать диски в виде блочных устройств, то вы тоже можете
- **vitastor-nbd** — утилита, позволяющая монтировать образы Vitastor в виде блочных устройств
+  сделать это с помощью того же самого драйвера и [VDUSE](../usage/qemu.ru.md#vduse).
-  с помощью NBD (Network Block Device), на самом деле скорее работающего как "BUSE"
+- **[vitastor-nbd](../usage/nbd.ru.md)** — утилита, позволяющая монтировать образы Vitastor
-  (Block Device In Userspace). Модуля ядра Linux для выполнения той же задачи в Vitastor нет
+  в виде блочных устройств с помощью NBD (Network Block Device), на самом деле скорее работающего
-  (по крайней мере, пока).
+  как "BUSE" (Block Device In Userspace). Модуля ядра Linux для выполнения той же задачи в
- **CSI драйвер** — драйвер для подключения Vitastor-образов в виде персистентных томов (PV) Kubernetes.
+  Vitastor нет (по крайней мере, пока). NBD — более старый и нерекомендуемый способ подключения
-  Работает через vitastor-nbd — образы отражаются в виде блочных устройств и монтируются
+  дисков — вам следует использовать VDUSE всегда, когда это возможно.
-  в контейнеры.
+- **[CSI драйвер](../installation/kubernetes.ru.md)** — драйвер для подключения Vitastor-образов
  и поддиректорий VitastorFS в виде персистентных томов (PV) Kubernetes. Блочный CSI работает через
  VDUSE (когда это возможно) или через NBD — образы отражаются в виде блочных устройств и монтируются
  в контейнеры. Файловый CSI использует **[vitastor-nfs](../usage/nfs.ru.md)**.
 - **Драйвера Proxmox, OpenStack и т.п.** — подключаемые модули для соответствующих систем,
  позволяющие использовать Vitastor как хранилище в оных.
- **vitastor-nfs** — утилита, предоставляющая файловый доступ к образам в кластере Vitastor
+- **[vitastor-nfs](../usage/nfs.ru.md)** — NFS 3.0 сервер, предоставляющий два варианта файловой системы:
-  по протоколу NFS 3.0. Предназначена для гипервизоров, не основанных на QEMU и Linux, но при
+  первая — упрощённая для файлового доступа к блочным образам (для не-QEMU гипервизоров, поддерживающих NFS),
-  этом поддерживающих NFS.
+  вторая — VitastorFS, полноценная кластерная POSIX ФС. Оба варианта поддерживают параллельный
  доступ с нескольких vitastor-nfs серверов. На самом деле можно вообще не выделять
  отдельные NFS-серверы, а вместо этого использовать команду vitastor-nfs mount, запускающую
  NFS-сервер прямо на клиентской машине и монтирующую ФС локально.
 - **[Драйвер fio](../usage/fio.ru.md)** — подключаемый модуль для утилиты тестирования
  производительности дисков fio, позволяющий тестировать Vitastor-кластеры.
 - **vitastor-kv** — клиент для key-value базы данных, работающей поверх разделяемого блочного
  образа (обычного блочного образа vitastor). Метаданные VitastorFS хранятся именно в vitastor-kv.
 ## Дополнительные утилиты
 - **vitastor-disk** — утилита для разметки дисков под Vitastor OSD. С её помощью можно
  создавать, удалять, менять размеры или перемещать разделы OSD.
 ## Общий процесс записи и чтения
@@ -98,16 +115,22 @@
  находиться на других OSD, если эти объекты деградированы или перемещены, или идёт процесс
  ребаланса. Запросы для проверки по сети не отправляются, информация о местоположении всех
  объектов рассчитывается первичным OSD при активации PG и хранится в памяти.
- Первичный OSD соединяется (если ещё не соединён) с вторичными OSD, на которых располагаются
+- Когда это возможно, первичный OSD обрабатывает запрос локально. Например, так происходит
-  части объекта, и отправляет им запросы чтения/записи, а также читает/пишет из/в своё локальное
+  при чтениях объектов из пулов с репликацией или при чтении из EC пула, затрагивающего
-  хранилище, если сам входит в набор.
+  только часть, хранимую на диске самого первичного OSD.
 - Когда запрос требует записи или чтения с вторичных OSD, первичный OSD использует заранее
  установленные соединения с ними для выполнения этих запросов. Это происходит параллельно
  локальным операциям чтения/записи с диска самого OSD. Так как соединения к вторичным OSD PG
  устанавливаются при её запуске, то они уже гарантированно установлены, когда PG активна,
  и если любое из этих соединений отключается, PG перезапускается, а все текущие запросы чтения
  и записи в неё завершаются с ошибкой EPIPE, после чего повторяются клиентами.
 - После завершения всех вторичных операций чтения/записи первичный OSD отправляет ответ клиенту.
 ### Особенности обработки запросов
 - Если в пуле используются коды коррекции ошибок и при этом часть OSD недоступна, первичный
  OSD при чтении восстанавливает данные из оставшихся частей.
- Каждый объект имеет номер версии. При записи объекта первичный OSD сначала читает из номер
+- Каждый объект имеет номер версии. При записи объекта первичный OSD сначала получает номер
  версии объекта. Так как первичный OSD обычно сам хранит копию или часть объекта, номер
  версии обычно читается из памяти самого OSD. Однако, если ни одна часть обновляемого объекта
  не находится на первичном OSD, для получения номера версии он обращается к одному из вторичных
@@ -115,20 +138,20 @@
  так как метаданные объектов, включая номер версии, все OSD хранят в памяти.
 - Если в пуле используются коды коррекции ошибок, перед частичной записью объекта для вычисления
  чётности зачастую требуется чтение частей объекта с вторичных OSD или с локального диска
-  самого первичного OSD.
+  самого первичного OSD. Это называется процессом "чтение-модификация-запись" (read-modify-write).
- Также, если в пуле используются коды коррекции ошибок, для закрытия Write Hole применяется
+- Если в пуле используются коды коррекции ошибок, для закрытия Write Hole применяется
  двухфазный алгоритм записи: сначала на все вторичные OSD записывается новая версия частей
  объекта, но при этом старая версия не удаляется, а потом, после получения подтверждения
  успешной записи от всех вторичных OSD, новая версия фиксируется и разрешается удаление старой.
- Если в кластере не включён режим immediate_commit, то запросы записи, отправляемые клиентами,
+- Если в пуле не включён режим immediate_commit, то запросы записи, отправляемые клиентами,
  не считаются зафиксированными на физических накопителях сразу. Для фиксации данных клиенты
  должны отдельно отправлять запросы SYNC (отдельный от чтения и записи вид запроса),
  а пока такой запрос не отправлен, считается, что записанные данные могут исчезнуть,
  если соответствующий OSD упадёт. Поэтому, когда режим immediate_commit отключён, все
  запросы записи клиенты копируют в памяти и при потере соединения и повторном соединении
-  с OSD повторяют из памяти. Скопированные в память данные удаляются при успешном fsync,
+  с OSD повторяют из памяти. Скопированные в память данные удаляются при успешном SYNC,
  а чтобы хранение этих данных не приводило к чрезмерному потреблению памяти, клиенты
-  автоматически выполняют fsync каждые [client_dirty_limit](../config/network.ru.md#client_dirty_limit)
+  автоматически выполняют SYNC каждые [client_dirty_limit](../config/network.ru.md#client_dirty_limit)
  записанных байт.
 ## Схожесть с Ceph
@@ -205,5 +228,5 @@
 - Удаление образов в деградированном кластере может в данный момент приводить к повторному
  "появлению" удалённых объектов после поднятия отключённых OSD, причём в случае EC-пулов,
  объекты могут появиться в виде "неполных". Если вы столкнётесь с такой ситуацией, просто
-  повторите запрос удаления. Исправление этой проблемы уже реализовано в ветке "epoch-deletions"
+  повторите запрос удаления. Данная проблема будет исправлена в будущем вместе с обновлением
-  и вскоре будет включено в релиз.
+  дискового формата хранения метаданных.
--- a/docs/intro/features.en.md
+++ b/docs/intro/features.en.md
@@ -28,7 +28,7 @@
 - Per-OSD and per-image I/O and space usage statistics in etcd
 - Snapshots and copy-on-write image clones
 - [Write throttling to smooth random write workloads in SSD+HDD configurations](../config/osd.en.md#throttle_small_writes)
- [RDMA/RoCEv2 support via libibverbs](../config/network.en.md#rdma_device)
+- RDMA/RoCEv2 support [via libibverbs](../config/network.en.md#use_rdma) or [RDMA-CM](../config/network.en.md#use_rdmacm)
 - [Scrubbing](../config/osd.en.md#auto_scrub) (verification of copies)
 - [Checksums](../config/layout-osd.en.md#data_csum_type)
 - [Client write-back cache](../config/client.en.md#client_enable_writeback)
@@ -36,6 +36,8 @@
 - [Clustered file system](../usage/nfs.en.md#vitastorfs)
 - [Experimental internal etcd replacement - antietcd](../config/monitor.en.md#use_antietcd)
 - [Built-in Prometheus metric exporter](../config/monitor.en.md#enable_prometheus)
 - [NFS RDMA support](../usage/nfs.en.md#rdma) (probably also usable for GPUDirect)
 - [S3](../installation/s3.en.md)
 ## Plugins and tools
@@ -62,7 +64,6 @@ The following features are planned for the future:
 - iSCSI and NVMeoF gateways
 - Multi-threaded client
 - Faster failover
 - S3
 - Tiered storage (SSD caching)
 - NVDIMM support
 - Compression (possibly)
--- a/docs/intro/features.ru.md
+++ b/docs/intro/features.ru.md
@@ -30,7 +30,7 @@
 - Именование инодов через хранение их метаданных в etcd
 - Снапшоты и copy-on-write клоны
 - [Сглаживание производительности случайной записи в SSD+HDD конфигурациях](../config/osd.ru.md#throttle_small_writes)
- [Поддержка RDMA/RoCEv2 через libibverbs](../config/network.ru.md#rdma_device)
+- Поддержка RDMA/RoCEv2 [через libibverbs](../config/network.ru.md#use_rdma) или [RDMA-CM](../config/network.ru.md#use_rdmacm)
 - [Фоновая проверка целостности](../config/osd.ru.md#auto_scrub) (сверка копий)
 - [Контрольные суммы](../config/layout-osd.ru.md#data_csum_type)
 - [Буферизация записи на стороне клиента](../config/client.ru.md#client_enable_writeback)
@@ -38,6 +38,8 @@
 - [Кластерная файловая система](../usage/nfs.ru.md#vitastorfs)
 - [Экспериментальная встроенная замена etcd - antietcd](../config/monitor.ru.md#use_antietcd)
 - [Встроенный Prometheus-экспортер метрик](../config/monitor.ru.md#enable_prometheus)
 - [Поддержка NFS RDMA](../usage/nfs.ru.md#rdma) (вероятно, также подходящая для GPUDirect)
 - [S3](../installation/s3.ru.md)
 ## Драйверы и инструменты
@@ -62,7 +64,6 @@
 - iSCSI и NVMeoF прокси
 - Многопоточный клиент
 - Более быстрое переключение при отказах
 - S3
 - Поддержка SSD-кэширования (tiered storage)
 - Поддержка NVDIMM
 - Возможно, сжатие
--- a/docs/intro/quickstart.en.md
+++ b/docs/intro/quickstart.en.md
@@ -26,13 +26,13 @@
  you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
 - Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
 - Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
- [Install Vitastor packages](../installation/packages.en.md).
+- Either [install Vitastor packages](../installation/packages.en.md) or [install Vitastor in Docker](../installation/docker.en.md).
 ## Recommended drives
 - SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
 - NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
-  Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
+  Intel DC-P3700/P4500/P4600, Intel D5-P4320/P5530, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
 - HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
 ## Configure monitors
@@ -45,11 +45,12 @@ On the monitor hosts:
  }
  ```
 - Create systemd units for etcd by running: `/usr/lib/vitastor/mon/make-etcd`
- Start etcd and monitors: `systemctl enable --now etcd vitastor-mon`
+  Or, if you installed Vitastor in Docker, run `systemctl start vitastor-host; docker exec vitastor make-etcd`.
 - Start etcd and monitors: `systemctl enable --now vitastor-etcd vitastor-mon`
 ## Configure OSDs
- Put etcd_address and osd_network into `/etc/vitastor/vitastor.conf`. Example:
+- Put etcd_address and [osd_network](../config/network.en.md#osd_network) into `/etc/vitastor/vitastor.conf`. Example:
  ```
  {
    "etcd_address": ["10.200.1.10:2379","10.200.1.11:2379","10.200.1.12:2379"],
--- a/docs/intro/quickstart.ru.md
+++ b/docs/intro/quickstart.ru.md
@@ -26,13 +26,13 @@
  обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
 - Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
 - Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
- [Установите пакеты Vitastor](../installation/packages.ru.md).
+- Либо [установите пакеты Vitastor](../installation/packages.ru.md), либо [установите Vitastor в Docker](../installation/docker.ru.md).
 ## Рекомендуемые диски
 - SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
 - NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
-  Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
+  Intel DC-P3700/P4500/P4600, Intel D5-P4320/P5530, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
 - HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
 ## Настройте мониторы
@@ -44,12 +44,13 @@
    "etcd_address": ["10.200.1.10:2379","10.200.1.11:2379","10.200.1.12:2379"]
  }
  ```
- Инициализируйте сервисы etcd, запустив `/usr/lib/vitastor/mon/make-etcd`
+- Инициализируйте сервисы etcd, запустив `/usr/lib/vitastor/mon/make-etcd`.\
- Запустите etcd и мониторы: `systemctl enable --now etcd vitastor-mon`
+  Либо, если вы установили Vitastor в Docker, запустите `systemctl start vitastor-host; docker exec vitastor make-etcd`.
 - Запустите etcd и мониторы: `systemctl enable --now vitastor-etcd vitastor-mon`
 ## Настройте OSD
- Пропишите etcd_address и osd_network в `/etc/vitastor/vitastor.conf`. Например:
+- Пропишите etcd_address и [osd_network](../config/network.ru.md#osd_network) в `/etc/vitastor/vitastor.conf`. Например:
  ```
  {
    "etcd_address": ["10.200.1.10:2379","10.200.1.11:2379","10.200.1.12:2379"],
--- a/docs/usage/admin.en.md
+++ b/docs/usage/admin.en.md
@@ -35,10 +35,19 @@ PG state consists of exactly 1 base state and an arbitrary number of additional
 PG state always includes exactly 1 of the following base states:
 - **active** — PG is active and handles user I/O.
- **incomplete** — Not enough OSDs are available to activate this PG. That is, more disks
+- **incomplete** — Not enough OSDs are available to activate this PG. More exactly, that
-  are lost than it's allowed by the pool's redundancy scheme. For example, if the pool has
+  means one of the following:
-  pg_size=3 and pg_minsize=1, part of the data may be written only to 1 OSD. If that exact
+  - Less than pg_minsize current target OSDs are available for the PG. I.e. more disks
-  OSD is lost, PG will become **incomplete**.
+    are lost than allowed by the pool's redundancy scheme.
  - All OSDs of some of PG's history records are unavailable, or, for EC pools, less
    than (pg_size-parity_chunks) OSDs are available in one of the history records.
    In other words it means that some data in this PG was written to an OSD set such that
    it's currently impossible to read it back because these OSDs are down. For example,
    if the pool has pg_size=3 and pg_minsize=1, part of the data may be written only to
    1 OSD. If that exact OSD is lost, PG becomes **incomplete**.
  - [allow_net_split](../config/osd.en.md#allow_net_split) is disabled (default) and
    primary OSD of the PG can't connect to some secondary OSDs marked as alive in etcd.
    I.e. a network partition happened: OSDs can talk to etcd, but not to some other OSDs.
 - **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
  this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
  or the primary OSD refuses to start this PG (for example, because of wrong block_size),
@@ -58,8 +67,9 @@ and during switching primary OSD of PGs.
 **starting**, **repeering**, **stopping** states normally almost aren't visible at all.
 If you notice them for any noticeable time — chances are some operations on some OSDs hung.
-Search for "slow op" in OSD logs to find them — operations hung for more than
+Check `vitastor-cli status` and search for "slow op" in OSD logs to find them — operations
-[slow_log_interval](../config/osd.en.md#slow_log_interval) are logged as "slow ops".
+hung for more than [slow_log_interval](../config/osd.en.md#slow_log_interval) are logged as
 "slow ops" and displayed in `status`.
 State transition diagram:
@@ -171,7 +181,14 @@ to make them use the new version of the client library.
 ### 1.7.x to 1.8.0
-After upgrading version <= 1.7.x to version >= 1.8.0, BUT <= 1.9.0: restart all clients
+It's recommended to upgrade from version <= 1.7.x to version >= 1.8.0 with full downtime,
 i.e. you should first stop clients and then the cluster (OSDs and monitor), because 1.8.0
 includes a fix for etcd event stream inconsistency which could lead to "incomplete" objects
 appearing in EC pools, and in rare cases, probably, even to data corruption during mass OSD
 restarts. It doesn't mean that you WILL hit this problem if you upgrade without full downtime,
 but it's better to secure yourself against it.
 Also, if you upgrade from version <= 1.7.x to version >= 1.8.0, BUT <= 1.9.0: restart all clients
 (VMs and so on), otherwise they will hang when monitor clears old PG configuration key,
 which happens 24 hours after upgrade.
--- a/docs/usage/admin.ru.md
+++ b/docs/usage/admin.ru.md
@@ -35,10 +35,20 @@
 Состояние PG включает в себя ровно 1 флаг из следующих:
 - **active** — PG активна и обрабатывает запросы ввода-вывода от пользователей.
- **incomplete** — Недостаточно живых OSD, чтобы включить эту PG.
+- **incomplete** — Недостаточно живых OSD, чтобы включить эту PG. Если точнее, то это
-  То есть, дисков потеряно больше, чем разрешено схемой отказоустойчивости пула и pg_minsize.
+  означает один из следующих вариантов:
-  Например, если у пула pg_size=3 и pg_minsize=1, то часть данных может записаться всего на 1 OSD.
+  - Доступно менее, чем pg_minsize текущих целевых OSD данной PG. Иными словами, потеряно
-  Если потом конкретно этот OSD упадёт, PG окажется **incomplete**.
+    больше дисков, чем это разрешает схема отказоустойчивости пула.
  - Все OSD одной из исторических записей PG недоступны, или, для EC-пулов, в одной
    из исторических записей PG доступно менее, чем (pg_size-parity_chunks) OSD. Другими
    словами это означает, что часть данных этой PG была записана в такой набор OSD, из
    которого их сейчас невозможно прочитать обратно, так как OSD не включены. Например,
    если у пула pg_size=3 и pg_minsize=1, то часть данных может записаться всего на 1 OSD.
    Если потом конкретно этот OSD упадёт, PG окажется **incomplete**.
  - [allow_net_split](../config/osd.ru.md#allow_net_split) отключено (по умолчанию) и
    первичный OSD данной PG не может соединиться с частью вторичных OSD этой PG, помеченных
    как живых в etcd. Это означает, что произошло разделение сети: OSD могут общаться с etcd,
    но не могут общаться с частью других OSD.
 - **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
  (если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
  назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
@@ -56,9 +66,9 @@ OSD, на протяжении небольшого периода времен
 Состояния **starting**, **repeering**, **stopping** в норме практически не заметны вообще,
 PG должны очень быстро переходить из них в другие. Если эти состояния заметны
 хоть сколько-то значительное время — вероятно, какие-то операции на каких-то OSD зависли.
-Чтобы найти их, ищите "slow op" в журналах OSD — операции, зависшие дольше,
+Чтобы найти их, посмотрите `vitastor-cli status` и поищите слова "slow op" в журналах OSD —
-чем на [slow_log_interval](../config/osd.ru.md#slow_log_interval), записываются в
+операции, зависшие дольше, чем на [slow_log_interval](../config/osd.ru.md#slow_log_interval),
-журналы OSD как "slow op".
+записываются в журналы OSD как "slow op" и отображаются в `status`.
 Диаграмма переходов:
@@ -168,7 +178,14 @@ done
 ### 1.7.x -> 1.8.0
-После обновления с версий <= 1.7.x до версий >= 1.8.0, НО <= 1.9.0: перезапустите всех
+Обновляться с версий <= 1.7.x до версий >= 1.8.0 рекомендуется с полной остановкой
 сначала клиентов, а затем кластера, так как в 1.8.0 исправлена проблема (неконсистентность
 потоков событий от etcd), способная приводить к появлению incomplete объектов в EC-пулах
 и, хоть и редко, но даже к повреждению данных при массовых перезапусках OSD. Если вы
 обновляетесь без полной остановки - это не значит, что вы обязательно столкнётесь с этой
 проблемой, но лучше подстраховаться.
 Также, если вы обновляетесь с версии <= 1.7.x до версии >= 1.8.0, НО <= 1.9.0: перезапустите всех
 клиентов (процессы виртуальных машин можно перезапустить путём миграции на другой сервер),
 иначе они зависнут, когда монитор удалит старый ключ конфигурации PG, что происходит через
 24 часа после обновления.
--- a/docs/usage/cli.en.md
+++ b/docs/usage/cli.en.md
@@ -37,7 +37,7 @@ It supports the following commands:
 Global options:
 ```
--config_file FILE   Path to Vitastor configuration file
+--config_path FILE   Path to Vitastor configuration file
 --etcd_address URL   Etcd connection address
 --iodepth N          Send N operations in parallel to each OSD when possible (default 32)
 --parallel_osds M    Work with M osds in parallel when possible (default 4)
@@ -146,6 +146,7 @@ Rename, resize image or change its readonly status. Images with children can't b
 If the new size is smaller than the old size, extra data will be purged.
 You should resize file system in the image, if present, before shrinking it.
 * `--deleted 1|0` - Set/clear 'deleted image' flag (set automatically during unfinished deletes).
 * `-f|--force` - Proceed with shrinking or setting readwrite flag even if the image has children.
 * `--down-ok` - Proceed with shrinking even if some data will be left on unavailable OSDs.
@@ -221,6 +222,7 @@ Remove inode data without changing metadata.
              Requires more memory, but allows to show correct removal progress.
 --min-offset  Purge only data starting with specified offset.
 --max-offset  Purge only data before specified offset.
 --client_wait_up_timeout 16  Timeout for waiting until PGs are up in seconds.
 ```
 ## merge-data
@@ -353,7 +355,7 @@ Set OSD reweight, tags or noout flag. See detail description in [OSD config docu
 ## pg-list
-`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
+`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs|pgs [OPTIONS] [state1+state2] [^state3] [...]`
 List PGs with any of listed state filters (^ or ! in the beginning is negation). Options:
@@ -361,6 +363,7 @@ List PGs with any of listed state filters (^ or ! in the beginning is negation).
 --pool <pool name or number>  Only list PGs of the given pool.
 --min <min pg number>         Only list PGs with number >= min.
 --max <max pg number>         Only list PGs with number <= max.
 --osd 1,2,...                 Only list PGs with some data on specified OSD(s).
 ```
 Examples:
@@ -375,11 +378,11 @@ Examples:
 Create a pool. Required parameters:
-| <!-- -->                 | <!-- -->                                                                              |
+| <!-- -->                 | <!-- -->                                                                                |
-|--------------------------|---------------------------------------------------------------------------------------|
+|--------------------------|-----------------------------------------------------------------------------------------|
-| `-s R` or `--pg_size R`  | Number of replicas for replicated pools                                               |
+| `-s R` or `--pg_size R`  | Number of replicas for replicated pools                                                 |
-| `--ec N+K`               | Number of data (N) and parity (K) chunks for erasure-coded pools                      |
+| `--ec N+K`               | Number of data (N) and parity (K) chunks for erasure-coded pools                        |
-| `-n N` or `--pg_count N` | PG count for the new pool (start with 10*<OSD count>/pg_size rounded to a power of 2) |
+| `-n N` or `--pg_count N` | PG count for the new pool (start with 10*\<OSD count\>/pg_size rounded to a power of 2) |
 Optional parameters:
@@ -396,7 +399,8 @@ Optional parameters:
 | `--raw_placement <rules>`      | Specify raw PG generation rules ([details](../config/pool.en.md#raw_placement)) |
 | `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags               |
 | `--scrub_interval <time>`      | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y  |
-| `--used_for_fs <name>`         | Mark pool as used for VitastorFS with metadata in image <name>             |
+| `--used_for_app fs:<name>`     | Mark pool as used for VitastorFS with metadata in image `<name>`           |
 | `--used_for_app s3:<name>`     | Mark pool as used for S3 location with name `<name>`                       |
 | `--pg_stripe_size <number>`    | Increase object grouping stripe                                            |
 | `--max_osd_combinations 10000` | Maximum number of random combinations for LP solver input                  |
 | `--wait`                       | Wait for the new pool to come online                                       |
--- a/docs/usage/cli.ru.md
+++ b/docs/usage/cli.ru.md
@@ -22,6 +22,8 @@ vitastor-cli - интерфейс командной строки для адм
 - [flatten](#flatten)
 - [rm-data](#rm-data)
 - [merge-data](#merge-data)
 - [describe](#describe)
 - [fix](#fix)
 - [alloc-osd](#alloc-osd)
 - [rm-osd](#rm-osd)
 - [osd-tree](#osd-tree)
@@ -36,7 +38,7 @@ vitastor-cli - интерфейс командной строки для адм
 Глобальные опции:
 ```
--config_file FILE   Путь к файлу конфигурации Vitastor
+--config_path FILE   Путь к файлу конфигурации Vitastor
 --etcd_address URL   Адрес соединения с etcd
 --iodepth N          Отправлять параллельно N операций на каждый OSD (по умолчанию 32)
 --parallel_osds M    Работать параллельно с M OSD (по умолчанию 4)
@@ -149,6 +151,7 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
 Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением
 образа сначала уменьшите файловую систему в нём.
 * `--deleted 1|0` - Установить/снять флаг "образ удалён" (устанавливается при незавершённом удалении).
 * `-f|--force` - Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны.
 * `--down-ok` - Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD.
@@ -226,6 +229,7 @@ vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
              Требует больше памяти, но позволяет правильно печатать прогресс удаления.
 --min-offset  Удалять только данные, начиная с заданного смещения.
 --max-offset  Удалять только данные до (исключительно) заданного смещения.
 --client_wait_up_timeout 16  Время ожидания поднятия PG в секундах.
 ```
 ## merge-data
@@ -373,9 +377,10 @@ OSD  PARENT            UP    SIZE  USED%    TAGS          WEIGHT  BLOCK  BITMAP
 в начале фильтра означает отрицание). Опции:
 ```
--pool <pool name or number>  Only list PGs of the given pool.
+--pool <pool name or number>  Вывести только PG в заданном пуле.
--min <min pg number>         Only list PGs with number >= min.
+--min <min pg number>         Вывести только PG с номерами >= min.
--max <max pg number>         Only list PGs with number <= max.
+--max <max pg number>         Вывести только PG с номерами <= max.
 --osd 1,2,...                 Вывести только PG с данными на заданных OSD.
 ```
 Примеры:
@@ -390,11 +395,11 @@ OSD  PARENT            UP    SIZE  USED%    TAGS          WEIGHT  BLOCK  BITMAP
 Создать пул. Обязательные параметры:
-| <!-- -->                  | <!-- -->                                                                                    |
+| <!-- -->                  | <!-- -->                                                                                      |
-|---------------------------|---------------------------------------------------------------------------------------------|
+|---------------------------|-----------------------------------------------------------------------------------------------|
-| `-s R` или `--pg_size R`  | Число копий данных для реплицированных пулов                                                |
+| `-s R` или `--pg_size R`  | Число копий данных для реплицированных пулов                                                  |
-| `--ec N+K`                | Число частей данных (N) и чётности (K) для пулов с кодами коррекции ошибок                  |
+| `--ec N+K`                | Число частей данных (N) и чётности (K) для пулов с кодами коррекции ошибок                    |
-| `-n N` или `--pg_count N` | Число PG для нового пула (начните с 10*<число OSD>/pg_size, округлённого до степени двойки) |
+| `-n N` или `--pg_count N` | Число PG для нового пула (начните с 10*\<число OSD\>/pg_size, округлённого до степени двойки) |
 Необязательные параметры:
--- a/docs/usage/disk.en.md
+++ b/docs/usage/disk.en.md
@@ -14,6 +14,7 @@ It supports the following commands:
 - [upgrade-simple](#upgrade-simple)
 - [resize](#resize)
 - [raw-resize](#raw-resize)
 - [trim](#trim)
 - [start/stop/restart/enable/disable](#start/stop/restart/enable/disable)
 - [purge](#purge)
 - [read-sb](#read-sb)
@@ -51,12 +52,16 @@ Options (automatic mode):
 --osd_per_disk <N>
  Create <N> OSDs on each disk (default 1)
 --hybrid
-  Prepare hybrid (HDD+SSD) OSDs using provided devices. SSDs will be used for
+  Prepare hybrid (HDD+SSD, NVMe+SATA, etc.) OSDs using provided devices. By default,
-  journals and metadata, HDDs will be used for data. Partitions for journals and
+  any passed SSDs will be used for journals and metadata, HDDs will be used for data,
-  metadata will be created automatically. Whether disks are SSD or HDD is decided
+  but you can override this behaviour with --fast-devices option. Journal and metadata
-  by the `/sys/block/.../queue/rotational` flag. In hybrid mode, default object
+  partitions will be created automatically. In the default mode, SSD and HDD disks
-  size is 1 MB instead of 128 KB, default journal size is 1 GB instead of 32 MB,
+  are distinguished by the `/sys/block/.../queue/rotational` flag. When HDDs are used
-  and throttle_small_writes is enabled by default.
+  for data in hybrid mode, default block_size is 1 MB instead of 128 KB, default journal
  size is 1 GB instead of 32 MB, and throttle_small_writes is enabled by default.
 --fast-devices /dev/nvmeX,/dev/nvmeY
  In --hybrid mode, use these devices for journal and metadata instead of auto-detecting
  and extracting them from the main [devices...] list.
 --disable_data_fsync auto
  Disable data device cache and fsync (1/yes/true = on, default auto)
 --disable_meta_fsync auto
@@ -93,6 +98,9 @@ Options (both modes):
 --data_device_block 4k     Override data device block size
 --meta_device_block 4k     Override metadata device block size
 --journal_device_block 4k  Override journal device block size
 --discard_on_start 0       TRIM unused data device blocks every OSD start (default off)
 --min_discard_size 1M      Minimum TRIM block size
 --json                     Enable JSON output
 ```
 [immediate_commit](../config/layout-cluster.en.md#immediate_commit) setting is
@@ -175,6 +183,19 @@ parameters from OSD command line (i.e. from systemd unit or superblock).
 SIZE may include k/m/g/t suffixes. If any of the new layout parameter
 options are not specified, old values will be used.
 ## trim
 `vitastor-disk trim <osd_num>|<osd_device> [<osd_num>|<osd_device>...]`
 Try to discard unused blocks (SSD TRIM) on the data device of each of the OSD(s).
 May only be used on stopped OSDs. Options:
 ```
 --min_discard_size 1M      Minimum TRIM block size
 --discard_granularity 0    Override device's discard granularity
 ```
 ## start/stop/restart/enable/disable
 `vitastor-disk start|stop|restart|enable|disable [--now] <device> [device2 device3 ...]`
--- a/docs/usage/disk.ru.md
+++ b/docs/usage/disk.ru.md
@@ -51,12 +51,17 @@ vitastor-disk - инструмент командной строки для уп
 --osd_per_disk <N>
  Создавать по несколько (<N>) OSD на каждом диске (по умолчанию 1)
 --hybrid
-  Инициализировать гибридные (HDD+SSD) OSD на указанных дисках. SSD будут
+  Инициализировать гибридные (HDD+SSD, NVMe+SATA и т.п.) OSD на указанных дисках.
-  использованы для журналов и метаданных, а HDD - для данных. Разделы для журналов
+  По умолчанию, SSD будут использованы для журналов и метаданных, а HDD - для данных,
-  и метаданных будут созданы автоматически. Является ли диск SSD или HDD, определяется
+  но вы можете поменять это поведение опцией --fast-devices. Разделы для журналов
-  по флагу `/sys/block/.../queue/rotational`. В гибридном режиме по умолчанию
+  и метаданных будут созданы автоматически. В режиме по умолчанию SSD и HDD-диски
-  используется размер объекта 1 МБ вместо 128 КБ, размер журнала 1 ГБ вместо 32 МБ
+  различаются по флагу `/sys/block/.../queue/rotational`. Когда в гибридном режиме
-  и включённый throttle_small_writes.
+  для данных используются HDD, по умолчанию размер блока устанавливается 1 МБ вместо
  128 КБ, размер журнала 1 ГБ вместо 32 МБ, и throttle_small_writes включается по
  умолчанию.
 --fast-devices /dev/nvmeX,/dev/nvmeY
  Использовать данные диски для журналов и метаданных в гибридном режиме вместо их
  автоопределения и извлечения из основного списка [devices...].
 --disable_data_fsync auto
  Отключать кэш и fsync-и для устройств данных. (1/yes/true = да, по умолчанию автоопределение)
 --disable_meta_fsync auto
@@ -94,6 +99,9 @@ vitastor-disk - инструмент командной строки для уп
 --data_device_block 4k     Задать размер блока устройства данных
 --meta_device_block 4k     Задать размер блока метаданных
 --journal_device_block 4k  Задать размер блока журнала
 --discard_on_start 0       Выполнять TRIM пустых блоков данных при запуске OSD (по умолчанию нет)
 --min_discard_size 1M      Минимальный размер блока для TRIM
 --json                     Включить JSON-вывод
 ```
 Настройка [immediate_commit](../config/layout-cluster.ru.md#immediate_commit)
@@ -177,6 +185,20 @@ throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
 `РАЗМЕР` может быть указан с суффиксами k/m/g/t. Если любой из новых параметров
 расположения не указан, он принимается равным старому значению.
 ## trim
 `vitastor-disk trim <osd_num>|<osd_device> [<osd_num>|<osd_device>...]`
 Попробовать пометить пустые блоки дисков данных всех указанных OSD неиспользуемыми
 (выполнить команду SSD TRIM).
 Можно использовать только с остановленными OSD. Опции:
 ```
 --min_discard_size 1M      Минимальный размер блока для TRIM
 --discard_granularity 0    Кратность размера блока для TRIM
 ```
 ## start/stop/restart/enable/disable
 `vitastor-disk start|stop|restart|enable|disable [--now] <device> [device2 device3 ...]`
--- a/docs/usage/nbd.en.md
+++ b/docs/usage/nbd.en.md
@@ -36,7 +36,7 @@ It will output a block device name like /dev/nbd0 which you can then use as a no
 You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <IMAGE>` if you want.
-vitastor-nbd supports all usual Vitastor configuration options like `--config_file <path_to_config>` plus NBD-specific:
+vitastor-nbd supports all usual Vitastor configuration options like `--config_path <path_to_config>` plus NBD-specific:
 * `--nbd_timeout 0` \
  Timeout for I/O operations in seconds after exceeding which the kernel stops the device.
@@ -54,16 +54,18 @@ vitastor-nbd supports all usual Vitastor configuration options like `--config_fi
  Stay in foreground, do not daemonize.
 Note that `nbd_timeout`, `nbd_max_devices` and `nbd_max_part` options may also be specified
-in `/etc/vitastor/vitastor.conf` or in other configuration file specified with `--config_file`.
+in `/etc/vitastor/vitastor.conf` or in other configuration file specified with `--config_path`.
 ## unmap
 To unmap the device run:
 ```
-vitastor-nbd unmap /dev/nbd0
+vitastor-nbd unmap [--force] /dev/nbd0
 ```
 If `--force` is specified, `vitastor-nbd` doesn't check if the device is actually mapped.
 ## ls
 ```
@@ -96,7 +98,7 @@ Example output (JSON format):
 vitastor-nbd netlink-map [/dev/nbdN] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)
 ```
-On recent kernel versions it's also possinle to map NBD devices using netlink interface.
+On recent kernel versions it's also possible to map NBD devices using netlink interface.
 This is an experimental feature because it doesn't solve all issues of NBD. Differences from regular ioctl-based 'map':
--- a/docs/usage/nbd.ru.md
+++ b/docs/usage/nbd.ru.md
@@ -41,7 +41,7 @@ vitastor-nbd map [/dev/nbdN] --image testimg
 Для обращения по номеру инода, аналогично другим командам, можно использовать опции
 `--pool <POOL> --inode <INODE> --size <SIZE>` вместо `--image testimg`.
-vitastor-nbd поддерживает все обычные опции Vitastor, например, `--config_file <path_to_config>`,
+vitastor-nbd поддерживает все обычные опции Vitastor, например, `--config_path <path_to_config>`,
 плюс специфичные для NBD:
 * `--nbd_timeout 0` \
@@ -62,16 +62,19 @@ vitastor-nbd поддерживает все обычные опции Vitastor,
 Обратите внимание, что опции `nbd_timeout`, `nbd_max_devices` и `nbd_max_part` можно
 также задавать в `/etc/vitastor/vitastor.conf` или в другом файле конфигурации,
-заданном опцией `--config_file`.
+заданном опцией `--config_path`.
 ## unmap
 Для отключения устройства выполните:
 ```
-vitastor-nbd unmap /dev/nbd0
+vitastor-nbd unmap [--force] /dev/nbd0
 ```
 Если задана опция `--force`, `vitastor-nbd` не проверяет, подключено ли устройство,
 перед попыткой его отключить.
 ## ls
 ```
--- a/docs/usage/nfs.en.md
+++ b/docs/usage/nfs.en.md
@@ -58,7 +58,7 @@ To use VitastorFS:
 2. Create an image for FS metadata, preferably in a faster (SSD or replica-HDD) pool,
   but you can create it in the data pool too if you want (image size doesn't matter):
   `vitastor-cli create -s 10G -p fastpool testfs`
-3. Mark data pool as an FS pool: `vitastor-cli modify-pool --used-for-fs testfs data-pool`
+3. Mark data pool as an FS pool: `vitastor-cli modify-pool --used-for-app fs:testfs data-pool`
 4. Either mount the FS: `vitastor-nfs mount --fs testfs --pool data-pool /mnt/vita`
 5. Or start the NFS server: `vitastor-nfs start --fs testfs --pool data-pool`
@@ -111,6 +111,21 @@ settings, because Vitastor NFS proxy doesn't keep uncommitted data in memory
 with these settings. But it may even work without `immediate_commit=all` because
 the Linux NFS client repeats all uncommitted writes if it loses the connection.
 ## RDMA
 vitastor-nfs supports NFS over RDMA, which, in theory, should also allow using
 VitastorFS from GPUDirect.
 You can test NFS-RDMA even if you don't have an RDMA NIC using SoftROCE:
 1. First, add SoftROCE device on both servers: `rdma link add rxe0 type rxe netdev eth0`.
   Here, the `rdma` utility is a part of the iproute2 package, and `eth0` should be replaced with
   the name of your Ethernet NIC.
 2. Start vitastor-nfs with RDMA: `vitastor-nfs start (--fs <NAME> | --block) --pool <POOL> --port 20049 --nfs_rdma 20049 --portmap 0`
 3. Mount the FS: `mount 192.168.0.10:/mnt/test/ /mnt/vita/ -o port=20049,mountport=20049,nfsvers=3,soft,nolock,rdma`
 ## Commands
 ### mount
@@ -131,11 +146,16 @@ The server will be automatically stopped when the FS is unmounted.
 Start network NFS server. Options:
-| <!-- -->        | <!-- -->                                                   |
+| <!-- -->               | <!-- -->                                                                                                                    |
-|-----------------|------------------------------------------------------------|
+|------------------------|-----------------------------------------------------------------------------------------------------------------------------|
-| `--bind <IP>`   | bind service to \<IP> address (default 0.0.0.0)            |
+| `--bind <IP>`          | bind service to \<IP> address (default 0.0.0.0)                                                                             |
-| `--port <PORT>` | use port \<PORT> for NFS services (default is 2049)        |
+| `--port <PORT>`        | use port \<PORT> for NFS services (default is 2049). Specify "auto" to auto-select and print port                           |
-| `--portmap 0`   | do not listen on port 111 (portmap/rpcbind, requires root) |
+| `--portmap 0`          | do not listen on port 111 (portmap/rpcbind, requires root)                                                                  |
 | `--nfs_rdma <PORT>`    | enable NFS-RDMA at RDMA-CM port \<PORT> (you can try 20049). If RDMA is enabled and --port is set to 0, TCP will be disabled |
 | `--nfs_rdma_credit 16` | maximum operation credit for RDMA clients (max iodepth)                                                                     |
 | `--nfs_rdma_send 1024` | maximum RDMA send operation count (should be larger than iodepth)                                                           |
 | `--nfs_rdma_alloc 1M`  | RDMA memory allocation rounding                                                                                             |
 | `--nfs_rdma_gc 64M`    | maximum unused RDMA buffers                                                                                                 |
 ### upgrade
--- a/docs/usage/nfs.ru.md
+++ b/docs/usage/nfs.ru.md
@@ -60,7 +60,7 @@ JSON-формате :-). Для инспекции содержимого БД
   или по крайней мере на HDD, но без EC), но можно и в том же пуле, что данные
   (размер образа значения не имеет):
   `vitastor-cli create -s 10G -p fastpool testfs`
-3. Пометьте пул данных как ФС-пул: `vitastor-cli modify-pool --used-for-fs testfs data-pool`
+3. Пометьте пул данных как ФС-пул: `vitastor-cli modify-pool --used-for-app fs:testfs data-pool`
 4. Либо примонтируйте ФС: `vitastor-nfs mount --fs testfs --pool data-pool /mnt/vita`
 5. Либо запустите сетевой NFS-сервер: `vitastor-nfs start --fs testfs --pool data-pool`
@@ -116,6 +116,21 @@ JSON-формате :-). Для инспекции содержимого БД
 даже без `immediate_commit=all`, потому что NFS-клиент ядра Linux повторяет все
 незафиксированные запросы при потере соединения.
 ## RDMA
 vitastor-nfs поддерживает NFS через RDMA. В теории это также должно позволять использовать
 VitastorFS из GPUDirect.
 Вы можете протестировать NFS-RDMA, даже если у вас нет RDMA-карты, с помощью SoftROCE:
 1. Сначала создайте SoftROCE устройства на обоих тестовых серверах: `rdma link add rxe0 type rxe netdev eth0`.
   Утилита `rdma` входит в состав пакета iproute2, а `eth0` вам нужно заменить на имя своей
   сетевой карты.
 2. Запустите vitastor-nfs с RDMA: `vitastor-nfs start (--fs <NAME> | --block) --pool <POOL> --port 20049 --nfs_rdma 20049 --portmap 0`
 3. Смонтируйте ФС: `mount 192.168.0.10:/mnt/test/ /mnt/vita/ -o port=20049,mountport=20049,nfsvers=3,soft,nolock,rdma`
 ## Команды
 ### mount
@@ -136,11 +151,16 @@ JSON-формате :-). Для инспекции содержимого БД
 Запустить сетевой NFS-сервер. Опции:
-| <!-- -->        | <!-- -->                                                              |
+| <!-- -->               | <!-- -->                                                                                                                    |
-|-----------------|-----------------------------------------------------------------------|
+|------------------------|-----------------------------------------------------------------------------------------------------------------------------|
-| `--bind <IP>`   | принимать соединения по адресу \<IP> (по умолчанию 0.0.0.0 - на всех) |
+| `--bind <IP>`          | принимать соединения по адресу \<IP> (по умолчанию 0.0.0.0 - на всех)                                                       |
-| `--port <PORT>` | использовать порт \<PORT> для NFS-сервисов (по умолчанию 2049)        |
+| `--port <PORT>`        | использовать порт \<PORT> для NFS-сервисов (по умолчанию 2049). Укажите "auto", чтобы выбрать и напечатать случайный порт   |
-| `--portmap 0`   | отключить сервис portmap/rpcbind на порту 111 (по умолчанию включён и требует root привилегий) |
+| `--portmap 0`          | отключить сервис portmap/rpcbind на порту 111 (по умолчанию включён и требует root привилегий)                              |
 | `--nfs_rdma <PORT>`    | включить NFS-RDMA на порту RDMA-CM \<PORT> (попробуйте 20049). Если RDMA включено и указано `--port 0`, TCP будет отключено |
 | `--nfs_rdma_credit 16` | максимальный "кредит", глубина очереди для NFS-клиентов                                                                     |
 | `--nfs_rdma_send 1024` | максимальное число операций RDMA отправки (должно быть больше nfs_rdma_credit)                                              |
 | `--nfs_rdma_alloc 1M`  | округление выделения памяти для RDMA-клиентов                                                                               |
 | `--nfs_rdma_gc 64M`    | максимальный объём неиспользуемой памяти RDMA-клиентом перед освобождением                                                  |
 ### upgrade
--- a/mon/antietcd_adapter.js
+++ b/mon/antietcd_adapter.js
@@ -23,6 +23,9 @@ class AntiEtcdAdapter
            }, {}));
            const cfg_port = config.antietcd_port;
            const is_local = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
            is_local['0.0.0.0'] = true;
            is_local['::'] = true;
            is_local[''] = true;
            const selected = cluster.map(s => s.split(':', 2)).filter(ip => is_local[ip[0]] && (!cfg_port || ip[1] == cfg_port));
            if (selected.length > 1)
            {
--- a/mon/etcd_adapter.js
+++ b/mon/etcd_adapter.js
@@ -232,6 +232,7 @@ class EtcdAdapter
    async become_master()
    {
        const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
        console.log('Waiting to become master');
        // eslint-disable-next-line no-constant-condition
        while (1)
        {
@@ -243,7 +244,6 @@ class EtcdAdapter
            {
                break;
            }
            console.log('Waiting to become master');
            await new Promise(ok => setTimeout(ok, this.mon.config.etcd_start_timeout));
        }
        console.log('Became master');
--- a/mon/etcd_schema.js
+++ b/mon/etcd_schema.js
@@ -56,6 +56,7 @@ const etcd_tree = {
            osd_out_time: 600, // seconds. min: 0
            placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
            use_old_pg_combinator: false,
            osd_backfillfull_ratio: 0.99,
            // client and osd
            tcp_header_buffer_size: 65536,
            use_sync_send_recv: false,
@@ -215,6 +216,7 @@ const etcd_tree = {
                    parent_pool?: <pool_id>,
                    parent_id?: <inode_t>,
                    readonly?: boolean,
                    deleted?: boolean,
                }
            }
        }, */
--- a/mon/mon.js
+++ b/mon/mon.js
@@ -74,6 +74,7 @@ class Mon
        this.state = JSON.parse(JSON.stringify(etcd_tree));
        this.prev_stats = { osd_stats: {}, osd_diff: {} };
        this.recheck_pgs_active = false;
        this.updating_total_stats = false;
        this.watcher_active = false;
        this.old_pg_config = false;
        this.old_pg_stats_seen = false;
@@ -567,6 +568,7 @@ class Mon
    async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
    {
        const etcd_request = { compare: [], success: [] };
        for (const pool_id in (this.state.pg.config||{}).items||{})
        {
            // We should stop all PGs when deleting a pool or changing its PG count
@@ -579,9 +581,24 @@ class Mon
                    return false;
                }
            }
            if (!this.state.config.pools[pool_id])
            {
                // Delete PG history and stats of the deleted pool
                etcd_request.success.push({ requestDeleteRange: {
                    key: b64(this.config.etcd_prefix+'/pg/history/'+pool_id+'/'),
                    range_end: b64(this.config.etcd_prefix+'/pg/history/'+pool_id+'0'),
                } });
                etcd_request.success.push({ requestDeleteRange: {
                    key: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'/'),
                    range_end: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'0'),
                } });
                etcd_request.success.push({ requestDeleteRange: {
                    key: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'/'),
                    range_end: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'0'),
                } });
            }
        }
        const new_pg_config = JSON.parse(JSON.stringify(this.state.pg.config));
        const etcd_request = { compare: [], success: [] };
        for (const pool_id in (new_pg_config||{}).items||{})
        {
            if (!this.state.config.pools[pool_id])
@@ -642,7 +659,19 @@ class Mon
                this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
        }
        new_pg_config.hash = tree_hash;
-        return await this.save_pg_config(new_pg_config, etcd_request);
+        const { backfillfull_pools, backfillfull_osds } = sum_object_counts(
            { ...this.state, pg: { ...this.state.pg, config: new_pg_config } }, this.config
        );
        if (backfillfull_pools.join(',') != ((this.state.pg.config||{}).backfillfull_pools||[]).join(','))
        {
            this.log_backfillfull(backfillfull_osds, backfillfull_pools);
        }
        new_pg_config.backfillfull_pools = backfillfull_pools.length ? backfillfull_pools : undefined;
        if (!await this.save_pg_config(new_pg_config, etcd_request))
        {
            return false;
        }
        return true;
    }
    async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
@@ -714,7 +743,7 @@ class Mon
    async update_total_stats()
    {
        const txn = [];
-        const { object_counts, object_bytes } = sum_object_counts(this.state, this.config);
+        const { object_counts, object_bytes, backfillfull_pools, backfillfull_osds } = sum_object_counts(this.state, this.config);
        let stats = sum_op_stats(this.state.osd, this.prev_stats);
        let { inode_stats, seen_pools } = sum_inode_stats(this.state, this.prev_stats);
        stats.object_counts = object_counts;
@@ -744,29 +773,54 @@ class Mon
                }
            }
        }
-        for (const pool_id in this.state.pool.stats)
+        if (!this.recheck_pgs_active)
        {
-            if (!seen_pools[pool_id])
+            // PG recheck also modifies /pool/stats, so don't touch it here if it's active
            for (const pool_id in this.state.pool.stats)
            {
-                txn.push({ requestDeleteRange: {
+                if (!seen_pools[pool_id])
-                    key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
+                {
-                } });
+                    txn.push({ requestDeleteRange: {
-                delete this.state.pool.stats[pool_id];
+                        key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
-            }
+                    } });
-            else
+                    delete this.state.pool.stats[pool_id];
-            {
+                }
-                const pool_stats = { ...this.state.pool.stats[pool_id] };
+                else
-                serialize_bigints(pool_stats);
+                {
-                txn.push({ requestPut: {
+                    const pool_stats = { ...this.state.pool.stats[pool_id] };
-                    key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
+                    serialize_bigints(pool_stats);
-                    value: b64(JSON.stringify(pool_stats)),
+                    txn.push({ requestPut: {
-                } });
+                        key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
                        value: b64(JSON.stringify(pool_stats)),
                    } });
                }
            }
        }
        if (txn.length)
        {
            await this.etcd.etcd_call('/kv/txn', { success: txn }, this.config.etcd_mon_timeout, 0);
        }
        if (!this.recheck_pgs_active &&
            backfillfull_pools.join(',') != ((this.state.pg.config||{}).backfillfull_pools||[]).join(','))
        {
            this.log_backfillfull(backfillfull_osds, backfillfull_pools);
            const new_pg_config = { ...this.state.pg.config, backfillfull_pools: backfillfull_pools.length ? backfillfull_pools : undefined };
            await this.save_pg_config(new_pg_config);
        }
    }
    // Print a report about a change of the "backfillfull" state.
    //
    // osds: object keyed by OSD number; each value carries BigInt fields `cap`
    //       (logged as "capacity") and `clean` (logged as "target user data").
    //       Both are divided by 1024n twice and printed with the "MB" suffix.
    // pools: array of pool identifiers that are backfillfull now; may be empty.
    log_backfillfull(osds, pools)
    {
        for (const osd_num in osds)
        {
            const { cap, clean } = osds[osd_num];
            const cap_mb = cap/1024n/1024n;
            const clean_mb = clean/1024n/1024n;
            console.log(
                'OSD '+osd_num+' may fill up during rebalance: capacity '+cap_mb+
                ' MB, target user data '+clean_mb+' MB'
            );
        }
        // An empty list is reported too: it means that no pool is backfillfull anymore
        const subject = pools.length ? 'Pool(s) '+pools.join(', ') : 'No pools';
        console.log(subject+' are backfillfull now, applying rebalance configuration');
    }
    schedule_update_stats()
@@ -778,7 +832,21 @@ class Mon
        this.stats_timer = setTimeout(() =>
        {
            this.stats_timer = null;
-            this.update_total_stats().catch(console.error);
+            if (this.updating_total_stats)
            {
                this.schedule_update_stats();
                return;
            }
            this.updating_total_stats = true;
            try
            {
                this.update_total_stats().catch(console.error);
            }
            catch (e)
            {
                console.error(e);
            }
            this.updating_total_stats = false;
        }, this.config.mon_stats_timeout);
    }
--- a/mon/package.json
+++ b/mon/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vitastor-mon",
-  "version": "1.9.2",
+  "version": "2.1.0",
  "description": "Vitastor SDS monitor service",
  "main": "mon-main.js",
  "scripts": {
@@ -9,7 +9,7 @@
  "author": "Vitaliy Filippov",
  "license": "UNLICENSED",
  "dependencies": {
-    "antietcd": "^1.1.0",
+    "antietcd": "^1.1.2",
    "sprintf-js": "^1.1.2",
    "ws": "^7.2.5"
  },
@@ -19,6 +19,6 @@
    "eslint-plugin-node": "^11.1.0"
  },
  "engines": {
-    "node": ">=12.0.0"
+    "node": ">=12.1.0"
  }
 }
--- a/mon/pg_gen.js
+++ b/mon/pg_gen.js
@@ -8,23 +8,9 @@ const LPOptimizer = require('./lp_optimizer/lp_optimizer.js');
 const { scale_pg_count } = require('./pg_utils.js');
 const { make_hier_tree, filter_osds_by_root_node,
    filter_osds_by_tags, filter_osds_by_block_layout, get_affinity_osds } = require('./osd_tree.js');
 const { select_murmur3 } = require('./lp_optimizer/murmur3.js');
-let seed;
+function pick_primary(pool_id, pg_num, pool_config, osd_set, up_osds, aff_osds)
 function reset_rng()
 {
    seed = 0x5f020e43;
 }
 function rng()
 {
    seed ^= seed << 13;
    seed ^= seed >> 17;
    seed ^= seed << 5;
    return seed + 2147483648;
 }
 function pick_primary(pool_config, osd_set, up_osds, aff_osds)
 {
    let alive_set;
    if (pool_config.scheme === 'replicated')
@@ -52,7 +38,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
    {
        return 0;
    }
-    return alive_set[rng() % alive_set.length];
+    return alive_set[select_murmur3(alive_set.length, osd_num => pool_id+'/'+pg_num+'/'+osd_num)];
 }
 function recheck_primary(state, global_config, up_osds, osd_tree)
@@ -66,7 +52,6 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
            continue;
        }
        const aff_osds = get_affinity_osds(pool_cfg, up_osds, osd_tree);
        reset_rng();
        for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
        {
            if (!state.pg.config.items[pool_id])
@@ -76,7 +61,7 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
            const pg_cfg = state.pg.config.items[pool_id][pg_num];
            if (pg_cfg)
            {
-                const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
+                const new_primary = pick_primary(pool_id, pg_num, state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
                if (pg_cfg.primary != new_primary)
                {
                    if (!new_pg_config)
@@ -99,13 +84,12 @@ function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revis
 {
    const aff_osds = get_affinity_osds(state.config.pools[pool_id] || {}, up_osds, osd_tree);
    const pg_items = {};
    reset_rng();
    new_pgs.map((osd_set, i) =>
    {
        osd_set = osd_set.map(osd_num => osd_num === LPOptimizer.NO_OSD ? 0 : osd_num);
        pg_items[i+1] = {
            osd_set,
-            primary: pick_primary(state.config.pools[pool_id], osd_set, up_osds, aff_osds),
+            primary: pick_primary(pool_id, i+1, state.config.pools[pool_id], osd_set, up_osds, aff_osds),
        };
        if (prev_pgs[i] && prev_pgs[i].join(' ') != osd_set.join(' ') &&
            prev_pgs[i].filter(osd_num => osd_num).length > 0)
--- a/mon/scripts/make-etcd
+++ b/mon/scripts/make-etcd
@@ -33,9 +33,11 @@ async function run()
        console.log(config_path+' is missing');
        process.exit(1);
    }
-    if (fs.existsSync("/etc/systemd/system/etcd.service"))
+    const in_docker = fs.existsSync("/etc/vitastor/etcd.conf") &&
        fs.existsSync("/etc/vitastor/docker.conf");
    if (!in_docker && fs.existsSync("/etc/systemd/system/vitastor-etcd.service"))
    {
-        console.log("/etc/systemd/system/etcd.service already exists");
+        console.log("/etc/systemd/system/vitastor-etcd.service already exists");
        process.exit(1);
    }
    const config = JSON.parse(fs.readFileSync(config_path, { encoding: 'utf-8' }));
@@ -52,10 +54,21 @@ async function run()
        console.log('No matching IPs in etcd_address from '+config_path);
        process.exit(0);
    }
-    const etcd_cluster = etcds.map((e, i) => `etcd${i}=http://${e}:2380`).join(',');
+    const etcd_name = 'etcd'+etcds[num].replace(/[^0-9a-z_]/ig, '_');
-    await system(`mkdir -p /var/lib/etcd${num}.etcd`);
+    const etcd_cluster = etcds.map(e => `etcd${e.replace(/[^0-9a-z_]/ig, '_')}=http://${e}:2380`).join(',');
    if (in_docker)
    {
        let etcd_conf = fs.readFileSync("/etc/vitastor/etcd.conf", { encoding: 'utf-8' });
        etcd_conf = replace_env(etcd_conf, 'ETCD_NAME', etcd_name);
        etcd_conf = replace_env(etcd_conf, 'ETCD_IP', etcds[num]);
        etcd_conf = replace_env(etcd_conf, 'ETCD_INITIAL_CLUSTER', etcd_cluster);
        fs.writeFileSync("/etc/vitastor/etcd.conf", etcd_conf);
        console.log('etcd for Vitastor configured. Run `systemctl enable --now vitastor-etcd` to start etcd');
        process.exit(0);
    }
    await system(`mkdir -p /var/lib/etcd`);
    fs.writeFileSync(
-        "/etc/systemd/system/etcd.service",
+        "/etc/systemd/system/vitastor-etcd.service",
 `[Unit]
 Description=etcd for vitastor
 After=network-online.target local-fs.target time-sync.target
@@ -64,14 +77,14 @@ Wants=network-online.target local-fs.target time-sync.target
 [Service]
 Restart=always
 Environment=GOGC=50
-ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
+ExecStart=etcd --name ${etcd_name} --data-dir /var/lib/etcd \\
    --snapshot-count 10000 --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
    --initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
    --initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
    --initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\
    --auto-compaction-retention=10 --auto-compaction-mode=revision
-WorkingDirectory=/var/lib/etcd${num}.etcd
+WorkingDirectory=/var/lib/etcd
-ExecStartPre=+chown -R etcd /var/lib/etcd${num}.etcd
+ExecStartPre=+chown -R etcd /var/lib/etcd
 User=etcd
 PrivateTmp=false
 TasksMax=infinity
@@ -89,6 +102,13 @@ WantedBy=multi-user.target
    process.exit(0);
 }
 // Set `key=value` in an env-style config text.
 // The first line that starts with `key` followed by optional whitespace and `=`
 // is replaced with `key=value`. When there is no such line, trailing whitespace
 // of the text is collapsed to a single newline and `key=value\n` is appended.
 // `key` is inserted into a regular expression as is. Returns the new text.
 function replace_env(text, key, value)
 {
    const assignment = key+'='+value;
    const line_re = new RegExp('^'+key+'\\s*=.*', 'm');
    if (line_re.test(text))
    {
        // Function replacer keeps `$` sequences in the value literal
        return text.replace(line_re, () => assignment);
    }
    return text.replace(/\s*$/, '\n')+assignment+'\n';
 }
 function select_local_etcd(etcds)
 {
    const ifaces = os.networkInterfaces();
--- a/mon/scripts/vitastor-mon.service
+++ b/mon/scripts/vitastor-mon.service
@@ -5,6 +5,7 @@ Wants=network-online.target local-fs.target time-sync.target
 [Service]
 Restart=always
 SyslogIdentifier=vitastor-mon
 ExecStart=node /usr/lib/vitastor/mon/mon-main.js
 WorkingDirectory=/
 User=vitastor
--- a/Show More
+++ b/Show More
`@@ -1,4 +1,4 @@`
	`VITASTOR_VERSION ?= v1.9.2`	`VITASTOR_VERSION ?= v2.1.0`

	`all: build push`	`all: build push`
`@@ -1,4 +1,4 @@`
	`vitastor (1.9.2-1) unstable; urgency=medium`	`vitastor (2.1.0-1) unstable; urgency=medium`

	`* Bugfixes`	`* Bugfixes`
		`@@ -0,0 +1 @@`
							`deb http://vitastor.io/debian bookworm main`
		`@@ -0,0 +1,3 @@`
							`#!/bin/bash`

							`docker exec -it vitastor vitastor-cli "$@"`
		`@@ -0,0 +1,3 @@`
							`#!/bin/bash`

							`docker exec -it vitastor vitastor-disk "$@"`
		`@@ -0,0 +1,3 @@`
							`#!/bin/bash`

							`docker exec -it vitastor fio "$@"`
		`@@ -0,0 +1,3 @@`
							`#!/bin/bash`

							`docker exec -it vitastor vitastor-nbd "$@"`
		`@@ -0,0 +1,3 @@`
							`#!/bin/bash`

							`while :; do sleep infinity; done`
		`@@ -1 +0,0 @@`
			`deb http://vitastor.io/debian bullseye main`