Merge pull request 'master' (#3 ) from vitalif/vitastor:master into master

Reviewed-on: antilles/vitastor#3
Merge pull request 'master' (#2 ) from vitalif/vitastor:master into master
2024-02-13 14:44:09 +03:00 · 2024-01-12 15:04:03 +03:00 · 2024-01-09 13:25:13 +03:00
424 changed files with 5389 additions and 37061 deletions
--- a/.gitea/workflows/buildenv.Dockerfile
+++ b/.gitea/workflows/buildenv.Dockerfile
@ -22,7 +22,7 @@ RUN apt-get update
 RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
    liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
 RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
-RUN apt-get update && apt-get -y install jq lp-solve sudo nfs-common fdisk parted
+RUN apt-get -y install jq lp-solve sudo
 RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
 RUN set -ex; \
--- a/.gitea/workflows/test.yml
+++ b/.gitea/workflows/test.yml
@ -16,7 +16,6 @@ env:
  BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
  TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
  OSD_ARGS: '--etcd_quick_timeout 2000'
  USE_RAMDISK: 1
 concurrency:
  group: ci-${{ github.ref }}
@ -65,13 +64,6 @@ jobs:
    # leak sanitizer sometimes crashes
    - run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test
  # Runs `npm run lint` in /root/vitastor/mon inside the TEST_IMAGE container
  # tagged with the commit SHA; starts only after the `build` job.
  npm_lint:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - run: cd /root/vitastor/mon && npm run lint
  test_add_osd:
    runs-on: ubuntu-latest
    needs: build
@ -198,24 +190,6 @@ jobs:
          echo ""
        done
  # Runs tests/test_etcd_fail.sh with ANTIETCD=1 (10-minute limit) inside the
  # TEST_IMAGE container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_etcd_fail_antietcd:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 10
      run: ANTIETCD=1 /root/vitastor/tests/test_etcd_fail.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  test_interrupted_rebalance:
    runs-on: ubuntu-latest
    needs: build
@ -288,24 +262,6 @@ jobs:
          echo ""
        done
  # Runs tests/test_create_halfhost.sh (3-minute limit) inside the TEST_IMAGE
  # container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_create_halfhost:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_create_halfhost.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  test_failure_domain:
    runs-on: ubuntu-latest
    needs: build
@ -576,42 +532,6 @@ jobs:
          echo ""
        done
  # Runs tests/test_dd.sh (3-minute limit) inside the TEST_IMAGE container
  # tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_dd:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_dd.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  # Runs tests/test_root_node.sh (3-minute limit) inside the TEST_IMAGE
  # container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_root_node:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_root_node.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  test_switch_primary:
    runs-on: ubuntu-latest
    needs: build
@ -720,24 +640,6 @@ jobs:
          echo ""
        done
  # Runs tests/test_heal.sh with ANTIETCD=1 (10-minute limit) inside the
  # TEST_IMAGE container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_heal_antietcd:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 10
      run: ANTIETCD=1 /root/vitastor/tests/test_heal.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  test_heal_csum_32k_dmj:
    runs-on: ubuntu-latest
    needs: build
@ -846,150 +748,6 @@ jobs:
          echo ""
        done
  # Runs tests/test_resize.sh (3-minute limit) inside the TEST_IMAGE container
  # tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_resize:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_resize.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  # Runs tests/test_resize_auto.sh (3-minute limit) inside the TEST_IMAGE
  # container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_resize_auto:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_resize_auto.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  # Runs tests/test_snapshot_pool2.sh (3-minute limit) inside the TEST_IMAGE
  # container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_snapshot_pool2:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_snapshot_pool2.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  # Runs tests/test_osd_tags.sh (3-minute limit) inside the TEST_IMAGE
  # container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_osd_tags:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_osd_tags.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  # Runs tests/test_enospc.sh (3-minute limit) inside the TEST_IMAGE container
  # tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_enospc:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_enospc.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  # Runs tests/test_enospc.sh with SCHEME=xor (3-minute limit) inside the
  # TEST_IMAGE container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_enospc_xor:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: SCHEME=xor /root/vitastor/tests/test_enospc.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  # Runs tests/test_enospc.sh with IMMEDIATE_COMMIT=1 (3-minute limit) inside
  # the TEST_IMAGE container tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_enospc_imm:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_enospc.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  # Runs tests/test_enospc.sh with IMMEDIATE_COMMIT=1 and SCHEME=xor (3-minute
  # limit) inside the TEST_IMAGE container tagged with the commit SHA; starts
  # after `build`. If the test step fails, the testdata logs are printed.
  test_enospc_imm_xor:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: IMMEDIATE_COMMIT=1 SCHEME=xor /root/vitastor/tests/test_enospc.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
  test_scrub:
    runs-on: ubuntu-latest
    needs: build
@ -1098,21 +856,3 @@ jobs:
          echo ""
        done
  # Runs tests/test_nfs.sh (3-minute limit) inside the TEST_IMAGE container
  # tagged with the commit SHA; starts after `build`.
  # If the test step fails, the testdata logs are printed.
  test_nfs:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: 3
      run: /root/vitastor/tests/test_nfs.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          # An unmatched glob stays literal; skip it so cat cannot abort the step
          [ -e "$i" ] || continue
          echo "-------- $i --------"
          cat "$i"
          echo ""
        done
--- a/.gitea/workflows/tests-to-yaml.pl
+++ b/.gitea/workflows/tests-to-yaml.pl
@ -34,10 +34,6 @@ for my $line (<>)
            {
                $test_name .= '_imm';
            }
            elsif ($1 eq 'ANTIETCD')
            {
                $test_name .= '_antietcd';
            }
            else
            {
                $test_name .= '_'.lc($1).'_'.$2;
--- a/.gitignore
+++ b/.gitignore
@ -3,3 +3,16 @@
 package-lock.json
 fio
 qemu
 osd
 stub_osd
 stub_uring_osd
 stub_bench
 osd_test
 osd_peering_pg_test
 dump_journal
 nbd_proxy
 rm_inode
 test_allocator
 test_blockstore
 test_shit
 osd_rmw_test
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
 project(vitastor)
-set(VITASTOR_VERSION "1.9.3")
+set(VERSION "1.4.4")
 add_subdirectory(src)
--- a/README-ru.md
+++ b/README-ru.md
@ -1,4 +1,4 @@
-# Vitastor
+## Vitastor
 [Read English version](README.md)
@ -6,8 +6,8 @@
 Вернём былую скорость кластерному блочному хранилищу!
-Vitastor - распределённая блочная и файловая SDS (программная СХД), прямой аналог Ceph RBD и CephFS,
+Vitastor - распределённая блочная SDS (программная СХД), прямой аналог Ceph RBD и
-а также внутренних СХД популярных облачных провайдеров. Однако, в отличие от них, Vitastor
+внутренних СХД популярных облачных провайдеров. Однако, в отличие от них, Vitastor
 быстрый и при этом простой. Только пока маленький :-).
 Vitastor архитектурно похож на Ceph, что означает атомарность и строгую консистентность,
@ -19,10 +19,10 @@ Vitastor нацелен в первую очередь на SSD и SSD+HDD кл
 TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
 что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
-Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, OpenNebula, Proxmox, Kubernetes.
+Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, Proxmox, Kubernetes.
 Другие драйверы могут также быть легко реализованы.
-Подробности смотрите в документации по ссылкам. Можете начать отсюда: [Быстрый старт](docs/intro/quickstart.ru.md).
+Подробности смотрите в документации по ссылкам ниже.
 ## Презентации и записи докладов
@ -42,7 +42,6 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
 - Установка
  - [Пакеты](docs/installation/packages.ru.md)
  - [Proxmox](docs/installation/proxmox.ru.md)
  - [OpenNebula](docs/installation/opennebula.ru.md)
  - [OpenStack](docs/installation/openstack.ru.md)
  - [Kubernetes CSI](docs/installation/kubernetes.ru.md)
  - [Сборка из исходных кодов](docs/installation/source.ru.md)
@ -51,7 +50,7 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
  - Параметры
    - [Общие](docs/config/common.ru.md)
    - [Сетевые](docs/config/network.ru.md)
-    - [Клиентский код](docs/config/client.ru.md)
+    - [Клиентский код](docs/config/client.ru.md)
    - [Глобальные дисковые параметры](docs/config/layout-cluster.ru.md)
    - [Дисковые параметры OSD](docs/config/layout-osd.ru.md)
    - [Прочие параметры OSD](docs/config/osd.ru.md)
@ -64,13 +63,11 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
  - [fio](docs/usage/fio.ru.md) для тестов производительности
  - [NBD](docs/usage/nbd.ru.md) для монтирования ядром
  - [QEMU и qemu-img](docs/usage/qemu.ru.md)
-  - [NFS](docs/usage/nfs.ru.md) кластерная файловая система и псевдо-ФС прокси
+  - [NFS](docs/usage/nfs.ru.md)-прокси для VMWare и подобных
  - [Администрирование](docs/usage/admin.ru.md)
 - Производительность
  - [Понимание сути производительности](docs/performance/understanding.ru.md)
  - [Теоретический максимум](docs/performance/theoretical.ru.md)
  - [Пример сравнения с Ceph](docs/performance/comparison1.ru.md)
  - [Более новый тест Vitastor 1.3.1](docs/performance/bench2.ru.md)
 ## Автор и лицензия
--- a/README.md
+++ b/README.md
@ -6,9 +6,9 @@
 Make Clustered Block Storage Fast Again.
-Vitastor is a distributed block and file SDS, direct replacement of Ceph RBD and CephFS,
+Vitastor is a distributed block SDS, direct replacement of Ceph RBD and internal SDS's
-and also internal SDS's of public clouds. However, in contrast to them, Vitastor is fast
+of public clouds. However, in contrast to them, Vitastor is fast and simple at the same time.
-and simple at the same time. The only thing is it's slightly young :-).
+The only thing is it's slightly young :-).
 Vitastor is architecturally similar to Ceph which means strong consistency,
 primary-replication, symmetric clustering and automatic data distribution over any
@ -19,10 +19,10 @@ supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1
 with proper hardware which is ~10 times faster than other popular SDS's like Ceph
 or internal systems of public clouds.
-Vitastor supports QEMU, NBD, NFS protocols, OpenStack, OpenNebula, Proxmox, Kubernetes drivers.
+Vitastor supports QEMU, NBD, NFS protocols, OpenStack, Proxmox, Kubernetes drivers.
 More drivers may be created easily.
-Read more details in the documentation. You can start from here: [Quick Start](docs/intro/quickstart.en.md).
+Read more details below in the documentation.
 ## Talks and presentations
@ -42,7 +42,6 @@ Read more details in the documentation. You can start from here: [Quick Start](d
 - Installation
  - [Packages](docs/installation/packages.en.md)
  - [Proxmox](docs/installation/proxmox.en.md)
  - [OpenNebula](docs/installation/opennebula.en.md)
  - [OpenStack](docs/installation/openstack.en.md)
  - [Kubernetes CSI](docs/installation/kubernetes.en.md)
  - [Building from Source](docs/installation/source.en.md)
@ -64,13 +63,11 @@ Read more details in the documentation. You can start from here: [Quick Start](d
  - [fio](docs/usage/fio.en.md) for benchmarks
  - [NBD](docs/usage/nbd.en.md) for kernel mounts
  - [QEMU and qemu-img](docs/usage/qemu.en.md)
-  - [NFS](docs/usage/nfs.en.md) clustered file system and pseudo-FS proxy
+  - [NFS](docs/usage/nfs.en.md) emulator for VMWare and similar
  - [Administration](docs/usage/admin.en.md)
 - Performance
  - [Understanding storage performance](docs/performance/understanding.en.md)
  - [Theoretical performance](docs/performance/theoretical.en.md)
  - [Example comparison with Ceph](docs/performance/comparison1.en.md)
  - [Newer benchmark of Vitastor 1.3.1](docs/performance/bench2.en.md)
 ## Author and License
--- a/copy-fio-includes.sh
+++ b/copy-fio-includes.sh
@ -1,6 +1,6 @@
 #!/bin/bash
-gcc -I. -E -o fio_headers.i src/util/fio_headers.h
+gcc -I. -E -o fio_headers.i src/fio_headers.h
 rm -rf fio-copy
 for i in `grep -Po 'fio/[^"]+' fio_headers.i | sort | uniq`; do
--- a/copy-qemu-includes.sh
+++ b/copy-qemu-includes.sh
@ -5,7 +5,7 @@
 #cd b/qemu; make qapi
 gcc -I qemu/b/qemu `pkg-config glib-2.0 --cflags` \
-    -I qemu/include -E -o qemu_driver.i src/client/qemu_driver.c
+    -I qemu/include -E -o qemu_driver.i src/qemu_driver.c
 rm -rf qemu-copy
 for i in `grep -Po 'qemu/[^"]+' qemu_driver.i | sort | uniq`; do
--- a/csi/Makefile
+++ b/csi/Makefile
@ -1,9 +1,9 @@
-VITASTOR_VERSION ?= v1.9.3
+VERSION ?= v1.4.4
 all: build push
 build:
-	@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
+	@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
 push:
-	@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
+	@docker push vitalif/vitastor-csi:$(VERSION)
--- a/csi/deploy/004-csi-nodeplugin.yaml
+++ b/csi/deploy/004-csi-nodeplugin.yaml
@ -49,7 +49,7 @@ spec:
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
-          image: vitalif/vitastor-csi:v1.9.3
+          image: vitalif/vitastor-csi:v1.4.4
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/007-csi-provisioner.yaml
+++ b/csi/deploy/007-csi-provisioner.yaml
@ -121,7 +121,7 @@ spec:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
-          image: vitalif/vitastor-csi:v1.9.3
+          image: vitalif/vitastor-csi:v1.4.4
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/go.mod
+++ b/csi/go.mod
@ -3,10 +3,10 @@ module vitastor.io/csi
 go 1.15
 require (
-	github.com/container-storage-interface/spec v1.8.0
+	github.com/container-storage-interface/spec v1.4.0
 	github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
 	github.com/kubernetes-csi/csi-lib-utils v0.9.1
-	golang.org/x/net v0.7.0
+	golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
 	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
 	google.golang.org/grpc v1.33.1
 	google.golang.org/protobuf v1.24.0
--- a/csi/go.sum
+++ b/csi/go.sum
@ -41,8 +41,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR
 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/container-storage-interface/spec v1.2.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
-github.com/container-storage-interface/spec v1.8.0 h1:D0vhF3PLIZwlwZEf2eNbpujGCNwspwTYf2idJRJx4xI=
+github.com/container-storage-interface/spec v1.4.0 h1:ozAshSKxpJnYUfmkpZCTYyF/4MYeYlhdXbAvPvfGmkg=
-github.com/container-storage-interface/spec v1.8.0/go.mod h1:ROLik+GhPslwwWRNFF1KasPzroNARibH2rfz1rkg4H0=
+github.com/container-storage-interface/spec v1.4.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@ -182,7 +182,6 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
 github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
 go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
 go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
@ -196,7 +195,6 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -215,7 +213,6 @@ golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCc
 golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
 golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
 golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -231,10 +228,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
 golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
 golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
 golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -245,7 +240,6 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -265,22 +259,13 @@ golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
 golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
 golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@ -301,10 +286,8 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw
 golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
--- a/csi/src/config.go
+++ b/csi/src/config.go
@ -5,7 +5,7 @@ package vitastor
 const (
    vitastorCSIDriverName    = "csi.vitastor.io"
-    vitastorCSIDriverVersion = "1.9.3"
+    vitastorCSIDriverVersion = "1.4.4"
 )
 // Config struct fills the parameters of request or user input
--- a/csi/src/controllerserver.go
+++ b/csi/src/controllerserver.go
@ -8,9 +8,11 @@ import (
    "encoding/json"
    "fmt"
    "strings"
    "bytes"
    "strconv"
    "time"
    "os"
    "os/exec"
    "io/ioutil"
    "github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
@ -112,6 +114,22 @@ func GetConnectionParams(params map[string]string) (map[string]string, error)
    return ctxVars, nil
 }
 func system(program string, args ...string) ([]byte, []byte, error)
 {
    klog.Infof("Running "+program+" "+strings.Join(args, " "))
    c := exec.Command(program, args...)
    var stdout, stderr bytes.Buffer
    c.Stdout, c.Stderr = &stdout, &stderr
    err := c.Run()
    if (err != nil)
    {
        stdoutStr, stderrStr := string(stdout.Bytes()), string(stderr.Bytes())
        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s, status %s\n", stdoutStr+stderrStr, err)
        return nil, nil, status.Error(codes.Internal, stdoutStr+stderrStr+" (status "+err.Error()+")")
    }
    return stdout.Bytes(), stderr.Bytes(), nil
 }
 func invokeCLI(ctxVars map[string]string, args []string) ([]byte, error)
 {
    if (ctxVars["configPath"] != "")
@ -140,12 +158,6 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
        return nil, status.Error(codes.InvalidArgument, "volume capabilities is a required field")
    }
    err := cs.checkCaps(volumeCapabilities)
    if (err != nil)
    {
        return nil, err
    }
    etcdVolumePrefix := req.Parameters["etcdVolumePrefix"]
    poolId, _ := strconv.ParseUint(req.Parameters["poolId"], 10, 64)
    if (poolId == 0)
@ -289,44 +301,13 @@ func (cs *ControllerServer) ValidateVolumeCapabilities(ctx context.Context, req
        return nil, status.Error(codes.InvalidArgument, "volumeCapabilities is nil")
    }
    err := cs.checkCaps(volumeCapabilities)
    if (err != nil)
    {
        return nil, err
    }
    return &csi.ValidateVolumeCapabilitiesResponse{
        Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
            VolumeCapabilities: req.VolumeCapabilities,
        },
    }, nil
 }
 func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability) error
 {
    var volumeCapabilityAccessModes []*csi.VolumeCapability_AccessMode
    for _, mode := range []csi.VolumeCapability_AccessMode_Mode{
        csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
        csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY,
        csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY,
        csi.VolumeCapability_AccessMode_SINGLE_NODE_SINGLE_WRITER,
        csi.VolumeCapability_AccessMode_SINGLE_NODE_MULTI_WRITER,
    } {
        volumeCapabilityAccessModes = append(volumeCapabilityAccessModes, &csi.VolumeCapability_AccessMode{Mode: mode})
    }
    for _, capability := range volumeCapabilities
    {
        if (capability.GetBlock() != nil)
        {
            for _, mode := range []csi.VolumeCapability_AccessMode_Mode{
                csi.VolumeCapability_AccessMode_MULTI_NODE_SINGLE_WRITER,
        csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
    } {
        volumeCapabilityAccessModes = append(volumeCapabilityAccessModes, &csi.VolumeCapability_AccessMode{Mode: mode})
    }
            break
        }
    }
    capabilitySupport := false
    for _, capability := range volumeCapabilities
@ -342,10 +323,14 @@ func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability
    if (!capabilitySupport)
    {
-        return status.Errorf(codes.NotFound, "%v not supported", volumeCapabilities)
+        return nil, status.Errorf(codes.NotFound, "%v not supported", req.GetVolumeCapabilities())
    }
-    return nil
+    return &csi.ValidateVolumeCapabilitiesResponse{
        Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
            VolumeCapabilities: req.VolumeCapabilities,
        },
    }, nil
 }
 // ListVolumes returns a list of volumes
--- a/csi/src/nodeserver.go
+++ b/csi/src/nodeserver.go
@ -5,13 +5,14 @@ package vitastor
 import (
    "context"
    "errors"
    "encoding/json"
    "fmt"
    "os"
    "os/exec"
    "path/filepath"
    "strconv"
    "strings"
    "sync"
    "syscall"
    "time"
@ -33,9 +34,6 @@ type NodeServer struct
    stateDir string
    mounter mount.Interface
    restartInterval time.Duration
    mu sync.Mutex
    cond *sync.Cond
    volumeLocks map[string]bool
 }
 type DeviceState struct
@ -65,9 +63,7 @@ func NewNodeServer(driver *Driver) *NodeServer
        useVduse: checkVduseSupport(),
        stateDir: stateDir,
        mounter: mount.New(""),
        volumeLocks: make(map[string]bool),
    }
    ns.cond = sync.NewCond(&ns.mu)
    if (ns.useVduse)
    {
        ns.restoreVduseDaemons()
@ -85,24 +81,299 @@ func NewNodeServer(driver *Driver) *NodeServer
    return ns
 }
-func (ns *NodeServer) lockVolume(lockId string)
+func checkVduseSupport() bool
 {
-    ns.mu.Lock()
+    // Check VDUSE support (vdpa, vduse, virtio-vdpa kernel modules)
-    defer ns.mu.Unlock()
+    vduse := true
-    for (ns.volumeLocks[lockId])
+    for _, mod := range []string{"vdpa", "vduse", "virtio-vdpa"}
    {
-        ns.cond.Wait()
+        _, err := os.Stat("/sys/module/"+mod)
        if (err != nil)
        {
            if (!errors.Is(err, os.ErrNotExist))
            {
                klog.Errorf("failed to check /sys/module/%s: %v", mod, err)
            }
-    ns.volumeLocks[lockId] = true
+            c := exec.Command("/sbin/modprobe", mod)
-    ns.cond.Broadcast()
+            c.Stdout = os.Stderr
            c.Stderr = os.Stderr
            err := c.Run()
            if (err != nil)
            {
                klog.Errorf("/sbin/modprobe %s failed: %v", mod, err)
                vduse = false
                break
            }
        }
    }
    // Check that vdpa tool functions
    if (vduse)
    {
        c := exec.Command("/sbin/vdpa", "-j", "dev")
        c.Stderr = os.Stderr
        err := c.Run()
        if (err != nil)
        {
            klog.Errorf("/sbin/vdpa -j dev failed: %v", err)
            vduse = false
        }
    }
    if (!vduse)
    {
        klog.Errorf(
            "Your host apparently has no VDUSE support. VDUSE support disabled, NBD will be used to map devices."+
            " For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
        )
    }
    return vduse
 }
-func (ns *NodeServer) unlockVolume(lockId string)
+// NodeStageVolume mounts the volume to a staging path on the node.
 func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error)
 {
-    ns.mu.Lock()
+    return &csi.NodeStageVolumeResponse{}, nil
-    defer ns.mu.Unlock()
+}
-    delete(ns.volumeLocks, lockId)
+
-    ns.cond.Broadcast()
+// NodeUnstageVolume unstages the volume from the staging path
 func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error)
 {
    return &csi.NodeUnstageVolumeResponse{}, nil
 }
 func Contains(list []string, s string) bool
 {
    for i := 0; i < len(list); i++
    {
        if (list[i] == s)
        {
            return true
        }
    }
    return false
 }
 func (ns *NodeServer) mapNbd(volName string, ctxVars map[string]string, readonly bool) (string, error)
 {
    // Map NBD device
    // FIXME: Check if already mapped
    args := []string{
        "map", "--image", volName,
    }
    if (ctxVars["configPath"] != "")
    {
        args = append(args, "--config_path", ctxVars["configPath"])
    }
    if (readonly)
    {
        args = append(args, "--readonly", "1")
    }
    stdout, stderr, err := system("/usr/bin/vitastor-nbd", args...)
    dev := strings.TrimSpace(string(stdout))
    if (dev == "")
    {
        return "", fmt.Errorf("vitastor-nbd did not return the name of NBD device. output: %s", stderr)
    }
    return dev, err
 }
 func (ns *NodeServer) unmapNbd(devicePath string)
 {
    // unmap NBD device
    unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
    if (unmapErr != nil)
    {
        klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
    }
 }
 func findByPidFile(pidFile string) (*os.Process, error)
 {
    pidBuf, err := os.ReadFile(pidFile)
    if (err != nil)
    {
        return nil, err
    }
    pid, err := strconv.ParseInt(strings.TrimSpace(string(pidBuf)), 0, 64)
    if (err != nil)
    {
        return nil, err
    }
    proc, err := os.FindProcess(int(pid))
    if (err != nil)
    {
        return nil, err
    }
    return proc, nil
 }
 func killByPidFile(pidFile string) error
 {
    klog.Infof("killing process with PID from file %s", pidFile)
    proc, err := findByPidFile(pidFile)
    if (err != nil)
    {
        return err
    }
    return proc.Signal(syscall.SIGTERM)
 }
 // startStorageDaemon launches /usr/bin/qemu-storage-daemon with --daemonize,
 // exporting Vitastor image volName as a vduse-blk device whose id and name are vdpaId.
 // pidFile is passed as --pidfile; configPath, when non-empty, is added to the
 // block device spec as "config-path"; readonly makes the export non-writable.
 // Returns the error reported by system().
 func startStorageDaemon(vdpaId, volName, pidFile, configPath string, readonly bool) error
 {
    // Start qemu-storage-daemon
    blockSpec := map[string]interface{}{
        "node-name": "disk1",
        "driver": "vitastor",
        "image": volName,
        "cache": map[string]bool{
            "direct": true,
            "no-flush": false,
        },
        "discard": "unmap",
    }
    if (configPath != "")
    {
        blockSpec["config-path"] = configPath
    }
    // The marshalling error is ignored here
    blockSpecJson, _ := json.Marshal(blockSpec)
    writable := "true"
    if (readonly)
    {
        writable = "false"
    }
    // node-name in --export refers to the "disk1" block node defined above
    _, _, err := system(
        "/usr/bin/qemu-storage-daemon", "--daemonize", "--pidfile", pidFile, "--blockdev", string(blockSpecJson),
        "--export", "vduse-blk,id="+vdpaId+",node-name=disk1,name="+vdpaId+",num-queues=16,queue-size=128,writable="+writable,
    )
    return err
 }
 func (ns *NodeServer) mapVduse(volName string, ctxVars map[string]string, readonly bool) (string, string, error)
 {
    // Generate state file
    stateFd, err := os.CreateTemp(ns.stateDir, "vitastor-vduse-*.json")
    if (err != nil)
    {
        return "", "", err
    }
    stateFile := stateFd.Name()
    stateFd.Close()
    vdpaId := filepath.Base(stateFile)
    vdpaId = vdpaId[0:len(vdpaId)-5] // remove ".json"
    pidFile := ns.stateDir + vdpaId + ".pid"
    // Map VDUSE device via qemu-storage-daemon
    err = startStorageDaemon(vdpaId, volName, pidFile, ctxVars["configPath"], readonly)
    if (err == nil)
    {
        // Add device to VDPA bus
        _, _, err = system("/sbin/vdpa", "-j", "dev", "add", "name", vdpaId, "mgmtdev", "vduse")
        if (err == nil)
        {
            // Find block device name
            var matches []string
            matches, err = filepath.Glob("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/*")
            if (err == nil && len(matches) == 0)
            {
                err = errors.New("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/* is not found")
            }
            if (err == nil)
            {
                blockdev := "/dev/"+filepath.Base(matches[0])
                _, err = os.Stat(blockdev)
                if (err == nil)
                {
                    // Generate state file
                    stateJSON, _ := json.Marshal(&DeviceState{
                        ConfigPath: ctxVars["configPath"],
                        VdpaId:     vdpaId,
                        Image:      volName,
                        Blockdev:   blockdev,
                        Readonly:   readonly,
                        PidFile:    pidFile,
                    })
                    err = os.WriteFile(stateFile, stateJSON, 0600)
                    if (err == nil)
                    {
                        return blockdev, vdpaId, nil
                    }
                }
            }
        }
        killErr := killByPidFile(pidFile)
        if (killErr != nil)
        {
            klog.Errorf("Failed to kill started qemu-storage-daemon: %v", killErr)
        }
        os.Remove(stateFile)
        os.Remove(pidFile)
    }
    return "", "", err
 }
 func (ns *NodeServer) unmapVduse(devicePath string)
 {
    if (len(devicePath) < 6 || devicePath[0:6] != "/dev/v")
    {
        klog.Errorf("%s does not start with /dev/v", devicePath)
        return
    }
    vduseDev, err := os.Readlink("/sys/block/"+devicePath[5:])
    if (err != nil)
    {
        klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx): %v", devicePath, err)
        return
    }
    vdpaId := ""
    p := strings.Index(vduseDev, "/vduse/")
    if (p >= 0)
    {
        vduseDev = vduseDev[p+7:]
        p = strings.Index(vduseDev, "/")
        if (p >= 0)
        {
            vdpaId = vduseDev[0:p]
        }
    }
    if (vdpaId == "")
    {
        klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx), but is %v", devicePath, vduseDev)
        return
    }
    ns.unmapVduseById(vdpaId)
 }
 // unmapVduseById tears down the VDUSE mapping identified by vdpaId:
 // it deletes the device from the VDPA bus (if it is present in sysfs), removes
 // the state file, and sends SIGTERM to the process recorded in the PID file
 // before removing that file. Failures are logged, nothing is returned.
 func (ns *NodeServer) unmapVduseById(vdpaId string)
 {
    // Only run "vdpa dev del" when the device is visible on the bus
    _, err := os.Stat("/sys/bus/vdpa/devices/"+vdpaId)
    if (err != nil)
    {
        klog.Errorf("failed to stat /sys/bus/vdpa/devices/"+vdpaId+": %v", err)
    }
    else
    {
        // The result of the delete command is discarded
        _, _, _ = system("/sbin/vdpa", "-j", "dev", "del", vdpaId)
    }
    // NOTE(review): paths are built by plain concatenation - presumably ns.stateDir ends with "/"; confirm
    stateFile := ns.stateDir + vdpaId + ".json"
    os.Remove(stateFile)
    pidFile := ns.stateDir + vdpaId + ".pid"
    _, err = os.Stat(pidFile)
    if (os.IsNotExist(err))
    {
        // ok, already killed
    }
    else if (err != nil)
    {
        // PID file state is unknown: leave it in place
        klog.Errorf("Failed to stat %v: %v", pidFile, err)
        return
    }
    else
    {
        err = killByPidFile(pidFile)
        if (err != nil)
        {
            klog.Errorf("Failed to kill started qemu-storage-daemon: %v", err)
        }
        // The PID file is removed even if the signal could not be delivered
        os.Remove(pidFile)
    }
 }
 func (ns *NodeServer) restarter()
@ -151,38 +422,8 @@ func (ns *NodeServer) restoreVduseDaemons()
        vdpaId := filepath.Base(stateFile)
        vdpaId = vdpaId[0:len(vdpaId)-5]
        // Check if VDPA device is still added to the bus
-        if (devs[vdpaId] == nil)
+        if (devs[vdpaId] != nil)
        {
            // Unused, clean it up
            unmapVduseById(ns.stateDir, vdpaId)
            continue
        }
        stateJSON, err := os.ReadFile(stateFile)
        if (err != nil)
        {
            klog.Warningf("error reading state file %v: %v", stateFile, err)
            continue
        }
        var state DeviceState
        err = json.Unmarshal(stateJSON, &state)
        if (err != nil)
        {
            klog.Warningf("state file %v contains invalid JSON (error %v): %v", stateFile, err, string(stateJSON))
            continue
        }
        ns.lockVolume(state.ConfigPath+":"+state.Image)
        // Recheck state file after locking
        _, err = os.ReadFile(stateFile)
        if (err != nil)
        {
            klog.Warningf("state file %v disappeared, skipping volume", stateFile)
            ns.unlockVolume(state.ConfigPath+":"+state.Image)
            continue
        }
            // Check if the storage daemon is still active
            pidFile := ns.stateDir + vdpaId + ".pid"
            exists := false
@ -194,65 +435,45 @@ func (ns *NodeServer) restoreVduseDaemons()
            if (!exists)
            {
                // Restart daemon
                stateJSON, err := os.ReadFile(stateFile)
                if (err != nil)
                {
                    klog.Warningf("error reading state file %v: %v", stateFile, err)
                }
                else
                {
                    var state DeviceState
                    err := json.Unmarshal(stateJSON, &state)
                    if (err != nil)
                    {
                        klog.Warningf("state file %v contains invalid JSON (error %v): %v", stateFile, err, string(stateJSON))
                    }
                    else
                    {
                        klog.Warningf("restarting storage daemon for volume %v (VDPA ID %v)", state.Image, vdpaId)
                        _ = startStorageDaemon(vdpaId, state.Image, pidFile, state.ConfigPath, state.Readonly)
                    }
-
+                }
-        ns.unlockVolume(state.ConfigPath+":"+state.Image)
+            }
        }
        else
        {
            // Unused, clean it up
            ns.unmapVduseById(vdpaId)
        }
    }
 }
-// NodeStageVolume mounts the volume to a staging path on the node.
+// NodePublishVolume mounts the volume mounted to the staging path to the target path
-func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error)
+func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error)
 {
-    klog.Infof("received node stage volume request %+v", protosanitizer.StripSecrets(req))
+    klog.Infof("received node publish volume request %+v", protosanitizer.StripSecrets(req))
-    ctxVars := make(map[string]string)
+    targetPath := req.GetTargetPath()
    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    _, err = GetConnectionParams(ctxVars)
    if (err != nil)
    {
        return nil, err
    }
    volName := ctxVars["name"]
    ns.lockVolume(ctxVars["configPath"]+":"+volName)
    defer ns.unlockVolume(ctxVars["configPath"]+":"+volName)
    targetPath := req.GetStagingTargetPath()
    isBlock := req.GetVolumeCapability().GetBlock() != nil
    // Check that it's not already mounted
-    notmnt, err := mount.IsNotMountPoint(ns.mounter, targetPath)
+    _, err := mount.IsNotMountPoint(ns.mounter, targetPath)
    if (err == nil)
    {
        if (!notmnt)
        {
            klog.Errorf("target path %s is already mounted", targetPath)
            return nil, fmt.Errorf("target path %s is already mounted", targetPath)
        }
        var finfo os.FileInfo
        finfo, err = os.Stat(targetPath)
        if (err != nil)
        {
            klog.Errorf("failed to stat %s: %v", targetPath, err)
            return nil, err
        }
        if (finfo.IsDir() != (!isBlock))
        {
            err = os.Remove(targetPath)
            if (err != nil)
            {
                klog.Errorf("failed to remove %s (to recreate it with correct type): %v", targetPath, err)
                return nil, err
            }
            err = os.ErrNotExist
        }
    }
    if (err != nil)
    {
        if (os.IsNotExist(err))
@ -288,14 +509,28 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
        }
    }
    ctxVars := make(map[string]string)
    err = json.Unmarshal([]byte(req.VolumeId), &ctxVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    volName := ctxVars["name"]
    _, err = GetConnectionParams(ctxVars)
    if (err != nil)
    {
        return nil, err
    }
    var devicePath, vdpaId string
    if (!ns.useVduse)
    {
-        devicePath, err = mapNbd(volName, ctxVars, false)
+        devicePath, err = ns.mapNbd(volName, ctxVars, req.GetReadonly())
    }
    else
    {
-        devicePath, vdpaId, err = mapVduse(ns.stateDir, volName, ctxVars, false)
+        devicePath, vdpaId, err = ns.mapVduse(volName, ctxVars, req.GetReadonly())
    }
    if (err != nil)
    {
@ -305,7 +540,6 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
    diskMounter := &mount.SafeFormatAndMount{Interface: ns.mounter, Exec: utilexec.New()}
    if (isBlock)
    {
        klog.Infof("bind-mounting %s to %s", devicePath, targetPath)
        err = diskMounter.Mount(devicePath, targetPath, "", []string{"bind"})
    }
    else
@ -335,40 +569,39 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
        readOnly := Contains(opt, "ro")
        if (existingFormat == "" && !readOnly)
        {
            var cmdOut []byte
            switch fsType
            {
                case "ext4":
                    args := []string{"-m0", "-Enodiscard,lazy_itable_init=1,lazy_journal_init=1", devicePath}
-                    _, err = systemCombined("mkfs.ext4", args...)
+                    cmdOut, err = diskMounter.Exec.Command("mkfs.ext4", args...).CombinedOutput()
                case "xfs":
-                    _, err = systemCombined("mkfs.xfs", "-K", devicePath)
+                    cmdOut, err = diskMounter.Exec.Command("mkfs.xfs", "-K", devicePath).CombinedOutput()
            }
            if (err != nil)
            {
                klog.Errorf("failed to run mkfs error: %v, output: %v", err, string(cmdOut))
                goto unmap
            }
        }
        klog.Infof("formatting and mounting %s to %s with FS %s, options: %v", devicePath, targetPath, fsType, opt)
        err = diskMounter.FormatAndMount(devicePath, targetPath, fsType, opt)
        if (err == nil)
        {
            klog.Infof("successfully mounted %s to %s", devicePath, targetPath)
        }
        // Try to run online resize on mount.
        // FIXME: Implement online resize. It requires online resize support in vitastor-nbd.
        if (err == nil && existingFormat != "" && !readOnly)
        {
            var cmdOut []byte
            switch (fsType)
            {
                case "ext4":
-                    _, err = systemCombined("resize2fs", devicePath)
+                    cmdOut, err = diskMounter.Exec.Command("resize2fs", devicePath).CombinedOutput()
                case "xfs":
-                    _, err = systemCombined("xfs_growfs", devicePath)
+                    cmdOut, err = diskMounter.Exec.Command("xfs_growfs", devicePath).CombinedOutput()
            }
            if (err != nil)
            {
                klog.Errorf("failed to run resizefs error: %v, output: %v", err, string(cmdOut))
                goto unmap
            }
        }
@ -381,202 +614,26 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
        )
        goto unmap
    }
-    return &csi.NodeStageVolumeResponse{}, nil
+    return &csi.NodePublishVolumeResponse{}, nil
 unmap:
    if (!ns.useVduse || len(devicePath) >= 8 && devicePath[0:8] == "/dev/nbd")
    {
-        unmapNbd(devicePath)
+        ns.unmapNbd(devicePath)
    }
    else
    {
-        unmapVduseById(ns.stateDir, vdpaId)
+        ns.unmapVduseById(vdpaId)
    }
    return nil, err
 }
 // NodeUnstageVolume unstages the volume from the staging path:
 // it unmounts the staging target and, when no other mount references remain,
 // unmaps the underlying NBD or VDUSE device.
 // req.VolumeId must be a JSON object; its "name" and "configPath" keys are read here.
 func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error)
 {
    klog.Infof("received node unstage volume request %+v", protosanitizer.StripSecrets(req))
    ctxVars := make(map[string]string)
    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    volName := ctxVars["name"]
    // Hold the volume lock (keyed by "configPath:name") until this call returns
    ns.lockVolume(ctxVars["configPath"]+":"+volName)
    defer ns.unlockVolume(ctxVars["configPath"]+":"+volName)
    targetPath := req.GetStagingTargetPath()
    devicePath, _, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
    if (err != nil)
    {
        if (os.IsNotExist(err))
        {
            return nil, status.Error(codes.NotFound, "Target path not found")
        }
        return nil, err
    }
    if (devicePath == "")
    {
        // volume not mounted
        klog.Warningf("%s is not a mountpoint, deleting", targetPath)
        // Removal error is ignored; the request is reported as successful
        os.Remove(targetPath)
        return &csi.NodeUnstageVolumeResponse{}, nil
    }
    // Collect the mount references before unmounting; they decide below
    // whether the device may be unmapped
    refList, err := ns.mounter.GetMountRefs(targetPath)
    if (err != nil)
    {
        return nil, err
    }
    if (len(refList) > 0)
    {
        klog.Warningf("%s is still referenced: %v", targetPath, refList)
    }
    // unmount
    err = mount.CleanupMountPoint(targetPath, ns.mounter, false)
    if (err != nil)
    {
        return nil, err
    }
    // unmap device
    if (len(refList) == 0)
    {
        if (!ns.useVduse)
        {
            unmapNbd(devicePath)
        }
        else
        {
            unmapVduse(ns.stateDir, devicePath)
        }
    }
    return &csi.NodeUnstageVolumeResponse{}, nil
 }
 // NodePublishVolume mounts the volume mounted to the staging path to the target path
 func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error)
 {
    klog.Infof("received node publish volume request %+v", protosanitizer.StripSecrets(req))
    ctxVars := make(map[string]string)
    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    _, err = GetConnectionParams(ctxVars)
    if (err != nil)
    {
        return nil, err
    }
    volName := ctxVars["name"]
    ns.lockVolume(ctxVars["configPath"]+":"+volName)
    defer ns.unlockVolume(ctxVars["configPath"]+":"+volName)
    stagingTargetPath := req.GetStagingTargetPath()
    targetPath := req.GetTargetPath()
    isBlock := req.GetVolumeCapability().GetBlock() != nil
    // Check that stagingTargetPath is mounted
    notmnt, err := mount.IsNotMountPoint(ns.mounter, stagingTargetPath)
    if (err != nil)
    {
        klog.Errorf("staging path %v is not mounted: %w", stagingTargetPath, err)
        return nil, fmt.Errorf("staging path %v is not mounted: %w", stagingTargetPath, err)
    }
    else if (notmnt)
    {
        klog.Errorf("staging path %v is not mounted", stagingTargetPath)
        return nil, fmt.Errorf("staging path %v is not mounted", stagingTargetPath)
    }
    // Check that targetPath is not already mounted
    notmnt, err = mount.IsNotMountPoint(ns.mounter, targetPath)
    if (err != nil)
    {
        if (os.IsNotExist(err))
        {
            if (isBlock)
            {
                pathFile, err := os.OpenFile(targetPath, os.O_CREATE|os.O_RDWR, 0o600)
                if (err != nil)
                {
                    klog.Errorf("failed to create block device mount target %s with error: %v", targetPath, err)
                    return nil, err
                }
                err = pathFile.Close()
                if (err != nil)
                {
                    klog.Errorf("failed to close %s with error: %v", targetPath, err)
                    return nil, err
                }
            }
            else
            {
                err := os.MkdirAll(targetPath, 0777)
                if (err != nil)
                {
                    klog.Errorf("failed to create fs mount target %s with error: %v", targetPath, err)
                    return nil, err
                }
            }
        }
        else
        {
            return nil, err
        }
    }
    else if (!notmnt)
    {
        klog.Errorf("target path %s is already mounted", targetPath)
        return nil, fmt.Errorf("target path %s is already mounted", targetPath)
    }
    execArgs := []string{"--bind", stagingTargetPath, targetPath}
    if (req.GetReadonly())
    {
        execArgs = append(execArgs, "-o", "ro")
    }
    cmd := exec.Command("mount", execArgs...)
    cmd.Stderr = os.Stderr
    klog.Infof("binding volume %v (%v) from %v to %v", volName, ctxVars["configPath"], stagingTargetPath, targetPath)
    out, err := cmd.Output()
    if (err != nil)
    {
        return nil, fmt.Errorf("Error running mount %v: %s", strings.Join(execArgs, " "), out)
    }
    return &csi.NodePublishVolumeResponse{}, nil
 }
 // NodeUnpublishVolume unmounts the volume from the target path
 func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error)
 {
    klog.Infof("received node unpublish volume request %+v", protosanitizer.StripSecrets(req))
    ctxVars := make(map[string]string)
    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    volName := ctxVars["name"]
    ns.lockVolume(ctxVars["configPath"]+":"+volName)
    defer ns.unlockVolume(ctxVars["configPath"]+":"+volName)
    targetPath := req.GetTargetPath()
-    devicePath, _, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
+    devicePath, refCount, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
    if (err != nil)
    {
        if (os.IsNotExist(err))
@ -592,14 +649,24 @@ func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpu
        os.Remove(targetPath)
        return &csi.NodeUnpublishVolumeResponse{}, nil
    }
    // unmount
    err = mount.CleanupMountPoint(targetPath, ns.mounter, false)
    if (err != nil)
    {
        return nil, err
    }
-
+    // unmap NBD device
    if (refCount == 1)
    {
        if (!ns.useVduse)
        {
            ns.unmapNbd(devicePath)
        }
        else
        {
            ns.unmapVduse(devicePath)
        }
    }
    return &csi.NodeUnpublishVolumeResponse{}, nil
 }
@ -618,17 +685,7 @@ func (ns *NodeServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandV
 // NodeGetCapabilities returns the supported capabilities of the node server
 func (ns *NodeServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error)
 {
-    return &csi.NodeGetCapabilitiesResponse{
+    return &csi.NodeGetCapabilitiesResponse{}, nil
        Capabilities: []*csi.NodeServiceCapability{
            &csi.NodeServiceCapability{
                Type: &csi.NodeServiceCapability_Rpc{
                    Rpc: &csi.NodeServiceCapability_RPC{
                        Type: csi.NodeServiceCapability_RPC_STAGE_UNSTAGE_VOLUME,
                    },
                },
            },
        },
    }, nil
 }
 // NodeGetInfo returns NodeGetInfoResponse for CO.
--- a/csi/src/utils.go
+++ b/csi/src/utils.go
@ -1,342 +0,0 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
 package vitastor
 import (
    "bytes"
    "errors"
    "encoding/json"
    "fmt"
    "os"
    "os/exec"
    "path/filepath"
    "strconv"
    "strings"
    "syscall"
    "k8s.io/klog"
    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/status"
 )
 func Contains(list []string, s string) bool
 {
    for i := 0; i < len(list); i++
    {
        if (list[i] == s)
        {
            return true
        }
    }
    return false
 }
 func checkVduseSupport() bool
 {
    // Check VDUSE support (vdpa, vduse, virtio-vdpa kernel modules)
    vduse := true
    for _, mod := range []string{"vdpa", "vduse", "virtio-vdpa"}
    {
        _, err := os.Stat("/sys/module/"+mod)
        if (err != nil)
        {
            if (!errors.Is(err, os.ErrNotExist))
            {
                klog.Errorf("failed to check /sys/module/%s: %v", mod, err)
            }
            c := exec.Command("/sbin/modprobe", mod)
            c.Stdout = os.Stderr
            c.Stderr = os.Stderr
            err := c.Run()
            if (err != nil)
            {
                klog.Errorf("/sbin/modprobe %s failed: %v", mod, err)
                vduse = false
                break
            }
        }
    }
    // Check that vdpa tool functions
    if (vduse)
    {
        c := exec.Command("/sbin/vdpa", "-j", "dev")
        c.Stderr = os.Stderr
        err := c.Run()
        if (err != nil)
        {
            klog.Errorf("/sbin/vdpa -j dev failed: %v", err)
            vduse = false
        }
    }
    if (!vduse)
    {
        klog.Errorf(
            "Your host apparently has no VDUSE support. VDUSE support disabled, NBD will be used to map devices."+
            " For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
        )
    }
    else
    {
        klog.Infof("VDUSE support enabled successfully")
    }
    return vduse
 }
 func mapNbd(volName string, ctxVars map[string]string, readonly bool) (string, error)
 {
    // Map NBD device
    // FIXME: Check if already mapped
    args := []string{
        "map", "--image", volName,
    }
    if (ctxVars["configPath"] != "")
    {
        args = append(args, "--config_path", ctxVars["configPath"])
    }
    if (readonly)
    {
        args = append(args, "--readonly", "1")
    }
    stdout, stderr, err := system("/usr/bin/vitastor-nbd", args...)
    dev := strings.TrimSpace(string(stdout))
    if (dev == "")
    {
        return "", fmt.Errorf("vitastor-nbd did not return the name of NBD device. output: %s", stderr)
    }
    klog.Infof("Attached volume %s via NBD as %s", volName, dev)
    return dev, err
 }
 func unmapNbd(devicePath string)
 {
    // unmap NBD device
    unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
    if (unmapErr != nil)
    {
        klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
    }
 }
 func findByPidFile(pidFile string) (*os.Process, error)
 {
    pidBuf, err := os.ReadFile(pidFile)
    if (err != nil)
    {
        return nil, err
    }
    pid, err := strconv.ParseInt(strings.TrimSpace(string(pidBuf)), 0, 64)
    if (err != nil)
    {
        return nil, err
    }
    proc, err := os.FindProcess(int(pid))
    if (err != nil)
    {
        return nil, err
    }
    return proc, nil
 }
 func killByPidFile(pidFile string) error
 {
    klog.Infof("killing process with PID from file %s", pidFile)
    proc, err := findByPidFile(pidFile)
    if (err != nil)
    {
        return err
    }
    return proc.Signal(syscall.SIGTERM)
 }
 // startStorageDaemon launches /usr/bin/qemu-storage-daemon with --daemonize,
 // exporting Vitastor image volName as a vduse-blk device whose id and name are vdpaId.
 // pidFile is passed as --pidfile; configPath, when non-empty, is added to the
 // block device spec as "config-path"; readonly makes the export non-writable.
 // Returns the error reported by system().
 func startStorageDaemon(vdpaId, volName, pidFile, configPath string, readonly bool) error
 {
    // Start qemu-storage-daemon
    blockSpec := map[string]interface{}{
        "node-name": "disk1",
        "driver": "vitastor",
        "image": volName,
        "cache": map[string]bool{
            "direct": true,
            "no-flush": false,
        },
        "discard": "unmap",
    }
    if (configPath != "")
    {
        blockSpec["config-path"] = configPath
    }
    // The marshalling error is ignored here
    blockSpecJson, _ := json.Marshal(blockSpec)
    writable := "true"
    if (readonly)
    {
        writable = "false"
    }
    // node-name in --export refers to the "disk1" block node defined above
    _, _, err := system(
        "/usr/bin/qemu-storage-daemon", "--daemonize", "--pidfile", pidFile, "--blockdev", string(blockSpecJson),
        "--export", "vduse-blk,id="+vdpaId+",node-name=disk1,name="+vdpaId+",num-queues=16,queue-size=128,writable="+writable,
    )
    return err
 }
 func mapVduse(stateDir string, volName string, ctxVars map[string]string, readonly bool) (string, string, error)
 {
    // Generate state file
    stateFd, err := os.CreateTemp(stateDir, "vitastor-vduse-*.json")
    if (err != nil)
    {
        return "", "", err
    }
    stateFile := stateFd.Name()
    stateFd.Close()
    vdpaId := filepath.Base(stateFile)
    vdpaId = vdpaId[0:len(vdpaId)-5] // remove ".json"
    pidFile := stateDir + vdpaId + ".pid"
    // Map VDUSE device via qemu-storage-daemon
    err = startStorageDaemon(vdpaId, volName, pidFile, ctxVars["configPath"], readonly)
    if (err == nil)
    {
        // Add device to VDPA bus
        _, _, err = system("/sbin/vdpa", "-j", "dev", "add", "name", vdpaId, "mgmtdev", "vduse")
        if (err == nil)
        {
            // Find block device name
            var matches []string
            matches, err = filepath.Glob("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/*")
            if (err == nil && len(matches) == 0)
            {
                err = errors.New("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/* is not found")
            }
            if (err == nil)
            {
                blockdev := "/dev/"+filepath.Base(matches[0])
                _, err = os.Stat(blockdev)
                if (err == nil)
                {
                    // Generate state file
                    stateJSON, _ := json.Marshal(&DeviceState{
                        ConfigPath: ctxVars["configPath"],
                        VdpaId:     vdpaId,
                        Image:      volName,
                        Blockdev:   blockdev,
                        Readonly:   readonly,
                        PidFile:    pidFile,
                    })
                    err = os.WriteFile(stateFile, stateJSON, 0600)
                    if (err == nil)
                    {
                        klog.Infof("Attached volume %s via VDUSE as %s (VDPA ID %s)", volName, blockdev, vdpaId)
                        return blockdev, vdpaId, nil
                    }
                }
            }
        }
        killErr := killByPidFile(pidFile)
        if (killErr != nil)
        {
            klog.Errorf("Failed to kill started qemu-storage-daemon: %v", killErr)
        }
        os.Remove(stateFile)
        os.Remove(pidFile)
    }
    return "", "", err
 }
 func unmapVduse(stateDir, devicePath string)
 {
    if (len(devicePath) < 6 || devicePath[0:6] != "/dev/v")
    {
        klog.Errorf("%s does not start with /dev/v", devicePath)
        return
    }
    vduseDev, err := os.Readlink("/sys/block/"+devicePath[5:])
    if (err != nil)
    {
        klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx): %v", devicePath, err)
        return
    }
    vdpaId := ""
    p := strings.Index(vduseDev, "/vduse/")
    if (p >= 0)
    {
        vduseDev = vduseDev[p+7:]
        p = strings.Index(vduseDev, "/")
        if (p >= 0)
        {
            vdpaId = vduseDev[0:p]
        }
    }
    if (vdpaId == "")
    {
        klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx), but is %v", devicePath, vduseDev)
        return
    }
    unmapVduseById(stateDir, vdpaId)
 }
 // unmapVduseById tears down the VDUSE mapping identified by vdpaId:
 // it deletes the device from the VDPA bus (if it is present in sysfs), removes
 // the state file, and sends SIGTERM to the process recorded in the PID file
 // before removing that file. Failures are logged, nothing is returned.
 func unmapVduseById(stateDir, vdpaId string)
 {
    // Only run "vdpa dev del" when the device is visible on the bus
    _, err := os.Stat("/sys/bus/vdpa/devices/"+vdpaId)
    if (err != nil)
    {
        klog.Errorf("failed to stat /sys/bus/vdpa/devices/"+vdpaId+": %v", err)
    }
    else
    {
        // The result of the delete command is discarded
        _, _, _ = system("/sbin/vdpa", "-j", "dev", "del", vdpaId)
    }
    // NOTE(review): paths are built by plain concatenation - presumably stateDir ends with "/"; confirm
    stateFile := stateDir + vdpaId + ".json"
    os.Remove(stateFile)
    pidFile := stateDir + vdpaId + ".pid"
    _, err = os.Stat(pidFile)
    if (os.IsNotExist(err))
    {
        // ok, already killed
    }
    else if (err != nil)
    {
        // PID file state is unknown: leave it in place
        klog.Errorf("Failed to stat %v: %v", pidFile, err)
        return
    }
    else
    {
        err = killByPidFile(pidFile)
        if (err != nil)
        {
            klog.Errorf("Failed to kill started qemu-storage-daemon: %v", err)
        }
        // The PID file is removed even if the signal could not be delivered
        os.Remove(pidFile)
    }
 }
 func system(program string, args ...string) ([]byte, []byte, error)
 {
    klog.Infof("Running "+program+" "+strings.Join(args, " "))
    c := exec.Command(program, args...)
    var stdout, stderr bytes.Buffer
    c.Stdout, c.Stderr = &stdout, &stderr
    err := c.Run()
    if (err != nil)
    {
        stdoutStr, stderrStr := string(stdout.Bytes()), string(stderr.Bytes())
        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s\nOutput:\n%s", err, stdoutStr+stderrStr)
        return nil, nil, status.Error(codes.Internal, stdoutStr+stderrStr+" (status "+err.Error()+")")
    }
    return stdout.Bytes(), stderr.Bytes(), nil
 }
 func systemCombined(program string, args ...string) ([]byte, error)
 {
    klog.Infof("Running "+program+" "+strings.Join(args, " "))
    c := exec.Command(program, args...)
    var out bytes.Buffer
    c.Stdout, c.Stderr = &out, &out
    err := c.Run()
    if (err != nil)
    {
        outStr := string(out.Bytes())
        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s, status %s\n", outStr, err)
        return nil, status.Error(codes.Internal, outStr+" (status "+err.Error()+")")
    }
    return out.Bytes(), nil
 }
--- a/debian/build-vitastor-bookworm.sh
+++ b/debian/build-vitastor-bookworm.sh
@ -3,5 +3,5 @@
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
-sudo podman build --build-arg DISTRO=debian --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
+sudo podman build --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-bullseye.sh
+++ b/debian/build-vitastor-bullseye.sh
@ -3,5 +3,5 @@
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
-sudo podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
+sudo podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-buster.sh
+++ b/debian/build-vitastor-buster.sh
@ -3,5 +3,5 @@
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
-sudo podman build --build-arg DISTRO=debian --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
+sudo podman build --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-ubuntu-jammy.sh
+++ b/debian/build-vitastor-ubuntu-jammy.sh
@ -1,7 +0,0 @@
 #!/bin/bash
 # Runs a podman build of vitastor.Dockerfile with DISTRO=ubuntu and REL=jammy.
 # Copy the Dockerfile into the parent directory, which is then used as the
 # build context (".") after the `cd ..` below.
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 # Host directory that is bind-mounted into the build at /root/packages
 mkdir -p packages
 sudo podman build --build-arg DISTRO=ubuntu --build-arg REL=jammy -v `pwd`/packages:/root/packages -f Dockerfile .
 # Remove the temporary Dockerfile copy created above
 rm Dockerfile
--- a/debian/changelog
+++ b/debian/changelog
@ -1,4 +1,4 @@
-vitastor (1.9.3-1) unstable; urgency=medium
+vitastor (1.4.4-1) unstable; urgency=medium
  * Bugfixes
--- a/debian/control
+++ b/debian/control
@ -2,7 +2,7 @@ Source: vitastor
 Section: admin
 Priority: optional
 Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
-Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev
+Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config
 Standards-Version: 4.5.0
 Homepage: https://vitastor.io/
 Rules-Requires-Root: no
@ -53,9 +53,3 @@ Architecture: amd64
 Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client (= ${binary:Version})
 Description: Vitastor Proxmox Virtual Environment storage plugin
 Vitastor storage plugin for Proxmox Virtual Environment.
 Package: vitastor-opennebula
 Architecture: amd64
 Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client, patch, python3, jq
 Description: Vitastor OpenNebula storage plugin
 Vitastor storage plugin for OpenNebula.
--- a/debian/libvirt.Dockerfile
+++ b/debian/libvirt.Dockerfile
@ -1,14 +1,13 @@
 # Build patched libvirt for Debian Buster or Bullseye/Sid inside a container
-# cd ..; podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/libvirt.Dockerfile .
+# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/libvirt.Dockerfile .
 ARG DISTRO=
 ARG REL=
-FROM $DISTRO:$REL
+FROM debian:$REL
 ARG REL=
 WORKDIR /root
-RUN if ([ "${DISTRO}" = "debian" ]) && ( [ "${REL}" = "buster" -o "${REL}" = "bullseye" ] ); then \
+RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
        echo >> /etc/apt/preferences; \
        echo 'Package: *' >> /etc/apt/preferences; \
@ -24,7 +23,7 @@ RUN apt-get -y build-dep libvirt0
 RUN apt-get -y install libglusterfs-dev
 RUN apt-get --download-only source libvirt
-ADD patches/libvirt-5.0-vitastor.diff patches/libvirt-7.0-vitastor.diff patches/libvirt-7.5-vitastor.diff patches/libvirt-7.6-vitastor.diff patches/libvirt-8.0-vitastor.diff /root
+ADD patches/libvirt-5.0-vitastor.diff patches/libvirt-7.0-vitastor.diff patches/libvirt-7.5-vitastor.diff patches/libvirt-7.6-vitastor.diff /root
 RUN set -e; \
    mkdir -p /root/packages/libvirt-$REL; \
    rm -rf /root/packages/libvirt-$REL/*; \
--- a/debian/patched-qemu.Dockerfile
+++ b/debian/patched-qemu.Dockerfile
@ -27,7 +27,7 @@ RUN apt-get -y build-dep qemu
 RUN apt-get --download-only source qemu
 ADD patches /root/vitastor/patches
-ADD src/client/qemu_driver.c /root/qemu_driver.c
+ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
 #RUN set -e; \
 #    apt-get install -y wget; \
@ -52,7 +52,7 @@ RUN set -e; \
    cd /root/packages/qemu-$REL/qemu-*/; \
    quilt push -a; \
    quilt add block/vitastor.c; \
-    cp /root/qemu_driver.c block/vitastor.c; \
+    cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
    quilt refresh; \
    V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor4; \
    if [ "$REL" = bullseye ]; then V=${V}bullseye; fi; \
--- a/debian/vitastor-client.install
+++ b/debian/vitastor-client.install
@ -3,6 +3,4 @@ usr/bin/vitastor-cli
 usr/bin/vitastor-rm
 usr/bin/vitastor-nbd
 usr/bin/vitastor-nfs
 usr/bin/vitastor-kv
 usr/bin/vitastor-kv-stress
 usr/lib/*/libvitastor*.so*
--- a/debian/vitastor-mon.install
+++ b/debian/vitastor-mon.install
@ -1,3 +1,2 @@
-mon usr/lib/vitastor/
+mon usr/lib/vitastor
-mon/scripts/make-etcd usr/lib/vitastor/mon
+mon/vitastor-mon.service /lib/systemd/system
 mon/scripts/vitastor-mon.service /lib/systemd/system
--- a/debian/vitastor-mon.postinst
+++ b/debian/vitastor-mon.postinst
@ -6,6 +6,4 @@ if [ "$1" = "configure" ]; then
 	addgroup --system --quiet vitastor
 	adduser --system --quiet --ingroup vitastor --no-create-home --home /nonexistent vitastor
 	mkdir -p /etc/vitastor
 	mkdir -p /var/lib/vitastor
 	chown vitastor:vitastor /var/lib/vitastor
 fi
--- a/debian/vitastor-opennebula.install
+++ b/debian/vitastor-opennebula.install
@ -1,3 +0,0 @@
 opennebula/remotes var/lib/one/
 opennebula/sudoers.d etc/
 opennebula/install.sh var/lib/one/remotes/datastore/vitastor/
--- a/debian/vitastor-opennebula.postinst
+++ b/debian/vitastor-opennebula.postinst
@ -1,7 +0,0 @@
 #!/bin/sh
 # Runs the Vitastor OpenNebula datastore install.sh when this script is
 # invoked with "configure" as its first argument.
 # Abort immediately if any command fails
 set -e
 # Do nothing for any other value of the first argument
 if [ "$1" = "configure" ]; then
 	/var/lib/one/remotes/datastore/vitastor/install.sh
 fi
--- a/debian/vitastor-opennebula.triggers
+++ b/debian/vitastor-opennebula.triggers
@ -1,4 +0,0 @@
 interest /var/lib/one/remotes/datastore/downloader.sh
 interest /etc/one/oned.conf
 interest /etc/one/vmm_exec/vmm_execrc
 interest /etc/apparmor.d/local/abstractions/libvirt-qemu
--- a/debian/vitastor-osd.install
+++ b/debian/vitastor-osd.install
@ -1,6 +1,6 @@
 usr/bin/vitastor-osd
 usr/bin/vitastor-disk
 usr/bin/vitastor-dump-journal
-mon/scripts/vitastor-osd@.service /lib/systemd/system
+mon/vitastor-osd@.service /lib/systemd/system
-mon/scripts/vitastor.target /lib/systemd/system
+mon/vitastor.target /lib/systemd/system
-mon/scripts/90-vitastor.rules /lib/udev/rules.d
+mon/90-vitastor.rules /lib/udev/rules.d
--- a/debian/vitastor.Dockerfile
+++ b/debian/vitastor.Dockerfile
@ -1,20 +1,18 @@
 # Build Vitastor packages for Debian inside a container
-# cd ..; podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
+# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
 ARG DISTRO=debian
 ARG REL=
-FROM $DISTRO:$REL
+FROM debian:$REL
 ARG DISTRO=debian
 ARG REL=
 WORKDIR /root
-RUN set -e -x; \
+RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
-    if [ "$REL" = "buster" ]; then \
+        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
-        apt-get update; \
+        echo >> /etc/apt/preferences; \
-        apt-get -y install wget; \
+        echo 'Package: *' >> /etc/apt/preferences; \
-        wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
+        echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
-        echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
+        echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
    fi; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
    perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
@ -22,9 +20,10 @@ RUN set -e -x; \
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
 RUN apt-get update
-RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl
+RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
 RUN apt-get -y build-dep fio
 RUN apt-get --download-only source fio
 RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
 ADD . /root/vitastor
 RUN set -e -x; \
@ -36,10 +35,8 @@ RUN set -e -x; \
    mkdir -p /root/packages/vitastor-$REL; \
    rm -rf /root/packages/vitastor-$REL/*; \
    cd /root/packages/vitastor-$REL; \
-    FULLVER=$(head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
+    cp -r /root/vitastor vitastor-1.4.4; \
-    VER=${FULLVER%%-*}; \
+    cd vitastor-1.4.4; \
    cp -r /root/vitastor vitastor-$VER; \
    cd vitastor-$VER; \
    ln -s /root/fio-build/fio-*/ ./fio; \
    FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@ -51,14 +48,10 @@ RUN set -e -x; \
    echo fio-headers.patch >> debian/patches/series; \
    rm -rf a b; \
    echo "dep:fio=$FIO" > debian/fio_version; \
    cd /root/packages/vitastor-$REL/vitastor-$VER; \
    mkdir mon/node_modules; \
    cd mon/node_modules; \
    curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx; \
    curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx; \
    cd /root/packages/vitastor-$REL; \
-    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER; \
+    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.4.orig.tar.xz vitastor-1.4.4; \
-    cd vitastor-$VER; \
+    cd vitastor-1.4.4; \
-    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"; \
+    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
    rm -rf /root/packages/vitastor-$REL/vitastor-*/
--- a/docs/config/client.en.md
+++ b/docs/config/client.en.md
@ -9,10 +9,6 @@
 These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
 affect their interaction with the cluster.
 - [client_iothread_count](#client_iothread_count)
 - [client_retry_interval](#client_retry_interval)
 - [client_eio_retry_interval](#client_eio_retry_interval)
 - [client_retry_enospc](#client_retry_enospc)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
@ -22,53 +18,6 @@ affect their interaction with the cluster.
 - [nbd_timeout](#nbd_timeout)
 - [nbd_max_devices](#nbd_max_devices)
 - [nbd_max_part](#nbd_max_part)
 - [osd_nearfull_ratio](#osd_nearfull_ratio)
 ## client_iothread_count
 - Type: integer
 - Default: 0
 Number of separate threads for handling TCP network I/O at client library
 side. Enabling 4 threads usually allows increasing peak performance of each
 client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
 100-150 to 400 thousand iops, but at the same time it increases latency.
 Latency increase depends on CPU: with CPU power saving disabled latency
 only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
 with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
 iops decrease from 2000 to 1000). RDMA isn't affected by this option.
 It's recommended to enable client I/O threads if you don't use RDMA and want
 to increase peak client performance.
 ## client_retry_interval
 - Type: milliseconds
 - Default: 50
 - Minimum: 10
 - Can be changed online: yes
 Retry time for I/O requests failed due to inactive PGs or network
 connectivity errors.
 ## client_eio_retry_interval
 - Type: milliseconds
 - Default: 1000
 - Can be changed online: yes
 Retry time for I/O requests failed due to data corruption or unfinished
 EC object deletions (has_incomplete PG state). 0 disables such retries
 and clients are not blocked and just get EIO error code instead.
 ## client_retry_enospc
 - Type: boolean
 - Default: true
 - Can be changed online: yes
 Retry writes on out of space errors to wait until some space is freed on
 OSDs.
 ## client_max_dirty_bytes
@ -186,18 +135,3 @@ Maximum number of NBD devices in the system. This value is passed as
 Maximum number of partitions per NBD device. This value is passed as
 `max_part` parameter for the nbd kernel module when vitastor-nbd autoloads it.
 Note that (nbds_max)*(1+max_part) usually can't exceed 256.
 ## osd_nearfull_ratio
 - Type: number
 - Default: 0.95
 - Can be changed online: yes
 Ratio of used space on OSD to treat it as "almost full" in vitastor-cli status output.
 Remember that some client writes may hang or complete with an error if even
 just one OSD becomes 100 % full!
 However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
 unable to start at all), so you'll be able to recover from "out of space" errors
 without destroying and recreating OSDs.
--- a/docs/config/client.ru.md
+++ b/docs/config/client.ru.md
@ -9,10 +9,6 @@
 Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
 затрагивают логику их работы с кластером.
 - [client_iothread_count](#client_iothread_count)
 - [client_retry_interval](#client_retry_interval)
 - [client_eio_retry_interval](#client_eio_retry_interval)
 - [client_retry_enospc](#client_retry_enospc)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
@ -22,55 +18,6 @@
 - [nbd_timeout](#nbd_timeout)
 - [nbd_max_devices](#nbd_max_devices)
 - [nbd_max_part](#nbd_max_part)
 - [osd_nearfull_ratio](#osd_nearfull_ratio)
 ## client_iothread_count
 - Тип: целое число
 - Значение по умолчанию: 0
 Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
 клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
 производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
 чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
 секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
 при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
 падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
 и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
 RDMA данная опция не влияет.
 Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
 RDMA и хотите повысить пиковую производительность клиентов.
 ## client_retry_interval
 - Тип: миллисекунды
 - Значение по умолчанию: 50
 - Минимальное значение: 10
 - Можно менять на лету: да
 Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
 ошибок сети.
 ## client_eio_retry_interval
 - Тип: миллисекунды
 - Значение по умолчанию: 1000
 - Можно менять на лету: да
 Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
 или незавершённых удалений EC-объектов (состояния PG has_incomplete).
 0 отключает повторы таких запросов и клиенты не блокируются, а вместо
 этого просто получают код ошибки EIO.
 ## client_retry_enospc
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 - Можно менять на лету: да
 Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
 ожидать, пока на OSD не освободится место.
 ## client_max_dirty_bytes
@ -188,20 +135,3 @@ RDMA и хотите повысить пиковую производитель
 Максимальное число разделов на одном NBD-устройстве. Данное значение передаётся
 модулю ядра nbd как параметр `max_part`, когда его загружает vitastor-nbd.
 Имейте в виду, что (nbds_max)*(1+max_part) обычно не может превышать 256.
 ## osd_nearfull_ratio
 - Тип: число
 - Значение по умолчанию: 0.95
 - Можно менять на лету: да
 Доля занятого места на OSD, начиная с которой он считается "почти заполненным" в
 выводе vitastor-cli status.
 Помните, что часть клиентских запросов может зависнуть или завершиться с ошибкой,
 если на 100 % заполнится хотя бы 1 OSD!
 Однако, в отличие от Ceph, заполненные на 100 % OSD Vitastor не падают (в Ceph
 заполненные на 100% OSD вообще не могут стартовать), так что вы сможете
 восстановить работу кластера после ошибок отсутствия свободного места
 без уничтожения и пересоздания OSD.
--- a/docs/config/layout-cluster.en.md
+++ b/docs/config/layout-cluster.en.md
@ -56,24 +56,14 @@ Can't be smaller than the OSD data device sector.
 ## immediate_commit
 - Type: string
- Default: all
+- Default: false
-One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
+Another parameter which is really important for performance.
 This parameter is also really important for performance.
 TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
 power loss protection (nonvolatile write-through cache) and also for most HDDs.
 "none" or "small" should be only selected if you use desktop SSDs without
 capacitors or drives with slow write-back cache that can't be disabled. Check
 immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
 Detailed explanation:
 Desktop SSDs are very fast (100000+ iops) for simple random writes
 without cache flush. However, they are really slow (only around 1000 iops)
-if you try to fsync() each write, that is, if you want to guarantee that
+if you try to fsync() each write, that is, when you want to guarantee that
-each change gets actually persisted to the physical media.
+each change gets immediately persisted to the physical media.
 Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
 "Supercapacitor-based Power Loss Protection", on the other hand, are equally
@ -85,8 +75,8 @@ really slow when used with desktop SSDs. Vitastor, however, can also
 efficiently utilize desktop SSDs by postponing fsync until the client calls
 it explicitly.
-This is what this parameter regulates. When it's set to "all" Vitastor
+This is what this parameter regulates. When it's set to "all" the whole
-cluster commits each change to disks immediately and clients just
+Vitastor cluster commits each change to disks immediately and clients just
 ignore fsyncs because they know for sure that they're unneeded. This reduces
 the amount of network roundtrips performed by clients and improves
 performance. So it's always better to use server grade SSDs with
@ -106,8 +96,12 @@ SSD cache or "media-cache" - for example, a lot of Seagate EXOS drives have
 it (they have internal SSD cache even though it's not stated in datasheets).
 Setting this parameter to "all" or "small" in OSD parameters requires enabling
-[disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
+[disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
-[disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
+[disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
-"all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
+"all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
-vitastor-disk tries to do that by default, first checking/disabling drive cache.
+
-If it can't disable drive cache, OSDs get initialized with "none".
+TLDR: For optimal performance, set immediate_commit to "all" if you only use
 SSDs with supercapacitor-based power loss protection (nonvolatile
 write-through cache) for both data and journals in the whole Vitastor
 cluster. Set it to "small" if you only use such SSDs for journals. Leave
 empty if your drives have write-back cache.
--- a/docs/config/layout-cluster.ru.md
+++ b/docs/config/layout-cluster.ru.md
@ -57,18 +57,9 @@ amplification) и эффективность распределения нагр
 ## immediate_commit
 - Тип: строка
- Значение по умолчанию: all
+- Значение по умолчанию: false
-Одно из значений "none", "small" или "all". Глобальное значение, может быть
+Ещё один важный для производительности параметр.
 переопределено [на уровне пула](pool.ru.md#immediate_commit).
 Данный параметр тоже важен для производительности.
 Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
 суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
 устанавливать только при использовании SSD настольного класса без
 суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
 Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
 Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
 секунду) при простой случайной записи без сбросов кэша. Однако они очень
@ -89,7 +80,7 @@ Power Loss Protection" - одинаково быстрые и со сбросо
 эффективно утилизировать настольные SSD.
 Данный параметр влияет как раз на это. Когда он установлен в значение "all",
-кластер Vitastor мгновенно фиксирует каждое изменение на физические
+весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
 носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
 знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
 по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
@ -112,6 +103,13 @@ HDD-дисках с внутренним SSD или "медиа" кэшем - н
 указано в спецификациях).
 Указание "all" или "small" в настройках / командной строке OSD требует
-включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
+включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
-[disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
+[disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
-также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
+также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
 Итого, вкратце: для оптимальной производительности установите
 immediate_commit в значение "all", если вы используете в кластере только SSD
 с суперконденсаторами и для данных, и для журналов. Если вы используете
 такие SSD для всех журналов, но не для данных - можете установить параметр
 в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
 оставьте параметр пустым.
--- a/docs/config/layout-osd.en.md
+++ b/docs/config/layout-osd.en.md
@ -118,13 +118,12 @@ Physical block size of the journal device. Must be a multiple of
 - Type: boolean
 - Default: false
-Do not issue fsyncs to the data device, i.e. do not force it to flush cache.
+Do not issue fsyncs to the data device, i.e. do not flush its cache.
-Safe ONLY if your data device has write-through cache or if write-back
+Safe ONLY if your data device has write-through cache. If you disable
-cache is disabled. If you disable drive cache manually with `hdparm` or
+the cache yourself using `hdparm` or `scsi_disk/cache_type` then make sure
-writing to `/sys/.../scsi_disk/cache_type` then make sure that you do it
+that the cache disable command is run every time before starting Vitastor
-every time before starting Vitastor OSD (vitastor-disk does it automatically).
+OSD, for example, in the systemd unit. See also `immediate_commit` option
-See also [immediate_commit](layout-cluster.en.md#immediate_commit)
+for the instructions to disable cache and how to benefit from it.
 for information about how to benefit from disabled cache.
 ## disable_meta_fsync
@ -172,7 +171,8 @@ size, it actually has to write the whole 4 KB sector.
 Because of this it can actually be beneficial to use SSDs which work well
 with 512 byte sectors and use 512 byte disk_alignment, journal_block_size
-and meta_block_size. But at the moment, no such SSDs are known...
+and meta_block_size. But the only SSD that may fit into this category is
 Intel Optane (probably, not tested yet).
 Clients don't need to be aware of disk_alignment, so it's not required to
 put a modified value into etcd key /vitastor/config/global.
--- a/docs/config/layout-osd.ru.md
+++ b/docs/config/layout-osd.ru.md
@ -122,14 +122,13 @@ SSD-диске, иначе производительность пострада
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
-Не отправлять fsync-и устройству данных, т.е. не заставлять его сбрасывать кэш.
+Не отправлять fsync-и устройству данных, т.е. не сбрасывать его кэш.
 Безопасно, ТОЛЬКО если ваше устройство данных имеет кэш со сквозной
-записью (write-through) или если кэш с отложенной записью (write-back) отключён.
+записью (write-through). Если вы отключаете кэш через `hdparm` или
-Если вы отключаете кэш вручную через `hdparm` или запись в `/sys/.../scsi_disk/cache_type`,
+`scsi_disk/cache_type`, то удостоверьтесь, что команда отключения кэша
-то удостоверьтесь, что вы делаете это каждый раз перед запуском Vitastor OSD
+выполняется перед каждым запуском Vitastor OSD, например, в systemd unit-е.
-(vitastor-disk делает это автоматически). Смотрите также опцию
+Смотрите также опцию `immediate_commit` для инструкций по отключению кэша
-[immediate_commit](layout-cluster.ru.md#immediate_commit) для информации о том,
+и о том, как из этого извлечь выгоду.
 как извлечь выгоду из отключённого кэша.
 ## disable_meta_fsync
@ -180,8 +179,9 @@ SSD и HDD диски используют 4 КБ физические сект
 Поэтому, на самом деле, может быть выгодно найти SSD, хорошо работающие с
 меньшими, 512-байтными, блоками и использовать 512-байтные disk_alignment,
-journal_block_size и meta_block_size. Однако на данный момент такие SSD
+journal_block_size и meta_block_size. Однако единственные SSD, которые
-не известны...
+теоретически могут попасть в эту категорию - это Intel Optane (но и это
 пока не проверялось автором).
 Клиентам не обязательно знать про disk_alignment, так что помещать значение
 этого параметра в etcd в /vitastor/config/global не нужно.
--- a/docs/config/monitor.en.md
+++ b/docs/config/monitor.en.md
@ -8,14 +8,6 @@
 These parameters only apply to Monitors.
 - [use_antietcd](#use_antietcd)
 - [enable_prometheus](#enable_prometheus)
 - [mon_http_port](#mon_http_port)
 - [mon_http_ip](#mon_http_ip)
 - [mon_https_cert](#mon_https_cert)
 - [mon_https_key](#mon_https_key)
 - [mon_https_client_auth](#mon_https_client_auth)
 - [mon_https_ca](#mon_https_ca)
 - [etcd_mon_ttl](#etcd_mon_ttl)
 - [etcd_mon_timeout](#etcd_mon_timeout)
 - [etcd_mon_retries](#etcd_mon_retries)
@ -23,88 +15,6 @@ These parameters only apply to Monitors.
 - [mon_stats_timeout](#mon_stats_timeout)
 - [osd_out_time](#osd_out_time)
 - [placement_levels](#placement_levels)
 - [use_old_pg_combinator](#use_old_pg_combinator)
 ## use_antietcd
 - Type: boolean
 - Default: false
 Enable experimental built-in etcd replacement (clustered key-value database):
 [antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
 When set to true, monitor runs internal antietcd automatically if it finds
 a network interface with an IP address matching one of addresses in the
 `etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
 the monitor command line). If there are multiple matching addresses, it also
 checks `antietcd_port` and antietcd is started for address with matching port.
 By default, antietcd accepts connection on the selected IP address, but it
 can also be overridden manually in the `antietcd_ip` option.
 When antietcd is started, monitor stores cluster metadata itself and exposes
 an etcd-compatible REST API. On disk, these metadata are stored in
 `/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
 or antietcd_data_dir options). All other antietcd parameters
 (see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
 cluster, cluster_key, persist_filter, stale_read can also be set in
 Vitastor configuration with `antietcd_` prefix.
 You can dump/load data to or from antietcd using the antietcd `anticli` tool:
 ```
 npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
 npm exec anticli -e http://antietcd:2379/v3 load < dump.json
 ```
 ## enable_prometheus
 - Type: boolean
 - Default: true
 Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
 Note that only the active (master) monitor exposes metrics, others return
 HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
 Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
 ## mon_http_port
 - Type: integer
 - Default: 8060
 HTTP port for monitors to listen on (including metrics exporter)
 ## mon_http_ip
 - Type: string
 IP address for monitors to listen on (all addresses by default)
 ## mon_https_cert
 - Type: string
 Path to PEM SSL certificate file for monitor to listen using HTTPS
 ## mon_https_key
 - Type: string
 Path to PEM SSL private key file for monitor to listen using HTTPS
 ## mon_https_client_auth
 - Type: boolean
 - Default: false
 Enable HTTPS client certificate-based authorization for monitor connections
 ## mon_https_ca
 - Type: string
 Path to CA certificate for client HTTPS authorization
 ## etcd_mon_ttl
@ -167,11 +77,3 @@ values.  Smaller priority means higher level in tree. For example,
 levels are always predefined and can't be removed. If one of them is not
 present in the configuration, then it is defined with the default priority
 (100 for "host", 101 for "osd").
 ## use_old_pg_combinator
 - Type: boolean
 - Default: false
 Use the old PG combination generator which doesn't support [level_placement](pool.en.md#level_placement)
 and [raw_placement](pool.en.md#raw_placement) for pools which don't use these features.
--- a/docs/config/monitor.ru.md
+++ b/docs/config/monitor.ru.md
@ -8,14 +8,6 @@
 Данные параметры используются только мониторами Vitastor.
 - [use_antietcd](#use_antietcd)
 - [enable_prometheus](#enable_prometheus)
 - [mon_http_port](#mon_http_port)
 - [mon_http_ip](#mon_http_ip)
 - [mon_https_cert](#mon_https_cert)
 - [mon_https_key](#mon_https_key)
 - [mon_https_client_auth](#mon_https_client_auth)
 - [mon_https_ca](#mon_https_ca)
 - [etcd_mon_ttl](#etcd_mon_ttl)
 - [etcd_mon_timeout](#etcd_mon_timeout)
 - [etcd_mon_retries](#etcd_mon_retries)
@ -23,90 +15,6 @@
 - [mon_stats_timeout](#mon_stats_timeout)
 - [osd_out_time](#osd_out_time)
 - [placement_levels](#placement_levels)
 - [use_old_pg_combinator](#use_old_pg_combinator)
 ## use_antietcd
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
 [antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
 Если параметр установлен в true, монитор запускает antietcd автоматически,
 если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
 конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
 командной строки монитора). Если таких адресов несколько, также проверяется
 опция `antietcd_port` и antietcd запускается для адреса с соответствующим
 портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
 IP, но его также можно определить вручную опцией `antietcd_ip`.
 При запуске antietcd монитор сам хранит центральные метаданные кластера и
 выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
 `/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
 antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
 (смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
 node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
 в конфигурации Vitastor с префиксом `antietcd_`.
 Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
 `anticli`:
 ```
 npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
 npm exec anticli -e http://antietcd:2379/v3 load < dump.json
 ```
 ## enable_prometheus
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
 Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
 возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
 в задание по сбору метрик Prometheus.
 Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
 ## mon_http_port
 - Тип: целое число
 - Значение по умолчанию: 8060
 Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
 ## mon_http_ip
 - Тип: строка
 IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
 ## mon_https_cert
 - Тип: строка
 Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
 ## mon_https_key
 - Тип: строка
 Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
 ## mon_https_client_auth
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
 ## mon_https_ca
 - Тип: строка
 Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
 ## etcd_mon_ttl
@ -170,11 +78,3 @@ OSD перед обновлением агрегированной статис
 "host" и "osd" являются предопределёнными и не могут быть удалены. Если
 один из них отсутствует в конфигурации, он доопределяется с приоритетом по
 умолчанию (100 для уровня "host", 101 для "osd").
 ## use_old_pg_combinator
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
 и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
--- a/docs/config/network.en.md
+++ b/docs/config/network.en.md
@ -25,11 +25,12 @@ between clients, OSDs and etcd.
 - [peer_connect_timeout](#peer_connect_timeout)
 - [osd_idle_timeout](#osd_idle_timeout)
 - [osd_ping_timeout](#osd_ping_timeout)
 - [up_wait_retry_interval](#up_wait_retry_interval)
 - [max_etcd_attempts](#max_etcd_attempts)
 - [etcd_quick_timeout](#etcd_quick_timeout)
 - [etcd_slow_timeout](#etcd_slow_timeout)
 - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
+- [etcd_ws_keepalive_timeout](#etcd_ws_keepalive_timeout)
 ## tcp_header_buffer_size
@ -68,17 +69,11 @@ but they are not connected to the cluster.
 - Type: string
 RDMA device name to use for Vitastor OSD communications (for example,
-"rocep5s0f0"). If not specified, Vitastor will try to find an RoCE
+"rocep5s0f0"). Now Vitastor supports all adapters, even ones without
-device matching [osd_network](osd.en.md#osd_network), preferring RoCEv2,
+ODP support, like Mellanox ConnectX-3 and non-Mellanox cards.
 or choose the first available RDMA device if no RoCE devices are
 found or if `osd_network` is not specified. Auto-selection is also
 unsupported with old libibverbs < v32, like in Debian 10 Buster or
 CentOS 7.
-Vitastor supports all adapters, even ones without ODP support, like
+Versions up to Vitastor 1.2.0 required ODP which is only present in
-Mellanox ConnectX-3 and non-Mellanox cards. Versions up to Vitastor
+Mellanox ConnectX >= 4. See also [rdma_odp](#rdma_odp).
 1.2.0 required ODP which is only present in Mellanox ConnectX >= 4.
 See also [rdma_odp](#rdma_odp).
 Run `ibv_devinfo -v` as root to list available RDMA devices and their
 features.
@ -101,17 +96,15 @@ your device has.
 ## rdma_gid_index
 - Type: integer
 - Default: 0
 Global address identifier index of the RDMA device to use. Different GID
 indexes may correspond to different protocols like RoCEv1, RoCEv2 and iWARP.
 Search for "GID" in `ibv_devinfo -v` output to determine which GID index
 you need.
-If not specified, Vitastor will try to auto-select a RoCEv2 IPv4 GID, then
+**IMPORTANT:** If you want to use RoCEv2 (as recommended) then the correct
-RoCEv2 IPv6 GID, then RoCEv1 IPv4 GID, then RoCEv1 IPv6 GID, then IB GID.
+rdma_gid_index is usually 1 (IPv6) or 3 (IPv4).
 GID auto-selection is unsupported with libibverbs < v32.
 A correct rdma_gid_index for RoCEv2 is usually 1 (IPv6) or 3 (IPv4).
 ## rdma_mtu
@ -219,6 +212,17 @@ Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
 within this time, the connection to it is dropped and a reconnection attempt
 is scheduled.
 ## up_wait_retry_interval
 - Type: milliseconds
 - Default: 50
 - Minimum: 10
 - Can be changed online: yes
 OSDs respond to clients with a special error code when they receive I/O
 requests for a PG that's not synchronized and started. This parameter sets
 the time for the clients to wait before re-attempting such I/O requests.
 ## max_etcd_attempts
 - Type: integer
@ -253,10 +257,10 @@ Timeout for etcd requests which are allowed to wait for some time.
 Timeout for etcd connection HTTP Keep-Alive. Should be higher than
 etcd_report_interval to guarantee that keepalive actually works.
-## etcd_ws_keepalive_interval
+## etcd_ws_keepalive_timeout
 - Type: seconds
- Default: 5
+- Default: 30
 - Can be changed online: yes
 etcd websocket ping interval required to keep the connection alive and
--- a/docs/config/network.ru.md
+++ b/docs/config/network.ru.md
@ -25,11 +25,12 @@
 - [peer_connect_timeout](#peer_connect_timeout)
 - [osd_idle_timeout](#osd_idle_timeout)
 - [osd_ping_timeout](#osd_ping_timeout)
 - [up_wait_retry_interval](#up_wait_retry_interval)
 - [max_etcd_attempts](#max_etcd_attempts)
 - [etcd_quick_timeout](#etcd_quick_timeout)
 - [etcd_slow_timeout](#etcd_slow_timeout)
 - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
+- [etcd_ws_keepalive_timeout](#etcd_ws_keepalive_timeout)
 ## tcp_header_buffer_size
@ -71,17 +72,12 @@ RDMA может быть нужно только если у клиентов е
 - Тип: строка
 Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
-Если не указано, Vitastor попробует найти RoCE-устройство, соответствующее
+Сейчас Vitastor поддерживает все модели адаптеров, включая те, у которых
 [osd_network](osd.ru.md#osd_network), предпочитая RoCEv2, или выбрать первое
 попавшееся RDMA-устройство, если RoCE-устройств нет или если сеть `osd_network`
 не задана. Также автовыбор не поддерживается со старыми версиями библиотеки
 libibverbs < v32, например в Debian 10 Buster или CentOS 7.
 Vitastor поддерживает все модели адаптеров, включая те, у которых
 нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
-картами производства не Mellanox. Версии Vitastor до 1.2.0 включительно
+картами производства не Mellanox.
-требовали ODP, который есть только на Mellanox ConnectX 4 и более новых.
+
-См. также [rdma_odp](#rdma_odp).
+Версии Vitastor до 1.2.0 включительно требовали ODP, который есть только
 на Mellanox ConnectX 4 и более новых. См. также [rdma_odp](#rdma_odp).
 Запустите `ibv_devinfo -v` от имени суперпользователя, чтобы посмотреть
 список доступных RDMA-устройств, их параметры и возможности.
@ -106,18 +102,15 @@ Control) и ECN (Explicit Congestion Notification).
 ## rdma_gid_index
 - Тип: целое число
 - Значение по умолчанию: 0
 Номер глобального идентификатора адреса RDMA-устройства, который следует
 использовать. Разным gid_index могут соответствовать разные протоколы связи:
 RoCEv1, RoCEv2, iWARP. Чтобы понять, какой нужен вам - смотрите строчки со
 словом "GID" в выводе команды `ibv_devinfo -v`.
-Если не указан, Vitastor попробует автоматически выбрать сначала GID,
+**ВАЖНО:** Если вы хотите использовать RoCEv2 (как мы и рекомендуем), то
-соответствующий RoCEv2 IPv4, потом RoCEv2 IPv6, потом RoCEv1 IPv4, потом
+правильный rdma_gid_index, как правило, 1 (IPv6) или 3 (IPv4).
 RoCEv1 IPv6, потом IB. Авто-выбор GID не поддерживается со старыми версиями
 libibverbs < v32.
 Правильный rdma_gid_index для RoCEv2, как правило, 1 (IPv6) или 3 (IPv4).
 ## rdma_mtu
@ -228,6 +221,19 @@ OSD в любом случае согласовывают реальное зн
 Если OSD не отвечает за это время, соединение отключается и производится
 повторная попытка соединения.
 ## up_wait_retry_interval
 - Тип: миллисекунды
 - Значение по умолчанию: 50
 - Минимальное значение: 10
 - Можно менять на лету: да
 Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
 поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
 они отвечают клиентам специальным кодом ошибки, означающим, что клиент
 должен некоторое время подождать перед повторением запроса. Именно это время
 ожидания задаёт данный параметр.
 ## max_etcd_attempts
 - Тип: целое число
@ -264,10 +270,10 @@ OSD в любом случае согласовывают реальное зн
 Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
 etcd_report_interval, чтобы keepalive гарантированно работал.
-## etcd_ws_keepalive_interval
+## etcd_ws_keepalive_timeout
 - Тип: секунды
- Значение по умолчанию: 5
+- Значение по умолчанию: 30
 - Можно менять на лету: да
 Интервал проверки живости вебсокет-подключений к etcd.
--- a/docs/config/osd.en.md
+++ b/docs/config/osd.en.md
@ -10,7 +10,6 @@ These parameters only apply to OSDs, are not fixed at the moment of OSD drive
 initialization and can be changed - either with an OSD restart or, for some of
 them, even without restarting by updating configuration in etcd.
 - [osd_iothread_count](#osd_iothread_count)
 - [etcd_report_interval](#etcd_report_interval)
 - [etcd_stats_interval](#etcd_stats_interval)
 - [run_primary](#run_primary)
@ -62,18 +61,6 @@ them, even without restarting by updating configuration in etcd.
 - [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
 - [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
 ## osd_iothread_count
 - Type: integer
 - Default: 0
 TCP network I/O thread count for OSD. When non-zero, a single OSD process
 may handle more TCP I/O, but at a cost of increased latency because thread
 switching overhead occurs. RDMA isn't affected by this option.
 Because of latency, instead of enabling OSD I/O threads it's recommended to
 just create multiple OSDs per disk, or use RDMA.
 ## etcd_report_interval
 - Type: seconds
--- a/docs/config/osd.ru.md
+++ b/docs/config/osd.ru.md
@ -11,7 +11,6 @@
 момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
 изменения конфигурации в etcd.
 - [osd_iothread_count](#osd_iothread_count)
 - [etcd_report_interval](#etcd_report_interval)
 - [etcd_stats_interval](#etcd_stats_interval)
 - [run_primary](#run_primary)
@ -63,19 +62,6 @@
 - [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
 - [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
 ## osd_iothread_count
 - Тип: целое число
 - Значение по умолчанию: 0
 Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
 стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
 по сети больше данных, но ухудшает задержку из-за накладных расходов
 переключения потоков. На работу RDMA опция не влияет.
 Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
 просто создавать по несколько OSD на каждом диске, или использовать RDMA.
 ## etcd_report_interval
 - Тип: секунды
--- a/docs/config/pool.en.md
+++ b/docs/config/pool.en.md
@ -32,8 +32,6 @@ Parameters:
 - [pg_minsize](#pg_minsize)
 - [pg_count](#pg_count)
 - [failure_domain](#failure_domain)
 - [level_placement](#level_placement)
 - [raw_placement](#raw_placement)
 - [max_osd_combinations](#max_osd_combinations)
 - [block_size](#block_size)
 - [bitmap_granularity](#bitmap_granularity)
@ -43,7 +41,6 @@ Parameters:
 - [osd_tags](#osd_tags)
 - [primary_affinity_tags](#primary_affinity_tags)
 - [scrub_interval](#scrub_interval)
 - [used_for_fs](#used_for_fs)
 Examples:
@ -55,7 +52,7 @@ Examples:
 OSD placement tree is set in a separate etcd key `/vitastor/config/node_placement`
 in the following JSON format:
-```
+`
 {
  "<node name or OSD number>": {
    "level": "<level>",
@ -63,7 +60,7 @@ in the following JSON format:
  },
  ...
 }
-```
+`
 Here, if a node name is a number then it is assumed to refer to an OSD.
 Level of the OSD is always "osd" and cannot be overridden. You may only
@ -86,11 +83,7 @@ Parent node reference is required for intermediate tree nodes.
 Separate OSD settings are set in etcd keys `/vitastor/config/osd/<number>`
 in JSON format `{"<key>":<value>}`.
-As of now, the following settings are supported:
+As of now, two settings are supported:
 - [reweight](#reweight)
 - [tags](#tags)
 - [noout](#noout)
 ## reweight
@ -113,14 +106,6 @@ subsets and then use a specific subset for pool instead of all OSDs.
 For example you can mark SSD OSDs with tag "ssd" and HDD OSDs with "hdd" and
 such tags will work as device classes.
 ## noout
 - Type: boolean
 - Default: false
 If set to true, [osd_out_time](monitor.en.md#osd_out_time) is ignored for this
 OSD and it's never removed from data distribution by the monitor.
 # Pool parameters
 ## name
@ -169,26 +154,6 @@ That is, if it becomes impossible to place PG data on at least (pg_minsize)
 OSDs, PG is deactivated for both read and write. So you know that a fresh
 write always goes to at least (pg_minsize) OSDs (disks).
 For example, the difference between pg_minsize 2 and 1 in a 3-way replicated
 pool (pg_size=3) is:
 - If 2 hosts go down with pg_minsize=2, the pool becomes inactive and remains
  inactive for [osd_out_time](monitor.en.md#osd_out_time) (10 minutes). After
  this timeout, the monitor selects replacement hosts/OSDs and the pool comes
  up and starts to heal. Therefore, if you don't have replacement OSDs, i.e.
  if you only have 3 hosts with OSDs and 2 of them are down, the pool remains
  inactive until you add or return at least 1 host (or change failure_domain
  to "osd").
 - If 2 hosts go down with pg_minsize=1, the pool only experiences a short
  I/O pause until the monitor notices that OSDs are down (5-10 seconds with
  the default [etcd_report_interval](osd.en.md#etcd_report_interval)). After
  this pause, I/O resumes, but new data is temporarily written in only 1 copy.
  Then, after osd_out_time, the monitor also selects replacement OSDs and the
  pool starts to heal.
 So, pg_minsize regulates the number of failures that a pool can tolerate
 without temporary downtime for [osd_out_time](monitor.en.md#osd_out_time),
 but at a cost of slightly reduced storage reliability.
 FIXME: pg_minsize behaviour may be changed in the future to only make PGs
 read-only instead of deactivating them.
@ -200,8 +165,8 @@ read-only instead of deactivating them.
 Number of PGs for this pool. The value should be big enough for the monitor /
 LP solver to be able to optimize data placement.
-"Enough" is usually around 10-100 PGs per OSD, i.e. you set pg_count for pool
+"Enough" is usually around 64-128 PGs per OSD, i.e. you set pg_count for pool
-to (total OSD count * 10 / pg_size). You can round it to the closest power of 2,
+to (total OSD count * 100 / pg_size). You can round it to the closest power of 2,
 because it makes it easier to reduce or increase PG count later by dividing or
 multiplying it by 2.
@ -223,69 +188,6 @@ never put on OSDs in the same failure domain (for example, on the same host).
 So failure domain specifies the unit whose failure you are protecting yourself
 from.
 ## level_placement
 - Type: string
 Additional failure domain rules, applied in conjunction with failure_domain.
 Must be specified in the following form:
 `<placement level>=<sequence of characters>, <level2>=<sequence2>, ...`
 Sequence should be exactly [pg_size](#pg_size) character long. Each character
 corresponds to an OSD in the PG of this pool. Equal characters mean that
 corresponding items of the PG should be placed into the same placement tree
 item at this level. Different characters mean that items should be placed into
 different items.
 For example, if you want an EC 4+2 pool and you want every 2 chunks to be stored
 in its own datacenter and you also want each chunk to be stored on a different
 host, you should set `level_placement` to `dc=112233 host=123456`.
 Or you can set `level_placement` to `dc=112233` and leave `failure_domain` empty,
 because `host` is the default `failure_domain` and it will be applied anyway.
 Without this rule, it may happen that 3 chunks will be stored on OSDs in the
 same datacenter, and the data will become inaccessible if that datacenter goes
 down in this case.
 Of course, you should group your hosts into datacenters before applying the rule
 by setting [placement_levels](monitor.en.md#placement_levels) to something like
 `{"dc":90,"host":100,"osd":110}` and add DCs to [node_placement](#placement-tree),
 like `{"dc1":{"level":"dc"},"host1":{"parent":"dc1"},...}`.
 ## raw_placement
 - Type: string
 Raw PG placement rules, specified in the form of a DSL (domain-specific language).
 Use only if you really know what you're doing :)
 DSL specification:
 ```
 dsl := item | item ("\n" | ",") items
 item := "any" | rules
 rules := rule | rule rules
 rule := level operator arg
 level := /\w+/
 operator := "!=" | "=" | ">" | "?="
 arg := value | "(" values ")"
 values := value | value "," values
 value := item_ref | constant_id
 item_ref := /\d+/
 constant_id := /"([^"]+)"/
 ```
 "?=" operator means "preferred". I.e. `dc ?= "meow"` means "prefer datacenter meow
 for this chunk, but put into another dc if it's unavailable".
 Examples:
 - Simple 3 replicas with failure_domain=host: `any, host!=1, host!=(1,2)`
 - EC 4+2 in 3 DC: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
 - 1 replica in fixed DC + 2 in random DCs: `dc?=meow, dc!=1, dc!=(1,2)`
 ## max_osd_combinations
 - Type: integer
@ -377,25 +279,6 @@ of the OSDs containing a data chunk for a PG.
 Automatic scrubbing interval for this pool. Overrides
 [global scrub_interval setting](osd.en.md#scrub_interval).
 ## used_for_fs
 - Type: string
 If non-empty, the pool is marked as used for VitastorFS with metadata stored
 in a block image (regular Vitastor volume) whose name is the value of this pool parameter.
 When a pool is marked as used for VitastorFS, regular block volume creation in it
 is disabled (vitastor-cli refuses to create images without --force) to protect
 the user from block volume and FS file ID collisions and data loss.
 [vitastor-nfs](../usage/nfs.en.md), in its turn, refuses to use pools not marked
 for the corresponding FS when starting. This also implies that you can use one
 pool only for one VitastorFS.
 The second thing that is disabled for VitastorFS pools is reporting per-inode space
 usage statistics in etcd because a FS pool may store a very large number of files
 and statistics for them all would take a lot of space in etcd.
 # Examples
 ## Replicated pool
--- a/docs/config/pool.ru.md
+++ b/docs/config/pool.ru.md
@ -31,8 +31,6 @@
 - [pg_minsize](#pg_minsize)
 - [pg_count](#pg_count)
 - [failure_domain](#failure_domain)
 - [level_placement](#level_placement)
 - [raw_placement](#raw_placement)
 - [max_osd_combinations](#max_osd_combinations)
 - [block_size](#block_size)
 - [bitmap_granularity](#bitmap_granularity)
@ -42,7 +40,6 @@
 - [osd_tags](#osd_tags)
 - [primary_affinity_tags](#primary_affinity_tags)
 - [scrub_interval](#scrub_interval)
 - [used_for_fs](#used_for_fs)
 Примеры:
@ -54,7 +51,7 @@
 Дерево размещения OSD задаётся в отдельном ключе etcd `/vitastor/config/node_placement`
 в следующем JSON-формате:
-```
+`
 {
  "<имя узла или номер OSD>": {
    "level": "<уровень>",
@ -62,7 +59,7 @@
  },
  ...
 }
-```
+`
 Здесь, если название узла - число, считается, что это OSD. Уровень OSD
 всегда равен "osd" и не может быть переопределён. Для OSD вы можете только
@ -85,11 +82,10 @@
 Настройки отдельных OSD задаются в ключах etcd `/vitastor/config/osd/<number>`
 в JSON-формате `{"<key>":<value>}`.
-На данный момент поддерживаются следующие настройки:
+На данный момент поддерживаются две настройки:
 - [reweight](#reweight)
 - [tags](#tags)
 - [noout](#noout)
 ## reweight
@ -113,14 +109,6 @@
 всех. Можно, например, пометить SSD OSD тегом "ssd", а HDD тегом "hdd", в
 этом смысле теги работают аналогично классам устройств.
 ## noout
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Если установлено в true, то [osd_out_time](monitor.ru.md#osd_out_time) для этого
 OSD игнорируется и OSD не удаляется из распределения данных монитором.
 # Параметры
 ## name
@ -169,26 +157,6 @@ OSD игнорируется и OSD не удаляется из распред
 OSD, PG деактивируется на чтение и запись. Иными словами, всегда известно,
 что новые блоки данных всегда записываются как минимум на pg_minsize дисков.
 Для примера, разница между pg_minsize 2 и 1 в реплицированном пуле с 3 копиями
 данных (pg_size=3), проявляется следующим образом:
 - Если 2 сервера отключаются при pg_minsize=2, пул становится неактивным и
  остаётся неактивным в течение [osd_out_time](monitor.ru.md#osd_out_time)
  (10 минут), после чего монитор назначает другие OSD/серверы на замену, пул
  поднимается и начинает восстанавливать недостающие копии данных. Соответственно,
  если OSD на замену нет - то есть, если у вас всего 3 сервера с OSD и 2 из них
  недоступны - пул так и остаётся недоступным до тех пор, пока вы не вернёте
  или не добавите хотя бы 1 сервер (или не переключите failure_domain на "osd").
 - Если 2 сервера отключаются при pg_minsize=1, ввод-вывод лишь приостанавливается
  на короткое время, до тех пор, пока монитор не поймёт, что OSD отключены
  (что занимает 5-10 секунд при стандартном [etcd_report_interval](osd.ru.md#etcd_report_interval)).
  После этого ввод-вывод восстанавливается, но новые данные временно пишутся
  всего в 1 копии. Когда же проходит osd_out_time, монитор точно так же назначает
  другие OSD на замену выбывшим и пул начинает восстанавливать копии данных.
 То есть, pg_minsize регулирует число отказов, которые пул может пережить без
 временной остановки обслуживания на [osd_out_time](monitor.ru.md#osd_out_time),
 но ценой немного пониженных гарантий надёжности.
 FIXME: Поведение pg_minsize может быть изменено в будущем с полной деактивации
 PG на перевод их в режим только для чтения.
@ -200,8 +168,8 @@ PG на перевод их в режим только для чтения.
 Число PG для данного пула. Число должно быть достаточно большим, чтобы монитор
 мог равномерно распределить по ним данные.
-Обычно это означает примерно 10-100 PG на 1 OSD, т.е. pg_count можно устанавливать
+Обычно это означает примерно 64-128 PG на 1 OSD, т.е. pg_count можно устанавливать
-равным (общему числу OSD * 10 / pg_size). Значение можно округлить до ближайшей
+равным (общему числу OSD * 100 / pg_size). Значение можно округлить до ближайшей
 степени 2, чтобы потом было легче уменьшать или увеличивать число PG, умножая
 или деля его на 2.
@ -222,71 +190,6 @@ PG в Vitastor эферемерны, то есть вы можете менят
 Иными словами, домен отказа - это то, от отказа чего вы защищаете себя избыточным
 хранением.
 ## level_placement
 - Тип: строка
 Правила дополнительных доменов отказа, применяемые вместе с failure_domain.
 Должны задаваться в следующем виде:
 `<уровень>=<последовательность символов>, <уровень2>=<последовательность2>, ...`
 Каждая `<последовательность>` должна состоять ровно из [pg_size](#pg_size) символов.
 Каждый символ соответствует одному OSD (размещению одной части PG) этого пула.
 Одинаковые символы означают, что соответствующие части размещаются в один и тот же
 узел дерева OSD на заданном `<уровне>`. Разные символы означают, что части
 размещаются в разные узлы.
 Например, если вы хотите сделать пул EC 4+2 и хотите поместить каждые 2 части
 данных в свой датацентр, и также вы хотите, чтобы каждая часть размещалась на
 другом хосте, то вы должны задать `level_placement` равным `dc=112233 host=123456`.
 Либо вы просто можете задать `level_placement` равным `dc=112233` и оставить
 `failure_domain` пустым, т.к. `host` это его значение по умолчанию и оно также
 применится автоматически.
 Без этого правила может получиться так, что в одном из датацентров окажется
 3 части данных одной PG и данные окажутся недоступными при временном отключении
 этого датацентра.
 Естественно, перед установкой правила вам нужно сгруппировать ваши хосты в
 датацентры, установив [placement_levels](monitor.ru.md#placement_levels) во что-то
 типа `{"dc":90,"host":100,"osd":110}` и добавив датацентры в [node_placement](#дерево-размещения),
 примерно так: `{"dc1":{"level":"dc"},"host1":{"parent":"dc1"},...}`.
 ## raw_placement
 - Тип: строка
 Низкоуровневые правила генерации PG в форме DSL (доменно-специфичного языка).
 Используйте, только если действительно знаете, зачем вам это надо :)
 Спецификация DSL:
 ```
 dsl := item | item ("\n" | ",") items
 item := "any" | rules
 rules := rule | rule rules
 rule := level operator arg
 level := /\w+/
 operator := "!=" | "=" | ">" | "?="
 arg := value | "(" values ")"
 values := value | value "," values
 value := item_ref | constant_id
 item_ref := /\d+/
 constant_id := /"([^"]+)"/
 ```
 Оператор "?=" означает "предпочитаемый". Т.е. `dc ?= "meow"` означает "предпочитать
 датацентр meow для этой части данных, но разместить её в другом датацентре, если
 meow недоступен".
 Примеры:
 - Простые 3 реплики с failure_domain=host: `any, host!=1, host!=(1,2)`
 - EC 4+2 в 3 датацентрах: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
 - 1 копия в фиксированном ДЦ + 2 в других ДЦ: `dc?=meow, dc!=1, dc!=(1,2)`
 ## max_osd_combinations
 - Тип: целое число
@ -383,27 +286,6 @@ OSD с "all".
 Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
 Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).
 ## used_for_fs
 - Тип: строка
 Если непусто, пул помечается как используемый для файловой системы VitastorFS с
 метаданными, хранимыми в блочном образе Vitastor с именем, равным значению
 этого параметра.
 Когда пул помечается как используемый для VitastorFS, создание обычных блочных
 образов в нём отключается (vitastor-cli отказывается создавать образы без --force),
 чтобы защитить пользователя от коллизий ID файлов и блочных образов и, таким
 образом, от потери данных.
 [vitastor-nfs](../usage/nfs.ru.md), в свою очередь, при запуске отказывается
 использовать для ФС пулы, не выделенные для неё. Это также означает, что один
 пул может использоваться только для одной VitastorFS.
 Также для ФС-пулов отключается передача статистики в etcd по отдельным инодам,
 так как ФС-пул может содержать очень много файлов и статистика по ним всем
 заняла бы очень много места в etcd.
 # Примеры
 ## Реплицированный пул
--- a/docs/config/src/client.yml
+++ b/docs/config/src/client.yml
@ -1,66 +1,3 @@
 - name: client_iothread_count
  type: int
  default: 0
  online: false
  info: |
    Number of separate threads for handling TCP network I/O at client library
    side. Enabling 4 threads usually allows to increase peak performance of each
    client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
    100-150 to 400 thousand iops, but at the same time it increases latency.
    Latency increase depends on CPU: with CPU power saving disabled latency
    only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
    with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
    iops decrease from 2000 to 1000). RDMA isn't affected by this option.
    It's recommended to enable client I/O threads if you don't use RDMA and want
    to increase peak client performance.
  info_ru: |
    Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
    клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
    производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
    чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
    секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
    при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
    падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
    и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
    RDMA данная опция не влияет.
    Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
    RDMA и хотите повысить пиковую производительность клиентов.
 - name: client_retry_interval
  type: ms
  min: 10
  default: 50
  online: true
  info: |
    Retry time for I/O requests failed due to inactive PGs or network
    connectivity errors.
  info_ru: |
    Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
    ошибок сети.
 - name: client_eio_retry_interval
  type: ms
  default: 1000
  online: true
  info: |
    Retry time for I/O requests failed due to data corruption or unfinished
    EC object deletions (has_incomplete PG state). 0 disables such retries
    and clients are not blocked and just get EIO error code instead.
  info_ru: |
    Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
    или незавершённых удалений EC-объектов (состояния PG has_incomplete).
    0 отключает повторы таких запросов и клиенты не блокируются, а вместо
    этого просто получают код ошибки EIO.
 - name: client_retry_enospc
  type: bool
  default: true
  online: true
  info: |
    Retry writes on out of space errors to wait until some space is freed on
    OSDs.
  info_ru: |
    Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
    ожидать, пока на OSD не освободится место.
 - name: client_max_dirty_bytes
  type: int
  default: 33554432
@ -229,27 +166,3 @@
    Максимальное число разделов на одном NBD-устройстве. Данное значение передаётся
    модулю ядра nbd как параметр `max_part`, когда его загружает vitastor-nbd.
    Имейте в виду, что (nbds_max)*(1+max_part) обычно не может превышать 256.
 - name: osd_nearfull_ratio
  type: float
  default: 0.95
  online: true
  info: |
    Ratio of used space on OSD to treat it as "almost full" in vitastor-cli status output.
    Remember that some client writes may hang or complete with an error if even
    just one OSD becomes 100 % full!
    However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
    unable to start at all), so you'll be able to recover from "out of space" errors
    without destroying and recreating OSDs.
  info_ru: |
    Доля занятого места на OSD, начиная с которой он считается "почти заполненным" в
    выводе vitastor-cli status.
    Помните, что часть клиентских запросов может зависнуть или завершиться с ошибкой,
    если на 100 % заполнится хотя бы 1 OSD!
    Однако, в отличие от Ceph, заполненные на 100 % OSD Vitastor не падают (в Ceph
    заполненные на 100% OSD вообще не могут стартовать), так что вы сможете
    восстановить работу кластера после ошибок отсутствия свободного места
    без уничтожения и пересоздания OSD.
--- a/docs/config/src/included.en.md
+++ b/docs/config/src/included.en.md
@ -56,8 +56,6 @@
 {{../../usage/nfs.en.md}}
 {{../../usage/admin.en.md}}
 ## Performance
 {{../../performance/understanding.en.md}}
@ -66,6 +64,4 @@
 {{../../performance/comparison1.en.md}}
 {{../../performance/bench2.en.md}}
 {{../../intro/author.en.md|indent=1}}
--- a/docs/config/src/included.ru.md
+++ b/docs/config/src/included.ru.md
@ -56,8 +56,6 @@
 {{../../usage/nfs.ru.md}}
 {{../../usage/admin.ru.md}}
 ## Производительность
 {{../../performance/understanding.ru.md}}
@ -66,6 +64,4 @@
 {{../../performance/comparison1.ru.md}}
 {{../../performance/bench2.ru.md}}
 {{../../intro/author.ru.md|indent=1}}
--- a/docs/config/src/layout-cluster.yml
+++ b/docs/config/src/layout-cluster.yml
@ -47,24 +47,14 @@
    Не может быть меньше размера сектора дисков данных OSD.
 - name: immediate_commit
  type: string
-  default: all
+  default: false
  info: |
-    One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
+    Another parameter which is really important for performance.
    This parameter is also really important for performance.
    TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
    power loss protection (nonvolatile write-through cache) and also for most HDDs.
    "none" or "small" should be only selected if you use desktop SSDs without
    capacitors or drives with slow write-back cache that can't be disabled. Check
    immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
    Detailed explanation:
    Desktop SSDs are very fast (100000+ iops) for simple random writes
    without cache flush. However, they are really slow (only around 1000 iops)
-    if you try to fsync() each write, that is, if you want to guarantee that
+    if you try to fsync() each write, that is, when you want to guarantee that
-    each change gets actually persisted to the physical media.
+    each change gets immediately persisted to the physical media.
    Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
    "Supercapacitor-based Power Loss Protection", on the other hand, are equally
@ -76,8 +66,8 @@
    efficiently utilize desktop SSDs by postponing fsync until the client calls
    it explicitly.
-    This is what this parameter regulates. When it's set to "all" Vitastor
+    This is what this parameter regulates. When it's set to "all" the whole
-    cluster commits each change to disks immediately and clients just
+    Vitastor cluster commits each change to disks immediately and clients just
    ignore fsyncs because they know for sure that they're unneeded. This reduces
    the amount of network roundtrips performed by clients and improves
    performance. So it's always better to use server grade SSDs with
@ -97,22 +87,17 @@
    it (they have internal SSD cache even though it's not stated in datasheets).
    Setting this parameter to "all" or "small" in OSD parameters requires enabling
-    [disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
+    [disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
-    [disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
+    [disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
-    "all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
+    "all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
-    vitastor-disk tried to do that by default, first checking/disabling drive cache.
+
-    If it can't disable drive cache, OSD get initialized with "none".
+    TLDR: For optimal performance, set immediate_commit to "all" if you only use
    SSDs with supercapacitor-based power loss protection (nonvolatile
    write-through cache) for both data and journals in the whole Vitastor
    cluster. Set it to "small" if you only use such SSDs for journals. Leave
    empty if your drives have write-back cache.
  info_ru: |
-    Одно из значений "none", "small" или "all". Глобальное значение, может быть
+    Ещё один важный для производительности параметр.
    переопределено [на уровне пула](pool.ru.md#immediate_commit).
    Данный параметр тоже важен для производительности.
    Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
    суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
    устанавливать только при использовании SSD настольного класса без
    суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
    Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
    Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
    секунду) при простой случайной записи без сбросов кэша. Однако они очень
@ -133,7 +118,7 @@
    эффективно утилизировать настольные SSD.
    Данный параметр влияет как раз на это. Когда он установлен в значение "all",
-    кластер Vitastor мгновенно фиксирует каждое изменение на физические
+    весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
    носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
    знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
    по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
@ -156,6 +141,13 @@
    указано в спецификациях).
    Указание "all" или "small" в настройках / командной строке OSD требует
-    включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
+    включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
-    [disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
+    [disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
-    также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
+    также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
    Итого, вкратце: для оптимальной производительности установите
    immediate_commit в значение "all", если вы используете в кластере только SSD
    с суперконденсаторами и для данных, и для журналов. Если вы используете
    такие SSD для всех журналов, но не для данных - можете установить параметр
    в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
    оставьте параметр пустым.
--- a/docs/config/src/layout-osd.yml
+++ b/docs/config/src/layout-osd.yml
@ -110,22 +110,20 @@
  type: bool
  default: false
  info: |
-    Do not issue fsyncs to the data device, i.e. do not force it to flush cache.
+    Do not issue fsyncs to the data device, i.e. do not flush its cache.
-    Safe ONLY if your data device has write-through cache or if write-back
+    Safe ONLY if your data device has write-through cache. If you disable
-    cache is disabled. If you disable drive cache manually with `hdparm` or
+    the cache yourself using `hdparm` or `scsi_disk/cache_type` then make sure
-    writing to `/sys/.../scsi_disk/cache_type` then make sure that you do it
+    that the cache disable command is run every time before starting Vitastor
-    every time before starting Vitastor OSD (vitastor-disk does it automatically).
+    OSD, for example, in the systemd unit. See also `immediate_commit` option
-    See also [immediate_commit](layout-cluster.en.md#immediate_commit)
+    for the instructions to disable cache and how to benefit from it.
    for information about how to benefit from disabled cache.
  info_ru: |
-    Не отправлять fsync-и устройству данных, т.е. не заставлять его сбрасывать кэш.
+    Не отправлять fsync-и устройству данных, т.е. не сбрасывать его кэш.
    Безопасно, ТОЛЬКО если ваше устройство данных имеет кэш со сквозной
-    записью (write-through) или если кэш с отложенной записью (write-back) отключён.
+    записью (write-through). Если вы отключаете кэш через `hdparm` или
-    Если вы отключаете кэш вручную через `hdparm` или запись в `/sys/.../scsi_disk/cache_type`,
+    `scsi_disk/cache_type`, то удостоверьтесь, что команда отключения кэша
-    то удостоверьтесь, что вы делаете это каждый раз перед запуском Vitastor OSD
+    выполняется перед каждым запуском Vitastor OSD, например, в systemd unit-е.
-    (vitastor-disk делает это автоматически). Смотрите также опцию
+    Смотрите также опцию `immediate_commit` для инструкций по отключению кэша
-    [immediate_commit](layout-cluster.ru.md#immediate_commit) для информации о том,
+    и о том, как из этого извлечь выгоду.
    как извлечь выгоду из отключённого кэша.
 - name: disable_meta_fsync
  type: bool
  default: false
@ -181,7 +179,8 @@
    Because of this it can actually be beneficial to use SSDs which work well
    with 512 byte sectors and use 512 byte disk_alignment, journal_block_size
-    and meta_block_size. But at the moment, no such SSDs are known...
+    and meta_block_size. But the only SSD that may fit into this category is
    Intel Optane (probably, not tested yet).
    Clients don't need to be aware of disk_alignment, so it's not required to
    put a modified value into etcd key /vitastor/config/global.
@ -199,8 +198,9 @@
    Поэтому, на самом деле, может быть выгодно найти SSD, хорошо работающие с
    меньшими, 512-байтными, блоками и использовать 512-байтные disk_alignment,
-    journal_block_size и meta_block_size. Однако на данный момент такие SSD
+    journal_block_size и meta_block_size. Однако единственные SSD, которые
-    не известны...
+    теоретически могут попасть в эту категорию - это Intel Optane (но и это
    пока не проверялось автором).
    Клиентам не обязательно знать про disk_alignment, так что помещать значение
    этого параметра в etcd в /vitastor/config/global не нужно.
--- a/docs/config/src/monitor.yml
+++ b/docs/config/src/monitor.yml
@ -1,103 +1,3 @@
 - name: use_antietcd
  type: bool
  default: false
  info: |
    Enable experimental built-in etcd replacement (clustered key-value database):
    [antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
    When set to true, monitor runs internal antietcd automatically if it finds
    a network interface with an IP address matching one of addresses in the
    `etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
    the monitor command line). If there are multiple matching addresses, it also
    checks `antietcd_port` and antietcd is started for address with matching port.
    By default, antietcd accepts connection on the selected IP address, but it
    can also be overridden manually in the `antietcd_ip` option.
    When antietcd is started, monitor stores cluster metadata itself and exposes
    an etcd-compatible REST API. On disk, these metadata are stored in
    `/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
    or antietcd_data_dir options). All other antietcd parameters
    (see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
    cluster, cluster_key, persist_filter, stale_read can also be set in
    Vitastor configuration with `antietcd_` prefix.
    You can dump/load data to or from antietcd using Antietcd `anticli` tool:
    ```
    npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
    npm exec anticli -e http://antietcd:2379/v3 load < dump.json
    ```
  info_ru: |
    Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
    [antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
    Если параметр установлен в true, монитор запускает antietcd автоматически,
    если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
    конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
    командной строки монитора). Если таких адресов несколько, также проверяется
    опция `antietcd_port` и antietcd запускается для адреса с соответствующим
    портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
    IP, но его также можно определить вручную опцией `antietcd_ip`.
    При запуске antietcd монитор сам хранит центральные метаданные кластера и
    выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
    `/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
    antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
    (смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
    node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
    в конфигурации Vitastor с префиксом `antietcd_`.
    Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
    `anticli`:
    ```
    npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
    npm exec anticli -e http://antietcd:2379/v3 load < dump.json
    ```
 - name: enable_prometheus
  type: bool
  default: true
  info: |
    Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
    Note that only the active (master) monitor exposes metrics, others return
    HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
    Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
  info_ru: |
    Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
    Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
    возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
    в задание по сбору метрик Prometheus.
    Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
 - name: mon_http_port
  type: int
  default: 8060
  info: HTTP port for monitors to listen on (including metrics exporter)
  info_ru: Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
 - name: mon_http_ip
  type: string
  info: IP address for monitors to listen on (all addresses by default)
  info_ru: IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
 - name: mon_https_cert
  type: string
  info: Path to PEM SSL certificate file for monitor to listen using HTTPS
  info_ru: Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
 - name: mon_https_key
  type: string
  info: Path to PEM SSL private key file for monitor to listen using HTTPS
  info_ru: Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
 - name: mon_https_client_auth
  type: bool
  default: false
  info: Enable HTTPS client certificate-based authorization for monitor connections
  info_ru: Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
 - name: mon_https_ca
  type: string
  info: Path to CA certificate for client HTTPS authorization
  info_ru: Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
 - name: etcd_mon_ttl
  type: sec
  min: 5
@ -163,12 +63,3 @@
    "host" и "osd" являются предопределёнными и не могут быть удалены. Если
    один из них отсутствует в конфигурации, он доопределяется с приоритетом по
    умолчанию (100 для уровня "host", 101 для "osd").
 - name: use_old_pg_combinator
  type: bool
  default: false
  info: |
    Use the old PG combination generator which doesn't support [level_placement](pool.en.md#level_placement)
    and [raw_placement](pool.en.md#raw_placement) for pools which don't use these features.
  info_ru: |
    Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
    и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
--- a/docs/config/src/network.yml
+++ b/docs/config/src/network.yml
@ -48,17 +48,11 @@
  type: string
  info: |
    RDMA device name to use for Vitastor OSD communications (for example,
-    "rocep5s0f0"). If not specified, Vitastor will try to find an RoCE
+    "rocep5s0f0"). Now Vitastor supports all adapters, even ones without
-    device matching [osd_network](osd.en.md#osd_network), preferring RoCEv2,
+    ODP support, like Mellanox ConnectX-3 and non-Mellanox cards.
    or choose the first available RDMA device if no RoCE devices are
    found or if `osd_network` is not specified. Auto-selection is also
    unsupported with old libibverbs < v32, like in Debian 10 Buster or
    CentOS 7.
-    Vitastor supports all adapters, even ones without ODP support, like
+    Versions up to Vitastor 1.2.0 required ODP which is only present in
-    Mellanox ConnectX-3 and non-Mellanox cards. Versions up to Vitastor
+    Mellanox ConnectX >= 4. See also [rdma_odp](#rdma_odp).
    1.2.0 required ODP which is only present in Mellanox ConnectX >= 4.
    See also [rdma_odp](#rdma_odp).
    Run `ibv_devinfo -v` as root to list available RDMA devices and their
    features.
@ -70,17 +64,12 @@
    PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
  info_ru: |
    Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
-    Если не указано, Vitastor попробует найти RoCE-устройство, соответствующее
+    Сейчас Vitastor поддерживает все модели адаптеров, включая те, у которых
    [osd_network](osd.en.md#osd_network), предпочитая RoCEv2, или выбрать первое
    попавшееся RDMA-устройство, если RoCE-устройств нет или если сеть `osd_network`
    не задана. Также автовыбор не поддерживается со старыми версиями библиотеки
    libibverbs < v32, например в Debian 10 Buster или CentOS 7.
    Vitastor поддерживает все модели адаптеров, включая те, у которых
    нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
-    картами производства не Mellanox. Версии Vitastor до 1.2.0 включительно
+    картами производства не Mellanox.
-    требовали ODP, который есть только на Mellanox ConnectX 4 и более новых.
+
-    См. также [rdma_odp](#rdma_odp).
+    Версии Vitastor до 1.2.0 включительно требовали ODP, который есть только
    на Mellanox ConnectX 4 и более новых. См. также [rdma_odp](#rdma_odp).
    Запустите `ibv_devinfo -v` от имени суперпользователя, чтобы посмотреть
    список доступных RDMA-устройств, их параметры и возможности.
@ -105,29 +94,23 @@
    `ibv_devinfo -v`.
 - name: rdma_gid_index
  type: int
  default: 0
  info: |
    Global address identifier index of the RDMA device to use. Different GID
    indexes may correspond to different protocols like RoCEv1, RoCEv2 and iWARP.
    Search for "GID" in `ibv_devinfo -v` output to determine which GID index
    you need.
-    If not specified, Vitastor will try to auto-select a RoCEv2 IPv4 GID, then
+    **IMPORTANT:** If you want to use RoCEv2 (as recommended) then the correct
-    RoCEv2 IPv6 GID, then RoCEv1 IPv4 GID, then RoCEv1 IPv6 GID, then IB GID.
+    rdma_gid_index is usually 1 (IPv6) or 3 (IPv4).
    GID auto-selection is unsupported with libibverbs < v32.
    A correct rdma_gid_index for RoCEv2 is usually 1 (IPv6) or 3 (IPv4).
  info_ru: |
    Номер глобального идентификатора адреса RDMA-устройства, который следует
    использовать. Разным gid_index могут соответствовать разные протоколы связи:
    RoCEv1, RoCEv2, iWARP. Чтобы понять, какой нужен вам - смотрите строчки со
    словом "GID" в выводе команды `ibv_devinfo -v`.
-    Если не указан, Vitastor попробует автоматически выбрать сначала GID,
+    **ВАЖНО:** Если вы хотите использовать RoCEv2 (как мы и рекомендуем), то
-    соответствующий RoCEv2 IPv4, потом RoCEv2 IPv6, потом RoCEv1 IPv4, потом
+    правильный rdma_gid_index, как правило, 1 (IPv6) или 3 (IPv4).
    RoCEv1 IPv6, потом IB. Авто-выбор GID не поддерживается со старыми версиями
    libibverbs < v32.
    Правильный rdma_gid_index для RoCEv2, как правило, 1 (IPv6) или 3 (IPv4).
 - name: rdma_mtu
  type: int
  default: 4096
@ -260,6 +243,21 @@
    Максимальное время ожидания ответа на запрос проверки состояния соединения.
    Если OSD не отвечает за это время, соединение отключается и производится
    повторная попытка соединения.
 - name: up_wait_retry_interval
  type: ms
  min: 10
  default: 50
  online: true
  info: |
    OSDs respond to clients with a special error code when they receive I/O
    requests for a PG that's not synchronized and started. This parameter sets
    the time for the clients to wait before re-attempting such I/O requests.
  info_ru: |
    Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
    поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
    они отвечают клиентам специальным кодом ошибки, означающим, что клиент
    должен некоторое время подождать перед повторением запроса. Именно это время
    ожидания задаёт данный параметр.
 - name: max_etcd_attempts
  type: int
  default: 5
@ -297,9 +295,9 @@
  info_ru: |
    Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
    etcd_report_interval, чтобы keepalive гарантированно работал.
- name: etcd_ws_keepalive_interval
+- name: etcd_ws_keepalive_timeout
  type: sec
-  default: 5
+  default: 30
  online: true
  info: |
    etcd websocket ping interval required to keep the connection alive and
--- a/docs/config/src/osd.yml
+++ b/docs/config/src/osd.yml
@ -1,21 +1,3 @@
 - name: osd_iothread_count
  type: int
  default: 0
  info: |
    TCP network I/O thread count for OSD. When non-zero, a single OSD process
    may handle more TCP I/O, but at a cost of increased latency because thread
    switching overhead occurs. RDMA isn't affected by this option.
    Because of latency, instead of enabling OSD I/O threads it's recommended to
    just create multiple OSDs per disk, or use RDMA.
  info_ru: |
    Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
    стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
    по сети больше данных, но ухудшает задержку из-за накладных расходов
    переключения потоков. На работу RDMA опция не влияет.
    Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
    просто создавать по несколько OSD на каждом диске, или использовать RDMA.
 - name: etcd_report_interval
  type: sec
  default: 5
--- a/docs/installation/opennebula.en.md
+++ b/docs/installation/opennebula.en.md
@ -1,186 +0,0 @@
 [Documentation](../../README.md#documentation) → Installation → OpenNebula
 -----
 [Читать на русском](opennebula.ru.md)
 # OpenNebula
 ## Automatic Installation
 OpenNebula plugin is packaged as `vitastor-opennebula` Debian and RPM package since Vitastor 1.9.0. So:
 - Run `apt-get install vitastor-opennebula` or `yum install vitastor-opennebula` after installing OpenNebula on all nodes
 - Check that it prints "OK, Vitastor OpenNebula patches successfully applied" or "OK, Vitastor OpenNebula patches are already applied"
 - If it does not, refer to [Manual Installation](#manual-installation) and apply configuration file changes manually
 - Make sure that Vitastor patched versions of QEMU and libvirt are installed
  (`dpkg -l qemu-system-x86`, `dpkg -l | grep libvirt`, `rpm -qa | grep qemu`, `rpm -qa | grep libvirt-libs` should show "vitastor" in version names)
 - [Block VM access to Vitastor cluster](#block-vm-access-to-vitastor-cluster)
 ## Manual Installation
 Install OpenNebula. Then, on each node:
 - Copy [opennebula/remotes](../../opennebula/remotes) into `/var/lib/one` recursively: `cp -r opennebula/remotes /var/lib/one/`
 - Copy [opennebula/sudoers.d](../../opennebula/sudoers.d) to `/etc`: `cp -r opennebula/sudoers.d /etc/`
 - Apply [downloader-vitastor.sh.diff](../../opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff) to `/var/lib/one/remotes/datastore/downloader.sh`:
  `patch /var/lib/one/remotes/datastore/downloader.sh < opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff` - or read the patch and apply the same change manually
 - Add `kvm-vitastor` to `LIVE_DISK_SNAPSHOTS` in `/etc/one/vmm_exec/vmm_execrc`
 - If on Debian or Ubuntu (and AppArmor is used), add Vitastor config file path(s) to `/etc/apparmor.d/local/abstractions/libvirt-qemu`: for example,
  `echo '  "/etc/vitastor/vitastor.conf" r,' >> /etc/apparmor.d/local/abstractions/libvirt-qemu`
 - Apply changes to `/etc/one/oned.conf`
 ### oned.conf changes
 1. Add deploy script override in kvm VM_MAD: add `-l deploy=deploy.vitastor` to ARGUMENTS.
 ```diff
 VM_MAD = [
     NAME           = "kvm",
     SUNSTONE_NAME  = "KVM",
     EXECUTABLE     = "one_vmm_exec",
 -    ARGUMENTS      = "-t 15 -r 0 kvm -p",
 +    ARGUMENTS      = "-t 15 -r 0 kvm -p -l deploy=deploy.vitastor",
     DEFAULT        = "vmm_exec/vmm_exec_kvm.conf",
     TYPE           = "kvm",
     KEEP_SNAPSHOTS = "yes",
     LIVE_RESIZE    = "yes",
     SUPPORT_SHAREABLE    = "yes",
     IMPORTED_VMS_ACTIONS = "terminate, terminate-hard, hold, release, suspend,
         resume, delete, reboot, reboot-hard, resched, unresched, disk-attach,
         disk-detach, nic-attach, nic-detach, snapshot-create, snapshot-delete,
         resize, updateconf, update"
 ]
 ```
 Optional: if you also want to save VM RAM checkpoints to Vitastor, use
 `-l deploy=deploy.vitastor,save=save.vitastor,restore=restore.vitastor`
 instead of just `-l deploy=deploy.vitastor`.
 2. Add `vitastor` to TM_MAD.ARGUMENTS and DATASTORE_MAD.ARGUMENTS:
 ```diff
 TM_MAD = [
     EXECUTABLE = "one_tm",
 -    ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,dev,vcenter,iscsi_libvirt"
 +    ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,vitastor,dev,vcenter,iscsi_libvirt"
 ]
 DATASTORE_MAD = [
     EXECUTABLE = "one_datastore",
 -    ARGUMENTS  = "-t 15 -d dummy,fs,lvm,ceph,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
 +    ARGUMENTS  = "-t 15 -d dummy,fs,lvm,ceph,vitastor,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,vitastor,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
 ]
 ```
 3. Add INHERIT_DATASTORE_ATTR for two Vitastor attributes:
 ```
 INHERIT_DATASTORE_ATTR = "VITASTOR_CONF"
 INHERIT_DATASTORE_ATTR = "IMAGE_PREFIX"
 ```
 4. Add TM_MAD_CONF and DS_MAD_CONF for Vitastor:
 ```
 TM_MAD_CONF = [
    NAME = "vitastor", LN_TARGET = "NONE", CLONE_TARGET = "SELF", SHARED = "YES",
    DS_MIGRATE = "NO", DRIVER = "raw", ALLOW_ORPHANS="format",
    TM_MAD_SYSTEM = "ssh,shared", LN_TARGET_SSH = "SYSTEM", CLONE_TARGET_SSH = "SYSTEM",
    DISK_TYPE_SSH = "FILE", LN_TARGET_SHARED = "NONE",
    CLONE_TARGET_SHARED = "SELF", DISK_TYPE_SHARED = "FILE"
 ]
 DS_MAD_CONF = [
    NAME = "vitastor",
    REQUIRED_ATTRS = "DISK_TYPE,BRIDGE_LIST",
    PERSISTENT_ONLY = "NO",
    MARKETPLACE_ACTIONS = "export"
 ]
 ```
 ## Create Datastores
 Example Image and System Datastore definitions:
 [opennebula/vitastor-imageds.conf](../../opennebula/vitastor-imageds.conf) and
 [opennebula/vitastor-systemds.conf](../../opennebula/vitastor-systemds.conf).
 Adjust the parameters as needed:
 - POOL_NAME is Vitastor pool name to store images.
 - IMAGE_PREFIX is a string prepended to all Vitastor image names.
 - BRIDGE_LIST is a list of hosts with access to Vitastor cluster, mostly used for image (not system) datastore operations.
 - VITASTOR_CONF is the path to cluster configuration. Note that it should be also added to `/etc/apparmor.d/local/abstractions/libvirt-qemu` if you use AppArmor.
 - STAGING_DIR is a temporary directory used when importing external images. Should have free space sufficient for downloading external images.
 Then create datastores using `onedatastore create vitastor-imageds.conf` and `onedatastore create vitastor-systemds.conf` (or use UI).
 ## Block VM access to Vitastor cluster
 Vitastor doesn't support any authentication yet, so you MUST block VM guest access to the Vitastor cluster at the network level.
 If you use VLAN networking for VMs - make sure you use different VLANs for VMs and hypervisor/storage network and
 block access between them using your firewall/switch configuration.
 If you use something more stupid like bridged networking, you probably have to use manual firewall/iptables setup
 to only allow access to Vitastor from hypervisor IPs.
 Also you need to switch network to "Bridged & Security Groups" and enable IP spoofing filters in OpenNebula.
 The problem is that OpenNebula's IP spoofing filter doesn't affect local interfaces of the hypervisor, i.e. when
 it's enabled, a VM can't talk to other VMs or to the outer world using a spoofed IP, but it CAN talk to the
 hypervisor if it takes an IP from its subnet. To fix that, you also need some more iptables rules.
 So the complete "stupid" bridged network filter setup could look like the following
 (here `10.0.3.0/24` is the VM subnet and `10.0.2.0/24` is the hypervisor subnet):
 ```
 # Allow incoming traffic from physical device
 iptables -A INPUT -m physdev --physdev-in eth0 -j ACCEPT
 # Drop incoming traffic from the VM bridge if its source address is not in the VM subnet
 iptables -A INPUT ! -s 10.0.3.0/24 -i onebr0 -j DROP
 # Drop traffic from VMs to hypervisor/storage subnet
 iptables -I FORWARD 1 -s 10.0.3.0/24 -d 10.0.2.0/24 -j DROP
 ```
 ## Testing
 The OpenNebula plugin includes quite a few bash scripts, so here is a description of each to give an idea of what they actually do.
 | Script                  | Action                                    | How to Test                                                                          |
 | ----------------------- | ----------------------------------------- | ------------------------------------------------------------------------------------ |
 | vmm/kvm/deploy.vitastor | Start a VM                                | Create and start a VM with Vitastor disk(s): persistent / non-persistent / volatile. |
 | vmm/kvm/save.vitastor   | Save VM memory checkpoint                 | Stop a VM using "Stop" command.                                                      |
 | vmm/kvm/restore.vitastor| Restore VM memory checkpoint              | Start a VM back after stopping it.                                                   |
 | datastore/clone         | Copy an image as persistent               | Create a VM template and instantiate it as persistent.                               |
 | datastore/cp            | Import an external image                  | Import a VM template with images from Marketplace.                                   |
 | datastore/export        | Export an image as URL                    | Probably: export a VM template with images to Marketplace.                           |
 | datastore/mkfs          | Create an image with FS                   | Storage → Images → Create → Type: Datablock, Location: Empty disk image, Filesystem: Not empty. |
 | datastore/monitor       | Monitor used space in image datastore     | Check reported used/free space in image datastore list.                              |
 | datastore/rm            | Remove a persistent image                 | Storage → Images → Select an image → Delete.                                         |
 | datastore/snap_delete   | Delete a snapshot of a persistent image   | Storage → Images → Select an image → Select a snapshot → Delete; <br> To create an image with snapshot: attach a persistent image to a VM; create a snapshot; detach the image. |
 | datastore/snap_flatten  | Revert an image to snapshot and delete other snapshots | Storage → Images → Select an image → Select a snapshot → Flatten.       |
 | datastore/snap_revert   | Revert an image to snapshot               | Storage → Images → Select an image → Select a snapshot → Revert.                     |
 | datastore/stat          | Get virtual size of an image in MB        | No idea. Seems to be unused both in Vitastor and Ceph datastores.                    |
 | tm/clone                | Clone a non-persistent image to a VM disk | Attach a non-persistent image to a VM.                                               |
 | tm/context              | Generate a contextualisation VM disk      | Create a VM with enabled contextualisation (default). Common host FS-based version is used in Vitastor and Ceph datastores. |
 | tm/cpds                 | Copy a VM disk / its snapshot to an image | Select a VM → Select a disk → Optionally select a snapshot → Save as.                |
 | tm/delete               | Delete a cloned or volatile VM disk       | Detach a volatile disk or a non-persistent image from a VM.                          |
 | tm/failmigrate          | Handle live migration failure             | No action. Script is empty in Vitastor and Ceph. In other datastores, should roll back actions done by tm/premigrate. |
 | tm/ln                   | Attach a persistent image to a VM         | No action. Script is empty in Vitastor and Ceph.                                     |
 | tm/mkimage              | Create a volatile disk, maybe with FS     | Attach a volatile disk to a VM, with or without file system.                         |
 | tm/mkswap               | Create a volatile swap disk               | Attach a volatile disk to a VM, formatted as swap.                                   |
 | tm/monitor              | Monitor used space in system datastore    | Check reported used/free space in system datastore list.                             |
 | tm/mv                   | Move a migrated VM disk between hosts     | Migrate a VM between hosts. In Vitastor and Ceph datastores, doesn't do any storage action. |
 | tm/mvds                 | Detach a persistent image from a VM       | No action. The opposite of tm/ln. Script is empty in Vitastor and Ceph. In other datastores, script may copy the image from VM host back to the datastore. |
 | tm/postbackup           | Executed after backup                     | Seems that the script just removes temporary files after backup. Perform a VM backup and check that temporary files are cleaned up. |
 | tm/postbackup_live      | Executed after backup of a running VM     | Same as tm/postbackup, but for a running VM.                                         |
 | tm/postmigrate          | Executed after VM live migration          | No action. Only executed for system datastore, so the script tries to call other TMs for other disks. Except that, the script does nothing in Vitastor and Ceph datastores. |
 | tm/prebackup            | Actual backup script: backup VM disks     | Set up "rsync" backup datastore → Backup a VM to it.                                 |
 | tm/prebackup_live       | Backup VM disks of a running VM           | Same as tm/prebackup, but also does fsfreeze/thaw. So perform a live backup, restore it and check that disks are consistent. |
 | tm/premigrate           | Executed before live migration            | No action. Only executed for system datastore, so the script tries to call other TMs for other disks. Except that, the script does nothing in Vitastor and Ceph datastores. |
 | tm/resize               | Resize a VM disk                          | Select a VM → Select a non-persistent disk → Resize.                                 |
 | tm/restore              | Restore VM disks from backup              | Set up "rsync" backup datastore → Backup a VM to it → Restore it back.               |
 | tm/snap_create          | Create a VM disk snapshot                 | Select a VM → Select a disk → Create snapshot.                                       |
 | tm/snap_create_live     | Create a VM disk snapshot for a live VM   | Select a running VM → Select a disk → Create snapshot.                               |
 | tm/snap_delete          | Delete a VM disk snapshot                 | Select a VM → Select a disk → Select a snapshot → Delete.                            |
 | tm/snap_revert          | Revert a VM disk to a snapshot            | Select a VM → Select a disk → Select a snapshot → Revert.                            |
--- a/docs/installation/opennebula.ru.md
+++ b/docs/installation/opennebula.ru.md
@ -1,189 +0,0 @@
 [Документация](../../README-ru.md#документация) → Установка → OpenNebula
 -----
 [Read in English](opennebula.en.md)
 # OpenNebula
 ## Автоматическая установка
 Плагин OpenNebula Vitastor распространяется как Debian и RPM пакет `vitastor-opennebula`, начиная с версии Vitastor 1.9.0. Так что:
 - Запустите `apt-get install vitastor-opennebula` или `yum install vitastor-opennebula` после установки OpenNebula на всех серверах
 - Проверьте, что он выводит "OK, Vitastor OpenNebula patches successfully applied" или "OK, Vitastor OpenNebula patches are already applied" в процессе установки
 - Если сообщение не выведено, пройдите по шагам инструкцию [Ручная установка](#ручная-установка) и примените правки файлов конфигурации вручную
 - Удостоверьтесь, что установлены версии QEMU и libvirt с изменениями Vitastor
  (`dpkg -l qemu-system-x86`, `dpkg -l | grep libvirt`, `rpm -qa | grep qemu`, `rpm -qa | grep libvirt-libs` должны показывать "vitastor" в номере версии)
 - [Заблокируйте доступ виртуальных машин в Vitastor](#блокировка-доступа-вм-в-vitastor)
 ## Ручная установка
 Сначала установите саму OpenNebula. После этого, на каждом сервере:
 - Скопируйте директорию [opennebula/remotes](../../opennebula/remotes) в `/var/lib/one`: `cp -r opennebula/remotes /var/lib/one/`
 - Скопируйте директорию [opennebula/sudoers.d](../../opennebula/sudoers.d) в `/etc`: `cp -r opennebula/sudoers.d /etc/`
 - Примените патч [downloader-vitastor.sh.diff](../../opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff) к `/var/lib/one/remotes/datastore/downloader.sh`:
  `patch /var/lib/one/remotes/datastore/downloader.sh < opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff` - либо прочитайте патч и примените изменение вручную
 - Добавьте `kvm-vitastor` в список `LIVE_DISK_SNAPSHOTS` в файле `/etc/one/vmm_exec/vmm_execrc`
 - Если вы используете Debian или Ubuntu (и AppArmor), добавьте пути к файлу(ам) конфигурации Vitastor в файл `/etc/apparmor.d/local/abstractions/libvirt-qemu`: например,
  `echo '  "/etc/vitastor/vitastor.conf" r,' >> /etc/apparmor.d/local/abstractions/libvirt-qemu`
 - Примените изменения `/etc/one/oned.conf`
 ### Изменения oned.conf
 1. Добавьте переопределение скрипта deploy в VM_MAD kvm, добавив `-l deploy=deploy.vitastor` в `ARGUMENTS`:
 ```diff
 VM_MAD = [
     NAME           = "kvm",
     SUNSTONE_NAME  = "KVM",
     EXECUTABLE     = "one_vmm_exec",
 -    ARGUMENTS      = "-t 15 -r 0 kvm -p",
 +    ARGUMENTS      = "-t 15 -r 0 kvm -p -l deploy=deploy.vitastor",
     DEFAULT        = "vmm_exec/vmm_exec_kvm.conf",
     TYPE           = "kvm",
     KEEP_SNAPSHOTS = "yes",
     LIVE_RESIZE    = "yes",
     SUPPORT_SHAREABLE    = "yes",
     IMPORTED_VMS_ACTIONS = "terminate, terminate-hard, hold, release, suspend,
         resume, delete, reboot, reboot-hard, resched, unresched, disk-attach,
         disk-detach, nic-attach, nic-detach, snapshot-create, snapshot-delete,
         resize, updateconf, update"
 ]
 ```
 Опционально: если вы хотите также сохранять снимки памяти ВМ в Vitastor, добавьте
 `-l deploy=deploy.vitastor,save=save.vitastor,restore=restore.vitastor`
 вместо просто `-l deploy=deploy.vitastor`.
 2. Добавьте `vitastor` в значения TM_MAD.ARGUMENTS и DATASTORE_MAD.ARGUMENTS:
 ```diff
 TM_MAD = [
     EXECUTABLE = "one_tm",
 -    ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,dev,vcenter,iscsi_libvirt"
 +    ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,vitastor,dev,vcenter,iscsi_libvirt"
 ]
 DATASTORE_MAD = [
     EXECUTABLE = "one_datastore",
 -    ARGUMENTS  = "-t 15 -d dummy,fs,lvm,ceph,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
 +    ARGUMENTS  = "-t 15 -d dummy,fs,lvm,ceph,vitastor,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,vitastor,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
 ]
 ```
 3. Добавьте строчки с INHERIT_DATASTORE_ATTR для двух атрибутов Vitastor-хранилищ:
 ```
 INHERIT_DATASTORE_ATTR = "VITASTOR_CONF"
 INHERIT_DATASTORE_ATTR = "IMAGE_PREFIX"
 ```
 4. Добавьте TM_MAD_CONF и DS_MAD_CONF для Vitastor:
 ```
 TM_MAD_CONF = [
    NAME = "vitastor", LN_TARGET = "NONE", CLONE_TARGET = "SELF", SHARED = "YES",
    DS_MIGRATE = "NO", DRIVER = "raw", ALLOW_ORPHANS="format",
    TM_MAD_SYSTEM = "ssh,shared", LN_TARGET_SSH = "SYSTEM", CLONE_TARGET_SSH = "SYSTEM",
    DISK_TYPE_SSH = "FILE", LN_TARGET_SHARED = "NONE",
    CLONE_TARGET_SHARED = "SELF", DISK_TYPE_SHARED = "FILE"
 ]
 DS_MAD_CONF = [
    NAME = "vitastor",
    REQUIRED_ATTRS = "DISK_TYPE,BRIDGE_LIST",
    PERSISTENT_ONLY = "NO",
    MARKETPLACE_ACTIONS = "export"
 ]
 ```
 ## Создайте хранилища
 Примеры настроек хранилищ образов (image) и дисков ВМ (system):
 [opennebula/vitastor-imageds.conf](../../opennebula/vitastor-imageds.conf) и
 [opennebula/vitastor-systemds.conf](../../opennebula/vitastor-systemds.conf).
 Скопируйте настройки и поменяйте следующие параметры так, как вам необходимо:
 - POOL_NAME - имя пула Vitastor для сохранения образов дисков.
 - IMAGE_PREFIX - строка, добавляемая в начало имён образов дисков.
 - BRIDGE_LIST - список серверов с доступом к кластеру Vitastor, используемых для операций с хранилищем образов (image, не system).
 - VITASTOR_CONF - путь к конфигурации Vitastor. Имейте в виду, что этот путь также надо добавить в `/etc/apparmor.d/local/abstractions/libvirt-qemu`, если вы используете AppArmor.
 - STAGING_DIR - путь к временному каталогу, используемому при импорте внешних образов. Должен иметь достаточно свободного места, чтобы вмещать скачанные образы.
 После этого создайте хранилища с помощью команд `onedatastore create vitastor-imageds.conf` и `onedatastore create vitastor-systemds.conf` (либо через UI).
 ## Блокировка доступа ВМ в Vitastor
 Vitastor пока не поддерживает никакую аутентификацию, так что вы ДОЛЖНЫ заблокировать доступ гостевых ВМ
 в кластер Vitastor на сетевом уровне.
 Если вы используете VLAN-сети для ВМ - удостоверьтесь, что ВМ и гипервизор/сеть хранения помещены в разные
 изолированные друг от друга VLAN-ы.
 Если вы используете что-то более примитивное, например, мосты (bridge), вам, скорее всего, придётся вручную
 настроить iptables / межсетевой экран, чтобы разрешить доступ к Vitastor только с IP гипервизоров.
 Также в этом случае нужно будет переключить обычные мосты на "Bridged & Security Groups" и включить фильтр
 спуфинга IP в OpenNebula. Правда, реализация этого фильтра пока не полная, и она не блокирует доступ к
 локальным интерфейсам гипервизора. То есть, включённый фильтр спуфинга IP запрещает ВМ отправлять трафик
 с чужими IP к другим ВМ или во внешний мир, но не запрещает отправлять его напрямую гипервизору. Чтобы
 исправить это, тоже нужны дополнительные правила iptables.
 Таким образом, более-менее полная блокировка при использовании простой сети на сетевых мостах может
 выглядеть так (здесь `10.0.3.0/24` - подсеть ВМ, `10.0.2.0/24` - подсеть гипервизора):
 ```
 # Разрешаем входящий трафик с физического устройства
 iptables -A INPUT -m physdev --physdev-in eth0 -j ACCEPT
 # Запрещаем трафик со всех ВМ, но с IP не из подсети ВМ
 iptables -A INPUT ! -s 10.0.3.0/24 -i onebr0 -j DROP
 # Запрещаем трафик от ВМ к сети гипервизора
 iptables -I FORWARD 1 -s 10.0.3.0/24 -d 10.0.2.0/24 -j DROP
 ```
 ## Тестирование
 Плагин OpenNebula по большей части состоит из bash-скриптов, и чтобы было понятнее, что они
 вообще делают - ниже приведены описания процедур, которыми можно протестировать каждый из них.
 | Скрипт                  | Описание                                      | Как протестировать                                                                   |
 | ----------------------- | --------------------------------------------- | ------------------------------------------------------------------------------------ |
 | vmm/kvm/deploy.vitastor | Запустить виртуальную машину                  | Создайте и запустите виртуальную машину с дисками Vitastor: постоянным / непостоянным / волатильным (временным). |
 | vmm/kvm/save.vitastor   | Сохранить снимок памяти ВМ                    | Остановите виртуальную машину командой "Остановить".                                 |
 | vmm/kvm/restore.vitastor| Восстановить снимок памяти ВМ                 | Запустите ВМ после остановки обратно.                                                |
 | datastore/clone         | Скопировать образ как "постоянный"            | Создайте шаблон ВМ и создайте из него постоянную ВМ.                                 |
 | datastore/cp            | Импортировать внешний образ                   | Импортируйте шаблон ВМ с образами дисков из Магазина OpenNebula.                     |
 | datastore/export        | Экспортировать образ как URL                  | Вероятно: экспортируйте шаблон ВМ с образами в Магазин.                              |
 | datastore/mkfs          | Создать образ с файловой системой             | Хранилище → Образы → Создать → Тип: базовый блок данных, Расположение: пустой образ диска, Файловая система: любая непустая. |
 | datastore/monitor       | Вывод статистики места в хранилище образов    | Проверьте статистику свободного/занятого места в списке хранилищ образов.            |
 | datastore/rm            | Удалить "постоянный" образ                    | Хранилище → Образы → Выберите образ → Удалить.                                       |
 | datastore/snap_delete   | Удалить снимок "постоянного" образа           | Хранилище → Образы → Выберите образ → Выберите снимок → Удалить; <br> Чтобы создать образ со снимком: подключите постоянный образ к ВМ, создайте снимок, отключите образ. |
 | datastore/snap_flatten  | Откатить образ к снимку, удалив другие снимки | Хранилище → Образы → Выберите образ → Выберите снимок → "Выровнять" (flatten).       |
 | datastore/snap_revert   | Откатить образ к снимку                       | Хранилище → Образы → Выберите образ → Выберите снимок → Откатить.                    |
 | datastore/stat          | Показать виртуальный размер образа в МБ       | Неизвестно. По-видимому, в плагинах Vitastor и Ceph не используется.                 |
 | tm/clone                | Клонировать "непостоянный" образ в диск ВМ    | Подключите "непостоянный" образ к ВМ.                                                |
 | tm/context              | Создать диск контекстуализации ВМ             | Создайте ВМ с контекстуализацией, как обычно. Но тестировать особенно нечего: в плагинах Vitastor и Ceph образ контекста хранится в локальной ФС гипервизора. |
 | tm/cpds                 | Копировать диск ВМ/его снимок в новый образ   | Выберите ВМ → Выберите диск → Опционально выберите снимок → "Сохранить как".         |
 | tm/delete               | Удалить диск-клон или волатильный диск ВМ     | Отключите волатильный или не-постоянный диск от ВМ.                                  |
 | tm/failmigrate          | Обработать неудачную миграцию                 | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph. В других плагинах скрипт должен откатывать действия tm/premigrate. |
 | tm/ln                   | Подключить "постоянный" образ к ВМ            | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph.                          |
 | tm/mkimage              | Создать волатильный диск, без или с ФС        | Подключите волатильный диск к ВМ, с или без файловой системы.                        |
 | tm/mkswap               | Создать волатильный диск подкачки             | Подключите волатильный диск к ВМ, форматированный как диск подкачки (swap).          |
 | tm/monitor              | Вывод статистики места в хранилище дисков ВМ  | Проверьте статистику свободного/занятого места в списке хранилищ дисков ВМ.          |
 | tm/mv                   | Мигрировать диск ВМ между хостами             | Мигрируйте ВМ между серверами. Правда, с точки зрения хранилища в плагинах Vitastor и Ceph этот скрипт ничего не делает. |
 | tm/mvds                 | Отключить "постоянный" образ от ВМ            | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph. В целом же скрипт обратный к tm/ln и в других хранилищах он может, например, копировать образ ВМ с диска гипервизора обратно в хранилище. |
 | tm/postbackup           | Выполняется после бэкапа                      | По-видимому, скрипт просто удаляет временные файлы после резервного копирования. Так что можно провести его и проверить, что на серверах не осталось временных файлов. |
 | tm/postbackup_live      | Выполняется после бэкапа запущенной ВМ        | То же, что tm/postbackup, но для запущенной ВМ.                                      |
 | tm/postmigrate          | Выполняется после миграции ВМ                 | Тестировать нечего. Однако, OpenNebula запускает скрипт только для системного хранилища, поэтому он вызывает аналогичные скрипты для хранилищ других дисков той же ВМ. Помимо этого в плагинах Vitastor и Ceph скрипт ничего не делает. |
 | tm/prebackup            | Выполнить резервное копирование дисков ВМ     | Создайте хранилище резервных копий типа "rsync" → Забэкапьте в него ВМ.              |
 | tm/prebackup_live       | То же самое для запущенной ВМ                 | То же, что tm/prebackup, но запускает fsfreeze/thaw (остановку доступа к дискам). Так что смысл теста - проведите резервное копирование и проверьте, что данные скопировались консистентно. |
 | tm/premigrate           | Выполняется перед миграцией ВМ                | Тестировать нечего. Аналогично tm/postmigrate запускается только для системного хранилища. |
 | tm/resize               | Изменить размер диска ВМ                      | Выберите ВМ → Выберите непостоянный диск → Измените его размер.                      |
 | tm/restore              | Восстановить диски ВМ из бэкапа               | Создайте хранилище резервных копий → Забэкапьте в него ВМ → Восстановите её обратно. |
 | tm/snap_create          | Создать снимок диска ВМ                       | Выберите ВМ → Выберите диск → Создайте снимок.                                       |
 | tm/snap_create_live     | Создать снимок диска запущенной ВМ            | Выберите запущенную ВМ → Выберите диск → Создайте снимок.                            |
 | tm/snap_delete          | Удалить снимок диска ВМ                       | Выберите ВМ → Выберите диск → Выберите снимок → Удалить.                             |
 | tm/snap_revert          | Откатить диск ВМ к снимку                     | Выберите ВМ → Выберите диск → Выберите снимок → Откатить.                            |
--- a/docs/installation/packages.en.md
+++ b/docs/installation/packages.en.md
@ -16,6 +16,8 @@
  - Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
  - Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
    stable version from 0.9.x branch instead of 1.x
 - For Debian 10 (Buster) also enable backports repository:
  `deb http://deb.debian.org/debian buster-backports main`
 - Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
 ## CentOS
--- a/docs/installation/packages.ru.md
+++ b/docs/installation/packages.ru.md
@ -16,6 +16,8 @@
  - Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
  - Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
    установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
 - Для Debian 10 (Buster) также включите репозиторий backports:
  `deb http://deb.debian.org/debian buster-backports main`
 - Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
 ## CentOS
--- a/docs/installation/proxmox.en.md
+++ b/docs/installation/proxmox.en.md
@ -17,10 +17,10 @@ To enable Vitastor support in Proxmox Virtual Environment (6.4-8.1 are supported
 - Restart pvedaemon: `systemctl restart pvedaemon`
 `/etc/pve/storage.cfg` example (the only required option is vitastor_pool, all others
-are listed below with their default values; `vitastor_ssd` is Proxmox storage pool id):
+are listed below with their default values):
 ```
-vitastor: vitastor_ssd
+vitastor: vitastor
    # pool to put new images into
    vitastor_pool testpool
    # path to the configuration file
--- a/docs/installation/proxmox.ru.md
+++ b/docs/installation/proxmox.ru.md
@ -16,10 +16,10 @@
 - Перезапустите демон Proxmox: `systemctl restart pvedaemon`
 Пример `/etc/pve/storage.cfg` (единственная обязательная опция - vitastor_pool, все остальные
-перечислены внизу для понимания значений по умолчанию; `vitastor_ssd` - имя хранилища в Proxmox):
+перечислены внизу для понимания значений по умолчанию):
 ```
-vitastor: vitastor_ssd
+vitastor: vitastor
    # Пул, в который будут помещаться образы дисков
    vitastor_pool testpool
    # Путь к файлу конфигурации
--- a/docs/installation/source.en.md
+++ b/docs/installation/source.en.md
@ -41,7 +41,7 @@ It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of
 QEMU build process. To do that:
 - Install vitastor client library headers (from source or from vitastor-client-dev package)
 - Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
- Copy `src/client/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
+- Copy `src/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
 - Build QEMU as usual
 But it is also possible to build it out-of-tree. To do that:
--- a/docs/installation/source.ru.md
+++ b/docs/installation/source.ru.md
@ -41,7 +41,7 @@ cmake .. && make -j8 install
 Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
 - Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
 - Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
- Скопируйте [src/client/qemu_driver.c](../../src/client/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
+- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
 - Соберите QEMU как обычно
 Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:
--- a/docs/intro/architecture.en.md
+++ b/docs/intro/architecture.en.md
@ -6,150 +6,19 @@
 # Architecture
 - [Server-side components](#server-side-components)
 - [Basic concepts](#basic-concepts)
 - [Client-side components](#client-side-components)
 - [Additional utilities](#additional-utilities)
 - [Overall read/write process](#overall-read-write-process)
  - [Nuances of request handling](#nuances-of-request-handling)
 - [Similarities to Ceph](#similarities-to-ceph)
 - [Differences from Ceph](#differences-from-ceph)
 - [Implementation Principles](#implementation-principles)
 ## Server-side components
 - **OSD** (Object Storage Daemon) is a process that directly works with the disk, stores data
  and serves read/write requests. One OSD serves one disk (or one partition). OSDs talk to etcd
  and to each other — they receive cluster state from etcd, and send read/write requests for
  secondary copies of data to other OSDs.
 - **etcd** — clustered key/value database, used as a reliable storage for configuration
  and high-level cluster state. Etcd is the component that prevents splitbrain in the cluster.
  Data blocks are not stored in etcd, etcd doesn't participate in data write or read path.
 - **Monitor** — a separate node.js based daemon which monitors the cluster, calculates
  required configuration changes and saves them to etcd, thus commanding OSDs to apply these
  changes. Monitor also aggregates cluster statistics. OSDs don't talk to the monitor; the monitor
  only sends data to and receives data from etcd.
 ## Basic concepts
- **Pool** is a container for data that has equal redundancy scheme and disk placement rules.
+- OSD (Object Storage Daemon) is a process that stores data and serves read/write requests.
- **PG (Placement Group)** is a "shard" of the cluster, subdivision unit that has its own
+- PG (Placement Group) is a "shard" of the cluster, group of data stored on one set of replicas.
-  set of OSDs for data storage.
+- Pool is a container for data that has equal redundancy scheme and placement rules.
- **Failure Domain** is a group of OSDs, from the simultaneous failure of which you are
+- Monitor is a separate daemon that watches cluster state and handles failures.
-  protected by Vitastor. Default failure domain is "host" (server), but you choose a
+- Failure Domain is a group of OSDs that you allow to fail. It's "host" by default.
-  larger (for example, a rack of servers) or smaller (a single drive) failure domain
+- Placement Tree groups OSDs in a hierarchy to later split them into Failure Domains.
  for every pool.
 - **Placement Tree** (similar to Ceph CRUSH Tree) groups OSDs in a hierarchy to later
  split them into Failure Domains.
 ## Client-side components
 - **Client library** encapsulates client I/O logic. Client library connects to etcd and to all OSDs,
  receives cluster state from etcd, sends read and write requests directly to all OSDs. Due
  to the symmetric distributed architecture, all data blocks (each 128 KB by default) are placed
  to different OSDs, but clients always know where each data block is stored and connect directly
  to the right OSD.
 All other client-side components are based on the client library:
 - **[vitastor-cli](../usage/cli.en.md)** — command-line utility for cluster management.
  It allows you to view cluster state and manage pools and images, i.e. create, modify
  and remove virtual disks, their snapshots and clones.
 - **[QEMU driver](../usage/qemu.en.md)** — pluggable QEMU module allowing QEMU/KVM virtual
  machines to work with virtual Vitastor disks directly from userspace through the client library,
  without the need to attach disks as kernel block devices. However, if you want to attach
  disks, you can also do that with the same driver and [VDUSE](../usage/qemu.en.md#vduse).
 - **[vitastor-nbd](../usage/nbd.en.md)** — utility that allows to attach Vitastor disks as
  kernel block devices using NBD (Network Block Device), which works more like "BUSE"
  (Block Device In Userspace). Vitastor doesn't have Linux kernel modules for the same task
  (at least for now). NBD is an older, non-recommended way to attach disks — you should use
  VDUSE whenever you can.
 - **[CSI driver](../installation/kubernetes.en.md)** — driver for attaching Vitastor images
  as Kubernetes persistent volumes. Works through VDUSE (when available) or NBD — images are
  attached as kernel block devices and mounted into containers.
 - **Drivers for Proxmox, OpenStack and so on** — pluggable modules for corresponding systems,
  allowing to use Vitastor as storage in them.
 - **[vitastor-nfs](../usage/nfs.en.md)** — NFS 3.0 server allowing export of two file system variants:
  the first is a simplified pseudo-FS for file-based access to Vitastor block images (for non-QEMU
  hypervisors with NFS support), the second is **VitastorFS**, full-featured clustered POSIX FS.
  Both variants support parallel access from multiple vitastor-nfs servers. In fact, you are
  not required to setup separate NFS servers at all and use vitastor-nfs mount command on every
  client node — it starts the NFS server and mounts the FS locally.
 - **[fio driver](../usage/fio.en.md)** — pluggable module for fio disk benchmarking tool for
  running performance tests on your Vitastor cluster.
 - **vitastor-kv** — client for a key-value DB working over shared block volumes (usual
  vitastor images). VitastorFS metadata is stored in vitastor-kv.
 ## Additional utilities
 - **vitastor-disk** — a Vitastor OSD disk management tool. You can create, remove,
  resize and move OSD partitions with it.
 ## Overall read/write process
 - Vitastor stores virtual disks, also named "images" or "inodes".
 - Each image is stored in some pool. Pool specifies storage parameters such as redundancy
  scheme (replication or EC — erasure codes, i.e. error correction codes), failure domain
  and restrictions on OSD selection for image data placement. See [Pool configuration](../config/pool.en.md) for details.
 - Each image is split into objects/blocks of fixed size, equal to [block_size](../config/layout-cluster.en.md#block_size)
  (128 KB by default), multiplied by data part count for EC or 1 for replicas. That is,
  if a pool uses EC 4+2 coding scheme (4 data parts + 2 parity parts), then, with the
  default block_size, images are split into 512 KB objects.
 - Client read/write requests are split into parts at object boundaries.
 - Each object is mapped to a PG number it belongs to, by simply taking a remainder of
  division of its offset by PG count of the image's pool.
 - Client reads primary OSD for all PGs from etcd. Primary OSD for each PG is assigned
  by the monitor during cluster operation, along with the full PG OSD set.
 - If not already connected, client connects to primary OSDs of all PGs involved in a
  read/write request and sends parts of the request to them.
 - If a primary OSD is unavailable, client retries connection attempts indefinitely
  either until it becomes available or until the monitor assigns another OSD as primary
  for that PG.
 - Client also retries requests if the primary OSD replies with error code EPIPE, meaning
  that the PG is inactive at this OSD at the moment - for example, when the primary OSD
  is switched, or if the primary OSD itself loses connection to replicas during request
  handling.
 - Primary OSD determines where the parts of the object are stored. By default, all objects
  are assumed to be stored at the target OSD set of a PG, but some of them may be present
  at a different OSD set if they are degraded or moved, or if the data rebalancing process
  is active. The OSD doesn't do any network requests to determine this: it calculates the
  locations of all objects during PG activation and stores them in memory.
 - Primary OSD handles the request locally when it can - for example, when it's a read
  from a replicated pool or when it's a read from an EC pool involving only one data part
  stored on the OSD's local disk.
 - When a request requires reads or writes to additional OSDs, primary OSD uses already
  established connections to secondary OSDs of the PG to execute these requests. This happens
  in parallel to local disk operations. All such connections are guaranteed to be already
  established when the PG is active, and if any of them is dropped, PG is restarted and
  all current read/write operations to it fail with EPIPE error and are retried by clients.
 - After completing all secondary read/write requests, primary OSD sends the response to
  the client.
 ### Nuances of request handling
 - If a pool uses erasure codes and some of the OSDs are unavailable, primary OSDs recover
  data from the remaining parts during read.
 - Each object has a version number. During write, primary OSD first determines the current
  version of the object. As primary OSD usually stores the object or its part itself, most
  of the time version is read from the memory of the OSD itself. However, if primary OSD
  doesn't contain parts of the object, it requests the version number from a secondary OSD
  which has that part. Such request still doesn't involve reading from the disk though,
  because object metadata, including version number, is always stored in OSD memory.
 - If a pool uses erasure codes, partial writes of an object require reading other parts of
  it from secondary OSDs or from the local disk of the primary OSD itself. This is called
  "read-modify-write" process.
 - If a pool uses erasure codes, two-phase write process is used to get rid of the Write Hole
  problem: first a new version of object parts is written to all secondary OSDs without
  removing the previous version, and then, after receiving successful write confirmations
  from all OSDs, new version is committed and the old one is allowed to be removed.
 - If a pool doesn't use immediate_commit mode, then write requests sent by clients aren't
  treated as committed to physical media instantly. Clients have to send a separate type of
  request (SYNC) to commit changes, and until it is sent, new versions of data are
  allowed to be lost if some OSDs die. Thus, when immediate_commit is disabled, clients
  store copies of all write requests in memory and repeat them from there when the
  connection to primary OSD is lost. This in-memory copy is removed after a successful
  SYNC, and to prevent excessive memory usage, clients also do an automatic SYNC
  every [client_dirty_limit](../config/network.en.md#client_dirty_limit) written bytes.
 ## Similarities to Ceph
--- a/docs/intro/architecture.ru.md
+++ b/docs/intro/architecture.ru.md
@ -11,7 +11,6 @@
 - [Серверные компоненты](#серверные-компоненты)
 - [Базовые понятия](#базовые-понятия)
 - [Клиентские компоненты](#клиентские-компоненты)
 - [Дополнительные утилиты](#дополнительные-утилиты)
 - [Общий процесс записи и чтения](#общий-процесс-записи-и-чтения)
  - [Особенности обработки запросов](#особенности-обработки-запросов)
 - [Схожесть с Ceph](#схожесть-с-ceph)
@ -35,9 +34,8 @@
 - **Пул (Pool)** — контейнер для данных, имеющих одну и ту же схему избыточности и правила распределения по OSD.
 - **PG (Placement Group)** — "шард", единица деления пулов в кластере, которой назначается свой набор
  OSD для хранения данных (копий или частей объектов).
- **Домен отказа (Failure Domain)** — группа OSD, от одновременного падения которых должен защищать
+- **Домен отказа (Failure Domain)** — группа OSD, одновременное падение которых рассматривается
-  Vitastor. По умолчанию домен отказа — "host" (сервер), но вы можете установить для пула как больший
+  как вероятное. По умолчанию это "host" (сервер).
  домен отказа (например, стойку серверов), так и меньший (например, отдельный диск).
 - **Дерево распределения** (Placement Tree, в Ceph CRUSH Tree) — иерархическая группировка OSD
  в узлы, которые далее можно использовать как домены отказа.
@ -51,39 +49,25 @@
 На базе клиентской библиотеки реализованы все остальные клиенты:
- **[vitastor-cli](../usage/cli.ru.md)** — утилита командной строки для управления кластером.
+- **vitastor-cli** — утилита командной строки для управления кластером. В данный момент позволяет
-  Позволяет просматривать общее состояние кластера, управлять пулами и образами — то есть
+  просматривать общее состояние кластера и управлять образами — т.е. создавать, менять и удалять
-  создавать, менять и удалять виртуальные диски, их снимки и клоны.
+  виртуальные диски, их снимки и клоны.
- **[Драйвер QEMU](../usage/qemu.ru.md)** — подключаемый модуль QEMU, позволяющий QEMU/KVM
+- **Драйвер QEMU** — подключаемый модуль QEMU, позволяющий QEMU/KVM виртуальным машинам работать
-  виртуальным машинам работать с виртуальными дисками Vitastor напрямую из пространства пользователя
+  с виртуальными дисками Vitastor напрямую из пространства пользователя с помощью клиентской
-  с помощью клиентской библиотеки, без необходимости подключения дисков в виде блочных устройств
+  библиотеки, без необходимости отображения дисков в виде блочных устройств. Тот же драйвер
-  Linux. Если, однако, вы хотите подключать диски в виде блочных устройств, то вы тоже можете
+  позволяет подключать диски в систему через [VDUSE](../usage/qemu.ru.md#vduse).
-  сделать это с помощью того же самого драйвера и [VDUSE](../usage/qemu.ru.md#vduse).
+- **vitastor-nbd** — утилита, позволяющая монтировать образы Vitastor в виде блочных устройств
- **[vitastor-nbd](../usage/nbd.ru.md)** — утилита, позволяющая монтировать образы Vitastor
+  с помощью NBD (Network Block Device), на самом деле скорее работающего как "BUSE"
-  в виде блочных устройств с помощью NBD (Network Block Device), на самом деле скорее работающего
+  (Block Device In Userspace). Модуля ядра Linux для выполнения той же задачи в Vitastor нет
-  как "BUSE" (Block Device In Userspace). Модуля ядра Linux для выполнения той же задачи в
+  (по крайней мере, пока).
-  Vitastor нет (по крайней мере, пока). NBD — более старый и нерекомендуемый способ подключения
+- **CSI драйвер** — драйвер для подключения Vitastor-образов в виде персистентных томов (PV) Kubernetes.
-  дисков — вам следует использовать VDUSE всегда, когда это возможно.
+  Работает через vitastor-nbd — образы отражаются в виде блочных устройств и монтируются
- **[CSI драйвер](../installation/kubernetes.ru.md)** — драйвер для подключения Vitastor-образов
+  в контейнеры.
  в виде персистентных томов (PV) Kubernetes. Работает через VDUSE (если доступно) или через
  NBD — образы отражаются в виде блочных устройств и монтируются в контейнеры.
 - **Драйвера Proxmox, OpenStack и т.п.** — подключаемые модули для соответствующих систем,
  позволяющие использовать Vitastor как хранилище в оных.
- **[vitastor-nfs](../usage/nfs.ru.md)** — NFS 3.0 сервер, предоставляющий два варианта файловой системы:
+- **vitastor-nfs** — утилита, предоставляющая файловый доступ к образам в кластере Vitastor
-  первая — упрощённая для файлового доступа к блочным образам (для не-QEMU гипервизоров, поддерживающих NFS),
+  по протоколу NFS 3.0. Предназначена для гипервизоров, не основанных на QEMU и Linux, но при
-  вторая — VitastorFS, полноценная кластерная POSIX ФС. Оба варианта поддерживают параллельный
+  этом поддерживающих NFS.
  доступ с нескольких vitastor-nfs серверов. На самом деле можно вообще не выделять
  отдельные NFS-серверы, а вместо этого использовать команду vitastor-nfs mount, запускающую
  NFS-сервер прямо на клиентской машине и монтирующую ФС локально.
 - **[Драйвер fio](../usage/fio.ru.md)** — подключаемый модуль для утилиты тестирования
  производительности дисков fio, позволяющий тестировать Vitastor-кластеры.
 - **vitastor-kv** — клиент для key-value базы данных, работающей поверх разделяемого блочного
  образа (обычного блочного образа vitastor). Метаданные VitastorFS хранятся именно в vitastor-kv.
 ## Дополнительные утилиты
 - **vitastor-disk** — утилита для разметки дисков под Vitastor OSD. С её помощью можно
  создавать, удалять, менять размеры или перемещать разделы OSD.
 ## Общий процесс записи и чтения
@ -114,22 +98,16 @@
  находиться на других OSD, если эти объекты деградированы или перемещены, или идёт процесс
  ребаланса. Запросы для проверки по сети не отправляются, информация о местоположении всех
  объектов рассчитывается первичным OSD при активации PG и хранится в памяти.
- Когда это возможно, первичный OSD обрабатывает запрос локально. Например, так происходит
+- Первичный OSD соединяется (если ещё не соединён) с вторичными OSD, на которых располагаются
-  при чтениях объектов из пулов с репликацией или при чтении из EC пула, затрагивающего
+  части объекта, и отправляет им запросы чтения/записи, а также читает/пишет из/в своё локальное
-  только часть, хранимую на диске самого первичного OSD.
+  хранилище, если сам входит в набор.
 - Когда запрос требует записи или чтения с вторичных OSD, первичный OSD использует заранее
  установленные соединения с ними для выполнения этих запросов. Это происходит параллельно
  локальным операциям чтения/записи с диска самого OSD. Так как соединения к вторичным OSD PG
  устанавливаются при её запуске, то они уже гарантированно установлены, когда PG активна,
  и если любое из этих соединений отключается, PG перезапускается, а все текущие запросы чтения
  и записи в неё завершаются с ошибкой EPIPE, после чего повторяются клиентами.
 - После завершения всех вторичных операций чтения/записи первичный OSD отправляет ответ клиенту.
 ### Особенности обработки запросов
 - Если в пуле используются коды коррекции ошибок и при этом часть OSD недоступна, первичный
  OSD при чтении восстанавливает данные из оставшихся частей.
- Каждый объект имеет номер версии. При записи объекта первичный OSD сначала получает номер
+- Каждый объект имеет номер версии. При записи объекта первичный OSD сначала читает номер
  версии объекта. Так как первичный OSD обычно сам хранит копию или часть объекта, номер
  версии обычно читается из памяти самого OSD. Однако, если ни одна часть обновляемого объекта
  не находится на первичном OSD, для получения номера версии он обращается к одному из вторичных
@ -137,20 +115,20 @@
  так как метаданные объектов, включая номер версии, все OSD хранят в памяти.
 - Если в пуле используются коды коррекции ошибок, перед частичной записью объекта для вычисления
  чётности зачастую требуется чтение частей объекта с вторичных OSD или с локального диска
-  самого первичного OSD. Это называется процессом "чтение-модификация-запись" (read-modify-write).
+  самого первичного OSD.
- Если в пуле используются коды коррекции ошибок, для закрытия Write Hole применяется
+- Также, если в пуле используются коды коррекции ошибок, для закрытия Write Hole применяется
  двухфазный алгоритм записи: сначала на все вторичные OSD записывается новая версия частей
  объекта, но при этом старая версия не удаляется, а потом, после получения подтверждения
  успешной записи от всех вторичных OSD, новая версия фиксируется и разрешается удаление старой.
- Если в пуле не включён режим immediate_commit, то запросы записи, отправляемые клиентами,
+- Если в кластере не включён режим immediate_commit, то запросы записи, отправляемые клиентами,
  не считаются зафиксированными на физических накопителях сразу. Для фиксации данных клиенты
  должны отдельно отправлять запросы SYNC (отдельный от чтения и записи вид запроса),
  а пока такой запрос не отправлен, считается, что записанные данные могут исчезнуть,
  если соответствующий OSD упадёт. Поэтому, когда режим immediate_commit отключён, все
  запросы записи клиенты копируют в памяти и при потере соединения и повторном соединении
-  с OSD повторяют из памяти. Скопированные в память данные удаляются при успешном SYNC,
+  с OSD повторяют из памяти. Скопированные в память данные удаляются при успешном fsync,
  а чтобы хранение этих данных не приводило к чрезмерному потреблению памяти, клиенты
-  автоматически выполняют SYNC каждые [client_dirty_limit](../config/network.ru.md#client_dirty_limit)
+  автоматически выполняют fsync каждые [client_dirty_limit](../config/network.ru.md#client_dirty_limit)
  записанных байт.
 ## Схожесть с Ceph
--- a/docs/intro/features.en.md
+++ b/docs/intro/features.en.md
@ -13,7 +13,7 @@
 ## Server-side features
 - Basic part: highly-available block storage with symmetric clustering and no SPOF
- [Performance](../performance/bench2.en.md) ;-D
+- [Performance](../performance/comparison1.en.md) ;-D
 - [Multiple redundancy schemes](../config/pool.en.md#scheme): Replication, XOR n+1, Reed-Solomon erasure codes
  based on jerasure and ISA-L libraries with any number of data and parity drives in a group
 - Configuration via simple JSON data structures in etcd (parameters, pools and images)
@ -33,16 +33,9 @@
 - [Checksums](../config/layout-osd.en.md#data_csum_type)
 - [Client write-back cache](../config/client.en.md#client_enable_writeback)
 - [Intelligent recovery auto-tuning](../config/osd.en.md#recovery_tune_interval)
 - [Clustered file system](../usage/nfs.en.md#vitastorfs)
 - [Experimental internal etcd replacement - antietcd](../config/monitor.en.md#use_antietcd)
 - [Built-in Prometheus metric exporter](../config/monitor.en.md#enable_prometheus)
 ## Plugins and tools
 - [Proxmox storage plugin and packages](../installation/proxmox.en.md)
 - [OpenNebula storage plugin](../installation/opennebula.en.md)
 - [CSI plugin for Kubernetes](../installation/kubernetes.en.md)
 - [OpenStack support: Cinder driver, Nova and libvirt patches](../installation/openstack.en.md)
 - [Debian and CentOS packages](../installation/packages.en.md)
 - [Image management CLI (vitastor-cli)](../usage/cli.en.md)
 - [Disk management CLI (vitastor-disk)](../usage/disk.en.md)
@ -50,15 +43,20 @@
 - [Native QEMU driver](../usage/qemu.en.md)
 - [Loadable fio engine for benchmarks](../usage/fio.en.md)
 - [NBD proxy for kernel mounts](../usage/nbd.en.md)
- [Simplified NFS proxy for file-based image access emulation (suitable for VMWare)](../usage/nfs.en.md#pseudo-fs)
+- [CSI plugin for Kubernetes](../installation/kubernetes.en.md)
 - [OpenStack support: Cinder driver, Nova and libvirt patches](../installation/openstack.en.md)
 - [Proxmox storage plugin and packages](../installation/proxmox.en.md)
 - [Simplified NFS proxy for file-based image access emulation (suitable for VMWare)](../usage/nfs.en.md)
 ## Roadmap
 The following features are planned for the future:
 - File system
 - Control plane optimisation
 - Other administrative tools
 - Web GUI
 - OpenNebula plugin
 - iSCSI and NVMeoF gateways
 - Multi-threaded client
 - Faster failover
--- a/docs/intro/features.ru.md
+++ b/docs/intro/features.ru.md
@ -13,7 +13,7 @@
 ## Серверные функции
 - Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
- [Производительность](../performance/bench2.ru.md) ;-D
+- [Производительность](../performance/comparison1.ru.md) ;-D
 - [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
  Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
 - Конфигурация через простые человекочитаемые JSON-структуры в etcd
@ -35,16 +35,9 @@
 - [Контрольные суммы](../config/layout-osd.ru.md#data_csum_type)
 - [Буферизация записи на стороне клиента](../config/client.ru.md#client_enable_writeback)
 - [Интеллектуальная автоподстройка скорости восстановления](../config/osd.ru.md#recovery_tune_interval)
 - [Кластерная файловая система](../usage/nfs.ru.md#vitastorfs)
 - [Экспериментальная встроенная замена etcd - antietcd](../config/monitor.ru.md#use_antietcd)
 - [Встроенный Prometheus-экспортер метрик](../config/monitor.ru.md#enable_prometheus)
 ## Драйверы и инструменты
 - [Плагин для Proxmox](../installation/proxmox.ru.md)
 - [Плагин для OpenNebula](../installation/opennebula.ru.md)
 - [CSI-плагин для Kubernetes](../installation/kubernetes.ru.md)
 - [Базовая поддержка OpenStack: драйвер Cinder, патчи для Nova и libvirt](../installation/openstack.ru.md)
 - [Пакеты для Debian и CentOS](../installation/packages.ru.md)
 - [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
 - [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md)
@ -52,13 +45,18 @@
 - [Драйвер диска для QEMU](../usage/qemu.ru.md)
 - [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
 - [NBD-прокси для монтирования образов ядром](../usage/nbd.ru.md) ("блочное устройство в режиме пользователя")
- [Упрощённая NFS-прокси для эмуляции файлового доступа к образам (подходит для VMWare)](../usage/nfs.ru.md#псевдо-фс)
+- [CSI-плагин для Kubernetes](../installation/kubernetes.ru.md)
 - [Базовая поддержка OpenStack: драйвер Cinder, патчи для Nova и libvirt](../installation/openstack.ru.md)
 - [Плагин для Proxmox](../installation/proxmox.ru.md)
 - [Упрощённая NFS-прокси для эмуляции файлового доступа к образам (подходит для VMWare)](../usage/nfs.ru.md)
 ## Планы развития
 - Файловая система
 - Оптимизация слоя управления
 - Другие инструменты администрирования
 - Web-интерфейс
 - Плагин для OpenNebula
 - iSCSI и NVMeoF прокси
 - Многопоточный клиент
 - Более быстрое переключение при отказах
--- a/docs/intro/quickstart.en.md
+++ b/docs/intro/quickstart.en.md
@ -14,7 +14,6 @@
 - [Check cluster status](#check-cluster-status)
 - [Create an image](#create-an-image)
 - [Install plugins](#install-plugins)
 - [Create VitastorFS](#create-vitastorfs)
 ## Preparation
@ -22,7 +21,7 @@
  with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
  [here](../config/layout-cluster.en.md#immediate_commit).
 - If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
-  Toshiba MG, Seagate EXOS or something similar. If your drives don't have such cache then
+  Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
  you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
 - Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
 - Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
@ -32,8 +31,8 @@
 - SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
 - NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
-  Intel DC-P3700/P4500/P4600, Intel D5-P4320, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
+  Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
+- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
 ## Configure monitors
@ -68,26 +67,28 @@ On the monitor hosts:
    but some free unpartitioned space must be available because the script creates new partitions for journals.
 - You can change OSD configuration in units or in `vitastor.conf`.
  Check [Configuration Reference](../config.en.md) for parameter descriptions.
 - If all your drives have capacitors, or even if they don't but you ran `vitastor-disk`
  without `--disable_data_fsync off` at the first step, then put the following
  setting into etcd: \
  `etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
 - Start all OSDs: `systemctl start vitastor.target`
 ## Create a pool
-Create a pool using vitastor-cli:
+Create pool configuration in etcd:
 ```
-vitastor-cli create-pool testpool --pg_size 2 --pg_count 256
+etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
  "scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":256,"failure_domain":"host"}}'
 ```
 For EC pools the configuration should look like the following:
 ```
-vitastor-cli create-pool testpool --ec 2+2 --pg_count 256
+etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
  "scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
 ```
 Add `--immediate_commit none` if you added `--disable_data_fsync off` at the OSD
 initialization step, or if `vitastor-disk` complained that it was unable to
 disable the drive cache.
 After you do this, one of the monitors will configure PGs and OSDs will start them.
 If you use HDDs you should also add `"block_size": 1048576` to pool configuration.
@ -115,9 +116,3 @@ After that, you can [run benchmarks](../usage/fio.en.md) or [start QEMU manually
 - [Proxmox](../installation/proxmox.en.md)
 - [OpenStack](../installation/openstack.en.md)
 - [Kubernetes CSI](../installation/kubernetes.en.md)
 ## Create VitastorFS
 If you want to use clustered file system in addition to VM or container images:
 - [Follow the instructions here](../usage/nfs.en.md#vitastorfs)
--- a/docs/intro/quickstart.ru.md
+++ b/docs/intro/quickstart.ru.md
@ -14,7 +14,6 @@
 - [Проверьте состояние кластера](#проверьте-состояние-кластера)
 - [Создайте образ](#создайте-образ)
 - [Установите плагины](#установите-плагины)
 - [Создайте VitastorFS](#создайте-vitastorfs)
 ## Подготовка
@ -22,7 +21,7 @@
  использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже.
  О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit).
 - Если хотите использовать HDD, берите современные модели с Media или SSD кэшем - HGST Ultrastar,
-  Toshiba MG, Seagate EXOS или что-то похожее. Если такого кэша у ваших дисков нет,
+  Toshiba MG08, Seagate EXOS или что-то похожее. Если такого кэша у ваших дисков нет,
  обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
 - Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
 - Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
@ -32,8 +31,8 @@
 - SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
 - NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
-  Intel DC-P3700/P4500/P4600, Intel D5-P4320, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
+  Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
+- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
 ## Настройте мониторы
@ -69,26 +68,29 @@
    для журналов, на SSD должно быть доступно свободное нераспределённое место.
 - Вы можете менять параметры OSD в юнитах systemd или в `vitastor.conf`. Описания параметров
  смотрите в [справке по конфигурации](../config.ru.md).
 - Если все ваши диски - серверные с конденсаторами, и даже если нет, но при этом
  вы не добавляли опцию `--disable_data_fsync off` на первом шаге, а `vitastor-disk`
  не ругался на невозможность отключения кэша дисков, пропишите следующую настройку
  в глобальную конфигурацию в etcd: \
  `etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`.
 - Запустите все OSD: `systemctl start vitastor.target`
 ## Создайте пул
-Создайте пул с помощью vitastor-cli:
+Создайте конфигурацию пула с помощью etcdctl:
 ```
-vitastor-cli create-pool testpool --pg_size 2 --pg_count 256
+etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
  "scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":256,"failure_domain":"host"}}'
 ```
 Для пулов с кодами коррекции ошибок конфигурация должна выглядеть примерно так:
 ```
-vitastor-cli create-pool testpool --ec 2+2 --pg_count 256
+etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
  "scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
 ```
 Добавьте также опцию `--immediate_commit none`, если вы добавляли `--disable_data_fsync off`
 на этапе инициализации OSD, либо если `vitastor-disk` ругался на невозможность отключения
 кэша дисков.
 После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
 Если вы используете HDD-диски, то добавьте в конфигурацию пулов опцию `"block_size": 1048576`.
@ -116,10 +118,3 @@ vitastor-cli create -s 10G testimg
 - [Proxmox](../installation/proxmox.ru.md)
 - [OpenStack](../installation/openstack.ru.md)
 - [Kubernetes CSI](../installation/kubernetes.ru.md)
 ## Создайте VitastorFS
 Если вы хотите использовать не только блочные образы виртуальных машин или контейнеров,
 а также кластерную файловую систему, то:
 - [Следуйте инструкциям](../usage/nfs.ru.md#vitastorfs)
--- a/docs/performance/bench2.en.md
+++ b/docs/performance/bench2.en.md
@ -1,154 +0,0 @@
 [Documentation](../../README.md#documentation) → Performance → Newer benchmark of Vitastor 1.3.1
 -----
 [Читать на русском](bench2.ru.md)
 # Newer benchmark of Vitastor 1.3.1
 - [Test environment](#test-environment)
 - [Notes](#notes)
 - [Raw drive performance](#raw-drive-performance)
 - [2 replicas](#2-replicas)
 - [3 replicas](#3-replicas)
 - [EC 2+1](#ec-2-1)
 ## Test environment
 Hardware configuration: 3 nodes, each with:
 - 8x NVMe Samsung PM9A3 1.92 TB
 - 2x Xeon Gold 6342 (24 cores @ 2.8 GHz)
 - 256 GB RAM
 - Dual-port 25 GbE Mellanox ConnectX-4 LX network card with RoCEv2
 - Connected to 2 Mellanox SN2010 switches with MLAG
 ## Notes
 Vitastor version was 1.3.1.
 Tests were run from the storage nodes - 4 fio clients per each of 3 nodes.
 The same large 3 TB image was tested from all hosts because Vitastor has no
 performance penalties related to running multiple clients over a single inode.
 CPU power saving was disabled. 4 OSDs were created per each NVMe.
 Checksums were not enabled. Tests with checksums will be conducted later,
 along with the newer version of Vitastor, and results will be updated.
 CPU configuration was not optimal because of NUMA. It's better to avoid 2-socket
 platforms. It was especially noticeable in RDMA tests - in the form of ksoftirqd
 processes (usually 1 per server) eating 100 % of one CPU core and actual bandwidth
 of one network port reduced to 3-5 Gbit/s instead of 25 Gbit/s - probably because
 of RFS (Receive Flow Steering) misses. Many network configurations were tried during
 tests, but nothing helped to solve this problem, so final tests were conducted with
 the default settings.
 ## Raw drive performance
 - Linear write ~1000-2000 MB/s, depending on current state of the drive's garbage collector
 - Linear read ~3300 MB/s
 - T1Q1 random write ~60000 iops (latency ~0.015ms)
 - T1Q1 random read ~14700 iops (latency ~0.066ms)
 - T1Q16 random write ~180000 iops
 - T1Q16 random read ~120000 iops
 - T1Q32 random write ~180000 iops
 - T1Q32 random read ~195000 iops
 - T1Q128 random write ~180000 iops
 - T1Q128 random read ~195000 iops
 - T4Q128 random write ~525000 iops
 - T4Q128 random read ~750000 iops
 These numbers make it obvious that results could be much better if a faster network
 was available, because NVMe drives obviously weren't a bottleneck. For example,
 theoretical maximum linear read performance for 24 drives could be 79.2 GByte/s,
 which is 633 Gbit/s. Real Vitastor read speed (both linear and random) was around
 16 Gbyte/s, which is 130 Gbit/s. It's important to note that it was still much
 larger than the network bandwidth of one server (50 Gbit/s). This is also expected
 because tests were conducted from all 3 nodes.
 ## 2 replicas
 |                              | TCP          | RDMA         |
 |------------------------------|--------------|--------------|
 | Linear read (4M T6 Q16)      | 13.13 GB/s   | 16.25 GB/s   |
 | Linear write (4M T6 Q16)     | 8.16 GB/s    | 7.88 GB/s    |
 | Read 4k T1 Q1                | 8745 iops    | 10252 iops   |
 | Write 4k T1 Q1               | 8097 iops    | 11488 iops   |
 | Read 4k T12 Q128             | 1305936 iops | 4265861 iops |
 | Write 4k T12 Q128            | 660490 iops  | 1384033 iops |
 OSD CPU consumption per 1 disk:
 |                              | TCP     | RDMA    |
 |------------------------------|---------|---------|
 | Linear read (4M T6 Q16)      | 29.7 %  | 29.8 %  |
 | Linear write (4M T6 Q16)     | 84.4 %  | 33.2 %  |
 | Read 4k T12 Q128             | 98.4 %  | 119.1 % |
 | Write 4k T12 Q128            | 173.4 % | 175.9 % |
 CPU consumption per 1 client (fio):
 |                              | TCP    | RDMA   |
 |------------------------------|--------|--------|
 | Linear read (4M T6 Q16)      | 100 %  | 85.2 % |
 | Linear write (4M T6 Q16)     | 55.8 % | 48.8 % |
 | Read 4k T12 Q128             | 99.9 % | 96 %   |
 | Write 4k T12 Q128            | 71.6 % | 48.5 % |
 ## 3 replicas
 |                              | TCP          | RDMA         |
 |------------------------------|--------------|--------------|
 | Linear read (4M T6 Q16)      | 13.98 GB/s   | 16.54 GB/s   |
 | Linear write (4M T6 Q16)     | 5.38 GB/s    | 5.7 GB/s     |
 | Read 4k T1 Q1                | 8969 iops    | 9980 iops    |
 | Write 4k T1 Q1               | 8126 iops    | 11672 iops   |
 | Read 4k T12 Q128             | 1358818 iops | 4279088 iops |
 | Write 4k T12 Q128            | 433890 iops  | 993506 iops  |
 OSD CPU consumption per 1 disk:
 |                              | TCP    | RDMA    |
 |------------------------------|--------|---------|
 | Linear read (4M T6 Q16)      | 24.9 % | 25.4 %  |
 | Linear write (4M T6 Q16)     | 99.3 % | 38.4 %  |
 | Read 4k T12 Q128             | 95.3 % | 111.7 % |
 | Write 4k T12 Q128            | 173 %  | 194 %   |
 CPU consumption per 1 client (fio):
 |                              | TCP    | RDMA   |
 |------------------------------|--------|--------|
 | Linear read (4M T6 Q16)      | 99.9 % | 85.8 % |
 | Linear write (4M T6 Q16)     | 38.9 % | 38.1 % |
 | Read 4k T12 Q128             | 100 %  | 96.1 % |
 | Write 4k T12 Q128            | 51.6 % | 41.9 % |
 ## EC 2+1
 |                              | TCP          | RDMA         |
 |------------------------------|--------------|--------------|
 | Linear read (4M T6 Q16)      | 10.07 GB/s   | 11.43 GB/s   |
 | Linear write (4M T6 Q16)     | 7.74 GB/s    | 8.32 GB/s    |
 | Read 4k T1 Q1                | 7408 iops    | 8891 iops    |
 | Write 4k T1 Q1               | 3525 iops    | 4903 iops    |
 | Read 4k T12 Q128             | 1216496 iops | 2552765 iops |
 | Write 4k T12 Q128            | 278110 iops  | 821261 iops  |
 OSD CPU consumption per 1 disk:
 |                              | TCP     | RDMA    |
 |------------------------------|---------|---------|
 | Linear read (4M T6 Q16)      | 68.6 %  | 33.6 %  |
 | Linear write (4M T6 Q16)     | 108.3 % | 50.2 %  |
 | Read 4k T12 Q128             | 138.1 % | 97.9 %  |
 | Write 4k T12 Q128            | 168.7 % | 188.5 % |
 CPU consumption per 1 client (fio):
 |                              | TCP    | RDMA   |
 |------------------------------|--------|--------|
 | Linear read (4M T6 Q16)      | 88.2 % | 52.4 % |
 | Linear write (4M T6 Q16)     | 51.8 % | 46.8 % |
 | Read 4k T12 Q128             | 99.7 % | 61.3 % |
 | Write 4k T12 Q128            | 35.1 % | 31.3 % |
--- a/docs/performance/bench2.ru.md
+++ b/docs/performance/bench2.ru.md
@ -1,157 +0,0 @@
 [Документация](../../README-ru.md#документация) → Производительность → Более новый тест Vitastor 1.3.1
 -----
 [Read in English](bench2.en.md)
 # Более новый тест Vitastor 1.3.1
 - [Описание стенда](#описание-стенда)
 - [Примечания](#примечания)
 - [Производительность голых дисков](#производительность-голых-дисков)
 - [2 реплики](#2-реплики)
 - [3 реплики](#3-реплики)
 - [EC 2+1](#ec-2-1)
 ## Описание стенда
 Железо: 3 сервера, в каждом:
 - 8x NVMe Samsung PM9A3 1.92 TB
 - 2x Xeon Gold 6342 (24 ядра @ 2.8 GHz)
 - 256 GB RAM
 - Двухпортовая 25 GbE сетевая карта Mellanox ConnectX-4 LX с поддержкой RoCEv2
 - Подключение к 2 коммутаторам Mellanox SN2010 в MLAG
 ## Примечания
 Версия Vitastor 1.3.1.
 Тесты проводились с самих серверов хранения - по 4 клиента fio с каждого из 3 серверов.
 Тестировался один большой образ размером 3 ТБ со всех хостов - создавать отдельные образы
 для тестов в Vitastor необязательно, т.к. в Vitastor нет замедления при записи в один
 узел несколькими клиентами.
 Экономия энергии CPU отключена. На каждый NVMe создавалось 4 OSD.
 Контрольные суммы не включались. Тесты с контрольными суммами будут проведены
 позднее. Тогда же будет протестирована более новая версия Vitastor, и результаты
 будут обновлены.
 Конфигурация CPU стенда неоптимальна из-за NUMA - двухпроцессорных серверов лучше
 избегать. Особенно это проявлялось во время тестов с RDMA - выражалось это в потреблении
 100% одного ядра CPU одним процессом ksoftirqd и работой одного из двух сетевых портов
 на скорости 3-5 ГБит/с вместо 25 ГБит/с - предположительно, из-за "непопаданий" RFS
 (Receive Flow Steering) на нужные ядра. Решить эту проблему во время проведения тестов
 не получилось. Было перепробовано множество различных настроек, но в итоге тесты проведены
 с настройками по умолчанию, т.к. улучшения добиться не удалось.
 ## Производительность голых дисков
 - Линейная запись ~1000-2000 МБ/с, в зависимости от состояния сборщика мусора диска
 - Линейное чтение ~3300 МБ/с
 - T1Q1 запись ~60000 iops (задержка ~0.015ms)
 - T1Q1 чтение ~14700 iops (задержка ~0.066ms)
 - T1Q16 запись ~180000 iops
 - T1Q16 чтение ~120000 iops
 - T1Q32 запись ~180000 iops
 - T1Q32 чтение ~195000 iops
 - T1Q128 запись ~180000 iops
 - T1Q128 чтение ~195000 iops
 - T4Q128 запись ~525000 iops
 - T4Q128 чтение ~750000 iops
 Из данных цифр очевидно, что при наличии более быстрой сети результаты были бы
 значительно лучше, так как в диски тест, очевидно, не упирался. Например, теоретический предел по
 линейному чтению для 24 таких дисков был бы около 79.2 ГБайт/с, то есть,
 633 гигабита в секунду. Реальная скорость чтения (и случайного, и линейного)
 Vitastor составила примерно 16 ГБайт/с, то есть 130 гигабит в секунду. При этом
 следует заметить, что этот результат всё равно значительно лучше пропускной способности
 сети отдельно взятого узла - что тоже вполне логично, так как тест выполнялся со
 всех трёх узлов.
 ## 2 реплики
 |                              | TCP          | RDMA         |
 |------------------------------|--------------|--------------|
 | Линейное чтение (4M T6 Q16)  | 13.13 ГБ/с   | 16.25 ГБ/с   |
 | Линейная запись (4M T6 Q16)  | 8.16 ГБ/с    | 7.88 ГБ/с    |
 | Чтение 4k T1 Q1              | 8745 iops    | 10252 iops   |
 | Запись 4k T1 Q1              | 8097 iops    | 11488 iops   |
 | Чтение 4k T12 Q128           | 1305936 iops | 4265861 iops |
 | Запись 4k T12 Q128           | 660490 iops  | 1384033 iops |
 Потребление CPU OSD на 1 диск:
 |                              | TCP     | RDMA    |
 |------------------------------|---------|---------|
 | Линейное чтение (4M T6 Q16)  | 29.7 %  | 29.8 %  |
 | Линейная запись (4M T6 Q16)  | 84.4 %  | 33.2 %  |
 | Чтение 4k T12 Q128           | 98.4 %  | 119.1 % |
 | Запись 4k T12 Q128           | 173.4 % | 175.9 % |
 Потребление CPU на 1 клиента (fio):
 |                              | TCP    | RDMA   |
 |------------------------------|--------|--------|
 | Линейное чтение (4M T6 Q16)  | 100 %  | 85.2 % |
 | Линейная запись (4M T6 Q16)  | 55.8 % | 48.8 % |
 | Чтение 4k T12 Q128           | 99.9 % | 96 %   |
 | Запись 4k T12 Q128           | 71.6 % | 48.5 % |
 ## 3 реплики
 |                              | TCP          | RDMA         |
 |------------------------------|--------------|--------------|
 | Линейное чтение (4M T6 Q16)  | 13.98 ГБ/с   | 16.54 ГБ/с   |
 | Линейная запись (4M T6 Q16)  | 5.38 ГБ/с    | 5.7 ГБ/с     |
 | Чтение 4k T1 Q1              | 8969 iops    | 9980 iops    |
 | Запись 4k T1 Q1              | 8126 iops    | 11672 iops   |
 | Чтение 4k T12 Q128           | 1358818 iops | 4279088 iops |
 | Запись 4k T12 Q128           | 433890 iops  | 993506 iops  |
 Потребление CPU OSD на 1 диск:
 |                              | TCP    | RDMA    |
 |------------------------------|--------|---------|
 | Линейное чтение (4M T6 Q16)  | 24.9 % | 25.4 %  |
 | Линейная запись (4M T6 Q16)  | 99.3 % | 38.4 %  |
 | Чтение 4k T12 Q128           | 95.3 % | 111.7 % |
 | Запись 4k T12 Q128           | 173 %  | 194 %   |
 Потребление CPU на 1 клиента (fio):
 |                              | TCP    | RDMA   |
 |------------------------------|--------|--------|
 | Линейное чтение (4M T6 Q16)  | 99.9 % | 85.8 % |
 | Линейная запись (4M T6 Q16)  | 38.9 % | 38.1 % |
 | Чтение 4k T12 Q128           | 100 %  | 96.1 % |
 | Запись 4k T12 Q128           | 51.6 % | 41.9 % |
 ## EC 2+1
 |                              | TCP          | RDMA         |
 |------------------------------|--------------|--------------|
 | Линейное чтение (4M T6 Q16)  | 10.07 ГБ/с   | 11.43 ГБ/с   |
 | Линейная запись (4M T6 Q16)  | 7.74 ГБ/с    | 8.32 ГБ/с    |
 | Чтение 4k T1 Q1              | 7408 iops    | 8891 iops    |
 | Запись 4k T1 Q1              | 3525 iops    | 4903 iops    |
 | Чтение 4k T12 Q128           | 1216496 iops | 2552765 iops |
 | Запись 4k T12 Q128           | 278110 iops  | 821261 iops  |
 Потребление CPU OSD на 1 диск:
 |                              | TCP     | RDMA    |
 |------------------------------|---------|---------|
 | Линейное чтение (4M T6 Q16)  | 68.6 %  | 33.6 %  |
 | Линейная запись (4M T6 Q16)  | 108.3 % | 50.2 %  |
 | Чтение 4k T12 Q128           | 138.1 % | 97.9 %  |
 | Запись 4k T12 Q128           | 168.7 % | 188.5 % |
 Потребление CPU на 1 клиента (fio):
 |                              | TCP    | RDMA   |
 |------------------------------|--------|--------|
 | Линейное чтение (4M T6 Q16)  | 88.2 % | 52.4 % |
 | Линейная запись (4M T6 Q16)  | 51.8 % | 46.8 % |
 | Чтение 4k T12 Q128           | 99.7 % | 61.3 % |
 | Запись 4k T12 Q128           | 35.1 % | 31.3 % |
--- a/docs/usage/admin.en.md
+++ b/docs/usage/admin.en.md
@ -1,265 +0,0 @@
 [Documentation](../../README.md#documentation) → Usage → Administration
 -----
 [Читать на русском](admin.ru.md)
 # Administration
 - [Pool states](#pool-states)
 - [PG states](#pg-states)
  - [Base PG states](#base-pg-states)
  - [Additional PG states](#additional-pg-states)
 - [Removing a healthy disk](#removing-a-healthy-disk)
 - [Removing a failed disk](#removing-a-failed-disk)
 - [Adding a disk](#adding-a-disk)
 - [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration)
 - [Upgrading Vitastor](#upgrading-vitastor)
 - [OSD memory usage](#osd-memory-usage)
 ## Pool states
 Pool is active — that is, fully available for client input/output — when all its PGs are
 'active' (maybe with some additional state flags).
 If at least 1 PG is inactive, pool is also inactive and all clients suspend their I/O and
 wait until you fix the cluster. :-)
 ## PG states
 PG states may be seen in [vitastor-cli status](cli.en.md#status) output.
 PG state consists of exactly 1 base state and an arbitrary number of additional states.
 ### Base PG states
 PG state always includes exactly 1 of the following base states:
 - **active** — PG is active and handles user I/O.
 - **incomplete** — Not enough OSDs are available to activate this PG. That is, more disks
  are lost than is allowed by the pool's redundancy scheme. For example, if the pool has
  pg_size=3 and pg_minsize=1, part of the data may be written only to 1 OSD. If that exact
  OSD is lost, PG will become **incomplete**.
 - **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
  this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
  or the primary OSD refuses to start this PG (for example, because of wrong block_size),
  or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/pg/config` in etcd.
 - **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
 - **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
  the PG state.
 - **repeering** — PG is waiting for current I/O operations to complete and will
  then transition to **peering**.
 - **stopping** — PG is waiting for current I/O operations to complete and will
  then transition to **offline** or be activated by another OSD.
 All states except **active** mean that PG is inactive and client I/O is suspended.
 **peering** state is normally visible only for a short period of time during OSD restarts
 and during switching primary OSD of PGs.
 **starting**, **repeering**, **stopping** states are normally almost never visible at all.
 If you see them for any significant time, chances are that some operations on some OSDs have hung.
 Search for "slow op" in OSD logs to find them — operations hung for more than
 [slow_log_interval](../config/osd.en.md#slow_log_interval) are logged as "slow ops".
 State transition diagram:
 ![PG state transitions](pg_states.svg "PG state transitions")
 ### Additional PG states
 If a PG is active it can also have any number of the following additional states:
 - **degraded** — PG is running on a reduced number of drives (OSDs), redundancy of all
  objects in this PG is reduced.
 - **has_incomplete** — some objects in this PG are incomplete (unrecoverable), that is,
  they have too many lost EC parts (more than pool's [parity_chunks](../config/pool.en.md#parity_chunks)).
 - **has_degraded** — some objects in this PG have reduced redundancy
  compared to the rest of the PG (so PG can be degraded+has_degraded at the same time).
  These objects should be healed automatically by recovery process, unless
  it's disabled by [no_recovery](../config/osd.en.md#no_recovery).
 - **has_misplaced** — some objects in this PG are stored on an OSD set different from
  the target set of the PG. These objects should be moved automatically, unless
  rebalance is disabled by [no_rebalance](../config/osd.en.md#no_rebalance). Objects
  that are degraded and misplaced at the same time are treated as just degraded.
 - **has_unclean** — one more state normally noticeable only for a very short time during
  PG activation. It's used only with EC pools and means that some objects of this PG
  have started but not finished modifications. All such objects are either quickly
  committed or rolled back by the primary OSD when starting the PG, that is why the
  state shouldn't be noticeable. If you notice it, it probably means that commit or
  rollback operations are hung.
 - **has_invalid** — PG contains objects with incorrect part ID. Never occurs normally.
  It can only occur if you delete a non-empty EC pool and then recreate it as a replica
  pool or with smaller data part count.
 - **has_corrupted** — PG has corrupted objects, discovered by checking checksums during
  read or during scrub. When possible, such objects should be recovered automatically.
  If objects remain corrupted, use [vitastor-cli describe](cli.en.md#describe) to find
  out details and/or look into the log of the primary OSD of the PG.
 - **has_inconsistent** — PG has objects with non-matching parts or copies on different OSDs,
  and it's impossible to determine which copy is correct automatically. It may happen
  if you use a pool with 2 replicas and you don't enable checksums, and if data on one
  of the replicas becomes corrupted. You should also use vitastor-cli [describe](cli.en.md#describe)
  and [fix](cli.en.md#fix) commands to remove the incorrect version in this case.
 - **left_on_dead** — part of the data of this PG is left on unavailable OSD that isn't
  fully removed from the cluster. You should either start the corresponding OSD back and
  let it remove the unneeded data or remove it from cluster using vitastor-cli
  [rm-osd](cli.en.md#rm-osd) if you know that it's gone forever (for example, if the disk died).
 - **scrubbing** — data [scrub](../config/osd.en.md#auto_scrub) is running for this PG.
 ## Removing a healthy disk
 Before removing a healthy disk from the cluster set its OSD weight(s) to 0 to
 move data away. To do that, run `vitastor-cli modify-osd --reweight 0 <OSD_NUMBER>`.
 Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
 Zero weight can also be put manually into etcd key `/vitastor/config/osd/<OSD_NUMBER>`, for example:
 ```
 etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
 ```
 ## Removing a failed disk
 If a disk is already dead, its OSD(s) are likely already stopped.
 In this case just remove OSD(s) from the cluster by running `vitastor-cli rm-osd OSD_NUMBER`.
 ## Adding a disk
 If you're adding a server, first install Vitastor packages and copy the
 `/etc/vitastor/vitastor.conf` configuration file to it.
 After that you can just run `vitastor-disk prepare /dev/nvmeXXX`, of course with
 the same parameters which you used for other OSDs in your cluster before.
 ## Restoring from lost pool configuration
 If you remove or corrupt `/vitastor/config/pools` key in etcd all pools will
 be deleted. Don't worry, the data won't be lost, but you'll need to perform
 a specific recovery procedure.
 First you need to restore previous configuration of the pool with the same ID
 and EC/replica parameters and wait until pool PGs appear in `vitastor-cli status`.
 Then add all OSDs into the history records of all PGs. You can do it by running
 the following script (just don't forget to use your own PG_COUNT and POOL_ID):
 ```
 PG_COUNT=32
 POOL_ID=1
 ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
    perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
 for i in $(seq 1 $PG_COUNT); do
    etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
 done
 ```
 After that all PGs should peer and find all previous data.
 ## Upgrading Vitastor
 Every upcoming Vitastor version is usually compatible with previous ones both forward
 and backward regarding the network protocol and etcd data structures.
 So, by default, if this page doesn't contain explicit different instructions, you
 can upgrade your Vitastor cluster by simply upgrading packages and restarting all
 OSDs and monitors in any order.
 Upgrading is performed without stopping clients (VMs/containers), you just need to
 upgrade and restart servers one by one. However, ideally you should restart VMs too
 to make them use the new version of the client library.
 ### 1.7.x to 1.8.0
 It's recommended to upgrade from version <= 1.7.x to version >= 1.8.0 with full downtime,
 i.e. you should first stop clients and then the cluster (OSDs and monitor), because 1.8.0
 includes a fix for etcd event stream inconsistency which could lead to "incomplete" objects
 appearing in EC pools, and in rare cases, probably, even to data corruption during mass OSD
 restarts. It doesn't mean that you WILL hit this problem if you upgrade without full downtime,
 but it's better to secure yourself against it.
 Also, if you upgrade from version <= 1.7.x to version >= 1.8.0, BUT <= 1.9.0: restart all clients
 (VMs and so on), otherwise they will hang when monitor clears old PG configuration key,
 which happens 24 hours after upgrade.
 This is fixed in 1.9.1. So, after upgrading version <= 1.7.x directly to version >= 1.9.1,
 you DO NOT have to restart all old clients immediately - they will work like before until
 you decide to upgrade them too. The downside is that you'll have to remove the old PG
 configuration key (`/vitastor/config/pgs`) from etcd by hand when you make sure that all
 your clients are restarted.
 ### 1.1.x to 1.2.0
 Upgrading version <= 1.1.x to version >= 1.2.0, if you use EC n+k with k>=2, is recommended
 to be performed with full downtime: first you should stop all clients, then all OSDs,
 then upgrade and start everything back — because versions before 1.2.0 have several
 bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
 ### 0.8.7 to 0.9.0
 Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
 upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
 without this intermediate step, client I/O will hang until the end of upgrade process.
 ### 0.5.x to 0.6.x
 Upgrading from <= 0.5.x to >= 0.6.x is not supported.
 ## Downgrade
 Downgrades are also allowed freely, except for the following specific cases:
 ### 1.8.0 to 1.7.1
 Before downgrading from version >= 1.8.0 to version <= 1.7.1
 you have to copy /vitastor/pg/config etcd key to /vitastor/config/pgs:
 ```
 etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
  etcdctl --endpoints=http://... put /vitastor/config/pgs
 ```
 Then you can just install older packages and restart all services.
 If you performed downgrade without first copying that key, run "add all OSDs into the
 history records of all PGs" from [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration).
 ### 1.0.0 to 0.9.x
 Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 or later can't
 be rolled back to 0.9.x or previous versions.
 ### 0.8.0 to 0.7.x
 Versions before 0.8.0 don't have vitastor-disk, so OSDs initialized by it won't
 start with older versions (0.4.x - 0.7.x). :-)
 ## OSD memory usage
 OSD uses RAM mainly for:
 - Metadata index: `data_size`/[`block_size`](../config/layout-cluster.en.md#block_size) * `approximately 1.1` * `32` bytes.
  Consumed always.
 - Copy of the on-disk metadata area: `data_size`/[`block_size`](../config/layout-cluster.en.md#block_size) * `28` bytes.
  Consumed if [inmemory_metadata](../config/osd.en.md#inmemory_metadata) isn't disabled.
 - Bitmaps: `data_size`/[`bitmap_granularity`](../config/layout-cluster.en.md#bitmap_granularity)/`8` * `2` bytes.
  Consumed always.
 - Journal index: between 0 and, approximately, journal size. Consumed always.
 - Copy of the on-disk journal area: exactly journal size. Consumed if
  [inmemory_journal](../config/osd.en.md#inmemory_journal) isn't disabled.
 - Checksums: `data_size`/[`csum_block_size`](../config/osd.en.md#csum_block_size) * 4 bytes.
  Consumed if checksums are enabled and [inmemory_metadata](../config/osd.en.md#inmemory_metadata) isn't disabled.
 bitmap_granularity is almost always 4 KB.
 So with default SSD settings (block_size=128k, journal_size=32M, csum_block_size=4k) memory usage is:
 - Metadata and bitmaps: ~600 MB per 1 TB of data.
 - Journal: up to 64 MB per 1 OSD.
 - Checksums: 1 GB per 1 TB of data.
 With default HDD settings (block_size=1M, journal_size=128M, csum_block_size=32k):
 - Metadata and bitmaps: ~128 MB per 1 TB of data.
 - Journal: up to 256 MB per 1 OSD.
 - Checksums: 128 MB per 1 TB of data.
--- a/docs/usage/admin.ru.md
+++ b/docs/usage/admin.ru.md
@ -1,262 +0,0 @@
 [Документация](../../README-ru.md#документация) → Использование → Администрирование
 -----
 [Read in English](admin.en.md)
 # Администрирование
 - [Состояния пулов](#состояния-пулов)
 - [Состояния PG](#состояния-pg)
  - [Базовые состояния PG](#базовые-состояния-pg)
  - [Дополнительные состояния PG](#дополнительные-состояния-pg)
 - [Удаление исправного диска](#удаление-исправного-диска)
 - [Удаление неисправного диска](#удаление-неисправного-диска)
 - [Добавление диска](#добавление-диска)
 - [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов)
 - [Обновление Vitastor](#обновление-vitastor)
 - [Потребление памяти OSD](#потребление-памяти-osd)
 ## Состояния пулов
 Пул активен — то есть, полностью доступен для клиентского ввода-вывода — когда все его PG
 активны, то есть, имеют статус active, возможно, с любым набором дополнительных флагов.
 Если хотя бы 1 PG неактивна, пул неактивен и все клиенты зависают и ждут, пока вы почините
 кластер. :-)
 ## Состояния PG
 Вы можете видеть состояния PG в выводе команды [vitastor-cli status](cli.ru.md#status).
 Состояние PG состоит из ровно 1 базового флага состояния, плюс любого числа дополнительных.
 ### Базовые состояния PG
 Состояние PG включает в себя ровно 1 флаг из следующих:
 - **active** — PG активна и обрабатывает запросы ввода-вывода от пользователей.
 - **incomplete** — Недостаточно живых OSD, чтобы включить эту PG.
  То есть, дисков потеряно больше, чем разрешено схемой отказоустойчивости пула и pg_minsize.
  Например, если у пула pg_size=3 и pg_minsize=1, то часть данных может записаться всего на 1 OSD.
  Если потом конкретно этот OSD упадёт, PG окажется **incomplete**.
 - **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
  (если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
  назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
  либо PG остановлена монитором через флаг `pause: true` в `/vitastor/pg/config` в etcd.
 - **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
 - **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
 - **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
 - **stopping** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **offline** или поднимется на другом OSD.
 Все состояния, кроме **active**, означают, что PG неактивна и ввод-вывод приостановлен.
 Состояние **peering** в норме заметно только при перезапуске OSD или переключении первичных
 OSD, на протяжении небольшого периода времени.
 Состояния **starting**, **repeering**, **stopping** в норме практически не заметны вообще,
 PG должны очень быстро переходить из них в другие. Если эти состояния заметны
 хоть сколько-то значительное время — вероятно, какие-то операции на каких-то OSD зависли.
 Чтобы найти их, ищите "slow op" в журналах OSD — операции, зависшие дольше,
 чем на [slow_log_interval](../config/osd.ru.md#slow_log_interval), записываются в
 журналы OSD как "slow op".
 Диаграмма переходов:
 ![Диаграмма переходов](pg_states.svg "Диаграмма переходов")
 ### Дополнительные состояния PG
 Если PG активна, она также может иметь любое число дополнительных флагов состояний:
 - **degraded** — PG поднята на неполном числе дисков (OSD), избыточность хранения всех объектов снижена.
 - **has_incomplete** — часть объектов в PG неполные (невосстановимые), то есть, у них потеряно
  слишком много EC-частей (больше, чем [parity_chunks](../config/pool.ru.md#parity_chunks) пула).
 - **has_degraded** — часть объектов в PG деградированы, избыточность их хранения снижена по сравнению
  с остальным содержимым данной PG (то есть, PG может одновременно быть degraded+has_degraded).
  Данные объекты должны восстановиться автоматически, если только восстановление не отключено
  через [no_recovery](../config/osd.ru.md#no_recovery).
 - **has_misplaced** — часть объектов в PG сейчас расположена не на целевом наборе OSD этой PG.
  Данные объекты должны переместиться автоматически, если только перебалансировка не отключена
  через [no_rebalance](../config/osd.ru.md#no_rebalance). Объекты, являющиеся одновременно
  degraded и misplaced, считаются просто degraded.
 - **has_unclean** — ещё одно состояние, в норме заметное только очень короткое время при поднятии PG.
  Применяется только к EC и означает, что на каких-то OSD этой PG есть EC-части объектов, для которых
  был начат, но не завершён процесс записи. Все такие объекты первичный OSD либо завершает, либо
  откатывает при поднятии PG первым делом, поэтому состояние и не должно быть заметно. Опять-таки,
  если оно заметно — значит, скорее всего, операции отката или завершения записи на каких-то OSD зависли.
 - **has_invalid** — в PG найдены объекты с некорректными ID части. В норме не проявляется вообще
  никогда, проявляется только если, не удалив данные, создать на месте EC-пула либо реплика-пул,
  либо EC-пул с меньшим числом частей данных.
 - **has_corrupted** — в PG есть повреждённые объекты, обнаруженные с помощью контрольных сумм или
  скраба (сверки копий). Если объекты можно восстановить, они восстановятся автоматически. Если
  не восстанавливаются, используйте команду [vitastor-cli describe](cli.ru.md#describe) для
  выяснения деталей и/или смотрите в журнал первичного OSD данной PG.
 - **has_inconsistent** — в PG есть объекты, у которых не совпадают копии/части данных на разных OSD,
  и при этом автоматически определить, какая копия верная, а какая нет, невозможно. Такое может
  произойти, если вы используете 2 реплики, не включали контрольные суммы, и на одной из реплик
  данные повредились. В этом случае тоже надо использовать команды vitastor-cli [describe](cli.ru.md#describe)
  и [fix](cli.ru.md#fix) для удаления некорректной версии.
 - **left_on_dead** — часть данных PG осталась на отключённом, но не удалённом из кластера окончательно,
  OSD. Вам нужно либо вернуть соответствующий OSD в строй и дать ему очистить лишние данные, либо
  удалить его из кластера окончательно с помощью vitastor-cli [rm-osd](cli.ru.md#rm-osd), если
  известно, что он уже не вернётся (например, если умер диск).
 - **scrubbing** — идёт фоновая проверка данных PG ([скраб](../config/osd.ru.md#auto_scrub)).
 ## Удаление исправного диска
 Перед удалением исправного диска из кластера установите его OSD вес в 0, чтобы убрать с него данные.
 Для этого выполните команду `vitastor-cli modify-osd --reweight 0 <НОМЕР_OSD>`.
 Дождитесь завершения перебалансировки данных, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
 Также вес 0 можно прописать вручную прямо в etcd в ключ `/vitastor/config/osd/<НОМЕР_OSD>`, например:
 ```
 etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
 ```
 ## Удаление неисправного диска
 Если диск уже умер, его OSD, скорее всего, уже будет/будут остановлен(ы).
 В этом случае просто удалите OSD из etcd командой `vitastor-cli rm-osd НОМЕР_OSD`.
 ## Добавление диска
 Если сервер новый, установите на него пакеты Vitastor и скопируйте файл конфигурации
 `/etc/vitastor/vitastor.conf`.
 После этого достаточно выполнить команду `vitastor-disk prepare /dev/nvmeXXX`, разумеется,
 с параметрами, аналогичными другим OSD в вашем кластере.
 ## Восстановление потерянной конфигурации пулов
 Если удалить или повредить ключ `/vitastor/config/pools` в etcd, все пулы будут удалены.
 Не волнуйтесь, данные потеряны не будут, но вам нужно будет провести специальную
 процедуру восстановления.
 Сначала нужно будет восстановить конфигурацию пулов, создав пул с таким же ID и
 с такими же параметрами EC/реплик, и подождать, пока PG пула появятся в `vitastor-cli status`.
 Далее нужно будет добавить все OSD в исторические записи всех PG. Примерно так
 (только подставьте свои PG_COUNT и POOL_ID):
 ```
 PG_COUNT=32
 POOL_ID=1
 ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
    perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
 for i in $(seq 1 $PG_COUNT); do
    etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
 done
 ```
 После этого все PG должны пройти peering и найти все предыдущие данные.
 ## Обновление Vitastor
 Обычно каждая следующая версия Vitastor совместима с предыдущими и "вперёд", и "назад"
 с точки зрения сетевого протокола и структур данных в etcd.
 Так что по умолчанию, если на данной странице не указано обратное, считается, что для
 обновления достаточно обновить пакеты и перезапустить все OSD и мониторы Vitastor в
 произвольном порядке.
 Обновление производится без остановки клиентов (виртуальных машин/контейнеров), для этого
 достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
 начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
 ### 1.7.x -> 1.8.0
 Обновляться с версий <= 1.7.x до версий >= 1.8.0 рекомендуется с полной остановкой
 сначала клиентов, а затем кластера, так как в 1.8.0 исправлена проблема (неконсистентность
 потоков событий от etcd), способная приводить к появлению incomplete объектов в EC-пулах
 и, хоть и редко, но даже к повреждению данных при массовых перезапусках OSD. Если вы
 обновляетесь без полной остановки - это не значит, что вы обязательно столкнётесь с этой
 проблемой, но лучше подстраховаться.
 Также, если вы обновляетесь с версии <= 1.7.x до версии >= 1.8.0, НО <= 1.9.0: перезапустите всех
 клиентов (процессы виртуальных машин можно перезапустить путём миграции на другой сервер),
 иначе они зависнут, когда монитор удалит старый ключ конфигурации PG, что происходит через
 24 часа после обновления.
 Однако, это исправлено в 1.9.1. Так что, если вы обновляетесь с <= 1.7.x сразу до >= 1.9.1,
 вам НЕ нужно сразу перезапускать всех клиентов - они будут работать, как раньше. Минус,
 правда, в том, что старый ключ конфигурации PG (`/vitastor/config/pgs`) будет нужно удалить
 вам из etcd вручную - после того, как вы убедитесь, что все клиенты перезапущены.
 ### 1.1.x -> 1.2.0
 Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
 рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
 потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
 могли приводить к некорректному чтению данных в деградированных EC-пулах.
 ### 0.8.7 -> 0.9.0
 Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
 нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
 Иначе клиентский ввод-вывод зависнет до завершения обновления.
 ### 0.5.x -> 0.6.x
 Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
 ## Откат версии
 Откат (понижение версии) тоже свободно разрешён, кроме указанных ниже случаев:
 ### 1.8.0 -> 1.7.1
 Перед понижением версии с >= 1.8.0 до <= 1.7.1 вы должны скопировать ключ
 etcd `/vitastor/pg/config` в `/vitastor/config/pgs`:
 ```
 etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
  etcdctl --endpoints=http://... put /vitastor/config/pgs
 ```
 После этого можно просто установить более старые пакеты и перезапустить все сервисы.
 Если вы откатили версию, не скопировав предварительно этот ключ - выполните "добавление всех
 OSD в исторические записи всех PG" из раздела [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов).
 ### 1.0.0 -> 0.9.x
 В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
 нельзя откатить до версии 0.9.x и более ранних.
 ### 0.8.0 -> 0.7.x
 В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD не запустятся на
 более ранних версиях (0.4.x - 0.7.x). :-)
 ## Потребление памяти OSD
 Основное потребление памяти складывается из:
 - Индекс метаданных: `размер_данных`/[`block_size`](../config/layout-cluster.ru.md#block_size) * `примерно 1.1` * `32` байт.
  Потребляется всегда.
 - Копия дисковой области метаданных: `размер_данных`/[`block_size`](../config/layout-cluster.ru.md#block_size) * `28` байт.
  Потребляется, если не отключена настройка [inmemory_metadata](../config/osd.ru.md#inmemory_metadata).
 - Битмапы: `размер_данных`/[`bitmap_granularity`](../config/layout-cluster.ru.md#bitmap_granularity)/`8` * `2` байт.
  Потребляется всегда.
 - Индекс журнала: от 0 до, приблизительно, размера журнала. Потребляется всегда.
 - Копия дисковой области журнала: в точности размер журнала. Потребляется,
  если не отключена настройка [inmemory_journal](../config/osd.ru.md#inmemory_journal).
 - Контрольные суммы: `размер_данных`/[`csum_block_size`](../config/osd.ru.md#csum_block_size) * `4` байт.
  Потребляется, если включены контрольные суммы и не отключена настройка [inmemory_metadata](../config/osd.ru.md#inmemory_metadata).
 bitmap_granularity, как правило, не меняется и равен 4 килобайтам.
 Таким образом, при SSD-настройках по умолчанию (block_size=128k, journal_size=32M, csum_block_size=4k) потребляется:
 - Метаданные и битмапы: ~600 МБ на 1 ТБ данных
 - Журнал: до 64 МБ на 1 OSD
 - Контрольные суммы: 1 ГБ на 1 ТБ данных
 При HDD-настройках по умолчанию (block_size=1M, journal_size=128M, csum_block_size=32k):
 - Метаданные и битмапы: ~128 МБ на 1 ТБ данных
 - Журнал: до 256 МБ на 1 OSD
 - Контрольные суммы: 128 МБ на 1 ТБ данных
--- a/docs/usage/cli.en.md
+++ b/docs/usage/cli.en.md
@ -16,7 +16,6 @@ It supports the following commands:
 - [create](#create)
 - [snap-create](#create)
 - [modify](#modify)
 - [dd](#dd)
 - [rm](#rm)
 - [flatten](#flatten)
 - [rm-data](#rm-data)
@ -25,14 +24,6 @@ It supports the following commands:
 - [fix](#fix)
 - [alloc-osd](#alloc-osd)
 - [rm-osd](#rm-osd)
 - [osd-tree](#osd-tree)
 - [ls-osd](#ls-osd)
 - [modify-osd](#modify-osd)
 - [pg-list](#pg-list)
 - [create-pool](#create-pool)
 - [modify-pool](#modify-pool)
 - [ls-pools](#ls-pools)
 - [rm-pool](#rm-pool)
 Global options:
@ -140,69 +131,25 @@ See also about [how to export snapshots](qemu.en.md#exporting-snapshots).
 ## modify
-`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]`
+`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
 Rename, resize image or change its readonly status. Images with children can't be made read-write.
 If the new size is smaller than the old size, extra data will be purged.
 You should resize file system in the image, if present, before shrinking it.
 * `-f|--force` - Proceed with shrinking or setting readwrite flag even if the image has children.
 * `--down-ok` - Proceed with shrinking even if some data will be left on unavailable OSDs.
 ## dd
 ```
-vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
+-f|--force  Proceed with shrinking or setting readwrite flag even if the image has children.
    [count=N] [seek/oseek=N] [skip/iseek=M] [iodepth=N] [status=progress] \
    [conv=nocreat,noerror,nofsync,trunc,nosparse] [iflag=direct] [oflag=direct,append]
 ```
 Copy data between Vitastor images, files and pipes.
 Options can be specified in classic dd style (`key=value`) or like usual (`--key value`).
 | <!-- -->        | <!-- -->                                                                |
 |-----------------|-------------------------------------------------------------------------|
 | `iimg=<image>`  | Copy from Vitastor image `<image>`                                      |
 | `if=<file>`     | Copy from file `<file>`                                                 |
 | `oimg=<image>`  | Copy to Vitastor image `<image>`                                        |
 | `of=<file>`     | Copy to file `<file>`                                                   |
 | `bs=1M`         | Set copy block size                                                     |
 | `count=N`       | Copy only N input blocks. If N ends in B it counts bytes, not blocks    |
 | `seek/oseek=N`  | Skip N output blocks. If N ends in B it counts bytes, not blocks        |
 | `skip/iseek=N`  | Skip N input blocks. If N ends in B it counts bytes, not blocks         |
 | `iodepth=N`     | Send N reads or writes in parallel (default 4)                          |
 | `status=LEVEL`  | The LEVEL of information to print to stderr: none/noxfer/progress       |
 | `size=N`        | Specify size for the created output file/image (defaults to input size) |
 | `iflag=direct`  | For input files only: use direct I/O                                    |
 | `oflag=direct`  | For output files only: use direct I/O                                   |
 | `oflag=append`  | For files only: append to output file                                   |
 | `conv=nocreat`  | Do not create output file/image                                         |
 | `conv=trunc`    | Truncate output file/image                                              |
 | `conv=noerror`  | Continue copying after errors                                           |
 | `conv=nofsync`  | Do not call fsync before finishing (default behaviour is fsync)         |
 | `conv=nosparse` | Write all output blocks including all-zero blocks                       |
 ## rm
-`vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`
+`vitastor-cli rm <from> [<to>] [--writers-stopped]`
-`vitastor-cli rm (--exact|--matching) <glob> ...`
+Remove `<from>` or all layers between `<from>` and `<to>` (`<to>` must be a child of `<from>`),
-
+rebasing all their children accordingly. --writers-stopped allows merging to be a bit
-Remove layer(s) and rebase all their children accordingly.
+more effective in case of a single 'slim' read-write child and 'fat' removed parent:
-
+the child is merged into parent and parent is renamed to child in that case.
-In the first form, remove `<from>` or layers between `<from>` and its child `<to>`.
+In other cases parent layers are always merged into children.
 In the second form, remove all images with exact or pattern-matched names.
 Options:
 * `--writers-stopped` allows optimised removal in case of a single 'slim' read-write
  child and 'fat' removed parent: the child is merged into parent and parent is renamed
  to child in that case. In other cases parent layers are always merged into children.
 * `--exact` - remove multiple images with given names.
 * `--matching` - remove multiple images with names matching given glob patterns.
 * `--down-ok` - continue deletion/merging even if some data will be left on unavailable OSDs.
 ## flatten
@ -220,7 +167,6 @@ Remove inode data without changing metadata.
 --wait-list   Retrieve full objects listings before starting to remove objects.
              Requires more memory, but allows to show correct removal progress.
 --min-offset  Purge only data starting with specified offset.
 --max-offset  Purge only data before specified offset.
 ```
 ## merge-data
@ -233,9 +179,11 @@ Merge layer data without changing metadata. Merge `<from>`..`<to>` to `<target>`
 ## describe
-`vitastor-cli describe [OPTIONS]`
+`vitastor-cli describe [--osds <osds>] [--object-state <states>] [--pool <pool>]
    [--inode <ino>] [--min-inode <ino>] [--max-inode <ino>]
    [--min-offset <offset>] [--max-offset <offset>]`
-Describe unclean object locations in the cluster. Options:
+Describe unclean object locations in the cluster.
 ```
 --osds <osds>
@ -245,8 +193,6 @@ Describe unclean object locations in the cluster. Options:
    degraded, misplaced, incomplete, corrupted, inconsistent.
 --pool <pool name or number>
    Only list objects in the given pool.
 --pg <pg number>
    Only list objects in the given PG of the pool.
 --inode, --min-inode, --max-inode
    Restrict listing to specific inode numbers.
 --min-offset, --max-offset
@ -292,169 +238,3 @@ Refuses to remove OSDs with data without `--force` and `--allow-data-loss`.
 With `--dry-run` only checks if deletion is possible without data loss and
 redundancy degradation.
 ## osd-tree
 `vitastor-cli osd-tree [-l|--long]`
 Show current OSD tree, optionally with I/O statistics if -l is specified.
 Example output:
 ```
 TYPE     NAME       UP    SIZE  USED%    TAGS          WEIGHT  BLOCK  BITMAP  IMM   NOOUT
 host     kaveri
  disk   nvme0n1p1
    osd  3          down  100G  0 %      abc,kaveri    1       128k   4k      none  -
    osd  4          down  100G  0 %                    1       128k   4k      none  -
  disk   nvme1n1p1
    osd  5          down  100G  0 %      abc,kaveri    1       128k   4k      none  -
    osd  6          down  100G  0 %                    1       128k   4k      none  -
 host     stump
  osd    1          up    100G  37.29 %  osdone        1       128k   4k      all   -
  osd    2          up    100G  26.8 %   abc           1       128k   4k      all   -
  osd    7          up    100G  21.84 %                1       128k   4k      all   -
  osd    8          up    100G  21.63 %                1       128k   4k      all   -
  osd    9          up    100G  20.69 %                1       128k   4k      all   -
  osd    10         up    100G  21.61 %                1       128k   4k      all   -
  osd    11         up    100G  21.53 %                1       128k   4k      all   -
  osd    12         up    100G  22.4 %                 1       128k   4k      all   -
 ```
 ## ls-osd
 `vitastor-cli osds|ls-osd|osd-ls [-l|--long]`
 Show current OSDs as list, optionally with I/O statistics if -l is specified.
 Example output:
 ```
 OSD  PARENT            UP    SIZE  USED%    TAGS          WEIGHT  BLOCK  BITMAP  IMM   NOOUT
 3    kaveri/nvme0n1p1  down  100G  0 %      globl,kaveri  1       128k   4k      none  -
 4    kaveri/nvme0n1p1  down  100G  0 %                    1       128k   4k      none  -
 5    kaveri/nvme1n1p1  down  100G  0 %      globl,kaveri  1       128k   4k      none  -
 6    kaveri/nvme1n1p1  down  100G  0 %                    1       128k   4k      none  -
 1    stump             up    100G  37.29 %  osdone        1       128k   4k      all   -
 2    stump             up    100G  26.8 %   globl         1       128k   4k      all   -
 7    stump             up    100G  21.84 %                1       128k   4k      all   -
 8    stump             up    100G  21.63 %                1       128k   4k      all   -
 9    stump             up    100G  20.69 %                1       128k   4k      all   -
 10   stump             up    100G  21.61 %                1       128k   4k      all   -
 11   stump             up    100G  21.53 %                1       128k   4k      all   -
 12   stump             up    100G  22.4 %                 1       128k   4k      all   -
 ```
 ## modify-osd
 `vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
 Set OSD reweight, tags or noout flag. See the detailed description in [OSD config documentation](../config/pool.en.md#osd-settings).
 ## pg-list
 `vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
 List PGs with any of the listed state filters (^ or ! in the beginning is negation). Options:
 ```
 --pool <pool name or number>  Only list PGs of the given pool.
 --min <min pg number>         Only list PGs with number >= min.
 --max <max pg number>         Only list PGs with number <= max.
 ```
 Examples:
 `vitastor-cli pg-list active+degraded`
 `vitastor-cli pg-list ^active`
 ## create-pool
 `vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]`
 Create a pool. Required parameters:
 | <!-- -->                 | <!-- -->                                                                              |
 |--------------------------|---------------------------------------------------------------------------------------|
 | `-s R` or `--pg_size R`  | Number of replicas for replicated pools                                               |
 | `--ec N+K`               | Number of data (N) and parity (K) chunks for erasure-coded pools                      |
 | `-n N` or `--pg_count N` | PG count for the new pool (start with 10*<OSD count>/pg_size rounded to a power of 2) |
 Optional parameters:
 | <!-- -->                       | <!-- -->                                                                   |
 |--------------------------------|----------------------------------------------------------------------------|
 | `--pg_minsize <number>`        | R or N+K minus number of failures to tolerate without downtime ([details](../config/pool.en.md#pg_minsize)) |
 | `--failure_domain host`        | Failure domain: host, osd or a level from placement_levels. Default: host  |
 | `--root_node <node>`           | Put pool only on child OSDs of this placement tree node                    |
 | `--osd_tags <tag>[,<tag>]...`  | Put pool only on OSDs tagged with all specified tags                       |
 | `--block_size 128k`            | Put pool only on OSDs with this data block size                            |
 | `--bitmap_granularity 4k`      | Put pool only on OSDs with this logical sector size                        |
 | `--immediate_commit none`      | Put pool only on OSDs with this or larger immediate_commit (none < small < all) |
 | `--level_placement <rules>`    | Use additional failure domain rules (example: "dc=112233")                 |
 | `--raw_placement <rules>`      | Specify raw PG generation rules ([details](../config/pool.en.md#raw_placement)) |
 | `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags               |
 | `--scrub_interval <time>`      | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y  |
 | `--used_for_fs <name>`         | Mark pool as used for VitastorFS with metadata in image <name>             |
 | `--pg_stripe_size <number>`    | Increase object grouping stripe                                            |
 | `--max_osd_combinations 10000` | Maximum number of random combinations for LP solver input                  |
 | `--wait`                       | Wait for the new pool to come online                                       |
 | `-f` or `--force`              | Do not check that cluster has enough OSDs to create the pool               |
 See also [Pool configuration](../config/pool.en.md) for detailed parameter descriptions.
 Examples:
 `vitastor-cli create-pool test_x4 -s 4 -n 32`
 `vitastor-cli create-pool test_ec42 --ec 4+2 -n 32`
 ## modify-pool
 `vitastor-cli modify-pool|pool-modify <id|name> [--name <new_name>] [PARAMETERS...]`
 Modify an existing pool. Modifiable parameters:
 ```
 [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]
 [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--no_inode_stats 0|1]
 [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]
 ```
 Non-modifiable parameters (changing them WILL lead to data loss):
 ```
 [--block_size <size>] [--bitmap_granularity <size>]
 [--immediate_commit <all|small|none>] [--pg_stripe_size <size>]
 ```
 These, however, can still be modified with -f|--force.
 See [create-pool](#create-pool) for parameter descriptions.
 Examples:
 `vitastor-cli modify-pool pool_A --name pool_B`
 `vitastor-cli modify-pool 2 --pg_size 4 -n 128`
 ## rm-pool
 `vitastor-cli rm-pool|pool-rm [--force] <id|name>`
 Remove a pool. Refuses to remove pools with images without `--force`.
 ## ls-pools
 `vitastor-cli ls-pools|pool-ls|ls-pool|pools [-l] [--detail] [--sort FIELD] [-r] [-n N] [--stats] [<glob> ...]`
 List pools (only matching <glob> patterns if passed).
 | <!-- -->             | <!-- -->                                              |
 |----------------------|-------------------------------------------------------|
 | `-l` or `--long`     | Also report I/O statistics                            |
 | `--detail`           | Use list format (not table), show all details         |
 | `--sort FIELD`       | Sort by specified field (see fields in --json output) |
 | `-r` or `--reverse`  | Sort in descending order                              |
 | `-n` or `--count N`  | Only list first N items                               |
--- a/docs/usage/cli.ru.md
+++ b/docs/usage/cli.ru.md
@ -17,21 +17,12 @@ vitastor-cli - интерфейс командной строки для адм
 - [create](#create)
 - [snap-create](#create)
 - [modify](#modify)
 - [dd](#dd)
 - [rm](#rm)
 - [flatten](#flatten)
 - [rm-data](#rm-data)
 - [merge-data](#merge-data)
 - [alloc-osd](#alloc-osd)
 - [rm-osd](#rm-osd)
 - [osd-tree](#osd-tree)
 - [ls-osd](#ls-osd)
 - [modify-osd](#modify-osd)
 - [pg-list](#pg-list)
 - [create-pool](#create-pool)
 - [modify-pool](#modify-pool)
 - [ls-pools](#ls-pools)
 - [rm-pool](#rm-pool)
 Глобальные опции:
@ -94,8 +85,8 @@ kaveri    2/1     32   0 B      10 G    0 B        100%    0%
 `vitastor-cli ls [-l] [-p POOL] [--sort FIELD] [-r] [-n N] [<glob> ...]`
-Показать список образов, если передан(ы) шаблон(ы) `<glob>`, то только с именами,
+Показать список образов, если переданы шаблоны `<glob>`, то только с именами,
-соответствующими одному из шаблонов (стандартные ФС-шаблоны с * и ?).
+соответствующими этим шаблонам (стандартные ФС-шаблоны с * и ?).
 Опции:
@ -141,7 +132,7 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
 ## modify
-`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]`
+`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
 Изменить размер, имя образа или флаг "только для чтения". Снимать флаг "только для чтения"
 и уменьшать размер образов, у которых есть дочерние клоны, без `--force` нельзя.
@ -149,64 +140,23 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
 Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением
 образа сначала уменьшите файловую систему в нём.
 * `-f|--force` - Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны.
 * `--down-ok` - Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD.
 ## dd
 ```
-vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
+-f|--force  Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны.
    [count=N] [seek/oseek=N] [skip/iseek=M] [iodepth=N] [status=progress] \
    [conv=nocreat,noerror,nofsync,trunc,nosparse] [iflag=direct] [oflag=direct,append]
 ```
 Копировать данные между образами Vitastor, файлами и каналами.
 Опции можно передавать в классическом стиле dd (`key=value`) или как обычно (`--key value`).
 | <!-- -->        | <!-- -->                                                                |
 |-----------------|-------------------------------------------------------------------------|
 | `iimg=<image>`  | Копировать из образа Vitastor `<image>`                                 |
 | `if=<file>`     | Копировать из файла `<file>`                                            |
 | `oimg=<image>`  | Копировать в образ Vitastor `<image>`                                   |
 | `of=<file>`     | Копировать в файл `<file>`                                              |
 | `bs=1M`         | Задать размер блока копирования                                         |
 | `count=N`       | Копировать не более N блоков. Если N заканчивается на B - то N байт.    |
 | `seek/oseek=N`  | Пропустить N выходных блоков. Если N заканчивается на B - то N байт.    |
 | `skip/iseek=N`  | Пропустить N входных блоков. Если N заканчивается на B - то N байт.     |
 | `iodepth=N`     | Отправлять N чтений/записей параллельно (по умолчанию 4).               |
 | `status=LEVEL`  | Уровень вывода в консоль: none/noxfer/progress                          |
 | `size=N`        | Задать размер выходного файла/образа (по умолчанию равен размеру входа).|
 | `iflag=direct`  | Только для входного файла: использовать прямой ввод-вывод               |
 | `oflag=direct`  | Только для выходного файла: использовать прямой ввод-вывод              |
 | `oflag=append`  | Только для файлов: дописывать в конец выходного файла                   |
 | `conv=nocreat`  | Не создавать выходной файл/образ                                        |
 | `conv=trunc`    | Обрезать выходной файл/образ до размера входа                           |
 | `conv=noerror`  | Продолжать копирование после ошибок                                     |
 | `conv=nofsync`  | Не вызывать fsync перед завершением                                     |
 | `conv=nosparse` | Записывать все выходные блоки, включая пустые                           |
 ## rm
-`vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`
+`vitastor-cli rm <from> [<to>] [--writers-stopped]`
-`vitastor-cli rm (--exact|--matching) <glob> ...`
+Удалить образ `<from>` или все слои от `<from>` до `<to>` (`<to>` должен быть дочерним
 образом `<from>`), одновременно меняя родительские образы их клонов (если таковые есть).
-Удалить образ(ы), корректно перебазируя их дочерние образы.
+`--writers-stopped` позволяет чуть более эффективно удалять образы в частом случае, когда
 у удаляемой цепочки есть только один дочерний образ, содержащий небольшой объём данных.
 В этом случае дочерний образ вливается в родительский и удаляется, а родительский
 переименовывается в дочерний.
-В первой форме удаляет один образ `<from>` или все слои между `<from>` и его дочерним `<to>`.
+В других случаях родительские слои вливаются в дочерние.
 Во второй форме, удаляет все образы с точными именами или именами, подходящими под шаблон(ы).
 Опции:
 * `--writers-stopped` позволяет чуть более эффективно удалять образы в частом случае, когда
  у удаляемой цепочки есть только один дочерний образ, содержащий небольшой объём данных.
  В этом случае дочерний образ вливается в родительский и удаляется, а родительский
  переименовывается в дочерний.
 * `--exact` - удалить все образы с точно заданными именами.
 * `--matching` - удалить все образы с именами, подходящими под переданные glob-шаблоны.
 * `--down-ok` - продолжать удаление/слияние, даже если часть данных останется неудалённой на недоступных OSD.
 ## flatten
@ -225,7 +175,6 @@ vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
 --wait-list   Сначала запросить полный листинг объектов, а потом начать удалять.
              Требует больше памяти, но позволяет правильно печатать прогресс удаления.
 --min-offset  Удалять только данные, начиная с заданного смещения.
 --max-offset  Удалять только данные до (исключительно) заданного смещения.
 ```
 ## merge-data
@ -238,10 +187,12 @@ vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
 ## describe
-`vitastor-cli describe [ОПЦИИ]`
+`vitastor-cli describe [--osds <osds>] [--object-state <состояния>] [--pool <пул>]
    [--inode <номер>] [--min-inode <номер>] [--max-inode <номер>]
    [--min-offset <смещение>] [--max-offset <смещение>]`
 Описать состояние "грязных" объектов в кластере, то есть таких объектов, копии
-или части которых хранятся на наборе OSD, не равном целевому. Опции:
+или части которых хранятся на наборе OSD, не равном целевому.
 ```
 --osds <osds>
@ -256,8 +207,6 @@ vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
    - inconsistent - неконсистентный, с неоднозначным расхождением копий/частей
 --pool <имя или ID пула>
    Перечислять только объекты из заданного пула.
 --pg <номер PG>
    Перечислять только объекты из заданной PG пула.
 --inode, --min-inode, --max-inode
    Перечислять только объекты из указанных номеров инодов (образов).
 --min-offset, --max-offset
@ -306,170 +255,3 @@ vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
 С опцией `--dry-run` только проверяет, возможно ли удаление без потери данных и деградации
 избыточности.
 ## osd-tree
 `vitastor-cli osd-tree [-l|--long]`
 Показать дерево OSD, со статистикой ввода-вывода, если установлено -l.
 Пример вывода:
 ```
 TYPE     NAME       UP    SIZE  USED%    TAGS          WEIGHT  BLOCK  BITMAP  IMM   NOOUT
 host     kaveri
  disk   nvme0n1p1
    osd  3          down  100G  0 %      globl,kaveri  1       128k   4k      none  -
    osd  4          down  100G  0 %                    1       128k   4k      none  -
  disk   nvme1n1p1
    osd  5          down  100G  0 %      globl,kaveri  1       128k   4k      none  -
    osd  6          down  100G  0 %                    1       128k   4k      none  -
 host     stump
  osd    1          up    100G  37.29 %  osdone        1       128k   4k      all   -
  osd    2          up    100G  26.8 %   globl         1       128k   4k      all   -
  osd    7          up    100G  21.84 %                1       128k   4k      all   -
  osd    8          up    100G  21.63 %                1       128k   4k      all   -
  osd    9          up    100G  20.69 %                1       128k   4k      all   -
  osd    10         up    100G  21.61 %                1       128k   4k      all   -
  osd    11         up    100G  21.53 %                1       128k   4k      all   -
  osd    12         up    100G  22.4 %                 1       128k   4k      all   -
 ```
 ## ls-osd
 `vitastor-cli osds|ls-osd|osd-ls [-l|--long]`
 Показать список OSD, со статистикой ввода-вывода, если установлено -l.
 Пример вывода:
 ```
 OSD  PARENT            UP    SIZE  USED%    TAGS          WEIGHT  BLOCK  BITMAP  IMM   NOOUT
 3    kaveri/nvme0n1p1  down  100G  0 %      globl,kaveri  1       128k   4k      none  -
 4    kaveri/nvme0n1p1  down  100G  0 %                    1       128k   4k      none  -
 5    kaveri/nvme1n1p1  down  100G  0 %      globl,kaveri  1       128k   4k      none  -
 6    kaveri/nvme1n1p1  down  100G  0 %                    1       128k   4k      none  -
 1    stump             up    100G  37.29 %  osdone        1       128k   4k      all   -
 2    stump             up    100G  26.8 %   globl         1       128k   4k      all   -
 7    stump             up    100G  21.84 %                1       128k   4k      all   -
 8    stump             up    100G  21.63 %                1       128k   4k      all   -
 9    stump             up    100G  20.69 %                1       128k   4k      all   -
 10   stump             up    100G  21.61 %                1       128k   4k      all   -
 11   stump             up    100G  21.53 %                1       128k   4k      all   -
 12   stump             up    100G  22.4 %                 1       128k   4k      all   -
 ```
 ## modify-osd
 `vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
 Установить вес OSD, теги или флаг noout. Смотрите подробное описание в [документации настроек OSD](../config/pool.ru.md#настройки-osd).
 ## pg-list
 `vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
 Вывести список PG с состояниями, удовлетворяющими любому из переданных фильтров (^ или !
 в начале фильтра означает отрицание). Опции:
 ```
 --pool <pool name or number>  Only list PGs of the given pool.
 --min <min pg number>         Only list PGs with number >= min.
 --max <max pg number>         Only list PGs with number <= max.
 ```
 Примеры:
 `vitastor-cli pg-list active+degraded`
 `vitastor-cli pg-list ^active`
 ## create-pool
 `vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]`
 Создать пул. Обязательные параметры:
 | <!-- -->                  | <!-- -->                                                                                    |
 |---------------------------|---------------------------------------------------------------------------------------------|
 | `-s R` или `--pg_size R`  | Число копий данных для реплицированных пулов                                                |
 | `--ec N+K`                | Число частей данных (N) и чётности (K) для пулов с кодами коррекции ошибок                  |
 | `-n N` или `--pg_count N` | Число PG для нового пула (начните с 10*<число OSD>/pg_size, округлённого до степени двойки) |
 Необязательные параметры:
 | <!-- -->                       | <!-- -->                                                                   |
 |--------------------------------|----------------------------------------------------------------------------|
 | `--pg_minsize <number>`        | (R или N+K) минус число разрешённых отказов без остановки пула ([подробнее](../config/pool.ru.md#pg_minsize)) |
 | `--failure_domain host`        | Домен отказа: host, osd или другой из placement_levels. По умолчанию: host |
 | `--root_node <node>`           | Использовать для пула только дочерние OSD этого узла дерева размещения     |
 | `--osd_tags <tag>[,<tag>]...`  | ...только OSD со всеми заданными тегами                                    |
 | `--block_size 128k`            | ...только OSD с данным размером блока                                      |
 | `--bitmap_granularity 4k`      | ...только OSD с данным размером логического сектора                        |
 | `--immediate_commit none`      | ...только OSD с этим или большим immediate_commit (none < small < all)     |
 | `--level_placement <rules>`    | Задать правила дополнительных доменов отказа (пример: "dc=112233")         |
 | `--raw_placement <rules>`      | Задать низкоуровневые правила генерации PG ([детали](../config/pool.ru.md#raw_placement)) |
 | `--primary_affinity_tags tags` | Предпочитать OSD со всеми данными тегами для роли первичных                |
 | `--scrub_interval <time>`      | Включить скрабы с заданным интервалом времени (число + единица s/m/h/d/M/y) |
 | `--pg_stripe_size <number>`    | Увеличить блок группировки объектов по PG                                  |
 | `--max_osd_combinations 10000` | Максимальное число случайных комбинаций OSD для ЛП-солвера                 |
 | `--wait`                       | Подождать, пока новый пул будет активирован                                |
 | `-f` или `--force`             | Не проверять, что в кластере достаточно доменов отказа для создания пула   |
 Подробно о параметрах см. [Конфигурация пулов](../config/pool.ru.md).
 Примеры:
 `vitastor-cli create-pool test_x4 -s 4 -n 32`
 `vitastor-cli create-pool test_ec42 --ec 4+2 -n 32`
 ## modify-pool
 `vitastor-cli modify-pool|pool-modify <id|name> [--name <new_name>] [PARAMETERS...]`
 Изменить настройки существующего пула. Изменяемые параметры:
 ```
 [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]
 [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>]
 [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]
 ```
 Неизменяемые параметры (их изменение ПРИВЕДЁТ к потере данных):
 ```
 [--block_size <size>] [--bitmap_granularity <size>]
 [--immediate_commit <all|small|none>] [--pg_stripe_size <size>]
 ```
 Эти параметры можно изменить, только если явно передать опцию -f или --force.
 Описания параметров смотрите в [create-pool](#create-pool).
 Примеры:
 `vitastor-cli modify-pool pool_A --name pool_B`
 `vitastor-cli modify-pool 2 --pg_size 4 -n 128`
 ## rm-pool
 `vitastor-cli rm-pool|pool-rm [--force] <id|name>`
 Удалить пул. Отказывается удалять пул, в котором ещё есть образы, без `--force`.
 ## ls-pools
 `vitastor-cli ls-pools|pool-ls|ls-pool|pools [-l] [--detail] [--sort FIELD] [-r] [-n N] [--stats] [<glob> ...]`
 Показать список пулов. Если передан(ы) шаблон(ы) `<glob>`, то только с именами,
 соответствующими одному из шаблонов (стандартные ФС-шаблоны с * и ?).
 | <!-- -->              | <!-- -->                                                   |
 |-----------------------|------------------------------------------------------------|
 | `-l` или `--long`     | Вывести также статистику ввода-вывода                      |
 | `--detail`            | Максимально подробный вывод в виде списка (а не таблицы)   |
 | `--sort FIELD`        | Сортировать по заданному полю (поля см. в выводе с --json) |
 | `-r` или `--reverse`  | Сортировать в обратном порядке                             |
 | `-n` или `--count N`  | Выводить только первые N записей                           |
--- a/docs/usage/disk.en.md
+++ b/docs/usage/disk.en.md
@ -13,7 +13,6 @@ It supports the following commands:
 - [prepare](#prepare)
 - [upgrade-simple](#upgrade-simple)
 - [resize](#resize)
 - [raw-resize](#raw-resize)
 - [start/stop/restart/enable/disable](#start/stop/restart/enable/disable)
 - [purge](#purge)
 - [read-sb](#read-sb)
@ -51,16 +50,12 @@ Options (automatic mode):
 --osd_per_disk <N>
  Create <N> OSDs on each disk (default 1)
 --hybrid
-  Prepare hybrid (HDD+SSD, NVMe+SATA, etc.) OSDs using provided devices. By default,
+  Prepare hybrid (HDD+SSD) OSDs using provided devices. SSDs will be used for
-  any passed SSDs will be used for journals and metadata, HDDs will be used for data,
+  journals and metadata, HDDs will be used for data. Partitions for journals and
-  but you can override this behaviour with --fast-devices option. Journal and metadata
+  metadata will be created automatically. Whether disks are SSD or HDD is decided
-  partitions will be created automatically. In the default mode, SSD and HDD disks
+  by the `/sys/block/.../queue/rotational` flag. In hybrid mode, default object
-  are distinguished by the `/sys/block/.../queue/rotational` flag. When HDDs are used
+  size is 1 MB instead of 128 KB, default journal size is 1 GB instead of 32 MB,
-  for data in hybrid mode, default block_size is 1 MB instead of 128 KB, default journal
+  and throttle_small_writes is enabled by default.
  size is 1 GB instead of 32 MB, and throttle_small_writes is enabled by default.
 --fast-devices /dev/nvmeX,/dev/nvmeY
  In --hybrid mode, use these devices for journal and metadata instead of auto-detecting
  and extracting them from the main [devices...] list.
 --disable_data_fsync auto
  Disable data device cache and fsync (1/yes/true = on, default auto)
 --disable_meta_fsync auto
@ -93,7 +88,7 @@ Options (both modes):
 --block_size 1M/128k       Set blockstore object size
 --bitmap_granularity 4k    Set bitmap granularity
 --data_csum_type none      Set data checksum type (crc32c or none)
--csum_block_size 4k/32k   Set data checksum block size (SSD/HDD default)
+--csum_block_size 4k       Set data checksum block size
 --data_device_block 4k     Override data device block size
 --meta_device_block 4k     Override metadata device block size
 --journal_device_block 4k  Override journal device block size
@ -132,49 +127,25 @@ Requires the `sfdisk` utility.
 ## resize
-`vitastor-disk resize <osd_num>|<osd_device> [OPTIONS]`
+`vitastor-disk resize <ALL_OSD_PARAMETERS> <NEW_LAYOUT> [--iodepth 32]`
-Resize data area and/or move journal and metadata:
+Resize data area and/or rewrite/move journal and metadata.
 | <!-- -->                  | <!-- -->                               |
 |---------------------------|----------------------------------------|
 | `--move-journal TARGET`   | move journal to `TARGET`               |
 | `--move-meta TARGET`      | move metadata to `TARGET`              |
 | `--journal-size NEW_SIZE` | resize journal to `NEW_SIZE`           |
 | `--data-size NEW_SIZE`    | resize data device to `NEW_SIZE`       |
 | `--dry-run`               | only show new layout, do not apply it  |
 `NEW_SIZE` may include k/m/g/t suffixes.
 `TARGET` may be one of:
 | <!-- -->       | <!-- -->                                                                 |
 |----------------|--------------------------------------------------------------------------|
 | `<partition>`  | move journal/metadata to an existing GPT partition                       |
 | `<raw_device>` | create a GPT partition on `<raw_device>` and move journal/metadata to it |
 | `""`           | (empty string) move journal/metadata back to the data device             |
 ## raw-resize
 `vitastor-disk raw-resize <ALL_OSD_PARAMETERS> <NEW_LAYOUT> [--iodepth 32]`
 Resize data area and/or rewrite/move journal and metadata (manual format).
 `ALL_OSD_PARAMETERS` must include all (at least all disk-related)
 parameters from OSD command line (i.e. from systemd unit or superblock).
 `NEW_LAYOUT` may include new disk layout parameters:
-| <!-- -->                    | <!-- -->                                  |
+```
-|-----------------------------|-------------------------------------------|
+--new_data_offset SIZE     resize data area so it starts at SIZE
-| `--new_data_offset SIZE`    | resize data area so it starts at `SIZE`   |
+--new_data_len SIZE        resize data area to SIZE bytes
-| `--new_data_len SIZE`       | resize data area to `SIZE` bytes          |
+--new_meta_device PATH     use PATH for new metadata
-| `--new_meta_device PATH`    | use `PATH` for new metadata               |
+--new_meta_offset SIZE     make new metadata area start at SIZE
-| `--new_meta_offset SIZE`    | make new metadata area start at `SIZE`    |
+--new_meta_len SIZE        make new metadata area SIZE bytes long
-| `--new_meta_len SIZE`       | make new metadata area `SIZE` bytes long  |
+--new_journal_device PATH  use PATH for new journal
-| `--new_journal_device PATH` | use `PATH` for new journal                |
+--new_journal_offset SIZE  make new journal area start at SIZE
-| `--new_journal_offset SIZE` | make new journal area start at `SIZE`     |
+--new_journal_len SIZE     make new journal area SIZE bytes long
-| `--new_journal_len SIZE`    | make new journal area `SIZE` bytes long   |
+```
 SIZE may include k/m/g/t suffixes. If any of the new layout parameter
 options are not specified, old values will be used.
@ -246,14 +217,10 @@ Intended for use from startup scripts (i.e. from systemd units).
 ## dump-journal
 `vitastor-disk dump-journal [OPTIONS] <osd_device>`
 `vitastor-disk dump-journal [OPTIONS] <journal_file> <journal_block_size> <offset> <size>`
 Dump journal in human-readable or JSON (if `--json` is specified) format.
 You can specify any OSD device (data, metadata or journal), or the layout manually.
 Options:
 ```
@ -266,35 +233,23 @@ Options:
 ## write-journal
 `vitastor-disk write-journal <osd_device>`
 `vitastor-disk write-journal <journal_file> <journal_block_size> <bitmap_size> <offset> <size>`
 Write journal from JSON taken from standard input in the same format as produced by
 `dump-journal --json --format data`.
 You can specify any OSD device (data, metadata or journal), or the layout manually.
 ## dump-meta
 `vitastor-disk dump-meta <osd_device>`
 `vitastor-disk dump-meta <meta_file> <meta_block_size> <offset> <size>`
 Dump metadata in JSON format.
 You can specify any OSD device (data, metadata or journal), or the layout manually.
 ## write-meta
 `vitastor-disk write-meta <osd_device>`
 `vitastor-disk write-meta <meta_file> <offset> <size>`
 Write metadata from JSON taken from standard input in the same format as produced by `dump-meta`.
 You can specify any OSD device (data, metadata or journal), or the layout manually.
 ## simple-offsets
 `vitastor-disk simple-offsets <device>`
@ -306,7 +261,7 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
 ```
 --object_size 128k       Set blockstore block size
 --bitmap_granularity 4k  Set bitmap granularity
--journal_size 32M       Set journal size
+--journal_size 16M       Set journal size
 --data_csum_type none    Set data checksum type (crc32c or none)
 --csum_block_size 4k     Set data checksum block size
 --device_block_size 4k   Set device block size
--- a/docs/usage/disk.ru.md
+++ b/docs/usage/disk.ru.md
@ -13,7 +13,6 @@ vitastor-disk - инструмент командной строки для уп
 - [prepare](#prepare)
 - [upgrade-simple](#upgrade-simple)
 - [resize](#resize)
 - [raw-resize](#raw-resize)
 - [start/stop/restart/enable/disable](#start/stop/restart/enable/disable)
 - [purge](#purge)
 - [read-sb](#read-sb)
@ -51,17 +50,12 @@ vitastor-disk - инструмент командной строки для уп
 --osd_per_disk <N>
  Создавать по несколько (<N>) OSD на каждом диске (по умолчанию 1)
 --hybrid
-  Инициализировать гибридные (HDD+SSD, NVMe+SATA и т.п.) OSD на указанных дисках.
+  Инициализировать гибридные (HDD+SSD) OSD на указанных дисках. SSD будут
-  По умолчанию, SSD будут использованы для журналов и метаданных, а HDD - для данных,
+  использованы для журналов и метаданных, а HDD - для данных. Разделы для журналов
-  но вы можете поменять это поведение опцией --fast-devices. Разделы для журналов
+  и метаданных будут созданы автоматически. Является ли диск SSD или HDD, определяется
-  и метаданных будут созданы автоматически. В режиме по умолчанию SSD и HDD-диски
+  по флагу `/sys/block/.../queue/rotational`. В гибридном режиме по умолчанию
-  различаются по флагу `/sys/block/.../queue/rotational`. Когда в гибридном режиме
+  используется размер объекта 1 МБ вместо 128 КБ, размер журнала 1 ГБ вместо 32 МБ
-  для данных используются HDD, по умолчанию размер блока устанавливается 1 МБ вместо
+  и включённый throttle_small_writes.
  128 КБ, размер журнала 1 ГБ вместо 32 МБ, и throttle_small_writes включается по
  умолчанию.
 --fast-devices /dev/nvmeX,/dev/nvmeY
  Использовать данные диски для журналов и метаданных в гибридном режиме вместо их
  автоопределения и извлечения из основного списка [devices...].
 --disable_data_fsync auto
  Отключать кэш и fsync-и для устройств данных. (1/yes/true = да, по умолчанию автоопределение)
 --disable_meta_fsync auto
@ -95,7 +89,7 @@ vitastor-disk - инструмент командной строки для уп
 --block_size 1M/128k       Задать размер объекта хранилища
 --bitmap_granularity 4k    Задать гранулярность битовых карт
 --data_csum_type none      Задать тип контрольных сумм (crc32c или none)
--csum_block_size 4k/32k   Задать размер блока расчёта контрольных сумм (дефолт SSD/HDD)
+--csum_block_size 4k       Задать размер блока расчёта контрольных сумм
 --data_device_block 4k     Задать размер блока устройства данных
 --meta_device_block 4k     Задать размер блока метаданных
 --journal_device_block 4k  Задать размер блока журнала
@ -135,51 +129,27 @@ throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
 ## resize
-`vitastor-disk resize <osd_num>|<osd_device> [OPTIONS]`
+`vitastor-disk resize <ALL_OSD_PARAMETERS> <NEW_LAYOUT> [--iodepth 32]`
-Изменить размер области данных и/или переместить журнал и метаданные:
+Изменить размер области данных и/или переместить журнал и метаданные.
-| <!-- -->                      | <!-- -->                                       |
+В `ALL_OSD_PARAMETERS` нужно указать все относящиеся к диску параметры OSD
 |-------------------------------|------------------------------------------------|
 | `--move-journal ЦЕЛЬ`         | переместить журнал на `ЦЕЛЬ`                   |
 | `--move-meta ЦЕЛЬ`            | переместить метаданные на `ЦЕЛЬ`               |
 | `--journal-size НОВЫЙ_РАЗМЕР` | изменить размер журнала на `НОВЫЙ_РАЗМЕР`      |
 | `--data-size НОВЫЙ_РАЗМЕР`    | изменить размер диска данных на `НОВЫЙ_РАЗМЕР` |
 | `--dry-run`                   | показать новые параметры, но не применять их   |
 `НОВЫЙ_РАЗМЕР` может быть указан с суффиксами k/m/g/t (кило/мега/гига/терабайт).
 `ЦЕЛЬ` может быть одним из:
 | <!-- -->        | <!-- -->                                                                            |
 |-----------------|-------------------------------------------------------------------------------------|
 | `<раздел>`      | переместить журнал/метаданные на существующий GPT-раздел                            |
 | `<полный_диск>` | создать GPT-раздел на диске `<полный_диск>` и переместить журнал/метаданные на него |
 | `""`            | (пустая строка) переместить журнал/метаданные обратно на диск данных                |
 ## raw-resize
 `vitastor-disk raw-resize <ВСЕ_ПАРАМЕТРЫ_OSD> <НОВЫЕ_РАЗМЕРЫ> [--iodepth 32]`
 Изменить размер области данных и/или переместить журнал и метаданные (ручной формат).
 В `ВСЕ_ПАРАМЕТРЫ_OSD` нужно указать все относящиеся к диску параметры OSD
 из суперблока OSD или из файла сервиса systemd (в старых версиях).
-В `НОВЫЕ_РАЗМЕРЫ` нужно указать новые параметры расположения данных:
+В `NEW_LAYOUT` нужно указать новые параметры расположения данных:
-| <!-- -->                      | <!-- -->                                              |
+```
-|-------------------------------|-------------------------------------------------------|
+--new_data_offset РАЗМЕР     сдвинуть начало области данных на РАЗМЕР байт
-| `--new_data_offset РАЗМЕР`    | сдвинуть начало области данных на `РАЗМЕР` байт       |
+--new_data_len РАЗМЕР        изменить размер области данных до РАЗМЕР байт
-| `--new_data_len РАЗМЕР`       | изменить размер области данных до `РАЗМЕР` байт       |
+--new_meta_device ПУТЬ       использовать ПУТЬ как новое устройство метаданных
-| `--new_meta_device ПУТЬ`      | использовать `ПУТЬ` как новое устройство метаданных   |
+--new_meta_offset РАЗМЕР     разместить новые метаданные по смещению РАЗМЕР байт
-| `--new_meta_offset РАЗМЕР`    | разместить новые метаданные по смещению `РАЗМЕР` байт |
+--new_meta_len РАЗМЕР        сделать новые метаданные размером РАЗМЕР байт
-| `--new_meta_len РАЗМЕР`       | сделать новые метаданные размером `РАЗМЕР` байт       |
+--new_journal_device ПУТЬ    использовать ПУТЬ как новое устройство журнала
-| `--new_journal_device ПУТЬ`   | использовать `ПУТЬ` как новое устройство журнала      |
+--new_journal_offset РАЗМЕР  разместить новый журнал по смещению РАЗМЕР байт
-| `--new_journal_offset РАЗМЕР` | разместить новый журнал по смещению `РАЗМЕР` байт     |
+--new_journal_len РАЗМЕР     сделать новый журнал размером РАЗМЕР байт
-| `--new_journal_len РАЗМЕР`    | сделать новый журнал размером `РАЗМЕР` байт           |
+```
-`РАЗМЕР` может быть указан с суффиксами k/m/g/t. Если любой из новых параметров
+РАЗМЕР может быть указан с суффиксами k/m/g/t. Если любой из новых параметров
 расположения не указан, он принимается равным старому значению.
 ## start/stop/restart/enable/disable
@ -254,15 +224,10 @@ OSD отключены fsync-и.
 ## dump-journal
 `vitastor-disk dump-journal <osd_device>`
 `vitastor-disk dump-journal [OPTIONS] <journal_file> <journal_block_size> <offset> <size>`
 Вывести журнал в человекочитаемом или в JSON (с опцией `--json`) виде.
 Вы можете указать любой раздел OSD - данных, журнала или метаданных - либо указать все
 параметры расположения вручную.
 Опции:
 ```
@ -275,37 +240,22 @@ OSD отключены fsync-и.
 ## write-journal
 `vitastor-disk write-journal <osd_device>`
 `vitastor-disk write-journal <journal_file> <journal_block_size> <bitmap_size> <offset> <size>`
 Записать журнал из JSON со стандартного ввода в формате, аналогичном `dump-journal --json --format data`.
 Вы можете указать любой раздел OSD - данных, журнала или метаданных - либо указать все
 параметры расположения вручную.
 ## dump-meta
 `vitastor-disk dump-meta <osd_device>`
 `vitastor-disk dump-meta <meta_file> <meta_block_size> <offset> <size>`
 Вывести метаданные в формате JSON.
 Вы можете указать любой раздел OSD - данных, журнала или метаданных - либо указать все
 параметры расположения вручную.
 ## write-meta
 `vitastor-disk write-meta <osd_device>`
 `vitastor-disk write-meta <meta_file> <offset> <size>`
 Записать метаданные из JSON со стандартного ввода в формате, аналогичном `dump-meta`.
 Вы можете указать любой раздел OSD - данных, журнала или метаданных - либо указать все
 параметры расположения вручную.
 ## simple-offsets
 `vitastor-disk simple-offsets <device>`
@ -317,7 +267,7 @@ OSD отключены fsync-и.
 ```
 --object_size 128k       Размер блока хранилища
 --bitmap_granularity 4k  Гранулярность битовых карт
--journal_size 32M       Размер журнала
+--journal_size 16M       Размер журнала
 --data_csum_type none    Задать тип контрольных сумм (crc32c или none)
 --csum_block_size 4k     Задать размер блока расчёта контрольных сумм
 --device_block_size 4k   Размер блока устройства
--- a/docs/usage/nbd.en.md
+++ b/docs/usage/nbd.en.md
@ -15,21 +15,12 @@ See also [VDUSE](qemu.en.md#vduse) as a better alternative to NBD.
 Vitastor Kubernetes CSI driver uses NBD when VDUSE is unavailable.
-Supports the following commands:
+## Map image
 - [map](#map)
 - [unmap](#unmap)
 - [ls](#ls)
 - [netlink-map](#netlink-map)
 - [netlink-unmap](#netlink-unmap)
 - [netlink-revive](#netlink-revive)
 ## map
 To create a local block device for a Vitastor image run:
 ```
-vitastor-nbd map [/dev/nbdN] --image testimg
+vitastor-nbd map --image testimg
 ```
 It will output a block device name like /dev/nbd0 which you can then use as a normal disk.
@ -38,25 +29,25 @@ You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--ima
 vitastor-nbd supports all usual Vitastor configuration options like `--config_file <path_to_config>` plus NBD-specific:
-* `--nbd_timeout 0` \
+* `--nbd_timeout 300` \
-  Timeout for I/O operations in seconds after exceeding which the kernel stops the device.
+  Timeout for I/O operations in seconds after exceeding which the kernel stops
-  Before Linux 5.19, if nbd_timeout is 0, a dead NBD device can't be removed from
+  the device. You can set it to 0 to disable the timeout, but beware that you
-  the system at all without rebooting.
+  won't be able to stop the device at all if vitastor-nbd process dies.
 * `--nbd_max_devices 64 --nbd_max_part 3` \
  Options for the `nbd` kernel module when modprobing it (`nbds_max` and `max_part`).
  Note that the maximum allowed (nbds_max)*(1+max_part) is 256.
 * `--logfile /path/to/log/file.txt` \
  Write log messages to the specified file instead of dropping them (in background mode)
  or printing them to the standard output (in foreground mode).
 * `--dev_num N` \
-  Use the specified device /dev/nbdN instead of automatic selection (alternative syntax
+  Use the specified device /dev/nbdN instead of automatic selection.
  to /dev/nbdN positional parameter).
 * `--foreground 1` \
  Stay in foreground, do not daemonize.
 Note that `nbd_timeout`, `nbd_max_devices` and `nbd_max_part` options may also be specified
 in `/etc/vitastor/vitastor.conf` or in other configuration file specified with `--config_file`.
-## unmap
+## Unmap image
 To unmap the device run:
@ -64,14 +55,12 @@ To unmap the device run:
 vitastor-nbd unmap /dev/nbd0
 ```
-## ls
+## List mapped images
 ```
 vitastor-nbd ls [--json]
 ```
 List mapped images.
 Example output (normal format):
 ```
@ -89,45 +78,3 @@ Example output (JSON format):
 ```
 {"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
 ```
 ## netlink-map
 ```
 vitastor-nbd netlink-map [/dev/nbdN] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)
 ```
 On recent kernel versions it's also possible to map NBD devices using the netlink interface.
 This is an experimental feature because it doesn't solve all issues of NBD. Differences from regular ioctl-based 'map':
 1. netlink-map can create new `/dev/nbdN` devices (those not present in /dev/).
 2. netlink-mapped devices can be unmapped only using `netlink-unmap` command.
 3. netlink-mapped devices don't show up in `ls` output (yet).
 4. Dead netlink-mapped devices can be 'revived' using `netlink-revive`.
   However, old I/O requests will hang forever if `nbd_timeout` is not specified.
 5. netlink-map supports additional options:
 * `--nbd_conn_timeout 0` \
  Disconnect a dead device automatically after this number of seconds.
 * `--nbd_destroy_on_disconnect 1` \
  Delete the nbd device on disconnect.
 * `--nbd_disconnect_on_close 1` \
  Disconnect the nbd device on close by last opener.
 * `--nbd_ro 1` \
  Set device into read only mode.
 ## netlink-unmap
 ```
 vitastor-nbd netlink-unmap /dev/nbdN
 ```
 Unmap a device using netlink interface. Works with both netlink and ioctl mapped devices.
 ## netlink-revive
 ```
 vitastor-nbd netlink-revive /dev/nbdX (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)
 ```
 Restart a dead NBD netlink-mapped device without removing it. Supports the same options as `netlink-map`.
--- a/docs/usage/nbd.ru.md
+++ b/docs/usage/nbd.ru.md
@ -18,21 +18,12 @@ NBD немного снижает производительность из-за
 CSI-драйвер Kubernetes Vitastor использует NBD, когда VDUSE недоступен.
-Поддерживаются следующие команды:
+## Подключить устройство
 - [map](#map)
 - [unmap](#unmap)
 - [ls](#ls)
 - [netlink-map](#netlink-map)
 - [netlink-unmap](#netlink-unmap)
 - [netlink-revive](#netlink-revive)
 ## map
 Чтобы создать локальное блочное устройство для образа, выполните команду:
 ```
-vitastor-nbd map [/dev/nbdN] --image testimg
+vitastor-nbd map --image testimg
 ```
 Команда напечатает название блочного устройства вида /dev/nbd0, которое потом можно
@ -44,13 +35,16 @@ vitastor-nbd map [/dev/nbdN] --image testimg
 vitastor-nbd поддерживает все обычные опции Vitastor, например, `--config_file <path_to_config>`,
 плюс специфичные для NBD:
-* `--nbd_timeout 0` \
+* `--nbd_timeout 300` \
  Максимальное время выполнения любой операции чтения/записи в секундах, при
-  превышении которого ядро остановит NBD-устройство. На ядрах Linux старее 5.19,
+  превышении которого ядро остановит NBD-устройство. Вы можете установить опцию
-  если таймаут установлен в 0, NBD-устройство вообще невозможно отключить из системы
+  в 0, чтобы отключить ограничение времени, но имейте в виду, что в этом случае
-  при нештатном завершении процесса.
+  вы вообще не сможете отключить NBD-устройство при нештатном завершении процесса
  vitastor-nbd.
 * `--nbd_max_devices 64 --nbd_max_part 3` \
-  Опции, передаваемые модулю ядра nbd, если его загружает vitastor-nbd (`nbds_max` и `max_part`).
+  Опции, передаваемые модулю ядра nbd, если его загружает vitastor-nbd
  (`nbds_max` и `max_part`). Имейте в виду, что (nbds_max)*(1+max_part)
  обычно не должно превышать 256.
 * `--logfile /path/to/log/file.txt` \
  Писать сообщения о процессе работы в заданный файл, вместо пропуска их
  при фоновом режиме запуска или печати на стандартный вывод при запуске
@ -64,7 +58,7 @@ vitastor-nbd поддерживает все обычные опции Vitastor,
 также задавать в `/etc/vitastor/vitastor.conf` или в другом файле конфигурации,
 заданном опцией `--config_file`.
-## unmap
+## Отключить устройство
 Для отключения устройства выполните:
@ -72,14 +66,12 @@ vitastor-nbd поддерживает все обычные опции Vitastor,
 vitastor-nbd unmap /dev/nbd0
 ```
-## ls
+## Вывести подключённые устройства
 ```
 vitastor-nbd ls [--json]
 ```
 Вывести подключённые устройства.
 Пример вывода в обычном формате:
 ```
@ -97,46 +89,3 @@ pid: 584546
 ```
 {"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
 ```
 ## netlink-map
 ```
 vitastor-nbd netlink-map [/dev/nbdN] (--image <image> | --pool <POOL> --inode <INODE> --size <SIZE>)
 ```
 На свежих версиях ядра Linux также возможно подключать NBD-устройства через интерфейс netlink.
 Это экспериментальная функция, так как она не решает всех проблем NBD. Отличия от обычного 'map':
 1. Можно создавать новые `/dev/nbdN` устройства (отсутствующие в /dev/).
 2. Отключать netlink-устройства можно только командой `netlink-unmap`.
 3. netlink-устройства не видно в выводе `ls` (пока что).
 4. Мёртвые netlink-устройства можно "оживить" командой `netlink-revive`. Правда, предыдущие
   запросы ввода-вывода всё равно зависнут навсегда, если `nbd_timeout` не задан.
 5. Поддерживаются дополнительные опции:
 * `--nbd_conn_timeout 0` \
  Отключать мёртвое устройство автоматически через данное число секунд.
 * `--nbd_destroy_on_disconnect 1` \
  Удалять NBD-устройство при отключении.
 * `--nbd_disconnect_on_close 1` \
  Отключать NBD-устройство автоматически, когда его все закроют.
 * `--nbd_ro 1` \
  Установить для NBD-устройства режим "только для чтения".
 ## netlink-unmap
 ```
 vitastor-nbd netlink-unmap /dev/nbdN
 ```
 Отключить устройство через интерфейс netlink. Работает и с обычными, и с netlink-устройствами.
 ## netlink-revive
 ```
 vitastor-nbd netlink-revive /dev/nbdX (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)
 ```
 Оживить мёртвое NBD-устройство, ранее подключённое через netlink, без удаления. Поддерживает
 те же опции, что и `netlink-map`.
--- a/docs/usage/nfs.en.md
+++ b/docs/usage/nfs.en.md
@ -1,182 +1,45 @@
-[Documentation](../../README.md#documentation) → Usage → VitastorFS and pseudo-FS
+[Documentation](../../README.md#documentation) → Usage → NFS
 -----
 [Читать на русском](nfs.ru.md)
-# VitastorFS and pseudo-FS
+# NFS
-Vitastor has two file system implementations. Both can be used via `vitastor-nfs`.
+Vitastor has a simplified NFS 3.0 proxy for file-based image access emulation. It's not
 suitable as a full-featured file system, at least because all file/image metadata is stored
 in etcd and kept in memory all the time - thus you can't put a lot of files in it.
-Commands:
+However, NFS proxy is totally fine as a method to provide VM image access and allows you to
- [mount](#mount)
+plug Vitastor into, for example, VMWare. It's important to note that for VMWare it's a much
- [start](#start)
+better access method than iSCSI, because with iSCSI we'd have to put all VM images into one
- [upgrade](#upgrade)
+Vitastor image exported as a LUN to VMWare and formatted with VMFS. VMWare doesn't use VMFS
- [defrag](#defrag)
+over NFS.
-## Pseudo-FS
+NFS proxy is stateless if you use immediate_commit=all mode (for SSD with capacitors or
 HDDs with disabled cache), so you can run multiple NFS proxies and use a network load
 balancer or any failover method you want to in that case.
-Simplified pseudo-FS proxy is used for file-based image access emulation. It's not
+vitastor-nfs usage:
 suitable as a full-featured file system: it lacks a lot of FS features, it stores
 all file/image metadata in memory and in etcd. So it's fine for hundreds or thousands
 of large files/images, but not for millions.
 Pseudo-FS proxy is intended for environments where other block volume access methods
 can't be used or impose additional restrictions - for example, VMWare. NFS is better
 for VMWare than, for example, iSCSI, because with iSCSI, VMWare puts all VM images
 into one large shared block image in its own VMFS file system, and with NFS, VMWare
 doesn't use VMFS and puts each VM disk in a regular file which is equal to one
 Vitastor block image, just as originally intended.
 To use Vitastor pseudo-FS locally, run `vitastor-nfs mount --block /mnt/vita`.
 Also you can start the network server:
 ```
-vitastor-nfs start --block --etcd_address 192.168.5.10:2379 --portmap 0 --port 2050 --pool testpool
+vitastor-nfs [STANDARD OPTIONS] [OTHER OPTIONS]
 --subdir <DIR>    export images prefixed <DIR>/ (default empty - export all images)
 --portmap 0       do not listen on port 111 (portmap/rpcbind, requires root)
 --bind <IP>       bind service to <IP> address (default 0.0.0.0)
 --nfspath <PATH>  set NFS export path to <PATH> (default is /)
 --port <PORT>     use port <PORT> for NFS services (default is 2049)
 --pool <POOL>     use <POOL> as default pool for new files (images)
 --foreground 1    stay in foreground, do not daemonize
 ```
-To mount the FS exported by this server, run:
+Example start and mount commands (etcd_address is optional):
 ```
-mount server:/ /mnt/ -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
+vitastor-nfs --etcd_address 192.168.5.10:2379 --portmap 0 --port 2050 --pool testpool
 ```
-## VitastorFS
+```
-
+mount localhost:/ /mnt/ -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
-VitastorFS is a full-featured clustered (Read-Write-Many) file system. It supports most POSIX
+```
 features like hierarchical organization, symbolic links, hard links, quick renames and so on.
 VitastorFS metadata is stored in a Parallel Optimistic B-Tree key-value database,
 implemented over a regular Vitastor block volume. Directory entries and inodes
 are stored in a simple human-readable JSON format in the B-Tree. `vitastor-kv` tool
 can be used to inspect the database.
 To use VitastorFS:
 1. Create a pool or choose an existing empty pool for FS data
 2. Create an image for FS metadata, preferably in a faster (SSD or replica-HDD) pool,
   but you can create it in the data pool too if you want (image size doesn't matter):
   `vitastor-cli create -s 10G -p fastpool testfs`
 3. Mark data pool as an FS pool: `vitastor-cli modify-pool --used-for-fs testfs data-pool`
 4. Either mount the FS: `vitastor-nfs mount --fs testfs --pool data-pool /mnt/vita`
 5. Or start the NFS server: `vitastor-nfs start --fs testfs --pool data-pool`
 ### Supported POSIX features
 - Read-after-write semantics (read returns new data immediately after write)
 - Linear and random read and write
 - Writing outside current file size
 - Hierarchical structure, immediate rename of files and directories
 - File size change support (truncate)
 - Permissions (chmod/chown)
 - Flushing data to stable storage (if required) (fsync)
 - Symbolic links
 - Hard links
 - Special files (devices, sockets, named pipes)
 - File modification and attribute change time tracking (mtime and ctime)
 - Modification time (mtime) and last access time (atime) change support (utimes)
 - Correct handling of directory listing during file creation/deletion
 ### Limitations
 POSIX features currently not implemented in VitastorFS:
 - File locking is not supported
 - Actually used space is not counted, so `du` always reports apparent file sizes
  instead of actually allocated space
 - Access times (`atime`) are not tracked (like `-o noatime`)
 - Modification time (`mtime`) is updated lazily every second (like `-o lazytime`)
 Other notable missing features which should be addressed in the future:
 - Inode ID reuse. Currently inode IDs always grow, the limit is 2^48 inodes, so
  in theory you may hit it if you create and delete a very large number of files
 - Compaction of the key-value B-Tree. Current implementation never merges or deletes
  B-Tree blocks, so B-Tree may become bloated over time. Currently you can
  use `vitastor-kv dumpjson` & `loadjson` commands to recreate the index in such
  situations.
 - Filesystem check tool. VitastorFS doesn't have journal because it would impose a
  severe performance hit, optimistic CAS-based transactions are used instead of it.
  So, again, in theory an abnormal shutdown of the FS server may leave some garbage
  in the DB. The FS is implemented in such a way that this garbage doesn't affect its
  function, but having a tool to clean it up still seems a right thing to do.
 ## Horizontal scaling
 Linux NFS 3.0 client doesn't support built-in scaling or failover, i.e. you can't
 specify multiple server addresses when mounting the FS.
 However, you can use any regular TCP load balancing over multiple NFS servers.
 It's absolutely safe with `immediate_commit=all` and `client_enable_writeback=false`
 settings, because Vitastor NFS proxy doesn't keep uncommitted data in memory
 with these settings. But it may even work without `immediate_commit=all` because
 the Linux NFS client repeats all uncommitted writes if it loses the connection.
 ## Commands
 ### mount
 `vitastor-nfs (--fs <NAME> | --block) [-o <OPT>] mount <MOUNTPOINT>`
 Start local filesystem server and mount file system to <MOUNTPOINT>.
 Use regular `umount <MOUNTPOINT>` to unmount the FS.
 The server will be automatically stopped when the FS is unmounted.
 - `-o|--options <OPT>` - Pass additional NFS mount options (ex.: -o async).
 ### start
 `vitastor-nfs (--fs <NAME> | --block) start`
 Start network NFS server. Options:
 | <!-- -->        | <!-- -->                                                   |
 |-----------------|------------------------------------------------------------|
 | `--bind <IP>`   | bind service to \<IP> address (default 0.0.0.0)            |
 | `--port <PORT>` | use port \<PORT> for NFS services (default is 2049)        |
 | `--portmap 0`   | do not listen on port 111 (portmap/rpcbind, requires root) |
 ### upgrade
 `vitastor-nfs --fs <NAME> upgrade`
 Upgrade FS metadata. Can be run online, but server(s) should be restarted after upgrade.
 ### defrag
 `vitastor-nfs --fs <NAME> defrag [OPTIONS] [--dry-run]`
 Defragment volumes used for small file storage having more than \<defrag_percent> %
 of data removed. Can be run online.
 In VitastorFS, small files are stored in large "volumes" / "shared inodes" one
 after another. When you delete or extend such files, they are moved and garbage is left
 behind. Defragmentation removes garbage and moves data still in use to new volumes.
 Options:
 | <!-- -->                   | <!-- -->                                                                |
 |----------------------------|------------------------------------------------------------------------ |
 | `--volume_untouched 86400` | Defragment volumes last appended to at least this number of seconds ago |
 | `--defrag_percent 50`      | Defragment volumes with at least this % of removed data                 |
 | `--defrag_block_count 16`  | Read this number of pool blocks at once during defrag                   |
 | `--defrag_iodepth 16`      | Move up to this number of files in parallel during defrag               |
 | `--trace`                  | Print verbose defragmentation status                                    |
 | `--dry-run`                | Skip modifications, only print status                                   |
 | `--recalc-stats`           | Recalculate all volume statistics                                       |
 | `--include-empty`          | Include old and empty volumes; make sure to restart NFS servers before using it |
 | `--no-rm`                  | Move, but do not delete data                                            |
 ## Common options
 | <!-- -->           | <!-- -->                                                 |
 |--------------------|----------------------------------------------------------|
 | `--fs <NAME>`      | use VitastorFS with metadata in image \<NAME>            |
 | `--block`          | use pseudo-FS presenting images as files                 |
 | `--pool <POOL>`    | use \<POOL> as default pool for new files                |
 | `--subdir <DIR>`   | export \<DIR> instead of root directory (pseudo-FS only) |
 | `--nfspath <PATH>` | set NFS export path to \<PATH> (default is /)            |
 | `--pidfile <FILE>` | write process ID to the specified file                   |
 | `--logfile <FILE>` | log to the specified file                                |
 | `--foreground 1`   | stay in foreground, do not daemonize                     |
--- a/docs/usage/nfs.ru.md
+++ b/docs/usage/nfs.ru.md
@ -1,190 +1,44 @@
-[Документация](../../README-ru.md#документация) → Использование → VitastorFS и псевдо-ФС
+[Документация](../../README-ru.md#документация) → Использование → NFS
 -----
 [Read in English](nfs.en.md)
-# VitastorFS и псевдо-ФС
+# NFS
-В Vitastor есть две реализации файловой системы. Обе используются через `vitastor-nfs`.
+В Vitastor реализована упрощённая NFS 3.0 прокси для эмуляции файлового доступа к образам.
 Это не полноценная файловая система, т.к. метаданные всех файлов (образов) сохраняются
 в etcd и всё время хранятся в оперативной памяти - то есть, положить туда много файлов
 не получится.
-Команды:
+Однако в качестве способа доступа к образам виртуальных машин NFS прокси прекрасно подходит
- [mount](#mount)
+и позволяет подключить Vitastor, например, к VMWare.
 - [start](#start)
 - [upgrade](#upgrade)
 - [defrag](#defrag)
-## Псевдо-ФС
+При этом, если вы используете режим immediate_commit=all (для SSD с конденсаторами или HDD
 с отключённым кэшем), то NFS-сервер не имеет состояния и вы можете свободно поднять
 его в нескольких экземплярах и использовать поверх них сетевой балансировщик нагрузки или
 схему с отказоустойчивостью.
-Упрощённая реализация псевдо-ФС используется для эмуляции файлового доступа к блочным
+Использование vitastor-nfs:
 образам Vitastor. Это не полноценная файловая система - в ней отсутствуют многие функции
 POSIX ФС, а метаданные всех файлов (образов) сохраняются в etcd и всё время хранятся в
 оперативной памяти - то есть, псевдо-ФС подходит для сотен или тысяч файлов, но не миллионов.
 Псевдо-ФС предназначена для доступа к образам виртуальных машин в средах, где другие
 способы невозможны или неудобны - например, в VMWare. Для VMWare это лучшая опция, чем
 iSCSI, так как при использовании iSCSI VMWare размещает все виртуальные машины в одном
 большом блочном образе внутри собственной ФС VMFS, а с NFS VMFS не используется и каждый
 диск ВМ представляется в виде одного файла, то есть, соответствует одному блочному образу
 Vitastor, как это и задумано изначально.
 Чтобы подключить псевдо-ФС Vitastor, выполните команду `vitastor-nfs mount --block /mnt/vita`.
 Либо же запустите сетевой вариант сервера:
 ```
-vitastor-nfs start --block --etcd_address 192.168.5.10:2379 --portmap 0 --port 2050 --pool testpool
+vitastor-nfs [СТАНДАРТНЫЕ ОПЦИИ] [ДРУГИЕ ОПЦИИ]
 --subdir <DIR>    экспортировать "поддиректорию" - образы с префиксом имени <DIR>/ (по умолчанию пусто - экспортировать все образы)
 --portmap 0       отключить сервис portmap/rpcbind на порту 111 (по умолчанию включён и требует root привилегий)
 --bind <IP>       принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех)
 --nfspath <PATH>  установить путь NFS-экспорта в <PATH> (по умолчанию /)
 --port <PORT>     использовать порт <PORT> для NFS-сервисов (по умолчанию 2049)
 --pool <POOL>     использовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
 --foreground 1    не уходить в фон после запуска
 ```
-Примонтировать ФС, запущенную с такими опциями, можно следующей командой:
+Пример монтирования Vitastor через NFS (etcd_address необязателен):
 ```
-mount server:/ /mnt/ -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
+vitastor-nfs --etcd_address 192.168.5.10:2379 --portmap 0 --port 2050 --pool testpool
 ```
-## VitastorFS
+```
-
+mount localhost:/ /mnt/ -o port=2050,mountport=2050,nfsvers=3,soft,nolock,tcp
-VitastorFS - полноценная кластерная (Read-Write-Many) файловая система. Она поддерживает
+```
 большую часть функций POSIX - иерархическую организацию, символические ссылки, жёсткие
 ссылки, быстрые переименования и так далее.
 Метаданные VitastorFS хранятся в собственной реализации БД формата ключ-значения,
 основанной на Параллельном Оптимистичном Б-дереве поверх обычного блочного образа Vitastor.
 И записи каталогов, и иноды, как обычно в Vitastor, хранятся в простом человекочитаемом
 JSON-формате :-). Для инспекции содержимого БД можно использовать инструмент `vitastor-kv`.
 Чтобы использовать VitastorFS:
 1. Создайте пул для данных ФС или выберите существующий пустой пул
 2. Создайте блочный образ для метаданных ФС, желательно, в более быстром пуле (на SSD
   или по крайней мере на HDD, но без EC), но можно и в том же пуле, что данные
   (размер образа значения не имеет):
   `vitastor-cli create -s 10G -p fastpool testfs`
 3. Пометьте пул данных как ФС-пул: `vitastor-cli modify-pool --used-for-fs testfs data-pool`
 4. Либо примонтируйте ФС: `vitastor-nfs mount --fs testfs --pool data-pool /mnt/vita`
 5. Либо запустите сетевой NFS-сервер: `vitastor-nfs start --fs testfs --pool data-pool`
 ### Поддерживаемые функции POSIX
 - Чтение актуальной версии данных сразу после записи
 - Последовательное и произвольное чтение и запись
 - Запись за пределами текущего размера файла
 - Иерархическая организация, мгновенное переименование файлов и каталогов
 - Изменение размера файла (truncate)
 - Права на файлы (chmod/chown)
 - Фиксация данных на диски (когда необходимо) (fsync)
 - Символические ссылки
 - Жёсткие ссылки
 - Специальные файлы (устройства, сокеты, каналы)
 - Отслеживание времён модификации (mtime), изменения атрибутов (ctime)
 - Ручное изменение времён модификации (mtime), последнего доступа (atime)
 - Корректная обработка изменений списка файлов во время листинга
 ### Ограничения
 Отсутствующие на данный момент в VitastorFS функции POSIX:
 - Блокировки файлов не поддерживаются
 - Фактически занятое файлами место не подсчитывается и не возвращается вызовами
  stat(2), так что `du` всегда показывает сумму размеров файлов, а не фактически занятое место
 - Времена доступа (`atime`) не отслеживаются (как будто ФС смонтирована с `-o noatime`)
 - Времена модификации (`mtime`) отслеживаются асинхронно (как будто ФС смонтирована с `-o lazytime`)
 Другие недостающие функции, которые нужно добавить в будущем:
 - Переиспользование номеров инодов. В текущей реализации номера инодов всё время
  увеличиваются, так что в теории вы можете упереться в лимит, если насоздаёте
  и наудаляете больше, чем 2^48 файлов.
 - Очистка места в Б-дереве метаданных. Текущая реализация никогда не сливает и не
  удаляет блоки Б-дерева, так что в теории дерево может разрастись и стать неоптимальным.
  Если вы столкнётесь с такой ситуацией сейчас, вы можете решить её с помощью
  команд `vitastor-kv dumpjson` и `loadjson` (т.е. пересоздав и загрузив обратно все метаданные ФС).
 - Инструмент проверки метаданных файловой системы. У VitastorFS нет журнала, так как
  журнал бы сильно замедлил реализацию, вместо него используются оптимистичные
  транзакции на основе CAS (сравнить-и-записать), и теоретически при нештатном
  завершении сервера ФС в БД также могут оставаться неконсистентные "мусорные"
  записи. ФС устроена так, что на работу они не влияют, но для порядка и их стоит
  уметь подчищать.
 ## Горизонтальное масштабирование
 Клиент Linux NFS 3.0 не поддерживает встроенное масштабирование или отказоустойчивость.
 То есть, вы не можете задать несколько адресов серверов при монтировании ФС.
 Однако вы можете использовать любые стандартные сетевые балансировщики нагрузки
 или схемы с отказоустойчивостью. Это точно безопасно при настройках `immediate_commit=all` и
 `client_enable_writeback=false`, так как с ними NFS-сервер Vitastor вообще не хранит
 в памяти ещё не зафиксированные на дисках данные; и вполне вероятно безопасно
 даже без `immediate_commit=all`, потому что NFS-клиент ядра Linux повторяет все
 незафиксированные запросы при потере соединения.
 ## Команды
 ### mount
 `vitastor-nfs (--fs <NAME> | --block) mount [-o <OPT>] <MOUNTPOINT>`
 Запустить локальный сервер и примонтировать ФС в директорию <MOUNTPOINT>.
 Чтобы отмонтировать ФС, используйте обычную команду `umount <MOUNTPOINT>`.
 Сервер автоматически останавливается при отмонтировании ФС.
 - `-o|--options <OPT>` - Передать дополнительные опции монтирования NFS (пример: -o async).
 ### start
 `vitastor-nfs (--fs <NAME> | --block) start`
 Запустить сетевой NFS-сервер. Опции:
 | <!-- -->        | <!-- -->                                                              |
 |-----------------|-----------------------------------------------------------------------|
 | `--bind <IP>`   | принимать соединения по адресу \<IP> (по умолчанию 0.0.0.0 - на всех) |
 | `--port <PORT>` | использовать порт \<PORT> для NFS-сервисов (по умолчанию 2049)        |
 | `--portmap 0`   | отключить сервис portmap/rpcbind на порту 111 (по умолчанию включён и требует root привилегий) |
 ### upgrade
 `vitastor-nfs --fs <NAME> upgrade`
 Обновить метаданные ФС. Можно запускать онлайн (при запущенных серверах NFS), но после выполнения их всё
 же желательно перезапустить.
 ### defrag
 `vitastor-nfs --fs <NAME> defrag [OPTIONS] [--dry-run]`
 Дефрагментировать тома, используемые для хранения мелких файлов, в которых более, чем
 <defrag_percent> процентов данных удалено. Можно запускать онлайн.
 На уровне реализации ФС файлы, меньшие, чем размер объекта пула (block_size умножить на число
 частей данных, если пул EC), упаковываются друг за другом в большие "тома" / "общие иноды".
 Когда такие файлы удаляются или увеличиваются, они перемещаются и оставляют за собой "мусор".
 При дефрагментации мусор удаляется, а всё ещё используемые данные перемещаются в новые тома.
 Опции:
 | <!-- -->                   | <!-- -->                                                                |
 |----------------------------|------------------------------------------------------------------------ |
 | `--volume_untouched 86400` | Дефрагментировать только тома, в которые уже не писали это число секунд |
 | `--defrag_percent 50`      | Дефрагментировать только тома, в которых этот % данных удалён           |
 | `--defrag_block_count 16`  | Читать это количество блоков пула за один раз                           |
 | `--defrag_iodepth 16`      | Перемещать одновременно до этого числа файлов                           |
 | `--trace`                  | Печатать детальную статистику дефрагментации                            |
 | `--dry-run`                | Не производить никаких изменений, только описать выполняемые действия   |
 | `--recalc-stats`           | Пересчитать и сохранить статистику всех томов                           |
 | `--include-empty`          | Дефрагментировать старые и пустые тома; обязательно перезапустите NFS-серверы после использования этой опции |
 | `--no-rm`                  | Перемещать, но не удалять данные                                        |
 ## Общие опции
 | <!-- -->           | <!-- -->                                                |
 |--------------------|---------------------------------------------------------|
 | `--fs <NAME>`      | использовать VitastorFS с метаданными в образе \<NAME>  |
 | `--block`          | использовать псевдо-ФС для доступа к блочным образам    |
 | `--pool <POOL>`    | использовать пул \<POOL> для новых файлов (обязательно, если пул в кластере не один) |
 | `--subdir <DIR>`   | экспортировать подкаталог \<DIR>, а не корень (только для псевдо-ФС) |
 | `--nfspath <PATH>` | установить путь NFS-экспорта в \<PATH> (по умолчанию /) |
 | `--pidfile <FILE>` | записать ID процесса в заданный файл                    |
 | `--logfile <FILE>` | записывать логи в заданный файл                         |
 | `--foreground 1`   | не уходить в фон после запуска                          |
--- a/docs/usage/pg_states.dot
+++ b/docs/usage/pg_states.dot
@ -1,13 +0,0 @@
 // State transition graph (the file name suggests these are PG states).
 // Nodes are filled hexagons with white labels; edges are green.
 digraph G {
    // Lay the graph out left-to-right on a transparent background.
    rankdir=LR;
    bgcolor=transparent;
    edge [color="#00A000"];
    // Default node look: olive fill (#A0A000), overridden per node below.
    node [shape=hexagon, fillcolor="#A0A000", fontcolor=white, fontname="sans-serif", fontsize=12, style=filled, penwidth=0];
    // Transitions. Each chain "a -> b -> c" declares the edges a->b and b->c.
    offline -> starting -> peering -> offline;
    stopping -> offline;
    starting -> incomplete -> offline;
    active -> repeering -> peering -> active -> stopping;
    // Per-node fill overrides: red for offline/incomplete, green for active.
    offline [fillcolor="#A00000"];
    incomplete [fillcolor="#A00000"];
    active [fillcolor="#00A000"];
 }
--- a/docs/usage/pg_states.svg
+++ b/docs/usage/pg_states.svg
@ -1,114 +0,0 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
 "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 <!-- Generated by graphviz version 2.43.0 (0)
 -->
 <!-- Title: G Pages: 1 -->
 <svg width="603pt" height="123pt"
 viewBox="0.00 0.00 602.66 122.55" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 118.55)">
 <title>G</title>
 <!-- offline -->
 <g id="node1" class="node">
 <title>offline</title>
 <polygon fill="#a00000" stroke="black" stroke-width="0" points="75.52,-56 56.6,-74 18.75,-74 -0.17,-56 18.75,-38 56.6,-38 75.52,-56"/>
 <text text-anchor="middle" x="37.67" y="-52.9" font-family="sans-serif" font-size="12.00" fill="white">offline</text>
 </g>
 <!-- starting -->
 <g id="node2" class="node">
 <title>starting</title>
 <polygon fill="#a0a000" stroke="black" stroke-width="0" points="199.56,-79 177.49,-97 133.35,-97 111.28,-79 133.35,-61 177.49,-61 199.56,-79"/>
 <text text-anchor="middle" x="155.42" y="-75.9" font-family="sans-serif" font-size="12.00" fill="white">starting</text>
 </g>
 <!-- offline&#45;&gt;starting -->
 <g id="edge1" class="edge">
 <title>offline&#45;&gt;starting</title>
 <path fill="none" stroke="#00a000" d="M69.39,-62.1C81.66,-64.54 96.04,-67.4 109.45,-70.06"/>
 <polygon fill="#00a000" stroke="#00a000" points="108.98,-73.54 119.47,-72.05 110.34,-66.67 108.98,-73.54"/>
 </g>
 <!-- peering -->
 <g id="node3" class="node">
 <title>peering</title>
 <polygon fill="#a0a000" stroke="black" stroke-width="0" points="335.57,-95 313.96,-113 270.74,-113 249.13,-95 270.74,-77 313.96,-77 335.57,-95"/>
 <text text-anchor="middle" x="292.35" y="-91.9" font-family="sans-serif" font-size="12.00" fill="white">peering</text>
 </g>
 <!-- starting&#45;&gt;peering -->
 <g id="edge2" class="edge">
 <title>starting&#45;&gt;peering</title>
 <path fill="none" stroke="#00a000" d="M194.36,-83.5C209.71,-85.32 227.6,-87.44 243.8,-89.36"/>
 <polygon fill="#00a000" stroke="#00a000" points="243.82,-92.89 254.16,-90.59 244.64,-85.94 243.82,-92.89"/>
 </g>
 <!-- incomplete -->
 <g id="node5" class="node">
 <title>incomplete</title>
 <polygon fill="#a00000" stroke="black" stroke-width="0" points="349.09,-41 320.72,-59 263.99,-59 235.62,-41 263.99,-23 320.72,-23 349.09,-41"/>
 <text text-anchor="middle" x="292.35" y="-37.9" font-family="sans-serif" font-size="12.00" fill="white">incomplete</text>
 </g>
 <!-- starting&#45;&gt;incomplete -->
 <g id="edge5" class="edge">
 <title>starting&#45;&gt;incomplete</title>
 <path fill="none" stroke="#00a000" d="M188.74,-69.9C204.92,-65.34 224.85,-59.73 242.82,-54.67"/>
 <polygon fill="#00a000" stroke="#00a000" points="243.9,-58 252.57,-51.92 242,-51.26 243.9,-58"/>
 </g>
 <!-- peering&#45;&gt;offline -->
 <g id="edge3" class="edge">
 <title>peering&#45;&gt;offline</title>
 <path fill="none" stroke="#00a000" d="M259.32,-103.69C222.67,-112.11 161.28,-121.52 111.35,-106 94.55,-100.78 78.2,-90.18 65.27,-80.08"/>
 <polygon fill="#00a000" stroke="#00a000" points="67.26,-77.19 57.3,-73.58 62.84,-82.61 67.26,-77.19"/>
 </g>
 <!-- active -->
 <g id="node6" class="node">
 <title>active</title>
 <polygon fill="#00a000" stroke="black" stroke-width="0" points="456.34,-49 438.55,-67 402.97,-67 385.18,-49 402.97,-31 438.55,-31 456.34,-49"/>
 <text text-anchor="middle" x="420.76" y="-45.9" font-family="sans-serif" font-size="12.00" fill="white">active</text>
 </g>
 <!-- peering&#45;&gt;active -->
 <g id="edge9" class="edge">
 <title>peering&#45;&gt;active</title>
 <path fill="none" stroke="#00a000" d="M322.99,-84.22C341.47,-77.49 365.34,-68.8 384.75,-61.74"/>
 <polygon fill="#00a000" stroke="#00a000" points="385.96,-65.03 394.16,-58.32 383.56,-58.45 385.96,-65.03"/>
 </g>
 <!-- stopping -->
 <g id="node4" class="node">
 <title>stopping</title>
 <polygon fill="#a0a000" stroke="black" stroke-width="0" points="591.65,-18 567.57,-36 519.39,-36 495.31,-18 519.39,0 567.57,0 591.65,-18"/>
 <text text-anchor="middle" x="543.48" y="-14.9" font-family="sans-serif" font-size="12.00" fill="white">stopping</text>
 </g>
 <!-- stopping&#45;&gt;offline -->
 <g id="edge4" class="edge">
 <title>stopping&#45;&gt;offline</title>
 <path fill="none" stroke="#00a000" d="M500.13,-14.3C440.78,-9.83 329.58,-4.07 235.49,-14 179.71,-19.89 116.5,-34.9 77.11,-45.29"/>
 <polygon fill="#00a000" stroke="#00a000" points="76.14,-41.92 67.38,-47.89 77.94,-48.69 76.14,-41.92"/>
 </g>
 <!-- incomplete&#45;&gt;offline -->
 <g id="edge6" class="edge">
 <title>incomplete&#45;&gt;offline</title>
 <path fill="none" stroke="#00a000" d="M240.25,-44.03C194.33,-46.76 127.57,-50.72 83.64,-53.33"/>
 <polygon fill="#00a000" stroke="#00a000" points="83.32,-49.84 73.54,-53.93 83.73,-56.83 83.32,-49.84"/>
 </g>
 <!-- active&#45;&gt;stopping -->
 <g id="edge10" class="edge">
 <title>active&#45;&gt;stopping</title>
 <path fill="none" stroke="#00a000" d="M449.46,-41.89C463.64,-38.25 481.26,-33.72 497.34,-29.59"/>
 <polygon fill="#00a000" stroke="#00a000" points="498.29,-32.96 507.11,-27.08 496.55,-26.18 498.29,-32.96"/>
 </g>
 <!-- repeering -->
 <g id="node7" class="node">
 <title>repeering</title>
 <polygon fill="#a0a000" stroke="black" stroke-width="0" points="594.84,-83 569.16,-101 517.8,-101 492.12,-83 517.8,-65 569.16,-65 594.84,-83"/>
 <text text-anchor="middle" x="543.48" y="-79.9" font-family="sans-serif" font-size="12.00" fill="white">repeering</text>
 </g>
 <!-- active&#45;&gt;repeering -->
 <g id="edge7" class="edge">
 <title>active&#45;&gt;repeering</title>
 <path fill="none" stroke="#00a000" d="M448.85,-56.63C462.9,-60.59 480.44,-65.53 496.53,-70.06"/>
 <polygon fill="#00a000" stroke="#00a000" points="495.74,-73.47 506.32,-72.82 497.64,-66.74 495.74,-73.47"/>
 </g>
 <!-- repeering&#45;&gt;peering -->
 <g id="edge8" class="edge">
 <title>repeering&#45;&gt;peering</title>
 <path fill="none" stroke="#00a000" d="M495.33,-85.27C451.99,-87.36 387.93,-90.44 343.63,-92.58"/>
 <polygon fill="#00a000" stroke="#00a000" points="343.2,-89.09 333.38,-93.07 343.54,-96.09 343.2,-89.09"/>
 </g>
 </g>
 </svg>
--- a/docs/usage/qemu.en.md
+++ b/docs/usage/qemu.en.md
@ -151,9 +151,9 @@ Example performance comparison:
 To try VDUSE you need at least Linux 5.15, built with VDUSE support
 (CONFIG_VDPA=m, CONFIG_VDPA_USER=m, CONFIG_VIRTIO_VDPA=m).
-Debian Linux kernels had these options disabled until 6.6, so make sure you install a newer kernel
+Debian Linux kernels currently have these options disabled, so if you want to try it on Debian,
-(from bookworm-backports, trixie or newer Debian version) if you want to try VDUSE. You can also
+use a kernel from Ubuntu [kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/), Proxmox,
-build modules for an existing kernel manually:
+or build modules for the Debian kernel manually:
 ```
 mkdir build
--- a/docs/usage/qemu.ru.md
+++ b/docs/usage/qemu.ru.md
@ -154,9 +154,9 @@ VDUSE - на данный момент лучший интерфейс для п
 Чтобы попробовать VDUSE, вам нужно ядро Linux как минимум версии 5.15, собранное с поддержкой
 VDUSE (CONFIG_VDPA=m, CONFIG_VDPA_USER=m, CONFIG_VIRTIO_VDPA=m).
-В ядрах в Debian Linux эти опции включены, только начиная с 6.6, так что установите свежее ядро
+В ядрах в Debian Linux поддержка пока отключена по умолчанию, так что чтобы попробовать VDUSE
-из bookworm-backports, trixie или из более новой версии Debian, если хотите попробовать VDUSE.
+на Debian, поставьте ядро из Ubuntu [kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/),
-Либо же вы можете самостоятельно собрать модули для установленного ядра:
+из Proxmox или соберите модули для ядра Debian вручную:
 ```
 mkdir build
--- a/mon/.eslintrc.js
+++ b/mon/.eslintrc.js
@ -1,54 +0,0 @@
 // ESLint configuration: ES6 + Node.js environment, the recommended core and
 // node-plugin rule sets, plus the style rules listed under "rules".
 module.exports = {
    // Predefined globals: ES6 built-ins and Node.js.
    "env": {
        "es6": true,
        "node": true
    },
    // Base rule sets this config builds on.
    "extends": [
        "eslint:recommended",
        "plugin:node/recommended"
    ],
    // Parse sources as ECMAScript 2020.
    "parserOptions": {
        "ecmaVersion": 2020
    },
    // Needed for the "import/no-unresolved" rule below.
    "plugins": [
        "import"
    ],
    "rules": {
        // 4-space indentation.
        "indent": [
            "error",
            4
        ],
        // Allman braces (opening brace on its own line); one-line blocks are allowed.
        "brace-style": [
            "error",
            "allman",
            { "allowSingleLine": true }
        ],
        // LF line endings only.
        "linebreak-style": [
            "error",
            "unix"
        ],
        // Semicolons are mandatory.
        "semi": [
            "error",
            "always"
        ],
        // The following checks are switched off.
        "no-useless-escape": [
            "off"
        ],
        "no-control-regex": [
            "off"
        ],
        "no-empty": [
            "off"
        ],
        "no-process-exit": [
            "off"
        ],
        "node/shebang": [
            "off"
        ],
        // Severity 2 = error; "commonjs: true" makes the rule check require() calls too.
        "import/no-unresolved": [
            2,
            { "commonjs": true }
        ]
    }
 };
--- a/mon/scripts/90-vitastor.rules
+++ b/mon/scripts/90-vitastor.rules
--- a/mon/pg_utils.js
+++ b/mon/pg_utils.js
@ -97,6 +97,7 @@ function scale_pg_history(prev_pg_history, prev_pgs, new_pgs)
 function scale_pg_count(prev_pgs, new_pg_count)
 {
    const old_pg_count = prev_pgs.length;
    // Just for the lp_solve optimizer - pick a "previous" PG for each "new" one
    if (prev_pgs.length < new_pg_count)
    {
--- a/mon/scripts/afr.js
+++ b/mon/scripts/afr.js
--- a/mon/scripts/afr_test.js
+++ b/mon/scripts/afr_test.js
--- a/mon/antietcd_adapter.js
+++ b/mon/antietcd_adapter.js
@ -1,188 +0,0 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 (see README.md for details)
 const AntiEtcd = require('antietcd');
 const vitastor_persist_filter = require('./vitastor_persist_filter.js');
 const { b64, local_ips } = require('./utils.js');
 // Adapter that lets the monitor talk to an in-process AntiEtcd instance
 // through the methods defined below: parse_config, start_watcher,
 // stop_watcher, become_master and etcd_call.
 class AntiEtcdAdapter
 {
    // Start an embedded AntiEtcd node if config.use_antietcd is set and exactly
    // one of the configured etcd addresses refers to a local IP.
    // Returns the started AntiEtcd instance, or undefined when antietcd is
    // disabled or no configured address is local.
    // Exits the process if more than one configured address is local.
    static async start_antietcd(config)
    {
        let antietcd;
        if (config.use_antietcd)
        {
            // Accept either an array or a comma-separated string of addresses
            let cluster = config.etcd_address;
            if (!(cluster instanceof Array))
                cluster = cluster ? (''+(cluster||'')).split(/,+/) : [];
            // Normalize every address (lowercase, strip the http(s):// scheme and any /path)
            // and deduplicate by using the normalized addresses as object keys
            cluster = Object.keys(cluster.reduce((a, url) =>
            {
                a[url.toLowerCase().replace(/^(https?:\/\/)/, '').replace(/\/.*$/, '')] = true;
                return a;
            }, {}));
            const cfg_port = config.antietcd_port;
            const is_local = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
            // Keep [ip, port] pairs whose IP is local and, if antietcd_port is set, whose port matches it
            const selected = cluster.map(s => s.split(':', 2)).filter(ip => is_local[ip[0]] && (!cfg_port || ip[1] == cfg_port));
            if (selected.length > 1)
            {
                console.error('More than 1 etcd_address matches local IPs, please specify port');
                process.exit(1);
            }
            else if (selected.length == 1)
            {
                // Default AntiEtcd settings derived from the monitor configuration
                const antietcd_config = {
                    ip: selected[0][0],
                    port: selected[0][1],
                    data: config.antietcd_data_file || ((config.antietcd_data_dir || '/var/lib/vitastor') + '/mon_'+selected[0][1]+'.json.gz'),
                    persist_filter: vitastor_persist_filter({ vitastor_prefix: config.etcd_prefix || '/vitastor' }),
                    node_id: selected[0][0]+':'+selected[0][1], // node_id = ip:port
                    // A single address means non-clustered mode (cluster = null)
                    cluster: (cluster.length == 1 ? null : cluster.reduce((a, c) => { a[c] = "http://"+c; return a; }, {})),
                    cluster_key: (config.etcd_prefix || '/vitastor'),
                    stale_read: 1,
                    log_level: 1,
                };
                // Pass through any extra antietcd_* options. Options already set above
                // are not overridden, except for "ip" and "cluster_key".
                for (const key in config)
                {
                    if (key.substr(0, 9) === 'antietcd_')
                    {
                        const noprefix = key.substr(9);
                        if (!(noprefix in antietcd_config) || noprefix == 'ip' || noprefix == 'cluster_key')
                        {
                            antietcd_config[noprefix] = config[key];
                        }
                    }
                }
                console.log('Starting Antietcd node '+antietcd_config.node_id);
                antietcd = new AntiEtcd(antietcd_config);
                await antietcd.start();
            }
            else
            {
                console.log('Antietcd is enabled, but etcd_address does not contain local IPs, proceeding without it');
            }
        }
        return antietcd;
    }
    // mon: the monitor object (its config, etcd_lease_id, etcd_watch_revision,
    //      get_mon_state(), on_message() and stopped are used by the methods below).
    // antietcd: a started AntiEtcd instance.
    constructor(mon, antietcd)
    {
        this.mon = mon;
        this.antietcd = antietcd;
        // Callbacks waiting for this node to become the leader
        this.on_leader = [];
        // On a 'raftchange' event with state 'leader', run and clear the pending callbacks
        this.on_change = (st) =>
        {
            if (st.state === 'leader')
            {
                for (const cb of this.on_leader)
                {
                    cb();
                }
                this.on_leader = [];
            }
        };
        this.antietcd.on('raftchange', this.on_change);
    }
    // Intentionally empty: this adapter reads nothing from the config here.
    parse_config(/*config*/)
    {
    }
    // Unsubscribe from 'raftchange' events and cancel the active watch, if any.
    // The cancellation is not awaited; a failure is only logged.
    stop_watcher()
    {
        this.antietcd.off('raftchange', this.on_change);
        const watch_id = this.watch_id;
        if (watch_id)
        {
            this.watch_id = null;
            this.antietcd.cancel_watch(watch_id).catch(console.error);
        }
    }
    // (Re)create the watch over all keys under <etcd_prefix>/, starting from
    // mon.etcd_watch_revision. Each watch message is handed to mon.on_message().
    async start_watcher()
    {
        if (this.watch_id)
        {
            await this.antietcd.cancel_watch(this.watch_id);
            this.watch_id = null;
        }
        const watch_id = await this.antietcd.create_watch({
            key: b64(this.mon.config.etcd_prefix+'/'),
            // '0' is the character right after '/', so [prefix+'/', prefix+'0') spans every key under prefix/
            range_end: b64(this.mon.config.etcd_prefix+'0'),
            start_revision: ''+this.mon.etcd_watch_revision,
            watch_id: 1,
            progress_notify: true,
        }, (message) =>
        {
            // Deliver asynchronously, outside of the watch callback itself
            setImmediate(() => this.mon.on_message(message.result));
        });
        console.log('Successfully subscribed to antietcd revision '+this.antietcd.etctree.mod_revision);
        this.watch_id = watch_id;
    }
    // In clustered mode, wait until this node's raft state is 'leader'.
    // Then write the monitor state to <etcd_prefix>/mon/master, bound to the monitor's lease.
    async become_master()
    {
        if (!this.antietcd.cluster)
        {
            console.log('Running in non-clustered mode');
        }
        else
        {
            console.log('Waiting to become master');
            if (this.antietcd.cluster.raft.state !== 'leader')
            {
                // Resolved by the 'raftchange' handler installed in the constructor
                await new Promise(ok => this.on_leader.push(ok));
            }
        }
        const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
        await this.etcd_call('/kv/txn', {
            success: [ { requestPut: { key: b64(this.mon.config.etcd_prefix+'/mon/master'), value: b64(JSON.stringify(state)), lease: ''+this.mon.etcd_lease_id } } ],
        }, this.mon.config.etcd_start_timeout, 0);
        if (this.antietcd.cluster)
        {
            console.log('Became master');
        }
    }
    // Call an AntiEtcd API method by its etcd-style path, retrying on failure.
    // path:    etcd-style path, e.g. '/kv/txn' (converted to 'kv_txn' for antietcd.api())
    // body:    request object passed to antietcd.api()
    // timeout: minimum interval in milliseconds between consecutive attempts
    // retries: maximum number of attempts; values in [0, 1) are treated as 1,
    //          negative values mean "retry until success"
    // Returns the API result if it has no .error field.
    // Throws if the monitor is stopped or all attempts have failed.
    async etcd_call(path, body, timeout, retries)
    {
        let retry = 0;
        if (retries >= 0 && retries < 1)
        {
            retries = 1;
        }
        // Start time of the previous attempt; 0 means the first attempt is never delayed
        let prev = 0;
        while (retries < 0 || retry < retries)
        {
            retry++;
            if (this.mon.stopped)
            {
                throw new Error('Monitor instance is stopped');
            }
            try
            {
                // Space attempts at least <timeout> ms apart
                if (Date.now()-prev < timeout)
                {
                    await new Promise(ok => setTimeout(ok, timeout-(Date.now()-prev)));
                }
                prev = Date.now();
                // Strip leading/trailing slashes and turn the remaining ones into underscores
                const res = await this.antietcd.api(path.replace(/^\/+/, '').replace(/\/+$/, '').replace(/\/+/g, '_'), body);
                if (res.error)
                {
                    console.error('Failed to query antietcd '+path+' (retry '+retry+'/'+retries+'): '+res.error);
                }
                else
                {
                    return res;
                }
            }
            catch (e)
            {
                // Exceptions are logged and count as a failed attempt
                console.error('Failed to query antietcd '+path+' (retry '+retry+'/'+retries+'): '+e.stack);
            }
        }
        throw new Error('Failed to query antietcd ('+retries+' retries)');
    }
 }
 module.exports = AntiEtcdAdapter;
--- a/mon/etcd_adapter.js
+++ b/mon/etcd_adapter.js
@ -1,352 +0,0 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 (see README.md for details)
 const http = require('http');
 const https = require('https');
 const WebSocket = require('ws');
 const { b64, local_ips } = require('./utils.js');
 // Error message thrown by etcd_call() once the monitor has been stopped
 const MON_STOPPED = 'Monitor instance is stopped';
 // etcd client of the monitor: selects an etcd endpoint, keeps a watch websocket open on it
 // and performs JSON POST requests with retries and endpoint failover.
 class EtcdAdapter
 {
    // mon - the owning monitor object. This class reads mon.config, mon.stopped,
    // mon.etcd_watch_revision and mon.etcd_lease_id and calls mon.get_mon_state(),
    // mon.on_message() and mon.die()
    constructor(mon)
    {
        this.mon = mon;
        this.ws = null;
        this.ws_alive = false;
        this.ws_keepalive_timer = null;
    }
    // Read etcd addresses from the configuration (etcd_address, falling back to etcd_url)
    parse_config(config)
    {
        this.parse_etcd_addresses(config.etcd_address||config.etcd_url);
    }
    // Normalize the etcd address list into full URLs.
    // addrs is an array or a comma-separated string of '[http[s]://]host[:port][/path]'.
    // The scheme defaults to http and the path defaults to /v3.
    // URLs whose host is reported by local_ips() go to this.etcd_local, the rest go to this.etcd_urls.
    // Exits the process if no address is given.
    parse_etcd_addresses(addrs)
    {
        const is_local_ip = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
        this.etcd_local = [];
        this.etcd_urls = [];
        this.selected_etcd_url = null;
        this.etcd_urls_to_try = [];
        if (!(addrs instanceof Array))
            addrs = addrs ? (''+(addrs||'')).split(/,/) : [];
        if (!addrs.length)
        {
            console.error('Vitastor etcd address(es) not specified. Please set on the command line or in the config file');
            process.exit(1);
        }
        for (let url of addrs)
        {
            let scheme = 'http';
            // Strip the scheme (remembering it) so that only host[:port][/path] remains
            url = url.trim().replace(/^(https?):\/\//, (m, m1) => { scheme = m1; return ''; });
            const slash = url.indexOf('/');
            const colon = url.indexOf(':');
            // The host is everything before the first ':' (or before the first '/' if there is no port)
            const is_local = is_local_ip[colon >= 0 ? url.substr(0, colon) : (slash >= 0 ? url.substr(0, slash) : url)];
            url = scheme+'://'+(slash >= 0 ? url : url+'/v3');
            if (is_local)
                this.etcd_local.push(url);
            else
                this.etcd_urls.push(url);
        }
    }
    // Return the currently selected etcd URL, selecting the next candidate if none is selected.
    // Candidates are tried in rounds: local URLs first, then the other URLs in random order.
    pick_next_etcd()
    {
        if (this.selected_etcd_url)
            return this.selected_etcd_url;
        if (!this.etcd_urls_to_try || !this.etcd_urls_to_try.length)
        {
            this.etcd_urls_to_try = [ ...this.etcd_local ];
            const others = [ ...this.etcd_urls ];
            while (others.length)
            {
                // Move a random remaining URL to the end of the candidate list
                const url = others.splice(0|(others.length*Math.random()), 1);
                this.etcd_urls_to_try.push(url[0]);
            }
        }
        this.selected_etcd_url = this.etcd_urls_to_try.shift();
        return this.selected_etcd_url;
    }
    // Close the watch websocket and stop the keepalive timer.
    // The selected URL is forgotten only if it is still cur_addr (default: the selected URL itself)
    stop_watcher(cur_addr)
    {
        cur_addr = cur_addr || this.selected_etcd_url;
        if (this.ws)
        {
            console.log('Disconnected from etcd at '+this.ws_used_url);
            this.ws.close();
            this.ws = null;
        }
        if (this.ws_keepalive_timer)
        {
            clearInterval(this.ws_keepalive_timer);
            this.ws_keepalive_timer = null;
        }
        if (this.selected_etcd_url == cur_addr)
        {
            this.selected_etcd_url = null;
        }
    }
    // Stop the watcher and start it again in the background; a failure is passed to mon.die
    restart_watcher(cur_addr)
    {
        this.stop_watcher(cur_addr);
        this.start_watcher(this.mon.config.etcd_mon_retries).catch(this.mon.die);
    }
    // Open the watch websocket (up to <retries> connection attempts) and subscribe to all
    // keys under mon.config.etcd_prefix starting from mon.etcd_watch_revision.
    // Calls mon.die() when no connection could be established.
    // NOTE: zero, missing or negative <retries> are all clamped to 1 here, so, unlike
    // etcd_call(), there is no "retry forever" mode and `retries < 0` below never holds.
    async start_watcher(retries)
    {
        let retry = 0;
        if (!retries || retries < 1)
        {
            retries = 1;
        }
        const tried = {};
        while (retries < 0 || retry < retries)
        {
            const cur_addr = this.pick_next_etcd();
            // http://... -> ws://..., https://... -> wss://...
            const base = 'ws'+cur_addr.substr(4);
            let now = Date.now();
            if (tried[base] && now-tried[base] < this.mon.config.etcd_start_timeout)
            {
                // Do not reconnect to the same endpoint more often than once per etcd_start_timeout
                await new Promise(ok => setTimeout(ok, this.mon.config.etcd_start_timeout-(now-tried[base])));
                now = Date.now();
            }
            tried[base] = now;
            if (this.mon.stopped)
            {
                return;
            }
            const ok = await new Promise(ok =>
            {
                // Keep a reference to the socket of THIS attempt: this.ws may already belong
                // to a later attempt (or be null) when the callbacks below run
                const ws = new WebSocket(base+'/watch');
                this.ws = ws;
                this.ws_used_url = cur_addr;
                const timer_id = setTimeout(() =>
                {
                    // Connection timeout: only touch this.ws if it is still our socket
                    if (this.ws === ws)
                    {
                        console.log('Disconnected from etcd at '+this.ws_used_url);
                        ws.close();
                        this.ws = null;
                    }
                    ok(false);
                }, this.mon.config.etcd_mon_timeout);
                const fail = () =>
                {
                    // The attempt is over - disarm the timeout so that it cannot fire later
                    // and close a websocket opened by a subsequent attempt
                    clearTimeout(timer_id);
                    ok(false);
                };
                ws.on('error', fail);
                ws.on('open', () =>
                {
                    ws.removeListener('error', fail);
                    if (timer_id)
                        clearTimeout(timer_id);
                    ok(true);
                });
            });
            if (ok)
                break;
            if (this.selected_etcd_url == cur_addr)
                this.selected_etcd_url = null;
            this.ws = null;
            retry++;
        }
        if (!this.ws)
        {
            this.mon.die('Failed to open etcd watch websocket');
            return;
        }
        if (this.mon.stopped)
        {
            this.stop_watcher();
            return;
        }
        const cur_addr = this.selected_etcd_url;
        this.ws_alive = true;
        // Keepalive: every interval request a progress notification. If no message at all
        // has arrived since the previous tick, treat the connection as dead and reconnect
        this.ws_keepalive_timer = setInterval(() =>
        {
            if (this.ws_alive && this.ws)
            {
                this.ws_alive = false;
                this.ws.send(JSON.stringify({ progress_request: {} }));
            }
            else
            {
                console.log('etcd websocket timed out, restarting it');
                this.restart_watcher(cur_addr);
            }
        }, (Number(this.mon.config.etcd_ws_keepalive_interval) || 5)*1000);
        this.ws.on('error', () => this.restart_watcher(cur_addr));
        // Watch the whole [prefix+'/', prefix+'0') key range ('0' is the character after '/')
        this.ws.send(JSON.stringify({
            create_request: {
                key: b64(this.mon.config.etcd_prefix+'/'),
                range_end: b64(this.mon.config.etcd_prefix+'0'),
                start_revision: ''+this.mon.etcd_watch_revision,
                watch_id: 1,
                progress_notify: true,
            },
        }));
        this.ws.on('message', (msg) =>
        {
            if (this.mon.stopped)
            {
                this.stop_watcher();
                return;
            }
            // Any incoming message proves that the connection is alive
            this.ws_alive = true;
            let data;
            try
            {
                data = JSON.parse(msg);
            }
            catch (e)
            {
                // Unparsable messages are reported below as unknown
            }
            if (!data || !data.result)
            {
                console.error('Unknown message received from watch websocket: '+msg);
            }
            else if (data.result.canceled)
            {
                // etcd watch canceled
                if (data.result.compact_revision)
                {
                    // we may miss events if we proceed
                    this.mon.die('Revisions before '+data.result.compact_revision+' were compacted by etcd, exiting');
                }
                this.mon.die('Watch canceled by etcd, reason: '+data.result.cancel_reason+', exiting');
            }
            else if (data.result.created)
            {
                // etcd watch created
                console.log('Successfully subscribed to etcd at '+this.selected_etcd_url+', revision '+data.result.header.revision);
            }
            else
            {
                this.mon.on_message(data.result);
            }
        });
    }
    // Wait until this monitor becomes the master: repeatedly try to create the
    // <etcd_prefix>/mon/master key (bound to mon.etcd_lease_id) in a transaction that
    // only succeeds while the key does not exist, pausing etcd_start_timeout ms between tries
    async become_master()
    {
        const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
        console.log('Waiting to become master');
        // eslint-disable-next-line no-constant-condition
        while (1)
        {
            const res = await this.etcd_call('/kv/txn', {
                compare: [ { target: 'CREATE', create_revision: 0, key: b64(this.mon.config.etcd_prefix+'/mon/master') } ],
                success: [ { requestPut: { key: b64(this.mon.config.etcd_prefix+'/mon/master'), value: b64(JSON.stringify(state)), lease: ''+this.mon.etcd_lease_id } } ],
            }, this.mon.config.etcd_start_timeout, 0);
            if (res.succeeded)
            {
                break;
            }
            await new Promise(ok => setTimeout(ok, this.mon.config.etcd_start_timeout));
        }
        console.log('Became master');
    }
    // POST <body> as JSON to <path> of the selected etcd endpoint and return the parsed reply.
    // <timeout> (ms) limits each request and also the rate of retries to the same endpoint.
    // <retries> in [0, 1) means a single attempt, negative <retries> means retry forever.
    // A transport failure deselects the endpoint so that the next attempt uses another one.
    // Throws if the monitor is stopped, if etcd replies with an `error` field
    // or if all attempts have failed.
    async etcd_call(path, body, timeout, retries)
    {
        let retry = 0;
        if (retries >= 0 && retries < 1)
        {
            retries = 1;
        }
        const tried = {};
        while (retries < 0 || retry < retries)
        {
            retry++;
            const base = this.pick_next_etcd();
            let now = Date.now();
            if (tried[base] && now-tried[base] < timeout)
            {
                await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
                now = Date.now();
            }
            tried[base] = now;
            if (this.mon.stopped)
            {
                throw new Error(MON_STOPPED);
            }
            const res = await POST(base+path, body, timeout);
            if (this.mon.stopped)
            {
                throw new Error(MON_STOPPED);
            }
            if (res.error)
            {
                if (this.selected_etcd_url == base)
                    this.selected_etcd_url = null;
                console.error('Failed to query etcd '+path+' (retry '+retry+'/'+retries+'): '+res.error);
                continue;
            }
            if (res.json)
            {
                if (res.json.error)
                {
                    // An error reported by etcd itself is not retried
                    console.error(path+': etcd returned error: '+res.json.error);
                    break;
                }
                return res.json;
            }
        }
        throw new Error('Failed to query etcd ('+retries+' retries)');
    }
 }
 // Send <body> as a JSON POST request to <url> (http:// or https://).
 // Never rejects for transport errors; resolves with one of:
 //   { response, json }        - HTTP 200 with a JSON body
 //   { error, code }           - non-200 reply (error is the reply body)
 //   { error, response, body } - HTTP 200 with a body that is not valid JSON
 //   { error }                 - connection error, premature close or 'timeout'
 // <timeout> (ms, > 0) limits the wait for the response headers; it is disarmed
 // as soon as the response starts arriving.
 function POST(url, body, timeout)
 {
    return new Promise(ok =>
    {
        const body_text = Buffer.from(JSON.stringify(body));
        let req = null;
        let timer_id = null;
        // Settle the promise and disarm the timeout timer, whichever path finishes first.
        // Only the first call has any effect on the promise
        const finish = (result) =>
        {
            if (timer_id)
            {
                clearTimeout(timer_id);
                timer_id = null;
            }
            ok(result);
        };
        if (timeout > 0)
        {
            timer_id = setTimeout(() =>
            {
                timer_id = null;
                const timed_out = req;
                // Mark the request as abandoned so that a late response is ignored
                req = null;
                ok({ error: 'timeout' });
                if (timed_out)
                    timed_out.destroy();
            }, timeout);
        }
        // etcd addresses may be given with https://, which the http module refuses to handle
        const transport = /^https:/i.test(url) ? https : http;
        req = transport.request(url, { method: 'POST', headers: {
            'Content-Type': 'application/json',
            'Content-Length': body_text.length,
        } }, (res) =>
        {
            if (!req)
            {
                return;
            }
            clearTimeout(timer_id);
            timer_id = null;
            let res_body = '';
            res.setEncoding('utf8');
            res.on('error', (error) => ok({ error }));
            res.on('data', chunk => { res_body += chunk; });
            res.on('end', () =>
            {
                if (res.statusCode != 200)
                {
                    ok({ error: res_body, code: res.statusCode });
                    return;
                }
                try
                {
                    res_body = JSON.parse(res_body);
                    ok({ response: res, json: res_body });
                }
                catch (e)
                {
                    ok({ error: e, response: res, body: res_body });
                }
            });
        });
        req.on('error', (error) => finish({ error }));
        // 'close' also fires after a completed exchange; the promise is already settled then
        req.on('close', () => finish({ error: new Error('Connection closed prematurely') }));
        req.write(body_text);
        req.end();
    });
 }
 module.exports = EtcdAdapter;
--- a/mon/etcd_schema.js
+++ b/mon/etcd_schema.js
@ -1,397 +0,0 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 (see README.md for details)
 // FIXME document all etcd keys and config variables in the form of JSON schema or similar
 // Set of etcd keys (relative to the etcd prefix), stored as { key: 1 }.
 // NOTE(review): judging by the name these are keys expected to hold a non-empty value;
 // the code using this set is outside of this file - confirm there.
 const etcd_nonempty_keys = [
    'config/global',
    'config/node_placement',
    'config/pools',
    'pg/config',
    'history/last_clean_pgs',
    'stats',
 ].reduce((keys, key) => { keys[key] = 1; return keys; }, {});
 // Whitelist of known key paths: a key matches if it matches one of the patterns
 // below in full (every alternative is anchored with ^ and $).
 const etcd_allow = new RegExp([
    'config/global',
    'config/node_placement',
    'config/pools',
    'config/osd/[1-9]\\d*',
    'config/pgs', // old name
    'pg/config',
    'config/inode/[1-9]\\d*/[1-9]\\d*',
    'osd/state/[1-9]\\d*',
    'osd/stats/[1-9]\\d*',
    'osd/inodestats/[1-9]\\d*',
    'osd/space/[1-9]\\d*',
    'mon/master',
    'mon/member/[a-f0-9]+',
    'pg/state/[1-9]\\d*/[1-9]\\d*',
    'pg/stats/[1-9]\\d*/[1-9]\\d*', // old name
    'pgstats/[1-9]\\d*/[1-9]\\d*',
    'pg/history/[1-9]\\d*/[1-9]\\d*',
    'history/last_clean_pgs',
    'inode/stats/[1-9]\\d*/\\d+',
    'pool/stats/[1-9]\\d*',
    'stats',
    'index/image/.*',
    'index/maxid/[1-9]\\d*',
 ].map(key_pattern => '^'+key_pattern+'$').join('|'));
 // Skeleton of the etcd key hierarchy. Only the nesting and the empty `{}` leaves are
 // real data; the block comments document the expected format of the values stored
 // under each key and are not available at runtime.
 // NOTE(review): presumably used as the initial in-memory state of the monitor - confirm in the caller.
 const etcd_tree = {
    config: {
        /* global: {
            // WARNING: NOT ALL OF THESE ARE ACTUALLY CONFIGURABLE HERE
            // THIS IS JUST A POOR MAN'S CONFIG DOCUMENTATION
            // etcd connection
            config_path: "/etc/vitastor/vitastor.conf",
            etcd_prefix: "/vitastor",
            // etcd connection - configurable online
            etcd_address: "10.0.115.10:2379/v3",
            // mon
            etcd_mon_ttl: 5, // min: 1
            etcd_mon_timeout: 1000, // ms. min: 0
            etcd_mon_retries: 5, // min: 0
            mon_change_timeout: 1000, // ms. min: 100
            mon_retry_change_timeout: 50, // ms. min: 10
            mon_stats_timeout: 1000, // ms. min: 100
            osd_out_time: 600, // seconds. min: 0
            placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
            use_old_pg_combinator: false,
            osd_backfillfull_ratio: 0.99,
            // client and osd
            tcp_header_buffer_size: 65536,
            use_sync_send_recv: false,
            use_rdma: true,
            rdma_device: null, // for example, "rocep5s0f0"
            rdma_port_num: 1,
            rdma_gid_index: 0,
            rdma_mtu: 4096,
            rdma_max_sge: 128,
            rdma_max_send: 8,
            rdma_max_recv: 16,
            rdma_max_msg: 132096,
            block_size: 131072,
            disk_alignment: 4096,
            bitmap_granularity: 4096,
            immediate_commit: 'all', // 'none', 'all' or 'small'
            // client - configurable online
            client_max_dirty_bytes: 33554432,
            client_max_dirty_ops: 1024,
            client_enable_writeback: false,
            client_max_buffered_bytes: 33554432,
            client_max_buffered_ops: 1024,
            client_max_writeback_iodepth: 256,
            client_retry_interval: 50, // ms. min: 10
            client_eio_retry_interval: 1000, // ms
            client_retry_enospc: true,
            osd_nearfull_ratio: 0.95,
            // client and osd - configurable online
            log_level: 0,
            peer_connect_interval: 5, // seconds. min: 1
            peer_connect_timeout: 5, // seconds. min: 1
            osd_idle_timeout: 5, // seconds. min: 1
            osd_ping_timeout: 5, // seconds. min: 1
            max_etcd_attempts: 5,
            etcd_quick_timeout: 1000, // ms
            etcd_slow_timeout: 5000, // ms
            etcd_keepalive_timeout: 30, // seconds, default is max(30, etcd_report_interval*2)
            etcd_ws_keepalive_interval: 5, // seconds
            // osd
            etcd_report_interval: 5, // seconds
            etcd_stats_interval: 30, // seconds
            run_primary: true,
            osd_network: null, // "192.168.7.0/24" or an array of masks
            bind_address: "0.0.0.0",
            bind_port: 0,
            readonly: false,
            osd_memlock: false,
            // osd - configurable online
            autosync_interval: 5,
            autosync_writes: 128,
            client_queue_depth: 128, // unused
            recovery_queue_depth: 1,
            recovery_sleep_us: 0,
            recovery_tune_util_low: 0.1,
            recovery_tune_client_util_low: 0,
            recovery_tune_util_high: 1.0,
            recovery_tune_client_util_high: 0.5,
            recovery_tune_interval: 1,
            recovery_tune_agg_interval: 10, // 10 times recovery_tune_interval
            recovery_tune_sleep_min_us: 10, // 10 microseconds
            recovery_pg_switch: 128,
            recovery_sync_batch: 16,
            no_recovery: false,
            no_rebalance: false,
            print_stats_interval: 3,
            slow_log_interval: 10,
            inode_vanish_time: 60,
            auto_scrub: false,
            no_scrub: false,
            scrub_interval: '30d', // 1s/1m/1h/1d
            scrub_queue_depth: 1,
            scrub_sleep: 0, // milliseconds
            scrub_list_limit: 1000, // objects to list on one scrub iteration
            scrub_find_best: true,
            scrub_ec_max_bruteforce: 100, // maximum EC error locator brute-force iterations
            // blockstore - fixed in superblock
            block_size,
            disk_alignment,
            journal_block_size,
            meta_block_size,
            bitmap_granularity,
            journal_device,
            journal_offset,
            journal_size,
            disable_journal_fsync,
            data_device,
            data_offset,
            data_size,
            disable_data_fsync,
            meta_device,
            meta_offset,
            disable_meta_fsync,
            disable_device_lock,
            // blockstore - configurable offline
            inmemory_metadata,
            inmemory_journal,
            journal_sector_buffer_count,
            journal_no_same_sector_overwrites,
            // blockstore - configurable online
            max_write_iodepth,
            min_flusher_count: 1,
            max_flusher_count: 256,
            throttle_small_writes: false,
            throttle_target_iops: 100,
            throttle_target_mbs: 100,
            throttle_target_parallelism: 1,
            throttle_threshold_us: 50,
        }, */
        global: {},
        /* node_placement: {
            host1: { level: 'host', parent: 'rack1' },
            ...
        }, */
        node_placement: {},
        /* pools: {
            <id>: {
                name: 'testpool',
                // 'ec' uses Reed-Solomon-Vandermonde codes, 'jerasure' is an alias for 'ec'
                scheme: 'replicated' | 'xor' | 'ec' | 'jerasure',
                pg_size: 3,
                pg_minsize: 2,
                // number of parity chunks, required for EC
                parity_chunks?: 1,
                pg_count: 100,
                // default is failure_domain=host
                failure_domain?: 'host',
                // additional failure domain rules; failure_domain=x is equivalent to x=123..N
                level_placement?: 'dc=112233 host=123456',
                raw_placement?: 'any, dc=1 host!=1, dc=1 host!=(1,2)',
                old_combinator: false,
                max_osd_combinations: 10000,
                // block_size, bitmap_granularity, immediate_commit must match all OSDs used in that pool
                block_size: 131072,
                bitmap_granularity: 4096,
                // 'all'/'small'/'none', same as in OSD options
                immediate_commit: 'all',
                pg_stripe_size: 0,
                root_node?: 'rack1',
                // restrict pool to OSDs having all of these tags
                osd_tags?: 'nvme' | [ 'nvme', ... ],
                // prefer to put primary on OSD with these tags
                primary_affinity_tags?: 'nvme' | [ 'nvme', ... ],
                // scrub interval
                scrub_interval?: '30d',
            },
            ...
        }, */
        pools: {},
        osd: {
            /* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
        },
        /* inode: {
            <pool_id>: {
                <inode_t>: {
                    name: string,
                    size?: uint64_t, // bytes
                    parent_pool?: <pool_id>,
                    parent_id?: <inode_t>,
                    readonly?: boolean,
                }
            }
        }, */
        inode: {},
    },
    osd: {
        state: {
            /* <osd_num_t>: {
                state: "up",
                addresses: string[],
                host: string,
                port: uint16_t,
                primary_enabled: boolean,
                blockstore_enabled: boolean,
            }, */
        },
        stats: {
            /* <osd_num_t>: {
                time: number, // unix time
                data_block_size: uint64_t, // bytes
                bitmap_granularity: uint64_t, // bytes
                immediate_commit: "all"|"small"|"none",
                blockstore_ready: boolean,
                size: uint64_t, // bytes
                free: uint64_t, // bytes
                host: string,
                op_stats: {
                    <string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
                },
                subop_stats: {
                    <string>: { count: uint64_t, usec: uint64_t },
                },
                recovery_stats: {
                    degraded: { count: uint64_t, bytes: uint64_t },
                    misplaced: { count: uint64_t, bytes: uint64_t },
                },
            }, */
        },
        inodestats: {
            /* <pool_id>: {
                <inode_t>: {
                    read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
                    write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
                    delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
                },
            }, */
        },
        space: {
            /* <osd_num_t>: {
                <pool_id>: {
                    <inode_t>: uint64_t, // bytes
                },
            }, */
        },
    },
    mon: {
        master: {
            /* ip: [ string ], id: uint64_t */
        },
        member: {
            /* <uint64_t>: { ip: [ string ] }, */
        },
    },
    pg: {
        /* config: {
            hash: string,
            items: {
                <pool_id>: {
                    <pg_id>: {
                        osd_set: [ 1, 2, 3 ],
                        primary: 1,
                        pause: false,
                    }
                }
            }
        }, */
        config: {},
        state: {
            /* <pool_id>: {
                <pg_id>: {
                    primary: osd_num_t,
                    state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
                        "degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
                        "has_invalid"|"has_inconsistent"|"has_corrupted"|"left_on_dead"|"scrubbing")[],
                }
            }, */
        },
        history: {
            /* <pool_id>: {
                <pg_id>: {
                    osd_sets: osd_num_t[][],
                    all_peers: osd_num_t[],
                    epoch: uint64_t,
                    next_scrub: uint64_t,
                },
            }, */
        },
    },
    pgstats: {
        /* <pool_id>: {
            <pg_id>: {
                object_count: uint64_t,
                clean_count: uint64_t,
                misplaced_count: uint64_t,
                degraded_count: uint64_t,
                incomplete_count: uint64_t,
                write_osd_set: osd_num_t[],
            },
        }, */
    },
    inode: {
        stats: {
            /* <pool_id>: {
                <inode_t>: {
                    raw_used: uint64_t, // raw used bytes on OSDs
                    read: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
                    write: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
                    delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
                },
            }, */
        },
    },
    pool: {
        stats: {
            /* <pool_id>: {
                used_raw_tb: float, // used raw space in the pool
                total_raw_tb: float, // maximum amount of space in the pool
                raw_to_usable: float, // raw to usable ratio
                space_efficiency: float, // 0..1
            } */
        },
    },
    stats: {
        /* op_stats: {
            <string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
        },
        subop_stats: {
            <string>: { count: uint64_t, usec: uint64_t, iops: uint64_t, lat: uint64_t },
        },
        recovery_stats: {
            degraded: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
            misplaced: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
        },
        object_counts: {
            object: uint64_t,
            clean: uint64_t,
            misplaced: uint64_t,
            degraded: uint64_t,
            incomplete: uint64_t,
        },
        object_bytes: {
            total: uint64_t,
            clean: uint64_t,
            misplaced: uint64_t,
            degraded: uint64_t,
            incomplete: uint64_t,
        }, */
    },
    history: {
        last_clean_pgs: {},
    },
    index: {
        image: {
            /* <name>: {
                id: uint64_t,
                pool_id: uint64_t,
            }, */
        },
        maxid: {
            /* <pool_id>: uint64_t, */
        },
    },
 };
 module.exports = {
    etcd_nonempty_keys,
    etcd_allow,
    etcd_tree,
 };
--- a/mon/http_server.js
+++ b/mon/http_server.js
@ -1,50 +0,0 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 (see README.md for details)
 const fsp = require('fs').promises;
 const http = require('http');
 const https = require('https');
 // Create the HTTP or HTTPS server of the monitor and wait until it is listening.
 // cfg.mon_https_cert switches to HTTPS (with cfg.mon_https_key as the private key),
 // cfg.mon_https_ca sets the trusted CA, cfg.mon_https_client_auth requests client certificates.
 // Listens on cfg.mon_http_ip (default: all addresses) and cfg.mon_http_port (default 8060).
 // Returns the listening server, or null (after logging the reason) if listen() fails.
 // Errors reading the key/certificate files are not caught here.
 async function create_http_server(cfg, handler)
 {
    let server;
    if (cfg.mon_https_cert)
    {
        const tls = {
            key: await fsp.readFile(cfg.mon_https_key),
            cert: await fsp.readFile(cfg.mon_https_cert),
        };
        if (cfg.mon_https_ca)
        {
            // The TLS option is called "ca"; an unknown option name would be silently ignored
            tls.ca = await fsp.readFile(cfg.mon_https_ca);
        }
        if (cfg.mon_https_client_auth)
        {
            tls.requestCert = true;
        }
        server = https.createServer(tls, handler);
    }
    else
    {
        server = http.createServer(handler);
    }
    const port = cfg.mon_http_port || 8060;
    const ip = cfg.mon_http_ip || undefined;
    try
    {
        // listen() reports failures like EADDRINUSE asynchronously through the 'error'
        // event, so the outcome has to be awaited instead of checked right after the call
        await new Promise((resolve, reject) =>
        {
            const on_error = (e) =>
            {
                server.removeListener('listening', on_listening);
                reject(e);
            };
            const on_listening = () =>
            {
                server.removeListener('error', on_error);
                resolve();
            };
            server.once('error', on_error);
            server.once('listening', on_listening);
            server.listen(port, ip);
        });
    }
    catch (e)
    {
        console.error(
            'HTTP server disabled because listen at address: '+
            (cfg.mon_http_ip || '')+':'+port+' failed with error: '+e
        );
        return null;
    }
    // Keep later server-level errors from becoming uncaught exceptions
    server.on('error', e => console.error('HTTP server error: '+e));
    return server;
 }
 module.exports = { create_http_server };
--- a/mon/lp_optimizer/lp_optimizer.js
+++ b/mon/lp_optimizer/lp_optimizer.js
@ -50,15 +50,15 @@ async function lp_solve(text)
    return { score, vars };
 }
-// osd_weights = { [id]: weight }
+async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize = 2, max_combinations = 10000, parity_space = 1, ordered = false })
 async function optimize_initial({ osd_weights, combinator, pg_count, pg_size = 3, pg_minsize = 2, parity_space = 1, ordered = false })
 {
-    if (!pg_count || !osd_weights)
+    if (!pg_count || !osd_tree)
    {
        return null;
    }
-    const total_weight = Object.values(osd_weights).reduce((a, c) => Number(a) + Number(c), 0);
+    const all_weights = Object.assign({}, ...Object.values(osd_tree));
-    const all_pgs = Object.values(make_cyclic(combinator.random_combinations(), parity_space));
+    const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
    const all_pgs = Object.values(random_combinations(osd_tree, pg_size, max_combinations, parity_space > 1));
    const pg_per_osd = {};
    for (const pg of all_pgs)
    {
@ -69,15 +69,15 @@ async function optimize_initial({ osd_weights, combinator, pg_count, pg_size = 3
            pg_per_osd[osd].push((i >= pg_minsize ? parity_space+'*' : '')+"pg_"+pg.join("_"));
        }
    }
-    let pg_effsize = all_pgs.reduce((a, c) => Math.max(a, c.filter(e => e != NO_OSD).length), 0);
+    const pg_effsize = Math.min(pg_minsize, Object.keys(osd_tree).length)
-    pg_effsize = Math.min(pg_minsize, pg_effsize) + Math.max(0, Math.min(pg_size, pg_effsize) - pg_minsize) * parity_space;
+        + Math.max(0, Math.min(pg_size, Object.keys(osd_tree).length) - pg_minsize) * parity_space;
    let lp = '';
    lp += "max: "+all_pgs.map(pg => 'pg_'+pg.join('_')).join(' + ')+";\n";
    for (const osd in pg_per_osd)
    {
        if (osd !== NO_OSD)
        {
-            let osd_pg_count = osd_weights[osd]/total_weight*pg_effsize*pg_count;
+            let osd_pg_count = all_weights[osd]/total_weight*pg_effsize*pg_count;
            lp += pg_per_osd[osd].join(' + ')+' <= '+osd_pg_count+';\n';
        }
    }
@ -93,7 +93,7 @@ async function optimize_initial({ osd_weights, combinator, pg_count, pg_size = 3
        throw new Error('Problem is infeasible or unbounded - is it a bug?');
    }
    const int_pgs = make_int_pgs(lp_result.vars, pg_count, ordered);
-    const eff = pg_list_space_efficiency(int_pgs, osd_weights, pg_minsize, parity_space);
+    const eff = pg_list_space_efficiency(int_pgs, all_weights, pg_minsize, parity_space);
    const res = {
        score: lp_result.score,
        weights: lp_result.vars,
@ -104,22 +104,6 @@ async function optimize_initial({ osd_weights, combinator, pg_count, pg_size = 3
    return res;
 }
 // Extend a set of PG combinations with all cyclic rotations of every combination.
 // pgs is an object { 'pg_<osd>_<osd>...': [ osd, osd, ... ] }; it is modified in place
 // and returned. Nothing is added unless parity_space > 1.
 function make_cyclic(pgs, parity_space)
 {
    if (parity_space > 1)
    {
        // Iterate over a snapshot of the names because rotations are inserted into the same
        // object. The values (not the key strings) are the OSD arrays that have to be rotated
        for (const pg_name of Object.keys(pgs))
        {
            const pg = pgs[pg_name];
            for (let i = 1; i < pg.length; i++)
            {
                const cyclic = [ ...pg.slice(i), ...pg.slice(0, i) ];
                const cyclic_name = 'pg_'+cyclic.join('_');
                // Keep combinations that are already present untouched
                if (!(cyclic_name in pgs))
                {
                    pgs[cyclic_name] = cyclic;
                }
            }
        }
    }
    return pgs;
 }
 function shuffle(array)
 {
    for (let i = array.length - 1, j, x; i > 0; i--)
@ -215,7 +199,7 @@ function calc_intersect_weights(old_pg_size, pg_size, pg_count, prev_weights, al
                {
                    const intersect_count = ordered
                        ? pg.reduce((a, osd, i) => a + (prev_hash[osd] == 1+i ? 1 : 0), 0)
-                        : pg.reduce((a, osd) => a + (prev_hash[osd] ? 1 : 0), 0);
+                        : pg.reduce((a, osd, i) => a + (prev_hash[osd] ? 1 : 0), 0);
                    if (max_int < intersect_count)
                    {
                        max_int = intersect_count;
@ -232,17 +216,47 @@ function calc_intersect_weights(old_pg_size, pg_size, pg_count, prev_weights, al
    return move_weights;
 }
-// Try to minimize data movement
+function add_valid_previous(osd_tree, prev_weights, all_pgs)
 async function optimize_change({ prev_pgs: prev_int_pgs, osd_weights, combinator, pg_size = 3, pg_minsize = 2, parity_space = 1, ordered = false })
 {
-    if (!osd_weights)
+    // Add previous combinations that are still valid
    const hosts = Object.keys(osd_tree).sort();
    const host_per_osd = {};
    for (const host in osd_tree)
    {
        for (const osd in osd_tree[host])
        {
            host_per_osd[osd] = host;
        }
    }
    skip_pg: for (const pg_name in prev_weights)
    {
        const seen_hosts = {};
        const pg = pg_name.substr(3).split(/_/);
        for (const osd of pg)
        {
            if (!host_per_osd[osd] || seen_hosts[host_per_osd[osd]])
            {
                continue skip_pg;
            }
            seen_hosts[host_per_osd[osd]] = true;
        }
        if (!all_pgs[pg_name])
        {
            all_pgs[pg_name] = pg;
        }
    }
 }
 // Try to minimize data movement
 async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3, pg_minsize = 2, max_combinations = 10000, parity_space = 1, ordered = false })
 {
    if (!osd_tree)
    {
        return null;
    }
    // FIXME: use parity_chunks with parity_space instead of pg_minsize
-    let all_pgs = make_cyclic(combinator.random_combinations(), parity_space);
+    const pg_effsize = Math.min(pg_minsize, Object.keys(osd_tree).length)
-    let pg_effsize = Object.values(all_pgs).reduce((a, c) => Math.max(a, c.filter(e => e != NO_OSD).length), 0);
+        + Math.max(0, Math.min(pg_size, Object.keys(osd_tree).length) - pg_minsize) * parity_space;
    pg_effsize = Math.min(pg_minsize, pg_effsize) + Math.max(0, Math.min(pg_size, pg_effsize) - pg_minsize) * parity_space;
    const pg_count = prev_int_pgs.length;
    const prev_weights = {};
    const prev_pg_per_osd = {};
@ -259,13 +273,10 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_weights, combinator
    }
    const old_pg_size = prev_int_pgs[0].length;
    // Get all combinations
    let all_pgs = random_combinations(osd_tree, pg_size, max_combinations, parity_space > 1);
    if (old_pg_size == pg_size)
    {
-        const still_valid = combinator.check_combinations(Object.keys(prev_weights).map(pg_name => pg_name.substr(3).split('_')));
+        add_valid_previous(osd_tree, prev_weights, all_pgs);
        for (const pg of still_valid)
        {
            all_pgs['pg_'+pg.join('_')] = pg;
        }
    }
    all_pgs = Object.values(all_pgs);
    const pg_per_osd = {};
@ -284,7 +295,8 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_weights, combinator
    // Calculate total weight - old PG weights
    const all_pg_names = all_pgs.map(pg => 'pg_'+pg.join('_'));
    const all_pgs_hash = all_pg_names.reduce((a, c) => { a[c] = true; return a; }, {});
-    const total_weight = Object.values(osd_weights).reduce((a, c) => Number(a) + Number(c), 0);
+    const all_weights = Object.assign({}, ...Object.values(osd_tree));
    const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
    // Generate the LP problem
    let lp = '';
    lp += 'max: '+all_pg_names.map(pg_name => (
@ -299,7 +311,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_weights, combinator
            )).join(' + ');
            const rm_osd_pg_count = (prev_pg_per_osd[osd]||[])
                .reduce((a, [ old_pg_name, space ]) => (a + (all_pgs_hash[old_pg_name] ? space : 0)), 0);
-            const osd_pg_count = osd_weights[osd]*pg_effsize/total_weight*pg_count - rm_osd_pg_count;
+            const osd_pg_count = all_weights[osd]*pg_effsize/total_weight*pg_count - rm_osd_pg_count;
            lp += osd_sum + ' <= ' + osd_pg_count + ';\n';
        }
    }
@ -409,7 +421,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_weights, combinator
        int_pgs: new_pgs,
        differs,
        osd_differs,
-        space: pg_effsize * pg_list_space_efficiency(new_pgs, osd_weights, pg_minsize, parity_space),
+        space: pg_effsize * pg_list_space_efficiency(new_pgs, all_weights, pg_minsize, parity_space),
        total_space: total_weight,
    };
 }
@ -490,6 +502,198 @@ function put_aligned_pgs(aligned_pgs, int_pgs, prev_int_pgs, keygen)
    }
 }
 // Flatten a multi-level OSD tree into the two-level form used by all_combinations():
 //   { dom1: { osd_id: size, ... }, dom2: { ... }, ... }
 // osd_tree = { level: number|string, id?: string, size?: number, children?: osd_tree }[]
 // levels = { string: number }, maps level names to numeric ranks
 // failure_domain_level, osd_level = level name (resolved through levels) or a numeric rank
 // domains = result accumulator, i = shared domain counter; both are passed down the recursion
 function flatten_tree(osd_tree, levels, failure_domain_level, osd_level, domains = {}, i = { i: 1 })
 {
    const osd_rank = levels[osd_level] || osd_level;
    const domain_rank = levels[failure_domain_level] || failure_domain_level;
    for (const node of osd_tree)
    {
        const node_rank = levels[node.level] || node.level;
        if (node_rank < domain_rank)
        {
            // Still above the failure domain level - descend into children
            flatten_tree(node.children || [], levels, domain_rank, osd_rank, domains, i);
            continue;
        }
        // The node itself becomes a failure domain with the next sequential 'domN' key
        const domain_no = i.i++;
        domains['dom'+domain_no] = extract_osds([ node ], levels, osd_rank);
    }
    return domains;
 }
 // Collect { osd_id: size } for every node at or below osd_level found in the given subtree.
 // osd_level is compared as is (the caller is expected to pass an already resolved rank);
 // osds is the accumulator object which is filled in place and returned.
 function extract_osds(osd_tree, levels, osd_level, osds = {})
 {
    for (const node of osd_tree)
    {
        const node_rank = levels[node.level] || node.level;
        if (!(node_rank >= osd_level))
        {
            // Intermediate node - look for OSDs among its children
            extract_osds(node.children || [], levels, osd_level, osds);
            continue;
        }
        osds[node.id] = node.size;
    }
    return osds;
 }
 // Generate a reproducible pseudo-random set of PG candidates.
 // osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
 // pg_size = number of positions in a PG; missing positions are filled with NO_OSD
 // count = number of additional purely random combinations to try
 // ordered = don't treat (x,y) and (y,x) as equal
 // Returns { 'pg_<id>_<id>...': [ id, ... ] }; duplicates collapse because the key is derived from the OSD list.
 function random_combinations(osd_tree, pg_size, count, ordered)
 {
    // Fixed-seed shift/xor generator, so the result is the same for the same input
    let seed = 0x5f020e43;
    let rng = () =>
    {
        seed ^= seed << 13;
        seed ^= seed >> 17;
        seed ^= seed << 5;
        // seed is a signed 32-bit value here, shift it into the non-negative range
        return seed + 2147483648;
    };
    const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
    // Failure domains without OSDs can't be selected at all
    const hosts = Object.keys(osd_tree).sort().filter(h => osds[h].length > 0);
    const r = {};
    // Generate random combinations including each OSD at least once
    for (let h = 0; h < hosts.length; h++)
    {
        for (let o = 0; o < osds[hosts[h]].length; o++)
        {
            const pg = [ osds[hosts[h]][o] ];
            const cur_hosts = [ ...hosts ];
            cur_hosts.splice(h, 1);
            for (let i = 1; i < pg_size && i < hosts.length; i++)
            {
                const next_host = rng() % cur_hosts.length;
                const next_osd = rng() % osds[cur_hosts[next_host]].length;
                pg.push(osds[cur_hosts[next_host]][next_osd]);
                // Each failure domain may be used only once per PG
                cur_hosts.splice(next_host, 1);
            }
            const cyclic_pgs = [ pg ];
            if (ordered)
            {
                // Also add all cyclic rotations of the combination.
                // Arrays have no .size property, so the loop bound must be pg.length,
                // otherwise the rotations are silently never generated.
                for (let i = 1; i < pg.length; i++)
                {
                    cyclic_pgs.push([ ...pg.slice(i), ...pg.slice(0, i) ]);
                }
            }
            for (const pg of cyclic_pgs)
            {
                while (pg.length < pg_size)
                {
                    pg.push(NO_OSD);
                }
                r['pg_'+pg.join('_')] = pg;
            }
        }
    }
    // Generate purely random combinations
    while (count > 0)
    {
        let host_idx = [];
        const cur_hosts = [ ...hosts.map((h, i) => i) ];
        const max_hosts = pg_size < hosts.length ? pg_size : hosts.length;
        if (ordered)
        {
            // Any order of failure domains is allowed - pick from all remaining ones
            for (let i = 0; i < max_hosts; i++)
            {
                const r = rng() % cur_hosts.length;
                host_idx[i] = cur_hosts[r];
                cur_hosts.splice(r, 1);
            }
        }
        else
        {
            // Keep failure domains in ascending order: pick one, drop it and everything before it,
            // always leaving enough domains for the remaining positions
            for (let i = 0; i < max_hosts; i++)
            {
                const r = rng() % (cur_hosts.length - (max_hosts - i - 1));
                host_idx[i] = cur_hosts[r];
                cur_hosts.splice(0, r+1);
            }
        }
        let pg = host_idx.map(h => osds[hosts[h]][rng() % osds[hosts[h]].length]);
        while (pg.length < pg_size)
        {
            pg.push(NO_OSD);
        }
        r['pg_'+pg.join('_')] = pg;
        count--;
    }
    return r;
 }
 // Super-stupid algorithm. Given the current OSD tree, generate all possible OSD combinations
 // osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
 // ordered = return combinations without duplicates having different order
 // pg_size = number of positions in every combination
 // count = optional limit on the number of returned combinations; falsy or negative means no limit
 // Returns an array of combinations, each one being an array of pg_size OSD ids (object keys of osd_tree entries)
 function all_combinations(osd_tree, pg_size, ordered, count)
 {
    const hosts = Object.keys(osd_tree).sort();
    const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
    // Not enough failure domains: pad the list with pseudo-hosts holding a single NO_OSD entry
    while (hosts.length < pg_size)
    {
        osds[NO_OSD] = [ NO_OSD ];
        hosts.push(NO_OSD);
    }
    // host_idx[i] = index of the host used for position i, osd_idx[i] = index of the OSD inside that host
    let host_idx = [];
    let osd_idx = [];
    for (let i = 0; i < pg_size; i++)
    {
        host_idx.push(i);
        osd_idx.push(0);
    }
    const r = [];
    while (!count || count < 0 || r.length < count)
    {
        r.push(host_idx.map((hi, i) => osds[hosts[hi]][osd_idx[i]]));
        // Advance OSD indexes like an odometer, starting from the last position
        let inc = pg_size-1;
        while (inc >= 0)
        {
            osd_idx[inc]++;
            if (osd_idx[inc] >= osds[hosts[host_idx[inc]]].length)
            {
                osd_idx[inc] = 0;
                inc--;
            }
            else
            {
                break;
            }
        }
        if (inc < 0)
        {
            // no osds left in the current host combination, select the next one
            inc = pg_size-1;
            same_again: while (inc >= 0)
            {
                host_idx[inc]++;
                // Skip host indexes already taken by earlier positions
                for (let prev_host = 0; prev_host < inc; prev_host++)
                {
                    if (host_idx[prev_host] == host_idx[inc])
                    {
                        continue same_again;
                    }
                }
                if (host_idx[inc] < (ordered ? hosts.length-(pg_size-1-inc) : hosts.length))
                {
                    // Reset all following positions to their lowest values
                    // NOTE(review): in the non-ordered case following positions are reset to 0 without
                    // re-checking them against earlier positions - verify that this can't repeat a host
                    while ((++inc) < pg_size)
                    {
                        host_idx[inc] = (ordered ? host_idx[inc-1]+1 : 0);
                    }
                    break;
                }
                else
                {
                    // This position is exhausted, carry over to the previous one
                    inc--;
                }
            }
            if (inc < 0)
            {
                // All host combinations are enumerated
                break;
            }
        }
    }
    return r;
 }
 function pg_weights_space_efficiency(weights, pg_count, osd_sizes)
 {
    const per_osd = {};
@ -548,8 +752,11 @@ module.exports = {
    pg_weights_space_efficiency,
    pg_list_space_efficiency,
    pg_per_osd_space_efficiency,
    flatten_tree,
    lp_solve,
    make_int_pgs,
    align_pgs,
    random_combinations,
    all_combinations,
 };
--- a/mon/lp_optimizer/dsl_pgs.js
+++ b/mon/lp_optimizer/dsl_pgs.js
@ -1,409 +0,0 @@
 const { select_murmur3 } = require('./murmur3.js');
 const NO_OSD = 'Z';
 // PG combination generator/validator driven by custom placement rules
 class RuleCombinator
 {
    // osd_tree = object whose values are tree nodes; nodes without a truthy id are ignored
    // rules, max_combinations, ordered = stored as is and passed to the rule-based helpers
    constructor(osd_tree, rules, max_combinations, ordered)
    {
        const identified_nodes = Object.values(osd_tree).filter(node => node.id);
        this.osd_tree = index_tree(identified_nodes);
        this.rules = rules;
        this.max_combinations = max_combinations;
        this.ordered = ordered;
    }
    // Generate candidate PGs for the stored tree and rules
    random_combinations()
    {
        const { osd_tree, rules, max_combinations, ordered } = this;
        return random_custom_combinations(osd_tree, rules, max_combinations, ordered);
    }
    // Return only those of the given PGs which still satisfy the stored rules
    check_combinations(pgs)
    {
        const { osd_tree, rules } = this;
        return check_custom_combinations(osd_tree, rules, pgs);
    }
 }
 // Convert alternative "level-index" format to rules
 // level_index = { [level: string]: string | string[] }
 // level_sequence = optional, levels from upper to lower, i.e. [ 'dc', 'host' ]
 // Example: level_index = { dc: "112233", host: "ABCDEF" }
 // Returns rules in the form [ level, operator, value ][][] - one rule list per PG position.
 // Positions in rule values are 1-based.
 function parse_level_indexes(level_index, level_sequence)
 {
    const rules = [];
    // NOTE(review): lvl_first is filled below but never read within this function
    const lvl_first = {};
    for (const level in level_index)
    {
        const idx = level_index[level];
        // Make sure there is a rule list for every position
        while (rules.length < idx.length)
        {
            rules.push([]);
        }
        // seen[symbol] = 1-based position where the symbol was first encountered
        const seen = {};
        for (let i = 0; i < idx.length; i++)
        {
            if (!seen[idx[i]])
            {
                // New symbol: it must differ from all symbols seen at previous positions
                const other = Object.values(seen);
                if (other.length)
                {
                    rules[i].push([ level, '!=', other ]);
                }
                seen[idx[i]] = i+1;
            }
            else
            {
                // Repeated symbol: it must be equal to its first occurrence
                rules[i].push([ level, '=', seen[idx[i]] ]);
            }
        }
        lvl_first[level] = seen;
    }
    if (level_sequence)
    {
        // Prune useless rules for the sake of prettiness
        // For simplicity, call "upper" level DC and "lower" level host
        // level_prio[level name] = index of the level in level_sequence (as a string key)
        const level_prio = Object.keys(level_sequence).reduce((a, c) => { a[level_sequence[c]] = c; return a; }, {});
        for (let upper_i = 0; upper_i < level_sequence.length-1; upper_i++)
        {
            const upper_level = level_sequence[upper_i];
            for (let i = 0; i < rules.length; i++)
            {
                // noteq[position] = true if the upper level already differs between that position and position i
                const noteq = {};
                for (let k = 0; k < level_index[upper_level].length; k++)
                {
                    // If upper_level[x] is different from upper_level[y]
                    // then lower_level[x] is also different from lower_level[y]
                    if (level_index[upper_level][k] != level_index[upper_level][i])
                    {
                        noteq[k+1] = true;
                    }
                }
                for (let j = 0; j < rules[i].length; j++)
                {
                    // Only '!=' rules of levels located below upper_level in level_sequence are pruned
                    if (level_prio[rules[i][j][0]] != null && level_prio[rules[i][j][0]] > upper_i && rules[i][j][1] == '!=')
                    {
                        rules[i][j][2] = rules[i][j][2].filter(other_host => !noteq[other_host]);
                        if (!rules[i][j][2].length)
                        {
                            // The rule became empty - remove it and re-check the same index
                            rules[i].splice(j--, 1);
                        }
                    }
                }
            }
        }
    }
    return rules;
 }
 // Parse rules in DSL format
 // dsl := item | item ("\n" | ",") items
 // item := "any" | rules
 // rules := rule | rule rules
 // rule := level operator arg
 // level := /\w+/
 // operator := "!=" | "=" | ">" | "?="
 // arg := value | "(" values ")"
 // values := value | value "," values
 // value := item_ref | constant_id
 // item_ref := /\d+/
 // constant_id := /"([^"]+)"/
 //
 // Output: [ level, operator, value ][][]
 // where value is a number (1-based item reference), { id: string } (constant) or an array of these.
 // Throws Error on unexpected tokens, premature end of input, zero references and
 // references to items located after the current one.
 function parse_pg_dsl(text)
 {
    // Each token is [ token text, offset in the source text ]
    const tokens = [ ...text.matchAll(/\w+|!=|\?=|[>=\(\),\n]|"([^\"]+)"/g) ].map(t => [ t[0], t.index ]);
    // positions = rule lists of all items, rules = rule list of the item being parsed
    let positions = [ [] ];
    let rules = positions[0];
    for (let i = 0; i < tokens.length; )
    {
        if (tokens[i][0] === '\n' || tokens[i][0] === ',')
        {
            // Item separator - begin the next item
            rules = [];
            positions.push(rules);
            i++;
        }
        else if (!rules.length && tokens[i][0] === 'any' && (i == tokens.length-1 || tokens[i+1][0] === ',' || tokens[i+1][0] === '\n'))
        {
            // A lone "any" means an item without rules
            i++;
        }
        else
        {
            // A rule consists of at least 3 tokens: level, operator and the beginning of the argument
            if (!/^\w/.exec(tokens[i][0]))
            {
                throw new Error('Unexpected '+tokens[i][0]+' at '+tokens[i][1]+' (level name expected)');
            }
            if (i > tokens.length-3)
            {
                throw new Error('Unexpected EOF (operator and value expected)');
            }
            if (/^\w/.exec(tokens[i+1][0]) || tokens[i+1][0] === ',' || tokens[i+1][0] === '\n')
            {
                throw new Error('Unexpected '+tokens[i+1][0]+' at '+tokens[i+1][1]+' (operator expected)');
            }
            if (!/^[\w"(]/.exec(tokens[i+2][0])) // "
            {
                throw new Error('Unexpected '+tokens[i+2][0]+' at '+tokens[i+2][1]+' (id, round brace, number or node ID expected)');
            }
            let rule = [ tokens[i][0], tokens[i+1][0], tokens[i+2][0] ];
            i += 3;
            if (rule[2][0] == '"')
            {
                // Quoted constant - strip the quotes
                rule[2] = { id: rule[2].substr(1, rule[2].length-2) };
            }
            else if (rule[2] === '(')
            {
                // List of values in round braces
                rule[2] = [];
                // eslint-disable-next-line no-constant-condition
                while (true)
                {
                    if (i > tokens.length-1)
                    {
                        throw new Error('Unexpected EOF (expected list and a closing round brace)');
                    }
                    if (tokens[i][0] === ',')
                    {
                        i++;
                    }
                    else if (tokens[i][0] === ')')
                    {
                        i++;
                        break;
                    }
                    else if (tokens[i][0][0] === '"')
                    {
                        rule[2].push({ id: tokens[i][0].substr(1, tokens[i][0].length-2) });
                        i++;
                    }
                    else if (/^\d+$/.exec(tokens[i][0]))
                    {
                        // Purely numeric token is a reference to another item
                        const n = 0|tokens[i][0];
                        if (!n)
                        {
                            throw new Error('Level reference cannot be 0 (refs count from 1) at '+tokens[i][1]);
                        }
                        else if (n > positions.length)
                        {
                            // NOTE(review): a reference equal to the number of the current item passes this check - confirm it's intended
                            throw new Error('Forward references are forbidden at '+tokens[i][1]);
                        }
                        rule[2].push(n);
                        i++;
                    }
                    else if (!/^\w/.exec(tokens[i][0]))
                    {
                        throw new Error('Unexpected '+tokens[i][0]+' at '+tokens[i][1]+' (number or node ID expected)');
                    }
                    else
                    {
                        // Unquoted word is also a constant ID
                        rule[2].push({ id: tokens[i][0] });
                        i++;
                    }
                }
            }
            else if (!/^\d+$/.exec(rule[2]))
            {
                // Unquoted non-numeric word is a constant ID
                rule[2] = { id: rule[2] };
            }
            else
            {
                // Single numeric reference to another item
                rule[2] = 0|rule[2];
                if (!rule[2])
                {
                    throw new Error('Level reference cannot be 0 (refs count from 1) at '+tokens[i-1][1]);
                }
                else if (rule[2] > positions.length)
                {
                    throw new Error('Forward references are forbidden at '+tokens[i-1][1]);
                }
            }
            rules.push(rule);
        }
    }
    return positions;
 }
 // Generate pseudo-random PG candidates which satisfy custom placement rules
 // osd_tree = index_tree() output
 // rules = [ level, operator, value ][][], one rule list per PG position
 //   level = string
 //   operator = '=' | '!=' | '>' | '?='
 //   value = number|number[] | { id: string|string[] }
 // count = number of pseudo-random selections to make in addition to the per-OSD ones
 // ordered = when false, OSDs of every PG are sorted so that different orders collapse into one key
 // examples:
 // 1) simple 3 replicas with failure_domain=host:
 //    [ [], [ [ 'host', '!=', 1 ] ], [ [ 'host', '!=', [ 1, 2 ] ] ] ]
 //    in DSL form: any, host!=1, host!=(1,2)
 // 2) EC 4+2 in 3 DC:
 //    [ [], [ [ 'dc', '=', 1 ], [ 'host', '!=', 1 ] ],
 //      [ [ 'dc', '!=', 1 ] ], [ [ 'dc', '=', 3 ], [ 'host', '!=', 3 ] ],
 //      [ [ 'dc', '!=', [ 1, 3 ] ] ], [ [ 'dc', '=', 5 ], [ 'host', '!=', 5 ] ] ]
 //    in DSL form: any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5
 // 3) 1 replica in fixed DC + 2 in random DCs:
 //    [ [ [ 'dc', '=', { id: 'meow' } ] ], [ [ 'dc', '!=', 1 ] ], [ [ 'dc', '!=', [ 1, 2 ] ] ] ]
 //    in DSL form: dc=meow, dc!=1, dc!=(1,2)
 // 4) 2 replicas in each DC (almost the same as (2)):
 //    DSL: any, dc=1 host!=1, dc!=1, dc=3 host!=3
 // Alternative simpler way to specify rules would be: [ DC: 112233 HOST: 123456 ]
 function random_custom_combinations(osd_tree, rules, count, ordered)
 {
    const result = {};
    const first = filter_tree_by_rules(osd_tree, rules[0], []);
    let max_size = 0;
    // Choose one of the candidates by the maximum hash of its key; an empty placeholder if there are none
    const pick = (candidates, key_of) =>
    {
        const idx = select_murmur3(candidates.length, j => key_of(candidates[j]));
        return idx == null ? { levels: {}, id: null } : candidates[idx];
    };
    // Convert a finished selection to a PG, remember it and track the maximum number of filled positions
    const remember = (selected) =>
    {
        const size = selected.filter(s => s.id !== null).length;
        if (max_size < size)
        {
            max_size = size;
        }
        const pg = selected.map(s => s.id === null ? NO_OSD : (0|s.id));
        if (!ordered)
        {
            pg.sort();
        }
        result['pg_'+pg.join('_')] = pg;
    };
    // All combinations for the first item (usually "any") to try to include each OSD at least once
    for (const f of first)
    {
        const selected = [ f ];
        for (let i = 1; i < rules.length; i++)
        {
            const candidates = filter_tree_by_rules(osd_tree, rules[i], selected);
            selected.push(pick(candidates, item => 'p:'+f.id+':'+item.id));
        }
        remember(selected);
    }
    // Pseudo-random selection
    for (let n = 0; n < count; n++)
    {
        const selected = [];
        for (const item_rules of rules)
        {
            const candidates = selected.length ? filter_tree_by_rules(osd_tree, item_rules, selected) : first;
            selected.push(pick(candidates, item => n+':'+item.id));
        }
        remember(selected);
    }
    // Exclude PGs with less successful selections than maximum
    for (const key of Object.keys(result))
    {
        const filled = result[key].filter(s => s !== NO_OSD).length;
        if (filled < max_size)
        {
            delete result[key];
        }
    }
    return result;
 }
 // Select leaf tree items which satisfy all rules of one PG position
 // osd_tree = index_tree() output, only osd_tree[''].children is used as the starting list
 // rules = [ level, operator, value ][] - rules of a single position
 // selected = items already chosen for previous positions; numeric rule values are
 //   1-based references into this array and are resolved to selected[n-1].levels[level]
 // Returns a new array of leaf items (items without children). The tree itself is not modified.
 function filter_tree_by_rules(osd_tree, rules, selected)
 {
    // Work on a copy: the list is expanded in place below, and splicing directly into
    // osd_tree[''].children would corrupt the shared tree for all subsequent calls
    let cur = [ ...osd_tree[''].children ];
    for (const rule of rules)
    {
        // Resolve rule values: constants are taken as is, references are taken from the selected items
        const val = (rule[2] instanceof Array ? rule[2] : [ rule[2] ])
            .map(v => v instanceof Object ? v.id : selected[v-1].levels[rule[0]]);
        let preferred = [], other = [];
        for (let i = 0; i < cur.length; i++)
        {
            const item = cur[i];
            const level_id = item.levels[rule[0]];
            if (level_id)
            {
                if (rule[1] == '>' && val.filter(v => level_id <= v).length == 0 ||
                    (rule[1] == '=' || rule[1] == '?=') && val.filter(v => level_id != v).length == 0 ||
                    rule[1] == '!=' && val.filter(v => level_id == v).length == 0)
                {
                    // Include
                    preferred.push(item);
                }
                else if (rule[1] == '?=' && val.filter(v => level_id != v).length > 0)
                {
                    // Non-preferred
                    other.push(item);
                }
            }
            else if (item.children)
            {
                // Descend
                cur.splice(i+1, 0, ...item.children);
            }
        }
        // '?=' is a soft rule: non-matching items are used only when nothing matches
        cur = preferred.length ? preferred : other;
    }
    // Get leaf items
    for (let i = 0; i < cur.length; i++)
    {
        if (cur[i].children)
        {
            // Descend
            cur.splice(i, 1, ...cur[i].children);
            i--;
        }
    }
    return cur;
 }
 // Build an index of tree nodes by their IDs
 // Input:
 // node_list = { id: string|number, level: string, size?: number, parent?: string|number }[]
 // Output:
 // node_tree = { [node_id]: { id, level, size?, parent?, children?: node[], levels: { [level]: id, ... } } }
 // The root is stored under the '' key. After indexing, its children list is flattened so that it
 // contains only nodes without children, and every node's levels hold the IDs of the node itself and its ancestors.
 function index_tree(node_list)
 {
    const tree = { '': { children: [], levels: {} } };
    // Copy all nodes into the index, dropping any children passed in the input
    for (const node of node_list)
    {
        const copy = { ...node, levels: {} };
        delete copy.children;
        tree[node.id] = copy;
    }
    // Attach each node to its parent, or to the root if the parent is missing or unknown
    for (const node of node_list)
    {
        const has_known_parent = node.parent && tree[node.parent];
        const parent = tree[has_known_parent ? node.parent : ''];
        if (!parent.children)
        {
            parent.children = [];
        }
        parent.children.push(tree[node.id]);
    }
    // Walk down from the root: propagate level IDs to children and replace each
    // parent in the root list with its children until only childless nodes remain
    const flat = tree[''].children;
    let pos = 0;
    while (pos < flat.length)
    {
        const item = flat[pos];
        item.levels[item.level] = item.id;
        if (!item.children)
        {
            pos++;
            continue;
        }
        for (const child of item.children)
        {
            child.levels = { ...item.levels, ...child.levels };
        }
        // Stay at the same position to process the inserted children next
        flat.splice(pos, 1, ...item.children);
    }
    return tree;
 }
 // Filter a list of PGs leaving only the ones which still satisfy the rules
 // pgs = id[][] (each PG is a list of tree node IDs)
 // osd_tree = index_tree output
 // rules = parse_pg_dsl output
 // Returns the matching PGs in the original order
 function check_custom_combinations(osd_tree, rules, pgs)
 {
    const satisfies_rules = (pg) =>
    {
        // IDs missing from the tree are represented as null
        const selected = pg.map(id => osd_tree[id] || null);
        for (let i = 0; i < rules.length; i++)
        {
            const filtered = filter_tree_by_rules(osd_tree, rules[i], selected);
            if (selected[i] === null && filtered.length)
            {
                return false;
            }
            // The item chosen for this position must be among the allowed ones
            const allowed = filtered.filter(ok => selected[i].id === ok.id);
            if (!allowed.length)
            {
                return false;
            }
        }
        return true;
    };
    const res = [];
    for (const pg of pgs)
    {
        if (satisfies_rules(pg))
        {
            res.push(pg);
        }
    }
    return res;
 }
 // Exports of dsl_pgs.js: the rule-based combinator class, the NO_OSD placeholder,
 // tree indexing, both rule parsers and the rule-based generation/validation functions
 module.exports = {
    RuleCombinator,
    NO_OSD,
    index_tree,
    parse_level_indexes,
    parse_pg_dsl,
    random_custom_combinations,
    check_custom_combinations,
 };
--- a/mon/lp_optimizer/murmur3.js
+++ b/mon/lp_optimizer/murmur3.js
@ -1,38 +0,0 @@
 // Pick one of <count> items by the maximum hash value
 // count = number of items, cb = function returning the string key of the item with the given index
 // Returns the index of the item with the greatest murmur3(cb(index)), or null when count is falsy.
 // The first item wins on equal hashes; index 0 is also returned when no hash is greater than -1.
 function select_murmur3(count, cb)
 {
    if (!count)
    {
        return null;
    }
    let best_idx = 0;
    let best_hash = -1;
    for (let j = 0; j < count; j++)
    {
        const h = murmur3(cb(j));
        if (h > best_hash)
        {
            best_idx = j;
            best_hash = h;
        }
    }
    return best_idx;
 }
 // Simple string hash: mixes UTF-16 code units of s one by one into a 32-bit state
 // Returns a signed 32-bit integer (may be negative); an empty string yields the initial state 0x12345678
 function murmur3(s)
 {
    let state = 0x12345678;
    let pos = 0;
    while (pos < s.length)
    {
        state = state ^ s.charCodeAt(pos);
        // The product is computed as a regular JS number and then truncated to 32 bits
        state = (state*0x5bd1e995) & 0xFFFFFFFF;
        state = state ^ (state >> 15);
        pos++;
    }
    return state;
 }
 // Exports of murmur3.js: the hash function and the hash-based item selection helper
 module.exports = {
    murmur3,
    select_murmur3,
 };
--- a/mon/lp_optimizer/simple_pgs.js
+++ b/mon/lp_optimizer/simple_pgs.js
@ -1,241 +0,0 @@
 const { select_murmur3 } = require('./murmur3.js');
 const NO_OSD = 'Z';
 // PG combination generator/validator for a flat two-level tree (failure domain -> OSDs)
 class SimpleCombinator
 {
    // flat_tree = { failure_domain1: { osd1: size1, ... }, ... }
    // pg_size, max_combinations, ordered = stored as is and passed to random_combinations()
    constructor(flat_tree, pg_size, max_combinations, ordered)
    {
        Object.assign(this, {
            osd_tree: flat_tree,
            pg_size,
            max_combinations,
            ordered,
        });
    }
    // Generate candidate PGs for the stored tree
    random_combinations()
    {
        const { osd_tree, pg_size, max_combinations, ordered } = this;
        return random_combinations(osd_tree, pg_size, max_combinations, ordered);
    }
    // Return only those of the given PGs which are still valid for the stored tree
    check_combinations(pgs)
    {
        const { osd_tree } = this;
        return check_combinations(osd_tree, pgs);
    }
 }
 // Flatten a multi-level OSD tree into the two-level form used by all_combinations():
 //   { dom1: { osd_id: size, ... }, dom2: { ... }, ... }
 // osd_tree = { level: number|string, id?: string, size?: number, children?: osd_tree }[]
 // levels = { string: number }, maps level names to numeric ranks
 // failure_domain_level, osd_level = level name (resolved through levels) or a numeric rank
 // domains = result accumulator, i = shared domain counter; both are passed down the recursion
 function flatten_tree(osd_tree, levels, failure_domain_level, osd_level, domains = {}, i = { i: 1 })
 {
    const osd_rank = levels[osd_level] || osd_level;
    const domain_rank = levels[failure_domain_level] || failure_domain_level;
    for (const node of osd_tree)
    {
        const node_rank = levels[node.level] || node.level;
        if (node_rank < domain_rank)
        {
            // Still above the failure domain level - descend into children
            flatten_tree(node.children || [], levels, domain_rank, osd_rank, domains, i);
            continue;
        }
        // The node itself becomes a failure domain with the next sequential 'domN' key
        const domain_no = i.i++;
        domains['dom'+domain_no] = extract_osds([ node ], levels, osd_rank);
    }
    return domains;
 }
 // Collect { osd_id: size } for every node at or below osd_level found in the given subtree.
 // osd_level is compared as is (the caller is expected to pass an already resolved rank);
 // osds is the accumulator object which is filled in place and returned.
 function extract_osds(osd_tree, levels, osd_level, osds = {})
 {
    for (const node of osd_tree)
    {
        const node_rank = levels[node.level] || node.level;
        if (!(node_rank >= osd_level))
        {
            // Intermediate node - look for OSDs among its children
            extract_osds(node.children || [], levels, osd_level, osds);
            continue;
        }
        osds[node.id] = node.size;
    }
    return osds;
 }
 // Generate a reproducible set of PG candidates using hash-based selection instead of a random generator
 // osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
 // pg_size = number of positions in a PG; missing positions are filled with NO_OSD
 // count = number of additional hash-selected combinations to try
 // ordered = don't treat (x,y) and (y,x) as equal
 // Returns { 'pg_<id>_<id>...': [ id, ... ] }; duplicates collapse because the key is derived from the OSD list
 function random_combinations(osd_tree, pg_size, count, ordered)
 {
    const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
    // Failure domains without OSDs can't be selected at all
    const hosts = Object.keys(osd_tree).sort().filter(h => osds[h].length > 0);
    const r = {};
    // Generate random combinations including each OSD at least once
    for (let h = 0; h < hosts.length; h++)
    {
        for (let o = 0; o < osds[hosts[h]].length; o++)
        {
            const pg = [ osds[hosts[h]][o] ];
            const cur_hosts = [ ...hosts ];
            cur_hosts.splice(h, 1);
            for (let i = 1; i < pg_size && i < hosts.length; i++)
            {
                // The callback parameter i shadows the loop variable; ':i:' in the hash key is a literal string
                const next_host = select_murmur3(cur_hosts.length, i => pg[0]+':i:'+cur_hosts[i]);
                const next_osd = select_murmur3(osds[cur_hosts[next_host]].length, i => pg[0]+':i:'+osds[cur_hosts[next_host]][i]);
                pg.push(osds[cur_hosts[next_host]][next_osd]);
                // Each failure domain may be used only once per PG
                cur_hosts.splice(next_host, 1);
            }
            while (pg.length < pg_size)
            {
                pg.push(NO_OSD);
            }
            r['pg_'+pg.join('_')] = pg;
        }
    }
    // Generate purely random combinations
    while (count > 0)
    {
        let host_idx = [];
        const cur_hosts = [ ...hosts.map((h, i) => i) ];
        const max_hosts = pg_size < hosts.length ? pg_size : hosts.length;
        if (ordered)
        {
            // Any order of failure domains is allowed - pick from all remaining ones
            for (let i = 0; i < max_hosts; i++)
            {
                const r = select_murmur3(cur_hosts.length, i => count+':h:'+cur_hosts[i]);
                host_idx[i] = cur_hosts[r];
                cur_hosts.splice(r, 1);
            }
        }
        else
        {
            // Keep failure domains in ascending order: pick one, drop it and everything before it,
            // always leaving enough domains for the remaining positions
            for (let i = 0; i < max_hosts; i++)
            {
                const r = select_murmur3(cur_hosts.length - (max_hosts - i - 1), i => count+':h:'+cur_hosts[i]);
                host_idx[i] = cur_hosts[r];
                cur_hosts.splice(0, r+1);
            }
        }
        // The current value of count is part of every hash key, so each iteration selects differently
        let pg = host_idx.map(h => osds[hosts[h]][select_murmur3(osds[hosts[h]].length, i => count+':o:'+osds[hosts[h]][i])]);
        while (pg.length < pg_size)
        {
            pg.push(NO_OSD);
        }
        r['pg_'+pg.join('_')] = pg;
        count--;
    }
    return r;
 }
 // Super-stupid algorithm. Given the current OSD tree, generate all possible OSD combinations
 // osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
 // ordered = return combinations without duplicates having different order
 // pg_size = number of positions in every combination
 // count = optional limit on the number of returned combinations; falsy or negative means no limit
 // Returns an array of combinations, each one being an array of pg_size OSD ids (object keys of osd_tree entries)
 function all_combinations(osd_tree, pg_size, ordered, count)
 {
    const hosts = Object.keys(osd_tree).sort();
    const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
    // Not enough failure domains: pad the list with pseudo-hosts holding a single NO_OSD entry
    while (hosts.length < pg_size)
    {
        osds[NO_OSD] = [ NO_OSD ];
        hosts.push(NO_OSD);
    }
    // host_idx[i] = index of the host used for position i, osd_idx[i] = index of the OSD inside that host
    let host_idx = [];
    let osd_idx = [];
    for (let i = 0; i < pg_size; i++)
    {
        host_idx.push(i);
        osd_idx.push(0);
    }
    const r = [];
    while (!count || count < 0 || r.length < count)
    {
        r.push(host_idx.map((hi, i) => osds[hosts[hi]][osd_idx[i]]));
        // Advance OSD indexes like an odometer, starting from the last position
        let inc = pg_size-1;
        while (inc >= 0)
        {
            osd_idx[inc]++;
            if (osd_idx[inc] >= osds[hosts[host_idx[inc]]].length)
            {
                osd_idx[inc] = 0;
                inc--;
            }
            else
            {
                break;
            }
        }
        if (inc < 0)
        {
            // no osds left in the current host combination, select the next one
            inc = pg_size-1;
            same_again: while (inc >= 0)
            {
                host_idx[inc]++;
                // Skip host indexes already taken by earlier positions
                for (let prev_host = 0; prev_host < inc; prev_host++)
                {
                    if (host_idx[prev_host] == host_idx[inc])
                    {
                        continue same_again;
                    }
                }
                if (host_idx[inc] < (ordered ? hosts.length-(pg_size-1-inc) : hosts.length))
                {
                    // Reset all following positions to their lowest values
                    // NOTE(review): in the non-ordered case following positions are reset to 0 without
                    // re-checking them against earlier positions - verify that this can't repeat a host
                    while ((++inc) < pg_size)
                    {
                        host_idx[inc] = (ordered ? host_idx[inc-1]+1 : 0);
                    }
                    break;
                }
                else
                {
                    // This position is exhausted, carry over to the previous one
                    inc--;
                }
            }
            if (inc < 0)
            {
                // All host combinations are enumerated
                break;
            }
        }
    }
    return r;
 }
 // Filter a list of PGs leaving only the ones which are still valid for the given tree:
 // every OSD must exist in osd_tree and no two OSDs of a PG may share a failure domain
 // osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
 // pgs = osd_id[][]
 // Returns the valid PGs in the original order
 function check_combinations(osd_tree, pgs)
 {
    // Reverse index: OSD id -> failure domain which contains it
    const host_per_osd = {};
    for (const host in osd_tree)
    {
        for (const osd in osd_tree[host])
        {
            host_per_osd[osd] = host;
        }
    }
    const is_valid = (pg) =>
    {
        const seen_hosts = {};
        for (const osd of pg)
        {
            const host = host_per_osd[osd];
            if (!host || seen_hosts[host])
            {
                // Unknown OSD or a failure domain used twice
                return false;
            }
            seen_hosts[host] = true;
        }
        return true;
    };
    const res = [];
    for (const pg of pgs)
    {
        if (is_valid(pg))
        {
            res.push(pg);
        }
    }
    return res;
 }
 // Convert old-style optimizer parameters (with a flat osd_tree) to the new form:
 // returns a copy of params extended with osd_weights (all OSD sizes merged into one object)
 // and combinator (a SimpleCombinator limited to max_combinations, 10000 by default)
 function compat(params)
 {
    const converted = { ...params };
    converted.osd_weights = Object.assign({}, ...Object.values(params.osd_tree));
    converted.combinator = new SimpleCombinator(params.osd_tree, params.pg_size, params.max_combinations||10000);
    return converted;
 }
 // Exports of simple_pgs.js; random_combinations() and check_combinations() are not exported
 // directly, they are reachable through SimpleCombinator methods
 module.exports = {
    flatten_tree,
    all_combinations,
    SimpleCombinator,
    compat,
    NO_OSD,
 };
--- a/mon/lp_optimizer/test-parse-dsl.js
+++ b/mon/lp_optimizer/test-parse-dsl.js
@ -1,118 +0,0 @@
 const { random_custom_combinations, index_tree, parse_level_indexes, parse_pg_dsl } = require('./dsl_pgs.js');
 // Test helper: prints <result> in full depth and compares it with <expected>
 // by their JSON serialisations. On mismatch, reports the expected value to
 // stderr/console and terminates the process with exit code 1.
 function check(result, expected)
 {
    console.dir(result, { depth: null });
    const actual_json = JSON.stringify(result);
    const expected_json = JSON.stringify(expected);
    if (actual_json === expected_json)
    {
        return;
    }
    process.stderr.write('Unexpected value, expected: ');
    console.dir(expected, { depth: null });
    process.exit(1);
 }
 // parse_pg_dsl(): each comma-separated item of the rule string is expected to
 // become one array of [ level, operator, value ] conditions; "any" gives an
 // empty array and a parenthesized list "(1,3)" gives an array value
 check(
    parse_pg_dsl("any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5"),
    [
        [],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', 1 ] ],
        [ [ 'dc', '!=', 1 ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', 3 ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ] ],
        [ [ 'dc', '=', 5 ], [ 'host', '!=', 5 ] ],
    ]
 );
 // parse_pg_dsl(): a non-numeric value ("meow") is expected as { id: ... },
 // and the ">" operator is accepted along with "=" and "!="
 check(
    parse_pg_dsl("dc=meow, dc!=1, dc>2"),
    [
        [ [ 'dc', '=', { id: 'meow' } ] ],
        [ [ 'dc', '!=', 1 ] ],
        [ [ 'dc', '>', 2 ] ],
    ]
 );
 // parse_level_indexes() with only the level map: one rule set per character
 // position. In the expected output, repeated characters give "=" conditions
 // referring to an earlier position, and the host "!=" lists contain all
 // previous positions
 check(
    parse_level_indexes({ dc: '112233', host: 'ABCDEF' }),
    [
        [],
        [ [ 'dc', '=', 1 ],         [ 'host', '!=', [ 1 ] ] ],
        [ [ 'dc', '!=', [ 1 ] ],    [ 'host', '!=', [ 1, 2 ] ] ],
        [ [ 'dc', '=', 3 ],         [ 'host', '!=', [ 1, 2, 3 ] ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ], [ 'host', '!=', [ 1, 2, 3, 4 ] ] ],
        [ [ 'dc', '=', 5 ],         [ 'host', '!=', [ 1, 2, 3, 4, 5 ] ] ],
    ]
 );
 // Same level map, with [ 'dc', 'host' ] passed as the second argument: in the
 // expected output the host "!=" lists only reference positions that share
 // the same dc, and positions starting a new dc have no host condition
 check(
    parse_level_indexes({ dc: '112233', host: 'ABCDEF' }, [ 'dc', 'host' ]),
    [
        [],
        [ [ 'dc', '=', 1 ],         [ 'host', '!=', [ 1 ] ] ],
        [ [ 'dc', '!=', [ 1 ] ] ],
        [ [ 'dc', '=', 3 ],         [ 'host', '!=', [ 3 ] ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ] ],
        [ [ 'dc', '=', 5 ],         [ 'host', '!=', [ 5 ] ] ],
    ]
 );
 // 12 positions where the dc characters repeat non-contiguously ('1122' then
 // '1122' again), without the second argument
 check(
    parse_level_indexes({ dc: '112211223333', host: '123456789ABC' }),
    [
        [],
        [ [ 'dc', '=', 1 ],         [ 'host', '!=', [ 1 ] ] ],
        [ [ 'dc', '!=', [ 1 ] ],    [ 'host', '!=', [ 1, 2 ] ] ],
        [ [ 'dc', '=', 3 ],         [ 'host', '!=', [ 1, 2, 3 ] ] ],
        [ [ 'dc', '=', 1 ],         [ 'host', '!=', [ 1, 2, 3, 4 ] ] ],
        [ [ 'dc', '=', 1 ],         [ 'host', '!=', [ 1, 2, 3, 4, 5 ] ] ],
        [ [ 'dc', '=', 3 ],         [ 'host', '!=', [ 1, 2, 3, 4, 5, 6 ] ] ],
        [ [ 'dc', '=', 3 ],         [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7 ] ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8 ] ] ],
        [ [ 'dc', '=', 9 ],         [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ] ],
        [ [ 'dc', '=', 9 ],         [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] ] ],
        [ [ 'dc', '=', 9 ],         [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] ] ],
    ]
 );
 // The same 12-position map with [ 'dc', 'host' ] as the second argument
 check(
    parse_level_indexes({ dc: '112211223333', host: '123456789ABC' }, [ 'dc', 'host' ]),
    [
        [],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
        [ [ 'dc', '!=', [ 1 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3 ] ] ],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2 ] ] ],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2, 5 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3, 4 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3, 4, 7 ] ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9, 10 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9, 10, 11 ] ] ]
    ]
 );
 // random_custom_combinations(): three OSDs without parents and a two-position
 // rule with different OSDs ('12'). Only the keys of the result are checked,
 // and they are expected to be all 3 pairs
 check(
    Object.keys(random_custom_combinations(index_tree([
        { id: '1', size: 1, level: 'osd' },
        { id: '2', size: 2, level: 'osd' },
        { id: '3', size: 3, level: 'osd' }
    ]), parse_level_indexes({ osd: '12' }), 10000)).sort(),
    [ 'pg_1_2', 'pg_1_3', 'pg_2_3' ]
 );
 // Four positions taking 2 OSDs from each of 2 hosts (host: '1122').
 // h1 has a single OSD while h2 and h3 have two each, and the only
 // expected key is the one built from the OSDs of h2 and h3
 check(
    Object.keys(random_custom_combinations(index_tree([
        { id: 'h1', level: 'host' },
        { id: 'h2', level: 'host' },
        { id: 'h3', level: 'host' },
        { id: '1', size: 1, level: 'osd', parent: 'h1' },
        { id: '2', size: 1, level: 'osd', parent: 'h2' },
        { id: '3', size: 1, level: 'osd', parent: 'h2' },
        { id: '4', size: 1, level: 'osd', parent: 'h3' },
        { id: '5', size: 1, level: 'osd', parent: 'h3' },
    ]), parse_level_indexes({ host: '1122', osd: '1234' }), 10000)).sort(),
    [ 'pg_2_3_4_5' ]
 );
 // Reached only when no check() above has terminated the process
 console.log('OK');
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
antilles	7fea69ff5f	Merge pull request 'master' (#3 ) from vitalif/vitastor:master into master Reviewed-on: antilles/vitastor#3	2024-02-13 14:44:09 +03:00
antilles	1e39b80f31	Merge pull request 'master' (#2 ) from vitalif/vitastor:master into master Reviewed-on: antilles/vitastor#2	2024-01-12 15:04:03 +03:00
antilles	f94f76ca89	Merge pull request 'Pull fresh master from base' (#1 ) from vitalif/vitastor:master into master Reviewed-on: antilles/vitastor#1	2024-01-09 13:25:13 +03:00
`@ -1,4 +1,4 @@`
	`vitastor (1.9.3-1) unstable; urgency=medium`	`vitastor (1.4.4-1) unstable; urgency=medium`

	`* Bugfixes`	`* Bugfixes`