K/V control prints (for debug only) O:-)

Fix eviction when random_pos selects the end
Implement min/max list_count to make listings during performance test reasonable
2023-12-01 02:33:04 +03:00 · 2023-12-01 01:43:03 +03:00 · 2023-12-01 01:17:04 +03:00 · 2023-12-01 01:17:04 +03:00 · 2023-12-01 01:17:04 +03:00 · 2023-12-01 01:17:04 +03:00
499 changed files with 8124 additions and 46786 deletions
--- a/.gitea/workflows/buildenv.Dockerfile
+++ b/.gitea/workflows/buildenv.Dockerfile
@@ -22,7 +22,7 @@ RUN apt-get update
 RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
    liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
 RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
-RUN apt-get update && apt-get -y install jq lp-solve sudo nfs-common fdisk parted
+RUN apt-get -y install jq lp-solve sudo
 RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`

 RUN set -ex; \
--- a/.gitea/workflows/test.yml
+++ b/.gitea/workflows/test.yml
@@ -16,7 +16,6 @@ env:
  BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
  TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
  OSD_ARGS: '--etcd_quick_timeout 2000'
-  USE_RAMDISK: 1

 concurrency:
  group: ci-${{ github.ref }}
@@ -65,13 +64,6 @@ jobs:
    # leak sanitizer sometimes crashes
    - run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test

-  npm_lint:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - run: cd /root/vitastor/mon && npm run lint
-
  test_add_osd:
    runs-on: ubuntu-latest
    needs: build
@@ -198,24 +190,6 @@ jobs:
          echo ""
        done

-  test_etcd_fail_antietcd:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 10
-      run: ANTIETCD=1 /root/vitastor/tests/test_etcd_fail.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
  test_interrupted_rebalance:
    runs-on: ubuntu-latest
    needs: build
@@ -288,24 +262,6 @@ jobs:
          echo ""
        done

-  test_create_halfhost:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_create_halfhost.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
  test_failure_domain:
    runs-on: ubuntu-latest
    needs: build
@@ -414,24 +370,6 @@ jobs:
          echo ""
        done

-  test_rm_degraded:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_rm_degraded.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
  test_snapshot_chain:
    runs-on: ubuntu-latest
    needs: build
@@ -457,7 +395,7 @@ jobs:
    steps:
    - name: Run test
      id: test
-      timeout-minutes: 6
+      timeout-minutes: 3
      run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
@@ -594,60 +532,6 @@ jobs:
          echo ""
        done

-  test_dd:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_dd.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_root_node:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_root_node.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_switch_primary:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_switch_primary.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
  test_write:
    runs-on: ubuntu-latest
    needs: build
@@ -738,24 +622,6 @@ jobs:
          echo ""
        done

-  test_heal_antietcd:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 10
-      run: ANTIETCD=1 /root/vitastor/tests/test_heal.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
  test_heal_csum_32k_dmj:
    runs-on: ubuntu-latest
    needs: build
@@ -864,150 +730,6 @@ jobs:
          echo ""
        done

-  test_resize:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_resize.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_resize_auto:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_resize_auto.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_snapshot_pool2:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_snapshot_pool2.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_osd_tags:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_osd_tags.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_enospc:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_enospc.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_enospc_xor:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: SCHEME=xor /root/vitastor/tests/test_enospc.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_enospc_imm:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_enospc.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_enospc_imm_xor:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: IMMEDIATE_COMMIT=1 SCHEME=xor /root/vitastor/tests/test_enospc.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
  test_scrub:
    runs-on: ubuntu-latest
    needs: build
@@ -1116,21 +838,3 @@ jobs:
          echo ""
        done

-  test_nfs:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 3
-      run: /root/vitastor/tests/test_nfs.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
--- a/.gitea/workflows/tests-to-yaml.pl
+++ b/.gitea/workflows/tests-to-yaml.pl
@@ -34,19 +34,11 @@ for my $line (<>)
            {
                $test_name .= '_imm';
            }
-            elsif ($1 eq 'ANTIETCD')
-            {
-                $test_name .= '_antietcd';
-            }
            else
            {
                $test_name .= '_'.lc($1).'_'.$2;
            }
        }
-        if ($test_name eq 'test_snapshot_chain_ec')
-        {
-            $timeout = 6;
-        }
        $line =~ s!\./test_!/root/vitastor/tests/test_!;
        # Gitea CI doesn't support artifacts yet, lol
        #- name: Upload results
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,16 @@
 package-lock.json
 fio
 qemu
+osd
+stub_osd
+stub_uring_osd
+stub_bench
+osd_test
+osd_peering_pg_test
+dump_journal
+nbd_proxy
+rm_inode
+test_allocator
+test_blockstore
+test_shit
+osd_rmw_test
--- a/CLA-en.md
+++ b/CLA-en.md
@@ -1,115 +0,0 @@
-## Contributor License Agreement
-
-> This Agreement is made in the Russian and English languages. **The English
-text of Agreement is for informational purposes only** and is not binding
-for the Parties.
->
-> In the event of a conflict between the provisions of the Russian and
-English versions of this Agreement, the **Russian version shall prevail**.
->
-> Russian version is published at https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md
-
-This document represents the offer of Filippov Vitaliy Vladimirovich
-("Author"), author and copyright holder of Vitastor software ("Program"),
-acknowledged by a certificate of Federal Service for Intellectual
-Property of Russian Federation (Rospatent) # 2021617829 dated 20 May 2021,
-to "Contributors" to conclude this license agreement as follows
-("Agreement" or "Offer").
-
-In accordance with Art. 435, Art. 438 of the Civil Code of the Russian
-Federation, this Agreement is an offer and in case of acceptance of the
-offer, an agreement is considered concluded on the conditions specified
-in the offer.
-
-1. Applicable Terms. \
-   1.1. "Official Repository" shall mean the computer storage, operated by
-        the Author, containing all prior and future versions of the Source
-        Code of the Program, at Internet addresses https://git.yourcmc.ru/vitalif/vitastor/
-        or https://github.com/vitalif/vitastor/. \
-   1.2. "Contributions" shall mean results of intellectual activity
-        (including, but not limited to, source code, libraries, components,
-        texts, documentation) which can be software or elements of the software
-        and which are provided by Contributors to the Author for inclusion
-        in the Program. \
-   1.3. "Contributor" shall mean a person who provides Contributions to
-        the Author and agrees with all provisions of this Agreement.
-        A Contributor can be: 1) an individual; or 2) a legal entity or an
-        individual entrepreneur in case when an individual provides Contributions
-        on behalf of third parties, including on behalf of his employer.
-
-2. Subject of the Agreement. \
-   2.1. Subject of the Agreement shall be the Contributions sent to the Author by Contributors. \
-   2.2. The Contributor grants to the Author the right to use Contributions at his own
-        discretion and without any necessity to get a prior approval from Contributor or
-        any other third party in any way, under a simple (non-exclusive), royalty-free,
-        irrevocable license throughout the world by all means not contrary to law, in whole
-        or as a part of the Program, or other open-source or closed-source computer programs,
-        products or services (hereinafter -- the "License"), including, but not limited to: \
-        2.2.1. to execute Contributions and use them for any tasks; \
-        2.2.2. to publish and distribute Contributions in modified or unmodified form and/or to rent them; \
-        2.2.3. to modify Contributions, add comments, illustrations or any explanations to Contributions while using them; \
-        2.2.4. to create other results of intellectual activity based on Contributions, including derivative works and composite works; \
-        2.2.5. to translate Contributions into other languages, including other programming languages; \
-        2.2.6. to carry out rental and public display of Contributions; \
-        2.2.7. to use Contributions under the trade name and/or any trademark or any other label, or without it, as the Author thinks fit; \
-   2.3. The Contributor grants to the Author the right to sublicense any of the aforementioned
-        rights to third parties on any terms at the Author's discretion. \
-   2.4. The License is provided for the entire duration of Contributor's
-        exclusive intellectual property rights to the Contributions. \
-   2.5. The Contributor grants to the Author the right to decide how and where to mention,
-        or to not mention at all, the fact of his authorship, name, nickname and/or company
-        details when including Contributions into the Program or in any other computer
-        programs, products or services.
-
-3. Acceptance of the Offer \
-   3.1. The Contributor may provide Contributions to the Author in the form of
-        a "Pull Request" in an Official Repository of the Program or by any
-        other electronic means of communication, including, but not limited to,
-        E-mail or messenger applications. \
-   3.2. The acceptance of the Offer shall be the fact of provision of Contributions
-        to the Author by the Contributor by any means with the following remark:
-        “I accept Vitastor CLA agreement: https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md”
-        or “Я принимаю соглашение Vitastor CLA: https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md”. \
-   3.3. Date of acceptance of the Offer shall be the date of such provision.
-
-4. Rights and obligations of the parties. \
-   4.1. The Contributor reserves the right to use Contributions by any lawful means
-        not contrary to this Agreement. \
-   4.2. The Author has the right to refuse to include Contributions into the Program
-        at any moment with no explanation to the Contributor.
-
-5. Representations and Warranties. \
-   5.1. The person providing Contributions for the purpose of their inclusion
-        in the Program represents and warrants that he is the Contributor
-        or legally acts on the Contributor's behalf. Name or company details
-        of the Contributor shall be provided with the Contribution at the moment
-        of their provision to the Author. \
-   5.2. The Contributor represents and warrants that he legally owns exclusive
-        intellectual property rights to the Contributions. \
-   5.3. The Contributor represents and warrants that any further use of
-        Contributions by the Author as provided by Contributor under the terms
-        of the Agreement does not infringe on intellectual and other rights and
-        legitimate interests of third parties. \
-   5.4. The Contributor represents and warrants that he has all rights and legal
-        capacity needed to accept this Offer; \
-   5.5. The Contributor represents and warrants that Contributions don't
-        contain malware or any information considered illegal under the law
-        of Russian Federation.
-
-6. Termination of the Agreement \
-   6.1. The Agreement may be terminated at will of both Author and Contributor,
-        formalised in the written form or if the Agreement is terminated on
-        reasons prescribed by the law of Russian Federation.
-
-7. Final Clauses \
-   7.1. The Contributor may optionally sign the Agreement in the written form. \
-   7.2. The Agreement is deemed to become effective from the Date of signing of
-        the Agreement and until the expiration of Contributor's exclusive
-        intellectual property rights to the Contributions. \
-   7.3. The Author may unilaterally alter the Agreement without informing Contributors.
-        The new version of the document shall come into effect 3 (three) days after
-        being published in the Official Repository of the Program at Internet address
-        [https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md](https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md).
-        Contributors should keep informed about the actual version of the Agreement themselves. \
-   7.4. If the Author and the Contributor fail to agree on disputable issues,
-        disputes shall be referred to the Moscow Arbitration court.
--- a/CLA-ru.md
+++ b/CLA-ru.md
@@ -1,108 +0,0 @@
-## Лицензионное соглашение с участником
-
-> Данная Оферта написана в Русской и Английской версиях. **Версия на английском
-языке предоставляется в информационных целях** и не связывает стороны договора.
->
-> В случае несоответствий между положениями Русской и Английской версий Договора,
-**Русская версия имеет приоритет**.
->
-> Английская версия опубликована по адресу https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md
-
-Настоящий договор-оферта (далее по тексту – Оферта, Договор) адресована физическим
-и юридическим лицам (далее – Участникам) и является официальным публичным предложением
-Филиппова Виталия Владимировича (далее – Автора) программного обеспечения Vitastor,
-свидетельство Федеральной службы по интеллектуальной собственности (Роспатент) № 2021617829
-от 20 мая 2021 г. (далее – Программа) о нижеследующем:
-
-1. Термины и определения \
-   1.1. Репозиторий – электронное хранилище, содержащее исходный код Программы. \
-   1.2. Доработка – результат интеллектуальной деятельности Участника, включающий
-        в себя изменения или дополнения к исходному коду Программы, которые Участник
-        желает включить в состав Программы для дальнейшего использования и распространения
-        Автором и для этого направляет их Автору. \
-   1.3. Участник – физическое или юридическое лицо, вносящее Доработки в код Программы. \
-   1.4. ГК РФ – Гражданский кодекс Российской Федерации.
-
-2. Предмет оферты \
-   2.1. Предметом настоящей оферты являются Доработки, отправляемые Участником Автору. \
-   2.2. Участник предоставляет Автору право использовать Доработки по собственному усмотрению
-        и без необходимости предварительного согласования с Участником или иным третьим лицом
-        на условиях простой (неисключительной) безвозмездной безотзывной лицензии, полностью
-        или фрагментарно, в составе Программы или других программ, продуктов или сервисов
-        как с открытым, так и с закрытым исходным кодом, любыми способами, не противоречащими
-        закону, включая, но не ограничиваясь следующими: \
-        2.2.1. Запускать и использовать Доработки для выполнения любых задач; \
-        2.2.2. Распространять, импортировать и доводить Доработки до всеобщего сведения; \
-        2.2.3. Вносить в Доработки изменения, сокращения и дополнения, снабжать Доработки
-               при их использовании комментариями, иллюстрациями или пояснениями; \
-        2.2.4. Создавать на основе Доработок иные результаты интеллектуальной деятельности,
-               в том числе производные и составные произведения; \
-        2.2.5. Переводить Доработки на другие языки, в том числе на другие языки программирования; \
-        2.2.6. Осуществлять прокат и публичный показ Доработок; \
-        2.2.7. Использовать Доработки под любым фирменным наименованием, товарным знаком
-               (знаком обслуживания) или иным обозначением, или без такового. \
-   2.3. Участник предоставляет Автору право сублицензировать полученные права на Доработки
-        третьим лицам на любых условиях на усмотрение Автора. \
-   2.4. Участник предоставляет Автору права на Доработки на территории всего мира. \
-   2.5. Участник предоставляет Автору права на весь срок действия исключительного права
-        Участника на Доработки. \
-   2.6. Участник предоставляет Автору права на Доработки на безвозмездной основе. \
-   2.7. Участник разрешает Автору самостоятельно определять порядок, способ и
-        место указания его имени, реквизитов и/или псевдонима при включении
-        Доработок в состав Программы или других программ, продуктов или сервисов.
-
-3. Акцепт Оферты \
-   3.1. Участник может передавать Доработки в адрес Автора через зеркала официального
-        Репозитория Программы по адресам https://git.yourcmc.ru/vitalif/vitastor/ или
-        https://github.com/vitalif/vitastor/ в виде “запроса на слияние” (pull request),
-        либо в письменном виде или с помощью любых других электронных средств коммуникации,
-        например, электронной почты или мессенджеров. \
-   3.2. Факт передачи Участником Доработок в адрес Автора любым способом с одной из пометок
-        “I accept Vitastor CLA agreement: https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md”
-        или “Я принимаю соглашение Vitastor CLA: https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md”
-        является полным и безоговорочным акцептом (принятием) Участником условий настоящей
-        Оферты, т.е. Участник считается ознакомившимся с настоящим публичным договором и
-        в соответствии с ГК РФ признается лицом, вступившим с Автором в договорные отношения
-        на основании настоящей Оферты. \
-   3.3. Датой акцептирования настоящей Оферты считается дата такой передачи.
-
-4. Права и обязанности Сторон \
-   4.1. Участник сохраняет за собой право использовать Доработки любым законным
-        способом, не противоречащим настоящему Договору. \
-   4.2. Автор вправе отказать Участнику во включении Доработок в состав
-        Программы без объяснения причин в любой момент по своему усмотрению.
-
-5. Гарантии и заверения \
-   5.1. Лицо, направляющее Доработки для целей их включения в состав Программы,
-        гарантирует, что является Участником или представителем Участника. Имя или реквизиты
-        Участника должны быть указаны при их передаче в адрес Автора Программы. \
-   5.2. Участник гарантирует, что является законным обладателем исключительных прав
-        на Доработки. \
-   5.3. Участник гарантирует, что на момент акцептирования настоящей Оферты ему
-        ничего не известно (и не могло быть известно) о правах третьих лиц на
-        передаваемые Автору Доработки или их часть, которые могут быть нарушены
-        в связи с передачей Доработок по настоящему Договору. \
-   5.4. Участник гарантирует, что является дееспособным лицом и обладает всеми
-        необходимыми правами для заключения Договора. \
-   5.5. Участник гарантирует, что Доработки не содержат вредоносного ПО, а также
-        любой другой информации, запрещённой к распространению по законам Российской
-        Федерации.
-
-6. Прекращение действия оферты \
-   6.1. Действие настоящего договора может быть прекращено по соглашению сторон,
-        оформленному в письменном виде, а также вследствие его расторжения по основаниям,
-        предусмотренным законом.
-
-7. Заключительные положения \
-   7.1. Участник вправе по желанию подписать настоящий Договор в письменном виде. \
-   7.2. Настоящий договор действует с момента его заключения и до истечения срока
-        действия исключительных прав Участника на Доработки. \
-   7.3. Автор имеет право в одностороннем порядке вносить изменения и дополнения в договор
-        без специального уведомления об этом Участников. Новая редакция документа вступает
-        в силу через 3 (Три) календарных дня со дня опубликования в официальном Репозитории
-        Программы по адресу в сети Интернет
-        [https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md](https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md).
-        Участники самостоятельно отслеживают действующие условия Оферты. \
-   7.4. Все споры, возникающие между сторонами в процессе их взаимодействия по настоящему
-        договору, решаются путём переговоров. В случае невозможности урегулирования споров
-        переговорным порядком стороны разрешают их в Арбитражном суде г.Москвы.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)

 project(vitastor)

-set(VITASTOR_VERSION "2.1.0")
+set(VERSION "1.2.0")

 add_subdirectory(src)
--- a/README-ru.md
+++ b/README-ru.md
@@ -1,4 +1,4 @@
-# Vitastor
+## Vitastor

 [Read English version](README.md)

@@ -6,8 +6,8 @@

 Вернём былую скорость кластерному блочному хранилищу!

-Vitastor - распределённая блочная, файловая и объектная SDS (программная СХД), прямой аналог Ceph RBD, CephFS и RGW,
-а также внутренних СХД популярных облачных провайдеров. Однако, в отличие от них, Vitastor
+Vitastor - распределённая блочная SDS (программная СХД), прямой аналог Ceph RBD и
+внутренних СХД популярных облачных провайдеров. Однако, в отличие от них, Vitastor
 быстрый и при этом простой. Только пока маленький :-).

 Vitastor архитектурно похож на Ceph, что означает атомарность и строгую консистентность,
@@ -19,10 +19,10 @@ Vitastor нацелен в первую очередь на SSD и SSD+HDD кл
 TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
 что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.

-Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, OpenNebula, Proxmox, Kubernetes.
+Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, Proxmox, Kubernetes.
 Другие драйверы могут также быть легко реализованы.

-Подробности смотрите в документации по ссылкам. Можете начать отсюда: [Быстрый старт](docs/intro/quickstart.ru.md).
+Подробности смотрите в документации по ссылкам ниже.

 ## Презентации и записи докладов

@@ -41,19 +41,16 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
  - [Автор и лицензия](docs/intro/author.ru.md)
 - Установка
  - [Пакеты](docs/installation/packages.ru.md)
-  - [Docker](docs/installation/docker.ru.md)
  - [Proxmox](docs/installation/proxmox.ru.md)
-  - [OpenNebula](docs/installation/opennebula.ru.md)
  - [OpenStack](docs/installation/openstack.ru.md)
  - [Kubernetes CSI](docs/installation/kubernetes.ru.md)
-  - [S3](docs/installation/s3.ru.md)
  - [Сборка из исходных кодов](docs/installation/source.ru.md)
 - Конфигурация
  - [Обзор](docs/config.ru.md)
  - Параметры
    - [Общие](docs/config/common.ru.md)
    - [Сетевые](docs/config/network.ru.md)
-    - [Клиентский код](docs/config/client.ru.md)
+    - [Клиентский код](docs/config/client.en.md)
    - [Глобальные дисковые параметры](docs/config/layout-cluster.ru.md)
    - [Дисковые параметры OSD](docs/config/layout-osd.ru.md)
    - [Прочие параметры OSD](docs/config/osd.ru.md)
@@ -66,13 +63,11 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
  - [fio](docs/usage/fio.ru.md) для тестов производительности
  - [NBD](docs/usage/nbd.ru.md) для монтирования ядром
  - [QEMU и qemu-img](docs/usage/qemu.ru.md)
-  - [NFS](docs/usage/nfs.ru.md) кластерная файловая система и псевдо-ФС прокси
-  - [Администрирование](docs/usage/admin.ru.md)
+  - [NFS](docs/usage/nfs.ru.md)-прокси для VMWare и подобных
 - Производительность
  - [Понимание сути производительности](docs/performance/understanding.ru.md)
  - [Теоретический максимум](docs/performance/theoretical.ru.md)
  - [Пример сравнения с Ceph](docs/performance/comparison1.ru.md)
-  - [Более новый тест Vitastor 1.3.1](docs/performance/bench2.ru.md)

 ## Автор и лицензия

--- a/README.md
+++ b/README.md
@@ -6,9 +6,9 @@

 Make Clustered Block Storage Fast Again.

-Vitastor is a distributed block, file and object SDS, direct replacement of Ceph RBD, CephFS and RGW,
-and also internal SDS's of public clouds. However, in contrast to them, Vitastor is fast
-and simple at the same time. The only thing is it's slightly young :-).
+Vitastor is a distributed block SDS, direct replacement of Ceph RBD and internal SDS's
+of public clouds. However, in contrast to them, Vitastor is fast and simple at the same time.
+The only thing is it's slightly young :-).

 Vitastor is architecturally similar to Ceph which means strong consistency,
 primary-replication, symmetric clustering and automatic data distribution over any
@@ -19,10 +19,10 @@ supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1
 with proper hardware which is ~10 times faster than other popular SDS's like Ceph
 or internal systems of public clouds.

-Vitastor supports QEMU, NBD, NFS protocols, OpenStack, OpenNebula, Proxmox, Kubernetes drivers.
+Vitastor supports QEMU, NBD, NFS protocols, OpenStack, Proxmox, Kubernetes drivers.
 More drivers may be created easily.

-Read more details in the documentation. You can start from here: [Quick Start](docs/intro/quickstart.en.md).
+Read more details below in the documentation.

 ## Talks and presentations

@ -41,12 +41,9 @@ Read more details in the documentation. You can start from here: [Quick Start](d
  - [Author and license](docs/intro/author.en.md)
 - Installation
  - [Packages](docs/installation/packages.en.md)
-  - [Docker](docs/installation/docker.en.md)
  - [Proxmox](docs/installation/proxmox.en.md)
-  - [OpenNebula](docs/installation/opennebula.en.md)
  - [OpenStack](docs/installation/openstack.en.md)
  - [Kubernetes CSI](docs/installation/kubernetes.en.md)
-  - [S3](docs/installation/s3.en.md)
  - [Building from Source](docs/installation/source.en.md)
 - Configuration
  - [Overview](docs/config.en.md)
@ -66,13 +63,11 @@ Read more details in the documentation. You can start from here: [Quick Start](d
  - [fio](docs/usage/fio.en.md) for benchmarks
  - [NBD](docs/usage/nbd.en.md) for kernel mounts
  - [QEMU and qemu-img](docs/usage/qemu.en.md)
-  - [NFS](docs/usage/nfs.en.md) clustered file system and pseudo-FS proxy
-  - [Administration](docs/usage/admin.en.md)
+  - [NFS](docs/usage/nfs.en.md) emulator for VMWare and similar
 - Performance
  - [Understanding storage performance](docs/performance/understanding.en.md)
  - [Theoretical performance](docs/performance/theoretical.en.md)
  - [Example comparison with Ceph](docs/performance/comparison1.en.md)
-  - [Newer benchmark of Vitastor 1.3.1](docs/performance/bench2.en.md)

 ## Author and License

--- a/copy-fio-includes.sh
+++ b/copy-fio-includes.sh
@ -1,6 +1,6 @@
 #!/bin/bash

-gcc -I. -E -o fio_headers.i src/util/fio_headers.h
+gcc -I. -E -o fio_headers.i src/fio_headers.h

 rm -rf fio-copy
 for i in `grep -Po 'fio/[^"]+' fio_headers.i | sort | uniq`; do
--- a/copy-qemu-includes.sh
+++ b/copy-qemu-includes.sh
@ -5,7 +5,7 @@
 #cd b/qemu; make qapi

 gcc -I qemu/b/qemu `pkg-config glib-2.0 --cflags` \
-    -I qemu/include -E -o qemu_driver.i src/client/qemu_driver.c
+    -I qemu/include -E -o qemu_driver.i src/qemu_driver.c

 rm -rf qemu-copy
 for i in `grep -Po 'qemu/[^"]+' qemu_driver.i | sort | uniq`; do
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 8de8b467acbca50cfd8835c20e0e379110f3b32b
+Subproject commit 45e6d1f13196a0824e2089a586c53b9de0283f17
--- a/csi/Dockerfile
+++ b/csi/Dockerfile
@ -1,15 +1,14 @@
 # Compile stage
-FROM golang:bookworm AS build
+FROM golang:buster AS build

 ADD go.sum go.mod /app/
 RUN cd /app; CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go mod download -x
 ADD . /app
-RUN perl -i -e '$/ = undef; while(<>) { s/\n\s*(\{\s*\n)/$1\n/g; s/\}(\s*\n\s*)else\b/$1} else/g; print; }' `find /app -name '*.go'` && \
-    cd /app && \
-    CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o vitastor-csi
+RUN perl -i -e '$/ = undef; while(<>) { s/\n\s*(\{\s*\n)/$1\n/g; s/\}(\s*\n\s*)else\b/$1} else/g; print; }' `find /app -name '*.go'`
+RUN cd /app; CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o vitastor-csi

 # Final stage
-FROM debian:bookworm
+FROM debian:buster

 LABEL maintainers="Vitaliy Filippov <vitalif@yourcmc.ru>"
 LABEL description="Vitastor CSI Driver"
@ -19,32 +18,19 @@ ENV CSI_ENDPOINT=""

 RUN apt-get update && \
    apt-get install -y wget && \
+    (echo deb http://deb.debian.org/debian buster-backports main > /etc/apt/sources.list.d/backports.list) && \
    (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
    apt-get update && \
-    apt-get install -y e2fsprogs xfsprogs kmod iproute2 \
-        # NFS mount dependencies
-        nfs-common netbase \
-        # dependencies of qemu-storage-daemon
-        libnuma1 liburing2 libglib2.0-0 libfuse3-3 libaio1 libzstd1 libnettle8 \
-        libgmp10 libhogweed6 libp11-kit0 libidn2-0 libunistring2 libtasn1-6 libpcre2-8-0 libffi8 && \
+    apt-get install -y e2fsprogs xfsprogs kmod && \
    apt-get clean && \
    (echo options nbd nbds_max=128 > /etc/modprobe.d/nbd.conf)

 COPY --from=build /app/vitastor-csi /bin/

-RUN (echo deb http://vitastor.io/debian bookworm main > /etc/apt/sources.list.d/vitastor.list) && \
-    ((echo 'Package: *'; echo 'Pin: origin "vitastor.io"'; echo 'Pin-Priority: 1000') > /etc/apt/preferences.d/vitastor.pref) && \
+RUN (echo deb http://vitastor.io/debian buster main > /etc/apt/sources.list.d/vitastor.list) && \
    wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg && \
    apt-get update && \
    apt-get install -y vitastor-client && \
-    wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-utils_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
-    wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-block-extra_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
-    dpkg -x qemu-utils*.deb tmp1 && \
-    dpkg -x qemu-block-extra*.deb tmp1 && \
-    cp -a tmp1/usr/bin/qemu-storage-daemon /usr/bin/ && \
-    mkdir -p /usr/lib/x86_64-linux-gnu/qemu && \
-    cp -a tmp1/usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/ && \
-    rm -rf tmp1 *.deb && \
    apt-get clean

 ENTRYPOINT ["/bin/vitastor-csi"]
--- a/csi/Makefile
+++ b/csi/Makefile
@ -1,9 +1,9 @@
-VITASTOR_VERSION ?= v2.1.0
+VERSION ?= v1.2.0

 all: build push

 build:
-	@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
+	@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .

 push:
-	@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
+	@docker push vitalif/vitastor-csi:$(VERSION)
--- a/csi/deploy/001-csi-config-map.yaml
+++ b/csi/deploy/001-csi-config-map.yaml
@ -2,7 +2,6 @@
 apiVersion: v1
 kind: ConfigMap
 data:
-  # You can add multiple configuration files here to use a multi-cluster setup
  vitastor.conf: |-
    {"etcd_address":"http://192.168.7.2:2379","etcd_prefix":"/vitastor"}
 metadata:
--- a/csi/deploy/004-csi-nodeplugin.yaml
+++ b/csi/deploy/004-csi-nodeplugin.yaml
@ -49,7 +49,7 @@ spec:
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
-          image: vitalif/vitastor-csi:v2.1.0
+          image: vitalif/vitastor-csi:v1.2.0
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
@ -82,8 +82,6 @@ spec:
              name: host-sys
            - mountPath: /run/mount
              name: host-mount
-            - mountPath: /run/vitastor-csi
-              name: run-vitastor-csi
            - mountPath: /lib/modules
              name: lib-modules
              readOnly: true
@ -134,9 +132,6 @@ spec:
        - name: host-mount
          hostPath:
            path: /run/mount
-        - name: run-vitastor-csi
-          hostPath:
-            path: /run/vitastor-csi
        - name: lib-modules
          hostPath:
            path: /lib/modules
--- a/csi/deploy/007-csi-provisioner.yaml
+++ b/csi/deploy/007-csi-provisioner.yaml
@ -121,7 +121,7 @@ spec:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
-          image: vitalif/vitastor-csi:v2.1.0
+          image: vitalif/vitastor-csi:v1.2.0
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/009-storage-class.yaml
+++ b/csi/deploy/009-storage-class.yaml
@ -9,17 +9,12 @@ metadata:
 provisioner: csi.vitastor.io
 volumeBindingMode: Immediate
 parameters:
-  # CSI driver can create block-based volumes and VitastorFS-based volumes
-  # only VitastorFS-based volumes and raw block volumes (without FS) support ReadWriteMany mode
-  # set this parameter to VitastorFS metadata volume name to use VitastorFS
-  # if unset, block-based volumes will be created
-  vitastorfs: ""
-  # for block-based storage classes, pool ID may be either a string (name) or a number (ID)
-  # for vitastorFS-based storage classes it must be a string - name of the default pool for FS data
-  poolId: "testpool"
-  # volume name prefix for block-based storage classes or NFS subdirectory (including /) for FS-based volumes
-  volumePrefix: ""
+  etcdVolumePrefix: ""
+  poolId: "1"
  # you can choose other configuration file if you have it in the config map
-  # different etcd URLs and prefixes should also be put in the config
  #configPath: "/etc/vitastor/vitastor.conf"
+  # you can also specify etcdUrl here, maybe to connect to another Vitastor cluster
+  # multiple etcdUrls may be specified, delimited by comma
+  #etcdUrl: "http://192.168.7.2:2379"
+  #etcdPrefix: "/vitastor"
 allowVolumeExpansion: true
--- a/csi/deploy/example-storage-class-fs.yaml
+++ b/csi/deploy/example-storage-class-fs.yaml
@ -1,25 +0,0 @@
---
-apiVersion: storage.k8s.io/v1
-kind: StorageClass
-metadata:
-  namespace: vitastor-system
-  name: vitastor
-  annotations:
-    storageclass.kubernetes.io/is-default-class: "true"
-provisioner: csi.vitastor.io
-volumeBindingMode: Immediate
-parameters:
-  # CSI driver can create block-based volumes and VitastorFS-based volumes
-  # only VitastorFS-based volumes and raw block volumes (without FS) support ReadWriteMany mode
-  # set this parameter to VitastorFS metadata volume name to use VitastorFS
-  # if unset, block-based volumes will be created
-  vitastorfs: "testfs"
-  # for block-based storage classes, pool ID may be either a string (name) or a number (ID)
-  # for vitastorFS-based storage classes it must be a string - name of the default pool for FS data
-  poolId: "testpool"
-  # volume name prefix for block-based storage classes or NFS subdirectory (including /) for FS-based volumes
-  volumePrefix: "k8s/"
-  # you can choose other configuration file if you have it in the config map
-  # different etcd URLs and prefixes should also be put in the config
-  #configPath: "/etc/vitastor/vitastor.conf"
-allowVolumeExpansion: true
--- a/csi/go.mod
+++ b/csi/go.mod
@ -3,10 +3,10 @@ module vitastor.io/csi
 go 1.15

 require (
-	github.com/container-storage-interface/spec v1.8.0
+	github.com/container-storage-interface/spec v1.4.0
 	github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
 	github.com/kubernetes-csi/csi-lib-utils v0.9.1
-	golang.org/x/net v0.7.0
+	golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
 	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
 	google.golang.org/grpc v1.33.1
 	google.golang.org/protobuf v1.24.0
--- a/csi/go.sum
+++ b/csi/go.sum
@ -41,8 +41,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR
 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/container-storage-interface/spec v1.2.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
-github.com/container-storage-interface/spec v1.8.0 h1:D0vhF3PLIZwlwZEf2eNbpujGCNwspwTYf2idJRJx4xI=
-github.com/container-storage-interface/spec v1.8.0/go.mod h1:ROLik+GhPslwwWRNFF1KasPzroNARibH2rfz1rkg4H0=
+github.com/container-storage-interface/spec v1.4.0 h1:ozAshSKxpJnYUfmkpZCTYyF/4MYeYlhdXbAvPvfGmkg=
+github.com/container-storage-interface/spec v1.4.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@ -182,7 +182,6 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
 github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
-github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
 go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
 go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
@ -196,7 +195,6 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -215,7 +213,6 @@ golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCc
 golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
 golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
 golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
-golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -231,10 +228,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
 golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
 golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
-golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
+golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -245,7 +240,6 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -265,22 +259,13 @@ golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
-golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
+golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
-golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@ -301,10 +286,8 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw
 golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
--- a/csi/src/config.go
+++ b/csi/src/config.go
@ -5,7 +5,7 @@ package vitastor

 const (
    vitastorCSIDriverName    = "csi.vitastor.io"
-    vitastorCSIDriverVersion = "2.1.0"
+    vitastorCSIDriverVersion = "1.2.0"
 )

 // Config struct fills the parameters of request or user input
--- a/csi/src/controllerserver.go
+++ b/csi/src/controllerserver.go
@ -8,8 +8,11 @@ import (
    "encoding/json"
    "fmt"
    "strings"
+    "bytes"
+    "strconv"
    "time"
    "os"
+    "os/exec"
    "io/ioutil"

    "github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
@ -59,7 +62,7 @@ func NewControllerServer(driver *Driver) *ControllerServer
    }
 }

-func GetConnectionParams(params map[string]string) (map[string]string, error)
+func GetConnectionParams(params map[string]string) (map[string]string, []string, string)
 {
    ctxVars := make(map[string]string)
    configPath := params["configPath"]
@ -67,59 +70,76 @@ func GetConnectionParams(params map[string]string) (map[string]string, error)
    {
        configPath = "/etc/vitastor/vitastor.conf"
    }
-    ctxVars["configPath"] = configPath
-    if (params["vitastorfs"] != "")
+    else
    {
-        ctxVars["vitastorfs"] = params["vitastorfs"]
+        ctxVars["configPath"] = configPath
    }
    config := make(map[string]interface{})
-    configFD, err := os.Open(configPath)
-    if (err != nil)
+    if configFD, err := os.Open(configPath); err == nil
    {
-        return nil, err
-    }
        defer configFD.Close()
        data, _ := ioutil.ReadAll(configFD)
        json.Unmarshal(data, &config)
-    // Check etcd URL in the config, but do not use the explicit etcdUrl
-    // parameter for CLI calls, otherwise users won't be able to later
-    // change them - storage class parameters are saved in volume IDs
+    }
+    // Try to load prefix & etcd URL from the config
    var etcdUrl []string
-    switch config["etcd_address"].(type)
+    if (params["etcdUrl"] != "")
    {
-    case string:
-        url := strings.TrimSpace(config["etcd_address"].(string))
-        if (url != "")
-        {
-            etcdUrl = strings.Split(url, ",")
-        }
-    case []string:
-        etcdUrl = config["etcd_address"].([]string)
-    case []interface{}:
-        for _, url := range config["etcd_address"].([]interface{})
-        {
-            s, ok := url.(string)
-            if (ok)
-            {
-                etcdUrl = append(etcdUrl, s)
-            }
-        }
+        ctxVars["etcdUrl"] = params["etcdUrl"]
+        etcdUrl = strings.Split(params["etcdUrl"], ",")
    }
    if (len(etcdUrl) == 0)
    {
-        return nil, status.Error(codes.InvalidArgument, "etcd_address is missing in "+configPath)
+        switch config["etcd_address"].(type)
+        {
+        case string:
+            etcdUrl = strings.Split(config["etcd_address"].(string), ",")
+        case []string:
+            etcdUrl = config["etcd_address"].([]string)
        }
-    return ctxVars, nil
+    }
+    etcdPrefix := params["etcdPrefix"]
+    if (etcdPrefix == "")
+    {
+        etcdPrefix, _ = config["etcd_prefix"].(string)
+        if (etcdPrefix == "")
+        {
+            etcdPrefix = "/vitastor"
+        }
+    }
+    else
+    {
+        ctxVars["etcdPrefix"] = etcdPrefix
+    }
+    return ctxVars, etcdUrl, etcdPrefix
 }

 func invokeCLI(ctxVars map[string]string, args []string) ([]byte, error)
 {
+    if (ctxVars["etcdUrl"] != "")
+    {
+        args = append(args, "--etcd_address", ctxVars["etcdUrl"])
+    }
+    if (ctxVars["etcdPrefix"] != "")
+    {
+        args = append(args, "--etcd_prefix", ctxVars["etcdPrefix"])
+    }
    if (ctxVars["configPath"] != "")
    {
        args = append(args, "--config_path", ctxVars["configPath"])
    }
-    stdout, _, err := system("/usr/bin/vitastor-cli", args...)
-    return stdout, err
+    c := exec.Command("/usr/bin/vitastor-cli", args...)
+    var stdout, stderr bytes.Buffer
+    c.Stdout = &stdout
+    c.Stderr = &stderr
+    err := c.Run()
+    stderrStr := string(stderr.Bytes())
+    if (err != nil)
+    {
+        klog.Errorf("vitastor-cli %s failed: %s, status %s\n", strings.Join(args, " "), stderrStr, err)
+        return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
+    }
+    return stdout.Bytes(), nil
 }

 // Create the volume
@ -140,57 +160,27 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
        return nil, status.Error(codes.InvalidArgument, "volume capabilities is a required field")
    }

-    ctxVars, err := GetConnectionParams(req.Parameters)
-    if (err != nil)
-    {
-        return nil, err
-    }
-
-    err = cs.checkCaps(volumeCapabilities, ctxVars["vitastorfs"] != "")
-    if (err != nil)
-    {
-        return nil, err
-    }
-
-    pool := req.Parameters["poolId"]
-    if (pool == "")
+    etcdVolumePrefix := req.Parameters["etcdVolumePrefix"]
+    poolId, _ := strconv.ParseUint(req.Parameters["poolId"], 10, 64)
+    if (poolId == 0)
    {
        return nil, status.Error(codes.InvalidArgument, "poolId is missing in storage class configuration")
    }
-    volumePrefix := req.Parameters["volumePrefix"]
-    if (volumePrefix == "")
-    {
-        // Old name
-        volumePrefix = req.Parameters["etcdVolumePrefix"]
-    }
-    volName := volumePrefix + req.GetName()
+
+    volName := etcdVolumePrefix + req.GetName()
    volSize := 1 * GB
    if capRange := req.GetCapacityRange(); capRange != nil
    {
        volSize = ((capRange.GetRequiredBytes() + MB - 1) / MB) * MB
    }

-    if (ctxVars["vitastorfs"] != "")
+    ctxVars, etcdUrl, _ := GetConnectionParams(req.Parameters)
+    if (len(etcdUrl) == 0)
    {
-        // Nothing to create, subdirectories are created during mounting
-        // FIXME: It would be cool to support quotas some day and set it here
-        if (req.VolumeContentSource.GetSnapshot() != nil)
-        {
-            return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
-        }
-        ctxVars["name"] = volName
-        ctxVars["pool"] = pool
-        volumeIdJson, _ := json.Marshal(ctxVars)
-        return &csi.CreateVolumeResponse{
-            Volume: &csi.Volume{
-                // Ugly, but VolumeContext isn't passed to DeleteVolume :-(
-                VolumeId: string(volumeIdJson),
-                CapacityBytes: volSize,
-            },
-        }, nil
+        return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
    }

-    args := []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", pool }
+    args := []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", fmt.Sprintf("%v", poolId) }

    // Support creation from snapshot
    var src *csi.VolumeContentSource
@ -217,7 +207,7 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
    }

    // Create image using vitastor-cli
-    _, err = invokeCLI(ctxVars, args)
+    _, err := invokeCLI(ctxVars, args)
    if (err != nil)
    {
        if (strings.Index(err.Error(), "already exists") > 0)
@ -267,17 +257,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
    }
    volName := volVars["name"]

-    ctxVars, err := GetConnectionParams(volVars)
-    if (err != nil)
-    {
-        return nil, err
-    }
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        // FIXME: Delete FS subdirectory
-        return &csi.DeleteVolumeResponse{}, nil
-    }
+    ctxVars, _, _ := GetConnectionParams(volVars)

    _, err = invokeCLI(ctxVars, []string{ "rm", volName })
    if (err != nil)
@ -313,72 +293,19 @@ func (cs *ControllerServer) ValidateVolumeCapabilities(ctx context.Context, req
    {
        return nil, status.Error(codes.InvalidArgument, "volumeId is nil")
    }
-    volVars := make(map[string]string)
-    err := json.Unmarshal([]byte(volumeID), &volVars)
-    if (err != nil)
-    {
-        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
-    }
-    ctxVars, err := GetConnectionParams(volVars)
-    if (err != nil)
-    {
-        return nil, err
-    }
-
    volumeCapabilities := req.GetVolumeCapabilities()
    if (volumeCapabilities == nil)
    {
        return nil, status.Error(codes.InvalidArgument, "volumeCapabilities is nil")
    }

-    err = cs.checkCaps(volumeCapabilities, ctxVars["vitastorfs"] != "")
-    if (err != nil)
-    {
-        return nil, err
-    }
-
-    return &csi.ValidateVolumeCapabilitiesResponse{
-        Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
-            VolumeCapabilities: req.VolumeCapabilities,
-        },
-    }, nil
-}
-
-func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability, fs bool) error
-{
    var volumeCapabilityAccessModes []*csi.VolumeCapability_AccessMode
    for _, mode := range []csi.VolumeCapability_AccessMode_Mode{
        csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
-        csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY,
-        csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY,
-        csi.VolumeCapability_AccessMode_SINGLE_NODE_SINGLE_WRITER,
-        csi.VolumeCapability_AccessMode_SINGLE_NODE_MULTI_WRITER,
-    } {
-        volumeCapabilityAccessModes = append(volumeCapabilityAccessModes, &csi.VolumeCapability_AccessMode{Mode: mode})
-    }
-    for _, capability := range volumeCapabilities
-    {
-        if (capability.GetBlock() != nil)
-        {
-            if (fs)
-            {
-                return status.Errorf(codes.InvalidArgument, "%v not supported with FS-based volumes", capability)
-            }
-            for _, mode := range []csi.VolumeCapability_AccessMode_Mode{
-                csi.VolumeCapability_AccessMode_MULTI_NODE_SINGLE_WRITER,
        csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
    } {
        volumeCapabilityAccessModes = append(volumeCapabilityAccessModes, &csi.VolumeCapability_AccessMode{Mode: mode})
    }
-            break
-        }
-    }
-
-    if (fs)
-    {
-        // All access modes including RWX are supported with FS-based volumes
-        return nil
-    }

    capabilitySupport := false
    for _, capability := range volumeCapabilities
@ -394,10 +321,14 @@ func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability

    if (!capabilitySupport)
    {
-        return status.Errorf(codes.InvalidArgument, "%v not supported", volumeCapabilities)
+        return nil, status.Errorf(codes.NotFound, "%v not supported", req.GetVolumeCapabilities())
    }

-    return nil
+    return &csi.ValidateVolumeCapabilitiesResponse{
+        Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
+            VolumeCapabilities: req.VolumeCapabilities,
+        },
+    }, nil
 }

 // ListVolumes returns a list of volumes
@ -486,12 +417,6 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
-    }
-
    volName := ctxVars["name"]

    // Create image using vitastor-cli
@ -544,16 +469,7 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS
    volName := volVars["name"]
    snapName := volVars["snapshot"]

-    ctxVars, err := GetConnectionParams(volVars)
-    if (err != nil)
-    {
-        return nil, err
-    }
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
-    }
+    ctxVars, _, _ := GetConnectionParams(volVars)

    _, err = invokeCLI(ctxVars, []string{ "rm", volName+"@"+snapName })
    if (err != nil)
@ -580,16 +496,7 @@ func (cs *ControllerServer) ListSnapshots(ctx context.Context, req *csi.ListSnap
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    volName := volVars["name"]
-    ctxVars, err := GetConnectionParams(volVars)
-    if (err != nil)
-    {
-        return nil, err
-    }
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
-    }
+    ctxVars, _, _ := GetConnectionParams(volVars)

    inodeCfg, err := invokeList(ctxVars, volName+"@*", false)
    if (err != nil)
@ -648,21 +555,7 @@ func (cs *ControllerServer) ControllerExpandVolume(ctx context.Context, req *csi
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    volName := volVars["name"]
-    ctxVars, err := GetConnectionParams(volVars)
-    if (err != nil)
-    {
-        return nil, err
-    }
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        // Nothing to change
-        // FIXME: Support quotas and change quota here
-        return &csi.ControllerExpandVolumeResponse{
-            CapacityBytes: req.CapacityRange.RequiredBytes,
-            NodeExpansionRequired: false,
-        }, nil
-    }
+    ctxVars, _, _ := GetConnectionParams(volVars)

    inodeCfg, err := invokeList(ctxVars, volName, true)
    if (err != nil)
--- a/csi/src/nodeserver.go
+++ b/csi/src/nodeserver.go
@ -5,19 +5,11 @@ package vitastor

 import (
    "context"
-    "crypto/sha1"
-    "encoding/hex"
-    "encoding/json"
-    "fmt"
    "os"
    "os/exec"
-    "path/filepath"
-    "regexp"
-    "strconv"
+    "encoding/json"
    "strings"
-    "sync"
-    "syscall"
-    "time"
+    "bytes"

    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/status"
@ -33,354 +25,52 @@ import (
 type NodeServer struct
 {
    *Driver
-    useVduse        bool
-    stateDir        string
-    nfsStageDir     string
    mounter mount.Interface
-    restartInterval time.Duration
-    mu              sync.Mutex
-    cond            *sync.Cond
-    volumeLocks     map[string]bool
-}
-
-type DeviceState struct
-{
-    ConfigPath string `json:"configPath"`
-    VdpaId     string `json:"vdpaId"`
-    Image      string `json:"image"`
-    Blockdev   string `json:"blockdev"`
-    Readonly   bool   `json:"readonly"`
-    PidFile    string `json:"pidFile"`
-}
-
-type NfsState struct
-{
-    ConfigPath string `json:"configPath"`
-    FsName     string `json:"fsName"`
-    Pool       string `json:"pool"`
-    Path       string `json:"path"`
-    Port       int    `json:"port"`
 }

 // NewNodeServer create new instance node
 func NewNodeServer(driver *Driver) *NodeServer
 {
-    stateDir := os.Getenv("STATE_DIR")
-    if (stateDir == "")
-    {
-        stateDir = "/run/vitastor-csi"
-    }
-    if (stateDir[len(stateDir)-1] != '/')
-    {
-        stateDir += "/"
-    }
-    nfsStageDir := os.Getenv("NFS_STAGE_DIR")
-    if (nfsStageDir == "")
-    {
-        nfsStageDir = "/var/lib/kubelet/plugins/csi.vitastor.io/nfs"
-    }
-    ns := &NodeServer{
+    return &NodeServer{
        Driver: driver,
-        useVduse:    checkVduseSupport(),
-        stateDir:    stateDir,
-        nfsStageDir: nfsStageDir,
        mounter: mount.New(""),
-        volumeLocks: make(map[string]bool),
-    }
-    ns.cond = sync.NewCond(&ns.mu)
-    if (ns.useVduse)
-    {
-        ns.restoreVduseDaemons()
-        dur, err := time.ParseDuration(os.Getenv("RESTART_INTERVAL"))
-        if (err != nil)
-        {
-            dur = 10 * time.Second
-        }
-        ns.restartInterval = dur
-        if (ns.restartInterval != time.Duration(0))
-        {
-            go ns.restarter()
-        }
-    }
-    return ns
-}
-
-func (ns *NodeServer) lockVolume(lockId string)
-{
-    ns.mu.Lock()
-    defer ns.mu.Unlock()
-    for (ns.volumeLocks[lockId])
-    {
-        ns.cond.Wait()
-    }
-    ns.volumeLocks[lockId] = true
-    ns.cond.Broadcast()
-}
-
-func (ns *NodeServer) unlockVolume(lockId string)
-{
-    ns.mu.Lock()
-    defer ns.mu.Unlock()
-    delete(ns.volumeLocks, lockId)
-    ns.cond.Broadcast()
-}
-
-func (ns *NodeServer) restarter()
-{
-    // Restart dead VDUSE daemons at regular intervals
-    // Otherwise volume I/O may hang in case of a qemu-storage-daemon crash
-    // Moreover, it may lead to a kernel panic of the kernel is configured to
-    // panic on hung tasks
-    ticker := time.NewTicker(ns.restartInterval)
-    defer ticker.Stop()
-    for
-    {
-        <-ticker.C
-        ns.restoreVduseDaemons()
-    }
-}
-
-func (ns *NodeServer) restoreVduseDaemons()
-{
-    pattern := ns.stateDir+"vitastor-vduse-*.json"
-    stateFiles, err := filepath.Glob(pattern)
-    if (err != nil)
-    {
-        klog.Errorf("failed to list %s: %v", pattern, err)
-    }
-    if (len(stateFiles) == 0)
-    {
-        return
-    }
-    devList := make(map[string]interface{})
-    // example output: {"dev":{"test1":{"type":"block","mgmtdev":"vduse","vendor_id":0,"max_vqs":16,"max_vq_size":128}}}
-    devListJSON, _, err := system("/sbin/vdpa", "-j", "dev", "list")
-    if (err != nil)
-    {
-        return
-    }
-    err = json.Unmarshal(devListJSON, &devList)
-    devs, ok := devList["dev"].(map[string]interface{})
-    if (err != nil || !ok)
-    {
-        klog.Errorf("/sbin/vdpa -j dev list returned bad JSON (error %v): %v", err, string(devListJSON))
-        return
-    }
-    for _, stateFile := range stateFiles
-    {
-        ns.checkVduseState(stateFile, devs)
-    }
-}
-
-func (ns *NodeServer) checkVduseState(stateFile string, devs map[string]interface{})
-{
-    // Check if VDPA device is still added to the bus
-    vdpaId := filepath.Base(stateFile)
-    vdpaId = vdpaId[0:len(vdpaId)-5]
-    if (devs[vdpaId] == nil)
-    {
-        // Unused, clean it up
-        unmapVduseById(ns.stateDir, vdpaId)
-        return
-    }
-
-    // Read state file
-    stateJSON, err := os.ReadFile(stateFile)
-    if (err != nil)
-    {
-        klog.Warningf("error reading state file %v: %v", stateFile, err)
-        return
-    }
-    var state DeviceState
-    err = json.Unmarshal(stateJSON, &state)
-    if (err != nil)
-    {
-        klog.Warningf("state file %v contains invalid JSON (error %v): %v", stateFile, err, string(stateJSON))
-        return
-    }
-
-    // Lock volume
-    ns.lockVolume(state.ConfigPath+":block:"+state.Image)
-    defer ns.unlockVolume(state.ConfigPath+":block:"+state.Image)
-
-    // Recheck state file after locking
-    _, err = os.ReadFile(stateFile)
-    if (err != nil)
-    {
-        klog.Warningf("state file %v disappeared, skipping volume", stateFile)
-        return
-    }
-
-    // Check if the storage daemon is still active
-    pidFile := ns.stateDir + vdpaId + ".pid"
-    exists := false
-    proc, err := findByPidFile(pidFile)
-    if (err == nil)
-    {
-        exists = proc.Signal(syscall.Signal(0)) == nil
-    }
-    if (!exists)
-    {
-        // Restart daemon
-        klog.Warningf("restarting storage daemon for volume %v (VDPA ID %v)", state.Image, vdpaId)
-        err = startStorageDaemon(vdpaId, state.Image, pidFile, state.ConfigPath, state.Readonly)
-        if (err != nil)
-        {
-            klog.Warningf("failed to restart storage daemon for volume %v: %v", state.Image, err)
-        }
-    }
-}
-
-func (ns *NodeServer) restoreNfsDaemons()
-{
-    pattern := ns.stateDir+"vitastor-nfs-*.json"
-    stateFiles, err := filepath.Glob(pattern)
-    if (err != nil)
-    {
-        klog.Errorf("failed to list %s: %v", pattern, err)
-    }
-    if (len(stateFiles) == 0)
-    {
-        return
-    }
-    activeNFS, err := ns.listActiveNFS()
-    if (err != nil)
-    {
-        return
-    }
-    // Check all state files and try to restore active mounts
-    for _, stateFile := range stateFiles
-    {
-        ns.checkNfsState(stateFile, activeNFS)
-    }
-}
-
-func (ns *NodeServer) readNfsState(stateFile string, allowNotExists bool) (*NfsState, error)
-{
-    stateJSON, err := os.ReadFile(stateFile)
-    if (err != nil)
-    {
-        if (allowNotExists && os.IsNotExist(err))
-        {
-            return nil, nil
-        }
-        klog.Warningf("error reading state file %v: %v", stateFile, err)
-        return nil, err
-    }
-    var state NfsState
-    err = json.Unmarshal(stateJSON, &state)
-    if (err != nil)
-    {
-        klog.Warningf("state file %v contains invalid JSON (error %v): %v", stateFile, err, string(stateJSON))
-        return nil, err
-    }
-    return &state, nil
-}
-
-func (ns *NodeServer) checkNfsState(stateFile string, activeNfs map[int][]string)
-{
-    // Read state file
-    state, err := ns.readNfsState(stateFile, false)
-    if (err != nil)
-    {
-        return
-    }
-    // Lock FS
-    ns.lockVolume(state.ConfigPath+":fs:"+state.FsName)
-    defer ns.unlockVolume(state.ConfigPath+":fs:"+state.FsName)
-    // Check if NFS at this port is still mounted
-    pidFile := ns.stateDir + filepath.Base(stateFile)
-    pidFile = pidFile[0:len(pidFile)-5] + ".pid"
-    if (len(activeNfs[state.Port]) == 0)
-    {
-        // this is a stale state file, remove it
-        klog.Warningf("state file %v contains stale mount at port %d, removing it", stateFile, state.Port)
-        ns.stopNFS(stateFile, pidFile)
-        return
-    }
-    // Check PID file
-    exists := false
-    proc, err := findByPidFile(pidFile)
-    if (err == nil)
-    {
-        exists = proc.Signal(syscall.Signal(0)) == nil
-    }
-    if (!exists)
-    {
-        // Restart vitastor-nfs server
-        klog.Warningf("restarting NFS server for FS %v at port %v", state.FsName, state.Port)
-        _, _, err := system(
-            "/usr/bin/vitastor-nfs", "start",
-            "--pidfile", pidFile,
-            "--bind", "127.0.0.1",
-            "--port", fmt.Sprintf("%d", state.Port),
-            "--fs", state.FsName,
-            "--pool", state.Pool,
-            "--portmap", "0",
-        )
-        if (err != nil)
-        {
-            klog.Warningf("failed to restart NFS server for FS %v: %v", state.FsName, err)
-        }
    }
 }

 // NodeStageVolume mounts the volume to a staging path on the node.
 func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error)
 {
-    klog.Infof("received node stage volume request %+v", protosanitizer.StripSecrets(req))
-
-    ctxVars := make(map[string]string)
-    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
-    if (err != nil)
-    {
-        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
-    }
-    _, err = GetConnectionParams(ctxVars)
-    if (err != nil)
-    {
-        return nil, err
-    }
-    volName := ctxVars["name"]
-
-    if (ctxVars["vitastorfs"] != "")
-    {
    return &csi.NodeStageVolumeResponse{}, nil
+}
+
+// NodeUnstageVolume unstages the volume from the staging path
+func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error)
+{
+    return &csi.NodeUnstageVolumeResponse{}, nil
+}
+
+func Contains(list []string, s string) bool
+{
+    for i := 0; i < len(list); i++
+    {
+        if (list[i] == s)
+        {
+            return true
        }
+    }
+    return false
+}

-    ns.lockVolume(ctxVars["configPath"]+":block:"+volName)
-    defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
+// NodePublishVolume mounts the volume mounted to the staging path to the target path
+func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error)
+{
+    klog.Infof("received node publish volume request %+v", protosanitizer.StripSecrets(req))

-    targetPath := req.GetStagingTargetPath()
+    targetPath := req.GetTargetPath()
    isBlock := req.GetVolumeCapability().GetBlock() != nil

    // Check that it's not already mounted
-    notmnt, err := mount.IsNotMountPoint(ns.mounter, targetPath)
-    if (err == nil)
-    {
-        if (!notmnt)
-        {
-            klog.Errorf("target path %s is already mounted", targetPath)
-            return nil, fmt.Errorf("target path %s is already mounted", targetPath)
-        }
-        var finfo os.FileInfo
-        finfo, err = os.Stat(targetPath)
-        if (err != nil)
-        {
-            klog.Errorf("failed to stat %s: %v", targetPath, err)
-            return nil, err
-        }
-        if (finfo.IsDir() != (!isBlock))
-        {
-            err = os.Remove(targetPath)
-            if (err != nil)
-            {
-                klog.Errorf("failed to remove %s (to recreate it with correct type): %v", targetPath, err)
-                return nil, err
-            }
-            err = os.ErrNotExist
-        }
-    }
+    _, err := mount.IsNotMountPoint(ns.mounter, targetPath)
    if (err != nil)
    {
        if (os.IsNotExist(err))
@ -391,13 +81,13 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
                if (err != nil)
                {
                    klog.Errorf("failed to create block device mount target %s with error: %v", targetPath, err)
-                    return nil, err
+                    return nil, status.Error(codes.Internal, err.Error())
                }
                err = pathFile.Close()
                if (err != nil)
                {
                    klog.Errorf("failed to close %s with error: %v", targetPath, err)
-                    return nil, err
+                    return nil, status.Error(codes.Internal, err.Error())
                }
            }
            else
@ -406,34 +96,60 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
                if (err != nil)
                {
                    klog.Errorf("failed to create fs mount target %s with error: %v", targetPath, err)
-                    return nil, err
+                    return nil, status.Error(codes.Internal, err.Error())
                }
            }
        }
        else
        {
-            return nil, err
+            return nil, status.Error(codes.Internal, err.Error())
        }
    }

-    var devicePath, vdpaId string
-    if (!ns.useVduse)
-    {
-        devicePath, err = mapNbd(volName, ctxVars, false)
-    }
-    else
-    {
-        devicePath, vdpaId, err = mapVduse(ns.stateDir, volName, ctxVars, false)
-    }
+    ctxVars := make(map[string]string)
+    err = json.Unmarshal([]byte(req.VolumeId), &ctxVars)
    if (err != nil)
    {
-        return nil, err
+        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
+    volName := ctxVars["name"]
+
+    _, etcdUrl, etcdPrefix := GetConnectionParams(ctxVars)
+    if (len(etcdUrl) == 0)
+    {
+        return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
+    }
+
+    // Map NBD device
+    // FIXME: Check if already mapped
+    args := []string{
+        "map", "--etcd_address", strings.Join(etcdUrl, ","),
+        "--etcd_prefix", etcdPrefix,
+        "--image", volName,
+    };
+    if (ctxVars["configPath"] != "")
+    {
+        args = append(args, "--config_path", ctxVars["configPath"])
+    }
+    if (req.GetReadonly())
+    {
+        args = append(args, "--readonly", "1")
+    }
+    c := exec.Command("/usr/bin/vitastor-nbd", args...)
+    var stdout, stderr bytes.Buffer
+    c.Stdout, c.Stderr = &stdout, &stderr
+    err = c.Run()
+    stdoutStr, stderrStr := string(stdout.Bytes()), string(stderr.Bytes())
+    if (err != nil)
+    {
+        klog.Errorf("vitastor-nbd map failed: %s, status %s\n", stdoutStr+stderrStr, err)
+        return nil, status.Error(codes.Internal, stdoutStr+stderrStr+" (status "+err.Error()+")")
+    }
+    devicePath := strings.TrimSpace(stdoutStr)

    diskMounter := &mount.SafeFormatAndMount{Interface: ns.mounter, Exec: utilexec.New()}
    if (isBlock)
    {
-        klog.Infof("bind-mounting %s to %s", devicePath, targetPath)
        err = diskMounter.Mount(devicePath, targetPath, "", []string{"bind"})
    }
    else
@ -463,40 +179,39 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
        readOnly := Contains(opt, "ro")
        if (existingFormat == "" && !readOnly)
        {
+            var cmdOut []byte
            switch fsType
            {
                case "ext4":
                    args := []string{"-m0", "-Enodiscard,lazy_itable_init=1,lazy_journal_init=1", devicePath}
-                    _, err = systemCombined("mkfs.ext4", args...)
+                    cmdOut, err = diskMounter.Exec.Command("mkfs.ext4", args...).CombinedOutput()
                case "xfs":
-                    _, err = systemCombined("mkfs.xfs", "-K", devicePath)
+                    cmdOut, err = diskMounter.Exec.Command("mkfs.xfs", "-K", devicePath).CombinedOutput()
            }
            if (err != nil)
            {
+                klog.Errorf("failed to run mkfs error: %v, output: %v", err, string(cmdOut))
                goto unmap
            }
        }

-        klog.Infof("formatting and mounting %s to %s with FS %s, options: %v", devicePath, targetPath, fsType, opt)
        err = diskMounter.FormatAndMount(devicePath, targetPath, fsType, opt)
-        if (err == nil)
-        {
-            klog.Infof("successfully mounted %s to %s", devicePath, targetPath)
-        }

        // Try to run online resize on mount.
        // FIXME: Implement online resize. It requires online resize support in vitastor-nbd.
        if (err == nil && existingFormat != "" && !readOnly)
        {
+            var cmdOut []byte
            switch (fsType)
            {
                case "ext4":
-                    _, err = systemCombined("resize2fs", devicePath)
+                    cmdOut, err = diskMounter.Exec.Command("resize2fs", devicePath).CombinedOutput()
                case "xfs":
-                    _, err = systemCombined("xfs_growfs", devicePath)
+                    cmdOut, err = diskMounter.Exec.Command("xfs_growfs", devicePath).CombinedOutput()
            }
            if (err != nil)
            {
+                klog.Errorf("failed to run resizefs error: %v, output: %v", err, string(cmdOut))
                goto unmap
            }
        }
@ -509,423 +224,51 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
        )
        goto unmap
    }
-    return &csi.NodeStageVolumeResponse{}, nil
+    return &csi.NodePublishVolumeResponse{}, nil

 unmap:
-    if (!ns.useVduse || len(devicePath) >= 8 && devicePath[0:8] == "/dev/nbd")
+    // unmap NBD device
+    unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
+    if (unmapErr != nil)
    {
-        unmapNbd(devicePath)
+        klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
    }
-    else
-    {
-        unmapVduseById(ns.stateDir, vdpaId)
-    }
-    return nil, err
-}
-
-// NodeUnstageVolume unstages the volume from the staging path
-func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error)
-{
-    klog.Infof("received node unstage volume request %+v", protosanitizer.StripSecrets(req))
-
-    ctxVars := make(map[string]string)
-    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
-    if (err != nil)
-    {
-        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
-    }
-    volName := ctxVars["name"]
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        return &csi.NodeUnstageVolumeResponse{}, nil
-    }
-
-    ns.lockVolume(ctxVars["configPath"]+":block:"+volName)
-    defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
-
-    targetPath := req.GetStagingTargetPath()
-    devicePath, _, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
-    if (err != nil)
-    {
-        if (os.IsNotExist(err))
-        {
-            return nil, status.Error(codes.NotFound, "Target path not found")
-        }
-        return nil, err
-    }
-    if (devicePath == "")
-    {
-        // volume not mounted
-        klog.Warningf("%s is not a mountpoint, deleting", targetPath)
-        os.Remove(targetPath)
-        return &csi.NodeUnstageVolumeResponse{}, nil
-    }
-
-    refList, err := ns.mounter.GetMountRefs(targetPath)
-    if (err != nil)
-    {
-        return nil, err
-    }
-    if (len(refList) > 0)
-    {
-        klog.Warningf("%s is still referenced: %v", targetPath, refList)
-    }
-
-    // unmount
-    err = mount.CleanupMountPoint(targetPath, ns.mounter, false)
-    if (err != nil)
-    {
-        return nil, err
-    }
-
-    // unmap device
-    if (len(refList) == 0)
-    {
-        if (!ns.useVduse)
-        {
-            unmapNbd(devicePath)
-        }
-        else
-        {
-            unmapVduse(ns.stateDir, devicePath)
-        }
-    }
-
-    return &csi.NodeUnstageVolumeResponse{}, nil
-}
-
-// Mount or check if NFS is already mounted
-func (ns *NodeServer) mountNFS(ctxVars map[string]string) (string, error)
-{
-    sum := sha1.Sum([]byte(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"]))
-    nfsHash := hex.EncodeToString(sum[:])
-    stateFile := ns.stateDir+"vitastor-nfs-"+nfsHash+".json"
-    pidFile := ns.stateDir+"vitastor-nfs-"+nfsHash+".pid"
-    mountPath := ns.nfsStageDir+"/"+nfsHash
-    state, err := ns.readNfsState(stateFile, true)
-    if (state != nil)
-    {
-        return state.Path, nil
-    }
-    if (err != nil)
-    {
-        return "", err
-    }
-    err = os.MkdirAll(mountPath, 0777)
-    if (err != nil)
-    {
-        return "", err
-    }
-    // Create a new mount
-    state = &NfsState{
-        ConfigPath: ctxVars["configPath"],
-        FsName:     ctxVars["vitastorfs"],
-        Pool:       ctxVars["pool"],
-        Path:       mountPath,
-    }
-    klog.Infof("starting new NFS server for FS %v", state.FsName)
-    stdout, _, err := system(
-        "/usr/bin/vitastor-nfs", "start",
-        "--pidfile", pidFile,
-        "--bind", "127.0.0.1",
-        "--port", "auto",
-        "--fs", state.FsName,
-        "--pool", state.Pool,
-        "--portmap", "0",
-    )
-    if (err != nil)
-    {
-        return "", err
-    }
-    match := regexp.MustCompile("Port: (\\d+)").FindStringSubmatch(string(stdout))
-    if (match == nil)
-    {
-        klog.Errorf("failed to find port in vitastor-nfs output: %v", string(stdout))
-        ns.stopNFS(stateFile, pidFile)
-        return "", fmt.Errorf("failed to find port in vitastor-nfs output (bad vitastor-nfs version?)")
-    }
-    port, _ := strconv.ParseUint(match[1], 0, 16)
-    state.Port = int(port)
-    // Write state file
-    stateJSON, _ := json.Marshal(state)
-    err = os.WriteFile(stateFile, stateJSON, 0600)
-    if (err != nil)
-    {
-        klog.Errorf("failed to write state file %v", stateFile)
-        ns.stopNFS(stateFile, pidFile)
-        return "", err
-    }
-    // Mount NFS
-    _, _, err = system(
-        "mount", "-t", "nfs", "127.0.0.1:/", state.Path,
-        "-o", fmt.Sprintf("port=%d,mountport=%d,nfsvers=3,soft,nolock,tcp", port, port),
-    )
-    if (err != nil)
-    {
-        ns.stopNFS(stateFile, pidFile)
-        return "", err
-    }
-    return state.Path, nil
-}
-
-// Mount or check if NFS is already mounted
-func (ns *NodeServer) checkStopNFS(ctxVars map[string]string)
-{
-    sum := sha1.Sum([]byte(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"]))
-    nfsHash := hex.EncodeToString(sum[:])
-    stateFile := ns.stateDir+"vitastor-nfs-"+nfsHash+".json"
-    pidFile := ns.stateDir+"vitastor-nfs-"+nfsHash+".pid"
-    mountPath := ns.nfsStageDir+"/"+nfsHash
-    state, err := ns.readNfsState(stateFile, true)
-    if (state == nil)
-    {
-        return
-    }
-    activeNFS, err := ns.listActiveNFS()
-    if (err != nil)
-    {
-        return
-    }
-    if (len(activeNFS[state.Port]) > 0)
-    {
-        return
-    }
-    // All volume mounts are detached, unmount the root mount and kill the server
-    err = mount.CleanupMountPoint(mountPath, ns.mounter, false)
-    if (err != nil)
-    {
-        klog.Errorf("failed to unmount %v: %v", mountPath, err)
-        return
-    }
-    ns.stopNFS(stateFile, pidFile)
-}
-
-func (ns *NodeServer) stopNFS(stateFile, pidFile string)
-{
-    err := killByPidFile(pidFile)
-    if (err != nil)
-    {
-        klog.Errorf("failed to kill process with pid from %v: %v", pidFile, err)
-    }
-    os.Remove(pidFile)
-    os.Remove(stateFile)
-}
-
-func (ns *NodeServer) listActiveNFS() (map[int][]string, error)
-{
-    mounts, err := mount.ParseMountInfo("/proc/self/mountinfo")
-    if (err != nil)
-    {
-        klog.Errorf("failed to list mounts: %v", err)
-        return nil, err
-    }
-    activeNFS := make(map[int][]string)
-    for _, mount := range mounts
-    {
-        // Volume mounts always refer to subpaths
-        if (mount.FsType == "nfs" && mount.Root != "/")
-        {
-            for _, opt := range mount.MountOptions
-            {
-                if (strings.HasPrefix(opt, "port="))
-                {
-                    port64, err := strconv.ParseUint(opt[5:], 10, 16)
-                    if (err == nil)
-                    {
-                        activeNFS[int(port64)] = append(activeNFS[int(port64)], mount.MountPoint)
-                    }
-                }
-            }
-        }
-    }
-    return activeNFS, nil
-}
-
-// NodePublishVolume mounts the volume mounted to the staging path to the target path
-func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error)
-{
-    klog.Infof("received node publish volume request %+v", protosanitizer.StripSecrets(req))
-
-    ctxVars := make(map[string]string)
-    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
-    if (err != nil)
-    {
-        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
-    }
-    _, err = GetConnectionParams(ctxVars)
-    if (err != nil)
-    {
-        return nil, err
-    }
-    volName := ctxVars["name"]
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        ns.lockVolume(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"])
-        defer ns.unlockVolume(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"])
-    }
-    else
-    {
-        ns.lockVolume(ctxVars["configPath"]+":block:"+volName)
-        defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
-    }
-
-    stagingTargetPath := req.GetStagingTargetPath()
-    targetPath := req.GetTargetPath()
-    isBlock := req.GetVolumeCapability().GetBlock() != nil
-
-    if (ctxVars["vitastorfs"] == "")
-    {
-        // Check that stagingTargetPath is mounted
-        notmnt, err := mount.IsNotMountPoint(ns.mounter, stagingTargetPath)
-        if (err != nil)
-        {
-            klog.Errorf("staging path %v is not mounted: %w", stagingTargetPath, err)
-            return nil, fmt.Errorf("staging path %v is not mounted: %w", stagingTargetPath, err)
-        }
-        else if (notmnt)
-        {
-            klog.Errorf("staging path %v is not mounted", stagingTargetPath)
-            return nil, fmt.Errorf("staging path %v is not mounted", stagingTargetPath)
-        }
-    }
-
-    // Check that targetPath is not already mounted
-    notmnt, err := mount.IsNotMountPoint(ns.mounter, targetPath)
-    if (err != nil)
-    {
-        if (os.IsNotExist(err))
-        {
-            if (isBlock)
-            {
-                pathFile, err := os.OpenFile(targetPath, os.O_CREATE|os.O_RDWR, 0o600)
-                if (err != nil)
-                {
-                    klog.Errorf("failed to create block device mount target %s with error: %v", targetPath, err)
-                    return nil, err
-                }
-                err = pathFile.Close()
-                if (err != nil)
-                {
-                    klog.Errorf("failed to close %s with error: %v", targetPath, err)
-                    return nil, err
-                }
-            }
-            else
-            {
-                err := os.MkdirAll(targetPath, 0777)
-                if (err != nil)
-                {
-                    klog.Errorf("failed to create fs mount target %s with error: %v", targetPath, err)
-                    return nil, err
-                }
-            }
-        }
-        else
-        {
-            return nil, err
-        }
-    }
-    else if (!notmnt)
-    {
-        klog.Errorf("target path %s is already mounted", targetPath)
-        return nil, fmt.Errorf("target path %s is already mounted", targetPath)
-    }
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        nfspath, err := ns.mountNFS(ctxVars)
-        if (err != nil)
-        {
-            ns.checkStopNFS(ctxVars)
-            return nil, err
-        }
-        // volName should include prefix
-        stagingTargetPath = nfspath+"/"+volName
-        err = os.MkdirAll(stagingTargetPath, 0777)
-        if (err != nil && !os.IsExist(err))
-        {
-            ns.checkStopNFS(ctxVars)
-            return nil, err
-        }
-    }
-
-    execArgs := []string{"--bind", stagingTargetPath, targetPath}
-    if (req.GetReadonly())
-    {
-        execArgs = append(execArgs, "-o", "ro")
-    }
-    cmd := exec.Command("mount", execArgs...)
-    cmd.Stderr = os.Stderr
-    klog.Infof("binding volume %v (%v) from %v to %v", volName, ctxVars["configPath"], stagingTargetPath, targetPath)
-    out, err := cmd.Output()
-    if (err != nil)
-    {
-        if (ctxVars["vitastorfs"] != "")
-        {
-            ns.checkStopNFS(ctxVars)
-        }
-        return nil, fmt.Errorf("Error running mount %v: %s", strings.Join(execArgs, " "), out)
-    }
-
-    return &csi.NodePublishVolumeResponse{}, nil
+    return nil, status.Error(codes.Internal, err.Error())
 }

 // NodeUnpublishVolume unmounts the volume from the target path
 func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error)
 {
    klog.Infof("received node unpublish volume request %+v", protosanitizer.StripSecrets(req))
-
-    ctxVars := make(map[string]string)
-    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
-    if (err != nil)
-    {
-        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
-    }
-    volName := ctxVars["name"]
-
-    if (ctxVars["vitastorfs"] != "")
-    {
-        ns.lockVolume(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"])
-        defer ns.unlockVolume(ctxVars["configPath"]+":fs:"+ctxVars["vitastorfs"])
-    }
-    else
-    {
-        ns.lockVolume(ctxVars["configPath"]+":block:"+volName)
-        defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
-    }
-
    targetPath := req.GetTargetPath()
-    devicePath, _, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
+    devicePath, refCount, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
    if (err != nil)
    {
        if (os.IsNotExist(err))
        {
            return nil, status.Error(codes.NotFound, "Target path not found")
        }
-        return nil, err
+        return nil, status.Error(codes.Internal, err.Error())
    }
    if (devicePath == "")
    {
-        // volume not mounted
-        klog.Warningf("%s is not a mountpoint, deleting", targetPath)
-        os.Remove(targetPath)
-        return &csi.NodeUnpublishVolumeResponse{}, nil
+        return nil, status.Error(codes.NotFound, "Volume not mounted")
    }
-
    // unmount
    err = mount.CleanupMountPoint(targetPath, ns.mounter, false)
    if (err != nil)
    {
-        return nil, err
+        return nil, status.Error(codes.Internal, err.Error())
    }
-
-    if (ctxVars["vitastorfs"] != "")
+    // unmap NBD device
+    if (refCount == 1)
    {
-        ns.checkStopNFS(ctxVars)
+        unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
+        if (unmapErr != nil)
+        {
+            klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
+        }
    }
-
    return &csi.NodeUnpublishVolumeResponse{}, nil
 }

@ -944,17 +287,7 @@ func (ns *NodeServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandV
 // NodeGetCapabilities returns the supported capabilities of the node server
 func (ns *NodeServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error)
 {
-    return &csi.NodeGetCapabilitiesResponse{
-        Capabilities: []*csi.NodeServiceCapability{
-            &csi.NodeServiceCapability{
-                Type: &csi.NodeServiceCapability_Rpc{
-                    Rpc: &csi.NodeServiceCapability_RPC{
-                        Type: csi.NodeServiceCapability_RPC_STAGE_UNSTAGE_VOLUME,
-                    },
-                },
-            },
-        },
-    }, nil
+    return &csi.NodeGetCapabilitiesResponse{}, nil
 }

 // NodeGetInfo returns NodeGetInfoResponse for CO.
--- a/csi/src/utils.go
+++ b/csi/src/utils.go
@ -1,342 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
-
-package vitastor
-
-import (
-    "bytes"
-    "errors"
-    "encoding/json"
-    "fmt"
-    "os"
-    "os/exec"
-    "path/filepath"
-    "strconv"
-    "strings"
-    "syscall"
-
-    "k8s.io/klog"
-    "google.golang.org/grpc/codes"
-    "google.golang.org/grpc/status"
-)
-
-func Contains(list []string, s string) bool
-{
-    for i := 0; i < len(list); i++
-    {
-        if (list[i] == s)
-        {
-            return true
-        }
-    }
-    return false
-}
-
-func checkVduseSupport() bool
-{
-    // Check VDUSE support (vdpa, vduse, virtio-vdpa kernel modules)
-    vduse := true
-    for _, mod := range []string{"vdpa", "vduse", "virtio-vdpa"}
-    {
-        _, err := os.Stat("/sys/module/"+mod)
-        if (err != nil)
-        {
-            if (!errors.Is(err, os.ErrNotExist))
-            {
-                klog.Errorf("failed to check /sys/module/%s: %v", mod, err)
-            }
-            c := exec.Command("/sbin/modprobe", mod)
-            c.Stdout = os.Stderr
-            c.Stderr = os.Stderr
-            err := c.Run()
-            if (err != nil)
-            {
-                klog.Errorf("/sbin/modprobe %s failed: %v", mod, err)
-                vduse = false
-                break
-            }
-        }
-    }
-    // Check that vdpa tool functions
-    if (vduse)
-    {
-        c := exec.Command("/sbin/vdpa", "-j", "dev")
-        c.Stderr = os.Stderr
-        err := c.Run()
-        if (err != nil)
-        {
-            klog.Errorf("/sbin/vdpa -j dev failed: %v", err)
-            vduse = false
-        }
-    }
-    if (!vduse)
-    {
-        klog.Errorf(
-            "Your host apparently has no VDUSE support. VDUSE support disabled, NBD will be used to map devices."+
-            " For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
-        )
-    }
-    else
-    {
-        klog.Infof("VDUSE support enabled successfully")
-    }
-    return vduse
-}
-
-func mapNbd(volName string, ctxVars map[string]string, readonly bool) (string, error)
-{
-    // Map NBD device
-    // FIXME: Check if already mapped
-    args := []string{
-        "map", "--image", volName,
-    }
-    if (ctxVars["configPath"] != "")
-    {
-        args = append(args, "--config_path", ctxVars["configPath"])
-    }
-    if (readonly)
-    {
-        args = append(args, "--readonly", "1")
-    }
-    stdout, stderr, err := system("/usr/bin/vitastor-nbd", args...)
-    dev := strings.TrimSpace(string(stdout))
-    if (dev == "")
-    {
-        return "", fmt.Errorf("vitastor-nbd did not return the name of NBD device. output: %s", stderr)
-    }
-    klog.Infof("Attached volume %s via NBD as %s", volName, dev)
-    return dev, err
-}
-
-func unmapNbd(devicePath string)
-{
-    // unmap NBD device
-    unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
-    if (unmapErr != nil)
-    {
-        klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
-    }
-}
-
-func findByPidFile(pidFile string) (*os.Process, error)
-{
-    pidBuf, err := os.ReadFile(pidFile)
-    if (err != nil)
-    {
-        return nil, err
-    }
-    pid, err := strconv.ParseInt(strings.TrimSpace(string(pidBuf)), 0, 64)
-    if (err != nil)
-    {
-        return nil, err
-    }
-    proc, err := os.FindProcess(int(pid))
-    if (err != nil)
-    {
-        return nil, err
-    }
-    return proc, nil
-}
-
-func killByPidFile(pidFile string) error
-{
-    klog.Infof("killing process with PID from file %s", pidFile)
-    proc, err := findByPidFile(pidFile)
-    if (err != nil)
-    {
-        return err
-    }
-    return proc.Signal(syscall.SIGTERM)
-}
-
-func startStorageDaemon(vdpaId, volName, pidFile, configPath string, readonly bool) error
-{
-    // Start qemu-storage-daemon
-    blockSpec := map[string]interface{}{
-        "node-name": "disk1",
-        "driver": "vitastor",
-        "image": volName,
-        "cache": map[string]bool{
-            "direct": true,
-            "no-flush": false,
-        },
-        "discard": "unmap",
-    }
-    if (configPath != "")
-    {
-        blockSpec["config-path"] = configPath
-    }
-    blockSpecJson, _ := json.Marshal(blockSpec)
-    writable := "true"
-    if (readonly)
-    {
-        writable = "false"
-    }
-    _, _, err := system(
-        "/usr/bin/qemu-storage-daemon", "--daemonize", "--pidfile", pidFile, "--blockdev", string(blockSpecJson),
-        "--export", "vduse-blk,id="+vdpaId+",node-name=disk1,name="+vdpaId+",num-queues=16,queue-size=128,writable="+writable,
-    )
-    return err
-}
-
-func mapVduse(stateDir string, volName string, ctxVars map[string]string, readonly bool) (string, string, error)
-{
-    // Generate state file
-    stateFd, err := os.CreateTemp(stateDir, "vitastor-vduse-*.json")
-    if (err != nil)
-    {
-        return "", "", err
-    }
-    stateFile := stateFd.Name()
-    stateFd.Close()
-    vdpaId := filepath.Base(stateFile)
-    vdpaId = vdpaId[0:len(vdpaId)-5] // remove ".json"
-    pidFile := stateDir + vdpaId + ".pid"
-    // Map VDUSE device via qemu-storage-daemon
-    err = startStorageDaemon(vdpaId, volName, pidFile, ctxVars["configPath"], readonly)
-    if (err == nil)
-    {
-        // Add device to VDPA bus
-        _, _, err = system("/sbin/vdpa", "-j", "dev", "add", "name", vdpaId, "mgmtdev", "vduse")
-        if (err == nil)
-        {
-            // Find block device name
-            var matches []string
-            matches, err = filepath.Glob("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/*")
-            if (err == nil && len(matches) == 0)
-            {
-                err = errors.New("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/* is not found")
-            }
-            if (err == nil)
-            {
-                blockdev := "/dev/"+filepath.Base(matches[0])
-                _, err = os.Stat(blockdev)
-                if (err == nil)
-                {
-                    // Generate state file
-                    stateJSON, _ := json.Marshal(&DeviceState{
-                        ConfigPath: ctxVars["configPath"],
-                        VdpaId:     vdpaId,
-                        Image:      volName,
-                        Blockdev:   blockdev,
-                        Readonly:   readonly,
-                        PidFile:    pidFile,
-                    })
-                    err = os.WriteFile(stateFile, stateJSON, 0600)
-                    if (err == nil)
-                    {
-                        klog.Infof("Attached volume %s via VDUSE as %s (VDPA ID %s)", volName, blockdev, vdpaId)
-                        return blockdev, vdpaId, nil
-                    }
-                }
-            }
-        }
-        killErr := killByPidFile(pidFile)
-        if (killErr != nil)
-        {
-            klog.Errorf("Failed to kill started qemu-storage-daemon: %v", killErr)
-        }
-        os.Remove(stateFile)
-        os.Remove(pidFile)
-    }
-    return "", "", err
-}
-
-func unmapVduse(stateDir, devicePath string)
-{
-    if (len(devicePath) < 6 || devicePath[0:6] != "/dev/v")
-    {
-        klog.Errorf("%s does not start with /dev/v", devicePath)
-        return
-    }
-    vduseDev, err := os.Readlink("/sys/block/"+devicePath[5:])
-    if (err != nil)
-    {
-        klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx): %v", devicePath, err)
-        return
-    }
-    vdpaId := ""
-    p := strings.Index(vduseDev, "/vduse/")
-    if (p >= 0)
-    {
-        vduseDev = vduseDev[p+7:]
-        p = strings.Index(vduseDev, "/")
-        if (p >= 0)
-        {
-            vdpaId = vduseDev[0:p]
-        }
-    }
-    if (vdpaId == "")
-    {
-        klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx), but is %v", devicePath, vduseDev)
-        return
-    }
-    unmapVduseById(stateDir, vdpaId)
-}
-
-func unmapVduseById(stateDir, vdpaId string)
-{
-    _, err := os.Stat("/sys/bus/vdpa/devices/"+vdpaId)
-    if (err != nil)
-    {
-        klog.Errorf("failed to stat /sys/bus/vdpa/devices/"+vdpaId+": %v", err)
-    }
-    else
-    {
-        _, _, _ = system("/sbin/vdpa", "-j", "dev", "del", vdpaId)
-    }
-    stateFile := stateDir + vdpaId + ".json"
-    os.Remove(stateFile)
-    pidFile := stateDir + vdpaId + ".pid"
-    _, err = os.Stat(pidFile)
-    if (os.IsNotExist(err))
-    {
-        // ok, already killed
-    }
-    else if (err != nil)
-    {
-        klog.Errorf("Failed to stat %v: %v", pidFile, err)
-        return
-    }
-    else
-    {
-        err = killByPidFile(pidFile)
-        if (err != nil)
-        {
-            klog.Errorf("Failed to kill started qemu-storage-daemon: %v", err)
-        }
-        os.Remove(pidFile)
-    }
-}
-
-func system(program string, args ...string) ([]byte, []byte, error)
-{
-    klog.Infof("Running "+program+" "+strings.Join(args, " "))
-    c := exec.Command(program, args...)
-    var stdout, stderr bytes.Buffer
-    c.Stdout, c.Stderr = &stdout, &stderr
-    err := c.Run()
-    if (err != nil)
-    {
-        stdoutStr, stderrStr := string(stdout.Bytes()), string(stderr.Bytes())
-        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s\nOutput:\n%s", err, stdoutStr+stderrStr)
-        return nil, nil, status.Error(codes.Internal, stdoutStr+stderrStr+" (status "+err.Error()+")")
-    }
-    return stdout.Bytes(), stderr.Bytes(), nil
-}
-
-func systemCombined(program string, args ...string) ([]byte, error)
-{
-    klog.Infof("Running "+program+" "+strings.Join(args, " "))
-    c := exec.Command(program, args...)
-    var out bytes.Buffer
-    c.Stdout, c.Stderr = &out, &out
-    err := c.Run()
-    if (err != nil)
-    {
-        outStr := string(out.Bytes())
-        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s, status %s\n", outStr, err)
-        return nil, status.Error(codes.Internal, outStr+" (status "+err.Error()+")")
-    }
-    return out.Bytes(), nil
-}
--- a/debian/build-vitastor-bookworm.sh
+++ b/debian/build-vitastor-bookworm.sh
@ -3,5 +3,5 @@
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
-sudo podman build --build-arg DISTRO=debian --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
+sudo podman build --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-bullseye.sh
+++ b/debian/build-vitastor-bullseye.sh
@ -3,5 +3,5 @@
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
-sudo podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
+sudo podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-buster.sh
+++ b/debian/build-vitastor-buster.sh
@ -3,5 +3,5 @@
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
-sudo podman build --build-arg DISTRO=debian --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
+sudo podman build --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-ubuntu-jammy.sh
+++ b/debian/build-vitastor-ubuntu-jammy.sh
@ -1,7 +0,0 @@
-#!/bin/bash
-
-cat < vitastor.Dockerfile > ../Dockerfile
-cd ..
-mkdir -p packages
-sudo podman build --build-arg DISTRO=ubuntu --build-arg REL=jammy -v `pwd`/packages:/root/packages -f Dockerfile .
-rm Dockerfile
--- a/debian/changelog
+++ b/debian/changelog
@ -1,10 +1,10 @@
-vitastor (2.1.0-1) unstable; urgency=medium
+vitastor (1.2.0-1) unstable; urgency=medium

  * Bugfixes

 -- Vitaliy Filippov <vitalif@yourcmc.ru>  Fri, 03 Jun 2022 02:09:44 +0300

-vitastor (0.7.0-1) unstable; urgency=medium
+vitastor (1.2.0-1) unstable; urgency=medium

  * Implement NFS proxy
  * Add documentation
--- a/debian/control
+++ b/debian/control
@ -2,10 +2,7 @@ Source: vitastor
 Section: admin
 Priority: optional
 Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
-Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8),
-  linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev,
-  libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev,
-  node-bindings <!nocheck>, node-gyp, node-nan
+Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config
 Standards-Version: 4.5.0
 Homepage: https://vitastor.io/
 Rules-Requires-Root: no
@ -56,15 +53,3 @@ Architecture: amd64
 Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client (= ${binary:Version})
 Description: Vitastor Proxmox Virtual Environment storage plugin
 Vitastor storage plugin for Proxmox Virtual Environment.
-
-Package: vitastor-opennebula
-Architecture: amd64
-Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client, patch, python3, jq
-Description: Vitastor OpenNebula storage plugin
- Vitastor storage plugin for OpenNebula.
-
-Package: node-vitastor
-Architecture: amd64
-Depends: ${shlibs:Depends}, ${misc:Depends}, node-bindings
-Description: Node.js bindings for Vitastor client
- Node.js native bindings for the Vitastor client library (vitastor-client).
--- a/debian/libvirt.Dockerfile
+++ b/debian/libvirt.Dockerfile
@ -1,14 +1,13 @@
 # Build patched libvirt for Debian Buster or Bullseye/Sid inside a container
-# cd ..; podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/libvirt.Dockerfile .
+# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/libvirt.Dockerfile .

-ARG DISTRO=
 ARG REL=
-FROM $DISTRO:$REL
+FROM debian:$REL
 ARG REL=

 WORKDIR /root

-RUN if ([ "${DISTRO}" = "debian" ]) && ( [ "${REL}" = "buster" -o "${REL}" = "bullseye" ] ); then \
+RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
        echo >> /etc/apt/preferences; \
        echo 'Package: *' >> /etc/apt/preferences; \
@ -24,7 +23,7 @@ RUN apt-get -y build-dep libvirt0
 RUN apt-get -y install libglusterfs-dev
 RUN apt-get --download-only source libvirt

-ADD patches/libvirt-5.0-vitastor.diff patches/libvirt-7.0-vitastor.diff patches/libvirt-7.5-vitastor.diff patches/libvirt-7.6-vitastor.diff patches/libvirt-8.0-vitastor.diff /root
+ADD patches/libvirt-5.0-vitastor.diff patches/libvirt-7.0-vitastor.diff patches/libvirt-7.5-vitastor.diff patches/libvirt-7.6-vitastor.diff /root
 RUN set -e; \
    mkdir -p /root/packages/libvirt-$REL; \
    rm -rf /root/packages/libvirt-$REL/*; \
--- a/debian/node-vitastor.install
+++ b/debian/node-vitastor.install
@ -1 +0,0 @@
-usr/lib/x86_64-linux-gnu/nodejs/vitastor
--- a/debian/patched-qemu.Dockerfile
+++ b/debian/patched-qemu.Dockerfile
@ -1,23 +1,17 @@
 # Build patched QEMU for Debian inside a container
 # cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/patched-qemu.Dockerfile .

-ARG DISTRO=debian
 ARG REL=
-FROM $DISTRO:$REL
-ARG DISTRO=debian
+FROM debian:$REL
 ARG REL=

 WORKDIR /root

-RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" -o "$REL" = "bookworm" ]; then \
-        if [ "$REL" = "buster" ]; then \
-            echo "deb http://archive.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
-        else \
+RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
-        fi; \
        echo >> /etc/apt/preferences; \
        echo 'Package: *' >> /etc/apt/preferences; \
-        echo "Pin: release n=$REL-backports" >> /etc/apt/preferences; \
+        echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
        echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
    fi; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
@ -26,14 +20,14 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" -o "$REL" = "bookworm" ]; then
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf

 RUN apt-get update
-RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
-RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y build-dep qemu
+RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
+RUN apt-get -y build-dep qemu
 # To build a custom version
 #RUN cp /root/packages/qemu-orig/* /root
 RUN apt-get --download-only source qemu

 ADD patches /root/vitastor/patches
-ADD src/client/qemu_driver.c /root/qemu_driver.c
+ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c

 #RUN set -e; \
 #    apt-get install -y wget; \
@ -44,23 +38,23 @@ ADD src/client/qemu_driver.c /root/qemu_driver.c
 #    apt-get install -y vitastor-client vitastor-client-dev quilt

 RUN set -e; \
-    DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
+    dpkg -i /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
    apt-get update; \
-    DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install quilt; \
+    apt-get install -y quilt; \
    mkdir -p /root/packages/qemu-$REL; \
    rm -rf /root/packages/qemu-$REL/*; \
    cd /root/packages/qemu-$REL; \
    dpkg-source -x /root/qemu*.dsc; \
-    QEMU_VER=$(ls -d qemu*/ | perl -pe 's!^.*?(\d+\.\d+).*!$1!'); \
+    QEMU_VER=$(ls -d qemu*/ | perl -pe 's!^.*(\d+\.\d+).*!$1!'); \
    D=$(ls -d qemu*/); \
    cp /root/vitastor/patches/qemu-$QEMU_VER-vitastor.patch ./qemu-*/debian/patches; \
    echo qemu-$QEMU_VER-vitastor.patch >> $D/debian/patches/series; \
    cd /root/packages/qemu-$REL/qemu-*/; \
    quilt push -a; \
    quilt add block/vitastor.c; \
-    cp /root/qemu_driver.c block/vitastor.c; \
+    cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
    quilt refresh; \
-    V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(\+deb\d+u\d+)?(~bpo[\d\+]*)?\).*$/$1/')+vitastor5; \
+    V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor4; \
    if [ "$REL" = bullseye ]; then V=${V}bullseye; fi; \
    DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
--- a/debian/rules
+++ b/debian/rules
@ -4,14 +4,6 @@ export DH_VERBOSE = 1
 %:
 	dh $@

-override_dh_install:
-	perl -pe 's!prefix=/usr!prefix='`pwd`'/debian/tmp/usr!' < obj-x86_64-linux-gnu/src/client/vitastor.pc > node-binding/vitastor.pc
-	cd node-binding && PKG_CONFIG_PATH=./ PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1 npm install --unsafe-perm || exit 1
-	mkdir -p debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor/build/Release
-	cp -v node-binding/package.json node-binding/index.js node-binding/addon.cc node-binding/addon.h node-binding/client.cc node-binding/client.h debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor
-	cp -v node-binding/build/Release/addon.node debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor/build/Release
-	dh_install
-
 override_dh_installdeb:
 	cat debian/fio_version >> debian/vitastor-fio.substvars
 	[ -f debian/qemu_version ] && (cat debian/qemu_version >> debian/vitastor-qemu.substvars) || true
--- a/debian/vitastor-client.install
+++ b/debian/vitastor-client.install
@ -3,6 +3,4 @@ usr/bin/vitastor-cli
 usr/bin/vitastor-rm
 usr/bin/vitastor-nbd
 usr/bin/vitastor-nfs
-usr/bin/vitastor-kv
-usr/bin/vitastor-kv-stress
 usr/lib/*/libvitastor*.so*
--- a/debian/vitastor-mon.install
+++ b/debian/vitastor-mon.install
@ -1,3 +1,2 @@
-mon usr/lib/vitastor/
-mon/scripts/make-etcd usr/lib/vitastor/mon
-mon/scripts/vitastor-mon.service /lib/systemd/system
+mon usr/lib/vitastor
+mon/vitastor-mon.service /lib/systemd/system
--- a/debian/vitastor-mon.postinst
+++ b/debian/vitastor-mon.postinst
@ -6,6 +6,4 @@ if [ "$1" = "configure" ]; then
 	addgroup --system --quiet vitastor
 	adduser --system --quiet --ingroup vitastor --no-create-home --home /nonexistent vitastor
 	mkdir -p /etc/vitastor
-	mkdir -p /var/lib/vitastor
-	chown vitastor:vitastor /var/lib/vitastor
 fi
--- a/debian/vitastor-opennebula.install
+++ b/debian/vitastor-opennebula.install
@ -1,3 +0,0 @@
-opennebula/remotes var/lib/one/
-opennebula/sudoers.d etc/
-opennebula/install.sh var/lib/one/remotes/datastore/vitastor/
--- a/debian/vitastor-opennebula.postinst
+++ b/debian/vitastor-opennebula.postinst
@ -1,7 +0,0 @@
-#!/bin/sh
-
-set -e
-
-if [ "$1" = "configure" ]; then
-	/var/lib/one/remotes/datastore/vitastor/install.sh
-fi
--- a/debian/vitastor-opennebula.triggers
+++ b/debian/vitastor-opennebula.triggers
@ -1,4 +0,0 @@
-interest /var/lib/one/remotes/datastore/downloader.sh
-interest /etc/one/oned.conf
-interest /etc/one/vmm_exec/vmm_execrc
-interest /etc/apparmor.d/local/abstractions/libvirt-qemu
--- a/debian/vitastor-osd.install
+++ b/debian/vitastor-osd.install
@ -1,6 +1,6 @@
 usr/bin/vitastor-osd
 usr/bin/vitastor-disk
 usr/bin/vitastor-dump-journal
-mon/scripts/vitastor-osd@.service /lib/systemd/system
-mon/scripts/vitastor.target /lib/systemd/system
-mon/scripts/90-vitastor.rules /lib/udev/rules.d
+mon/vitastor-osd@.service /lib/systemd/system
+mon/vitastor.target /lib/systemd/system
+mon/90-vitastor.rules /lib/udev/rules.d
--- a/debian/vitastor.Dockerfile
+++ b/debian/vitastor.Dockerfile
@ -1,31 +1,29 @@
 # Build Vitastor packages for Debian inside a container
-# cd ..; podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
+# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .

-ARG DISTRO=debian
 ARG REL=
-FROM $DISTRO:$REL
-ARG DISTRO=debian
+FROM debian:$REL
 ARG REL=

 WORKDIR /root

-RUN set -e -x; \
-    if [ "$REL" = "buster" ]; then \
-        apt-get update; \
-        apt-get -y install wget; \
-        wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
-        echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
+RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
+        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
+        echo >> /etc/apt/preferences; \
+        echo 'Package: *' >> /etc/apt/preferences; \
+        echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
+        echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
    fi; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
    perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
    echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf

-RUN apt-get update && \
-    apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake \
-        libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl nodejs npm node-nan node-bindings && \
-    apt-get -y build-dep fio && \
-    apt-get --download-only source fio
+RUN apt-get update
+RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
+RUN apt-get -y build-dep fio
+RUN apt-get --download-only source fio
+RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev

 ADD . /root/vitastor
 RUN set -e -x; \
@ -37,10 +35,8 @@ RUN set -e -x; \
    mkdir -p /root/packages/vitastor-$REL; \
    rm -rf /root/packages/vitastor-$REL/*; \
    cd /root/packages/vitastor-$REL; \
-    FULLVER=$(head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
-    VER=${FULLVER%%-*}; \
-    cp -r /root/vitastor vitastor-$VER; \
-    cd vitastor-$VER; \
+    cp -r /root/vitastor vitastor-1.2.0; \
+    cd vitastor-1.2.0; \
    ln -s /root/fio-build/fio-*/ ./fio; \
    FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@ -52,14 +48,10 @@ RUN set -e -x; \
    echo fio-headers.patch >> debian/patches/series; \
    rm -rf a b; \
    echo "dep:fio=$FIO" > debian/fio_version; \
-    cd /root/packages/vitastor-$REL/vitastor-$VER; \
-    mkdir mon/node_modules; \
-    cd mon/node_modules; \
-    curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx; \
-    curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx; \
    cd /root/packages/vitastor-$REL; \
-    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER; \
-    cd vitastor-$VER; \
-    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"; \
+    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.2.0.orig.tar.xz vitastor-1.2.0; \
+    cd vitastor-1.2.0; \
+    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
+    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
    rm -rf /root/packages/vitastor-$REL/vitastor-*/
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@ -1,11 +1,9 @@
 # Build Docker image with Vitastor packages

-FROM debian:bookworm
+FROM debian:bullseye

-ADD etc/apt /etc/apt/
-RUN apt-get update && apt-get -y install vitastor udev systemd qemu-system-x86 qemu-system-common qemu-block-extra qemu-utils jq nfs-common && apt-get clean
-ADD sleep.sh /usr/bin/
-ADD install.sh /usr/bin/
-ADD scripts /opt/scripts/
-ADD etc /etc/
-RUN ln -s /usr/lib/vitastor/mon/make-etcd /usr/bin/make-etcd
+ADD vitastor.list /etc/apt/sources.list.d
+ADD vitastor.gpg /etc/apt/trusted.gpg.d
+ADD vitastor.pref /etc/apt/preferences.d
+ADD apt.conf /etc/apt/
+RUN apt-get update && apt-get -y install vitastor qemu-system-x86 qemu-system-common && apt-get clean
--- a/docker/Makefile
+++ b/docker/Makefile
@ -1,9 +0,0 @@
-VITASTOR_VERSION ?= v2.1.0
-
-all: build push
-
-build:
-	@docker build --no-cache --rm -t vitalif/vitastor:$(VITASTOR_VERSION) .
-
-push:
-	@docker push vitalif/vitastor:$(VITASTOR_VERSION)
--- a/docker/etc/apt/apt.conf
+++ b/docker/etc/apt/apt.conf
--- a/docker/etc/apt/sources.list.d/vitastor.list
+++ b/docker/etc/apt/sources.list.d/vitastor.list
@ -1,2 +0,0 @@
-deb http://vitastor.io/debian bookworm main
-deb http://http.debian.net/debian/ bookworm-backports main
--- a/docker/etc/systemd/system/vitastor-etcd.service
+++ b/docker/etc/systemd/system/vitastor-etcd.service
@ -1,27 +0,0 @@
-[Unit]
-Description=Containerized etcd for Vitastor
-After=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
-Wants=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
-PartOf=vitastor.target
-
-[Service]
-Restart=always
-Environment=GOGC=50
-EnvironmentFile=/etc/vitastor/docker.conf
-EnvironmentFile=/etc/vitastor/etcd.conf
-SyslogIdentifier=etcd
-ExecStart=bash -c 'docker run --rm -i -v /var/lib/vitastor/etcd:/data \
-    --log-driver none --network host $CONTAINER_OPTIONS --name vitastor-etcd \
-    $ETCD_IMAGE /usr/local/bin/etcd --name "$ETCD_NAME" --data-dir /data \
-    --snapshot-count 10000 --advertise-client-urls http://$ETCD_IP:2379 --listen-client-urls http://$ETCD_IP:2379 \
-    --initial-advertise-peer-urls http://$ETCD_IP:2380 --listen-peer-urls http://$ETCD_IP:2380 \
-    --initial-cluster-token vitastor-etcd-1 --initial-cluster "$ETCD_INITIAL_CLUSTER" \
-    --initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \
-    --auto-compaction-retention=10 --auto-compaction-mode=revision'
-ExecStop=docker stop vitastor-etcd
-Restart=always
-StartLimitInterval=0
-RestartSec=10
-
-[Install]
-WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-host.service
+++ b/docker/etc/systemd/system/vitastor-host.service
@ -1,23 +0,0 @@
-[Unit]
-Description=Empty container for running Vitastor commands
-After=network-online.target local-fs.target time-sync.target docker.service
-Wants=network-online.target local-fs.target time-sync.target docker.service
-PartOf=vitastor.target
-
-[Service]
-Restart=always
-EnvironmentFile=/etc/vitastor/docker.conf
-ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /dev:/dev -v /run:/run \
-    --security-opt seccomp=unconfined --privileged --pid=host --log-driver none --network host --name vitastor vitastor:$VITASTOR_VERSION \
-    sleep.sh'
-ExecStartPost=udevadm trigger
-ExecStop=docker stop vitastor
-WorkingDirectory=/
-PrivateTmp=false
-TasksMax=infinity
-Restart=always
-StartLimitInterval=0
-RestartSec=10
-
-[Install]
-WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-mon.service
+++ b/docker/etc/systemd/system/vitastor-mon.service
@ -1,23 +0,0 @@
-[Unit]
-Description=Containerized Vitastor monitor
-After=network-online.target local-fs.target time-sync.target docker.service
-Wants=network-online.target local-fs.target time-sync.target docker.service
-PartOf=vitastor.target
-
-[Service]
-Restart=always
-EnvironmentFile=/etc/vitastor/docker.conf
-SyslogIdentifier=vitastor-mon
-ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /var/lib/vitastor:/var/lib/vitastor -v /dev:/dev \
-    --log-driver none --network host $CONTAINER_OPTIONS --name vitastor-mon vitastor:$VITASTOR_VERSION \
-    node /usr/lib/vitastor/mon/mon-main.js'
-ExecStop=docker stop vitastor-mon
-WorkingDirectory=/
-PrivateTmp=false
-TasksMax=infinity
-Restart=always
-StartLimitInterval=0
-RestartSec=10
-
-[Install]
-WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-osd@.service
+++ b/docker/etc/systemd/system/vitastor-osd@.service
@ -1,28 +0,0 @@
-[Unit]
-Description=Containerized Vitastor object storage daemon osd.%i
-After=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
-Wants=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
-PartOf=vitastor.target
-
-[Service]
-LimitNOFILE=1048576
-LimitNPROC=1048576
-LimitMEMLOCK=infinity
-EnvironmentFile=/etc/vitastor/docker.conf
-SyslogIdentifier=vitastor-osd%i
-ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /dev:/dev \
-    $(for i in $(ls /dev/vitastor/osd%i-*); do echo --device $i:$i; done) \
-    --log-driver none --network host --ulimit nofile=1048576 --ulimit memlock=-1 \
-    --security-opt seccomp=unconfined $CONTAINER_OPTIONS --name vitastor-osd%i \
-    vitastor:$VITASTOR_VERSION vitastor-disk exec-osd /dev/vitastor/osd%i-data'
-ExecStartPre=+docker exec vitastor vitastor-disk pre-exec /dev/vitastor/osd%i-data
-ExecStop=docker stop vitastor-etcd%i
-WorkingDirectory=/
-PrivateTmp=false
-TasksMax=infinity
-Restart=always
-StartLimitInterval=0
-RestartSec=10
-
-[Install]
-WantedBy=vitastor.target
--- a/docker/etc/udev/rules.d/90-vitastor.rules
+++ b/docker/etc/udev/rules.d/90-vitastor.rules
@ -1,7 +0,0 @@
-SUBSYSTEM=="block", ENV{ID_PART_ENTRY_TYPE}=="e7009fac-a5a1-4d72-af72-53de13059903", \
-    OWNER="vitastor", GROUP="vitastor", \
-    IMPORT{program}="/usr/bin/docker exec vitastor vitastor-disk udev $devnode", \
-    SYMLINK+="vitastor/$env{VITASTOR_ALIAS}"
-
-ENV{VITASTOR_OSD_NUM}!="", ACTION=="add", RUN{program}+="/usr/bin/systemctl enable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
-ENV{VITASTOR_OSD_NUM}!="", ACTION=="remove", RUN{program}+="/usr/bin/systemctl disable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
--- a/docker/etc/vitastor/docker.conf
+++ b/docker/etc/vitastor/docker.conf
@ -1,11 +0,0 @@
-#
-# Configuration file for containerized Vitastor installation
-# (non-Kubernetes, with systemd and udev-based orchestration)
-#
-
-# Desired Vitastor version
-VITASTOR_VERSION=v2.1.0
-
-# Additional arguments for all containers
-# For example, you may want to specify a custom logging driver here
-CONTAINER_OPTIONS=""
--- a/docker/etc/vitastor/etcd.conf
+++ b/docker/etc/vitastor/etcd.conf
@ -1,4 +0,0 @@
-ETCD_IMAGE=quay.io/coreos/etcd:v3.5.18
-ETCD_NAME=""
-ETCD_IP=""
-ETCD_INITIAL_CLUSTER=""
--- a/docker/etc/vitastor/vitastor.conf
+++ b/docker/etc/vitastor/vitastor.conf
@ -1,2 +0,0 @@
-{
-}
--- a/docker/install.sh
+++ b/docker/install.sh
@ -1,9 +0,0 @@
-#!/bin/bash
-
-set -e
-
-cp -urv /etc/default /host-etc/
-cp -urv /etc/systemd /host-etc/
-cp -urv /etc/udev /host-etc/
-cp -urnv /etc/vitastor /host-etc/
-cp -urnv /opt/scripts/* /host-bin/
--- a/docker/scripts/vitastor-cli
+++ b/docker/scripts/vitastor-cli
@ -1,3 +0,0 @@
-#!/bin/bash
-
-docker exec -it vitastor vitastor-cli "$@"
--- a/docker/scripts/vitastor-disk
+++ b/docker/scripts/vitastor-disk
@ -1,3 +0,0 @@
-#!/bin/bash
-
-docker exec -it vitastor vitastor-disk "$@"
--- a/docker/scripts/vitastor-fio
+++ b/docker/scripts/vitastor-fio
@ -1,3 +0,0 @@
-#!/bin/bash
-
-docker exec -it vitastor fio "$@"
--- a/docker/scripts/vitastor-nbd
+++ b/docker/scripts/vitastor-nbd
@ -1,3 +0,0 @@
-#!/bin/bash
-
-docker exec -it vitastor vitastor-nbd "$@"
--- a/docker/sleep.sh
+++ b/docker/sleep.sh
@ -1,3 +0,0 @@
-#!/bin/bash
-
-while :; do sleep infinity; done
--- a/docker/etc/apt/trusted.gpg.d/vitastor.gpg
+++ b/docker/etc/apt/trusted.gpg.d/vitastor.gpg
--- a/docker/vitastor.list
+++ b/docker/vitastor.list
@ -0,0 +1 @@
+deb http://vitastor.io/debian bullseye main
--- a/docker/etc/apt/preferences.d/vitastor.pref
+++ b/docker/etc/apt/preferences.d/vitastor.pref
--- a/docs/config.en.md
+++ b/docs/config.en.md
@ -13,7 +13,7 @@ Vitastor configuration consists of:
 - [Separate OSD settings](config/pool.en.md#osd-settings)
 - [Inode configuration](config/inode.en.md) i.e. image metadata like name, size and parent reference

-Configuration parameters can be set in 4 places:
+Configuration parameters can be set in 3 places:
 - Configuration file (`/etc/vitastor/vitastor.conf` or other path)
 - etcd key `/vitastor/config/global`. Most variables can be set there, but etcd
  connection parameters should obviously be set in the configuration file.
--- a/docs/config.ru.md
+++ b/docs/config.ru.md
@ -14,7 +14,7 @@
 - [Настроек инодов](config/inode.ru.md), т.е. метаданных образов, таких, как имя, размер и ссылки на
  родительский образ

-Параметры конфигурации могут задаваться в 4 местах:
+Параметры конфигурации могут задаваться в 3 местах:
 - Файле конфигурации (`/etc/vitastor/vitastor.conf` или по другому пути)
 - Ключе в etcd `/vitastor/config/global`. Большая часть параметров может
  задаваться там, кроме, естественно, самих параметров соединения с etcd,
--- a/docs/config/client.en.md
+++ b/docs/config/client.en.md
@ -6,83 +6,15 @@

 # Client Parameters

-These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
-affect their interaction with the cluster.
+These parameters apply only to clients and affect their interaction with
+the cluster.

- [client_iothread_count](#client_iothread_count)
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_retry_enospc](#client_retry_enospc)
- [client_wait_up_timeout](#client_wait_up_timeout)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
 - [client_max_buffered_bytes](#client_max_buffered_bytes)
 - [client_max_buffered_ops](#client_max_buffered_ops)
 - [client_max_writeback_iodepth](#client_max_writeback_iodepth)
- [nbd_timeout](#nbd_timeout)
- [nbd_max_devices](#nbd_max_devices)
- [nbd_max_part](#nbd_max_part)
- [osd_nearfull_ratio](#osd_nearfull_ratio)
-
-## client_iothread_count
-
- Type: integer
- Default: 0
-
-Number of separate threads for handling TCP network I/O at client library
-side. Enabling 4 threads usually increases the peak performance of each
-client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
-100-150 to 400 thousand iops, but at the same time it increases latency.
-Latency increase depends on CPU: with CPU power saving disabled latency
-only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
-with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
-iops decrease from 2000 to 1000). RDMA isn't affected by this option.
-
-It's recommended to enable client I/O threads if you don't use RDMA and want
-to increase peak client performance.
-
-## client_retry_interval
-
- Type: milliseconds
- Default: 50
- Minimum: 10
- Can be changed online: yes
-
-Retry time for I/O requests failed due to inactive PGs or network
-connectivity errors.
-
-## client_eio_retry_interval
-
- Type: milliseconds
- Default: 1000
- Can be changed online: yes
-
-Retry time for I/O requests failed due to data corruption or unfinished
-EC object deletions (has_incomplete PG state). 0 disables such retries
-and clients are not blocked and just get EIO error code instead.
-
-## client_retry_enospc
-
- Type: boolean
- Default: true
- Can be changed online: yes
-
-Retry writes on out of space errors to wait until some space is freed on
-OSDs.
-
-## client_wait_up_timeout
-
- Type: seconds
- Default: 16
- Can be changed online: yes
-
-Wait for this number of seconds until PGs are up when doing operations
-which require all PGs to be up. Currently only used by object listings
-in delete and merge-based commands ([vitastor-cli rm](../usage/cli.en.md#rm), merge and so on).
-
-The default value is calculated as `1 + OSD lease timeout`, which is
-`1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.

 ## client_max_dirty_bytes

@ -169,49 +101,3 @@ Multiple consecutive modified data regions are counted as 1 write here.
 - Can be changed online: yes

 Maximum number of parallel writes when flushing buffered data to the server.
-
-## nbd_timeout
-
- Type: seconds
- Default: 300
-
-Timeout for I/O operations for [NBD](../usage/nbd.en.md). If an operation
-executes for longer than this timeout, including when your cluster is just
-temporarily down for more than timeout, the NBD device will detach by itself
-(and possibly break the mounted file system).
-
-You can set timeout to 0 to never detach, but in that case you won't be
-able to remove the kernel device at all if the NBD process dies - you'll have
-to reboot the host.
-
-## nbd_max_devices
-
- Type: integer
- Default: 64
-
-Maximum number of NBD devices in the system. This value is passed as
-`nbds_max` parameter for the nbd kernel module when vitastor-nbd autoloads it.
-
-## nbd_max_part
-
- Type: integer
- Default: 3
-
-Maximum number of partitions per NBD device. This value is passed as
-`max_part` parameter for the nbd kernel module when vitastor-nbd autoloads it.
-Note that (nbds_max)*(1+max_part) usually can't exceed 256.
-
-## osd_nearfull_ratio
-
- Type: number
- Default: 0.95
- Can be changed online: yes
-
-Ratio of used space on OSD to treat it as "almost full" in vitastor-cli status output.
-
-Remember that some client writes may hang or complete with an error if even
-just one OSD becomes 100 % full!
-
-However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
-unable to start at all), so you'll be able to recover from "out of space" errors
-without destroying and recreating OSDs.
--- a/docs/config/client.ru.md
+++ b/docs/config/client.ru.md
@ -6,85 +6,15 @@

 # Параметры клиентского кода

-Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
+Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD) и
 затрагивают логику их работы с кластером.

- [client_iothread_count](#client_iothread_count)
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_retry_enospc](#client_retry_enospc)
- [client_wait_up_timeout](#client_wait_up_timeout)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
 - [client_max_buffered_bytes](#client_max_buffered_bytes)
 - [client_max_buffered_ops](#client_max_buffered_ops)
 - [client_max_writeback_iodepth](#client_max_writeback_iodepth)
- [nbd_timeout](#nbd_timeout)
- [nbd_max_devices](#nbd_max_devices)
- [nbd_max_part](#nbd_max_part)
- [osd_nearfull_ratio](#osd_nearfull_ratio)
-
-## client_iothread_count
-
- Тип: целое число
- Значение по умолчанию: 0
-
-Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
-клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
-производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
-чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
-секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
-при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
-падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
-и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
-RDMA данная опция не влияет.
-
-Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
-RDMA и хотите повысить пиковую производительность клиентов.
-
-## client_retry_interval
-
- Тип: миллисекунды
- Значение по умолчанию: 50
- Минимальное значение: 10
- Можно менять на лету: да
-
-Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
-ошибок сети.
-
-## client_eio_retry_interval
-
- Тип: миллисекунды
- Значение по умолчанию: 1000
- Можно менять на лету: да
-
-Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
-или незавершённых удалений EC-объектов (состояния PG has_incomplete).
-0 отключает повторы таких запросов и клиенты не блокируются, а вместо
-этого просто получают код ошибки EIO.
-
-## client_retry_enospc
-
- Тип: булево (да/нет)
- Значение по умолчанию: true
- Можно менять на лету: да
-
-Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
-ожидать, пока на OSD не освободится место.
-
-## client_wait_up_timeout
-
- Тип: секунды
- Значение по умолчанию: 16
- Можно менять на лету: да
-
-Время ожидания поднятия PG при операциях, требующих активности всех PG.
-В данный момент используется листингами объектов в командах, использующих
-удаление и слияние ([vitastor-cli rm](../usage/cli.ru.md#rm), merge и подобные).
-
-Значение по умолчанию вычисляется как `1 + время lease OSD`, равное
-`1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.

 ## client_max_dirty_bytes

@ -171,51 +101,3 @@ RDMA и хотите повысить пиковую производитель
 - Можно менять на лету: да

 Максимальное число параллельных операций записи при сбросе буферов на сервер.
-
-## nbd_timeout
-
- Тип: секунды
- Значение по умолчанию: 300
-
-Таймаут для операций чтения/записи через [NBD](../usage/nbd.ru.md). Если
-операция выполняется дольше таймаута, включая временную недоступность
-кластера на время, большее таймаута, NBD-устройство отключится само собой
-(и, возможно, сломает примонтированную ФС).
-
-Вы можете установить таймаут в 0, чтобы никогда не отключать устройство по
-таймауту, но в этом случае вы вообще не сможете удалить устройство, если
-процесс NBD умрёт - вам придётся перезагружать сервер.
-
-## nbd_max_devices
-
- Тип: целое число
- Значение по умолчанию: 64
-
-Максимальное число NBD-устройств в системе. Данное значение передаётся
-модулю ядра nbd как параметр `nbds_max`, когда его загружает vitastor-nbd.
-
-## nbd_max_part
-
- Тип: целое число
- Значение по умолчанию: 3
-
-Максимальное число разделов на одном NBD-устройстве. Данное значение передаётся
-модулю ядра nbd как параметр `max_part`, когда его загружает vitastor-nbd.
-Имейте в виду, что (nbds_max)*(1+max_part) обычно не может превышать 256.
-
-## osd_nearfull_ratio
-
- Тип: число
- Значение по умолчанию: 0.95
- Можно менять на лету: да
-
-Доля занятого места на OSD, начиная с которой он считается "почти заполненным" в
-выводе vitastor-cli status.
-
-Помните, что часть клиентских запросов может зависнуть или завершиться с ошибкой,
-если на 100 % заполнится хотя бы 1 OSD!
-
-Однако, в отличие от Ceph, заполненные на 100 % OSD Vitastor не падают (в Ceph
-заполненные на 100% OSD вообще не могут стартовать), так что вы сможете
-восстановить работу кластера после ошибок отсутствия свободного места
-без уничтожения и пересоздания OSD.
--- a/docs/config/layout-cluster.en.md
+++ b/docs/config/layout-cluster.en.md
@ -56,24 +56,14 @@ Can't be smaller than the OSD data device sector.
 ## immediate_commit

 - Type: string
- Default: all
+- Default: false

-One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
-
-This parameter is also really important for performance.
-
-TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
-power loss protection (nonvolatile write-through cache) and also for most HDDs.
-"none" or "small" should be only selected if you use desktop SSDs without
-capacitors or drives with slow write-back cache that can't be disabled. Check
-immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
-
-Detailed explanation:
+Another parameter which is really important for performance.

 Desktop SSDs are very fast (100000+ iops) for simple random writes
 without cache flush. However, they are really slow (only around 1000 iops)
-if you try to fsync() each write, that is, if you want to guarantee that
-each change gets actually persisted to the physical media.
+if you try to fsync() each write, that is, when you want to guarantee that
+each change gets immediately persisted to the physical media.

 Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
 "Supercapacitor-based Power Loss Protection", on the other hand, are equally
@ -85,8 +75,8 @@ really slow when used with desktop SSDs. Vitastor, however, can also
 efficiently utilize desktop SSDs by postponing fsync until the client calls
 it explicitly.

-This is what this parameter regulates. When it's set to "all" Vitastor
-cluster commits each change to disks immediately and clients just
+This is what this parameter regulates. When it's set to "all" the whole
+Vitastor cluster commits each change to disks immediately and clients just
 ignore fsyncs because they know for sure that they're unneeded. This reduces
 the amount of network roundtrips performed by clients and improves
 performance. So it's always better to use server grade SSDs with
@ -106,8 +96,12 @@ SSD cache or "media-cache" - for example, a lot of Seagate EXOS drives have
 it (they have internal SSD cache even though it's not stated in datasheets).

 Setting this parameter to "all" or "small" in OSD parameters requires enabling
-[disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
-[disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
-"all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
-vitastor-disk tries to do that by default, first checking/disabling drive cache.
-If it can't disable drive cache, OSDs get initialized with "none".
+[disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
+[disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
+"all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
+
+TLDR: For optimal performance, set immediate_commit to "all" if you only use
+SSDs with supercapacitor-based power loss protection (nonvolatile
+write-through cache) for both data and journals in the whole Vitastor
+cluster. Set it to "small" if you only use such SSDs for journals. Leave
+empty if your drives have write-back cache.
--- a/docs/config/layout-cluster.ru.md
+++ b/docs/config/layout-cluster.ru.md
@ -57,18 +57,9 @@ amplification) и эффективность распределения нагр
 ## immediate_commit

 - Тип: строка
- Значение по умолчанию: all
+- Значение по умолчанию: false

-Одно из значений "none", "small" или "all". Глобальное значение, может быть
-переопределено [на уровне пула](pool.ru.md#immediate_commit).
-
-Данный параметр тоже важен для производительности.
-
-Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
-суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
-устанавливать только при использовании SSD настольного класса без
-суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
-Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
+Ещё один важный для производительности параметр.

 Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
 секунду) при простой случайной записи без сбросов кэша. Однако они очень
@ -89,7 +80,7 @@ Power Loss Protection" - одинаково быстрые и со сбросо
 эффективно утилизировать настольные SSD.

 Данный параметр влияет как раз на это. Когда он установлен в значение "all",
-кластер Vitastor мгновенно фиксирует каждое изменение на физические
+весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
 носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
 знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
 по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
@ -112,6 +103,13 @@ HDD-дисках с внутренним SSD или "медиа" кэшем - н
 указано в спецификациях).

 Указание "all" или "small" в настройках / командной строке OSD требует
-включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
-[disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
-также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
+включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
+[disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
+также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
+
+Итого, вкратце: для оптимальной производительности установите
+immediate_commit в значение "all", если вы используете в кластере только SSD
+с суперконденсаторами и для данных, и для журналов. Если вы используете
+такие SSD для всех журналов, но не для данных - можете установить параметр
+в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
+оставьте параметр пустым.
--- a/docs/config/layout-osd.en.md
+++ b/docs/config/layout-osd.en.md
@ -118,13 +118,12 @@ Physical block size of the journal device. Must be a multiple of
 - Type: boolean
 - Default: false

-Do not issue fsyncs to the data device, i.e. do not force it to flush cache.
-Safe ONLY if your data device has write-through cache or if write-back
-cache is disabled. If you disable drive cache manually with `hdparm` or
-writing to `/sys/.../scsi_disk/cache_type` then make sure that you do it
-every time before starting Vitastor OSD (vitastor-disk does it automatically).
-See also [immediate_commit](layout-cluster.en.md#immediate_commit)
-for information about how to benefit from disabled cache.
+Do not issue fsyncs to the data device, i.e. do not flush its cache.
+Safe ONLY if your data device has write-through cache. If you disable
+the cache yourself using `hdparm` or `scsi_disk/cache_type` then make sure
+that the cache disable command is run every time before starting Vitastor
+OSD, for example, in the systemd unit. See also the `immediate_commit` option
+for instructions on disabling the cache and how to benefit from it.

 ## disable_meta_fsync

@ -172,7 +171,8 @@ size, it actually has to write the whole 4 KB sector.

 Because of this it can actually be beneficial to use SSDs which work well
 with 512 byte sectors and use 512 byte disk_alignment, journal_block_size
-and meta_block_size. But at the moment, no such SSDs are known...
+and meta_block_size. But the only SSD that may fit into this category is
+Intel Optane (probably, not tested yet).

 Clients don't need to be aware of disk_alignment, so it's not required to
 put a modified value into etcd key /vitastor/config/global.
--- a/docs/config/layout-osd.ru.md
+++ b/docs/config/layout-osd.ru.md
@ -122,14 +122,13 @@ SSD-диске, иначе производительность пострада
 - Тип: булево (да/нет)
 - Значение по умолчанию: false

-Не отправлять fsync-и устройству данных, т.е. не заставлять его сбрасывать кэш.
+Не отправлять fsync-и устройству данных, т.е. не сбрасывать его кэш.
 Безопасно, ТОЛЬКО если ваше устройство данных имеет кэш со сквозной
-записью (write-through) или если кэш с отложенной записью (write-back) отключён.
-Если вы отключаете кэш вручную через `hdparm` или запись в `/sys/.../scsi_disk/cache_type`,
-то удостоверьтесь, что вы делаете это каждый раз перед запуском Vitastor OSD
-(vitastor-disk делает это автоматически). Смотрите также опцию
-[immediate_commit](layout-cluster.ru.md#immediate_commit) для информации о том,
-как извлечь выгоду из отключённого кэша.
+записью (write-through). Если вы отключаете кэш через `hdparm` или
+`scsi_disk/cache_type`, то удостоверьтесь, что команда отключения кэша
+выполняется перед каждым запуском Vitastor OSD, например, в systemd unit-е.
+Смотрите также опцию `immediate_commit` для инструкций по отключению кэша
+и о том, как из этого извлечь выгоду.

 ## disable_meta_fsync

@ -180,8 +179,9 @@ SSD и HDD диски используют 4 КБ физические сект

 Поэтому, на самом деле, может быть выгодно найти SSD, хорошо работающие с
 меньшими, 512-байтными, блоками и использовать 512-байтные disk_alignment,
-journal_block_size и meta_block_size. Однако на данный момент такие SSD
-не известны...
+journal_block_size и meta_block_size. Однако единственные SSD, которые
+теоретически могут попасть в эту категорию - это Intel Optane (но и это
+пока не проверялось автором).

 Клиентам не обязательно знать про disk_alignment, так что помещать значение
 этого параметра в etcd в /vitastor/config/global не нужно.
--- a/docs/config/monitor.en.md
+++ b/docs/config/monitor.en.md
@ -8,14 +8,6 @@

 These parameters only apply to Monitors.

- [use_antietcd](#use_antietcd)
- [enable_prometheus](#enable_prometheus)
- [mon_http_port](#mon_http_port)
- [mon_http_ip](#mon_http_ip)
- [mon_https_cert](#mon_https_cert)
- [mon_https_key](#mon_https_key)
- [mon_https_client_auth](#mon_https_client_auth)
- [mon_https_ca](#mon_https_ca)
 - [etcd_mon_ttl](#etcd_mon_ttl)
 - [etcd_mon_timeout](#etcd_mon_timeout)
 - [etcd_mon_retries](#etcd_mon_retries)
@ -23,95 +15,12 @@ These parameters only apply to Monitors.
 - [mon_stats_timeout](#mon_stats_timeout)
 - [osd_out_time](#osd_out_time)
 - [placement_levels](#placement_levels)
- [use_old_pg_combinator](#use_old_pg_combinator)
- [osd_backfillfull_ratio](#osd_backfillfull_ratio)
-
-## use_antietcd
-
- Type: boolean
- Default: false
-
-Enable experimental built-in etcd replacement (clustered key-value database):
-[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
-
-When set to true, the monitor runs an internal antietcd automatically if it finds
-a network interface with an IP address matching one of the addresses in the
-`etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
-the monitor command line). If there are multiple matching addresses, it also
-checks `antietcd_port` and starts antietcd for the address with the matching port.
-By default, antietcd accepts connections on the selected IP address, but this
-can also be overridden manually with the `antietcd_ip` option.
-
-When antietcd is started, the monitor stores cluster metadata itself and exposes
-an etcd-compatible REST API. On disk, this metadata is stored in
-`/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
-or antietcd_data_dir options). All other antietcd parameters
-(see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
-cluster, cluster_key, persist_filter, stale_read can also be set in
-Vitastor configuration with `antietcd_` prefix.
-
-You can dump/load data to or from antietcd using Antietcd `anticli` tool:
-
-```
-npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
-npm exec anticli -e http://antietcd:2379/v3 load < dump.json
-```
-
-## enable_prometheus
-
- Type: boolean
- Default: true
-
-Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
-
-Note that only the active (master) monitor exposes metrics, others return
-HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
-
-Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
-
-## mon_http_port
-
- Type: integer
- Default: 8060
-
-HTTP port for monitors to listen on (including the metrics exporter)
-
-## mon_http_ip
-
- Type: string
-
-IP address for monitors to listen on (all addresses by default)
-
-## mon_https_cert
-
- Type: string
-
-Path to PEM SSL certificate file for monitor to listen using HTTPS
-
-## mon_https_key
-
- Type: string
-
-Path to PEM SSL private key file for monitor to listen using HTTPS
-
-## mon_https_client_auth
-
- Type: boolean
- Default: false
-
-Enable HTTPS client certificate-based authorization for monitor connections
-
-## mon_https_ca
-
- Type: string
-
-Path to CA certificate for client HTTPS authorization

 ## etcd_mon_ttl

 - Type: seconds
- Default: 1
- Minimum: 5
+- Default: 30
+- Minimum: 10

 Monitor etcd lease refresh interval in seconds

@ -168,26 +77,3 @@ values.  Smaller priority means higher level in tree. For example,
 levels are always predefined and can't be removed. If one of them is not
 present in the configuration, then it is defined with the default priority
 (100 for "host", 101 for "osd").
-
-## use_old_pg_combinator
-
- Type: boolean
- Default: false
-
-Use the old PG combination generator which doesn't support [level_placement](pool.en.md#level_placement)
-and [raw_placement](pool.en.md#raw_placement) for pools which don't use these features.
-
-## osd_backfillfull_ratio
-
- Type: number
- Default: 0.99
-
-Monitors try to prevent OSDs becoming 100% full during rebalance or recovery by
-calculating how much space will be occupied on every OSD after all rebalance
-and recovery operations finish, and pausing rebalance and recovery if that
-amount of space exceeds OSD capacity multiplied by the value of this
-configuration parameter.
-
-Future used space is calculated by summing space used by all user data blocks
-(objects) in all PGs placed on a specific OSD, even if some of these objects
-currently reside on a different set of OSDs.
--- a/docs/config/monitor.ru.md
+++ b/docs/config/monitor.ru.md
@ -8,14 +8,6 @@

 Данные параметры используются только мониторами Vitastor.

- [use_antietcd](#use_antietcd)
- [enable_prometheus](#enable_prometheus)
- [mon_http_port](#mon_http_port)
- [mon_http_ip](#mon_http_ip)
- [mon_https_cert](#mon_https_cert)
- [mon_https_key](#mon_https_key)
- [mon_https_client_auth](#mon_https_client_auth)
- [mon_https_ca](#mon_https_ca)
 - [etcd_mon_ttl](#etcd_mon_ttl)
 - [etcd_mon_timeout](#etcd_mon_timeout)
 - [etcd_mon_retries](#etcd_mon_retries)
@ -23,97 +15,12 @@
 - [mon_stats_timeout](#mon_stats_timeout)
 - [osd_out_time](#osd_out_time)
 - [placement_levels](#placement_levels)
- [use_old_pg_combinator](#use_old_pg_combinator)
- [osd_backfillfull_ratio](#osd_backfillfull_ratio)
-
-## use_antietcd
-
- Тип: булево (да/нет)
- Значение по умолчанию: false
-
-Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
-[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
-
-Если параметр установлен в true, монитор запускает antietcd автоматически,
-если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
-конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
-командной строки монитора). Если таких адресов несколько, также проверяется
-опция `antietcd_port` и antietcd запускается для адреса с соответствующим
-портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
-IP, но его также можно определить вручную опцией `antietcd_ip`.
-
-При запуске antietcd монитор сам хранит центральные метаданные кластера и
-выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
-`/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
-antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
-(смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
-node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
-в конфигурации Vitastor с префиксом `antietcd_`.
-
-Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
-`anticli`:
-
-```
-npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
-npm exec anticli -e http://antietcd:2379/v3 load < dump.json
-```
-
-## enable_prometheus
-
- Тип: булево (да/нет)
- Значение по умолчанию: true
-
-Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
-
-Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
-возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
-в задание по сбору метрик Prometheus.
-
-Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
-
-## mon_http_port
-
- Тип: целое число
- Значение по умолчанию: 8060
-
-Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
-
-## mon_http_ip
-
- Тип: строка
-
-IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
-
-## mon_https_cert
-
- Тип: строка
-
-Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
-
-## mon_https_key
-
- Тип: строка
-
-Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
-
-## mon_https_client_auth
-
- Тип: булево (да/нет)
- Значение по умолчанию: false
-
-Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
-
-## mon_https_ca
-
- Тип: строка
-
-Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений

 ## etcd_mon_ttl

 - Тип: секунды
- Значение по умолчанию: 1
- Минимальное значение: 5
+- Значение по умолчанию: 30
+- Минимальное значение: 10

 Интервал обновления etcd резервации (lease) монитором

@ -171,27 +78,3 @@ OSD перед обновлением агрегированной статис
 "host" и "osd" являются предопределёнными и не могут быть удалены. Если
 один из них отсутствует в конфигурации, он доопределяется с приоритетом по
 умолчанию (100 для уровня "host", 101 для "osd").
-
-## use_old_pg_combinator
-
- Тип: булево (да/нет)
- Значение по умолчанию: false
-
-Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
-и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
-
-## osd_backfillfull_ratio
-
- Тип: число
- Значение по умолчанию: 0.99
-
-Мониторы стараются предотвратить 100% заполнение OSD в процессе ребаланса
-или восстановления, рассчитывая, сколько места будет занято на каждом OSD после
-завершения всех операций ребаланса и восстановления, и приостанавливая
-ребаланс и восстановление, если рассчитанный объём превышает ёмкость OSD,
-умноженную на значение данного параметра.
-
-Будущее занятое место рассчитывается сложением места, занятого всеми
-пользовательскими блоками данных (объектами) во всех PG, расположенных
-на конкретном OSD, даже если часть этих объектов в данный момент находится
-на другом наборе OSD.
--- a/docs/config/network.en.md
+++ b/docs/config/network.en.md
@ -9,11 +9,9 @@
 These parameters apply to clients and OSDs and affect network connection logic
 between clients, OSDs and etcd.

- [osd_network](#osd_network)
- [osd_cluster_network](#osd_cluster_network)
+- [tcp_header_buffer_size](#tcp_header_buffer_size)
+- [use_sync_send_recv](#use_sync_send_recv)
 - [use_rdma](#use_rdma)
- [use_rdmacm](#use_rdmacm)
- [disable_tcp](#disable_tcp)
 - [rdma_device](#rdma_device)
 - [rdma_port_num](#rdma_port_num)
 - [rdma_gid_index](#rdma_gid_index)
@ -27,84 +25,55 @@ between clients, OSDs and etcd.
 - [peer_connect_timeout](#peer_connect_timeout)
 - [osd_idle_timeout](#osd_idle_timeout)
 - [osd_ping_timeout](#osd_ping_timeout)
+- [up_wait_retry_interval](#up_wait_retry_interval)
 - [max_etcd_attempts](#max_etcd_attempts)
 - [etcd_quick_timeout](#etcd_quick_timeout)
 - [etcd_slow_timeout](#etcd_slow_timeout)
 - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
- [etcd_min_reload_interval](#etcd_min_reload_interval)
- [tcp_header_buffer_size](#tcp_header_buffer_size)
- [use_sync_send_recv](#use_sync_send_recv)
+- [etcd_ws_keepalive_timeout](#etcd_ws_keepalive_timeout)

-## osd_network
+## tcp_header_buffer_size

- Type: string or array of strings
+- Type: integer
+- Default: 65536

-Network mask of public OSD network(s) (IPv4 or IPv6). Each OSD listens to all
-addresses of UP + RUNNING interfaces matching one of these networks, on the
-same port. Port is auto-selected except if [bind_port](osd.en.md#bind_port) is
-explicitly specified. Bind address(es) may also be overridden manually by
-specifying [bind_address](osd.en.md#bind_address). If OSD networks are not specified
-at all, OSD just listens to a wildcard address (0.0.0.0).
+Size of the buffer used to read data using an additional copy. Vitastor
+packet headers are 128 bytes, payload is always at least 4 KB, so it is
+usually beneficial to try to read multiple packets at once even though
+it requires copying the data an additional time. The rest of each packet
+is received without an additional copy. You can try to play with this
+parameter and see how it affects random iops and linear bandwidth if you
+want.

-## osd_cluster_network
+## use_sync_send_recv

- Type: string or array of strings
+- Type: boolean
+- Default: false

-Network mask of separate network(s) (IPv4 or IPv6) to use for OSD
-cluster connections. I.e. OSDs will always attempt to use these networks
-to connect to other OSDs, while clients will attempt to use networks from
-[osd_network](#osd_network).
+If true, synchronous send/recv syscalls are used instead of io_uring for
+socket communication. Useless for OSDs because they require io_uring anyway,
+but may be required for clients with old kernel versions.

 ## use_rdma

 - Type: boolean
 - Default: true

-Try to use RDMA through libibverbs for communication if it's available.
-Disable if you don't want Vitastor to use RDMA. TCP-only clients can also
-talk to an RDMA-enabled cluster, so disabling RDMA may be needed if clients
-have RDMA devices, but they are not connected to the cluster.
-
-`use_rdma` works with RoCEv1/RoCEv2 networks, but not with iWARP and,
-maybe, with some Infiniband configurations which require RDMA-CM.
-Consider `use_rdmacm` for such networks.
-
-## use_rdmacm
-
- Type: boolean
- Default: true
-
-Use an alternative implementation of RDMA through RDMA-CM (Connection
-Manager). Works with all RDMA networks: Infiniband, iWARP and
-RoCEv1/RoCEv2, and even allows to disable TCP and run only with RDMA.
-OSDs always use random port numbers for RDMA-CM listeners, different
-from their TCP ports. `use_rdma` is automatically disabled when
-`use_rdmacm` is enabled.
-
-## disable_tcp
-
- Type: boolean
- Default: true
-
-Fully disable TCP and only use RDMA-CM for OSD communication.
+Try to use RDMA for communication if it's available. Disable if you don't
+want Vitastor to use RDMA. TCP-only clients can also talk to an RDMA-enabled
+cluster, so disabling RDMA may be needed if clients have RDMA devices,
+but they are not connected to the cluster.

 ## rdma_device

 - Type: string

 RDMA device name to use for Vitastor OSD communications (for example,
-"rocep5s0f0"). If not specified, Vitastor will try to find an RoCE
-device matching [osd_network](osd.en.md#osd_network), preferring RoCEv2,
-or choose the first available RDMA device if no RoCE devices are
-found or if `osd_network` is not specified. Auto-selection is also
-unsupported with old libibverbs < v32, like in Debian 10 Buster or
-CentOS 7.
+"rocep5s0f0"). Now Vitastor supports all adapters, even ones without
+ODP support, like Mellanox ConnectX-3 and non-Mellanox cards.

-Vitastor supports all adapters, even ones without ODP support, like
-Mellanox ConnectX-3 and non-Mellanox cards. Versions up to Vitastor
-1.2.0 required ODP which is only present in Mellanox ConnectX >= 4.
-See also [rdma_odp](#rdma_odp).
+Versions up to Vitastor 1.2.0 required ODP which is only present in
+Mellanox ConnectX >= 4. See also [rdma_odp](#rdma_odp).

 Run `ibv_devinfo -v` as root to list available RDMA devices and their
 features.
@ -118,36 +87,32 @@ PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
 ## rdma_port_num

 - Type: integer
+- Default: 1

 RDMA device port number to use. Only for devices that have more than 1 port.
 See `phys_port_cnt` in `ibv_devinfo -v` output to determine how many ports
 your device has.

-Not relevant for RDMA-CM (use_rdmacm).
-
 ## rdma_gid_index

 - Type: integer
+- Default: 0

 Global address identifier index of the RDMA device to use. Different GID
 indexes may correspond to different protocols like RoCEv1, RoCEv2 and iWARP.
 Search for "GID" in `ibv_devinfo -v` output to determine which GID index
 you need.

-If not specified, Vitastor will try to auto-select a RoCEv2 IPv4 GID, then
-RoCEv2 IPv6 GID, then RoCEv1 IPv4 GID, then RoCEv1 IPv6 GID, then IB GID.
-GID auto-selection is unsupported with libibverbs < v32.
-
-A correct rdma_gid_index for RoCEv2 is usually 1 (IPv6) or 3 (IPv4).
-
-Not relevant for RDMA-CM (use_rdmacm).
+**IMPORTANT:** If you want to use RoCEv2 (as recommended) then the correct
+rdma_gid_index is usually 1 (IPv6) or 3 (IPv4).

 ## rdma_mtu

 - Type: integer
+- Default: 4096

-RDMA Path MTU to use. Must be 1024, 2048 or 4096. Default is to use the
-RDMA device's MTU.
+RDMA Path MTU to use. Must be 1024, 2048 or 4096. There is usually no
+point in changing it from the default 4096.

 ## rdma_max_sge

@ -247,6 +212,17 @@ Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
 within this time, the connection to it is dropped and a reconnection attempt
 is scheduled.

+## up_wait_retry_interval
+
+- Type: milliseconds
+- Default: 500
+- Minimum: 50
+- Can be changed online: yes
+
+OSDs respond to clients with a special error code when they receive I/O
+requests for a PG that's not synchronized and started. This parameter sets
+the time for the clients to wait before re-attempting such I/O requests.
+
 ## max_etcd_attempts

 - Type: integer
@ -281,43 +257,11 @@ Timeout for etcd requests which are allowed to wait for some time.
 Timeout for etcd connection HTTP Keep-Alive. Should be higher than
 etcd_report_interval to guarantee that keepalive actually works.

-## etcd_ws_keepalive_interval
+## etcd_ws_keepalive_timeout

 - Type: seconds
- Default: 5
+- Default: 30
 - Can be changed online: yes

 etcd websocket ping interval required to keep the connection alive and
 detect disconnections quickly.
-
-## etcd_min_reload_interval
-
- Type: milliseconds
- Default: 1000
- Can be changed online: yes
-
-Minimum interval for full etcd state reload. Introduced to prevent
-excessive load on etcd during outages when etcd can't keep up with event
-streams and cancels them.
-
-## tcp_header_buffer_size
-
- Type: integer
- Default: 65536
-
-Size of the buffer used to read data using an additional copy. Vitastor
-packet headers are 128 bytes, payload is always at least 4 KB, so it is
-usually beneficial to try to read multiple packets at once even though
-it requires to copy the data an additional time. The rest of each packet
-is received without an additional copy. You can try to play with this
-parameter and see how it affects random iops and linear bandwidth if you
-want.
-
-## use_sync_send_recv
-
- Type: boolean
- Default: false
-
-If true, synchronous send/recv syscalls are used instead of io_uring for
-socket communication. Useless for OSDs because they require io_uring anyway,
-but may be required for clients with old kernel versions.
--- a/docs/config/network.ru.md
+++ b/docs/config/network.ru.md
@ -9,11 +9,9 @@
 Данные параметры используются клиентами и OSD и влияют на логику сетевого
 взаимодействия между клиентами, OSD, а также etcd.

- [osd_network](#osd_network)
- [osd_cluster_network](#osd_cluster_network)
+- [tcp_header_buffer_size](#tcp_header_buffer_size)
+- [use_sync_send_recv](#use_sync_send_recv)
 - [use_rdma](#use_rdma)
- [use_rdmacm](#use_rdmacm)
- [disable_tcp](#disable_tcp)
 - [rdma_device](#rdma_device)
 - [rdma_port_num](#rdma_port_num)
 - [rdma_gid_index](#rdma_gid_index)
@ -27,84 +25,59 @@
 - [peer_connect_timeout](#peer_connect_timeout)
 - [osd_idle_timeout](#osd_idle_timeout)
 - [osd_ping_timeout](#osd_ping_timeout)
+- [up_wait_retry_interval](#up_wait_retry_interval)
 - [max_etcd_attempts](#max_etcd_attempts)
 - [etcd_quick_timeout](#etcd_quick_timeout)
 - [etcd_slow_timeout](#etcd_slow_timeout)
 - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
- [etcd_min_reload_interval](#etcd_min_reload_interval)
- [tcp_header_buffer_size](#tcp_header_buffer_size)
- [use_sync_send_recv](#use_sync_send_recv)
+- [etcd_ws_keepalive_timeout](#etcd_ws_keepalive_timeout)

-## osd_network
+## tcp_header_buffer_size

- Тип: строка или массив строк
+- Тип: целое число
+- Значение по умолчанию: 65536

-Маски подсетей (IPv4 или IPv6) публичной сети или сетей OSD. Каждый OSD слушает
-один и тот же порт на всех адресах поднятых (UP + RUNNING) сетевых интерфейсов,
-соответствующих одной из указанных сетей. Порт выбирается автоматически, если
-только [bind_port](osd.ru.md#bind_port) не задан явно. Адреса для подключений можно
-также переопределить явно, задав [bind_address](osd.ru.md#bind_address). Если сети OSD
-не заданы вообще, OSD слушает все адреса (0.0.0.0).
+Размер буфера для чтения данных с дополнительным копированием. Пакеты
+Vitastor содержат 128-байтные заголовки, за которыми следуют данные размером
+от 4 КБ, и для мелких операций ввода-вывода обычно выгодно за 1 вызов читать
+сразу несколько пакетов, даже несмотря на то, что это требует лишний раз
+скопировать данные. Часть каждого пакета за пределами значения данного
+параметра читается без дополнительного копирования. Вы можете попробовать
+поменять этот параметр и посмотреть, как он влияет на производительность
+случайного и линейного доступа.

-## osd_cluster_network
+## use_sync_send_recv

- Тип: строка или массив строк
+- Тип: булево (да/нет)
+- Значение по умолчанию: false

-Маски подсетей (IPv4 или IPv6) отдельной кластерной сети или сетей OSD.
-То есть, OSD будут всегда стараться использовать эти сети для соединений
-с другими OSD, а клиенты будут стараться использовать сети из [osd_network](#osd_network).
+Если установлено в истину, то вместо io_uring для передачи данных по сети
+будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
+это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
+принципе, это может применяться для клиентов со старыми версиями ядра.

 ## use_rdma

 - Тип: булево (да/нет)
 - Значение по умолчанию: true

-Попробовать использовать RDMA через libibverbs для связи при наличии
-доступных устройств. Отключите, если вы не хотите, чтобы Vitastor
-использовал RDMA. TCP-клиенты также могут работать с RDMA-кластером,
-так что отключать RDMA может быть нужно, только если у клиентов есть
-RDMA-устройства, но они не имеют соединения с кластером Vitastor.
-
-`use_rdma` работает с RoCEv1/RoCEv2 сетями, но не работает с iWARP и
-может не работать с частью конфигураций Infiniband, требующих RDMA-CM.
-Рассмотрите включение `use_rdmacm` для таких сетей.
-
-## use_rdmacm
-
- Тип: булево (да/нет)
- Значение по умолчанию: true
-
-Использовать альтернативную реализацию RDMA на основе RDMA-CM (Connection
-Manager). Работает со всеми типами RDMA-сетей: Infiniband, iWARP и
-RoCEv1/RoCEv2, и даже позволяет полностью отключить TCP и работать
-только на RDMA. OSD используют случайные номера портов для ожидания
-соединений через RDMA-CM, отличающиеся от их TCP-портов. Также при
-включении `use_rdmacm` автоматически отключается опция `use_rdma`.
-
-## disable_tcp
-
- Тип: булево (да/нет)
- Значение по умолчанию: true
-
-Полностью отключить TCP и использовать только RDMA-CM для соединений с OSD.
+Пытаться использовать RDMA для связи при наличии доступных устройств.
+Отключите, если вы не хотите, чтобы Vitastor использовал RDMA.
+TCP-клиенты также могут работать с RDMA-кластером, так что отключать
+RDMA может быть нужно, только если у клиентов есть RDMA-устройства,
+но они не имеют соединения с кластером Vitastor.

 ## rdma_device

 - Тип: строка

 Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
-Если не указано, Vitastor попробует найти RoCE-устройство, соответствующее
-[osd_network](osd.en.md#osd_network), предпочитая RoCEv2, или выбрать первое
-попавшееся RDMA-устройство, если RoCE-устройств нет или если сеть `osd_network`
-не задана. Также автовыбор не поддерживается со старыми версиями библиотеки
-libibverbs < v32, например в Debian 10 Buster или CentOS 7.
-
-Vitastor поддерживает все модели адаптеров, включая те, у которых
+Сейчас Vitastor поддерживает все модели адаптеров, включая те, у которых
 нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
-картами производства не Mellanox. Версии Vitastor до 1.2.0 включительно
-требовали ODP, который есть только на Mellanox ConnectX 4 и более новых.
-См. также [rdma_odp](#rdma_odp).
+картами производства не Mellanox.
+
+Версии Vitastor до 1.2.0 включительно требовали ODP, который есть только
+на Mellanox ConnectX 4 и более новых. См. также [rdma_odp](#rdma_odp).

 Запустите `ibv_devinfo -v` от имени суперпользователя, чтобы посмотреть
 список доступных RDMA-устройств, их параметры и возможности.
@ -119,38 +92,33 @@ Control) и ECN (Explicit Congestion Notification).
 ## rdma_port_num

 - Тип: целое число
+- Значение по умолчанию: 1

 Номер порта RDMA-устройства, который следует использовать. Имеет смысл
 только для устройств, у которых более 1 порта. Чтобы узнать, сколько портов
 у вашего адаптера, посмотрите `phys_port_cnt` в выводе команды
 `ibv_devinfo -v`.

-Опция неприменима к RDMA-CM (use_rdmacm).
-
 ## rdma_gid_index

 - Тип: целое число
+- Значение по умолчанию: 0

 Номер глобального идентификатора адреса RDMA-устройства, который следует
 использовать. Разным gid_index могут соответствовать разные протоколы связи:
 RoCEv1, RoCEv2, iWARP. Чтобы понять, какой нужен вам - смотрите строчки со
 словом "GID" в выводе команды `ibv_devinfo -v`.

-Если не указан, Vitastor попробует автоматически выбрать сначала GID,
-соответствующий RoCEv2 IPv4, потом RoCEv2 IPv6, потом RoCEv1 IPv4, потом
-RoCEv1 IPv6, потом IB. Авто-выбор GID не поддерживается со старыми версиями
-libibverbs < v32.
-
-Правильный rdma_gid_index для RoCEv2, как правило, 1 (IPv6) или 3 (IPv4).
-
-Опция неприменима к RDMA-CM (use_rdmacm).
+**ВАЖНО:** Если вы хотите использовать RoCEv2 (как мы и рекомендуем), то
+правильный rdma_gid_index, как правило, 1 (IPv6) или 3 (IPv4).

 ## rdma_mtu

 - Тип: целое число
+- Значение по умолчанию: 4096

 Максимальная единица передачи (Path MTU) для RDMA. Должно быть равно 1024,
-2048 или 4096. По умолчанию используется значение MTU RDMA-устройства.
+2048 или 4096. Обычно нет смысла менять значение по умолчанию, равное 4096.

 ## rdma_max_sge

@ -253,6 +221,19 @@ OSD в любом случае согласовывают реальное зн
 Если OSD не отвечает за это время, соединение отключается и производится
 повторная попытка соединения.

+## up_wait_retry_interval
+
+- Тип: миллисекунды
+- Значение по умолчанию: 500
+- Минимальное значение: 50
+- Можно менять на лету: да
+
+Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
+поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
+они отвечают клиентам специальным кодом ошибки, означающим, что клиент
+должен некоторое время подождать перед повторением запроса. Именно это время
+ожидания задаёт данный параметр.
+
 ## max_etcd_attempts

 - Тип: целое число
@ -289,44 +270,10 @@ OSD в любом случае согласовывают реальное зн
 Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
 etcd_report_interval, чтобы keepalive гарантированно работал.

-## etcd_ws_keepalive_interval
+## etcd_ws_keepalive_timeout

 - Тип: секунды
- Значение по умолчанию: 5
+- Значение по умолчанию: 30
 - Можно менять на лету: да

 Интервал проверки живости вебсокет-подключений к etcd.
-
-## etcd_min_reload_interval
-
- Тип: миллисекунды
- Значение по умолчанию: 1000
- Можно менять на лету: да
-
-Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
-предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
-успевает рассылать потоки событий и отменяет их.
-
-## tcp_header_buffer_size
-
- Тип: целое число
- Значение по умолчанию: 65536
-
-Размер буфера для чтения данных с дополнительным копированием. Пакеты
-Vitastor содержат 128-байтные заголовки, за которыми следуют данные размером
-от 4 КБ и для мелких операций ввода-вывода обычно выгодно за 1 вызов читать
-сразу несколько пакетов, даже не смотря на то, что это требует лишний раз
-скопировать данные. Часть каждого пакета за пределами значения данного
-параметра читается без дополнительного копирования. Вы можете попробовать
-поменять этот параметр и посмотреть, как он влияет на производительность
-случайного и линейного доступа.
-
-## use_sync_send_recv
-
- Тип: булево (да/нет)
- Значение по умолчанию: false
-
-Если установлено в истину, то вместо io_uring для передачи данных по сети
-будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
-это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
-принципе, это может применяться для клиентов со старыми версиями ядра.
--- a/docs/config/osd.en.md
+++ b/docs/config/osd.en.md
@ -7,19 +7,18 @@
 # Runtime OSD Parameters

 These parameters only apply to OSDs, are not fixed at the moment of OSD drive
-initialization and can be changed - in /etc/vitastor/vitastor.conf or [vitastor-disk update-sb](../usage/disk.en.md#update-sb)
-with an OSD restart or, for some of them, even without restarting by updating configuration in etcd.
+initialization and can be changed - either with an OSD restart or, for some of
+them, even without restarting by updating configuration in etcd.

- [bind_address](#bind_address)
- [bind_port](#bind_port)
- [osd_iothread_count](#osd_iothread_count)
 - [etcd_report_interval](#etcd_report_interval)
 - [etcd_stats_interval](#etcd_stats_interval)
 - [run_primary](#run_primary)
+- [osd_network](#osd_network)
+- [bind_address](#bind_address)
+- [bind_port](#bind_port)
 - [autosync_interval](#autosync_interval)
 - [autosync_writes](#autosync_writes)
 - [recovery_queue_depth](#recovery_queue_depth)
- [recovery_sleep_us](#recovery_sleep_us)
 - [recovery_pg_switch](#recovery_pg_switch)
 - [recovery_sync_batch](#recovery_sync_batch)
 - [readonly](#readonly)
@ -52,46 +51,6 @@ with an OSD restart or, for some of them, even without restarting by updating co
 - [scrub_list_limit](#scrub_list_limit)
 - [scrub_find_best](#scrub_find_best)
 - [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
- [recovery_tune_interval](#recovery_tune_interval)
- [recovery_tune_util_low](#recovery_tune_util_low)
- [recovery_tune_util_high](#recovery_tune_util_high)
- [recovery_tune_client_util_low](#recovery_tune_client_util_low)
- [recovery_tune_client_util_high](#recovery_tune_client_util_high)
- [recovery_tune_agg_interval](#recovery_tune_agg_interval)
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
- [discard_on_start](#discard_on_start)
- [min_discard_size](#min_discard_size)
- [allow_net_split](#allow_net_split)
-
-## bind_address
-
- Type: string or array of strings
-
-Instead of the network masks ([osd_network](network.en.md#osd_network) and
-[osd_cluster_network](network.en.md#osd_cluster_network)), you can also set
-OSD listen addresses explicitly using this parameter. May be useful if you
-want to start OSDs on interfaces that are not UP + RUNNING.
-
-## bind_port
-
- Type: integer
-
-By default, OSDs pick random ports to use for incoming connections
-automatically. With this option you can set a specific port for a specific
-OSD by hand.
-
-## osd_iothread_count
-
- Type: integer
- Default: 0
-
-TCP network I/O thread count for OSD. When non-zero, a single OSD process
-may handle more TCP I/O, but at a cost of increased latency because thread
-switching overhead occurs. RDMA isn't affected by this option.
-
-Because of latency, instead of enabling OSD I/O threads it's recommended to
-just create multiple OSDs per disk, or use RDMA.

 ## etcd_report_interval

@ -123,6 +82,34 @@ debugging purposes. It's possible to implement additional feature for the
 monitor which may allow to separate primary and secondary OSDs, but it's
 unclear why anyone could need it, so it's not implemented.

+## osd_network
+
+- Type: string or array of strings
+
+Network mask of the network (IPv4 or IPv6) to use for OSDs. Note that
+although it's possible to specify multiple networks here, this does not
+mean that OSDs will create multiple listening sockets - they'll only
+pick the first matching address of an UP + RUNNING interface. Separate
+networks for cluster and client connections are also not implemented, but
+they are mostly useless anyway, so it's not a big deal.
+
+## bind_address
+
+- Type: string
+- Default: 0.0.0.0
+
+Instead of the network mask, you can also set OSD listen address explicitly
+using this parameter. May be useful if you want to start OSDs on interfaces
+that are not UP + RUNNING.
+
+## bind_port
+
+- Type: integer
+
+By default, OSDs pick random ports to use for incoming connections
+automatically. With this option you can set a specific port for a specific
+OSD by hand.
+
 ## autosync_interval

 - Type: seconds
@ -148,24 +135,12 @@ operations before issuing an fsync operation internally.
 ## recovery_queue_depth

 - Type: integer
- Default: 1
+- Default: 4
 - Can be changed online: yes

-Maximum recovery and rebalance operations initiated by each OSD in parallel.
-Note that each OSD talks to a lot of other OSDs so actual number of parallel
-recovery operations per each OSD is greater than just recovery_queue_depth.
-Increasing this parameter can speedup recovery if [auto-tuning](#recovery_tune_interval)
-allows it or if it is disabled.
-
-## recovery_sleep_us
-
- Type: microseconds
- Default: 0
- Can be changed online: yes
-
-Delay for all recovery- and rebalance- related operations. If non-zero,
-such operations are artificially slowed down to reduce the impact on
-client I/O.
+Maximum recovery operations per one primary OSD at any given moment of time.
+Currently it's the only parameter available to tune the speed of recovery
+and rebalancing, but it's planned to implement more.

 ## recovery_pg_switch

@ -307,7 +282,7 @@ for hot data and slower disks - HDDs and maybe SATA SSDs - but will slightly
 decrease write performance for fast disks because page cache is an overhead
 itself.

-Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
+Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
 (which requires disable_data_fsync) with drives having write-back cache
 which can't be turned off, for example, Intel Optane. Also note that *some*
 desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
@ -533,117 +508,3 @@ the variant with most available equal copies is correct. For example, if
 you have 3 replicas and 1 of them differs, this one is considered to be
 corrupted. But if there is no "best" version with more copies than all
 others have then the object is also marked as inconsistent.
-
-## recovery_tune_interval
-
- Type: seconds
- Default: 1
- Can be changed online: yes
-
-Interval at which OSD re-considers client and recovery load and automatically
-adjusts [recovery_sleep_us](#recovery_sleep_us). Recovery auto-tuning is
-disabled if recovery_tune_interval is set to 0.
-
-Auto-tuning targets utilization. Utilization is a measure of load and is
-equal to the product of iops and average latency (so it may be greater
-than 1). You set "low" and "high" client utilization thresholds and two
-corresponding target recovery utilization levels. OSD calculates desired
-recovery utilization from client utilization using linear interpolation
-and auto-tunes recovery operation delay to make actual recovery utilization
-match desired.
-
-This allows to reduce recovery/rebalance impact on client operations. It is
-of course impossible to remove it completely, but it should become adequate.
-In some tests rebalance could earlier drop client write speed from 1.5 GB/s
-to 50-100 MB/s, with default auto-tuning settings it now only reduces
-to ~1 GB/s.
-
-## recovery_tune_util_low
-
- Type: number
- Default: 0.1
- Can be changed online: yes
-
-Desired recovery/rebalance utilization when client load is high, i.e. when
-it is at or above recovery_tune_client_util_high.
-
-## recovery_tune_util_high
-
- Type: number
- Default: 1
- Can be changed online: yes
-
-Desired recovery/rebalance utilization when client load is low, i.e. when
-it is at or below recovery_tune_client_util_low.
-
-## recovery_tune_client_util_low
-
- Type: number
- Default: 0
- Can be changed online: yes
-
-Client utilization considered "low".
-
-## recovery_tune_client_util_high
-
- Type: number
- Default: 0.5
- Can be changed online: yes
-
-Client utilization considered "high".
-
-## recovery_tune_agg_interval
-
- Type: integer
- Default: 10
- Can be changed online: yes
-
-The number of last auto-tuning iterations to use for calculating the
-delay as average. Lower values result in quicker response to client
-load change, higher values result in more stable delay. Default value of 10
-is usually fine.
-
-## recovery_tune_sleep_min_us
-
- Type: microseconds
- Default: 10
- Can be changed online: yes
-
-Minimum possible value for auto-tuned recovery_sleep_us. Lower values
-are changed to 0.
-
-## recovery_tune_sleep_cutoff_us
-
- Type: microseconds
- Default: 10000000
- Can be changed online: yes
-
-Maximum possible value for auto-tuned recovery_sleep_us. Higher values
-are treated as outliers and ignored in aggregation.
-
-## discard_on_start
-
- Type: boolean
-
-Discard (SSD TRIM) unused data device blocks on every OSD startup.
-
-## min_discard_size
-
- Type: integer
- Default: 1048576
-
-Minimum consecutive block size to TRIM it.
-
-## allow_net_split
-
- Type: boolean
- Default: false
-
-Allow "safe" cases of network splits/partitions - allow to start PGs without
-connections to some OSDs currently registered as alive in etcd, if the number
-of actually connected PG OSDs is at least pg_minsize. That is, allow some OSDs to lose
-connectivity with some other OSDs as long as it doesn't break pg_minsize guarantees.
-The downside is that it increases the probability of writing data into just pg_minsize
-OSDs during failover which can lead to PGs becoming incomplete after additional outages.
-
-The old behaviour in versions up to 2.0.0 was equal to enabled allow_net_split.
--- a/docs/config/osd.ru.md
+++ b/docs/config/osd.ru.md
@ -8,19 +8,18 @@

 Данные параметры используются только OSD, но, в отличие от дисковых параметров,
 не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
-момент с перезапуском OSD в /etc/vitastor/vitastor.conf или [vitastor-disk update-sb](../usage/disk.ru.md#update-sb),
-а некоторые и без перезапуска, с помощью изменения конфигурации в etcd.
+момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
+изменения конфигурации в etcd.

- [bind_address](#bind_address)
- [bind_port](#bind_port)
- [osd_iothread_count](#osd_iothread_count)
 - [etcd_report_interval](#etcd_report_interval)
 - [etcd_stats_interval](#etcd_stats_interval)
 - [run_primary](#run_primary)
+- [osd_network](#osd_network)
+- [bind_address](#bind_address)
+- [bind_port](#bind_port)
 - [autosync_interval](#autosync_interval)
 - [autosync_writes](#autosync_writes)
 - [recovery_queue_depth](#recovery_queue_depth)
- [recovery_sleep_us](#recovery_sleep_us)
 - [recovery_pg_switch](#recovery_pg_switch)
 - [recovery_sync_batch](#recovery_sync_batch)
 - [readonly](#readonly)
@ -53,47 +52,6 @@
 - [scrub_list_limit](#scrub_list_limit)
 - [scrub_find_best](#scrub_find_best)
 - [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
- [recovery_tune_interval](#recovery_tune_interval)
- [recovery_tune_util_low](#recovery_tune_util_low)
- [recovery_tune_util_high](#recovery_tune_util_high)
- [recovery_tune_client_util_low](#recovery_tune_client_util_low)
- [recovery_tune_client_util_high](#recovery_tune_client_util_high)
- [recovery_tune_agg_interval](#recovery_tune_agg_interval)
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
- [discard_on_start](#discard_on_start)
- [min_discard_size](#min_discard_size)
- [allow_net_split](#allow_net_split)
-
-## bind_address
-
- Тип: строка или массив строк
-
-Вместо использования масок подсети ([osd_network](network.ru.md#osd_network) и
-[osd_cluster_network](network.ru.md#osd_cluster_network)), вы также можете явно
-задать адрес(а), на которых будут ожидать соединений OSD, с помощью данного
-параметра. Это может быть полезно, например, чтобы запускать OSD на неподнятых
-интерфейсах (не UP + RUNNING).
-
-## bind_port
-
- Тип: целое число
-
-По умолчанию OSD сами выбирают случайные порты для входящих подключений.
-С помощью данной опции вы можете задать порт для отдельного OSD вручную.
-
-## osd_iothread_count
-
- Тип: целое число
- Значение по умолчанию: 0
-
-Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
-стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
-по сети больше данных, но ухудшает задержку из-за накладных расходов
-переключения потоков. На работу RDMA опция не влияет.
-
-Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
-просто создавать по несколько OSD на каждом диске, или использовать RDMA.

 ## etcd_report_interval

@ -126,6 +84,34 @@ max_etcd_attempts * etcd_quick_timeout.
 первичные OSD от вторичных, но пока не понятно, зачем это может кому-то
 понадобиться, поэтому это не реализовано.

+## osd_network
+
+- Тип: строка или массив строк
+
+Маска подсети (IPv4 или IPv6) для использования для соединений с OSD.
+Имейте в виду, что хотя сейчас и можно передать в этот параметр несколько
+подсетей, это не означает, что OSD будут создавать несколько слушающих
+сокетов - они лишь будут выбирать адрес первого поднятого (состояние UP +
+RUNNING) интерфейса, подходящий под заданную маску. Также не реализовано разделение
+кластерной и публичной сетей OSD. Правда, от него обычно всё равно довольно
+мало толку, так что особенной проблемы в этом нет.
+
+## bind_address
+
+- Тип: строка
+- Значение по умолчанию: 0.0.0.0
+
+Этим параметром можно явным образом задать адрес, на котором будет ожидать
+соединений OSD (вместо использования маски подсети). Может быть полезно,
+например, чтобы запускать OSD на неподнятых интерфейсах (не UP + RUNNING).
+
+## bind_port
+
+- Тип: целое число
+
+По умолчанию OSD сами выбирают случайные порты для входящих подключений.
+С помощью данной опции вы можете задать порт для отдельного OSD вручную.
+
 ## autosync_interval

 - Тип: секунды
@ -152,25 +138,13 @@ OSD, чтобы успевать очищать журнал - без них OSD
 ## recovery_queue_depth

 - Тип: целое число
- Значение по умолчанию: 1
+- Значение по умолчанию: 4
 - Можно менять на лету: да

-Максимальное число параллельных операций восстановления, инициируемых одним
-OSD в любой момент времени. Имейте в виду, что каждый OSD обычно работает с
-многими другими OSD, так что на практике параллелизм восстановления больше,
-чем просто recovery_queue_depth. Увеличение значения этого параметра может
-ускорить восстановление если [автотюнинг скорости](#recovery_tune_interval)
-разрешает это или если он отключён.
-
-## recovery_sleep_us
-
- Тип: микросекунды
- Значение по умолчанию: 0
- Можно менять на лету: да
-
-Задержка для всех операций, связанных с восстановлением и перебалансировкой.
-Если значение не равно нулю, такие операции искусственно замедляются, чтобы
-снизить их влияние на клиентский ввод-вывод.
+Максимальное число операций восстановления на одном первичном OSD в любой
+момент времени. На данный момент единственный параметр, который можно менять
+для ускорения или замедления восстановления и перебалансировки данных, но
+в планах реализация других параметров.

 ## recovery_pg_switch

@ -561,121 +535,3 @@ EC (кодов коррекции ошибок) с более, чем 1 диск
 считается некорректной. Однако, если "лучшую" версию с числом доступных
 копий большим, чем у всех других версий, найти невозможно, то объект тоже
 маркируется неконсистентным.
-
-## recovery_tune_interval
-
- Тип: секунды
- Значение по умолчанию: 1
- Можно менять на лету: да
-
-Интервал, с которым OSD пересматривает клиентскую нагрузку и нагрузку
-восстановления и автоматически подстраивает [recovery_sleep_us](#recovery_sleep_us).
-Автотюнинг (автоподстройка) отключается, если recovery_tune_interval
-устанавливается в значение 0.
-
-Автотюнинг регулирует утилизацию. Утилизация является мерой нагрузки
-и равна произведению числа операций в секунду и средней задержки
-(то есть, она может быть выше 1). Вы задаёте два уровня клиентской
-утилизации - "низкий" и "высокий" (low и high) и два соответствующих
-целевых уровня утилизации операциями восстановления. OSD рассчитывает
-желаемый уровень утилизации восстановления линейной интерполяцией от
-клиентской утилизации и подстраивает задержку операций восстановления
-так, чтобы фактическая утилизация восстановления совпадала с желаемой.
-
-Это позволяет снизить влияние восстановления и ребаланса на клиентские
-операции. Конечно, невозможно исключить такое влияние полностью, но оно
-должно становиться адекватнее. В некоторых тестах перебалансировка могла
-снижать клиентскую скорость записи с 1.5 ГБ/с до 50-100 МБ/с, а теперь, с
-настройками автотюнинга по умолчанию, она снижается только до ~1 ГБ/с.
-
-## recovery_tune_util_low
-
- Тип: число
- Значение по умолчанию: 0.1
- Можно менять на лету: да
-
-Желаемая утилизация восстановления в моменты, когда клиентская нагрузка
-высокая, то есть, находится на уровне или выше recovery_tune_client_util_high.
-
-## recovery_tune_util_high
-
- Тип: число
- Значение по умолчанию: 1
- Можно менять на лету: да
-
-Желаемая утилизация восстановления в моменты, когда клиентская нагрузка
-низкая, то есть, находится на уровне или ниже recovery_tune_client_util_low.
-
-## recovery_tune_client_util_low
-
- Тип: число
- Значение по умолчанию: 0
- Можно менять на лету: да
-
-Клиентская утилизация, которая считается "низкой".
-
-## recovery_tune_client_util_high
-
- Тип: число
- Значение по умолчанию: 0.5
- Можно менять на лету: да
-
-Клиентская утилизация, которая считается "высокой".
-
-## recovery_tune_agg_interval
-
- Тип: целое число
- Значение по умолчанию: 10
- Можно менять на лету: да
-
-Число последних итераций автоподстройки для расчёта задержки как среднего
-значения. Меньшие значения параметра ускоряют отклик на изменение нагрузки,
-большие значения делают задержку стабильнее. Значение по умолчанию 10
-обычно нормальное и не требует изменений.
-
-## recovery_tune_sleep_min_us
-
- Тип: микросекунды
- Значение по умолчанию: 10
- Можно менять на лету: да
-
-Минимальное возможное значение авто-подстроенного recovery_sleep_us.
-Меньшие значения заменяются на 0.
-
-## recovery_tune_sleep_cutoff_us
-
- Тип: микросекунды
- Значение по умолчанию: 10000000
- Можно менять на лету: да
-
-Максимальное возможное значение авто-подстроенного recovery_sleep_us.
-Большие значения считаются случайными выбросами и игнорируются в
-усреднении.
-
-## discard_on_start
-
- Тип: булево (да/нет)
-
-Освобождать (SSD TRIM) неиспользуемые блоки диска данных при каждом запуске OSD.
-
-## min_discard_size
-
- Тип: целое число
- Значение по умолчанию: 1048576
-
-Минимальный размер последовательного блока данных, чтобы освобождать его через TRIM.
-
-## allow_net_split
-
- Тип: булево (да/нет)
- Значение по умолчанию: false
-
-Разрешить "безопасные" случаи разделений сети - разрешить активировать PG без
-соединений к некоторым OSD, помеченным активными в etcd, если общее число активных
-OSD в PG составляет как минимум pg_minsize. То есть, разрешать некоторым OSD терять
-соединения с некоторыми другими OSD, если это не нарушает гарантий pg_minsize.
-Минус такого разрешения в том, что оно повышает вероятность записи данных ровно в
-pg_minsize OSD во время переключений, что может потом привести к тому, что PG станут
-неполными (incomplete), если упадут ещё какие-то OSD.
-
-Старое поведение в версиях до 2.0.0 было идентично включённому allow_net_split.
--- a/docs/config/pool.en.md
+++ b/docs/config/pool.en.md
@ -32,8 +32,6 @@ Parameters:
 - [pg_minsize](#pg_minsize)
 - [pg_count](#pg_count)
 - [failure_domain](#failure_domain)
- [level_placement](#level_placement)
- [raw_placement](#raw_placement)
 - [max_osd_combinations](#max_osd_combinations)
 - [block_size](#block_size)
 - [bitmap_granularity](#bitmap_granularity)
@ -43,7 +41,6 @@ Parameters:
 - [osd_tags](#osd_tags)
 - [primary_affinity_tags](#primary_affinity_tags)
 - [scrub_interval](#scrub_interval)
- [used_for_app](#used_for_app)

 Examples:

@ -55,7 +52,7 @@ Examples:
 OSD placement tree is set in a separate etcd key `/vitastor/config/node_placement`
 in the following JSON format:

-```
+`
 {
  "<node name or OSD number>": {
    "level": "<level>",
@ -63,7 +60,7 @@ in the following JSON format:
  },
  ...
 }
-```
+`

 Here, if a node name is a number then it is assumed to refer to an OSD.
 Level of the OSD is always "osd" and cannot be overridden. You may only
@ -86,11 +83,7 @@ Parent node reference is required for intermediate tree nodes.
 Separate OSD settings are set in etcd keys `/vitastor/config/osd/<number>`
 in JSON format `{"<key>":<value>}`.

-As of now, the following settings are supported:
-
- [reweight](#reweight)
- [tags](#tags)
- [noout](#noout)
+As of now, two settings are supported:

 ## reweight

@ -113,14 +106,6 @@ subsets and then use a specific subset for pool instead of all OSDs.
 For example you can mark SSD OSDs with tag "ssd" and HDD OSDs with "hdd" and
 such tags will work as device classes.

-## noout
-
- Type: boolean
- Default: false
-
-If set to true, [osd_out_time](monitor.en.md#osd_out_time) is ignored for this
-OSD and it's never removed from data distribution by the monitor.
-
 # Pool parameters

 ## name
@ -169,29 +154,6 @@ That is, if it becomes impossible to place PG data on at least (pg_minsize)
 OSDs, PG is deactivated for both read and write. So you know that a fresh
 write always goes to at least (pg_minsize) OSDs (disks).

-For example, the difference between pg_minsize 2 and 1 in a 3-way replicated
-pool (pg_size=3) is:
- If 2 hosts go down with pg_minsize=2, the pool becomes inactive and remains
-  inactive for [osd_out_time](monitor.en.md#osd_out_time) (10 minutes). After
-  this timeout, the monitor selects replacement hosts/OSDs and the pool comes
-  up and starts to heal. Therefore, if you don't have replacement OSDs, i.e.
-  if you only have 3 hosts with OSDs and 2 of them are down, the pool remains
-  inactive until you add or return at least 1 host (or change failure_domain
-  to "osd").
- If 2 hosts go down with pg_minsize=1, the pool only experiences a short
-  I/O pause until the monitor notices that OSDs are down (5-10 seconds with
-  the default [etcd_report_interval](osd.en.md#etcd_report_interval)). After
-  this pause, I/O resumes, but new data is temporarily written in only 1 copy.
-  Then, after osd_out_time, the monitor also selects replacement OSDs and the
-  pool starts to heal.
-
-So, pg_minsize regulates the number of failures that a pool can tolerate
-without temporary downtime for [osd_out_time](monitor.en.md#osd_out_time),
-but at a cost of slightly reduced storage reliability.
-
-See also [allow_net_split](osd.en.md#allow_net_split) and
-[PG state descriptions](../usage/admin.en.md#pg-states).
-
 FIXME: pg_minsize behaviour may be changed in the future to only make PGs
 read-only instead of deactivating them.

@ -203,8 +165,8 @@ read-only instead of deactivating them.
 Number of PGs for this pool. The value should be big enough for the monitor /
 LP solver to be able to optimize data placement.

-"Enough" is usually around 10-100 PGs per OSD, i.e. you set pg_count for pool
-to (total OSD count * 10 / pg_size). You can round it to the closest power of 2,
+"Enough" is usually around 64-128 PGs per OSD, i.e. you set pg_count for pool
+to (total OSD count * 100 / pg_size). You can round it to the closest power of 2,
 because it makes it easier to reduce or increase PG count later by dividing or
 multiplying it by 2.

@ -226,69 +188,6 @@ never put on OSDs in the same failure domain (for example, on the same host).
 So failure domain specifies the unit whose failure you are protecting yourself
 from.

-## level_placement
-
- Type: string
-
-Additional failure domain rules, applied in conjunction with failure_domain.
-Must be specified in the following form:
-
-`<placement level>=<sequence of characters>, <level2>=<sequence2>, ...`
-
-Sequence should be exactly [pg_size](#pg_size) characters long. Each character
-corresponds to an OSD in the PG of this pool. Equal characters mean that
-corresponding items of the PG should be placed into the same placement tree
-item at this level. Different characters mean that items should be placed into
-different items.
-
-For example, if you want an EC 4+2 pool and you want every 2 chunks to be stored
-in its own datacenter and you also want each chunk to be stored on a different
-host, you should set `level_placement` to `dc=112233 host=123456`.
-
-Or you can set `level_placement` to `dc=112233` and leave `failure_domain` empty,
-because `host` is the default `failure_domain` and it will be applied anyway.
-
-Without this rule, it may happen that 3 chunks will be stored on OSDs in the
-same datacenter, and the data will become inaccessible if that datacenter goes
-down in this case.
-
-Of course, you should group your hosts into datacenters before applying the rule
-by setting [placement_levels](monitor.en.md#placement_levels) to something like
-`{"dc":90,"host":100,"osd":110}` and add DCs to [node_placement](#placement-tree),
-like `{"dc1":{"level":"dc"},"host1":{"parent":"dc1"},...}`.
-
-## raw_placement
-
- Type: string
-
-Raw PG placement rules, specified in the form of a DSL (domain-specific language).
-Use only if you really know what you're doing :)
-
-DSL specification:
-
-```
-dsl := item | item ("\n" | ",") items
-item := "any" | rules
-rules := rule | rule rules
-rule := level operator arg
-level := /\w+/
-operator := "!=" | "=" | ">" | "?="
-arg := value | "(" values ")"
-values := value | value "," values
-value := item_ref | constant_id
-item_ref := /\d+/
-constant_id := /"([^"]+)"/
-```
-
-"?=" operator means "preferred". I.e. `dc ?= "meow"` means "prefer datacenter meow
-for this chunk, but put into another dc if it's unavailable".
-
-Examples:
-
- Simple 3 replicas with failure_domain=host: `any, host!=1, host!=(1,2)`
- EC 4+2 in 3 DC: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
- 1 replica in fixed DC + 2 in random DCs: `dc?=meow, dc!=1, dc!=(1,2)`
-
 ## max_osd_combinations

 - Type: integer
@ -380,38 +279,6 @@ of the OSDs containing a data chunk for a PG.
 Automatic scrubbing interval for this pool. Overrides
 [global scrub_interval setting](osd.en.md#scrub_interval).

-## used_for_app
-
- Type: string
-
-If non-empty, the pool is marked as used for a separate application, for example,
-VitastorFS or S3, which allocates Vitastor volume IDs by itself and does not use
-image/inode metadata in etcd.
-
-When a pool is marked as used for such app, regular block volume creation in it
-is disabled (vitastor-cli refuses to create images without --force) to protect
-the user from block volume and FS/S3 volume ID collisions and data loss.
-
-Also such pools do not calculate per-inode space usage statistics in etcd because
-using it for an external application implies that it may contain a very large
-number of volumes and their statistics may take too much space in etcd.
-
-Setting used_for_app to `fs:<name>` tells Vitastor that the pool is used for VitastorFS
-with VitastorKV metadata base stored in a block image (regular Vitastor volume) named
-`<name>`.
-
-[vitastor-nfs](../usage/nfs.en.md), in its turn, refuses to use pools not marked
-for the corresponding FS when starting. This also implies that you can use one
-pool only for one VitastorFS.
-
-If you plan to use the pool for S3, set its used_for_app to `s3:<name>`. `<name>` may
-be basically anything you want (for example, `s3:standard`) - it's not validated
-by Vitastor S3 components in any way.
-
-All other values except those prefixed with `fs:` or `s3:` may be used freely and don't
-mean anything special for Vitastor core components. For now, you can use them as
-you wish.
-
 # Examples

 ## Replicated pool
--- a/docs/config/pool.ru.md
+++ b/docs/config/pool.ru.md
@ -31,8 +31,6 @@
 - [pg_minsize](#pg_minsize)
 - [pg_count](#pg_count)
 - [failure_domain](#failure_domain)
- [level_placement](#level_placement)
- [raw_placement](#raw_placement)
 - [max_osd_combinations](#max_osd_combinations)
 - [block_size](#block_size)
 - [bitmap_granularity](#bitmap_granularity)
@ -42,7 +40,6 @@
 - [osd_tags](#osd_tags)
 - [primary_affinity_tags](#primary_affinity_tags)
 - [scrub_interval](#scrub_interval)
- [used_for_app](#used_for_app)

 Примеры:

@ -54,7 +51,7 @@
 Дерево размещения OSD задаётся в отдельном ключе etcd `/vitastor/config/node_placement`
 в следующем JSON-формате:

-```
+`
 {
  "<имя узла или номер OSD>": {
    "level": "<уровень>",
@ -62,7 +59,7 @@
  },
  ...
 }
-```
+`

 Здесь, если название узла - число, считается, что это OSD. Уровень OSD
 всегда равен "osd" и не может быть переопределён. Для OSD вы можете только
@ -85,11 +82,10 @@
 Настройки отдельных OSD задаются в ключах etcd `/vitastor/config/osd/<number>`
 в JSON-формате `{"<key>":<value>}`.

-На данный момент поддерживаются следующие настройки:
+На данный момент поддерживаются две настройки:

 - [reweight](#reweight)
 - [tags](#tags)
- [noout](#noout)

 ## reweight

@ -113,14 +109,6 @@
 всех. Можно, например, пометить SSD OSD тегом "ssd", а HDD тегом "hdd", в
 этом смысле теги работают аналогично классам устройств.

-## noout
-
- Тип: булево (да/нет)
- Значение по умолчанию: false
-
-Если установлено в true, то [osd_out_time](monitor.ru.md#osd_out_time) для этого
-OSD игнорируется и OSD не удаляется из распределения данных монитором.
-
 # Параметры

 ## name
@ -169,26 +157,6 @@ OSD игнорируется и OSD не удаляется из распред
 OSD, PG деактивируется на чтение и запись. Иными словами, всегда известно,
 что новые блоки данных всегда записываются как минимум на pg_minsize дисков.

-Для примера, разница между pg_minsize 2 и 1 в реплицированном пуле с 3 копиями
-данных (pg_size=3), проявляется следующим образом:
- Если 2 сервера отключаются при pg_minsize=2, пул становится неактивным и
-  остаётся неактивным в течение [osd_out_time](monitor.ru.md#osd_out_time)
-  (10 минут), после чего монитор назначает другие OSD/серверы на замену, пул
-  поднимается и начинает восстанавливать недостающие копии данных. Соответственно,
-  если OSD на замену нет - то есть, если у вас всего 3 сервера с OSD и 2 из них
-  недоступны - пул так и остаётся недоступным до тех пор, пока вы не вернёте
-  или не добавите хотя бы 1 сервер (или не переключите failure_domain на "osd").
- Если 2 сервера отключаются при pg_minsize=1, ввод-вывод лишь приостанавливается
-  на короткое время, до тех пор, пока монитор не поймёт, что OSD отключены
-  (что занимает 5-10 секунд при стандартном [etcd_report_interval](osd.ru.md#etcd_report_interval)).
-  После этого ввод-вывод восстанавливается, но новые данные временно пишутся
-  всего в 1 копии. Когда же проходит osd_out_time, монитор точно так же назначает
-  другие OSD на замену выбывшим и пул начинает восстанавливать копии данных.
-
-То есть, pg_minsize регулирует число отказов, которые пул может пережить без
-временной остановки обслуживания на [osd_out_time](monitor.ru.md#osd_out_time),
-но ценой немного пониженных гарантий надёжности.
-
 FIXME: Поведение pg_minsize может быть изменено в будущем с полной деактивации
 PG на перевод их в режим только для чтения.

@ -200,8 +168,8 @@ PG на перевод их в режим только для чтения.
 Число PG для данного пула. Число должно быть достаточно большим, чтобы монитор
 мог равномерно распределить по ним данные.

-Обычно это означает примерно 10-100 PG на 1 OSD, т.е. pg_count можно устанавливать
-равным (общему числу OSD * 10 / pg_size). Значение можно округлить до ближайшей
+Обычно это означает примерно 64-128 PG на 1 OSD, т.е. pg_count можно устанавливать
+равным (общему числу OSD * 100 / pg_size). Значение можно округлить до ближайшей
 степени 2, чтобы потом было легче уменьшать или увеличивать число PG, умножая
 или деля его на 2.

@ -222,71 +190,6 @@ PG в Vitastor эфемерны, то есть вы можете менят
 Иными словами, домен отказа - это то, от отказа чего вы защищаете себя избыточным
 хранением.

-## level_placement
-
- Тип: строка
-
-Правила дополнительных доменов отказа, применяемые вместе с failure_domain.
-Должны задаваться в следующем виде:
-
-`<уровень>=<последовательность символов>, <уровень2>=<последовательность2>, ...`
-
-Каждая `<последовательность>` должна состоять ровно из [pg_size](#pg_size) символов.
-Каждый символ соответствует одному OSD (размещению одной части PG) этого пула.
-Одинаковые символы означают, что соответствующие части размещаются в один и тот же
-узел дерева OSD на заданном `<уровне>`. Разные символы означают, что части
-размещаются в разные узлы.
-
-Например, если вы хотите сделать пул EC 4+2 и хотите поместить каждые 2 части
-данных в свой датацентр, и также вы хотите, чтобы каждая часть размещалась на
-другом хосте, то вы должны задать `level_placement` равным `dc=112233 host=123456`.
-
-Либо вы просто можете задать `level_placement` равным `dc=112233` и оставить
-`failure_domain` пустым, т.к. `host` это его значение по умолчанию и оно также
-применится автоматически.
-
-Без этого правила может получиться так, что в одном из датацентров окажется
-3 части данных одной PG и данные окажутся недоступными при временном отключении
-этого датацентра.
-
-Естественно, перед установкой правила вам нужно сгруппировать ваши хосты в
-датацентры, установив [placement_levels](monitor.ru.md#placement_levels) во что-то
-типа `{"dc":90,"host":100,"osd":110}` и добавив датацентры в [node_placement](#дерево-размещения),
-примерно так: `{"dc1":{"level":"dc"},"host1":{"parent":"dc1"},...}`.
-
-## raw_placement
-
- Тип: строка
-
-Низкоуровневые правила генерации PG в форме DSL (доменно-специфичного языка).
-Используйте, только если действительно знаете, зачем вам это надо :)
-
-Спецификация DSL:
-
-```
-dsl := item | item ("\n" | ",") items
-item := "any" | rules
-rules := rule | rule rules
-rule := level operator arg
-level := /\w+/
-operator := "!=" | "=" | ">" | "?="
-arg := value | "(" values ")"
-values := value | value "," values
-value := item_ref | constant_id
-item_ref := /\d+/
-constant_id := /"([^"]+)"/
-```
-
-Оператор "?=" означает "предпочитаемый". Т.е. `dc ?= "meow"` означает "предпочитать
-датацентр meow для этой части данных, но разместить её в другом датацентре, если
-meow недоступен".
-
-Примеры:
-
- Простые 3 реплики с failure_domain=host: `any, host!=1, host!=(1,2)`
- EC 4+2 в 3 датацентрах: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
- 1 копия в фиксированном ДЦ + 2 в других ДЦ: `dc?=meow, dc!=1, dc!=(1,2)`
-
 ## max_osd_combinations

 - Тип: целое число
@ -383,43 +286,6 @@ OSD с "all".
 Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
 Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).

-## used_for_app
-
- Тип: строка
-
-Если непусто, пул помечается как используемый для отдельного приложения, например,
-для VitastorFS или S3, которое распределяет ID образов в пуле само и не использует
-метаданные образов/инодов в etcd.
-
-Когда пул помечается используемым для такого приложения, создание обычных блочных
-образов в нём запрещается (vitastor-cli отказывается создавать образы без --force),
-чтобы защитить пользователя от коллизий ID блочных образов и томов ФС/S3, и,
-таким образом, от потери данных.
-
-Также для таких пулов отключается передача статистики в etcd по отдельным инодам,
-так как использование для внешнего приложения подразумевает, что пул может содержать
-очень много томов и их статистика может занять слишком много места в etcd.
-
-Установка used_for_app в значение `fs:<name>` сообщает о том, что пул используется
-для VitastorFS с базой метаданных VitastorKV, хранимой в блочном образе с именем
-`<name>`.
-
-[vitastor-nfs](../usage/nfs.ru.md), в свою очередь, при запуске отказывается
-использовать для ФС пулы, не помеченные, как используемые для неё. Это также
-означает, что один пул может использоваться только для одной VitastorFS.
-
-Если же вы планируете использовать пул для данных S3, установите его used_for_app
-в значение `s3:<name>`, где `<name>` - любое название по вашему усмотрению
-(например, `s3:standard`) - конкретное содержимое `<name>` пока никак не проверяется
-компонентами Vitastor S3.
-
-Смотрите также [allow_net_split](osd.ru.md#allow_net_split) и
-[документацию по состояниям PG](../usage/admin.ru.md#состояния-pg).
-
-Все остальные значения used_for_app, кроме начинающихся на `fs:` или `s3:`, не
-означают ничего особенного для основных компонентов Vitastor. Поэтому сейчас вы
-можете использовать их свободно любым желаемым способом.
-
 # Примеры

 ## Реплицированный пул
--- a/docs/config/src/client.en.md
+++ b/docs/config/src/client.en.md
@ -1,4 +1,4 @@
 # Client Parameters

-These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
-affect their interaction with the cluster.
+These parameters apply only to clients and affect their interaction with
+the cluster.
--- a/docs/config/src/client.ru.md
+++ b/docs/config/src/client.ru.md
@ -1,4 +1,4 @@
 # Параметры клиентского кода

-Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
+Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD) и
 затрагивают логику их работы с кластером.
--- a/docs/config/src/client.yml
+++ b/docs/config/src/client.yml
@ -1,84 +1,3 @@
- name: client_iothread_count
-  type: int
-  default: 0
-  online: false
-  info: |
-    Number of separate threads for handling TCP network I/O at client library
-    side. Enabling 4 threads usually allows to increase peak performance of each
-    client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
-    100-150 to 400 thousand iops, but at the same time it increases latency.
-    Latency increase depends on CPU: with CPU power saving disabled latency
-    only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
-    with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
-    iops decrease from 2000 to 1000). RDMA isn't affected by this option.
-
-    It's recommended to enable client I/O threads if you don't use RDMA and want
-    to increase peak client performance.
-  info_ru: |
-    Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
-    клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
-    производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
-    чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
-    секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
-    при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
-    падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
-    и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
-    RDMA данная опция не влияет.
-
-    Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
-    RDMA и хотите повысить пиковую производительность клиентов.
- name: client_retry_interval
-  type: ms
-  min: 10
-  default: 50
-  online: true
-  info: |
-    Retry time for I/O requests failed due to inactive PGs or network
-    connectivity errors.
-  info_ru: |
-    Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
-    ошибок сети.
- name: client_eio_retry_interval
-  type: ms
-  default: 1000
-  online: true
-  info: |
-    Retry time for I/O requests failed due to data corruption or unfinished
-    EC object deletions (has_incomplete PG state). 0 disables such retries
-    and clients are not blocked and just get EIO error code instead.
-  info_ru: |
-    Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
-    или незавершённых удалений EC-объектов (состояния PG has_incomplete).
-    0 отключает повторы таких запросов и клиенты не блокируются, а вместо
-    этого просто получают код ошибки EIO.
- name: client_retry_enospc
-  type: bool
-  default: true
-  online: true
-  info: |
-    Retry writes on out of space errors to wait until some space is freed on
-    OSDs.
-  info_ru: |
-    Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
-    ожидать, пока на OSD не освободится место.
- name: client_wait_up_timeout
-  type: sec
-  default: 16
-  online: true
-  info: |
-    Wait for this number of seconds until PGs are up when doing operations
-    which require all PGs to be up. Currently only used by object listings
-    in delete and merge-based commands ([vitastor-cli rm](../usage/cli.en.md#rm), merge and so on).
-
-    The default value is calculated as `1 + OSD lease timeout`, which is
-    `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
-  info_ru: |
-    Время ожидания поднятия PG при операциях, требующих активности всех PG.
-    В данный момент используется листингами объектов в командах, использующих
-    удаление и слияние ([vitastor-cli rm](../usage/cli.ru.md#rm), merge и подобные).
-
-    Значение по умолчанию вычисляется как `1 + время lease OSD`, равное
-    `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
 - name: client_max_dirty_bytes
  type: int
  default: 33554432
@ -203,71 +122,3 @@
    Maximum number of parallel writes when flushing buffered data to the server.
  info_ru: |
    Максимальное число параллельных операций записи при сбросе буферов на сервер.
- name: nbd_timeout
-  type: sec
-  default: 300
-  online: false
-  info: |
-    Timeout for I/O operations for [NBD](../usage/nbd.en.md). If an operation
-    executes for longer than this timeout, including when your cluster is just
-    temporarily down for more than timeout, the NBD device will detach by itself
-    (and possibly break the mounted file system).
-
-    You can set timeout to 0 to never detach, but in that case you won't be
-    able to remove the kernel device at all if the NBD process dies - you'll have
-    to reboot the host.
-  info_ru: |
-    Таймаут для операций чтения/записи через [NBD](../usage/nbd.ru.md). Если
-    операция выполняется дольше таймаута, включая временную недоступность
-    кластера на время, большее таймаута, NBD-устройство отключится само собой
-    (и, возможно, сломает примонтированную ФС).
-
-    Вы можете установить таймаут в 0, чтобы никогда не отключать устройство по
-    таймауту, но в этом случае вы вообще не сможете удалить устройство, если
-    процесс NBD умрёт - вам придётся перезагружать сервер.
- name: nbd_max_devices
-  type: int
-  default: 64
-  online: false
-  info: |
-    Maximum number of NBD devices in the system. This value is passed as
-    `nbds_max` parameter for the nbd kernel module when vitastor-nbd autoloads it.
-  info_ru: |
-    Максимальное число NBD-устройств в системе. Данное значение передаётся
-    модулю ядра nbd как параметр `nbds_max`, когда его загружает vitastor-nbd.
- name: nbd_max_part
-  type: int
-  default: 3
-  online: false
-  info: |
-    Maximum number of partitions per NBD device. This value is passed as
-    `max_part` parameter for the nbd kernel module when vitastor-nbd autoloads it.
-    Note that (nbds_max)*(1+max_part) usually can't exceed 256.
-  info_ru: |
-    Максимальное число разделов на одном NBD-устройстве. Данное значение передаётся
-    модулю ядра nbd как параметр `max_part`, когда его загружает vitastor-nbd.
-    Имейте в виду, что (nbds_max)*(1+max_part) обычно не может превышать 256.
- name: osd_nearfull_ratio
-  type: float
-  default: 0.95
-  online: true
-  info: |
-    Ratio of used space on OSD to treat it as "almost full" in vitastor-cli status output.
-
-    Remember that some client writes may hang or complete with an error if even
-    just one OSD becomes 100 % full!
-
-    However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
-    unable to start at all), so you'll be able to recover from "out of space" errors
-    without destroying and recreating OSDs.
-  info_ru: |
-    Доля занятого места на OSD, начиная с которой он считается "почти заполненным" в
-    выводе vitastor-cli status.
-
-    Помните, что часть клиентских запросов может зависнуть или завершиться с ошибкой,
-    если на 100 % заполнится хотя бы 1 OSD!
-
-    Однако, в отличие от Ceph, заполненные на 100 % OSD Vitastor не падают (в Ceph
-    заполненные на 100% OSD вообще не могут стартовать), так что вы сможете
-    восстановить работу кластера после ошибок отсутствия свободного места
-    без уничтожения и пересоздания OSD.
--- a/docs/config/src/included.en.md
+++ b/docs/config/src/included.en.md
@ -14,12 +14,8 @@

 {{../../installation/packages.en.md}}

-{{../../installation/docker.en.md}}
-
 {{../../installation/proxmox.en.md}}

-{{../../installation/opennebula.en.md}}
-
 {{../../installation/openstack.en.md}}

 {{../../installation/kubernetes.en.md}}
@ -60,8 +56,6 @@

 {{../../usage/nfs.en.md}}

-{{../../usage/admin.en.md}}
-
 ## Performance

 {{../../performance/understanding.en.md}}
@ -70,6 +64,4 @@

 {{../../performance/comparison1.en.md}}

-{{../../performance/bench2.en.md}}
-
 {{../../intro/author.en.md|indent=1}}
--- a/docs/config/src/included.ru.md
+++ b/docs/config/src/included.ru.md
@ -14,12 +14,8 @@

 {{../../installation/packages.ru.md}}

-{{../../installation/docker.ru.md}}
-
 {{../../installation/proxmox.ru.md}}

-{{../../installation/opennebula.ru.md}}
-
 {{../../installation/openstack.ru.md}}

 {{../../installation/kubernetes.ru.md}}
@ -60,8 +56,6 @@

 {{../../usage/nfs.ru.md}}

-{{../../usage/admin.ru.md}}
-
 ## Производительность

 {{../../performance/understanding.ru.md}}
@ -70,6 +64,4 @@

 {{../../performance/comparison1.ru.md}}

-{{../../performance/bench2.ru.md}}
-
 {{../../intro/author.ru.md|indent=1}}
--- a/docs/config/src/layout-cluster.yml
+++ b/docs/config/src/layout-cluster.yml
@ -47,24 +47,14 @@
    Не может быть меньше размера сектора дисков данных OSD.
 - name: immediate_commit
  type: string
-  default: all
+  default: false
  info: |
-    One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
-
-    This parameter is also really important for performance.
-
-    TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
-    power loss protection (nonvolatile write-through cache) and also for most HDDs.
-    "none" or "small" should be only selected if you use desktop SSDs without
-    capacitors or drives with slow write-back cache that can't be disabled. Check
-    immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
-
-    Detailed explanation:
+    Another parameter which is really important for performance.

    Desktop SSDs are very fast (100000+ iops) for simple random writes
    without cache flush. However, they are really slow (only around 1000 iops)
-    if you try to fsync() each write, that is, if you want to guarantee that
-    each change gets actually persisted to the physical media.
+    if you try to fsync() each write, that is, when you want to guarantee that
+    each change gets immediately persisted to the physical media.

    Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
    "Supercapacitor-based Power Loss Protection", on the other hand, are equally
@ -76,8 +66,8 @@
    efficiently utilize desktop SSDs by postponing fsync until the client calls
    it explicitly.

-    This is what this parameter regulates. When it's set to "all" Vitastor
-    cluster commits each change to disks immediately and clients just
+    This is what this parameter regulates. When it's set to "all" the whole
+    Vitastor cluster commits each change to disks immediately and clients just
    ignore fsyncs because they know for sure that they're unneeded. This reduces
    the amount of network roundtrips performed by clients and improves
    performance. So it's always better to use server grade SSDs with
@ -97,22 +87,17 @@
    it (they have internal SSD cache even though it's not stated in datasheets).

    Setting this parameter to "all" or "small" in OSD parameters requires enabling
-    [disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
-    [disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
-    "all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
-    vitastor-disk tries to do that by default, first checking/disabling drive cache.
-    If it can't disable drive cache, OSDs get initialized with "none".
+    [disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
+    [disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
+    "all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
+
+    TLDR: For optimal performance, set immediate_commit to "all" if you only use
+    SSDs with supercapacitor-based power loss protection (nonvolatile
+    write-through cache) for both data and journals in the whole Vitastor
+    cluster. Set it to "small" if you only use such SSDs for journals. Leave
+    empty if your drives have write-back cache.
  info_ru: |
-    Одно из значений "none", "small" или "all". Глобальное значение, может быть
-    переопределено [на уровне пула](pool.ru.md#immediate_commit).
-
-    Данный параметр тоже важен для производительности.
-
-    Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
-    суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
-    устанавливать только при использовании SSD настольного класса без
-    суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
-    Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
+    Ещё один важный для производительности параметр.

    Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
    секунду) при простой случайной записи без сбросов кэша. Однако они очень
@ -133,7 +118,7 @@
    эффективно утилизировать настольные SSD.

    Данный параметр влияет как раз на это. Когда он установлен в значение "all",
-    кластер Vitastor мгновенно фиксирует каждое изменение на физические
+    весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
    носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
    знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
    по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
@ -156,6 +141,13 @@
    указано в спецификациях).

    Указание "all" или "small" в настройках / командной строке OSD требует
-    включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
-    [disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
-    также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
+    включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
+    [disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
+    также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
+
+    Итого, вкратце: для оптимальной производительности установите
+    immediate_commit в значение "all", если вы используете в кластере только SSD
+    с суперконденсаторами и для данных, и для журналов. Если вы используете
+    такие SSD для всех журналов, но не для данных - можете установить параметр
+    в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
+    оставьте параметр пустым.
--- a/docs/config/src/layout-osd.yml
+++ b/docs/config/src/layout-osd.yml
@ -110,22 +110,20 @@
  type: bool
  default: false
  info: |
-    Do not issue fsyncs to the data device, i.e. do not force it to flush cache.
-    Safe ONLY if your data device has write-through cache or if write-back
-    cache is disabled. If you disable drive cache manually with `hdparm` or
-    writing to `/sys/.../scsi_disk/cache_type` then make sure that you do it
-    every time before starting Vitastor OSD (vitastor-disk does it automatically).
-    See also [immediate_commit](layout-cluster.en.md#immediate_commit)
-    for information about how to benefit from disabled cache.
+    Do not issue fsyncs to the data device, i.e. do not flush its cache.
+    Safe ONLY if your data device has write-through cache. If you disable
+    the cache yourself using `hdparm` or `scsi_disk/cache_type` then make sure
+    that the cache disable command is run every time before starting Vitastor
+    OSD, for example, in the systemd unit. See also `immediate_commit` option
+    for the instructions to disable cache and how to benefit from it.
  info_ru: |
-    Не отправлять fsync-и устройству данных, т.е. не заставлять его сбрасывать кэш.
+    Не отправлять fsync-и устройству данных, т.е. не сбрасывать его кэш.
    Безопасно, ТОЛЬКО если ваше устройство данных имеет кэш со сквозной
-    записью (write-through) или если кэш с отложенной записью (write-back) отключён.
-    Если вы отключаете кэш вручную через `hdparm` или запись в `/sys/.../scsi_disk/cache_type`,
-    то удостоверьтесь, что вы делаете это каждый раз перед запуском Vitastor OSD
-    (vitastor-disk делает это автоматически). Смотрите также опцию
-    [immediate_commit](layout-cluster.ru.md#immediate_commit) для информации о том,
-    как извлечь выгоду из отключённого кэша.
+    записью (write-through). Если вы отключаете кэш через `hdparm` или
+    `scsi_disk/cache_type`, то удостоверьтесь, что команда отключения кэша
+    выполняется перед каждым запуском Vitastor OSD, например, в systemd unit-е.
+    Смотрите также опцию `immediate_commit` для инструкций по отключению кэша
+    и о том, как из этого извлечь выгоду.
 - name: disable_meta_fsync
  type: bool
  default: false
@ -181,7 +179,8 @@

    Because of this it can actually be beneficial to use SSDs which work well
    with 512 byte sectors and use 512 byte disk_alignment, journal_block_size
-    and meta_block_size. But at the moment, no such SSDs are known...
+    and meta_block_size. But the only SSD that may fit into this category is
+    Intel Optane (probably, not tested yet).

    Clients don't need to be aware of disk_alignment, so it's not required to
    put a modified value into etcd key /vitastor/config/global.
@ -199,8 +198,9 @@

    Поэтому, на самом деле, может быть выгодно найти SSD, хорошо работающие с
    меньшими, 512-байтными, блоками и использовать 512-байтные disk_alignment,
-    journal_block_size и meta_block_size. Однако на данный момент такие SSD
-    не известны...
+    journal_block_size и meta_block_size. Однако единственные SSD, которые
+    теоретически могут попасть в эту категорию - это Intel Optane (но и это
+    пока не проверялось автором).

    Клиентам не обязательно знать про disk_alignment, так что помещать значение
    этого параметра в etcd в /vitastor/config/global не нужно.
--- a/docs/config/src/make.js
+++ b/docs/config/src/make.js
@ -38,7 +38,6 @@ const types = {
        bool: 'boolean',
        int: 'integer',
        sec: 'seconds',
-        float: 'number',
        ms: 'milliseconds',
        us: 'microseconds',
    },
@ -47,7 +46,6 @@ const types = {
        bool: 'булево (да/нет)',
        int: 'целое число',
        sec: 'секунды',
-        float: 'число',
        ms: 'миллисекунды',
        us: 'микросекунды',
    },
--- a/docs/config/src/monitor.yml
+++ b/docs/config/src/monitor.yml
@ -1,107 +1,7 @@
- name: use_antietcd
-  type: bool
-  default: false
-  info: |
-    Enable experimental built-in etcd replacement (clustered key-value database):
-    [antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
-
-    When set to true, monitor runs internal antietcd automatically if it finds
-    a network interface with an IP address matching one of addresses in the
-    `etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
-    the monitor command line). If there are multiple matching addresses, it also
-    checks `antietcd_port` and antietcd is started for address with matching port.
-    By default, antietcd accepts connection on the selected IP address, but it
-    can also be overridden manually in the `antietcd_ip` option.
-
-    When antietcd is started, monitor stores cluster metadata itself and exposes
-    an etcd-compatible REST API. On disk, these metadata are stored in
-    `/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
-    or antietcd_data_dir options). All other antietcd parameters
-    (see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
-    cluster, cluster_key, persist_filter, stale_read can also be set in
-    Vitastor configuration with `antietcd_` prefix.
-
-    You can dump/load data to or from antietcd using Antietcd `anticli` tool:
-
-    ```
-    npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
-    npm exec anticli -e http://antietcd:2379/v3 load < dump.json
-    ```
-  info_ru: |
-    Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
-    [antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
-
-    Если параметр установлен в true, монитор запускает antietcd автоматически,
-    если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
-    конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
-    командной строки монитора). Если таких адресов несколько, также проверяется
-    опция `antietcd_port` и antietcd запускается для адреса с соответствующим
-    портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
-    IP, но его также можно определить вручную опцией `antietcd_ip`.
-
-    При запуске antietcd монитор сам хранит центральные метаданные кластера и
-    выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
-    `/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
-    antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
-    (смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
-    node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
-    в конфигурации Vitastor с префиксом `antietcd_`.
-
-    Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
-    `anticli`:
-
-    ```
-    npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
-    npm exec anticli -e http://antietcd:2379/v3 load < dump.json
-    ```
- name: enable_prometheus
-  type: bool
-  default: true
-  info: |
-    Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
-
-    Note that only the active (master) monitor exposes metrics, others return
-    HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
-
-    Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
-  info_ru: |
-    Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
-
-    Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
-    возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
-    в задание по сбору метрик Prometheus.
-
-    Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
- name: mon_http_port
-  type: int
-  default: 8060
-  info: HTTP port for monitors to listen to (including metrics exporter)
-  info_ru: Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
- name: mon_http_ip
-  type: string
-  info: IP address for monitors to listen to (all addresses by default)
-  info_ru: IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
- name: mon_https_cert
-  type: string
-  info: Path to PEM SSL certificate file for monitor to listen using HTTPS
-  info_ru: Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
- name: mon_https_key
-  type: string
-  info: Path to PEM SSL private key file for monitor to listen using HTTPS
-  info_ru: Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
- name: mon_https_client_auth
-  type: bool
-  default: false
-  info: Enable HTTPS client certificate-based authorization for monitor connections
-  info_ru: Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
- name: mon_https_ca
-  type: string
-  info: Path to CA certificate for client HTTPS authorization
-  info_ru: Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
 - name: etcd_mon_ttl
  type: sec
-  min: 5
-  default: 1
+  min: 10
+  default: 30
  info: Monitor etcd lease refresh interval in seconds
  info_ru: Интервал обновления etcd резервации (lease) монитором
 - name: etcd_mon_timeout
@ -163,36 +63,3 @@
    "host" и "osd" являются предопределёнными и не могут быть удалены. Если
    один из них отсутствует в конфигурации, он доопределяется с приоритетом по
    умолчанию (100 для уровня "host", 101 для "osd").
- name: use_old_pg_combinator
-  type: bool
-  default: false
-  info: |
-    Use the old PG combination generator which doesn't support [level_placement](pool.en.md#level_placement)
-    and [raw_placement](pool.en.md#raw_placement) for pools which don't use these features.
-  info_ru: |
-    Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
-    и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
- name: osd_backfillfull_ratio
-  type: float
-  default: 0.99
-  info: |
-    Monitors try to prevent OSDs becoming 100% full during rebalance or recovery by
-    calculating how much space will be occupied on every OSD after all rebalance
-    and recovery operations finish, and pausing rebalance and recovery if that
-    amount of space exceeds OSD capacity multiplied by the value of this
-    configuration parameter.
-
-    Future used space is calculated by summing space used by all user data blocks
-    (objects) in all PGs placed on a specific OSD, even if some of these objects
-    currently reside on a different set of OSDs.
-  info_ru: |
-    Мониторы стараются предотвратить 100% заполнение OSD в процессе ребаланса
-    или восстановления, рассчитывая, сколько места будет занято на каждом OSD после
-    завершения всех операций ребаланса и восстановления, и приостанавливая
-    ребаланс и восстановление, если рассчитанный объём превышает ёмкость OSD,
-    умноженную на значение данного параметра.
-
-    Будущее занятое место рассчитывается сложением места, занятого всеми
-    пользовательскими блоками данных (объектами) во всех PG, расположенных
-    на конкретном OSD, даже если часть этих объектов в данный момент находится
-    на другом наборе OSD.
--- a/docs/config/src/network.yml
+++ b/docs/config/src/network.yml
@ -1,93 +1,58 @@
- name: osd_network
-  type: string or array of strings
-  type_ru: строка или массив строк
+- name: tcp_header_buffer_size
+  type: int
+  default: 65536
  info: |
-    Network mask of public OSD network(s) (IPv4 or IPv6). Each OSD listens to all
-    addresses of UP + RUNNING interfaces matching one of these networks, on the
-    same port. Port is auto-selected except if [bind_port](osd.en.md#bind_port) is
-    explicitly specified. Bind address(es) may also be overridden manually by
-    specifying [bind_address](osd.en.md#bind_address). If OSD networks are not specified
-    at all, OSD just listens to a wildcard address (0.0.0.0).
+    Size of the buffer used to read data using an additional copy. Vitastor
+    packet headers are 128 bytes, payload is always at least 4 KB, so it is
+    usually beneficial to try to read multiple packets at once even though
+    it requires copying the data an additional time. The rest of each packet
+    is received without an additional copy. You can try to play with this
+    parameter and see how it affects random iops and linear bandwidth if you
+    want.
  info_ru: |
-    Маски подсетей (IPv4 или IPv6) публичной сети или сетей OSD. Каждый OSD слушает
-    один и тот же порт на всех адресах поднятых (UP + RUNNING) сетевых интерфейсов,
-    соответствующих одной из указанных сетей. Порт выбирается автоматически, если
-    только [bind_port](osd.ru.md#bind_port) не задан явно. Адреса для подключений можно
-    также переопределить явно, задав [bind_address](osd.ru.md#bind_address). Если сети OSD
-    не заданы вообще, OSD слушает все адреса (0.0.0.0).
- name: osd_cluster_network
-  type: string or array of strings
-  type_ru: строка или массив строк
+    Размер буфера для чтения данных с дополнительным копированием. Пакеты
+    Vitastor содержат 128-байтные заголовки, за которыми следуют данные размером
+    от 4 КБ и для мелких операций ввода-вывода обычно выгодно за 1 вызов читать
+    сразу несколько пакетов, даже несмотря на то, что это требует лишний раз
+    скопировать данные. Часть каждого пакета за пределами значения данного
+    параметра читается без дополнительного копирования. Вы можете попробовать
+    поменять этот параметр и посмотреть, как он влияет на производительность
+    случайного и линейного доступа.
+- name: use_sync_send_recv
+  type: bool
+  default: false
  info: |
-    Network mask of separate network(s) (IPv4 or IPv6) to use for OSD
-    cluster connections. I.e. OSDs will always attempt to use these networks
-    to connect to other OSDs, while clients will attempt to use networks from
-    [osd_network](#osd_network).
+    If true, synchronous send/recv syscalls are used instead of io_uring for
+    socket communication. Useless for OSDs because they require io_uring anyway,
+    but may be required for clients with old kernel versions.
  info_ru: |
-    Маски подсетей (IPv4 или IPv6) отдельной кластерной сети или сетей OSD.
-    То есть, OSD будут всегда стараться использовать эти сети для соединений
-    с другими OSD, а клиенты будут стараться использовать сети из [osd_network](#osd_network).
+    Если установлено в истину, то вместо io_uring для передачи данных по сети
+    будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
+    это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
+    принципе, это может применяться для клиентов со старыми версиями ядра.
 - name: use_rdma
  type: bool
  default: true
  info: |
-    Try to use RDMA through libibverbs for communication if it's available.
-    Disable if you don't want Vitastor to use RDMA. TCP-only clients can also
-    talk to an RDMA-enabled cluster, so disabling RDMA may be needed if clients
-    have RDMA devices, but they are not connected to the cluster.
-
-    `use_rdma` works with RoCEv1/RoCEv2 networks, but not with iWARP and,
-    maybe, with some Infiniband configurations which require RDMA-CM.
-    Consider `use_rdmacm` for such networks.
+    Try to use RDMA for communication if it's available. Disable if you don't
+    want Vitastor to use RDMA. TCP-only clients can also talk to an RDMA-enabled
+    cluster, so disabling RDMA may be needed if clients have RDMA devices,
+    but they are not connected to the cluster.
  info_ru: |
-    Попробовать использовать RDMA через libibverbs для связи при наличии
-    доступных устройств. Отключите, если вы не хотите, чтобы Vitastor
-    использовал RDMA. TCP-клиенты также могут работать с RDMA-кластером,
-    так что отключать RDMA может быть нужно, только если у клиентов есть
-    RDMA-устройства, но они не имеют соединения с кластером Vitastor.
-
-    `use_rdma` работает с RoCEv1/RoCEv2 сетями, но не работает с iWARP и
-    может не работать с частью конфигураций Infiniband, требующих RDMA-CM.
-    Рассмотрите включение `use_rdmacm` для таких сетей.
- name: use_rdmacm
-  type: bool
-  default: true
-  info: |
-    Use an alternative implementation of RDMA through RDMA-CM (Connection
-    Manager). Works with all RDMA networks: Infiniband, iWARP and
-    RoCEv1/RoCEv2, and even allows disabling TCP and running only with RDMA.
-    OSDs always use random port numbers for RDMA-CM listeners, different
-    from their TCP ports. `use_rdma` is automatically disabled when
-    `use_rdmacm` is enabled.
-  info_ru: |
-    Использовать альтернативную реализацию RDMA на основе RDMA-CM (Connection
-    Manager). Работает со всеми типами RDMA-сетей: Infiniband, iWARP и
-    RoCEv1/RoCEv2, и даже позволяет полностью отключить TCP и работать
-    только на RDMA. OSD используют случайные номера портов для ожидания
-    соединений через RDMA-CM, отличающиеся от их TCP-портов. Также при
-    включении `use_rdmacm` автоматически отключается опция `use_rdma`.
- name: disable_tcp
-  type: bool
-  default: true
-  info: |
-    Fully disable TCP and only use RDMA-CM for OSD communication.
-  info_ru: |
-    Полностью отключить TCP и использовать только RDMA-CM для соединений с OSD.
+    Пытаться использовать RDMA для связи при наличии доступных устройств.
+    Отключите, если вы не хотите, чтобы Vitastor использовал RDMA.
+    TCP-клиенты также могут работать с RDMA-кластером, так что отключать
+    RDMA может быть нужно, только если у клиентов есть RDMA-устройства,
+    но они не имеют соединения с кластером Vitastor.
 - name: rdma_device
  type: string
  info: |
    RDMA device name to use for Vitastor OSD communications (for example,
-    "rocep5s0f0"). If not specified, Vitastor will try to find an RoCE
-    device matching [osd_network](osd.en.md#osd_network), preferring RoCEv2,
-    or choose the first available RDMA device if no RoCE devices are
-    found or if `osd_network` is not specified. Auto-selection is also
-    unsupported with old libibverbs < v32, like in Debian 10 Buster or
-    CentOS 7.
+    "rocep5s0f0"). Now Vitastor supports all adapters, even ones without
+    ODP support, like Mellanox ConnectX-3 and non-Mellanox cards.

-    Vitastor supports all adapters, even ones without ODP support, like
-    Mellanox ConnectX-3 and non-Mellanox cards. Versions up to Vitastor
-    1.2.0 required ODP which is only present in Mellanox ConnectX >= 4.
-    See also [rdma_odp](#rdma_odp).
+    Versions up to Vitastor 1.2.0 required ODP which is only present in
+    Mellanox ConnectX >= 4. See also [rdma_odp](#rdma_odp).

    Run `ibv_devinfo -v` as root to list available RDMA devices and their
    features.
@ -99,17 +64,12 @@
    PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
  info_ru: |
    Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
-    Если не указано, Vitastor попробует найти RoCE-устройство, соответствующее
-    [osd_network](osd.en.md#osd_network), предпочитая RoCEv2, или выбрать первое
-    попавшееся RDMA-устройство, если RoCE-устройств нет или если сеть `osd_network`
-    не задана. Также автовыбор не поддерживается со старыми версиями библиотеки
-    libibverbs < v32, например в Debian 10 Buster или CentOS 7.
-
-    Vitastor поддерживает все модели адаптеров, включая те, у которых
+    Сейчас Vitastor поддерживает все модели адаптеров, включая те, у которых
    нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
-    картами производства не Mellanox. Версии Vitastor до 1.2.0 включительно
-    требовали ODP, который есть только на Mellanox ConnectX 4 и более новых.
-    См. также [rdma_odp](#rdma_odp).
+    картами производства не Mellanox.
+
+    Версии Vitastor до 1.2.0 включительно требовали ODP, который есть только
+    на Mellanox ConnectX 4 и более новых. См. также [rdma_odp](#rdma_odp).

    Запустите `ibv_devinfo -v` от имени суперпользователя, чтобы посмотреть
    список доступных RDMA-устройств, их параметры и возможности.
@ -122,56 +82,44 @@
    Control) и ECN (Explicit Congestion Notification).
 - name: rdma_port_num
  type: int
+  default: 1
  info: |
    RDMA device port number to use. Only for devices that have more than 1 port.
    See `phys_port_cnt` in `ibv_devinfo -v` output to determine how many ports
    your device has.
-
-    Not relevant for RDMA-CM (use_rdmacm).
  info_ru: |
    Номер порта RDMA-устройства, который следует использовать. Имеет смысл
    только для устройств, у которых более 1 порта. Чтобы узнать, сколько портов
    у вашего адаптера, посмотрите `phys_port_cnt` в выводе команды
    `ibv_devinfo -v`.
-
-    Опция неприменима к RDMA-CM (use_rdmacm).
 - name: rdma_gid_index
  type: int
+  default: 0
  info: |
    Global address identifier index of the RDMA device to use. Different GID
    indexes may correspond to different protocols like RoCEv1, RoCEv2 and iWARP.
    Search for "GID" in `ibv_devinfo -v` output to determine which GID index
    you need.

-    If not specified, Vitastor will try to auto-select a RoCEv2 IPv4 GID, then
-    RoCEv2 IPv6 GID, then RoCEv1 IPv4 GID, then RoCEv1 IPv6 GID, then IB GID.
-    GID auto-selection is unsupported with libibverbs < v32.
-
-    A correct rdma_gid_index for RoCEv2 is usually 1 (IPv6) or 3 (IPv4).
-
-    Not relevant for RDMA-CM (use_rdmacm).
+    **IMPORTANT:** If you want to use RoCEv2 (as recommended) then the correct
+    rdma_gid_index is usually 1 (IPv6) or 3 (IPv4).
  info_ru: |
    Номер глобального идентификатора адреса RDMA-устройства, который следует
    использовать. Разным gid_index могут соответствовать разные протоколы связи:
    RoCEv1, RoCEv2, iWARP. Чтобы понять, какой нужен вам - смотрите строчки со
    словом "GID" в выводе команды `ibv_devinfo -v`.

-    Если не указан, Vitastor попробует автоматически выбрать сначала GID,
-    соответствующий RoCEv2 IPv4, потом RoCEv2 IPv6, потом RoCEv1 IPv4, потом
-    RoCEv1 IPv6, потом IB. Авто-выбор GID не поддерживается со старыми версиями
-    libibverbs < v32.
-
-    Правильный rdma_gid_index для RoCEv2, как правило, 1 (IPv6) или 3 (IPv4).
-
-    Опция неприменима к RDMA-CM (use_rdmacm).
+    **ВАЖНО:** Если вы хотите использовать RoCEv2 (как мы и рекомендуем), то
+    правильный rdma_gid_index, как правило, 1 (IPv6) или 3 (IPv4).
 - name: rdma_mtu
  type: int
+  default: 4096
  info: |
-    RDMA Path MTU to use. Must be 1024, 2048 or 4096. Default is to use the
-    RDMA device's MTU.
+    RDMA Path MTU to use. Must be 1024, 2048 or 4096. It usually makes no
+    sense to change it from the default 4096.
  info_ru: |
    Максимальная единица передачи (Path MTU) для RDMA. Должно быть равно 1024,
-    2048 или 4096. По умолчанию используется значение MTU RDMA-устройства.
+    2048 или 4096. Обычно нет смысла менять значение по умолчанию, равное 4096.
 - name: rdma_max_sge
  type: int
  default: 128
@ -295,6 +243,21 @@
    Максимальное время ожидания ответа на запрос проверки состояния соединения.
    Если OSD не отвечает за это время, соединение отключается и производится
    повторная попытка соединения.
+- name: up_wait_retry_interval
+  type: ms
+  min: 50
+  default: 500
+  online: true
+  info: |
+    OSDs respond to clients with a special error code when they receive I/O
+    requests for a PG that's not synchronized and started. This parameter sets
+    the time for the clients to wait before re-attempting such I/O requests.
+  info_ru: |
+    Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
+    поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
+    они отвечают клиентам специальным кодом ошибки, означающим, что клиент
+    должен некоторое время подождать перед повторением запроса. Именно это время
+    ожидания задаёт данный параметр.
 - name: max_etcd_attempts
  type: int
  default: 5
@ -332,56 +295,12 @@
  info_ru: |
    Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
    etcd_report_interval, чтобы keepalive гарантированно работал.
- name: etcd_ws_keepalive_interval
+- name: etcd_ws_keepalive_timeout
  type: sec
-  default: 5
+  default: 30
  online: true
  info: |
    etcd websocket ping interval required to keep the connection alive and
    detect disconnections quickly.
  info_ru: |
    Интервал проверки живости вебсокет-подключений к etcd.
- name: etcd_min_reload_interval
-  type: ms
-  default: 1000
-  online: true
-  info: |
-    Minimum interval for full etcd state reload. Introduced to prevent
-    excessive load on etcd during outages when etcd can't keep up with event
-    streams and cancels them.
-  info_ru: |
-    Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
-    предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
-    успевает рассылать потоки событий и отменяет их.
- name: tcp_header_buffer_size
-  type: int
-  default: 65536
-  info: |
-    Size of the buffer used to read data using an additional copy. Vitastor
-    packet headers are 128 bytes, payload is always at least 4 KB, so it is
-    usually beneficial to try to read multiple packets at once even though
-    it requires copying the data an additional time. The rest of each packet
-    is received without an additional copy. You can try to play with this
-    parameter and see how it affects random iops and linear bandwidth if you
-    want.
-  info_ru: |
-    Размер буфера для чтения данных с дополнительным копированием. Пакеты
-    Vitastor содержат 128-байтные заголовки, за которыми следуют данные размером
-    от 4 КБ и для мелких операций ввода-вывода обычно выгодно за 1 вызов читать
-    сразу несколько пакетов, даже несмотря на то, что это требует лишний раз
-    скопировать данные. Часть каждого пакета за пределами значения данного
-    параметра читается без дополнительного копирования. Вы можете попробовать
-    поменять этот параметр и посмотреть, как он влияет на производительность
-    случайного и линейного доступа.
- name: use_sync_send_recv
-  type: bool
-  default: false
-  info: |
-    If true, synchronous send/recv syscalls are used instead of io_uring for
-    socket communication. Useless for OSDs because they require io_uring anyway,
-    but may be required for clients with old kernel versions.
-  info_ru: |
-    Если установлено в истину, то вместо io_uring для передачи данных по сети
-    будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
-    это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
-    принципе, это может применяться для клиентов со старыми версиями ядра.
--- a/docs/config/src/osd.en.md
+++ b/docs/config/src/osd.en.md
@ -1,5 +1,5 @@
 # Runtime OSD Parameters

 These parameters only apply to OSDs, are not fixed at the moment of OSD drive
-initialization and can be changed - in /etc/vitastor/vitastor.conf or [vitastor-disk update-sb](../usage/disk.en.md#update-sb)
-with an OSD restart or, for some of them, even without restarting by updating configuration in etcd.
+initialization and can be changed - either with an OSD restart or, for some of
+them, even without restarting by updating configuration in etcd.
--- a/docs/config/src/osd.ru.md
+++ b/docs/config/src/osd.ru.md
@ -2,5 +2,5 @@

 Данные параметры используются только OSD, но, в отличие от дисковых параметров,
 не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
-момент с перезапуском OSD в /etc/vitastor/vitastor.conf или [vitastor-disk update-sb](../usage/disk.ru.md#update-sb),
-а некоторые и без перезапуска, с помощью изменения конфигурации в etcd.
+момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
+изменения конфигурации в etcd.
--- a/docs/config/src/osd.yml
+++ b/docs/config/src/osd.yml
@ -1,44 +1,3 @@
- name: bind_address
-  type: string or array of strings
-  type_ru: строка или массив строк
-  info: |
-    Instead of the network masks ([osd_network](network.en.md#osd_network) and
-    [osd_cluster_network](network.en.md#osd_cluster_network)), you can also set
-    OSD listen addresses explicitly using this parameter. May be useful if you
-    want to start OSDs on interfaces that are not UP + RUNNING.
-  info_ru: |
-    Вместо использования масок подсети ([osd_network](network.ru.md#osd_network) и
-    [osd_cluster_network](network.ru.md#osd_cluster_network)), вы также можете явно
-    задать адрес(а), на которых будут ожидать соединений OSD, с помощью данного
-    параметра. Это может быть полезно, например, чтобы запускать OSD на неподнятых
-    интерфейсах (не UP + RUNNING).
- name: bind_port
-  type: int
-  info: |
-    By default, OSDs pick random ports to use for incoming connections
-    automatically. With this option you can set a specific port for a specific
-    OSD by hand.
-  info_ru: |
-    По умолчанию OSD сами выбирают случайные порты для входящих подключений.
-    С помощью данной опции вы можете задать порт для отдельного OSD вручную.
- name: osd_iothread_count
-  type: int
-  default: 0
-  info: |
-    TCP network I/O thread count for OSD. When non-zero, a single OSD process
-    may handle more TCP I/O, but at a cost of increased latency because thread
-    switching overhead occurs. RDMA isn't affected by this option.
-
-    Because of latency, instead of enabling OSD I/O threads it's recommended to
-    just create multiple OSDs per disk, or use RDMA.
-  info_ru: |
-    Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
-    стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
-    по сети больше данных, но ухудшает задержку из-за накладных расходов
-    переключения потоков. На работу RDMA опция не влияет.
-
-    Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
-    просто создавать по несколько OSD на каждом диске, или использовать RDMA.
 - name: etcd_report_interval
  type: sec
  default: 5
@ -79,6 +38,44 @@
    реализовать дополнительный режим для монитора, который позволит отделять
    первичные OSD от вторичных, но пока не понятно, зачем это может кому-то
    понадобиться, поэтому это не реализовано.
+- name: osd_network
+  type: string or array of strings
+  type_ru: строка или массив строк
+  info: |
+    Network mask of the network (IPv4 or IPv6) to use for OSDs. Note that
+    although it's possible to specify multiple networks here, this does not
+    mean that OSDs will create multiple listening sockets - they'll only
+    pick the first matching address of an UP + RUNNING interface. Separate
+    networks for cluster and client connections are also not implemented, but
+    they are mostly useless anyway, so it's not a big deal.
+  info_ru: |
+    Маска подсети (IPv4 или IPv6) для использования для соединений с OSD.
+    Имейте в виду, что хотя сейчас и можно передать в этот параметр несколько
+    подсетей, это не означает, что OSD будут создавать несколько слушающих
+    сокетов - они лишь будут выбирать адрес первого поднятого (состояние UP +
+    RUNNING) интерфейса, подходящий под заданную маску. Также не реализовано разделение
+    кластерной и публичной сетей OSD. Правда, от него обычно всё равно довольно
+    мало толку, так что особенной проблемы в этом нет.
+- name: bind_address
+  type: string
+  default: "0.0.0.0"
+  info: |
+    Instead of the network mask, you can also set OSD listen address explicitly
+    using this parameter. May be useful if you want to start OSDs on interfaces
+    that are not UP + RUNNING.
+  info_ru: |
+    Этим параметром можно явным образом задать адрес, на котором будет ожидать
+    соединений OSD (вместо использования маски подсети). Может быть полезно,
+    например, чтобы запускать OSD на неподнятых интерфейсах (не UP + RUNNING).
+- name: bind_port
+  type: int
+  info: |
+    By default, OSDs pick random ports to use for incoming connections
+    automatically. With this option you can set a specific port for a specific
+    OSD by hand.
+  info_ru: |
+    По умолчанию OSD сами выбирают случайные порты для входящих подключений.
+    С помощью данной опции вы можете задать порт для отдельного OSD вручную.
 - name: autosync_interval
  type: sec
  default: 5
@ -110,29 +107,17 @@
    принудительной отправкой fsync-а.
 - name: recovery_queue_depth
  type: int
-  default: 1
+  default: 4
  online: true
  info: |
-    Maximum recovery and rebalance operations initiated by each OSD in parallel.
-    Note that each OSD talks to a lot of other OSDs so actual number of parallel
-    recovery operations per each OSD is greater than just recovery_queue_depth.
-    Increasing this parameter can speedup recovery if [auto-tuning](#recovery_tune_interval)
-    allows it or if it is disabled.
+    Maximum recovery operations per one primary OSD at any given moment of time.
+    Currently it's the only parameter available to tune the speed of recovery
+    and rebalancing, but it's planned to implement more.
  info_ru: |
-    Максимальное число параллельных операций восстановления, инициируемых одним
-    OSD в любой момент времени. Имейте в виду, что каждый OSD обычно работает с
-    многими другими OSD, так что на практике параллелизм восстановления больше,
-    чем просто recovery_queue_depth. Увеличение значения этого параметра может
-    ускорить восстановление если [автотюнинг скорости](#recovery_tune_interval)
-    разрешает это или если он отключён.
- name: recovery_sleep_us
-  type: us
-  default: 0
-  online: true
-  info: |
-    Delay for all recovery- and rebalance- related operations. If non-zero,
-    such operations are artificially slowed down to reduce the impact on
-    client I/O.
+    Максимальное число операций восстановления на одном первичном OSD в любой
+    момент времени. На данный момент единственный параметр, который можно менять
+    для ускорения или замедления восстановления и перебалансировки данных, но
+    в планах реализация других параметров.
 - name: recovery_pg_switch
  type: int
  default: 128
@ -300,7 +285,7 @@
    decrease write performance for fast disks because page cache is an overhead
    itself.

-    Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
+    Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
    (which requires disable_data_fsync) with drives having write-back cache
    which can't be turned off, for example, Intel Optane. Also note that *some*
    desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
@ -641,143 +626,3 @@
    считается некорректной. Однако, если "лучшую" версию с числом доступных
    копий большим, чем у всех других версий, найти невозможно, то объект тоже
    маркируется неконсистентным.
- name: recovery_tune_interval
-  type: sec
-  default: 1
-  online: true
-  info: |
-    Interval at which OSD re-considers client and recovery load and automatically
-    adjusts [recovery_sleep_us](#recovery_sleep_us). Recovery auto-tuning is
-    disabled if recovery_tune_interval is set to 0.
-
-    Auto-tuning targets utilization. Utilization is a measure of load and is
-    equal to the product of iops and average latency (so it may be greater
-    than 1). You set "low" and "high" client utilization thresholds and two
-    corresponding target recovery utilization levels. OSD calculates desired
-    recovery utilization from client utilization using linear interpolation
-    and auto-tunes recovery operation delay to make actual recovery utilization
-    match desired.
-
-    This allows to reduce recovery/rebalance impact on client operations. It is
-    of course impossible to remove it completely, but it should become adequate.
-    In some tests rebalance could earlier drop client write speed from 1.5 GB/s
-    to 50-100 MB/s, with default auto-tuning settings it now only reduces
-    to ~1 GB/s.
-  info_ru: |
-    Интервал, с которым OSD пересматривает клиентскую нагрузку и нагрузку
-    восстановления и автоматически подстраивает [recovery_sleep_us](#recovery_sleep_us).
-    Автотюнинг (автоподстройка) отключается, если recovery_tune_interval
-    устанавливается в значение 0.
-
-    Автотюнинг регулирует утилизацию. Утилизация является мерой нагрузки
-    и равна произведению числа операций в секунду и средней задержки
-    (то есть, она может быть выше 1). Вы задаёте два уровня клиентской
-    утилизации - "низкий" и "высокий" (low и high) и два соответствующих
-    целевых уровня утилизации операциями восстановления. OSD рассчитывает
-    желаемый уровень утилизации восстановления линейной интерполяцией от
-    клиентской утилизации и подстраивает задержку операций восстановления
-    так, чтобы фактическая утилизация восстановления совпадала с желаемой.
-
-    Это позволяет снизить влияние восстановления и ребаланса на клиентские
-    операции. Конечно, невозможно исключить такое влияние полностью, но оно
-    должно становиться адекватнее. В некоторых тестах перебалансировка могла
-    снижать клиентскую скорость записи с 1.5 ГБ/с до 50-100 МБ/с, а теперь, с
-    настройками автотюнинга по умолчанию, она снижается только до ~1 ГБ/с.
- name: recovery_tune_util_low
-  type: float
-  default: 0.1
-  online: true
-  info: |
-    Desired recovery/rebalance utilization when client load is high, i.e. when
-    it is at or above recovery_tune_client_util_high.
-  info_ru: |
-    Желаемая утилизация восстановления в моменты, когда клиентская нагрузка
-    высокая, то есть, находится на уровне или выше recovery_tune_client_util_high.
- name: recovery_tune_util_high
-  type: float
-  default: 1
-  online: true
-  info: |
-    Desired recovery/rebalance utilization when client load is low, i.e. when
-    it is at or below recovery_tune_client_util_low.
-  info_ru: |
-    Желаемая утилизация восстановления в моменты, когда клиентская нагрузка
-    низкая, то есть, находится на уровне или ниже recovery_tune_client_util_low.
- name: recovery_tune_client_util_low
-  type: float
-  default: 0
-  online: true
-  info: Client utilization considered "low".
-  info_ru: Клиентская утилизация, которая считается "низкой".
- name: recovery_tune_client_util_high
-  type: float
-  default: 0.5
-  online: true
-  info: Client utilization considered "high".
-  info_ru: Клиентская утилизация, которая считается "высокой".
- name: recovery_tune_agg_interval
-  type: int
-  default: 10
-  online: true
-  info: |
-    The number of last auto-tuning iterations to use for calculating the
-    delay as average. Lower values result in quicker response to client
-    load change, higher values result in more stable delay. Default value of 10
-    is usually fine.
-  info_ru: |
-    Число последних итераций автоподстройки для расчёта задержки как среднего
-    значения. Меньшие значения параметра ускоряют отклик на изменение нагрузки,
-    большие значения делают задержку стабильнее. Значение по умолчанию 10
-    обычно нормальное и не требует изменений.
- name: recovery_tune_sleep_min_us
-  type: us
-  default: 10
-  online: true
-  info: |
-    Minimum possible value for auto-tuned recovery_sleep_us. Lower values
-    are changed to 0.
-  info_ru: |
-    Минимальное возможное значение авто-подстроенного recovery_sleep_us.
-    Меньшие значения заменяются на 0.
- name: recovery_tune_sleep_cutoff_us
-  type: us
-  default: 10000000
-  online: true
-  info: |
-    Maximum possible value for auto-tuned recovery_sleep_us. Higher values
-    are treated as outliers and ignored in aggregation.
-  info_ru: |
-    Максимальное возможное значение авто-подстроенного recovery_sleep_us.
-    Большие значения считаются случайными выбросами и игнорируются в
-    усреднении.
- name: discard_on_start
-  type: bool
-  info: Discard (SSD TRIM) unused data device blocks on every OSD startup.
-  info_ru: Освобождать (SSD TRIM) неиспользуемые блоки диска данных при каждом запуске OSD.
- name: min_discard_size
-  type: int
-  default: 1048576
-  info: Minimum consecutive block size to TRIM it.
-  info_ru: Минимальный размер последовательного блока данных, чтобы освобождать его через TRIM.
- name: allow_net_split
-  type: bool
-  default: false
-  info: |
-    Allow "safe" cases of network splits/partitions - allow to start PGs without
-    connections to some OSDs currently registered as alive in etcd, if the number
-    of actually connected PG OSDs is at least pg_minsize. That is, allow some OSDs to lose
-    connectivity with some other OSDs as long as it doesn't break pg_minsize guarantees.
-    The downside is that it increases the probability of writing data into just pg_minsize
-    OSDs during failover which can lead to PGs becoming incomplete after additional outages.
-
-    The old behaviour in versions up to 2.0.0 was equal to enabled allow_net_split.
-  info_ru: |
-    Разрешить "безопасные" случаи разделений сети - разрешить активировать PG без
-    соединений к некоторым OSD, помеченным активными в etcd, если общее число активных
-    OSD в PG составляет как минимум pg_minsize. То есть, разрешать некоторым OSD терять
-    соединения с некоторыми другими OSD, если это не нарушает гарантий pg_minsize.
-    Минус такого разрешения в том, что оно повышает вероятность записи данных ровно в
-    pg_minsize OSD во время переключений, что может потом привести к тому, что PG станут
-    неполными (incomplete), если упадут ещё какие-то OSD.
-
-    Старое поведение в версиях до 2.0.0 было идентично включённому allow_net_split.
--- a/docs/installation/docker.en.md
+++ b/docs/installation/docker.en.md
@ -1,60 +0,0 @@
-[Documentation](../../README.md#documentation) → Installation → Dockerized Installation
-
-----
-
-[Читать на русском](docker.ru.md)
-
-# Dockerized Installation
-
-Vitastor may be installed in Docker/Podman. In such setups etcd, monitors and OSD
-all run in containers, but everything else looks as close as possible to a usual
-setup with packages:
- host network is used
- auto-start is implemented through udev and systemd
- logs are written to journald (not docker json log files)
- command-line wrapper scripts are installed to the host system to call vitastor-disk,
-  vitastor-cli and others through the container
-
-Such installations may be useful when it's impossible or inconvenient to install
-Vitastor from packages, for example, in exotic Linux distributions.
-
-If you don't want just a simple containerized installation, you can also take a look
-at Vitastor Kubernetes operator: https://github.com/Antilles7227/vitastor-operator
-
-## Installing Containers
-
-The instruction is very simple.
-
-1. Download a Docker image of the desired version: \
-   `docker pull vitastor:2.1.0`
-2. Install scripts to the host system: \
-   `docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:2.1.0 install.sh`
-3. Reload udev rules: \
-   `udevadm control --reload-rules`
-
-And you can return to [Quick Start](../intro/quickstart.en.md).
-
-## Upgrading Containers
-
-First make sure to check the topic [Upgrading Vitastor](../usage/admin.en.md#upgrading-vitastor)
-to figure out if you need any additional steps.
-
-Then, to upgrade a containerized installation, you just need to change the `VITASTOR_VERSION`
-option in `/etc/vitastor/docker.conf` and restart all Vitastor services:
-
-`systemctl restart vitastor.target`
-
-## QEMU
-
-Vitastor Docker image also contains QEMU, qemu-img and qemu-storage-daemon built with Vitastor support.
-
-However, running QEMU in Docker is harder to setup and it depends on the used virtualization UI
-(OpenNebula, Proxmox and so on). Some of them also require a patched Libvirt.
-
-That's why containerized installation of Vitastor doesn't contain a ready-made QEMU setup and it's
-recommended to install QEMU from packages or build it manually.
-
-## fio
-
-Vitastor Docker image also contains fio and installs a wrapper called `vitastor-fio` to use it from
-the host system.
--- a/docs/installation/docker.ru.md
+++ b/docs/installation/docker.ru.md
@ -1,60 +0,0 @@
-[Документация](../../README-ru.md#документация) → Установка → Установка в Docker
-
-----
-
-[Read in English](docker.en.md)
-
-# Установка в Docker
-
-Vitastor можно установить в Docker/Podman. При этом etcd, мониторы и OSD запускаются
-в контейнерах, но всё остальное выглядит максимально приближенно к установке из пакетов:
- используется сеть хост-системы
- для автозапуска используются udev и systemd
- журналы записываются в journald (не в json-файлы журналов docker)
- в хост-систему устанавливаются обёртки для вызова консольных инструментов vitastor-disk,
-  vitastor-cli и других через контейнер
-
-Такая установка полезна тогда, когда установка из пакетов невозможна или неудобна,
-например, в нестандартных Linux-дистрибутивах.
-
-Если вам нужна не просто контейнеризованная инсталляция, вы также можете обратить внимание
-на Vitastor Kubernetes-оператор: https://github.com/Antilles7227/vitastor-operator
-
-## Установка контейнеров
-
-Инструкция по установке максимально простая.
-
-1. Скачайте Docker-образ желаемой версии: \
-   `docker pull vitastor:2.1.0`
-2. Установите скрипты в хост-систему командой: \
-   `docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:2.1.0 install.sh`
-3. Перезагрузите правила udev: \
-   `udevadm control --reload-rules`
-
-После этого вы можете возвращаться к разделу [Быстрый старт](../intro/quickstart.ru.md).
-
-## Обновление контейнеров
-
-Сначала обязательно проверьте раздел [Обновление Vitastor](../usage/admin.ru.md#обновление-vitastor),
-чтобы понять, не требуются ли вам какие-то дополнительные действия.
-
-После этого для обновления Docker-инсталляции вам нужно просто поменять опцию `VITASTOR_VERSION`
-в файле `/etc/vitastor/docker.conf` и перезапустить все сервисы Vitastor командой:
-
-`systemctl restart vitastor.target`
-
-## QEMU
-
-В Docker-образ также входят QEMU, qemu-img и qemu-storage-daemon, собранные с поддержкой Vitastor.
-
-Однако настроить запуск QEMU в Docker сложнее и способ запуска зависит от используемого интерфейса
-виртуализации (OpenNebula, Proxmox и т.п.). Также для OpenNebula, например, требуется патченый
-Libvirt.
-
-Поэтому по умолчанию Docker-сборка пока что не включает в себя готового способа запуска QEMU
-и QEMU рекомендуется устанавливать из пакетов или собирать самостоятельно.
-
-## fio
-
-fio также входит в Docker-контейнер vitastor, и в хост-систему устанавливается обёртка `vitastor-fio`
-для запуска fio в контейнер.
--- a/docs/installation/kubernetes.en.md
+++ b/docs/installation/kubernetes.en.md
@ -6,18 +6,9 @@

 # Kubernetes CSI

-Vitastor has a CSI plugin for Kubernetes which supports block-based and VitastorFS-based volumes.
+Vitastor has a CSI plugin for Kubernetes which supports RWO (and block RWX) volumes.

-Block-based volumes may be formatted and mounted with a normal FS (ext4 or xfs). Such volumes
-only support RWO (ReadWriteOnce) mode.
-
-Block-based volumes may also be left without FS and attached into the container as a block
-device. Such volumes also support RWX (ReadWriteMany) mode.
-
-VitastorFS-based volumes use a clustered file system and support FS-based RWX (ReadWriteMany)
-mode. However, such volumes don't support quotas and snapshots.
-
-To deploy the CSI plugin, take manifests from [csi/deploy/](../../csi/deploy/) directory, put your
+To deploy it, take manifests from [csi/deploy/](../../csi/deploy/) directory, put your
 Vitastor configuration in [001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
 configure storage class in [009-storage-class.yaml](../../csi/deploy/009-storage-class.yaml)
 and apply all `NNN-*.yaml` manifests to your Kubernetes installation:
@ -28,25 +19,13 @@ for i in ./???-*.yaml; do kubectl apply -f $i; done

 After that you'll be able to create PersistentVolumes.

-**Important:** For best experience, use Linux kernel at least 5.15 with [VDUSE](../usage/qemu.en.md#vduse)
-kernel modules enabled (vdpa, vduse, virtio-vdpa). If your distribution doesn't
-have them pre-built - build them yourself ([instructions](../usage/qemu.en.md#vduse)),
-I promise it's worth it :-). When VDUSE is unavailable, CSI driver uses [NBD](../usage/nbd.en.md)
-to map Vitastor devices. NBD is slower and, with kernels older than 5.19, unmountable
-if the cluster becomes unresponsive.
-
 ## Features

 Vitastor CSI supports:
 - Kubernetes starting with 1.20 (or 1.17 for older vitastor-csi <= 1.1.0)
- Block-based FS-formatted RWO (ReadWriteOnce) volumes. Example: [PVC](../../csi/deploy/example-pvc.yaml), [pod](../../csi/deploy/example-test-pod.yaml)
+- Filesystem RWO (ReadWriteOnce) volumes. Example: [PVC](../../csi/deploy/example-pvc.yaml), [pod](../../csi/deploy/example-test-pod.yaml)
 - Raw block RWX (ReadWriteMany) volumes. Example: [PVC](../../csi/deploy/example-pvc-block.yaml), [pod](../../csi/deploy/example-test-pod-block.yaml)
- VitastorFS-based RWX (ReadWriteMany) volumes. Example: [storage class](../../csi/deploy/example-storage-class-fs.yaml)
 - Volume expansion
 - Volume snapshots. Example: [snapshot class](../../csi/deploy/example-snapshot-class.yaml), [snapshot](../../csi/deploy/example-snapshot.yaml), [clone](../../csi/deploy/example-snapshot-clone.yaml)
- [VDUSE](../usage/qemu.en.md#vduse) (preferred) and [NBD](../usage/nbd.en.md) device mapping methods
- Upgrades with VDUSE - new handler processes are restarted when CSI pods are restarted themselves
- VDUSE daemon auto-restart - handler processes are automatically restarted if they crash due to a bug in Vitastor client code
- Multiple clusters by using multiple configuration files in ConfigMap.

 Remember that to use snapshots with CSI you also have to install [Snapshot Controller and CRDs](https://kubernetes-csi.github.io/docs/snapshot-controller.html#deployment).
--- a/docs/installation/kubernetes.ru.md
+++ b/docs/installation/kubernetes.ru.md
@ -6,17 +6,7 @@

 # Kubernetes CSI

-У Vitastor есть CSI-плагин для Kubernetes, поддерживающий блочные тома и тома на основе
-кластерной ФС VitastorFS.
-
-Блочные тома могут быть отформатированы и примонтированы со стандартной ФС (ext4 или xfs).
-Такие тома поддерживают только режим RWO (ReadWriteOnce, одновременный доступ с одного узла).
-
-Блочные тома также могут не форматироваться и подключаться в контейнер в виде блочного устройства.
-В таком случае их можно подключать в режиме RWX (ReadWriteMany, одновременный доступ с многих узлов).
-
-Тома на основе VitastorFS используют кластерную ФС и поэтому также поддерживают режим RWX
-(ReadWriteMany). Однако, такие тома не поддерживают ограничение размера и снимки.
+У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.

 Для установки возьмите манифесты из директории [csi/deploy/](../../csi/deploy/), поместите
 вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
@ -29,26 +19,13 @@ for i in ./???-*.yaml; do kubectl apply -f $i; done

 После этого вы сможете создавать PersistentVolume.

-**Важно:** Лучше всего использовать ядро Linux версии не менее 5.15 с включёнными модулями
-[VDUSE](../usage/qemu.ru.md#vduse) (vdpa, vduse, virtio-vdpa). Если в вашем дистрибутиве
-они не собраны из коробки - соберите их сами, обещаю, что это стоит того ([инструкция](../usage/qemu.ru.md#vduse)) :-).
-Когда VDUSE недоступно, CSI-плагин использует [NBD](../usage/nbd.ru.md) для подключения
-дисков, а NBD медленнее и имеет проблему таймаута - если кластер остаётся недоступным
-дольше, чем [nbd_timeout](../config/client.ru.md#nbd_timeout), NBD-устройство отключается
-и ломает поды, использующие его.
-
 ## Возможности

 CSI-плагин Vitastor поддерживает:
 - Версии Kubernetes, начиная с 1.20 (или с 1.17 для более старых vitastor-csi <= 1.1.0)
 - Файловые RWO (ReadWriteOnce) тома. Пример: [PVC](../../csi/deploy/example-pvc.yaml), [под](../../csi/deploy/example-test-pod.yaml)
 - Сырые блочные RWX (ReadWriteMany) тома. Пример: [PVC](../../csi/deploy/example-pvc-block.yaml), [под](../../csi/deploy/example-test-pod-block.yaml)
- Основанные на VitastorFS RWX (ReadWriteMany) тома. Пример: [класс хранения](../../csi/deploy/example-storage-class-fs.yaml)
 - Расширение размера томов
 - Снимки томов. Пример: [класс снимков](../../csi/deploy/example-snapshot-class.yaml), [снимок](../../csi/deploy/example-snapshot.yaml), [клон снимка](../../csi/deploy/example-snapshot-clone.yaml)
- Способы подключения устройств [VDUSE](../usage/qemu.ru.md#vduse) (предпочитаемый) и [NBD](../usage/nbd.ru.md)
- Обновление при использовании VDUSE - новые процессы-обработчики устройств успешно перезапускаются вместе с самими подами CSI
- Автоперезапуск демонов VDUSE - процесс-обработчик автоматически перезапустится, если он внезапно упадёт из-за бага в коде клиента Vitastor
- Несколько кластеров через задание нескольких файлов конфигурации в ConfigMap.

 Не забывайте, что для использования снимков нужно сначала установить [контроллер снимков и CRD](https://kubernetes-csi.github.io/docs/snapshot-controller.html#deployment).
--- a/docs/installation/opennebula.en.md
+++ b/docs/installation/opennebula.en.md
@ -1,186 +0,0 @@
-[Documentation](../../README.md#documentation) → Installation → OpenNebula
-
-----
-
-[Читать на русском](opennebula.ru.md)
-
-# OpenNebula
-
-## Automatic Installation
-
-OpenNebula plugin is packaged as `vitastor-opennebula` Debian and RPM package since Vitastor 1.9.0. So:
-
- Run `apt-get install vitastor-opennebula` or `yum install vitastor-opennebula` after installing OpenNebula on all nodes
- Check that it prints "OK, Vitastor OpenNebula patches successfully applied" or "OK, Vitastor OpenNebula patches are already applied"
- If it does not, refer to [Manual Installation](#manual-installation) and apply configuration file changes manually
- Make sure that Vitastor patched versions of QEMU and libvirt are installed
-  (`dpkg -l qemu-system-x86`, `dpkg -l | grep libvirt`, `rpm -qa | grep qemu`, `rpm -qa | grep libvirt-libs` should show "vitastor" in version names)
- [Block VM access to Vitastor cluster](#block-vm-access-to-vitastor-cluster)
-
-## Manual Installation
-
-Install OpenNebula. Then, on each node:
-
- Copy [opennebula/remotes](../../opennebula/remotes) into `/var/lib/one` recursively: `cp -r opennebula/remotes /var/lib/one/`
- Copy [opennebula/sudoers.d](../../opennebula/sudoers.d) to `/etc`: `cp -r opennebula/sudoers.d /etc/`
- Apply [downloader-vitastor.sh.diff](../../opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff) to `/var/lib/one/remotes/datastore/downloader.sh`:
-  `patch /var/lib/one/remotes/datastore/downloader.sh < opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff` - or read the patch and apply the same change manually
- Add `kvm-vitastor` to `LIVE_DISK_SNAPSHOTS` in `/etc/one/vmm_exec/vmm_execrc`
- If on Debian or Ubuntu (and AppArmor is used), add Vitastor config file path(s) to `/etc/apparmor.d/local/abstractions/libvirt-qemu`: for example,
-  `echo '  "/etc/vitastor/vitastor.conf" r,' >> /etc/apparmor.d/local/abstractions/libvirt-qemu`
- Apply changes to `/etc/one/oned.conf`
-
-### oned.conf changes
-
-1. Add deploy script override in kvm VM_MAD: add `-l deploy=deploy.vitastor` to ARGUMENTS.
-
-```diff
- VM_MAD = [
-     NAME           = "kvm",
-     SUNSTONE_NAME  = "KVM",
-     EXECUTABLE     = "one_vmm_exec",
-    ARGUMENTS      = "-t 15 -r 0 kvm -p",
-+    ARGUMENTS      = "-t 15 -r 0 kvm -p -l deploy=deploy.vitastor",
-     DEFAULT        = "vmm_exec/vmm_exec_kvm.conf",
-     TYPE           = "kvm",
-     KEEP_SNAPSHOTS = "yes",
-     LIVE_RESIZE    = "yes",
-     SUPPORT_SHAREABLE    = "yes",
-     IMPORTED_VMS_ACTIONS = "terminate, terminate-hard, hold, release, suspend,
-         resume, delete, reboot, reboot-hard, resched, unresched, disk-attach,
-         disk-detach, nic-attach, nic-detach, snapshot-create, snapshot-delete,
-         resize, updateconf, update"
- ]
-```
-
-Optional: if you also want to save VM RAM checkpoints to Vitastor, use
-`-l deploy=deploy.vitastor,save=save.vitastor,restore=restore.vitastor`
-instead of just `-l deploy=deploy.vitastor`.
-
-2. Add `vitastor` to TM_MAD.ARGUMENTS and DATASTORE_MAD.ARGUMENTS:
-
-```diff
- TM_MAD = [
-     EXECUTABLE = "one_tm",
-    ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,dev,vcenter,iscsi_libvirt"
-+    ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,vitastor,dev,vcenter,iscsi_libvirt"
- ]
-
- DATASTORE_MAD = [
-     EXECUTABLE = "one_datastore",
-    ARGUMENTS  = "-t 15 -d dummy,fs,lvm,ceph,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
-+    ARGUMENTS  = "-t 15 -d dummy,fs,lvm,ceph,vitastor,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,vitastor,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
- ]
-```
-
-3. Add INHERIT_DATASTORE_ATTR for two Vitastor attributes:
-
-```
-INHERIT_DATASTORE_ATTR = "VITASTOR_CONF"
-INHERIT_DATASTORE_ATTR = "IMAGE_PREFIX"
-```
-
-4. Add TM_MAD_CONF and DS_MAD_CONF for Vitastor:
-
-```
-TM_MAD_CONF = [
-    NAME = "vitastor", LN_TARGET = "NONE", CLONE_TARGET = "SELF", SHARED = "YES",
-    DS_MIGRATE = "NO", DRIVER = "raw", ALLOW_ORPHANS="format",
-    TM_MAD_SYSTEM = "ssh,shared", LN_TARGET_SSH = "SYSTEM", CLONE_TARGET_SSH = "SYSTEM",
-    DISK_TYPE_SSH = "FILE", LN_TARGET_SHARED = "NONE",
-    CLONE_TARGET_SHARED = "SELF", DISK_TYPE_SHARED = "FILE"
-]
-
-DS_MAD_CONF = [
-    NAME = "vitastor",
-    REQUIRED_ATTRS = "DISK_TYPE,BRIDGE_LIST",
-    PERSISTENT_ONLY = "NO",
-    MARKETPLACE_ACTIONS = "export"
-]
-```
-
-## Create Datastores
-
-Example Image and System Datastore definitions:
-[opennebula/vitastor-imageds.conf](../../opennebula/vitastor-imageds.conf) and
-[opennebula/vitastor-systemds.conf](../../opennebula/vitastor-systemds.conf).
-
-Change parameters to your will:
-
- POOL_NAME is Vitastor pool name to store images.
- IMAGE_PREFIX is a string prepended to all Vitastor image names.
- BRIDGE_LIST is a list of hosts with access to Vitastor cluster, mostly used for image (not system) datastore operations.
- VITASTOR_CONF is the path to cluster configuration. Note that it should be also added to `/etc/apparmor.d/local/abstractions/libvirt-qemu` if you use AppArmor.
- STAGING_DIR is a temporary directory used when importing external images. Should have free space sufficient for downloading external images.
-
-Then create datastores using `onedatastore create vitastor-imageds.conf` and `onedatastore create vitastor-systemds.conf` (or use UI).
-
-## Block VM access to Vitastor cluster
-
-Vitastor doesn't support any authentication yet, so you MUST block VM guest access to the Vitastor cluster at the network level.
-
-If you use VLAN networking for VMs - make sure you use different VLANs for VMs and hypervisor/storage network and
-block access between them using your firewall/switch configuration.
-
-If you use something more stupid like bridged networking, you probably have to use manual firewall/iptables setup
-to only allow access to Vitastor from hypervisor IPs.
-
-You also need to switch the network to "Bridged & Security Groups" and enable IP spoofing filters in OpenNebula.
-The problem is that OpenNebula's IP spoofing filter doesn't affect local interfaces of the hypervisor, i.e. when
-it's enabled, a VM can't talk to other VMs or to the outside world using a spoofed IP, but it CAN talk to the
-hypervisor if it takes an IP from the hypervisor's subnet. To fix that, you also need some additional iptables rules.
-
-So the complete "stupid" bridged network filter setup could look like the following
-(here `10.0.3.0/24` is the VM subnet and `10.0.2.0/24` is the hypervisor subnet):
-
-```
-# Allow incoming traffic from physical device
-iptables -A INPUT -m physdev --physdev-in eth0 -j ACCEPT
-# Drop incoming traffic from VMs unless its source IP belongs to the VM subnet
-iptables -A INPUT ! -s 10.0.3.0/24 -i onebr0 -j DROP
-# Drop traffic from VMs to hypervisor/storage subnet
-iptables -I FORWARD 1 -s 10.0.3.0/24 -d 10.0.2.0/24 -j DROP
-```
-
-## Testing
-
-The OpenNebula plugin includes quite a few bash scripts, so here are their descriptions to give an idea of what they actually do.
-
-| Script                  | Action                                    | How to Test                                                                          |
-| ----------------------- | ----------------------------------------- | ------------------------------------------------------------------------------------ |
-| vmm/kvm/deploy.vitastor | Start a VM                                | Create and start a VM with Vitastor disk(s): persistent / non-persistent / volatile. |
-| vmm/kvm/save.vitastor   | Save VM memory checkpoint                 | Stop a VM using "Stop" command.                                                      |
-| vmm/kvm/restore.vitastor| Restore VM memory checkpoint              | Start a VM back after stopping it.                                                   |
-| datastore/clone         | Copy an image as persistent               | Create a VM template and instantiate it as persistent.                               |
-| datastore/cp            | Import an external image                  | Import a VM template with images from Marketplace.                                   |
-| datastore/export        | Export an image as URL                    | Probably: export a VM template with images to Marketplace.                           |
-| datastore/mkfs          | Create an image with FS                   | Storage → Images → Create → Type: Datablock, Location: Empty disk image, Filesystem: Not empty. |
-| datastore/monitor       | Monitor used space in image datastore     | Check reported used/free space in image datastore list.                              |
-| datastore/rm            | Remove a persistent image                 | Storage → Images → Select an image → Delete.                                         |
-| datastore/snap_delete   | Delete a snapshot of a persistent image   | Storage → Images → Select an image → Select a snapshot → Delete; <br> To create an image with snapshot: attach a persistent image to a VM; create a snapshot; detach the image. |
-| datastore/snap_flatten  | Revert an image to snapshot and delete other snapshots | Storage → Images → Select an image → Select a snapshot → Flatten.       |
-| datastore/snap_revert   | Revert an image to snapshot               | Storage → Images → Select an image → Select a snapshot → Revert.                     |
-| datastore/stat          | Get virtual size of an image in MB        | No idea. Seems to be unused both in Vitastor and Ceph datastores.                    |
-| tm/clone                | Clone a non-persistent image to a VM disk | Attach a non-persistent image to a VM.                                               |
-| tm/context              | Generate a contextualisation VM disk      | Create a VM with enabled contextualisation (default). Common host FS-based version is used in Vitastor and Ceph datastores. |
-| tm/cpds                 | Copy a VM disk / its snapshot to an image | Select a VM → Select a disk → Optionally select a snapshot → Save as.                |
-| tm/delete               | Delete a cloned or volatile VM disk       | Detach a volatile disk or a non-persistent image from a VM.                          |
-| tm/failmigrate          | Handle live migration failure             | No action. Script is empty in Vitastor and Ceph. In other datastores, should roll back actions done by tm/premigrate. |
-| tm/ln                   | Attach a persistent image to a VM         | No action. Script is empty in Vitastor and Ceph.                                     |
-| tm/mkimage              | Create a volatile disk, maybe with FS     | Attach a volatile disk to a VM, with or without file system.                         |
-| tm/mkswap               | Create a volatile swap disk               | Attach a volatile disk to a VM, formatted as swap.                                   |
-| tm/monitor              | Monitor used space in system datastore    | Check reported used/free space in system datastore list.                             |
-| tm/mv                   | Move a migrated VM disk between hosts     | Migrate a VM between hosts. In Vitastor and Ceph datastores, doesn't do any storage action. |
-| tm/mvds                 | Detach a persistent image from a VM       | No action. The opposite of tm/ln. Script is empty in Vitastor and Ceph. In other datastores, script may copy the image from VM host back to the datastore. |
-| tm/postbackup           | Executed after backup                     | Seems that the script just removes temporary files after backup. Perform a VM backup and check that temporary files are cleaned up. |
-| tm/postbackup_live      | Executed after backup of a running VM     | Same as tm/postbackup, but for a running VM.                                         |
-| tm/postmigrate          | Executed after VM live migration          | No action. Only executed for system datastore, so the script tries to call other TMs for other disks. Other than that, the script does nothing in Vitastor and Ceph datastores. |
-| tm/prebackup            | Actual backup script: backup VM disks     | Set up "rsync" backup datastore → Backup a VM to it.                                 |
-| tm/prebackup_live       | Backup VM disks of a running VM           | Same as tm/prebackup, but also does fsfreeze/thaw. So perform a live backup, restore it and check that disks are consistent. |
-| tm/premigrate           | Executed before live migration            | No action. Only executed for system datastore, so the script tries to call other TMs for other disks. Other than that, the script does nothing in Vitastor and Ceph datastores. |
-| tm/resize               | Resize a VM disk                          | Select a VM → Select a non-persistent disk → Resize.                                 |
-| tm/restore              | Restore VM disks from backup              | Set up "rsync" backup datastore → Backup a VM to it → Restore it back.               |
-| tm/snap_create          | Create a VM disk snapshot                 | Select a VM → Select a disk → Create snapshot.                                       |
-| tm/snap_create_live     | Create a VM disk snapshot for a live VM   | Select a running VM → Select a disk → Create snapshot.                               |
-| tm/snap_delete          | Delete a VM disk snapshot                 | Select a VM → Select a disk → Select a snapshot → Delete.                            |
-| tm/snap_revert          | Revert a VM disk to a snapshot            | Select a VM → Select a disk → Select a snapshot → Revert.                            |
--- a/docs/installation/opennebula.ru.md
+++ b/docs/installation/opennebula.ru.md
@ -1,189 +0,0 @@
-[Документация](../../README-ru.md#документация) → Установка → OpenNebula
-
-----
-
-[Read in English](opennebula.en.md)
-
-# OpenNebula
-
-## Автоматическая установка
-
-Плагин OpenNebula Vitastor распространяется как Debian и RPM пакет `vitastor-opennebula`, начиная с версии Vitastor 1.9.0. Так что:
-
- Запустите `apt-get install vitastor-opennebula` или `yum install vitastor-opennebula` после установки OpenNebula на всех серверах
- Проверьте, что он выводит "OK, Vitastor OpenNebula patches successfully applied" или "OK, Vitastor OpenNebula patches are already applied" в процессе установки
- Если сообщение не выведено, пройдите по шагам инструкцию [Ручная установка](#ручная-установка) и примените правки файлов конфигурации вручную
- Удостоверьтесь, что установлены версии QEMU и libvirt с изменениями Vitastor
-  (`dpkg -l qemu-system-x86`, `dpkg -l | grep libvirt`, `rpm -qa | grep qemu`, `rpm -qa | grep libvirt-libs` должны показывать "vitastor" в номере версии)
- [Заблокируйте доступ виртуальных машин в Vitastor](#блокировка-доступа-вм-в-vitastor)
-
-## Ручная установка
-
-Сначала установите саму OpenNebula. После этого, на каждом сервере:
-
- Скопируйте директорию [opennebula/remotes](../../opennebula/remotes) в `/var/lib/one`: `cp -r opennebula/remotes /var/lib/one/`
- Скопируйте директорию [opennebula/sudoers.d](../../opennebula/sudoers.d) в `/etc`: `cp -r opennebula/sudoers.d /etc/`
- Примените патч [downloader-vitastor.sh.diff](../../opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff) к `/var/lib/one/remotes/datastore/downloader.sh`:
-  `patch /var/lib/one/remotes/datastore/downloader.sh < opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff` - либо прочитайте патч и примените изменение вручную
- Добавьте `kvm-vitastor` в список `LIVE_DISK_SNAPSHOTS` в файле `/etc/one/vmm_exec/vmm_execrc`
- Если вы используете Debian или Ubuntu (и AppArmor), добавьте пути к файлу(ам) конфигурации Vitastor в файл `/etc/apparmor.d/local/abstractions/libvirt-qemu`: например,
-  `echo '  "/etc/vitastor/vitastor.conf" r,' >> /etc/apparmor.d/local/abstractions/libvirt-qemu`
- Примените изменения `/etc/one/oned.conf`
-
-### Изменения oned.conf
-
-1. Добавьте переопределение скрипта deploy в VM_MAD kvm, добавив `-l deploy=deploy.vitastor` в `ARGUMENTS`:
-
-```diff
- VM_MAD = [
-     NAME           = "kvm",
-     SUNSTONE_NAME  = "KVM",
-     EXECUTABLE     = "one_vmm_exec",
-    ARGUMENTS      = "-t 15 -r 0 kvm -p",
-+    ARGUMENTS      = "-t 15 -r 0 kvm -p -l deploy=deploy.vitastor",
-     DEFAULT        = "vmm_exec/vmm_exec_kvm.conf",
-     TYPE           = "kvm",
-     KEEP_SNAPSHOTS = "yes",
-     LIVE_RESIZE    = "yes",
-     SUPPORT_SHAREABLE    = "yes",
-     IMPORTED_VMS_ACTIONS = "terminate, terminate-hard, hold, release, suspend,
-         resume, delete, reboot, reboot-hard, resched, unresched, disk-attach,
-         disk-detach, nic-attach, nic-detach, snapshot-create, snapshot-delete,
-         resize, updateconf, update"
- ]
-```
-
-Опционально: если вы хотите также сохранять снимки памяти ВМ в Vitastor, добавьте
-`-l deploy=deploy.vitastor,save=save.vitastor,restore=restore.vitastor`
-вместо просто `-l deploy=deploy.vitastor`.
-
-2. Добавьте `vitastor` в значения TM_MAD.ARGUMENTS и DATASTORE_MAD.ARGUMENTS:
-
-```diff
- TM_MAD = [
-     EXECUTABLE = "one_tm",
-    ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,dev,vcenter,iscsi_libvirt"
-+    ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,vitastor,dev,vcenter,iscsi_libvirt"
- ]
-
- DATASTORE_MAD = [
-     EXECUTABLE = "one_datastore",
-    ARGUMENTS  = "-t 15 -d dummy,fs,lvm,ceph,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
-+    ARGUMENTS  = "-t 15 -d dummy,fs,lvm,ceph,vitastor,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,vitastor,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
- ]
-```
-
-3. Добавьте строчки с INHERIT_DATASTORE_ATTR для двух атрибутов Vitastor-хранилищ:
-
-```
-INHERIT_DATASTORE_ATTR = "VITASTOR_CONF"
-INHERIT_DATASTORE_ATTR = "IMAGE_PREFIX"
-```
-
-4. Добавьте TM_MAD_CONF и DS_MAD_CONF для Vitastor:
-
-```
-TM_MAD_CONF = [
-    NAME = "vitastor", LN_TARGET = "NONE", CLONE_TARGET = "SELF", SHARED = "YES",
-    DS_MIGRATE = "NO", DRIVER = "raw", ALLOW_ORPHANS="format",
-    TM_MAD_SYSTEM = "ssh,shared", LN_TARGET_SSH = "SYSTEM", CLONE_TARGET_SSH = "SYSTEM",
-    DISK_TYPE_SSH = "FILE", LN_TARGET_SHARED = "NONE",
-    CLONE_TARGET_SHARED = "SELF", DISK_TYPE_SHARED = "FILE"
-]
-
-DS_MAD_CONF = [
-    NAME = "vitastor",
-    REQUIRED_ATTRS = "DISK_TYPE,BRIDGE_LIST",
-    PERSISTENT_ONLY = "NO",
-    MARKETPLACE_ACTIONS = "export"
-]
-```
-
-## Создайте хранилища
-
-Примеры настроек хранилищ образов (image) и дисков ВМ (system):
-[opennebula/vitastor-imageds.conf](../../opennebula/vitastor-imageds.conf) и
-[opennebula/vitastor-systemds.conf](../../opennebula/vitastor-systemds.conf).
-
-Скопируйте настройки и поменяйте следующие параметры так, как вам необходимо:
-
- POOL_NAME - имя пула Vitastor для сохранения образов дисков.
- IMAGE_PREFIX - строка, добавляемая в начало имён образов дисков.
- BRIDGE_LIST - список серверов с доступом к кластеру Vitastor, используемых для операций с хранилищем образов (image, не system).
- VITASTOR_CONF - путь к конфигурации Vitastor. Имейте в виду, что этот путь также надо добавить в `/etc/apparmor.d/local/abstractions/libvirt-qemu`, если вы используете AppArmor.
- STAGING_DIR - путь к временному каталогу, используемому при импорте внешних образов. Должен иметь достаточно свободного места, чтобы вмещать скачанные образы.
-
-После этого создайте хранилища с помощью команд `onedatastore create vitastor-imageds.conf` и `onedatastore create vitastor-systemds.conf` (либо через UI).
-
-## Блокировка доступа ВМ в Vitastor
-
-Vitastor пока не поддерживает никакую аутентификацию, так что вы ДОЛЖНЫ заблокировать доступ гостевых ВМ
-в кластер Vitastor на сетевом уровне.
-
-Если вы используете VLAN-сети для ВМ - удостоверьтесь, что ВМ и гипервизор/сеть хранения помещены в разные
-изолированные друг от друга VLAN-ы.
-
-Если вы используете что-то более примитивное, например, мосты (bridge), вам, скорее всего, придётся вручную
-настроить iptables / межсетевой экран, чтобы разрешить доступ к Vitastor только с IP гипервизоров.
-
-Также в этом случае нужно будет переключить обычные мосты на "Bridged & Security Groups" и включить фильтр
-спуфинга IP в OpenNebula. Правда, реализация этого фильтра пока не полная, и она не блокирует доступ к
-локальным интерфейсам гипервизора. То есть, включённый фильтр спуфинга IP запрещает ВМ отправлять трафик
-с чужими IP к другим ВМ или во внешний мир, но не запрещает отправлять его напрямую гипервизору. Чтобы
-исправить это, тоже нужны дополнительные правила iptables.
-
-Таким образом, более-менее полная блокировка при использовании простой сети на сетевых мостах может
-выглядеть так (здесь `10.0.3.0/24` - подсеть ВМ, `10.0.2.0/24` - подсеть гипервизора):
-
-```
-# Разрешаем входящий трафик с физического устройства
-iptables -A INPUT -m physdev --physdev-in eth0 -j ACCEPT
-# Запрещаем трафик со всех ВМ, но с IP не из подсети ВМ
-iptables -A INPUT ! -s 10.0.3.0/24 -i onebr0 -j DROP
-# Запрещаем трафик от ВМ к сети гипервизора
-iptables -I FORWARD 1 -s 10.0.3.0/24 -d 10.0.2.0/24 -j DROP
-```
-
-## Тестирование
-
-Плагин OpenNebula по большей части состоит из bash-скриптов, и чтобы было понятнее, что они
-вообще делают - ниже приведены описания процедур, которыми можно протестировать каждый из них.
-
-| Скрипт                  | Описание                                      | Как протестировать                                                                   |
-| ----------------------- | --------------------------------------------- | ------------------------------------------------------------------------------------ |
-| vmm/kvm/deploy.vitastor | Запустить виртуальную машину                  | Создайте и запустите виртуальную машину с дисками Vitastor: постоянным / непостоянным / волатильным (временным). |
-| vmm/kvm/save.vitastor   | Сохранить снимок памяти ВМ                    | Остановите виртуальную машину командой "Остановить".                                 |
-| vmm/kvm/restore.vitastor| Восстановить снимок памяти ВМ                 | Запустите ВМ после остановки обратно.                                                |
-| datastore/clone         | Скопировать образ как "постоянный"            | Создайте шаблон ВМ и создайте из него постоянную ВМ.                                 |
-| datastore/cp            | Импортировать внешний образ                   | Импортируйте шаблон ВМ с образами дисков из Магазина OpenNebula.                     |
-| datastore/export        | Экспортировать образ как URL                  | Вероятно: экспортируйте шаблон ВМ с образами в Магазин.                              |
-| datastore/mkfs          | Создать образ с файловой системой             | Хранилище → Образы → Создать → Тип: базовый блок данных, Расположение: пустой образ диска, Файловая система: любая непустая. |
-| datastore/monitor       | Вывод статистики места в хранилище образов    | Проверьте статистику свободного/занятого места в списке хранилищ образов.            |
-| datastore/rm            | Удалить "постоянный" образ                    | Хранилище → Образы → Выберите образ → Удалить.                                       |
-| datastore/snap_delete   | Удалить снимок "постоянного" образа           | Хранилище → Образы → Выберите образ → Выберите снимок → Удалить; <br> Чтобы создать образ со снимком: подключите постоянный образ к ВМ, создайте снимок, отключите образ. |
-| datastore/snap_flatten  | Откатить образ к снимку, удалив другие снимки | Хранилище → Образы → Выберите образ → Выберите снимок → "Выровнять" (flatten).       |
-| datastore/snap_revert   | Откатить образ к снимку                       | Хранилище → Образы → Выберите образ → Выберите снимок → Откатить.                    |
-| datastore/stat          | Показать виртуальный размер образа в МБ       | Неизвестно. По-видимому, в плагинах Vitastor и Ceph не используется.                 |
-| tm/clone                | Клонировать "непостоянный" образ в диск ВМ    | Подключите "непостоянный" образ к ВМ.                                                |
-| tm/context              | Создать диск контекстуализации ВМ             | Создайте ВМ с контекстуализацией, как обычно. Но тестировать особенно нечего: в плагинах Vitastor и Ceph образ контекста хранится в локальной ФС гипервизора. |
-| tm/cpds                 | Копировать диск ВМ/его снимок в новый образ   | Выберите ВМ → Выберите диск → Опционально выберите снимок → "Сохранить как".         |
-| tm/delete               | Удалить диск-клон или волатильный диск ВМ     | Отключите волатильный или не-постоянный диск от ВМ.                                  |
-| tm/failmigrate          | Обработать неудачную миграцию                 | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph. В других плагинах скрипт должен откатывать действия tm/premigrate. |
-| tm/ln                   | Подключить "постоянный" образ к ВМ            | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph.                          |
-| tm/mkimage              | Создать волатильный диск, без или с ФС        | Подключите волатильный диск к ВМ, с или без файловой системы.                        |
-| tm/mkswap               | Создать волатильный диск подкачки             | Подключите волатильный диск к ВМ, форматированный как диск подкачки (swap).          |
-| tm/monitor              | Вывод статистики места в хранилище дисков ВМ  | Проверьте статистику свободного/занятого места в списке хранилищ дисков ВМ.          |
-| tm/mv                   | Мигрировать диск ВМ между хостами             | Мигрируйте ВМ между серверами. Правда, с точки зрения хранилища в плагинах Vitastor и Ceph этот скрипт ничего не делает. |
-| tm/mvds                 | Отключить "постоянный" образ от ВМ            | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph. В целом же скрипт обратный к tm/ln и в других хранилищах он может, например, копировать образ ВМ с диска гипервизора обратно в хранилище. |
-| tm/postbackup           | Выполняется после бэкапа                      | По-видимому, скрипт просто удаляет временные файлы после резервного копирования. Так что можно провести его и проверить, что на серверах не осталось временных файлов. |
-| tm/postbackup_live      | Выполняется после бэкапа запущенной ВМ        | То же, что tm/postbackup, но для запущенной ВМ.                                      |
-| tm/postmigrate          | Выполняется после миграции ВМ                 | Тестировать нечего. Однако, OpenNebula запускает скрипт только для системного хранилища, поэтому он вызывает аналогичные скрипты для хранилищ других дисков той же ВМ. Помимо этого в плагинах Vitastor и Ceph скрипт ничего не делает. |
-| tm/prebackup            | Выполнить резервное копирование дисков ВМ     | Создайте хранилище резервных копий типа "rsync" → Забэкапьте в него ВМ.              |
-| tm/prebackup_live       | То же самое для запущенной ВМ                 | То же, что tm/prebackup, но запускает fsfreeze/thaw (остановку доступа к дискам). Так что смысл теста - проведите резервное копирование и проверьте, что данные скопировались консистентно. |
-| tm/premigrate           | Выполняется перед миграцией ВМ                | Тестировать нечего. Аналогично tm/postmigrate запускается только для системного хранилища. |
-| tm/resize               | Изменить размер диска ВМ                      | Выберите ВМ → Выберите непостоянный диск → Измените его размер.                      |
-| tm/restore              | Восстановить диски ВМ из бэкапа               | Создайте хранилище резервных копий → Забэкапьте в него ВМ → Восстановите её обратно. |
-| tm/snap_create          | Создать снимок диска ВМ                       | Выберите ВМ → Выберите диск → Создайте снимок.                                       |
-| tm/snap_create_live     | Создать снимок диска запущенной ВМ            | Выберите запущенную ВМ → Выберите диск → Создайте снимок.                            |
-| tm/snap_delete          | Удалить снимок диска ВМ                       | Выберите ВМ → Выберите диск → Выберите снимок → Удалить.                             |
-| tm/snap_revert          | Откатить диск ВМ к снимку                     | Выберите ВМ → Выберите диск → Выберите снимок → Откатить.                            |
--- a/docs/installation/packages.en.md
+++ b/docs/installation/packages.en.md
@ -14,10 +14,11 @@
  - Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
  - Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
  - Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
-  - Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
  - Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
    stable version from 0.9.x branch instead of 1.x
- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
+- For Debian 10 (Buster) also enable backports repository:
+  `deb http://deb.debian.org/debian buster-backports main`
+- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu`

 ## CentOS

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Vitaliy Filippov	06f4e0fcce	K/V control prints (for debug only) O:-)	2023-12-01 02:33:04 +03:00
Vitaliy Filippov	f285cfc483	Fix eviction when random_pos selects the end	2023-12-01 01:43:03 +03:00
Vitaliy Filippov	12b50b421d	Implement min/max list_count to make listings during performance test reasonable	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	9f6d09428d	Fix and improve parallel allocation - Do not try to allocate more DB blocks in an inode block until it's "confirmed" and "locked" by the first write - Do not recheck for new zero DB blocks on first write into an inode block - a CAS failure means someone else is already writing into it - Throw new allocation blocks away regardless of whether the known_version is 0 on a CAS failure	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	580025cfc9	Implement key_prefix for K/V stress test	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	13e2d3ce7c	More fixes - do not overwrite a block with older version if known version is newer (read may start before update and end after update) - invalidated block versions can't be remembered and trusted - right boundary for split blocks is right_half when diving down, not key_lt - restart update also when block is "invalidated", not just on version mismatch - copy callback in listings to avoid closure destruction bugs too	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	c5b00f897a	Add logging and one more assert	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	e847e26912	Make get_block() wait for updating when unrelated block is found along the path	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	3393463466	Fix a race condition where changed blocks were parsed over existing cached blocks and getting a mix of data	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	bd96a6194a	Simplify code by removing an unneeded "optimisation"	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	601fe10c28	Add kv_log_level, print warnings on level 1, trace ops on level 10	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	63dbc9ca85	Fix duplicate keys in listings on parallel updates -- do not rewind key "iterator position"	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	aa0c363c39	Implement key suffix to avoid collisions of multiple test workers	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	ce52c5589e	Do not complain on empty first block	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	aee20ab1ee	Add JSON output for stress-tester	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	bb81992fac	Print total stats	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	a28f401aff	Do not send more than op_count operations (fix segfault on finish)	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	4ac7e096fd	Add some more resiliency to serialize()	2023-12-01 01:17:04 +03:00
Vitaliy Filippov	b6171a4599	Invalidate blocks being updated too	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	28045f230c	Change new block allocation method: make each writer choose multiple empty PG blocks and place blocks in them	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	10e867880f	Remove blocks from cache on unsuccessful updates	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	012462171a	Allow to track multiple updates per block (it should never happen though)	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	904793cdab	Do not call stop_updating after failed write_new_block and after clear_block (both delete the item)	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	45c01db2de	Track versions of parent blocks and recheck if changed during update	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	8c9206cecd	Fix resume_split condition (key_lt can also be "")	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	e8c46ededa	Experiment: transform offsets for better sharding	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	e9b321a0e0	More post-stress-test fixes - Prevent _split types of new blocks - Stop updating new blocks only after the whole update, otherwise pointers may become invalid - Use recheck_none for updates initially - Use UINT64_MAX as initial block version when postponing ops, otherwise the check fails when the block is initially empty. This for example leads to writing both leaf items & block pointers (which is incorrect) into the root block when starting stress-test with --parallelism 32 - Fix -EINTR comparison	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	09a77991ae	Print operation statistics	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	29d8c9b6f3	K/V fixes after stress-test :-) - track block versions correctly - per inode block (128kb) instead of tree block (4kb) - prevent multiple parallel CAS writes of the same inode block - add logging for EILSEQ which means invalid data in the tree - fix get_block updated flag which was true for blocks already in cache and was leading to infinite loops on "unrelated block" errors - apply changes to blocks in cache only after successful writes (using "virtual changes") - do not replace cached block with an older version from disk - recheck "unrelated blocks" (read/update collisions) until data stops changing - track tree path correctly - do not treat split block as parent of its right half - correctly move blocks when finding new empty place on disk - restart updates from the beginning when one of blocks is changed by a parallel update - fix delete using SET opcode and setting key to the empty value instead - prevent changing the same key more than 1 time in parallel - fix listing verification - resume continue_updates in update_find (required because it uses continue_update itself) - add allow_old_cached parameter to get()	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	20321aaaef	Implement K/V DB stress tester	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	987b005356	Evict blocks based on memory limit & block usage	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	41754b748b	Track blocks per level	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	31913256f3	Track block level	2023-12-01 01:17:03 +03:00
Vitaliy Filippov	0ee36baed7	Experimental B-Tree Vitastor embedded K/V database implementation!	2023-12-01 01:17:03 +03:00