WIP Implement RDMA v2 based on IBV_WR_RDMA_WRITE with remote buffer management

One BIG FIXME remaining - handling large operations :))
2023-02-26 00:26:39 +03:00
538 changed files with 12472 additions and 67636 deletions
--- a/.gitea/workflows/buildenv.Dockerfile
+++ b/.gitea/workflows/buildenv.Dockerfile
@ -1,36 +0,0 @@
 # Image based on Node 16 / Debian bullseye that installs the packages and
 # downloads the fio and qemu sources used by later build steps.
 FROM node:16-bullseye
 WORKDIR /root
 # Trust the repository signing key shipped in ./docker.
 ADD ./docker/vitastor.gpg /etc/apt/trusted.gpg.d
 # Configure APT: add the bullseye-backports and vitastor.io repositories,
 # pin them (backports at priority 500, origin vitastor.io at 1000), append a
 # deb-src entry for every deb entry, and disable recommended/suggested packages.
 RUN echo 'deb http://deb.debian.org/debian bullseye-backports main' >> /etc/apt/sources.list; \
    echo 'deb http://vitastor.io/debian bullseye main' >> /etc/apt/sources.list; \
    echo >> /etc/apt/preferences; \
    echo 'Package: *' >> /etc/apt/preferences; \
    echo 'Pin: release a=bullseye-backports' >> /etc/apt/preferences; \
    echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
    echo >> /etc/apt/preferences; \
    echo 'Package: *' >> /etc/apt/preferences; \
    echo 'Pin: origin "vitastor.io"' >> /etc/apt/preferences; \
    echo 'Pin-Priority: 1000' >> /etc/apt/preferences; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
    echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
 RUN apt-get update
 # Runtime tools (etcd, qemu, fio) plus development libraries and build tooling.
 RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
    liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
 # Build dependencies of fio and of the exact qemu version reported by
 # `dpkg -s qemu-system-x86`.
 RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
 RUN apt-get update && apt-get -y install jq lp-solve sudo nfs-common fdisk parted
 # Download (without unpacking) the source packages of fio and of that same qemu version.
 RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
 # Unpack the qemu source, run its Debian configure target (falling back to
 # b/configure-stamp if configure-qemu fails), then build two generated headers;
 # a failure to build config-poison.h is tolerated.
 RUN set -ex; \
    mkdir qemu-build; \
    cd qemu-build; \
    dpkg-source -x /root/qemu*.dsc; \
    cd qemu*/; \
    debian/rules configure-qemu || debian/rules b/configure-stamp; \
    cd b/qemu; \
    make -j8 config-poison.h || true; \
    make -j8 qapi/qapi-builtin-types.h
--- a/.gitea/workflows/test.Dockerfile
+++ b/.gitea/workflows/test.Dockerfile
@ -1,19 +0,0 @@
 # Image built on top of the buildenv image with the repository added at
 # /root/vitastor and compiled in /root/vitastor/build.
 FROM git.yourcmc.ru/vitalif/vitastor/buildenv
 ADD . /root/vitastor
 # Unpack the fio source, symlink the fio and qemu source trees into the
 # repository as ./fio and ./qemu, copy ./debian/raw.h to /usr/include/linux/raw.h
 # when that header is absent, install the npm dependencies of mon/, and build
 # with cmake using -DWITH_ASAN=yes -DWITH_QEMU=yes.
 RUN set -e -x; \
    mkdir -p /root/fio-build/; \
    cd /root/fio-build/; \
    dpkg-source -x /root/fio*.dsc; \
    cd /root/vitastor; \
    ln -s /root/fio-build/fio-*/ ./fio; \
    ln -s /root/qemu-build/qemu-*/ ./qemu; \
    ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
    cd mon; \
    npm install; \
    cd ..; \
    mkdir build; \
    cd build; \
    cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \
    make -j16
--- a/.gitea/workflows/test.yml
+++ b/.gitea/workflows/test.yml
--- a/.gitea/workflows/tests-to-yaml.pl
+++ b/.gitea/workflows/tests-to-yaml.pl
@ -1,83 +0,0 @@
 #!/usr/bin/perl
 # Convert a list of test invocations into workflow job definitions.
 #
 # Input:  lines read from the files named on the command line, or from STDIN.
 #         Only lines containing "./test_<name>" are used; all others are skipped.
 # Output: one YAML job per matching line, printed to STDOUT.
 #
 # The job name starts as the test script name and is extended by every
 # KEY=value token found on the line (see the loop below). The per-job timeout
 # is 3 minutes unless the test is listed as a slow one.
 use strict;
 use warnings;
 # Stream the input one line at a time instead of slurping it into a list.
 while (my $line = <>)
 {
    if ($line =~ /\.\/(test_[^\.]+)/s)
    {
        chomp $line;
        my $base_name = $1;
        my $test_name = $base_name;
        # Default timeout in minutes; the tests listed below get 10 instead.
        my $timeout = 3;
        if ($test_name eq 'test_etcd_fail' || $test_name eq 'test_heal' || $test_name eq 'test_add_osd' ||
            $test_name eq 'test_interrupted_rebalance' || $test_name eq 'test_rebalance_verify')
        {
            $timeout = 10;
        }
        # Derive the job name suffix from each KEY=value token on the line.
        while ($line =~ /([^\s=]+)=(\S+)/gs)
        {
            if ($1 eq 'TEST_NAME')
            {
                # An explicit TEST_NAME replaces any suffix collected so far
                # and stops further processing of the line.
                $test_name = $base_name.'_'.$2;
                last;
            }
            elsif ($1 eq 'SCHEME' && $2 eq 'ec')
            {
                $test_name .= '_ec';
            }
            elsif ($1 eq 'SCHEME' && $2 eq 'xor')
            {
                $test_name .= '_xor';
            }
            elsif ($1 eq 'IMMEDIATE_COMMIT')
            {
                # The value is ignored: presence of the key alone adds the suffix.
                $test_name .= '_imm';
            }
            elsif ($1 eq 'ANTIETCD')
            {
                # The value is ignored here as well.
                $test_name .= '_antietcd';
            }
            else
            {
                # Any other setting becomes "_<lowercased key>_<value>".
                $test_name .= '_'.lc($1).'_'.$2;
            }
        }
        # This override is keyed on the final job name, not the script name.
        if ($test_name eq 'test_snapshot_chain_ec')
        {
            $timeout = 6;
        }
        # Rewrite the first relative "./test_" path to its absolute location.
        $line =~ s!\./test_!/root/vitastor/tests/test_!;
        # Gitea CI doesn't support artifacts yet, lol
        #- name: Upload results
        #  uses: actions/upload-artifact\@v3
        #  if: always()
        #  with:
        #    name: ${test_name}_result
        #    path: |
        #      /root/vitastor/testdata
        #      !/root/vitastor/testdata/*.bin
        #    retention-days: 5
        # Interpolating heredoc: \$ keeps a literal dollar sign in the output.
        print <<"EOF"
  $test_name:
    runs-on: ubuntu-latest
    needs: build
    container: \${{env.TEST_IMAGE}}:\${{github.sha}}
    steps:
    - name: Run test
      id: test
      timeout-minutes: $timeout
      run: $line
    - name: Print logs
      if: always() && steps.test.outcome == 'failure'
      run: |
        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
          echo "-------- \$i --------"
          cat \$i
          echo ""
        done
 EOF
 ;
    }
 }
--- a/.gitignore
+++ b/.gitignore
@ -3,3 +3,16 @@
 package-lock.json
 fio
 qemu
 osd
 stub_osd
 stub_uring_osd
 stub_bench
 osd_test
 osd_peering_pg_test
 dump_journal
 nbd_proxy
 rm_inode
 test_allocator
 test_blockstore
 test_shit
 osd_rmw_test
--- a/CLA-en.md
+++ b/CLA-en.md
@ -1,115 +0,0 @@
 ## Contributor License Agreement
 > This Agreement is made in the Russian and English languages. **The English
 text of the Agreement is for informational purposes only** and is not binding
 on the Parties.
 >
 > In the event of a conflict between the provisions of the Russian and
 English versions of this Agreement, the **Russian version shall prevail**.
 >
 > Russian version is published at https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md
 This document represents the offer of Filippov Vitaliy Vladimirovich
 ("Author"), author and copyright holder of Vitastor software ("Program"),
 acknowledged by a certificate of Federal Service for Intellectual
 Property of Russian Federation (Rospatent) # 2021617829 dated 20 May 2021,
 to "Contributors" to conclude this license agreement as follows
 ("Agreement" or "Offer").
 In accordance with Art. 435, Art. 438 of the Civil Code of the Russian
 Federation, this Agreement is an offer and in case of acceptance of the
 offer, an agreement is considered concluded on the conditions specified
 in the offer.
 1. Applicable Terms. \
   1.1. "Official Repository" shall mean the computer storage, operated by
        the Author, containing all prior and future versions of the Source
        Code of the Program, at Internet addresses https://git.yourcmc.ru/vitalif/vitastor/
        or https://github.com/vitalif/vitastor/. \
   1.2. "Contributions" shall mean results of intellectual activity
        (including, but not limited to, source code, libraries, components,
        texts, documentation) which can be software or elements of the software
        and which are provided by Contributors to the Author for inclusion
        in the Program. \
   1.3. "Contributor" shall mean a person who provides Contributions to
        the Author and agrees with all provisions of this Agreement.
        A Contributor can be: 1) an individual; or 2) a legal entity or an
        individual entrepreneur in case when an individual provides Contributions
        on behalf of third parties, including on behalf of his employer.
 2. Subject of the Agreement. \
   2.1. Subject of the Agreement shall be the Contributions sent to the Author by Contributors. \
   2.2. The Contributor grants to the Author the right to use Contributions at his own
        discretion and without any necessity to get a prior approval from Contributor or
        any other third party in any way, under a simple (non-exclusive), royalty-free,
        irrevocable license throughout the world by all means not contrary to law, in whole
        or as a part of the Program, or other open-source or closed-source computer programs,
        products or services (hereinafter -- the "License"), including, but not limited to: \
        2.2.1. to execute Contributions and use them for any tasks; \
        2.2.2. to publish and distribute Contributions in modified or unmodified form and/or to rent them; \
        2.2.3. to modify Contributions, add comments, illustrations or any explanations to Contributions while using them; \
        2.2.4. to create other results of intellectual activity based on Contributions, including derivative works and composite works; \
        2.2.5. to translate Contributions into other languages, including other programming languages; \
        2.2.6. to carry out rental and public display of Contributions; \
        2.2.7. to use Contributions under the trade name and/or any trademark or any other label, or without it, as the Author thinks fit; \
   2.3. The Contributor grants to the Author the right to sublicense any of the aforementioned
        rights to third parties on any terms at the Author's discretion. \
   2.4. The License is provided for the entire duration of Contributor's
        exclusive intellectual property rights to the Contributions. \
   2.5. The Contributor grants to the Author the right to decide how and where to mention,
        or to not mention at all, the fact of his authorship, name, nickname and/or company
        details when including Contributions into the Program or in any other computer
        programs, products or services.
 3. Acceptance of the Offer \
   3.1. The Contributor may provide Contributions to the Author in the form of
        a "Pull Request" in an Official Repository of the Program or by any
        other electronic means of communication, including, but not limited to,
        E-mail or messenger applications. \
   3.2. The acceptance of the Offer shall be the fact of provision of Contributions
        to the Author by the Contributor by any means with the following remark:
        “I accept Vitastor CLA agreement: https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md”
        or “Я принимаю соглашение Vitastor CLA: https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md”. \
   3.3. Date of acceptance of the Offer shall be the date of such provision.
 4. Rights and obligations of the parties. \
   4.1. The Contributor reserves the right to use Contributions by any lawful means
        not contrary to this Agreement. \
   4.2. The Author has the right to refuse to include Contributions into the Program
        at any moment with no explanation to the Contributor.
 5. Representations and Warranties. \
   5.1. The person providing Contributions for the purpose of their inclusion
        in the Program represents and warrants that he is the Contributor
        or legally acts on the Contributor's behalf. Name or company details
        of the Contributor shall be provided with the Contribution at the moment
        of their provision to the Author. \
   5.2. The Contributor represents and warrants that he legally owns exclusive
        intellectual property rights to the Contributions. \
   5.3. The Contributor represents and warrants that any further use of
        Contributions by the Author as provided by Contributor under the terms
        of the Agreement does not infringe on intellectual and other rights and
        legitimate interests of third parties. \
   5.4. The Contributor represents and warrants that he has all rights and legal
        capacity needed to accept this Offer. \
   5.5. The Contributor represents and warrants that Contributions don't
        contain malware or any information considered illegal under the law
        of Russian Federation.
 6. Termination of the Agreement \
   6.1. The Agreement may be terminated by mutual agreement of the Author and
        the Contributor, formalised in written form, or on grounds prescribed
        by the law of the Russian Federation.
 7. Final Clauses \
   7.1. The Contributor may optionally sign the Agreement in the written form. \
   7.2. The Agreement is deemed to become effective from the Date of signing of
        the Agreement and until the expiration of Contributor's exclusive
        intellectual property rights to the Contributions. \
   7.3. The Author may unilaterally alter the Agreement without informing Contributors.
        The new version of the document shall come into effect 3 (three) days after
        being published in the Official Repository of the Program at Internet address
        [https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md](https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md).
        Contributors should keep informed about the actual version of the Agreement themselves. \
   7.4. If the Author and the Contributor fail to agree on disputable issues,
        disputes shall be referred to the Moscow Arbitration court.
--- a/CLA-ru.md
+++ b/CLA-ru.md
@ -1,108 +0,0 @@
 ## Лицензионное соглашение с участником
 > Данная Оферта написана в Русской и Английской версиях. **Версия на английском
 языке предоставляется в информационных целях** и не связывает стороны договора.
 >
 > В случае несоответствий между положениями Русской и Английской версий Договора,
 **Русская версия имеет приоритет**.
 >
 > Английская версия опубликована по адресу https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md
 Настоящий договор-оферта (далее по тексту – Оферта, Договор) адресована физическим
 и юридическим лицам (далее – Участникам) и является официальным публичным предложением
 Филиппова Виталия Владимировича (далее – Автора) программного обеспечения Vitastor,
 свидетельство Федеральной службы по интеллектуальной собственности (Роспатент) № 2021617829
 от 20 мая 2021 г. (далее – Программа) о нижеследующем:
 1. Термины и определения \
   1.1. Репозиторий – электронное хранилище, содержащее исходный код Программы. \
   1.2. Доработка – результат интеллектуальной деятельности Участника, включающий
        в себя изменения или дополнения к исходному коду Программы, которые Участник
        желает включить в состав Программы для дальнейшего использования и распространения
        Автором и для этого направляет их Автору. \
   1.3. Участник – физическое или юридическое лицо, вносящее Доработки в код Программы. \
   1.4. ГК РФ – Гражданский кодекс Российской Федерации.
 2. Предмет оферты \
   2.1. Предметом настоящей оферты являются Доработки, отправляемые Участником Автору. \
   2.2. Участник предоставляет Автору право использовать Доработки по собственному усмотрению
        и без необходимости предварительного согласования с Участником или иным третьим лицом
        на условиях простой (неисключительной) безвозмездной безотзывной лицензии, полностью
        или фрагментарно, в составе Программы или других программ, продуктов или сервисов
        как с открытым, так и с закрытым исходным кодом, любыми способами, не противоречащими
        закону, включая, но не ограничиваясь следующими: \
        2.2.1. Запускать и использовать Доработки для выполнения любых задач; \
        2.2.2. Распространять, импортировать и доводить Доработки до всеобщего сведения; \
        2.2.3. Вносить в Доработки изменения, сокращения и дополнения, снабжать Доработки
               при их использовании комментариями, иллюстрациями или пояснениями; \
        2.2.4. Создавать на основе Доработок иные результаты интеллектуальной деятельности,
               в том числе производные и составные произведения; \
        2.2.5. Переводить Доработки на другие языки, в том числе на другие языки программирования; \
        2.2.6. Осуществлять прокат и публичный показ Доработок; \
        2.2.7. Использовать Доработки под любым фирменным наименованием, товарным знаком
               (знаком обслуживания) или иным обозначением, или без такового. \
   2.3. Участник предоставляет Автору право сублицензировать полученные права на Доработки
        третьим лицам на любых условиях на усмотрение Автора. \
   2.4. Участник предоставляет Автору права на Доработки на территории всего мира. \
   2.5. Участник предоставляет Автору права на весь срок действия исключительного права
        Участника на Доработки. \
   2.6. Участник предоставляет Автору права на Доработки на безвозмездной основе. \
   2.7. Участник разрешает Автору самостоятельно определять порядок, способ и
        место указания его имени, реквизитов и/или псевдонима при включении
        Доработок в состав Программы или других программ, продуктов или сервисов.
 3. Акцепт Оферты \
   3.1. Участник может передавать Доработки в адрес Автора через зеркала официального
        Репозитория Программы по адресам https://git.yourcmc.ru/vitalif/vitastor/ или
        https://github.com/vitalif/vitastor/ в виде “запроса на слияние” (pull request),
        либо в письменном виде или с помощью любых других электронных средств коммуникации,
        например, электронной почты или мессенджеров. \
   3.2. Факт передачи Участником Доработок в адрес Автора любым способом с одной из пометок
        “I accept Vitastor CLA agreement: https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-en.md”
        или “Я принимаю соглашение Vitastor CLA: https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md”
        является полным и безоговорочным акцептом (принятием) Участником условий настоящей
        Оферты, т.е. Участник считается ознакомившимся с настоящим публичным договором и
        в соответствии с ГК РФ признается лицом, вступившим с Автором в договорные отношения
        на основании настоящей Оферты. \
   3.3. Датой акцептирования настоящей Оферты считается дата такой передачи.
 4. Права и обязанности Сторон \
   4.1. Участник сохраняет за собой право использовать Доработки любым законным
        способом, не противоречащим настоящему Договору. \
   4.2. Автор вправе отказать Участнику во включении Доработок в состав
        Программы без объяснения причин в любой момент по своему усмотрению.
 5. Гарантии и заверения \
   5.1. Лицо, направляющее Доработки для целей их включения в состав Программы,
        гарантирует, что является Участником или представителем Участника. Имя или реквизиты
        Участника должны быть указаны при их передаче в адрес Автора Программы. \
   5.2. Участник гарантирует, что является законным обладателем исключительных прав
        на Доработки. \
   5.3. Участник гарантирует, что на момент акцептирования настоящей Оферты ему
        ничего не известно (и не могло быть известно) о правах третьих лиц на
        передаваемые Автору Доработки или их часть, которые могут быть нарушены
        в связи с передачей Доработок по настоящему Договору. \
   5.4. Участник гарантирует, что является дееспособным лицом и обладает всеми
        необходимыми правами для заключения Договора. \
   5.5. Участник гарантирует, что Доработки не содержат вредоносного ПО, а также
        любой другой информации, запрещённой к распространению по законам Российской
        Федерации.
 6. Прекращение действия оферты \
   6.1. Действие настоящего договора может быть прекращено по соглашению сторон,
        оформленному в письменном виде, а также вследствие его расторжения по основаниям,
        предусмотренным законом.
 7. Заключительные положения \
   7.1. Участник вправе по желанию подписать настоящий Договор в письменном виде. \
   7.2. Настоящий договор действует с момента его заключения и до истечения срока
        действия исключительных прав Участника на Доработки. \
   7.3. Автор имеет право в одностороннем порядке вносить изменения и дополнения в договор
        без специального уведомления об этом Участников. Новая редакция документа вступает
        в силу через 3 (Три) календарных дня со дня опубликования в официальном Репозитории
        Программы по адресу в сети Интернет
        [https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md](https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/CLA-ru.md).
        Участники самостоятельно отслеживают действующие условия Оферты. \
   7.4. Все споры, возникающие между сторонами в процессе их взаимодействия по настоящему
        договору, решаются путём переговоров. В случае невозможности урегулирования споров
        переговорным порядком стороны разрешают их в Арбитражном суде г.Москвы.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,7 +1,7 @@
-cmake_minimum_required(VERSION 2.8.12)
+cmake_minimum_required(VERSION 2.8)
 project(vitastor)
-set(VITASTOR_VERSION "2.1.0")
+set(VERSION "0.8.5")
 add_subdirectory(src)
--- a/README-ru.md
+++ b/README-ru.md
@ -1,4 +1,4 @@
-# Vitastor
+## Vitastor
 [Read English version](README.md)
@ -6,8 +6,8 @@
 Вернём былую скорость кластерному блочному хранилищу!
-Vitastor - распределённая блочная, файловая и объектная SDS (программная СХД), прямой аналог Ceph RBD, CephFS и RGW,
+Vitastor - распределённая блочная SDS (программная СХД), прямой аналог Ceph RBD и
-а также внутренних СХД популярных облачных провайдеров. Однако, в отличие от них, Vitastor
+внутренних СХД популярных облачных провайдеров. Однако, в отличие от них, Vitastor
 быстрый и при этом простой. Только пока маленький :-).
 Vitastor архитектурно похож на Ceph, что означает атомарность и строгую консистентность,
@ -15,14 +15,14 @@ Vitastor архитектурно похож на Ceph, что означает
 и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами
 избыточности - репликацией или с произвольными кодами коррекции ошибок.
-Vitastor нацелен в первую очередь на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
+Vitastor нацелен на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
 TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
 что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
-Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, OpenNebula, Proxmox, Kubernetes.
+Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, Proxmox, Kubernetes.
 Другие драйверы могут также быть легко реализованы.
-Подробности смотрите в документации по ссылкам. Можете начать отсюда: [Быстрый старт](docs/intro/quickstart.ru.md).
+Подробности смотрите в документации по ссылкам ниже.
 ## Презентации и записи докладов
@ -41,19 +41,15 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
  - [Автор и лицензия](docs/intro/author.ru.md)
 - Установка
  - [Пакеты](docs/installation/packages.ru.md)
  - [Docker](docs/installation/docker.ru.md)
  - [Proxmox](docs/installation/proxmox.ru.md)
  - [OpenNebula](docs/installation/opennebula.ru.md)
  - [OpenStack](docs/installation/openstack.ru.md)
  - [Kubernetes CSI](docs/installation/kubernetes.ru.md)
  - [S3](docs/installation/s3.ru.md)
  - [Сборка из исходных кодов](docs/installation/source.ru.md)
 - Конфигурация
  - [Обзор](docs/config.ru.md)
  - Параметры
    - [Общие](docs/config/common.ru.md)
    - [Сетевые](docs/config/network.ru.md)
    - [Клиентский код](docs/config/client.ru.md)
    - [Глобальные дисковые параметры](docs/config/layout-cluster.ru.md)
    - [Дисковые параметры OSD](docs/config/layout-osd.ru.md)
    - [Прочие параметры OSD](docs/config/osd.ru.md)
@ -66,13 +62,11 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
  - [fio](docs/usage/fio.ru.md) для тестов производительности
  - [NBD](docs/usage/nbd.ru.md) для монтирования ядром
  - [QEMU и qemu-img](docs/usage/qemu.ru.md)
-  - [NFS](docs/usage/nfs.ru.md) кластерная файловая система и псевдо-ФС прокси
+  - [NFS](docs/usage/nfs.ru.md)-прокси для VMWare и подобных
  - [Администрирование](docs/usage/admin.ru.md)
 - Производительность
  - [Понимание сути производительности](docs/performance/understanding.ru.md)
  - [Теоретический максимум](docs/performance/theoretical.ru.md)
  - [Пример сравнения с Ceph](docs/performance/comparison1.ru.md)
  - [Более новый тест Vitastor 1.3.1](docs/performance/bench2.ru.md)
 ## Автор и лицензия
--- a/README.md
+++ b/README.md
@ -6,23 +6,23 @@
 Make Clustered Block Storage Fast Again.
-Vitastor is a distributed block, file and object SDS, direct replacement of Ceph RBD, CephFS and RGW,
+Vitastor is a distributed block SDS, direct replacement of Ceph RBD and internal SDS's
-and also internal SDS's of public clouds. However, in contrast to them, Vitastor is fast
+of public clouds. However, in contrast to them, Vitastor is fast and simple at the same time.
-and simple at the same time. The only thing is it's slightly young :-).
+The only thing is it's slightly young :-).
 Vitastor is architecturally similar to Ceph which means strong consistency,
 primary-replication, symmetric clustering and automatic data distribution over any
 number of drives of any size with configurable redundancy (replication or erasure codes/XOR).
-Vitastor targets primarily SSD and SSD+HDD clusters with at least 10 Gbit/s network,
+Vitastor targets SSD and SSD+HDD clusters with at least 10 Gbit/s network, supports
-supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
+TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
 with proper hardware which is ~10 times faster than other popular SDS's like Ceph
 or internal systems of public clouds.
-Vitastor supports QEMU, NBD, NFS protocols, OpenStack, OpenNebula, Proxmox, Kubernetes drivers.
+Vitastor supports QEMU, NBD, NFS protocols, OpenStack, Proxmox, Kubernetes drivers.
 More drivers may be created easily.
-Read more details in the documentation. You can start from here: [Quick Start](docs/intro/quickstart.en.md).
+Read more details below in the documentation.
 ## Talks and presentations
@ -41,19 +41,15 @@ Read more details in the documentation. You can start from here: [Quick Start](d
  - [Author and license](docs/intro/author.en.md)
 - Installation
  - [Packages](docs/installation/packages.en.md)
  - [Docker](docs/installation/docker.en.md)
  - [Proxmox](docs/installation/proxmox.en.md)
  - [OpenNebula](docs/installation/opennebula.en.md)
  - [OpenStack](docs/installation/openstack.en.md)
  - [Kubernetes CSI](docs/installation/kubernetes.en.md)
  - [S3](docs/installation/s3.en.md)
  - [Building from Source](docs/installation/source.en.md)
 - Configuration
  - [Overview](docs/config.en.md)
  - Parameter Reference
    - [Common](docs/config/common.en.md)
    - [Network](docs/config/network.en.md)
    - [Client](docs/config/client.en.md)
    - [Global Disk Layout](docs/config/layout-cluster.en.md)
    - [OSD Disk Layout](docs/config/layout-osd.en.md)
    - [OSD Runtime Parameters](docs/config/osd.en.md)
@ -66,13 +62,11 @@ Read more details in the documentation. You can start from here: [Quick Start](d
  - [fio](docs/usage/fio.en.md) for benchmarks
  - [NBD](docs/usage/nbd.en.md) for kernel mounts
  - [QEMU and qemu-img](docs/usage/qemu.en.md)
-  - [NFS](docs/usage/nfs.en.md) clustered file system and pseudo-FS proxy
+  - [NFS](docs/usage/nfs.en.md) emulator for VMWare and similar
  - [Administration](docs/usage/admin.en.md)
 - Performance
  - [Understanding storage performance](docs/performance/understanding.en.md)
  - [Theoretical performance](docs/performance/theoretical.en.md)
  - [Example comparison with Ceph](docs/performance/comparison1.en.md)
  - [Newer benchmark of Vitastor 1.3.1](docs/performance/bench2.en.md)
 ## Author and License
--- a/copy-fio-includes.sh
+++ b/copy-fio-includes.sh
@ -1,6 +1,6 @@
 #!/bin/bash
-gcc -I. -E -o fio_headers.i src/util/fio_headers.h
+gcc -I. -E -o fio_headers.i src/fio_headers.h
 rm -rf fio-copy
 for i in `grep -Po 'fio/[^"]+' fio_headers.i | sort | uniq`; do
--- a/copy-qemu-includes.sh
+++ b/copy-qemu-includes.sh
@ -5,7 +5,7 @@
 #cd b/qemu; make qapi
 gcc -I qemu/b/qemu `pkg-config glib-2.0 --cflags` \
-    -I qemu/include -E -o qemu_driver.i src/client/qemu_driver.c
+    -I qemu/include -E -o qemu_driver.i src/qemu_driver.c
 rm -rf qemu-copy
 for i in `grep -Po 'qemu/[^"]+' qemu_driver.i | sort | uniq`; do
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 8de8b467acbca50cfd8835c20e0e379110f3b32b
+Subproject commit 45e6d1f13196a0824e2089a586c53b9de0283f17
--- a/csi/Dockerfile
+++ b/csi/Dockerfile
@ -1,15 +1,14 @@
 # Compile stage
-FROM golang:bookworm AS build
+FROM golang:buster AS build
 ADD go.sum go.mod /app/
 RUN cd /app; CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go mod download -x
 ADD . /app
-RUN perl -i -e '$/ = undef; while(<>) { s/\n\s*(\{\s*\n)/$1\n/g; s/\}(\s*\n\s*)else\b/$1} else/g; print; }' `find /app -name '*.go'` && \
+RUN perl -i -e '$/ = undef; while(<>) { s/\n\s*(\{\s*\n)/$1\n/g; s/\}(\s*\n\s*)else\b/$1} else/g; print; }' `find /app -name '*.go'`
-    cd /app && \
+RUN cd /app; CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o vitastor-csi
    CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o vitastor-csi
 # Final stage
-FROM debian:bookworm
+FROM debian:buster
 LABEL maintainers="Vitaliy Filippov <vitalif@yourcmc.ru>"
 LABEL description="Vitastor CSI Driver"
@ -19,32 +18,19 @@ ENV CSI_ENDPOINT=""
 RUN apt-get update && \
    apt-get install -y wget && \
    (echo deb http://deb.debian.org/debian buster-backports main > /etc/apt/sources.list.d/backports.list) && \
    (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
    apt-get update && \
-    apt-get install -y e2fsprogs xfsprogs kmod iproute2 \
+    apt-get install -y e2fsprogs xfsprogs kmod && \
        # NFS mount dependencies
        nfs-common netbase \
        # dependencies of qemu-storage-daemon
        libnuma1 liburing2 libglib2.0-0 libfuse3-3 libaio1 libzstd1 libnettle8 \
        libgmp10 libhogweed6 libp11-kit0 libidn2-0 libunistring2 libtasn1-6 libpcre2-8-0 libffi8 && \
    apt-get clean && \
    (echo options nbd nbds_max=128 > /etc/modprobe.d/nbd.conf)
 COPY --from=build /app/vitastor-csi /bin/
-RUN (echo deb http://vitastor.io/debian bookworm main > /etc/apt/sources.list.d/vitastor.list) && \
+RUN (echo deb http://vitastor.io/debian buster main > /etc/apt/sources.list.d/vitastor.list) && \
    ((echo 'Package: *'; echo 'Pin: origin "vitastor.io"'; echo 'Pin-Priority: 1000') > /etc/apt/preferences.d/vitastor.pref) && \
    wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg && \
    apt-get update && \
    apt-get install -y vitastor-client && \
    wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-utils_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
    wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-block-extra_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
    dpkg -x qemu-utils*.deb tmp1 && \
    dpkg -x qemu-block-extra*.deb tmp1 && \
    cp -a tmp1/usr/bin/qemu-storage-daemon /usr/bin/ && \
    mkdir -p /usr/lib/x86_64-linux-gnu/qemu && \
    cp -a tmp1/usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/ && \
    rm -rf tmp1 *.deb && \
    apt-get clean
 ENTRYPOINT ["/bin/vitastor-csi"]
--- a/csi/Makefile
+++ b/csi/Makefile
@ -1,9 +1,9 @@
-VITASTOR_VERSION ?= v2.1.0
+VERSION ?= v0.8.5
 all: build push
 build:
-	@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
+	@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
 push:
-	@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
+	@docker push vitalif/vitastor-csi:$(VERSION)
--- a/csi/deploy/001-csi-config-map.yaml
+++ b/csi/deploy/001-csi-config-map.yaml
@ -2,7 +2,6 @@
 apiVersion: v1
 kind: ConfigMap
 data:
  # You can add multiple configuration files here to use a multi-cluster setup
  vitastor.conf: |-
    {"etcd_address":"http://192.168.7.2:2379","etcd_prefix":"/vitastor"}
 metadata:
--- a/csi/deploy/004-csi-nodeplugin.yaml
+++ b/csi/deploy/004-csi-nodeplugin.yaml
@ -49,7 +49,7 @@ spec:
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
-          image: vitalif/vitastor-csi:v2.1.0
+          image: vitalif/vitastor-csi:v0.8.5
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
@ -82,8 +82,6 @@ spec:
              name: host-sys
            - mountPath: /run/mount
              name: host-mount
            - mountPath: /run/vitastor-csi
              name: run-vitastor-csi
            - mountPath: /lib/modules
              name: lib-modules
              readOnly: true
@ -134,9 +132,6 @@ spec:
        - name: host-mount
          hostPath:
            path: /run/mount
        - name: run-vitastor-csi
          hostPath:
            path: /run/vitastor-csi
        - name: lib-modules
          hostPath:
            path: /lib/modules
--- a/csi/deploy/005-csi-provisioner-rbac.yaml
+++ b/csi/deploy/005-csi-provisioner-rbac.yaml
@ -35,13 +35,10 @@ rules:
    verbs: ["get", "list", "watch"]
  - apiGroups: ["snapshot.storage.k8s.io"]
    resources: ["volumesnapshots"]
-    verbs: ["get", "list", "patch"]
+    verbs: ["get", "list"]
  - apiGroups: ["snapshot.storage.k8s.io"]
    resources: ["volumesnapshots/status"]
    verbs: ["get", "list", "patch"]
  - apiGroups: ["snapshot.storage.k8s.io"]
    resources: ["volumesnapshotcontents"]
-    verbs: ["create", "get", "list", "watch", "update", "delete", "patch"]
+    verbs: ["create", "get", "list", "watch", "update", "delete"]
  - apiGroups: ["snapshot.storage.k8s.io"]
    resources: ["volumesnapshotclasses"]
    verbs: ["get", "list", "watch"]
@ -56,7 +53,7 @@ rules:
    verbs: ["get", "list", "watch"]
  - apiGroups: ["snapshot.storage.k8s.io"]
    resources: ["volumesnapshotcontents/status"]
-    verbs: ["update", "patch"]
+    verbs: ["update"]
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["get"]
--- a/csi/deploy/007-csi-provisioner.yaml
+++ b/csi/deploy/007-csi-provisioner.yaml
@ -23,11 +23,6 @@ metadata:
  name: csi-vitastor-provisioner
 spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 0
  selector:
    matchLabels:
      app: csi-vitastor-provisioner
@ -51,7 +46,7 @@ spec:
      priorityClassName: system-cluster-critical
      containers:
        - name: csi-provisioner
-          image: k8s.gcr.io/sig-storage/csi-provisioner:v3.0.0
+          image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.0
          args:
            - "--csi-address=$(ADDRESS)"
            - "--v=5"
@ -121,7 +116,7 @@ spec:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
-          image: vitalif/vitastor-csi:v2.1.0
+          image: vitalif/vitastor-csi:v0.8.5
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/009-storage-class.yaml
+++ b/csi/deploy/009-storage-class.yaml
@ -9,17 +9,11 @@ metadata:
 provisioner: csi.vitastor.io
 volumeBindingMode: Immediate
 parameters:
-  # CSI driver can create block-based volumes and VitastorFS-based volumes
+  etcdVolumePrefix: ""
-  # only VitastorFS-based volumes and raw block volumes (without FS) support ReadWriteMany mode
+  poolId: "1"
  # set this parameter to VitastorFS metadata volume name to use VitastorFS
  # if unset, block-based volumes will be created
  vitastorfs: ""
  # for block-based storage classes, pool ID may be either a string (name) or a number (ID)
  # for vitastorFS-based storage classes it must be a string - name of the default pool for FS data
  poolId: "testpool"
  # volume name prefix for block-based storage classes or NFS subdirectory (including /) for FS-based volumes
  volumePrefix: ""
  # you can choose other configuration file if you have it in the config map
  # different etcd URLs and prefixes should also be put in the config
  #configPath: "/etc/vitastor/vitastor.conf"
-allowVolumeExpansion: true
+  # you can also specify etcdUrl here, maybe to connect to another Vitastor cluster
  # multiple etcdUrls may be specified, delimited by comma
  #etcdUrl: "http://192.168.7.2:2379"
  #etcdPrefix: "/vitastor"
--- a/csi/deploy/example-snapshot-class.yaml
+++ b/csi/deploy/example-snapshot-class.yaml
@ -1,7 +0,0 @@
 apiVersion: snapshot.storage.k8s.io/v1
 kind: VolumeSnapshotClass
 metadata:
  name: vitastor-snapclass
 driver: csi.vitastor.io
 deletionPolicy: Delete
 parameters:
--- a/csi/deploy/example-snapshot-clone.yaml
+++ b/csi/deploy/example-snapshot-clone.yaml
@ -1,16 +0,0 @@
 ---
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: test-vitastor-clone
 spec:
  storageClassName: vitastor
  dataSource:
    name: snap1
    kind: VolumeSnapshot
    apiGroup: snapshot.storage.k8s.io
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
--- a/csi/deploy/example-snapshot.yaml
+++ b/csi/deploy/example-snapshot.yaml
@ -1,8 +0,0 @@
 apiVersion: snapshot.storage.k8s.io/v1
 kind: VolumeSnapshot
 metadata:
  name: snap1
 spec:
  volumeSnapshotClassName: vitastor-snapclass
  source:
    persistentVolumeClaimName: test-vitastor-pvc
--- a/csi/deploy/example-storage-class-fs.yaml
+++ b/csi/deploy/example-storage-class-fs.yaml
@ -1,25 +0,0 @@
 ---
 apiVersion: storage.k8s.io/v1
 kind: StorageClass
 metadata:
  namespace: vitastor-system
  name: vitastor
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
 provisioner: csi.vitastor.io
 volumeBindingMode: Immediate
 parameters:
  # CSI driver can create block-based volumes and VitastorFS-based volumes
  # only VitastorFS-based volumes and raw block volumes (without FS) support ReadWriteMany mode
  # set this parameter to VitastorFS metadata volume name to use VitastorFS
  # if unset, block-based volumes will be created
  vitastorfs: "testfs"
  # for block-based storage classes, pool ID may be either a string (name) or a number (ID)
  # for vitastorFS-based storage classes it must be a string - name of the default pool for FS data
  poolId: "testpool"
  # volume name prefix for block-based storage classes or NFS subdirectory (including /) for FS-based volumes
  volumePrefix: "k8s/"
  # you can choose other configuration file if you have it in the config map
  # different etcd URLs and prefixes should also be put in the config
  #configPath: "/etc/vitastor/vitastor.conf"
 allowVolumeExpansion: true
--- a/csi/go.mod
+++ b/csi/go.mod
@ -3,13 +3,27 @@ module vitastor.io/csi
 go 1.15
 require (
-	github.com/container-storage-interface/spec v1.8.0
+	github.com/container-storage-interface/spec v1.4.0
 	github.com/coreos/bbolt v0.0.0-00010101000000-000000000000 // indirect
 	github.com/coreos/etcd v3.3.25+incompatible // indirect
 	github.com/coreos/go-semver v0.3.0 // indirect
 	github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect
 	github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
 	github.com/dustin/go-humanize v1.0.0 // indirect
 	github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
 	github.com/gorilla/websocket v1.4.2 // indirect
 	github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
 	github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
 	github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
 	github.com/jonboulle/clockwork v0.2.2 // indirect
 	github.com/kubernetes-csi/csi-lib-utils v0.9.1
-	golang.org/x/net v0.7.0
+	github.com/soheilhy/cmux v0.1.5 // indirect
-	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
+	github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
 	github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
 	go.etcd.io/bbolt v0.0.0-00010101000000-000000000000 // indirect
 	go.etcd.io/etcd v3.3.25+incompatible
 	golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
 	google.golang.org/grpc v1.33.1
 	google.golang.org/protobuf v1.24.0
 	k8s.io/klog v1.0.0
 	k8s.io/utils v0.0.0-20210305010621-2afb4311ab10
 )
--- a/csi/go.sum
+++ b/csi/go.sum
@ -31,24 +31,40 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
 github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
 github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/container-storage-interface/spec v1.2.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
-github.com/container-storage-interface/spec v1.8.0 h1:D0vhF3PLIZwlwZEf2eNbpujGCNwspwTYf2idJRJx4xI=
+github.com/container-storage-interface/spec v1.4.0 h1:ozAshSKxpJnYUfmkpZCTYyF/4MYeYlhdXbAvPvfGmkg=
-github.com/container-storage-interface/spec v1.8.0/go.mod h1:ROLik+GhPslwwWRNFF1KasPzroNARibH2rfz1rkg4H0=
+github.com/container-storage-interface/spec v1.4.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
 github.com/coreos/bbolt v1.3.5 h1:XFv7xaq7701j8ZSEzR28VohFYSlyakMyqNMU5FQH6Ac=
 github.com/coreos/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
 github.com/coreos/etcd v3.3.25+incompatible h1:0GQEw6h3YnuOVdtwygkIfJ+Omx0tZ8/QkVyXI4LkbeY=
 github.com/coreos/etcd v3.3.25+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
 github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
 github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
 github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
 github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
 github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg=
 github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
 github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
 github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
 github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
 github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@ -57,6 +73,7 @@ github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
 github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
 github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
@ -71,10 +88,14 @@ github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nA
 github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
 github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
 github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=
 github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
 github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7 h1:5ZkaAPbicIKTF2I64qf5Fh8Aa83Q/dnOafMYV0OMwjA=
 github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
 github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
@ -92,6 +113,7 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD
 github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
 github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
@ -105,24 +127,38 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
 github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
 github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
 github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
 github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
 github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
 github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg=
 github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
 github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y=
 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho=
 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
 github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
 github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
 github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
 github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
 github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
 github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ=
 github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
 github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
 github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
 github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
 github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
 github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
 github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
@ -135,11 +171,14 @@ github.com/kubernetes-csi/csi-lib-utils v0.9.1 h1:sGq6ifVujfMSkfTsMZip44Ttv8SDXv
 github.com/kubernetes-csi/csi-lib-utils v0.9.1/go.mod h1:8E2jVUX9j3QgspwHXa6LwyN7IHQDjW9jX3kwoWnSC+M=
 github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
 github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI=
 github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
 github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
 github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
 github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
@ -149,28 +188,38 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
 github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
 github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
 github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
 github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
 github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
 github.com/prometheus/client_golang v1.7.1 h1:NTGy1Ja9pByO+xAeH/qiWnLrKtr3hJPNjaVUwnjpdpA=
 github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
 github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
 github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M=
 github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
 github.com/prometheus/common v0.10.0 h1:RyRA7RzGXQZiW+tGMr7sxa85G1z0yOpM1qq5c8lNawc=
 github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
 github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
 github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
 github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8=
 github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
 github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
 github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
 github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=
 github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
 github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js=
 github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0=
 github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
 github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
 github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
@ -182,12 +231,24 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
 github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
-github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA=
 github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
 go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
 go.etcd.io/etcd v3.3.25+incompatible h1:V1RzkZJj9LqsJRy+TUBgpWSbZXITLB819lstuTFoZOY=
 go.etcd.io/etcd v3.3.25+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI=
 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
 go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
 go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
 go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU=
 go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
 go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI=
 go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
 go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM=
 go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
 golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
@ -195,8 +256,8 @@ golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8U
 golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -215,7 +276,8 @@ golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCc
 golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
 golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
 golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
-golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -229,23 +291,26 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
 golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
 golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
 golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
+golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
-golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -261,28 +326,22 @@ golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4 h1:5/PjkGUjvEU5Gl6BxmvKRPpqo2uNMv4rcHBMwzk/st8=
 golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
 golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
 golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0 h1:/5xXl8Y5W96D+TtHSlonuFqGHIWVuyCkGJLwGh9JJFs=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@ -304,9 +363,11 @@ golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtn
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@ -327,6 +388,8 @@ google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98
 google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
 google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
 google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
 google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
 google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
 google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0=
@ -352,6 +415,7 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
 gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
@ -380,4 +444,5 @@ k8s.io/utils v0.0.0-20210305010621-2afb4311ab10/go.mod h1:jPW/WVKK9YHAvNhRxK0md/
 rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
 sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw=
 sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
 sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q=
 sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=
--- a/csi/src/config.go
+++ b/csi/src/config.go
@ -5,7 +5,7 @@ package vitastor
 const (
    vitastorCSIDriverName    = "csi.vitastor.io"
-    vitastorCSIDriverVersion = "2.1.0"
+    vitastorCSIDriverVersion = "0.8.5"
 )
 // Config struct fills the parameters of request or user input
--- a/csi/src/controllerserver.go
+++ b/csi/src/controllerserver.go
@ -6,10 +6,13 @@ package vitastor
 import (
    "context"
    "encoding/json"
    "fmt"
    "strings"
    "bytes"
    "strconv"
    "time"
    "fmt"
    "os"
    "os/exec"
    "io/ioutil"
    "github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
@ -17,7 +20,8 @@ import (
    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/status"
-    "google.golang.org/protobuf/types/known/timestamppb"
+
    "go.etcd.io/etcd/clientv3"
    "github.com/container-storage-interface/spec/lib/go/csi"
 )
@ -43,7 +47,6 @@ type InodeConfig struct
    ParentPool uint64 `json:"parent_pool,omitempty"`
    ParentId uint64 `json:"parent_id,omitempty"`
    Readonly bool `json:"readonly,omitempty"`
    CreateTs uint64 `json:"create_ts,omitempty"`
 }
 type ControllerServer struct
@ -59,7 +62,7 @@ func NewControllerServer(driver *Driver) *ControllerServer
    }
 }
-func GetConnectionParams(params map[string]string) (map[string]string, error)
+func GetConnectionParams(params map[string]string) (map[string]string, []string, string)
 {
    ctxVars := make(map[string]string)
    configPath := params["configPath"]
@ -67,59 +70,48 @@ func GetConnectionParams(params map[string]string) (map[string]string, error)
    {
        configPath = "/etc/vitastor/vitastor.conf"
    }
-    ctxVars["configPath"] = configPath
+    else
    if (params["vitastorfs"] != "")
    {
-        ctxVars["vitastorfs"] = params["vitastorfs"]
+        ctxVars["configPath"] = configPath
    }
    config := make(map[string]interface{})
-    configFD, err := os.Open(configPath)
+    if configFD, err := os.Open(configPath); err == nil
    if (err != nil)
    {
-        return nil, err
+        defer configFD.Close()
        data, _ := ioutil.ReadAll(configFD)
        json.Unmarshal(data, &config)
    }
-    defer configFD.Close()
+    // Try to load prefix & etcd URL from the config
    data, _ := ioutil.ReadAll(configFD)
    json.Unmarshal(data, &config)
    // Check etcd URL in the config, but do not use the explicit etcdUrl
    // parameter for CLI calls, otherwise users won't be able to later
    // change them - storage class parameters are saved in volume IDs
    var etcdUrl []string
-    switch config["etcd_address"].(type)
+    if (params["etcdUrl"] != "")
    {
-    case string:
+        ctxVars["etcdUrl"] = params["etcdUrl"]
-        url := strings.TrimSpace(config["etcd_address"].(string))
+        etcdUrl = strings.Split(params["etcdUrl"], ",")
        if (url != "")
        {
            etcdUrl = strings.Split(url, ",")
        }
    case []string:
        etcdUrl = config["etcd_address"].([]string)
    case []interface{}:
        for _, url := range config["etcd_address"].([]interface{})
        {
            s, ok := url.(string)
            if (ok)
            {
                etcdUrl = append(etcdUrl, s)
            }
        }
    }
    if (len(etcdUrl) == 0)
    {
-        return nil, status.Error(codes.InvalidArgument, "etcd_address is missing in "+configPath)
+        switch config["etcd_address"].(type)
        {
        case string:
            etcdUrl = strings.Split(config["etcd_address"].(string), ",")
        case []string:
            etcdUrl = config["etcd_address"].([]string)
        }
    }
-    return ctxVars, nil
+    etcdPrefix := params["etcdPrefix"]
-}
+    if (etcdPrefix == "")
 func invokeCLI(ctxVars map[string]string, args []string) ([]byte, error)
 {
    if (ctxVars["configPath"] != "")
    {
-        args = append(args, "--config_path", ctxVars["configPath"])
+        etcdPrefix, _ = config["etcd_prefix"].(string)
        if (etcdPrefix == "")
        {
            etcdPrefix = "/vitastor"
        }
    }
-    stdout, _, err := system("/usr/bin/vitastor-cli", args...)
+    else
-    return stdout, err
+    {
        ctxVars["etcdPrefix"] = etcdPrefix
    }
    return ctxVars, etcdUrl, etcdPrefix
 }
 // Create the volume
@ -140,101 +132,142 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
        return nil, status.Error(codes.InvalidArgument, "volume capabilities is a required field")
    }
-    ctxVars, err := GetConnectionParams(req.Parameters)
+    etcdVolumePrefix := req.Parameters["etcdVolumePrefix"]
-    if (err != nil)
+    poolId, _ := strconv.ParseUint(req.Parameters["poolId"], 10, 64)
-    {
+    if (poolId == 0)
        return nil, err
    }
    err = cs.checkCaps(volumeCapabilities, ctxVars["vitastorfs"] != "")
    if (err != nil)
    {
        return nil, err
    }
    pool := req.Parameters["poolId"]
    if (pool == "")
    {
        return nil, status.Error(codes.InvalidArgument, "poolId is missing in storage class configuration")
    }
-    volumePrefix := req.Parameters["volumePrefix"]
+
-    if (volumePrefix == "")
+    volName := etcdVolumePrefix + req.GetName()
    {
        // Old name
        volumePrefix = req.Parameters["etcdVolumePrefix"]
    }
    volName := volumePrefix + req.GetName()
    volSize := 1 * GB
    if capRange := req.GetCapacityRange(); capRange != nil
    {
        volSize = ((capRange.GetRequiredBytes() + MB - 1) / MB) * MB
    }
-    if (ctxVars["vitastorfs"] != "")
+    // FIXME: The following should PROBABLY be implemented externally in a management tool
    ctxVars, etcdUrl, etcdPrefix := GetConnectionParams(req.Parameters)
    if (len(etcdUrl) == 0)
    {
-        // Nothing to create, subdirectories are created during mounting
+        return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
        // FIXME: It would be cool to support quotas some day and set it here
        if (req.VolumeContentSource.GetSnapshot() != nil)
        {
            return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
        }
        ctxVars["name"] = volName
        ctxVars["pool"] = pool
        volumeIdJson, _ := json.Marshal(ctxVars)
        return &csi.CreateVolumeResponse{
            Volume: &csi.Volume{
                // Ugly, but VolumeContext isn't passed to DeleteVolume :-(
                VolumeId: string(volumeIdJson),
                CapacityBytes: volSize,
            },
        }, nil
    }
-    args := []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", pool }
+    // Connect to etcd
-
+    cli, err := clientv3.New(clientv3.Config{
-    // Support creation from snapshot
+        DialTimeout: ETCD_TIMEOUT,
-    var src *csi.VolumeContentSource
+        Endpoints: etcdUrl,
-    if (req.VolumeContentSource.GetSnapshot() != nil)
+    })
    {
        snapId := req.VolumeContentSource.GetSnapshot().GetSnapshotId()
        if (snapId != "")
        {
            snapVars := make(map[string]string)
            err := json.Unmarshal([]byte(snapId), &snapVars)
            if (err != nil)
            {
                return nil, status.Error(codes.Internal, "volume ID not in JSON format")
            }
            args = append(args, "--parent", snapVars["name"]+"@"+snapVars["snapshot"])
            src = &csi.VolumeContentSource{
                Type: &csi.VolumeContentSource_Snapshot{
                    Snapshot: &csi.VolumeContentSource_SnapshotSource{
                        SnapshotId: snapId,
                    },
                },
            }
        }
    }
    // Create image using vitastor-cli
    _, err = invokeCLI(ctxVars, args)
    if (err != nil)
    {
-        if (strings.Index(err.Error(), "already exists") > 0)
+        return nil, status.Error(codes.Internal, "failed to connect to etcd at "+strings.Join(etcdUrl, ",")+": "+err.Error())
    }
    defer cli.Close()
    var imageId uint64 = 0
    for
    {
        // Check if the image exists
        ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
        resp, err := cli.Get(ctx, etcdPrefix+"/index/image/"+volName)
        cancel()
        if (err != nil)
        {
-            inodeCfg, err := invokeList(ctxVars, volName, true)
+            return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
        }
        if (len(resp.Kvs) > 0)
        {
            kv := resp.Kvs[0]
            var v InodeIndex
            err := json.Unmarshal(kv.Value, &v)
            if (err != nil)
            {
-                return nil, err
+                return nil, status.Error(codes.Internal, "invalid /index/image/"+volName+" key in etcd: "+err.Error())
            }
-            if (inodeCfg[0].Size < uint64(volSize))
+            poolId = v.PoolId
            imageId = v.Id
            inodeCfgKey := fmt.Sprintf("/config/inode/%d/%d", poolId, imageId)
            ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
            resp, err := cli.Get(ctx, etcdPrefix+inodeCfgKey)
            cancel()
            if (err != nil)
            {
                return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
            }
            if (len(resp.Kvs) == 0)
            {
                return nil, status.Error(codes.Internal, "missing "+inodeCfgKey+" key in etcd")
            }
            var inodeCfg InodeConfig
            err = json.Unmarshal(resp.Kvs[0].Value, &inodeCfg)
            if (err != nil)
            {
                return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
            }
            if (inodeCfg.Size < uint64(volSize))
            {
                return nil, status.Error(codes.Internal, "image "+volName+" is already created, but size is less than expected")
            }
        }
        else
        {
-            return nil, err
+            // Find a free ID
            // Create image metadata in a transaction verifying that the image doesn't exist yet AND ID is still free
            maxIdKey := fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId)
            ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
            resp, err := cli.Get(ctx, maxIdKey)
            cancel()
            if (err != nil)
            {
                return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
            }
            var modRev int64
            var nextId uint64
            if (len(resp.Kvs) > 0)
            {
                var err error
                nextId, err = strconv.ParseUint(string(resp.Kvs[0].Value), 10, 64)
                if (err != nil)
                {
                    return nil, status.Error(codes.Internal, maxIdKey+" contains invalid ID")
                }
                modRev = resp.Kvs[0].ModRevision
                nextId++
            }
            else
            {
                nextId = 1
            }
            inodeIdxJson, _ := json.Marshal(InodeIndex{
                Id: nextId,
                PoolId: poolId,
            })
            inodeCfgJson, _ := json.Marshal(InodeConfig{
                Name: volName,
                Size: uint64(volSize),
            })
            ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
            txnResp, err := cli.Txn(ctx).If(
                clientv3.Compare(clientv3.ModRevision(fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId)), "=", modRev),
                clientv3.Compare(clientv3.CreateRevision(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName)), "=", 0),
                clientv3.Compare(clientv3.CreateRevision(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, poolId, nextId)), "=", 0),
            ).Then(
                clientv3.OpPut(fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId), fmt.Sprintf("%d", nextId)),
                clientv3.OpPut(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName), string(inodeIdxJson)),
                clientv3.OpPut(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, poolId, nextId), string(inodeCfgJson)),
            ).Commit()
            cancel()
            if (err != nil)
            {
                return nil, status.Error(codes.Internal, "failed to commit transaction in etcd: "+err.Error())
            }
            if (txnResp.Succeeded)
            {
                imageId = nextId
                break
            }
            // Start over if the transaction fails
        }
    }
@ -245,7 +278,6 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
            // Ugly, but VolumeContext isn't passed to DeleteVolume :-(
            VolumeId: string(volumeIdJson),
            CapacityBytes: volSize,
            ContentSource: src,
        },
    }, nil
 }
@ -259,30 +291,105 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
        return nil, status.Error(codes.InvalidArgument, "request cannot be empty")
    }
-    volVars := make(map[string]string)
+    ctxVars := make(map[string]string)
-    err := json.Unmarshal([]byte(req.VolumeId), &volVars)
+    err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
-    volName := volVars["name"]
+    volName := ctxVars["name"]
-    ctxVars, err := GetConnectionParams(volVars)
+    _, etcdUrl, etcdPrefix := GetConnectionParams(ctxVars)
-    if (err != nil)
+    if (len(etcdUrl) == 0)
    {
-        return nil, err
+        return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
    }
-    if (ctxVars["vitastorfs"] != "")
+    cli, err := clientv3.New(clientv3.Config{
-    {
+        DialTimeout: ETCD_TIMEOUT,
-        // FIXME: Delete FS subdirectory
+        Endpoints: etcdUrl,
-        return &csi.DeleteVolumeResponse{}, nil
+    })
    }
    _, err = invokeCLI(ctxVars, []string{ "rm", volName })
    if (err != nil)
    {
-        return nil, err
+        return nil, status.Error(codes.Internal, "failed to connect to etcd at "+strings.Join(etcdUrl, ",")+": "+err.Error())
    }
    defer cli.Close()
    // Find inode by name
    ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
    resp, err := cli.Get(ctx, etcdPrefix+"/index/image/"+volName)
    cancel()
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
    }
    if (len(resp.Kvs) == 0)
    {
        return nil, status.Error(codes.NotFound, "volume "+volName+" does not exist")
    }
    var idx InodeIndex
    err = json.Unmarshal(resp.Kvs[0].Value, &idx)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "invalid /index/image/"+volName+" key in etcd: "+err.Error())
    }
    // Get inode config
    inodeCfgKey := fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, idx.PoolId, idx.Id)
    ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
    resp, err = cli.Get(ctx, inodeCfgKey)
    cancel()
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
    }
    if (len(resp.Kvs) == 0)
    {
        return nil, status.Error(codes.NotFound, "volume "+volName+" does not exist")
    }
    var inodeCfg InodeConfig
    err = json.Unmarshal(resp.Kvs[0].Value, &inodeCfg)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
    }
    // Delete inode data by invoking vitastor-cli
    args := []string{
        "rm-data", "--etcd_address", strings.Join(etcdUrl, ","),
        "--pool", fmt.Sprintf("%d", idx.PoolId),
        "--inode", fmt.Sprintf("%d", idx.Id),
    }
    if (ctxVars["configPath"] != "")
    {
        args = append(args, "--config_path", ctxVars["configPath"])
    }
    c := exec.Command("/usr/bin/vitastor-cli", args...)
    var stderr bytes.Buffer
    c.Stdout = nil
    c.Stderr = &stderr
    err = c.Run()
    stderrStr := string(stderr.Bytes())
    if (err != nil)
    {
        klog.Errorf("vitastor-cli rm-data failed: %s, status %s\n", stderrStr, err)
        return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
    }
    // Delete inode config in etcd
    ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
    txnResp, err := cli.Txn(ctx).Then(
        clientv3.OpDelete(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName)),
        clientv3.OpDelete(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, idx.PoolId, idx.Id)),
    ).Commit()
    cancel()
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "failed to delete keys in etcd: "+err.Error())
    }
    if (!txnResp.Succeeded)
    {
        return nil, status.Error(codes.Internal, "failed to delete keys in etcd: transaction failed")
    }
    return &csi.DeleteVolumeResponse{}, nil
@ -313,72 +420,19 @@ func (cs *ControllerServer) ValidateVolumeCapabilities(ctx context.Context, req
    {
        return nil, status.Error(codes.InvalidArgument, "volumeId is nil")
    }
    volVars := make(map[string]string)
    err := json.Unmarshal([]byte(volumeID), &volVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    ctxVars, err := GetConnectionParams(volVars)
    if (err != nil)
    {
        return nil, err
    }
    volumeCapabilities := req.GetVolumeCapabilities()
    if (volumeCapabilities == nil)
    {
        return nil, status.Error(codes.InvalidArgument, "volumeCapabilities is nil")
    }
    err = cs.checkCaps(volumeCapabilities, ctxVars["vitastorfs"] != "")
    if (err != nil)
    {
        return nil, err
    }
    return &csi.ValidateVolumeCapabilitiesResponse{
        Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
            VolumeCapabilities: req.VolumeCapabilities,
        },
    }, nil
 }
 func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability, fs bool) error
 {
    var volumeCapabilityAccessModes []*csi.VolumeCapability_AccessMode
    for _, mode := range []csi.VolumeCapability_AccessMode_Mode{
        csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
-        csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY,
+        csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
        csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY,
        csi.VolumeCapability_AccessMode_SINGLE_NODE_SINGLE_WRITER,
        csi.VolumeCapability_AccessMode_SINGLE_NODE_MULTI_WRITER,
    } {
        volumeCapabilityAccessModes = append(volumeCapabilityAccessModes, &csi.VolumeCapability_AccessMode{Mode: mode})
    }
    for _, capability := range volumeCapabilities
    {
        if (capability.GetBlock() != nil)
        {
            if (fs)
            {
                return status.Errorf(codes.InvalidArgument, "%v not supported with FS-based volumes", capability)
            }
            for _, mode := range []csi.VolumeCapability_AccessMode_Mode{
                csi.VolumeCapability_AccessMode_MULTI_NODE_SINGLE_WRITER,
                csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
            } {
                volumeCapabilityAccessModes = append(volumeCapabilityAccessModes, &csi.VolumeCapability_AccessMode{Mode: mode})
            }
            break
        }
    }
    if (fs)
    {
        // All access modes including RWX are supported with FS-based volumes
        return nil
    }
    capabilitySupport := false
    for _, capability := range volumeCapabilities
@ -394,10 +448,14 @@ func (cs *ControllerServer) checkCaps(volumeCapabilities []*csi.VolumeCapability
    if (!capabilitySupport)
    {
-        return status.Errorf(codes.InvalidArgument, "%v not supported", volumeCapabilities)
+        return nil, status.Errorf(codes.NotFound, "%v not supported", req.GetVolumeCapabilities())
    }
-    return nil
+    return &csi.ValidateVolumeCapabilitiesResponse{
        Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
            VolumeCapabilities: req.VolumeCapabilities,
        },
    }, nil
 }
 // ListVolumes returns a list of volumes
@ -432,8 +490,6 @@ func (cs *ControllerServer) ControllerGetCapabilities(ctx context.Context, req *
        csi.ControllerServiceCapability_RPC_LIST_VOLUMES,
        csi.ControllerServiceCapability_RPC_EXPAND_VOLUME,
        csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT,
        csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS,
        // TODO: csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
    } {
        controllerServerCapabilities = append(controllerServerCapabilities, functionControllerServerCapabilities(capability))
    }
@ -443,252 +499,28 @@ func (cs *ControllerServer) ControllerGetCapabilities(ctx context.Context, req *
    }, nil
 }
 func invokeList(ctxVars map[string]string, pattern string, expectExist bool) ([]InodeConfig, error)
 {
    stat, err := invokeCLI(ctxVars, []string{ "ls", "--json", pattern })
    if (err != nil)
    {
        return nil, err
    }
    var inodeCfg []InodeConfig
    err = json.Unmarshal(stat, &inodeCfg)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "Invalid JSON in vitastor-cli ls: "+err.Error())
    }
    if (expectExist && len(inodeCfg) == 0)
    {
        return nil, status.Error(codes.Internal, "Can't find expected image "+pattern+" via vitastor-cli ls")
    }
    return inodeCfg, nil
 }
 // CreateSnapshot creates a snapshot of an existing PV.
 // The source volume ID is a JSON-encoded map of connection/volume variables; the
 // returned snapshot ID is the same map with an extra "snapshot" key, JSON-encoded.
 // Returns InvalidArgument for missing fields or VitastorFS volumes, Internal for an
 // unparsable volume ID, or whatever error invokeCLI / invokeList produce.
 func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequest) (*csi.CreateSnapshotResponse, error)
 {
    // NOTE(review): on the '+' side of this diff the handler returns Unimplemented
    // right away, which leaves everything below unreachable in that revision.
-    klog.Infof("received controller create snapshot request %+v", protosanitizer.StripSecrets(req))
+    return nil, status.Error(codes.Unimplemented, "")
    if (req == nil)
    {
        return nil, status.Errorf(codes.InvalidArgument, "request cannot be empty")
    }
    if (req.SourceVolumeId == "" || req.Name == "")
    {
        return nil, status.Error(codes.InvalidArgument, "source volume ID and snapshot name are required fields")
    }
    // The snapshot is named exactly as requested
    snapName := req.Name
    // req.SourceVolumeId is a JSON string in our case; decode it into a flat string map
    ctxVars := make(map[string]string)
    err := json.Unmarshal([]byte(req.SourceVolumeId), &ctxVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    // A non-empty "vitastorfs" key marks a VitastorFS volume, which cannot be snapshotted
    if (ctxVars["vitastorfs"] != "")
    {
        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
    }
    volName := ctxVars["name"]
    // Create the snapshot using vitastor-cli.
    // An error whose text contains "already exists" at an index greater than 0 is tolerated,
    // so a repeated request does not fail at this step.
    _, err = invokeCLI(ctxVars, []string{ "create", "--snapshot", snapName, volName })
    if (err != nil && strings.Index(err.Error(), "already exists") <= 0)
    {
        return nil, err
    }
    // Look the snapshot up as "<volume>@<snapshot>"; it must exist at this point
    inodeCfg, err := invokeList(ctxVars, volName+"@"+snapName, true)
    if (err != nil)
    {
        return nil, err
    }
    // Use a JSON snapshot ID again because DeleteSnapshot doesn't receive any other context
    ctxVars["snapshot"] = snapName
    snapIdJson, _ := json.Marshal(ctxVars)
    return &csi.CreateSnapshotResponse{
        Snapshot: &csi.Snapshot{
            SizeBytes: int64(inodeCfg[0].Size),
            SnapshotId: string(snapIdJson),
            SourceVolumeId: req.SourceVolumeId,
            CreationTime: &timestamppb.Timestamp{ Seconds: int64(inodeCfg[0].CreateTs) },
            ReadyToUse: true,
        },
    }, nil
 }
 // DeleteSnapshot deletes the given snapshot of a PV.
 // The snapshot ID is expected to be a JSON-encoded string map carrying at least
 // the "name" (volume) and "snapshot" keys; the image "<name>@<snapshot>" is removed
 // with `rm` via invokeCLI.
 func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error)
 {
    // NOTE(review): on the '+' side of this diff the handler returns Unimplemented
    // right away, which leaves everything below unreachable in that revision.
-    klog.Infof("received controller delete snapshot request %+v", protosanitizer.StripSecrets(req))
+    return nil, status.Error(codes.Unimplemented, "")
    if (req == nil)
    {
        return nil, status.Errorf(codes.InvalidArgument, "request cannot be empty")
    }
    if (req.SnapshotId == "")
    {
        return nil, status.Error(codes.InvalidArgument, "snapshot ID is a required field")
    }
    // Decode the JSON snapshot ID into a flat string map
    volVars := make(map[string]string)
    err := json.Unmarshal([]byte(req.SnapshotId), &volVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "snapshot ID not in JSON format")
    }
    volName := volVars["name"]
    snapName := volVars["snapshot"]
    // NOTE(review): GetConnectionParams is defined elsewhere; presumably it derives the
    // CLI connection variables from the decoded ID - confirm against its definition
    ctxVars, err := GetConnectionParams(volVars)
    if (err != nil)
    {
        return nil, err
    }
    // A non-empty "vitastorfs" key marks a VitastorFS volume, which has no snapshots
    if (ctxVars["vitastorfs"] != "")
    {
        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
    }
    _, err = invokeCLI(ctxVars, []string{ "rm", volName+"@"+snapName })
    if (err != nil)
    {
        return nil, err
    }
    return &csi.DeleteSnapshotResponse{}, nil
 }
 // ListSnapshots lists the snapshots of a PV.
 // The source volume ID is decoded from JSON, all images matching "<name>@*" are
 // listed, and the result is paginated using StartingToken / MaxEntries, with the
 // snapshot name itself serving as the page token.
 func (cs *ControllerServer) ListSnapshots(ctx context.Context, req *csi.ListSnapshotsRequest) (*csi.ListSnapshotsResponse, error)
 {
    // NOTE(review): on the '+' side of this diff the handler returns Unimplemented
    // right away, which leaves everything below unreachable in that revision.
-    klog.Infof("received controller list snapshots request %+v", protosanitizer.StripSecrets(req))
+    return nil, status.Error(codes.Unimplemented, "")
    if (req == nil)
    {
        return nil, status.Error(codes.InvalidArgument, "request cannot be empty")
    }
    // Decode the JSON volume ID into a flat string map
    volVars := make(map[string]string)
    err := json.Unmarshal([]byte(req.SourceVolumeId), &volVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    volName := volVars["name"]
    // NOTE(review): GetConnectionParams is defined elsewhere; presumably it derives the
    // CLI connection variables from the decoded ID - confirm against its definition
    ctxVars, err := GetConnectionParams(volVars)
    if (err != nil)
    {
        return nil, err
    }
    if (ctxVars["vitastorfs"] != "")
    {
        return nil, status.Error(codes.InvalidArgument, "VitastorFS doesn't support snapshots")
    }
    // An empty listing is fine here: a volume may simply have no snapshots
    inodeCfg, err := invokeList(ctxVars, volName+"@*", false)
    if (err != nil)
    {
        return nil, err
    }
    resp := &csi.ListSnapshotsResponse{}
    for _, ino := range inodeCfg
    {
        // Drop the "<volName>@" prefix; assumes every listed name has it, as the pattern suggests
        snapName := ino.Name[len(volName)+1:]
        if (len(req.StartingToken) > 0 && snapName < req.StartingToken)
        {
            // Entry sorts before the requested starting token: skip it
        }
        else if (req.MaxEntries == 0 || len(resp.Entries) < int(req.MaxEntries))
        {
            // Still room on this page (MaxEntries == 0 means no limit).
            // volVars is reused for every entry; only its "snapshot" key changes.
            volVars["snapshot"] = snapName
            snapIdJson, _ := json.Marshal(volVars)
            resp.Entries = append(resp.Entries, &csi.ListSnapshotsResponse_Entry{
                Snapshot: &csi.Snapshot{
                    SizeBytes: int64(ino.Size),
                    SnapshotId: string(snapIdJson),
                    SourceVolumeId: req.SourceVolumeId,
                    CreationTime: &timestamppb.Timestamp{ Seconds: int64(ino.CreateTs) },
                    ReadyToUse: true,
                },
            })
        }
        else
        {
            // Page is full: the first snapshot that did not fit becomes the next page token
            resp.NextToken = snapName
            break
        }
    }
    return resp, nil
 }
-// ControllerExpandVolume increases the size of a volume
+// ControllerExpandVolume resizes a volume
 // The volume ID is a JSON-encoded string map. VitastorFS volumes are left untouched;
 // block volumes are only grown, never shrunk, and the response reports the size
 // read back from the image listing.
 func (cs *ControllerServer) ControllerExpandVolume(ctx context.Context, req *csi.ControllerExpandVolumeRequest) (*csi.ControllerExpandVolumeResponse, error)
 {
    // NOTE(review): on the '+' side of this diff the handler returns Unimplemented
    // right away, which leaves everything below unreachable in that revision.
-    klog.Infof("received controller expand volume request %+v", protosanitizer.StripSecrets(req))
+    return nil, status.Error(codes.Unimplemented, "")
    if (req == nil)
    {
        return nil, status.Error(codes.InvalidArgument, "request cannot be empty")
    }
    if (req.VolumeId == "" || req.CapacityRange == nil || req.CapacityRange.RequiredBytes == 0)
    {
        return nil, status.Error(codes.InvalidArgument, "VolumeId, CapacityRange and RequiredBytes are required fields")
    }
    // Decode the JSON volume ID into a flat string map
    volVars := make(map[string]string)
    err := json.Unmarshal([]byte(req.VolumeId), &volVars)
    if (err != nil)
    {
        return nil, status.Error(codes.Internal, "volume ID not in JSON format")
    }
    volName := volVars["name"]
    // NOTE(review): GetConnectionParams is defined elsewhere; presumably it derives the
    // CLI connection variables from the decoded ID - confirm against its definition
    ctxVars, err := GetConnectionParams(volVars)
    if (err != nil)
    {
        return nil, err
    }
    if (ctxVars["vitastorfs"] != "")
    {
        // Nothing to change for VitastorFS: just echo the requested size back
        // FIXME: Support quotas and change quota here
        return &csi.ControllerExpandVolumeResponse{
            CapacityBytes: req.CapacityRange.RequiredBytes,
            NodeExpansionRequired: false,
        }, nil
    }
    // Read the current image size; the image must exist
    inodeCfg, err := invokeList(ctxVars, volName, true)
    if (err != nil)
    {
        return nil, err
    }
    // Resize only when the request is positive and larger than the current size
    if (req.CapacityRange.RequiredBytes > 0 && inodeCfg[0].Size < uint64(req.CapacityRange.RequiredBytes))
    {
        // Round the requested size up to a multiple of 4096 bytes
        sz := ((req.CapacityRange.RequiredBytes+4095)/4096)*4096
        _, err := invokeCLI(ctxVars, []string{ "modify", "--inc_size", "1", "--resize", fmt.Sprintf("%d", sz), volName })
        if (err != nil)
        {
            return nil, err
        }
        // Re-read the image so that the response carries the size actually applied
        inodeCfg, err = invokeList(ctxVars, volName, true)
        if (err != nil)
        {
            return nil, err
        }
    }
    return &csi.ControllerExpandVolumeResponse{
        CapacityBytes: int64(inodeCfg[0].Size),
        NodeExpansionRequired: false,
    }, nil
 }
 // ControllerGetVolume get volume info
--- a/csi/src/identityserver.go
+++ b/csi/src/identityserver.go
@ -49,13 +49,6 @@ func (is *IdentityServer) GetPluginCapabilities(ctx context.Context, req *csi.Ge
                    },
                },
            },
            {
                Type: &csi.PluginCapability_VolumeExpansion_{
                    VolumeExpansion: &csi.PluginCapability_VolumeExpansion{
                        Type: csi.PluginCapability_VolumeExpansion_OFFLINE,
                    },
                },
            },
        },
    }, nil
 }
--- a/csi/src/nodeserver.go
+++ b/csi/src/nodeserver.go
--- a/csi/src/utils.go
+++ b/csi/src/utils.go
@ -1,342 +0,0 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
 package vitastor
 import (
    "bytes"
    "errors"
    "encoding/json"
    "fmt"
    "os"
    "os/exec"
    "path/filepath"
    "strconv"
    "strings"
    "syscall"
    "k8s.io/klog"
    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/status"
 )
 func Contains(list []string, s string) bool
 {
    for i := 0; i < len(list); i++
    {
        if (list[i] == s)
        {
            return true
        }
    }
    return false
 }
 func checkVduseSupport() bool
 {
    // Check VDUSE support (vdpa, vduse, virtio-vdpa kernel modules)
    vduse := true
    for _, mod := range []string{"vdpa", "vduse", "virtio-vdpa"}
    {
        _, err := os.Stat("/sys/module/"+mod)
        if (err != nil)
        {
            if (!errors.Is(err, os.ErrNotExist))
            {
                klog.Errorf("failed to check /sys/module/%s: %v", mod, err)
            }
            c := exec.Command("/sbin/modprobe", mod)
            c.Stdout = os.Stderr
            c.Stderr = os.Stderr
            err := c.Run()
            if (err != nil)
            {
                klog.Errorf("/sbin/modprobe %s failed: %v", mod, err)
                vduse = false
                break
            }
        }
    }
    // Check that vdpa tool functions
    if (vduse)
    {
        c := exec.Command("/sbin/vdpa", "-j", "dev")
        c.Stderr = os.Stderr
        err := c.Run()
        if (err != nil)
        {
            klog.Errorf("/sbin/vdpa -j dev failed: %v", err)
            vduse = false
        }
    }
    if (!vduse)
    {
        klog.Errorf(
            "Your host apparently has no VDUSE support. VDUSE support disabled, NBD will be used to map devices."+
            " For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
        )
    }
    else
    {
        klog.Infof("VDUSE support enabled successfully")
    }
    return vduse
 }
 func mapNbd(volName string, ctxVars map[string]string, readonly bool) (string, error)
 {
    // Map NBD device
    // FIXME: Check if already mapped
    args := []string{
        "map", "--image", volName,
    }
    if (ctxVars["configPath"] != "")
    {
        args = append(args, "--config_path", ctxVars["configPath"])
    }
    if (readonly)
    {
        args = append(args, "--readonly", "1")
    }
    stdout, stderr, err := system("/usr/bin/vitastor-nbd", args...)
    dev := strings.TrimSpace(string(stdout))
    if (dev == "")
    {
        return "", fmt.Errorf("vitastor-nbd did not return the name of NBD device. output: %s", stderr)
    }
    klog.Infof("Attached volume %s via NBD as %s", volName, dev)
    return dev, err
 }
 func unmapNbd(devicePath string)
 {
    // unmap NBD device
    unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
    if (unmapErr != nil)
    {
        klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
    }
 }
 func findByPidFile(pidFile string) (*os.Process, error)
 {
    pidBuf, err := os.ReadFile(pidFile)
    if (err != nil)
    {
        return nil, err
    }
    pid, err := strconv.ParseInt(strings.TrimSpace(string(pidBuf)), 0, 64)
    if (err != nil)
    {
        return nil, err
    }
    proc, err := os.FindProcess(int(pid))
    if (err != nil)
    {
        return nil, err
    }
    return proc, nil
 }
 func killByPidFile(pidFile string) error
 {
    klog.Infof("killing process with PID from file %s", pidFile)
    proc, err := findByPidFile(pidFile)
    if (err != nil)
    {
        return err
    }
    return proc.Signal(syscall.SIGTERM)
 }
 func startStorageDaemon(vdpaId, volName, pidFile, configPath string, readonly bool) error
 {
    // Start qemu-storage-daemon
    blockSpec := map[string]interface{}{
        "node-name": "disk1",
        "driver": "vitastor",
        "image": volName,
        "cache": map[string]bool{
            "direct": true,
            "no-flush": false,
        },
        "discard": "unmap",
    }
    if (configPath != "")
    {
        blockSpec["config-path"] = configPath
    }
    blockSpecJson, _ := json.Marshal(blockSpec)
    writable := "true"
    if (readonly)
    {
        writable = "false"
    }
    _, _, err := system(
        "/usr/bin/qemu-storage-daemon", "--daemonize", "--pidfile", pidFile, "--blockdev", string(blockSpecJson),
        "--export", "vduse-blk,id="+vdpaId+",node-name=disk1,name="+vdpaId+",num-queues=16,queue-size=128,writable="+writable,
    )
    return err
 }
 // mapVduse attaches the image volName as a VDUSE block device.
 // It reserves a uniquely named state file in stateDir, starts qemu-storage-daemon,
 // adds the device to the VDPA bus, locates the resulting /dev block device and
 // finally writes a DeviceState JSON into the state file.
 // Returns the block device path and the VDPA ID on success. If any step after the
 // daemon start fails, the daemon is killed, the state and PID files are removed,
 // and the error is returned with empty strings.
 func mapVduse(stateDir string, volName string, ctxVars map[string]string, readonly bool) (string, string, error)
 {
    // Reserve a unique state file; its base name (without extension) becomes the VDPA ID
    stateFd, err := os.CreateTemp(stateDir, "vitastor-vduse-*.json")
    if (err != nil)
    {
        return "", "", err
    }
    stateFile := stateFd.Name()
    stateFd.Close()
    vdpaId := filepath.Base(stateFile)
    vdpaId = vdpaId[0:len(vdpaId)-5] // remove ".json"
    // NOTE(review): plain concatenation - assumes stateDir ends with a path separator; confirm against callers
    pidFile := stateDir + vdpaId + ".pid"
    // Map VDUSE device via qemu-storage-daemon
    err = startStorageDaemon(vdpaId, volName, pidFile, ctxVars["configPath"], readonly)
    if (err == nil)
    {
        // Add device to VDPA bus
        _, _, err = system("/sbin/vdpa", "-j", "dev", "add", "name", vdpaId, "mgmtdev", "vduse")
        if (err == nil)
        {
            // Find block device name
            var matches []string
            matches, err = filepath.Glob("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/*")
            if (err == nil && len(matches) == 0)
            {
                err = errors.New("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/* is not found")
            }
            if (err == nil)
            {
                // Only the first match is used; its base name is the block device name
                blockdev := "/dev/"+filepath.Base(matches[0])
                _, err = os.Stat(blockdev)
                if (err == nil)
                {
                    // Fill the state file reserved above
                    stateJSON, _ := json.Marshal(&DeviceState{
                        ConfigPath: ctxVars["configPath"],
                        VdpaId:     vdpaId,
                        Image:      volName,
                        Blockdev:   blockdev,
                        Readonly:   readonly,
                        PidFile:    pidFile,
                    })
                    err = os.WriteFile(stateFile, stateJSON, 0600)
                    if (err == nil)
                    {
                        klog.Infof("Attached volume %s via VDUSE as %s (VDPA ID %s)", volName, blockdev, vdpaId)
                        return blockdev, vdpaId, nil
                    }
                }
            }
        }
        // Something failed after the daemon was started: stop it and clean up its files.
        // err still holds the failure that brought us here and is returned below.
        killErr := killByPidFile(pidFile)
        if (killErr != nil)
        {
            klog.Errorf("Failed to kill started qemu-storage-daemon: %v", killErr)
        }
        os.Remove(stateFile)
        os.Remove(pidFile)
    }
    // NOTE(review): when startStorageDaemon itself fails, the reserved state file is not removed here
    return "", "", err
 }
 func unmapVduse(stateDir, devicePath string)
 {
    if (len(devicePath) < 6 || devicePath[0:6] != "/dev/v")
    {
        klog.Errorf("%s does not start with /dev/v", devicePath)
        return
    }
    vduseDev, err := os.Readlink("/sys/block/"+devicePath[5:])
    if (err != nil)
    {
        klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx): %v", devicePath, err)
        return
    }
    vdpaId := ""
    p := strings.Index(vduseDev, "/vduse/")
    if (p >= 0)
    {
        vduseDev = vduseDev[p+7:]
        p = strings.Index(vduseDev, "/")
        if (p >= 0)
        {
            vdpaId = vduseDev[0:p]
        }
    }
    if (vdpaId == "")
    {
        klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx), but is %v", devicePath, vduseDev)
        return
    }
    unmapVduseById(stateDir, vdpaId)
 }
 func unmapVduseById(stateDir, vdpaId string)
 {
    _, err := os.Stat("/sys/bus/vdpa/devices/"+vdpaId)
    if (err != nil)
    {
        klog.Errorf("failed to stat /sys/bus/vdpa/devices/"+vdpaId+": %v", err)
    }
    else
    {
        _, _, _ = system("/sbin/vdpa", "-j", "dev", "del", vdpaId)
    }
    stateFile := stateDir + vdpaId + ".json"
    os.Remove(stateFile)
    pidFile := stateDir + vdpaId + ".pid"
    _, err = os.Stat(pidFile)
    if (os.IsNotExist(err))
    {
        // ok, already killed
    }
    else if (err != nil)
    {
        klog.Errorf("Failed to stat %v: %v", pidFile, err)
        return
    }
    else
    {
        err = killByPidFile(pidFile)
        if (err != nil)
        {
            klog.Errorf("Failed to kill started qemu-storage-daemon: %v", err)
        }
        os.Remove(pidFile)
    }
 }
 func system(program string, args ...string) ([]byte, []byte, error)
 {
    klog.Infof("Running "+program+" "+strings.Join(args, " "))
    c := exec.Command(program, args...)
    var stdout, stderr bytes.Buffer
    c.Stdout, c.Stderr = &stdout, &stderr
    err := c.Run()
    if (err != nil)
    {
        stdoutStr, stderrStr := string(stdout.Bytes()), string(stderr.Bytes())
        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s\nOutput:\n%s", err, stdoutStr+stderrStr)
        return nil, nil, status.Error(codes.Internal, stdoutStr+stderrStr+" (status "+err.Error()+")")
    }
    return stdout.Bytes(), stderr.Bytes(), nil
 }
 func systemCombined(program string, args ...string) ([]byte, error)
 {
    klog.Infof("Running "+program+" "+strings.Join(args, " "))
    c := exec.Command(program, args...)
    var out bytes.Buffer
    c.Stdout, c.Stderr = &out, &out
    err := c.Run()
    if (err != nil)
    {
        outStr := string(out.Bytes())
        klog.Errorf(program+" "+strings.Join(args, " ")+" failed: %s, status %s\n", outStr, err)
        return nil, status.Error(codes.Internal, outStr+" (status "+err.Error()+")")
    }
    return out.Bytes(), nil
 }
--- a/debian/build-pve-qemu.sh
+++ b/debian/build-pve-qemu.sh
@ -1,58 +0,0 @@
 # Notes for preparing pve-qemu build containers, one section per Debian release.
 # This is a command log meant to be copied by hand, not a runnable script:
 # the leading `exit` stops it immediately if it is executed.
 exit
 git clone https://git.yourcmc.ru/vitalif/pve-qemu .
 # bookworm
 # The container must be a bookworm one: the commands below edit the deb822-style
 # /etc/apt/sources.list.d/debian.sources file and add bookworm repositories
 docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bookworm debian:bookworm bash
 perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources
 echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription' >> /etc/apt/sources.list
 echo 'deb https://vitastor.io/debian bookworm main' >> /etc/apt/sources.list
 echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
 echo 'ru_RU UTF-8' >> /etc/locale.gen
 echo 'en_US UTF-8' >> /etc/locale.gen
 apt-get update
 apt-get install wget ca-certificates
 wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg
 wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
 apt-get update
 apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
 mk-build-deps --install ./control
 # bullseye
 docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
 grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
 echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bullseye pve-no-subscription' >> /etc/apt/sources.list
 echo 'deb https://vitastor.io/debian bullseye main' >> /etc/apt/sources.list
 echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
 echo 'ru_RU UTF-8' >> /etc/locale.gen
 echo 'en_US UTF-8' >> /etc/locale.gen
 apt-get update
 apt-get install wget
 wget https://enterprise.proxmox.com/debian/proxmox-release-bullseye.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bullseye.gpg
 wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
 apt-get update
 apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
 mk-build-deps --install ./control
 # buster
 docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-buster debian:buster bash
 grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
 echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve buster pve-no-subscription' >> /etc/apt/sources.list
 echo 'deb https://vitastor.io/debian buster main' >> /etc/apt/sources.list
 echo 'deb http://deb.debian.org/debian buster-backports main' >> /etc/apt/sources.list
 echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
 echo 'ru_RU UTF-8' >> /etc/locale.gen
 echo 'en_US UTF-8' >> /etc/locale.gen
 apt-get update
 apt-get install wget ca-certificates
 wget http://download.proxmox.com/debian/proxmox-ve-release-6.x.gpg -O /etc/apt/trusted.gpg.d/proxmox-ve-release-6.x.gpg
 wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
 apt-get update
 apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
 mk-build-deps --install ./control
--- a/debian/build-vitastor-bookworm.sh
+++ b/debian/build-vitastor-bookworm.sh
@ -1,7 +0,0 @@
 #!/bin/bash
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
 sudo podman build --build-arg DISTRO=debian --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-bullseye.sh
+++ b/debian/build-vitastor-bullseye.sh
@ -3,5 +3,5 @@
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
-sudo podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
+sudo podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-buster.sh
+++ b/debian/build-vitastor-buster.sh
@ -3,5 +3,5 @@
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
-sudo podman build --build-arg DISTRO=debian --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
+sudo podman build --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/build-vitastor-ubuntu-jammy.sh
+++ b/debian/build-vitastor-ubuntu-jammy.sh
@ -1,7 +0,0 @@
 #!/bin/bash
 cat < vitastor.Dockerfile > ../Dockerfile
 cd ..
 mkdir -p packages
 sudo podman build --build-arg DISTRO=ubuntu --build-arg REL=jammy -v `pwd`/packages:/root/packages -f Dockerfile .
 rm Dockerfile
--- a/debian/changelog
+++ b/debian/changelog
@ -1,10 +1,10 @@
-vitastor (2.1.0-1) unstable; urgency=medium
+vitastor (0.8.5-1) unstable; urgency=medium
  * Bugfixes
 -- Vitaliy Filippov <vitalif@yourcmc.ru>  Fri, 03 Jun 2022 02:09:44 +0300
-vitastor (0.7.0-1) unstable; urgency=medium
+vitastor (0.8.5-1) unstable; urgency=medium
  * Implement NFS proxy
  * Add documentation
--- a/debian/control
+++ b/debian/control
@ -2,10 +2,7 @@ Source: vitastor
 Section: admin
 Priority: optional
 Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
-Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8),
+Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev
  linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev,
  libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev,
  node-bindings <!nocheck>, node-gyp, node-nan
 Standards-Version: 4.5.0
 Homepage: https://vitastor.io/
 Rules-Requires-Root: no
@ -56,15 +53,3 @@ Architecture: amd64
 Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client (= ${binary:Version})
 Description: Vitastor Proxmox Virtual Environment storage plugin
 Vitastor storage plugin for Proxmox Virtual Environment.
 Package: vitastor-opennebula
 Architecture: amd64
 Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client, patch, python3, jq
 Description: Vitastor OpenNebula storage plugin
 Vitastor storage plugin for OpenNebula.
 Package: node-vitastor
 Architecture: amd64
 Depends: ${shlibs:Depends}, ${misc:Depends}, node-bindings
 Description: Node.js bindings for Vitastor client
 Node.js native bindings for the Vitastor client library (vitastor-client).
--- a/debian/libvirt.Dockerfile
+++ b/debian/libvirt.Dockerfile
@ -1,14 +1,13 @@
 # Build patched libvirt for Debian Buster or Bullseye/Sid inside a container
-# cd ..; podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/libvirt.Dockerfile .
+# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/libvirt.Dockerfile .
 ARG DISTRO=
 ARG REL=
-FROM $DISTRO:$REL
+FROM debian:$REL
 ARG REL=
 WORKDIR /root
-RUN if ([ "${DISTRO}" = "debian" ]) && ( [ "${REL}" = "buster" -o "${REL}" = "bullseye" ] ); then \
+RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
        echo >> /etc/apt/preferences; \
        echo 'Package: *' >> /etc/apt/preferences; \
@ -24,7 +23,7 @@ RUN apt-get -y build-dep libvirt0
 RUN apt-get -y install libglusterfs-dev
 RUN apt-get --download-only source libvirt
-ADD patches/libvirt-5.0-vitastor.diff patches/libvirt-7.0-vitastor.diff patches/libvirt-7.5-vitastor.diff patches/libvirt-7.6-vitastor.diff patches/libvirt-8.0-vitastor.diff /root
+ADD patches/libvirt-5.0-vitastor.diff patches/libvirt-7.0-vitastor.diff patches/libvirt-7.5-vitastor.diff patches/libvirt-7.6-vitastor.diff /root
 RUN set -e; \
    mkdir -p /root/packages/libvirt-$REL; \
    rm -rf /root/packages/libvirt-$REL/*; \
--- a/debian/node-vitastor.install
+++ b/debian/node-vitastor.install
@ -1 +0,0 @@
 usr/lib/x86_64-linux-gnu/nodejs/vitastor
--- a/debian/patched-qemu.Dockerfile
+++ b/debian/patched-qemu.Dockerfile
@ -1,67 +1,61 @@
-# Build patched QEMU for Debian inside a container
+# Build patched QEMU for Debian Buster or Bullseye/Sid inside a container
 # cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/patched-qemu.Dockerfile .
 ARG DISTRO=debian
 ARG REL=
-FROM $DISTRO:$REL
+FROM debian:$REL
 ARG DISTRO=debian
 ARG REL=
 WORKDIR /root
-RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" -o "$REL" = "bookworm" ]; then \
+RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
-        if [ "$REL" = "buster" ]; then \
+        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
            echo "deb http://archive.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
        else \
            echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
        fi; \
        echo >> /etc/apt/preferences; \
        echo 'Package: *' >> /etc/apt/preferences; \
-        echo "Pin: release n=$REL-backports" >> /etc/apt/preferences; \
+        echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
        echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
    fi; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
    perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
    echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
 RUN apt-get update
-RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
+RUN apt-get -y install qemu fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
-RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y build-dep qemu
+RUN apt-get -y build-dep qemu
 # To build a custom version
 #RUN cp /root/packages/qemu-orig/* /root
 RUN apt-get --download-only source qemu
-ADD patches /root/vitastor/patches
+ADD patches/qemu-5.0-vitastor.patch patches/qemu-5.1-vitastor.patch patches/qemu-6.1-vitastor.patch src/qemu_driver.c /root/vitastor/patches/
 ADD src/client/qemu_driver.c /root/qemu_driver.c
 #RUN set -e; \
 #    apt-get install -y wget; \
 #    wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
 #    (echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
 #    (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
 #    apt-get update; \
 #    apt-get install -y vitastor-client vitastor-client-dev quilt
 RUN set -e; \
-    DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
+    apt-get install -y wget; \
    wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
    (echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
    (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
    apt-get update; \
-    DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install quilt; \
+    apt-get install -y vitastor-client vitastor-client-dev quilt; \
    mkdir -p /root/packages/qemu-$REL; \
    rm -rf /root/packages/qemu-$REL/*; \
    cd /root/packages/qemu-$REL; \
    dpkg-source -x /root/qemu*.dsc; \
-    QEMU_VER=$(ls -d qemu*/ | perl -pe 's!^.*?(\d+\.\d+).*!$1!'); \
+    if ls -d /root/packages/qemu-$REL/qemu-5.0*; then \
-    D=$(ls -d qemu*/); \
+        D=$(ls -d /root/packages/qemu-$REL/qemu-5.0*); \
-    cp /root/vitastor/patches/qemu-$QEMU_VER-vitastor.patch ./qemu-*/debian/patches; \
+        cp /root/vitastor/patches/qemu-5.0-vitastor.patch $D/debian/patches; \
-    echo qemu-$QEMU_VER-vitastor.patch >> $D/debian/patches/series; \
+        echo qemu-5.0-vitastor.patch >> $D/debian/patches/series; \
    elif ls /root/packages/qemu-$REL/qemu-6.1*; then \
        D=$(ls -d /root/packages/qemu-$REL/qemu-6.1*); \
        cp /root/vitastor/patches/qemu-6.1-vitastor.patch $D/debian/patches; \
        echo qemu-6.1-vitastor.patch >> $D/debian/patches/series; \
    else \
        cp /root/vitastor/patches/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
        P=`ls -d /root/packages/qemu-$REL/qemu-*/debian/patches`; \
        echo qemu-5.1-vitastor.patch >> $P/series; \
    fi; \
    cd /root/packages/qemu-$REL/qemu-*/; \
    quilt push -a; \
    quilt add block/vitastor.c; \
-    cp /root/qemu_driver.c block/vitastor.c; \
+    cp /root/vitastor/patches/qemu_driver.c block/vitastor.c; \
    quilt refresh; \
-    V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(\+deb\d+u\d+)?(~bpo[\d\+]*)?\).*$/$1/')+vitastor5; \
+    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
    if [ "$REL" = bullseye ]; then V=${V}bullseye; fi; \
    DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
    rm -rf /root/packages/qemu-$REL/qemu-*/
--- a/debian/rules
+++ b/debian/rules
@ -4,14 +4,6 @@ export DH_VERBOSE = 1
 %:
 	dh $@
 override_dh_install:
 	perl -pe 's!prefix=/usr!prefix='`pwd`'/debian/tmp/usr!' < obj-x86_64-linux-gnu/src/client/vitastor.pc > node-binding/vitastor.pc
 	cd node-binding && PKG_CONFIG_PATH=./ PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1 npm install --unsafe-perm || exit 1
 	mkdir -p debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor/build/Release
 	cp -v node-binding/package.json node-binding/index.js node-binding/addon.cc node-binding/addon.h node-binding/client.cc node-binding/client.h debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor
 	cp -v node-binding/build/Release/addon.node debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor/build/Release
 	dh_install
 override_dh_installdeb:
 	cat debian/fio_version >> debian/vitastor-fio.substvars
 	[ -f debian/qemu_version ] && (cat debian/qemu_version >> debian/vitastor-qemu.substvars) || true
--- a/debian/vitastor-client.install
+++ b/debian/vitastor-client.install
@ -3,6 +3,4 @@ usr/bin/vitastor-cli
 usr/bin/vitastor-rm
 usr/bin/vitastor-nbd
 usr/bin/vitastor-nfs
 usr/bin/vitastor-kv
 usr/bin/vitastor-kv-stress
 usr/lib/*/libvitastor*.so*
--- a/debian/vitastor-mon.install
+++ b/debian/vitastor-mon.install
@ -1,3 +1,2 @@
-mon usr/lib/vitastor/
+mon usr/lib/vitastor
-mon/scripts/make-etcd usr/lib/vitastor/mon
+mon/vitastor-mon.service /lib/systemd/system
 mon/scripts/vitastor-mon.service /lib/systemd/system
--- a/debian/vitastor-mon.postinst
+++ b/debian/vitastor-mon.postinst
@ -6,6 +6,4 @@ if [ "$1" = "configure" ]; then
 	addgroup --system --quiet vitastor
 	adduser --system --quiet --ingroup vitastor --no-create-home --home /nonexistent vitastor
 	mkdir -p /etc/vitastor
 	mkdir -p /var/lib/vitastor
 	chown vitastor:vitastor /var/lib/vitastor
 fi
--- a/debian/vitastor-opennebula.install
+++ b/debian/vitastor-opennebula.install
@ -1,3 +0,0 @@
 opennebula/remotes var/lib/one/
 opennebula/sudoers.d etc/
 opennebula/install.sh var/lib/one/remotes/datastore/vitastor/
--- a/debian/vitastor-opennebula.postinst
+++ b/debian/vitastor-opennebula.postinst
@ -1,7 +0,0 @@
 #!/bin/sh
 set -e
 if [ "$1" = "configure" ]; then
 	/var/lib/one/remotes/datastore/vitastor/install.sh
 fi
--- a/debian/vitastor-opennebula.triggers
+++ b/debian/vitastor-opennebula.triggers
@ -1,4 +0,0 @@
 interest /var/lib/one/remotes/datastore/downloader.sh
 interest /etc/one/oned.conf
 interest /etc/one/vmm_exec/vmm_execrc
 interest /etc/apparmor.d/local/abstractions/libvirt-qemu
--- a/debian/vitastor-osd.install
+++ b/debian/vitastor-osd.install
@ -1,6 +1,6 @@
 usr/bin/vitastor-osd
 usr/bin/vitastor-disk
 usr/bin/vitastor-dump-journal
-mon/scripts/vitastor-osd@.service /lib/systemd/system
+mon/vitastor-osd@.service /lib/systemd/system
-mon/scripts/vitastor.target /lib/systemd/system
+mon/vitastor.target /lib/systemd/system
-mon/scripts/90-vitastor.rules /lib/udev/rules.d
+mon/90-vitastor.rules /lib/udev/rules.d
--- a/debian/vitastor.Dockerfile
+++ b/debian/vitastor.Dockerfile
@ -1,31 +1,28 @@
-# Build Vitastor packages for Debian inside a container
+# Build Vitastor packages for Debian Buster or Bullseye/Sid inside a container
-# cd ..; podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
+# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
 ARG DISTRO=debian
 ARG REL=
-FROM $DISTRO:$REL
+FROM debian:$REL
 ARG DISTRO=debian
 ARG REL=
 WORKDIR /root
-RUN set -e -x; \
+RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
-    if [ "$REL" = "buster" ]; then \
+        echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
-        apt-get update; \
+        echo >> /etc/apt/preferences; \
-        apt-get -y install wget; \
+        echo 'Package: *' >> /etc/apt/preferences; \
-        wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
+        echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
-        echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
+        echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
    fi; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
    perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
    echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
-RUN apt-get update && \
+RUN apt-get update
-    apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake \
+RUN apt-get -y install fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
-        libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl nodejs npm node-nan node-bindings && \
+RUN apt-get -y build-dep fio
-    apt-get -y build-dep fio && \
+RUN apt-get --download-only source fio
-    apt-get --download-only source fio
+RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
 ADD . /root/vitastor
 RUN set -e -x; \
@ -37,10 +34,8 @@ RUN set -e -x; \
    mkdir -p /root/packages/vitastor-$REL; \
    rm -rf /root/packages/vitastor-$REL/*; \
    cd /root/packages/vitastor-$REL; \
-    FULLVER=$(head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
+    cp -r /root/vitastor vitastor-0.8.5; \
-    VER=${FULLVER%%-*}; \
+    cd vitastor-0.8.5; \
    cp -r /root/vitastor vitastor-$VER; \
    cd vitastor-$VER; \
    ln -s /root/fio-build/fio-*/ ./fio; \
    FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@ -52,14 +47,10 @@ RUN set -e -x; \
    echo fio-headers.patch >> debian/patches/series; \
    rm -rf a b; \
    echo "dep:fio=$FIO" > debian/fio_version; \
    cd /root/packages/vitastor-$REL/vitastor-$VER; \
    mkdir mon/node_modules; \
    cd mon/node_modules; \
    curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx; \
    curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx; \
    cd /root/packages/vitastor-$REL; \
-    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER; \
+    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.5.orig.tar.xz vitastor-0.8.5; \
-    cd vitastor-$VER; \
+    cd vitastor-0.8.5; \
-    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"; \
+    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
    rm -rf /root/packages/vitastor-$REL/vitastor-*/
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@ -1,11 +1,9 @@
 # Build Docker image with Vitastor packages
-FROM debian:bookworm
+FROM debian:bullseye
-ADD etc/apt /etc/apt/
+ADD vitastor.list /etc/apt/sources.list.d
-RUN apt-get update && apt-get -y install vitastor udev systemd qemu-system-x86 qemu-system-common qemu-block-extra qemu-utils jq nfs-common && apt-get clean
+ADD vitastor.gpg /etc/apt/trusted.gpg.d
-ADD sleep.sh /usr/bin/
+ADD vitastor.pref /etc/apt/preferences.d
-ADD install.sh /usr/bin/
+ADD apt.conf /etc/apt/
-ADD scripts /opt/scripts/
+RUN apt-get update && apt-get -y install vitastor qemu-system-x86 qemu-system-common && apt-get clean
 ADD etc /etc/
 RUN ln -s /usr/lib/vitastor/mon/make-etcd /usr/bin/make-etcd
--- a/docker/Makefile
+++ b/docker/Makefile
@ -1,9 +0,0 @@
 VITASTOR_VERSION ?= v2.1.0
 all: build push
 build:
 	@docker build --no-cache --rm -t vitalif/vitastor:$(VITASTOR_VERSION) .
 push:
 	@docker push vitalif/vitastor:$(VITASTOR_VERSION)
--- a/docker/etc/apt/apt.conf
+++ b/docker/etc/apt/apt.conf
--- a/docker/etc/apt/sources.list.d/vitastor.list
+++ b/docker/etc/apt/sources.list.d/vitastor.list
@ -1,2 +0,0 @@
 deb http://vitastor.io/debian bookworm main
 deb http://http.debian.net/debian/ bookworm-backports main
--- a/docker/etc/apt/trusted.gpg.d/vitastor.gpg
+++ b/docker/etc/apt/trusted.gpg.d/vitastor.gpg
--- a/docker/etc/systemd/system/vitastor-etcd.service
+++ b/docker/etc/systemd/system/vitastor-etcd.service
@ -1,27 +0,0 @@
 [Unit]
 Description=Containerized etcd for Vitastor
 After=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
 Wants=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
 PartOf=vitastor.target
 [Service]
 Restart=always
 Environment=GOGC=50
 EnvironmentFile=/etc/vitastor/docker.conf
 EnvironmentFile=/etc/vitastor/etcd.conf
 SyslogIdentifier=etcd
 ExecStart=bash -c 'docker run --rm -i -v /var/lib/vitastor/etcd:/data \
    --log-driver none --network host $CONTAINER_OPTIONS --name vitastor-etcd \
    $ETCD_IMAGE /usr/local/bin/etcd --name "$ETCD_NAME" --data-dir /data \
    --snapshot-count 10000 --advertise-client-urls http://$ETCD_IP:2379 --listen-client-urls http://$ETCD_IP:2379 \
    --initial-advertise-peer-urls http://$ETCD_IP:2380 --listen-peer-urls http://$ETCD_IP:2380 \
    --initial-cluster-token vitastor-etcd-1 --initial-cluster "$ETCD_INITIAL_CLUSTER" \
    --initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \
    --auto-compaction-retention=10 --auto-compaction-mode=revision'
 ExecStop=docker stop vitastor-etcd
 Restart=always
 StartLimitInterval=0
 RestartSec=10
 [Install]
 WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-host.service
+++ b/docker/etc/systemd/system/vitastor-host.service
@ -1,23 +0,0 @@
 [Unit]
 Description=Empty container for running Vitastor commands
 After=network-online.target local-fs.target time-sync.target docker.service
 Wants=network-online.target local-fs.target time-sync.target docker.service
 PartOf=vitastor.target
 [Service]
 Restart=always
 EnvironmentFile=/etc/vitastor/docker.conf
 ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /dev:/dev -v /run:/run \
    --security-opt seccomp=unconfined --privileged --pid=host --log-driver none --network host --name vitastor vitastor:$VITASTOR_VERSION \
    sleep.sh'
 ExecStartPost=udevadm trigger
 ExecStop=docker stop vitastor
 WorkingDirectory=/
 PrivateTmp=false
 TasksMax=infinity
 Restart=always
 StartLimitInterval=0
 RestartSec=10
 [Install]
 WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-mon.service
+++ b/docker/etc/systemd/system/vitastor-mon.service
@ -1,23 +0,0 @@
 [Unit]
 Description=Containerized Vitastor monitor
 After=network-online.target local-fs.target time-sync.target docker.service
 Wants=network-online.target local-fs.target time-sync.target docker.service
 PartOf=vitastor.target
 [Service]
 Restart=always
 EnvironmentFile=/etc/vitastor/docker.conf
 SyslogIdentifier=vitastor-mon
 ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /var/lib/vitastor:/var/lib/vitastor -v /dev:/dev \
    --log-driver none --network host $CONTAINER_OPTIONS --name vitastor-mon vitastor:$VITASTOR_VERSION \
    node /usr/lib/vitastor/mon/mon-main.js'
 ExecStop=docker stop vitastor-mon
 WorkingDirectory=/
 PrivateTmp=false
 TasksMax=infinity
 Restart=always
 StartLimitInterval=0
 RestartSec=10
 [Install]
 WantedBy=multi-user.target
--- a/docker/etc/systemd/system/vitastor-osd@.service
+++ b/docker/etc/systemd/system/vitastor-osd@.service
@ -1,28 +0,0 @@
 [Unit]
 Description=Containerized Vitastor object storage daemon osd.%i
 After=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
 Wants=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
 PartOf=vitastor.target
 [Service]
 LimitNOFILE=1048576
 LimitNPROC=1048576
 LimitMEMLOCK=infinity
 EnvironmentFile=/etc/vitastor/docker.conf
 SyslogIdentifier=vitastor-osd%i
 ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /dev:/dev \
    $(for i in $(ls /dev/vitastor/osd%i-*); do echo --device $i:$i; done) \
    --log-driver none --network host --ulimit nofile=1048576 --ulimit memlock=-1 \
    --security-opt seccomp=unconfined $CONTAINER_OPTIONS --name vitastor-osd%i \
    vitastor:$VITASTOR_VERSION vitastor-disk exec-osd /dev/vitastor/osd%i-data'
 ExecStartPre=+docker exec vitastor vitastor-disk pre-exec /dev/vitastor/osd%i-data
 ExecStop=docker stop vitastor-etcd%i
 WorkingDirectory=/
 PrivateTmp=false
 TasksMax=infinity
 Restart=always
 StartLimitInterval=0
 RestartSec=10
 [Install]
 WantedBy=vitastor.target
--- a/docker/etc/udev/rules.d/90-vitastor.rules
+++ b/docker/etc/udev/rules.d/90-vitastor.rules
@ -1,7 +0,0 @@
 SUBSYSTEM=="block", ENV{ID_PART_ENTRY_TYPE}=="e7009fac-a5a1-4d72-af72-53de13059903", \
    OWNER="vitastor", GROUP="vitastor", \
    IMPORT{program}="/usr/bin/docker exec vitastor vitastor-disk udev $devnode", \
    SYMLINK+="vitastor/$env{VITASTOR_ALIAS}"
 ENV{VITASTOR_OSD_NUM}!="", ACTION=="add", RUN{program}+="/usr/bin/systemctl enable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
 ENV{VITASTOR_OSD_NUM}!="", ACTION=="remove", RUN{program}+="/usr/bin/systemctl disable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
--- a/docker/etc/vitastor/docker.conf
+++ b/docker/etc/vitastor/docker.conf
@ -1,11 +0,0 @@
 #
 # Configuration file for containerized Vitastor installation
 # (non-Kubernetes, with systemd and udev-based orchestration)
 #
 # Desired Vitastor version
 VITASTOR_VERSION=v2.1.0
 # Additional arguments for all containers
 # For example, you may want to specify a custom logging driver here
 CONTAINER_OPTIONS=""
--- a/docker/etc/vitastor/etcd.conf
+++ b/docker/etc/vitastor/etcd.conf
@ -1,4 +0,0 @@
 ETCD_IMAGE=quay.io/coreos/etcd:v3.5.18
 ETCD_NAME=""
 ETCD_IP=""
 ETCD_INITIAL_CLUSTER=""
--- a/docker/etc/vitastor/vitastor.conf
+++ b/docker/etc/vitastor/vitastor.conf
@ -1,2 +0,0 @@
 {
 }
--- a/docker/install.sh
+++ b/docker/install.sh
@ -1,9 +0,0 @@
 #!/bin/bash
 set -e
 cp -urv /etc/default /host-etc/
 cp -urv /etc/systemd /host-etc/
 cp -urv /etc/udev /host-etc/
 cp -urnv /etc/vitastor /host-etc/
 cp -urnv /opt/scripts/* /host-bin/
--- a/docker/scripts/vitastor-cli
+++ b/docker/scripts/vitastor-cli
@ -1,3 +0,0 @@
 #!/bin/bash
 docker exec -it vitastor vitastor-cli "$@"
--- a/docker/scripts/vitastor-disk
+++ b/docker/scripts/vitastor-disk
@ -1,3 +0,0 @@
 #!/bin/bash
 docker exec -it vitastor vitastor-disk "$@"
--- a/docker/scripts/vitastor-fio
+++ b/docker/scripts/vitastor-fio
@ -1,3 +0,0 @@
 #!/bin/bash
 docker exec -it vitastor fio "$@"
--- a/docker/scripts/vitastor-nbd
+++ b/docker/scripts/vitastor-nbd
@ -1,3 +0,0 @@
 #!/bin/bash
 docker exec -it vitastor vitastor-nbd "$@"
--- a/docker/sleep.sh
+++ b/docker/sleep.sh
@ -1,3 +0,0 @@
 #!/bin/bash
 while :; do sleep infinity; done
--- a/docker/vitastor.gpg
+++ b/docker/vitastor.gpg
--- a/docker/vitastor.list
+++ b/docker/vitastor.list
@ -0,0 +1 @@
 deb http://vitastor.io/debian bullseye main
--- a/docker/etc/apt/preferences.d/vitastor.pref
+++ b/docker/etc/apt/preferences.d/vitastor.pref
--- a/docs/config.en.md
+++ b/docs/config.en.md
@ -13,27 +13,24 @@ Vitastor configuration consists of:
 - [Separate OSD settings](config/pool.en.md#osd-settings)
 - [Inode configuration](config/inode.en.md) i.e. image metadata like name, size and parent reference
-Configuration parameters can be set in 4 places:
+Configuration parameters can be set in 3 places:
 - Configuration file (`/etc/vitastor/vitastor.conf` or other path)
 - etcd key `/vitastor/config/global`. Most variables can be set there, but etcd
  connection parameters should obviously be set in the configuration file.
- Command line of Vitastor components: OSD (when you run it without vitastor-disk),
+- Command line of Vitastor components: OSD, mon, fio and QEMU options,
-  mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
+  OpenStack/Proxmox/etc configuration. The latter doesn't allow to set all
-  doesn't allow to set all variables directly, but it allows to override the
+  variables directly, but it allows to override the configuration file and
-  configuration file and set everything you need inside it.
+  set everything you need inside it.
 - OSD superblocks created by [vitastor-disk](usage/disk.en.md) contain
  primarily disk layout parameters of specific OSDs. In fact, these parameters
  are automatically passed into the command line of vitastor-osd process, so
  they have the same "status" as command-line parameters.
 In the future, additional configuration methods may be added:
 - OSD superblock which will, by design, contain parameters related to the disk
  layout and to one specific OSD.
 - OSD-specific keys in etcd like `/vitastor/config/osd/<number>`.
 ## Parameter Reference
 - [Common](config/common.en.md)
 - [Network](config/network.en.md)
 - [Client](config/client.en.md)
 - [Global Disk Layout](config/layout-cluster.en.md)
 - [OSD Disk Layout](config/layout-osd.en.md)
 - [OSD Runtime Parameters](config/osd.en.md)
--- a/docs/config.ru.md
+++ b/docs/config.ru.md
@ -14,29 +14,25 @@
 - [Настроек инодов](config/inode.ru.md), т.е. метаданных образов, таких, как имя, размер и ссылки на
  родительский образ
-Параметры конфигурации могут задаваться в 4 местах:
+Параметры конфигурации могут задаваться в 3 местах:
 - Файле конфигурации (`/etc/vitastor/vitastor.conf` или по другому пути)
 - Ключе в etcd `/vitastor/config/global`. Большая часть параметров может
  задаваться там, кроме, естественно, самих параметров соединения с etcd,
  которые должны задаваться в файле конфигурации
- В командной строке компонентов Vitastor: OSD (при ручном запуске без vitastor-disk),
+- В командной строке компонентов Vitastor: OSD, монитора, опциях fio и QEMU,
-  монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
+  настроек OpenStack, Proxmox и т.п. Последние, как правило, не включают полный
-  как правило, не включают полный набор параметров напрямую, но позволяют
+  набор параметров напрямую, но разрешают определить путь к файлу конфигурации
-  определить путь к файлу конфигурации и задать любые параметры в нём.
+  и задать любые параметры в нём.
 - В суперблоке OSD, записываемом [vitastor-disk](usage/disk.ru.md) - параметры,
  связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
  при запуске OSD эти параметры автоматически передаются в командную строку
  процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам
  командной строки OSD.
 В будущем также могут быть добавлены другие способы конфигурации:
 - Суперблок OSD, в котором будут храниться параметры OSD, связанные с дисковым
  форматом и с этим конкретным OSD.
 - OSD-специфичные ключи в etcd типа `/vitastor/config/osd/<номер>`.
 ## Список параметров
 - [Общие](config/common.ru.md)
 - [Сеть](config/network.ru.md)
 - [Клиентский код](config/client.ru.md)
 - [Глобальные дисковые параметры](config/layout-cluster.ru.md)
 - [Дисковые параметры OSD](config/layout-osd.ru.md)
 - [Прочие параметры OSD](config/osd.ru.md)
--- a/docs/config/client.en.md
+++ b/docs/config/client.en.md
@ -1,217 +0,0 @@
 [Documentation](../../README.md#documentation) → [Configuration](../config.en.md) → Client Parameters
 -----
 [Читать на русском](client.ru.md)
 # Client Parameters
 These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
 affect their interaction with the cluster.
 - [client_iothread_count](#client_iothread_count)
 - [client_retry_interval](#client_retry_interval)
 - [client_eio_retry_interval](#client_eio_retry_interval)
 - [client_retry_enospc](#client_retry_enospc)
 - [client_wait_up_timeout](#client_wait_up_timeout)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
 - [client_max_buffered_bytes](#client_max_buffered_bytes)
 - [client_max_buffered_ops](#client_max_buffered_ops)
 - [client_max_writeback_iodepth](#client_max_writeback_iodepth)
 - [nbd_timeout](#nbd_timeout)
 - [nbd_max_devices](#nbd_max_devices)
 - [nbd_max_part](#nbd_max_part)
 - [osd_nearfull_ratio](#osd_nearfull_ratio)
 ## client_iothread_count
 - Type: integer
 - Default: 0
 Number of separate threads for handling TCP network I/O on the client library
 side. Enabling 4 threads usually increases the peak performance of each
 client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
 100-150 to 400 thousand iops, but at the same time it increases latency.
 Latency increase depends on CPU: with CPU power saving disabled latency
 only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
 with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
 iops decrease from 2000 to 1000). RDMA isn't affected by this option.
 It's recommended to enable client I/O threads if you don't use RDMA and want
 to increase peak client performance.
 ## client_retry_interval
 - Type: milliseconds
 - Default: 50
 - Minimum: 10
 - Can be changed online: yes
 Retry time for I/O requests failed due to inactive PGs or network
 connectivity errors.
 ## client_eio_retry_interval
 - Type: milliseconds
 - Default: 1000
 - Can be changed online: yes
 Retry time for I/O requests failed due to data corruption or unfinished
 EC object deletions (has_incomplete PG state). 0 disables such retries
 and clients are not blocked and just get EIO error code instead.
 ## client_retry_enospc
 - Type: boolean
 - Default: true
 - Can be changed online: yes
 Retry writes on out of space errors to wait until some space is freed on
 OSDs.
 ## client_wait_up_timeout
 - Type: seconds
 - Default: 16
 - Can be changed online: yes
 Wait for this number of seconds until PGs are up when doing operations
 which require all PGs to be up. Currently only used by object listings
 in delete and merge-based commands ([vitastor-cli rm](../usage/cli.en.md#rm), merge and so on).
 The default value is calculated as `1 + OSD lease timeout`, which is
 `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
 ## client_max_dirty_bytes
 - Type: integer
 - Default: 33554432
 - Can be changed online: yes
 Without [immediate_commit](layout-cluster.en.md#immediate_commit)=all this parameter sets the limit of "dirty"
 (not committed by fsync) data allowed by the client before forcing an
 additional fsync and committing the data. Also note that the client always
 holds a copy of uncommitted data in memory so this setting also affects
 RAM usage of clients.
 ## client_max_dirty_ops
 - Type: integer
 - Default: 1024
 - Can be changed online: yes
 Same as client_max_dirty_bytes, but instead of total size, limits the number
 of uncommitted write operations.
 ## client_enable_writeback
 - Type: boolean
 - Default: false
 - Can be changed online: yes
 This parameter enables client-side write buffering. This means that write
 requests are accumulated in memory for a short time before being sent to
 a Vitastor cluster, which allows sending them in parallel and increasing
 performance of some applications. Writes are buffered until client forces
 a flush with fsync() or until the amount of buffered writes exceeds the
 limit.
 Write buffering significantly increases performance of some applications,
 for example, CrystalDiskMark under Windows (LOL :-D), but also any other
 applications if they do writes in one of two non-optimal ways: either if
 they do a lot of small (4 kb or so) sequential writes, or if they do a lot
 of small random writes, but without any parallelism or asynchrony, and also
 without calling fsync().
 With write buffering enabled, you can expect around 22000 T1Q1 random write
 iops in QEMU more or less regardless of the quality of your SSDs, and this
 number is in fact bound by QEMU itself rather than Vitastor (check it
 yourself by adding a "driver=null-co" disk in QEMU). Without write
 buffering, the current record is 9900 iops, but the number is usually
 even lower with non-ideal hardware, for example, it may be 5000 iops.
 Even when this parameter is enabled, write buffering isn't enabled until
 the client explicitly allows it, because enabling it without the client
 being aware of the fact that its writes may be buffered may lead to data
 loss. Because of this, older versions of clients don't support write
 buffering at all, newer versions of the QEMU driver allow write buffering
 only if it's enabled in disk settings with `-blockdev cache.direct=false`,
 and newer versions of FIO only allow write buffering if you don't specify
 `-direct=1`. NBD and NFS drivers allow write buffering by default.
 You can overcome this restriction too with the `client_writeback_allowed`
 parameter, but you shouldn't do that unless you **really** know what you
 are doing.
 ## client_max_buffered_bytes
 - Type: integer
 - Default: 33554432
 - Can be changed online: yes
 Maximum total size of buffered writes which triggers write-back when reached.
 ## client_max_buffered_ops
 - Type: integer
 - Default: 1024
 - Can be changed online: yes
 Maximum number of buffered writes which triggers write-back when reached.
 Multiple consecutive modified data regions are counted as 1 write here.
 ## client_max_writeback_iodepth
 - Type: integer
 - Default: 256
 - Can be changed online: yes
 Maximum number of parallel writes when flushing buffered data to the server.
 ## nbd_timeout
 - Type: seconds
 - Default: 300
 Timeout for I/O operations for [NBD](../usage/nbd.en.md). If an operation
 executes for longer than this timeout, including when your cluster is just
 temporarily down for longer than the timeout, the NBD device will detach by itself
 (and possibly break the mounted file system).
 You can set the timeout to 0 to never detach, but in that case you won't be
 able to remove the kernel device at all if the NBD process dies - you'll have
 to reboot the host.
 ## nbd_max_devices
 - Type: integer
 - Default: 64
 Maximum number of NBD devices in the system. This value is passed as
 `nbds_max` parameter for the nbd kernel module when vitastor-nbd autoloads it.
 ## nbd_max_part
 - Type: integer
 - Default: 3
 Maximum number of partitions per NBD device. This value is passed as
 `max_part` parameter for the nbd kernel module when vitastor-nbd autoloads it.
 Note that (nbds_max)*(1+max_part) usually can't exceed 256.
 ## osd_nearfull_ratio
 - Type: number
 - Default: 0.95
 - Can be changed online: yes
 Ratio of used space on OSD to treat it as "almost full" in vitastor-cli status output.
 Remember that some client writes may hang or complete with an error if even
 just one OSD becomes 100 % full!
 However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
 unable to start at all), so you'll be able to recover from "out of space" errors
 without destroying and recreating OSDs.
--- a/docs/config/client.ru.md
+++ b/docs/config/client.ru.md
@ -1,221 +0,0 @@
 [Документация](../../README-ru.md#документация) → [Конфигурация](../config.ru.md) → Параметры клиентского кода
 -----
 [Read in English](client.en.md)
 # Параметры клиентского кода
 Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
 затрагивают логику их работы с кластером.
 - [client_iothread_count](#client_iothread_count)
 - [client_retry_interval](#client_retry_interval)
 - [client_eio_retry_interval](#client_eio_retry_interval)
 - [client_retry_enospc](#client_retry_enospc)
 - [client_wait_up_timeout](#client_wait_up_timeout)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
 - [client_max_buffered_bytes](#client_max_buffered_bytes)
 - [client_max_buffered_ops](#client_max_buffered_ops)
 - [client_max_writeback_iodepth](#client_max_writeback_iodepth)
 - [nbd_timeout](#nbd_timeout)
 - [nbd_max_devices](#nbd_max_devices)
 - [nbd_max_part](#nbd_max_part)
 - [osd_nearfull_ratio](#osd_nearfull_ratio)
 ## client_iothread_count
 - Тип: целое число
 - Значение по умолчанию: 0
 Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
 клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
 производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
 чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
 секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
 при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
 падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
 и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
 RDMA данная опция не влияет.
 Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
 RDMA и хотите повысить пиковую производительность клиентов.
 ## client_retry_interval
 - Тип: миллисекунды
 - Значение по умолчанию: 50
 - Минимальное значение: 10
 - Можно менять на лету: да
 Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
 ошибок сети.
 ## client_eio_retry_interval
 - Тип: миллисекунды
 - Значение по умолчанию: 1000
 - Можно менять на лету: да
 Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
 или незавершённых удалений EC-объектов (состояния PG has_incomplete).
 0 отключает повторы таких запросов и клиенты не блокируются, а вместо
 этого просто получают код ошибки EIO.
 ## client_retry_enospc
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 - Можно менять на лету: да
 Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
 ожидать, пока на OSD не освободится место.
 ## client_wait_up_timeout
 - Тип: секунды
 - Значение по умолчанию: 16
 - Можно менять на лету: да
 Время ожидания поднятия PG при операциях, требующих активности всех PG.
 В данный момент используется листингами объектов в командах, использующих
 удаление и слияние ([vitastor-cli rm](../usage/cli.ru.md#rm), merge и подобные).
 Значение по умолчанию вычисляется как `1 + время lease OSD`, равное
 `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
 ## client_max_dirty_bytes
 - Тип: целое число
 - Значение по умолчанию: 33554432
 - Можно менять на лету: да
 При работе без [immediate_commit](layout-cluster.ru.md#immediate_commit)=all - это лимит объёма "грязных" (не
 зафиксированных fsync-ом) данных, при достижении которого клиент будет
 принудительно вызывать fsync и фиксировать данные. Также стоит иметь в виду,
 что в этом случае до момента fsync клиент хранит копию незафиксированных
 данных в памяти, то есть, настройка влияет на потребление памяти клиентами.
 ## client_max_dirty_ops
 - Тип: целое число
 - Значение по умолчанию: 1024
 - Можно менять на лету: да
 Аналогично client_max_dirty_bytes, но ограничивает количество
 незафиксированных операций записи вместо их общего объёма.
 ## client_enable_writeback
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 - Можно менять на лету: да
 Данный параметр разрешает включать буферизацию записи в памяти. Буферизация
 означает, что операции записи отправляются на кластер Vitastor не сразу, а
 могут небольшое время накапливаться в памяти и сбрасываться сразу пакетами,
 до тех пор, пока либо не будет превышен лимит неотправленных записей, либо
 пока клиент не вызовет fsync.
 Буферизация значительно повышает производительность некоторых приложений,
 например, CrystalDiskMark в Windows (ха-ха :-D), но также и любых других,
 которые пишут на диск неоптимально: либо последовательно, но мелкими блоками
 (например, по 4 кб), либо случайно, но без параллелизма и без fsync - то
 есть, например, отправляя 128 операций записи в разные места диска, но не
 все сразу с помощью асинхронного I/O, а по одной.
 В QEMU с буферизацией записи можно ожидать показателя примерно 22000
 операций случайной записи в секунду в 1 поток и с глубиной очереди 1 (T1Q1)
 без fsync, почти вне зависимости от того, насколько хороши ваши диски - эта
 цифра упирается в сам QEMU. Без буферизации рекорд пока что - 9900 операций
 в секунду, но на железе похуже может быть и поменьше, например, 5000 операций
 в секунду.
 При этом, даже если данный параметр включён, буферизация не включается, если
 явно не разрешена клиентом, т.к. если клиент не знает, что запросы записи
 буферизуются, это может приводить к потере данных. Поэтому в старых версиях
 клиентских драйверов буферизация записи не включается вообще, в новых
 версиях QEMU-драйвера включается, только если разрешена опцией диска
 `-blockdev cache.direct=false`, а в fio - только если нет опции `-direct=1`.
 В NBD и NFS драйверах буферизация записи разрешена по умолчанию.
 Можно обойти и это ограничение с помощью параметра `client_writeback_allowed`,
 но делать так не надо, если только вы не уверены в том, что делаете, на все
 100%. :-)
 ## client_max_buffered_bytes
 - Тип: целое число
 - Значение по умолчанию: 33554432
 - Можно менять на лету: да
 Максимальный общий размер буферизованных записей, при достижении которого
 начинается процесс сброса данных на сервер.
 ## client_max_buffered_ops
 - Тип: целое число
 - Значение по умолчанию: 1024
 - Можно менять на лету: да
 Максимальное количество буферизованных записей, при достижении которого
 начинается процесс сброса данных на сервер. При этом несколько
 последовательных изменённых областей здесь считаются 1 записью.
 ## client_max_writeback_iodepth
 - Тип: целое число
 - Значение по умолчанию: 256
 - Можно менять на лету: да
 Максимальное число параллельных операций записи при сбросе буферов на сервер.
 ## nbd_timeout
 - Тип: секунды
 - Значение по умолчанию: 300
 Таймаут для операций чтения/записи через [NBD](../usage/nbd.ru.md). Если
 операция выполняется дольше таймаута, включая временную недоступность
 кластера на время, большее таймаута, NBD-устройство отключится само собой
 (и, возможно, сломает примонтированную ФС).
 Вы можете установить таймаут в 0, чтобы никогда не отключать устройство по
 таймауту, но в этом случае вы вообще не сможете удалить устройство, если
 процесс NBD умрёт - вам придётся перезагружать сервер.
 ## nbd_max_devices
 - Тип: целое число
 - Значение по умолчанию: 64
 Максимальное число NBD-устройств в системе. Данное значение передаётся
 модулю ядра nbd как параметр `nbds_max`, когда его загружает vitastor-nbd.
 ## nbd_max_part
 - Тип: целое число
 - Значение по умолчанию: 3
 Максимальное число разделов на одном NBD-устройстве. Данное значение передаётся
 модулю ядра nbd как параметр `max_part`, когда его загружает vitastor-nbd.
 Имейте в виду, что (nbds_max)*(1+max_part) обычно не может превышать 256.
 ## osd_nearfull_ratio
 - Тип: число
 - Значение по умолчанию: 0.95
 - Можно менять на лету: да
 Доля занятого места на OSD, начиная с которой он считается "почти заполненным" в
 выводе vitastor-cli status.
 Помните, что часть клиентских запросов может зависнуть или завершиться с ошибкой,
 если на 100 % заполнится хотя бы 1 OSD!
 Однако, в отличие от Ceph, заполненные на 100 % OSD Vitastor не падают (в Ceph
 заполненные на 100% OSD вообще не могут стартовать), так что вы сможете
 восстановить работу кластера после ошибок отсутствия свободного места
 без уничтожения и пересоздания OSD.
--- a/docs/config/common.en.md
+++ b/docs/config/common.en.md
@ -25,16 +25,11 @@ running if required parameters are specified.
 ## etcd_address
 - Type: string or array of strings
 - Can be changed online: yes
 etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
 specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
 Note that https is not supported for etcd connections yet.
 etcd connection endpoints can be changed online by updating global
 configuration in etcd itself - this allows switching the cluster to new
 etcd addresses without downtime.
 ## etcd_prefix
 - Type: string
@ -47,6 +42,5 @@ example, use a single etcd cluster for multiple Vitastor clusters.
 - Type: integer
 - Default: 0
 - Can be changed online: yes
 Log level. Raise if you want more verbose output.
--- a/docs/config/common.ru.md
+++ b/docs/config/common.ru.md
@ -24,14 +24,10 @@
 ## etcd_address
 - Тип: строка или массив строк
 - Можно менять на лету: да
 Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
 или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
 Адреса подключения к etcd можно поменять на лету, обновив конфигурацию в
 самом etcd - это позволяет переключить кластер на новые etcd без остановки.
 ## etcd_prefix
 - Тип: строка
@ -45,6 +41,5 @@
 - Тип: целое число
 - Значение по умолчанию: 0
 - Можно менять на лету: да
 Уровень логирования. Повысьте, если хотите более подробный вывод.
--- a/docs/config/layout-cluster.en.md
+++ b/docs/config/layout-cluster.en.md
@ -33,13 +33,12 @@ Size of objects (data blocks) into which all physical and virtual drives
 in Vitastor, affects memory usage, write amplification and I/O load
 distribution effectiveness.
-Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
+Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
-it's possible to use 1 MB for SSD too - it will lower memory usage, but
+it's possible to use 4 MB for SSD too - it will lower memory usage, but
 may increase average WA and reduce linear performance.
 OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
 544 MB per 1 TB of used disk space with the default 128 KB block size.
 With 1 MB it's 8 times lower.
 ## bitmap_granularity
@ -56,24 +55,14 @@ Can't be smaller than the OSD data device sector.
 ## immediate_commit
 - Type: string
- Default: all
+- Default: false
-One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
+Another parameter which is really important for performance.
 This parameter is also really important for performance.
 TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
 power loss protection (nonvolatile write-through cache) and also for most HDDs.
 "none" or "small" should be only selected if you use desktop SSDs without
 capacitors or drives with slow write-back cache that can't be disabled. Check
 immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
 Detailed explanation:
 Desktop SSDs are very fast (100000+ iops) for simple random writes
 without cache flush. However, they are really slow (only around 1000 iops)
-if you try to fsync() each write, that is, if you want to guarantee that
+if you try to fsync() each write, that is, when you want to guarantee that
-each change gets actually persisted to the physical media.
+each change gets immediately persisted to the physical media.
 Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
 "Supercapacitor-based Power Loss Protection", on the other hand, are equally
@ -85,8 +74,8 @@ really slow when used with desktop SSDs. Vitastor, however, can also
 efficiently utilize desktop SSDs by postponing fsync until the client calls
 it explicitly.
-This is what this parameter regulates. When it's set to "all" Vitastor
+This is what this parameter regulates. When it's set to "all" the whole
-cluster commits each change to disks immediately and clients just
+Vitastor cluster commits each change to disks immediately and clients just
 ignore fsyncs because they know for sure that they're unneeded. This reduces
 the amount of network roundtrips performed by clients and improves
 performance. So it's always better to use server grade SSDs with
@ -106,8 +95,11 @@ SSD cache or "media-cache" - for example, a lot of Seagate EXOS drives have
 it (they have internal SSD cache even though it's not stated in datasheets).
 Setting this parameter to "all" or "small" in OSD parameters requires enabling
-[disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
+disable_journal_fsync and disable_meta_fsync, setting it to "all" also requires
-[disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
+enabling disable_data_fsync.
-"all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
+
-vitastor-disk tries to do that by default, first checking/disabling drive cache.
+TLDR: For optimal performance, set immediate_commit to "all" if you only use
-If it can't disable drive cache, the OSD gets initialized with "none".
+SSDs with supercapacitor-based power loss protection (nonvolatile
 write-through cache) for both data and journals in the whole Vitastor
 cluster. Set it to "small" if you only use such SSDs for journals. Leave
 empty if your drives have write-back cache.
--- a/docs/config/layout-cluster.ru.md
+++ b/docs/config/layout-cluster.ru.md
@ -33,14 +33,14 @@ OSD) могут сосуществовать в одном кластере Vita
 настроек, влияет на потребление памяти, объём избыточной записи (write
 amplification) и эффективность распределения нагрузки по OSD.
-Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
+Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
-для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
+мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
 это понизит использование памяти, но ухудшит распределение нагрузки и в
 среднем увеличит WA.
 Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
 т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
-стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
+стандартном 128 КБ блоке.
 ## bitmap_granularity
@ -57,18 +57,9 @@ amplification) и эффективность распределения нагр
 ## immediate_commit
 - Тип: строка
- Значение по умолчанию: all
+- Значение по умолчанию: false
-Одно из значений "none", "small" или "all". Глобальное значение, может быть
+Ещё один важный для производительности параметр.
 переопределено [на уровне пула](pool.ru.md#immediate_commit).
 Данный параметр тоже важен для производительности.
 Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
 суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
 устанавливать только при использовании SSD настольного класса без
 суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
 Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
 Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
 секунду) при простой случайной записи без сбросов кэша. Однако они очень
@ -89,7 +80,7 @@ Power Loss Protection" - одинаково быстрые и со сбросо
 эффективно утилизировать настольные SSD.
 Данный параметр влияет как раз на это. Когда он установлен в значение "all",
-кластер Vitastor мгновенно фиксирует каждое изменение на физические
+весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
 носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
 знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
 по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
@ -112,6 +103,12 @@ HDD-дисках с внутренним SSD или "медиа" кэшем - н
 указано в спецификациях).
 Указание "all" или "small" в настройках / командной строке OSD требует
-включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
+включения disable_journal_fsync и disable_meta_fsync, значение "all" также
-[disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
+требует включения disable_data_fsync.
-также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
+
 Итого, вкратце: для оптимальной производительности установите
 immediate_commit в значение "all", если вы используете в кластере только SSD
 с суперконденсаторами и для данных, и для журналов. Если вы используете
 такие SSD для всех журналов, но не для данных - можете установить параметр
 в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
 оставьте параметр пустым.
--- a/docs/config/layout-osd.en.md
+++ b/docs/config/layout-osd.en.md
@ -24,8 +24,6 @@ initialization and can't be changed after it without losing data.
 - [disable_journal_fsync](#disable_journal_fsync)
 - [disable_device_lock](#disable_device_lock)
 - [disk_alignment](#disk_alignment)
 - [data_csum_type](#data_csum_type)
 - [csum_block_size](#csum_block_size)
 ## data_device
@ -118,13 +116,12 @@ Physical block size of the journal device. Must be a multiple of
 - Type: boolean
 - Default: false
-Do not issue fsyncs to the data device, i.e. do not force it to flush cache.
+Do not issue fsyncs to the data device, i.e. do not flush its cache.
-Safe ONLY if your data device has write-through cache or if write-back
+Safe ONLY if your data device has write-through cache. If you disable
-cache is disabled. If you disable drive cache manually with `hdparm` or
+the cache yourself using `hdparm` or `scsi_disk/cache_type` then make sure
-writing to `/sys/.../scsi_disk/cache_type` then make sure that you do it
+that the cache disable command is run every time before starting Vitastor
-every time before starting Vitastor OSD (vitastor-disk does it automatically).
+OSD, for example, in the systemd unit. See also `immediate_commit` option
-See also [immediate_commit](layout-cluster.en.md#immediate_commit)
+for the instructions to disable cache and how to benefit from it.
 for information about how to benefit from disabled cache.
 ## disable_meta_fsync
@ -172,47 +169,8 @@ size, it actually has to write the whole 4 KB sector.
 Because of this it can actually be beneficial to use SSDs which work well
 with 512 byte sectors and use 512 byte disk_alignment, journal_block_size
-and meta_block_size. But at the moment, no such SSDs are known...
+and meta_block_size. But the only SSD that may fit into this category is
 Intel Optane (probably, not tested yet).
 Clients don't need to be aware of disk_alignment, so it's not required to
 put a modified value into etcd key /vitastor/config/global.
 ## data_csum_type
 - Type: string
 - Default: none
 Data checksum type to use. May be "crc32c" or "none". Set to "crc32c" to
 enable data checksums.
 ## csum_block_size
 - Type: integer
 - Default: 4096
 Checksum calculation block size.
 Must be equal or a multiple of [bitmap_granularity](layout-cluster.en.md#bitmap_granularity)
 (which is usually 4 KB).
 Checksums increase metadata size by 4 bytes per each csum_block_size of data.
 Checksums are always a tradeoff:
 1. You either sacrifice +1 GB RAM per 1 TB of data
 2. Or you raise csum_block_size, for example, to 32k and sacrifice
   50% random write iops due to checksum read-modify-write
 3. Or you turn off [inmemory_metadata](osd.en.md#inmemory_metadata) and
   sacrifice 50% random read iops due to checksum reads
 All-flash clusters usually have enough RAM to use default csum_block_size,
 which uses 1 GB RAM per 1 TB of data. HDD clusters usually don't.
 Thus, recommended setups are:
 1. All-flash, 1 GB RAM per 1 TB data: default (csum_block_size=4k)
 2. All-flash, less RAM: csum_block_size=4k + inmemory_metadata=false
 3. Hybrid HDD+SSD: csum_block_size=4k + inmemory_metadata=false
 4. HDD-only, faster random read: csum_block_size=32k
 5. HDD-only, faster random write: csum_block_size=4k +
   inmemory_metadata=false + meta_io=cached
 See also [meta_io](osd.en.md#meta_io).
--- a/docs/config/layout-osd.ru.md
+++ b/docs/config/layout-osd.ru.md
@ -25,8 +25,6 @@
 - [disable_journal_fsync](#disable_journal_fsync)
 - [disable_device_lock](#disable_device_lock)
 - [disk_alignment](#disk_alignment)
 - [data_csum_type](#data_csum_type)
 - [csum_block_size](#csum_block_size)
 ## data_device
@ -122,14 +120,13 @@ SSD-диске, иначе производительность пострада
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
-Не отправлять fsync-и устройству данных, т.е. не заставлять его сбрасывать кэш.
+Не отправлять fsync-и устройству данных, т.е. не сбрасывать его кэш.
 Безопасно, ТОЛЬКО если ваше устройство данных имеет кэш со сквозной
-записью (write-through) или если кэш с отложенной записью (write-back) отключён.
+записью (write-through). Если вы отключаете кэш через `hdparm` или
-Если вы отключаете кэш вручную через `hdparm` или запись в `/sys/.../scsi_disk/cache_type`,
+`scsi_disk/cache_type`, то удостоверьтесь, что команда отключения кэша
-то удостоверьтесь, что вы делаете это каждый раз перед запуском Vitastor OSD
+выполняется перед каждым запуском Vitastor OSD, например, в systemd unit-е.
-(vitastor-disk делает это автоматически). Смотрите также опцию
+Смотрите также опцию `immediate_commit` для инструкций по отключению кэша
-[immediate_commit](layout-cluster.ru.md#immediate_commit) для информации о том,
+и о том, как из этого извлечь выгоду.
 как извлечь выгоду из отключённого кэша.
 ## disable_meta_fsync
@ -180,52 +177,9 @@ SSD и HDD диски используют 4 КБ физические сект
 Поэтому, на самом деле, может быть выгодно найти SSD, хорошо работающие с
 меньшими, 512-байтными, блоками и использовать 512-байтные disk_alignment,
-journal_block_size и meta_block_size. Однако на данный момент такие SSD
+journal_block_size и meta_block_size. Однако единственные SSD, которые
-не известны...
+теоретически могут попасть в эту категорию - это Intel Optane (но и это
 пока не проверялось автором).
 Клиентам не обязательно знать про disk_alignment, так что помещать значение
 этого параметра в etcd в /vitastor/config/global не нужно.
 ## data_csum_type
 - Тип: строка
 - Значение по умолчанию: none
 Тип используемых OSD контрольных сумм данных. Может быть "crc32c" или "none".
 Установите в "crc32c", чтобы включить расчёт и проверку контрольных сумм данных.
 Следует понимать, что контрольные суммы в зависимости от размера блока их
 расчёта либо увеличивают потребление памяти, либо снижают производительность.
 Подробнее смотрите в описании параметра [csum_block_size](#csum_block_size).
 ## csum_block_size
 - Тип: целое число
 - Значение по умолчанию: 4096
 Размер блока расчёта контрольных сумм.
 Должен быть равен или кратен [bitmap_granularity](layout-cluster.ru.md#bitmap_granularity)
 (который обычно равен 4 КБ).
 Контрольные суммы увеличивают размер метаданных на 4 байта на каждые
 csum_block_size данных.
 Контрольные суммы - это всегда компромисс:
 1. Вы либо жертвуете потреблением +1 ГБ памяти на 1 ТБ дискового пространства
 2. Либо вы повышаете csum_block_size до, скажем, 32k и жертвуете 50%
   скорости случайной записи из-за цикла чтения-изменения-записи для расчёта
   новых контрольных сумм
 3. Либо вы отключаете [inmemory_metadata](osd.ru.md#inmemory_metadata) и
   жертвуете 50% скорости случайного чтения из-за чтения контрольных сумм
   с диска
 Таким образом, рекомендуются следующие варианты настроек:
 1. All-flash, 1 ГБ памяти на 1 ТБ данных: по умолчанию (csum_block_size=4k)
 2. All-flash, меньше памяти: csum_block_size=4k + inmemory_metadata=false
 3. Гибридные HDD+SSD: csum_block_size=4k + inmemory_metadata=false
 4. Только HDD, быстрее случайное чтение: csum_block_size=32k
 5. Только HDD, быстрее случайная запись: csum_block_size=4k +
   inmemory_metadata=false + meta_io=cached
 Смотрите также [meta_io](osd.ru.md#meta_io).
--- a/docs/config/monitor.en.md
+++ b/docs/config/monitor.en.md
@ -8,14 +8,6 @@
 These parameters only apply to Monitors.
 - [use_antietcd](#use_antietcd)
 - [enable_prometheus](#enable_prometheus)
 - [mon_http_port](#mon_http_port)
 - [mon_http_ip](#mon_http_ip)
 - [mon_https_cert](#mon_https_cert)
 - [mon_https_key](#mon_https_key)
 - [mon_https_client_auth](#mon_https_client_auth)
 - [mon_https_ca](#mon_https_ca)
 - [etcd_mon_ttl](#etcd_mon_ttl)
 - [etcd_mon_timeout](#etcd_mon_timeout)
 - [etcd_mon_retries](#etcd_mon_retries)
@ -23,95 +15,12 @@ These parameters only apply to Monitors.
 - [mon_stats_timeout](#mon_stats_timeout)
 - [osd_out_time](#osd_out_time)
 - [placement_levels](#placement_levels)
 - [use_old_pg_combinator](#use_old_pg_combinator)
 - [osd_backfillfull_ratio](#osd_backfillfull_ratio)
 ## use_antietcd
 - Type: boolean
 - Default: false
 Enable experimental built-in etcd replacement (clustered key-value database):
 [antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
 When set to true, monitor runs internal antietcd automatically if it finds
 a network interface with an IP address matching one of addresses in the
 `etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
 the monitor command line). If there are multiple matching addresses, it also
 checks `antietcd_port` and antietcd is started for address with matching port.
 By default, antietcd accepts connection on the selected IP address, but it
 can also be overridden manually in the `antietcd_ip` option.
 When antietcd is started, monitor stores cluster metadata itself and exposes
 an etcd-compatible REST API. On disk, these metadata are stored in
 `/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
 or antietcd_data_dir options). All other antietcd parameters
 (see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
 cluster, cluster_key, persist_filter, stale_read can also be set in
 Vitastor configuration with `antietcd_` prefix.
 You can dump/load data to or from antietcd using Antietcd `anticli` tool:
 ```
 npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
 npm exec anticli -e http://antietcd:2379/v3 load < dump.json
 ```
 ## enable_prometheus
 - Type: boolean
 - Default: true
 Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
 Note that only the active (master) monitor exposes metrics, others return
 HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
 Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
 ## mon_http_port
 - Type: integer
 - Default: 8060
 HTTP port for monitors to listen to (including metrics exporter)
 ## mon_http_ip
 - Type: string
 IP address for monitors to listen to (all addresses by default)
 ## mon_https_cert
 - Type: string
 Path to PEM SSL certificate file for monitor to listen using HTTPS
 ## mon_https_key
 - Type: string
 Path to PEM SSL private key file for monitor to listen using HTTPS
 ## mon_https_client_auth
 - Type: boolean
 - Default: false
 Enable HTTPS client certificate-based authorization for monitor connections
 ## mon_https_ca
 - Type: string
 Path to CA certificate for client HTTPS authorization
 ## etcd_mon_ttl
 - Type: seconds
- Default: 1
+- Default: 30
- Minimum: 5
+- Minimum: 10
 Monitor etcd lease refresh interval in seconds
@ -168,26 +77,3 @@ values.  Smaller priority means higher level in tree. For example,
 levels are always predefined and can't be removed. If one of them is not
 present in the configuration, then it is defined with the default priority
 (100 for "host", 101 for "osd").
 ## use_old_pg_combinator
 - Type: boolean
 - Default: false
 Use the old PG combination generator which doesn't support [level_placement](pool.en.md#level_placement)
 and [raw_placement](pool.en.md#raw_placement) for pools which don't use these features.
 ## osd_backfillfull_ratio
 - Type: number
 - Default: 0.99
 Monitors try to prevent OSDs becoming 100% full during rebalance or recovery by
 calculating how much space will be occupied on every OSD after all rebalance
 and recovery operations finish, and pausing rebalance and recovery if that
 amount of space exceeds OSD capacity multiplied by the value of this
 configuration parameter.
 Future used space is calculated by summing space used by all user data blocks
 (objects) in all PGs placed on a specific OSD, even if some of these objects
 currently reside on a different set of OSDs.
--- a/docs/config/monitor.ru.md
+++ b/docs/config/monitor.ru.md
@ -8,14 +8,6 @@
 Данные параметры используются только мониторами Vitastor.
 - [use_antietcd](#use_antietcd)
 - [enable_prometheus](#enable_prometheus)
 - [mon_http_port](#mon_http_port)
 - [mon_http_ip](#mon_http_ip)
 - [mon_https_cert](#mon_https_cert)
 - [mon_https_key](#mon_https_key)
 - [mon_https_client_auth](#mon_https_client_auth)
 - [mon_https_ca](#mon_https_ca)
 - [etcd_mon_ttl](#etcd_mon_ttl)
 - [etcd_mon_timeout](#etcd_mon_timeout)
 - [etcd_mon_retries](#etcd_mon_retries)
@ -23,97 +15,12 @@
 - [mon_stats_timeout](#mon_stats_timeout)
 - [osd_out_time](#osd_out_time)
 - [placement_levels](#placement_levels)
 - [use_old_pg_combinator](#use_old_pg_combinator)
 - [osd_backfillfull_ratio](#osd_backfillfull_ratio)
 ## use_antietcd
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
 [antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
 Если параметр установлен в true, монитор запускает antietcd автоматически,
 если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
 конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
 командной строки монитора). Если таких адресов несколько, также проверяется
 опция `antietcd_port` и antietcd запускается для адреса с соответствующим
 портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
 IP, но его также можно определить вручную опцией `antietcd_ip`.
 При запуске antietcd монитор сам хранит центральные метаданные кластера и
 выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
 `/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
 antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
 (смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
 node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
 в конфигурации Vitastor с префиксом `antietcd_`.
 Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
 `anticli`:
 ```
 npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
 npm exec anticli -e http://antietcd:2379/v3 load < dump.json
 ```
 ## enable_prometheus
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
 Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
 возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
 в задание по сбору метрик Prometheus.
 Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
 ## mon_http_port
 - Тип: целое число
 - Значение по умолчанию: 8060
 Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
 ## mon_http_ip
 - Тип: строка
 IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
 ## mon_https_cert
 - Тип: строка
 Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
 ## mon_https_key
 - Тип: строка
 Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
 ## mon_https_client_auth
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
 ## mon_https_ca
 - Тип: строка
 Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
 ## etcd_mon_ttl
 - Тип: секунды
- Значение по умолчанию: 1
+- Значение по умолчанию: 30
- Минимальное значение: 5
+- Минимальное значение: 10
 Интервал обновления etcd резервации (lease) монитором
@ -171,27 +78,3 @@ OSD перед обновлением агрегированной статис
 "host" и "osd" являются предопределёнными и не могут быть удалены. Если
 один из них отсутствует в конфигурации, он доопределяется с приоритетом по
 умолчанию (100 для уровня "host", 101 для "osd").
 ## use_old_pg_combinator
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
 и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
 ## osd_backfillfull_ratio
 - Тип: число
 - Значение по умолчанию: 0.99
 Мониторы стараются предотвратить 100% заполнение OSD в процессе ребаланса
 или восстановления, рассчитывая, сколько места будет занято на каждом OSD после
 завершения всех операций ребаланса и восстановления, и приостанавливая
 ребаланс и восстановление, если рассчитанный объём превышает ёмкость OSD,
 умноженную на значение данного параметра.
 Будущее занятое место рассчитывается сложением места, занятого всеми
 пользовательскими блоками данных (объектами) во всех PG, расположенных
 на конкретном OSD, даже если часть этих объектов в данный момент находится
 на другом наборе OSD.
--- a/docs/config/network.en.md
+++ b/docs/config/network.en.md
@ -9,11 +9,9 @@
 These parameters apply to clients and OSDs and affect network connection logic
 between clients, OSDs and etcd.
- [osd_network](#osd_network)
+- [tcp_header_buffer_size](#tcp_header_buffer_size)
- [osd_cluster_network](#osd_cluster_network)
+- [use_sync_send_recv](#use_sync_send_recv)
 - [use_rdma](#use_rdma)
 - [use_rdmacm](#use_rdmacm)
 - [disable_tcp](#disable_tcp)
 - [rdma_device](#rdma_device)
 - [rdma_port_num](#rdma_port_num)
 - [rdma_gid_index](#rdma_gid_index)
@ -21,284 +19,17 @@ between clients, OSDs and etcd.
 - [rdma_max_sge](#rdma_max_sge)
 - [rdma_max_msg](#rdma_max_msg)
 - [rdma_max_recv](#rdma_max_recv)
 - [rdma_max_send](#rdma_max_send)
 - [rdma_odp](#rdma_odp)
 - [peer_connect_interval](#peer_connect_interval)
 - [peer_connect_timeout](#peer_connect_timeout)
 - [osd_idle_timeout](#osd_idle_timeout)
 - [osd_ping_timeout](#osd_ping_timeout)
 - [up_wait_retry_interval](#up_wait_retry_interval)
 - [max_etcd_attempts](#max_etcd_attempts)
 - [etcd_quick_timeout](#etcd_quick_timeout)
 - [etcd_slow_timeout](#etcd_slow_timeout)
 - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
+- [etcd_ws_keepalive_timeout](#etcd_ws_keepalive_timeout)
- [etcd_min_reload_interval](#etcd_min_reload_interval)
+- [client_dirty_limit](#client_dirty_limit)
 - [tcp_header_buffer_size](#tcp_header_buffer_size)
 - [use_sync_send_recv](#use_sync_send_recv)
 ## osd_network
 - Type: string or array of strings
 Network mask of public OSD network(s) (IPv4 or IPv6). Each OSD listens to all
 addresses of UP + RUNNING interfaces matching one of these networks, on the
 same port. Port is auto-selected except if [bind_port](osd.en.md#bind_port) is
 explicitly specified. Bind address(es) may also be overridden manually by
 specifying [bind_address](osd.en.md#bind_address). If OSD networks are not specified
 at all, OSD just listens to a wildcard address (0.0.0.0).
 ## osd_cluster_network
 - Type: string or array of strings
 Network mask of separate network(s) (IPv4 or IPv6) to use for OSD
 cluster connections. I.e. OSDs will always attempt to use these networks
 to connect to other OSDs, while clients will attempt to use networks from
 [osd_network](#osd_network).
 ## use_rdma
 - Type: boolean
 - Default: true
 Try to use RDMA through libibverbs for communication if it's available.
 Disable if you don't want Vitastor to use RDMA. TCP-only clients can also
 talk to an RDMA-enabled cluster, so disabling RDMA may be needed if clients
 have RDMA devices, but they are not connected to the cluster.
 `use_rdma` works with RoCEv1/RoCEv2 networks, but not with iWARP and,
 maybe, with some Infiniband configurations which require RDMA-CM.
 Consider `use_rdmacm` for such networks.
 ## use_rdmacm
 - Type: boolean
 - Default: true
 Use an alternative implementation of RDMA through RDMA-CM (Connection
 Manager). Works with all RDMA networks: Infiniband, iWARP and
 RoCEv1/RoCEv2, and even allows disabling TCP and running only with RDMA.
 OSDs always use random port numbers for RDMA-CM listeners, different
 from their TCP ports. `use_rdma` is automatically disabled when
 `use_rdmacm` is enabled.
 ## disable_tcp
 - Type: boolean
 - Default: true
 Fully disable TCP and only use RDMA-CM for OSD communication.
 ## rdma_device
 - Type: string
 RDMA device name to use for Vitastor OSD communications (for example,
 "rocep5s0f0"). If not specified, Vitastor will try to find an RoCE
 device matching [osd_network](#osd_network), preferring RoCEv2,
 or choose the first available RDMA device if no RoCE devices are
 found or if `osd_network` is not specified. Auto-selection is also
 unsupported with old libibverbs < v32, like in Debian 10 Buster or
 CentOS 7.
 Vitastor supports all adapters, even ones without ODP support, like
 Mellanox ConnectX-3 and non-Mellanox cards. Versions up to Vitastor
 1.2.0 required ODP which is only present in Mellanox ConnectX >= 4.
 See also [rdma_odp](#rdma_odp).
 Run `ibv_devinfo -v` as root to list available RDMA devices and their
 features.
 Remember that you also have to configure your network switches if you use
 RoCE/RoCEv2, otherwise you may experience unstable performance. Refer to
 the manual of your network vendor for details about setting up the switch
 for RoCEv2 correctly. Usually it means setting up Lossless Ethernet with
 PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
 ## rdma_port_num
 - Type: integer
 RDMA device port number to use. Only for devices that have more than 1 port.
 See `phys_port_cnt` in `ibv_devinfo -v` output to determine how many ports
 your device has.
 Not relevant for RDMA-CM (use_rdmacm).
 ## rdma_gid_index
 - Type: integer
 Global address identifier index of the RDMA device to use. Different GID
 indexes may correspond to different protocols like RoCEv1, RoCEv2 and iWARP.
 Search for "GID" in `ibv_devinfo -v` output to determine which GID index
 you need.
 If not specified, Vitastor will try to auto-select a RoCEv2 IPv4 GID, then
 RoCEv2 IPv6 GID, then RoCEv1 IPv4 GID, then RoCEv1 IPv6 GID, then IB GID.
 GID auto-selection is unsupported with libibverbs < v32.
 A correct rdma_gid_index for RoCEv2 is usually 1 (IPv6) or 3 (IPv4).
 Not relevant for RDMA-CM (use_rdmacm).
 ## rdma_mtu
 - Type: integer
 RDMA Path MTU to use. Must be 1024, 2048 or 4096. Default is to use the
 RDMA device's MTU.
 ## rdma_max_sge
 - Type: integer
 - Default: 128
 Maximum number of scatter/gather entries to use for RDMA. OSDs negotiate
 the actual value when establishing connection anyway, so it's usually not
 required to change this parameter.
 ## rdma_max_msg
 - Type: integer
 - Default: 132096
 Maximum size of a single RDMA send or receive operation in bytes.
 ## rdma_max_recv
 - Type: integer
 - Default: 16
 Maximum number of RDMA receive buffers per connection (RDMA requires
 preallocated buffers to receive data). Each buffer is `rdma_max_msg` bytes
 in size. So this setting directly affects memory usage: a single Vitastor
 RDMA client uses `rdma_max_recv * rdma_max_msg * OSD_COUNT` bytes of memory.
 Default is roughly 2 MB * number of OSDs.
 ## rdma_max_send
 - Type: integer
 - Default: 8
 Maximum number of outstanding RDMA send operations per connection. Should be
 less than `rdma_max_recv` so the receiving side doesn't run out of buffers.
 Doesn't affect memory usage - additional memory isn't allocated for send
 operations.
 ## rdma_odp
 - Type: boolean
 - Default: false
 Use RDMA with On-Demand Paging. ODP is currently only available on Mellanox
 ConnectX-4 and newer adapters. With ODP, memory doesn't have to be registered
 explicitly for the RDMA adapter to be able to use it. This, in turn, allows skipping
 memory copying during sending. One would think this should improve performance, but
 **in reality** RDMA performance with ODP is **drastically** worse. Example
 3-node cluster with 8 NVMe in each node and 2*25 GBit/s ConnectX-6 RDMA network
 without ODP pushes 3950000 read iops, but only 239000 iops with ODP...
 This happens because Mellanox ODP implementation seems to be based on
 message retransmissions when the adapter doesn't know about the buffer yet -
 it likely uses standard "RNR retransmissions" (RNR = receiver not ready)
 which are generally slow in RDMA/RoCE networks. Here's a presentation about
 it from ISPASS-2021 conference: https://tkygtr6.github.io/pub/ISPASS21_slides.pdf
 ODP support is retained in the code just in case a good ODP implementation
 appears one day.
 ## peer_connect_interval
 - Type: seconds
 - Default: 5
 - Minimum: 1
 - Can be changed online: yes
 Interval before attempting to reconnect to an unavailable OSD.
 ## peer_connect_timeout
 - Type: seconds
 - Default: 5
 - Minimum: 1
 - Can be changed online: yes
 Timeout for OSD connection attempts.
 ## osd_idle_timeout
 - Type: seconds
 - Default: 5
 - Minimum: 1
 - Can be changed online: yes
 OSD connection inactivity time after which clients and other OSDs send
 keepalive requests to check state of the connection.
 ## osd_ping_timeout
 - Type: seconds
 - Default: 5
 - Minimum: 1
 - Can be changed online: yes
 Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
 within this time, the connection to it is dropped and a reconnection attempt
 is scheduled.
 ## max_etcd_attempts
 - Type: integer
 - Default: 5
 - Can be changed online: yes
 Maximum number of attempts for etcd requests which can't be retried
 indefinitely.
 ## etcd_quick_timeout
 - Type: milliseconds
 - Default: 1000
 - Can be changed online: yes
 Timeout for etcd requests which should complete quickly, like lease refresh.
 ## etcd_slow_timeout
 - Type: milliseconds
 - Default: 5000
 - Can be changed online: yes
 Timeout for etcd requests which are allowed to wait for some time.
 ## etcd_keepalive_timeout
 - Type: seconds
 - Default: max(30, etcd_report_interval*2)
 - Can be changed online: yes
 Timeout for etcd connection HTTP Keep-Alive. Should be higher than
 etcd_report_interval to guarantee that keepalive actually works.
 ## etcd_ws_keepalive_interval
 - Type: seconds
 - Default: 5
 - Can be changed online: yes
 etcd websocket ping interval required to keep the connection alive and
 detect disconnections quickly.
 ## etcd_min_reload_interval
 - Type: milliseconds
 - Default: 1000
 - Can be changed online: yes
 Minimum interval for full etcd state reload. Introduced to prevent
 excessive load on etcd during outages when etcd can't keep up with event
 streams and cancels them.
 ## tcp_header_buffer_size
@ -321,3 +52,177 @@ want.
 If true, synchronous send/recv syscalls are used instead of io_uring for
 socket communication. Useless for OSDs because they require io_uring anyway,
 but may be required for clients with old kernel versions.
 ## use_rdma
 - Type: boolean
 - Default: true
 Try to use RDMA for communication if it's available. Disable if you don't
 want Vitastor to use RDMA. TCP-only clients can also talk to an RDMA-enabled
 cluster, so disabling RDMA may be needed if clients have RDMA devices,
 but they are not connected to the cluster.
 ## rdma_device
 - Type: string
 RDMA device name to use for Vitastor OSD communications (for example,
 "rocep5s0f0"). Please note that Vitastor RDMA requires Implicit On-Demand
 Paging (Implicit ODP) and Scatter/Gather (SG) support from the RDMA device
 to work. For example, Mellanox ConnectX-3 and older adapters don't have
 Implicit ODP, so they're unsupported by Vitastor. Run `ibv_devinfo -v` as
 root to list available RDMA devices and their features.
 ## rdma_port_num
 - Type: integer
 - Default: 1
 RDMA device port number to use. Only for devices that have more than 1 port.
 See `phys_port_cnt` in `ibv_devinfo -v` output to determine how many ports
 your device has.
 ## rdma_gid_index
 - Type: integer
 - Default: 0
 Global address identifier index of the RDMA device to use. Different GID
 indexes may correspond to different protocols like RoCEv1, RoCEv2 and iWARP.
 Search for "GID" in `ibv_devinfo -v` output to determine which GID index
 you need.
 **IMPORTANT:** If you want to use RoCEv2 (as recommended) then the correct
 rdma_gid_index is usually 1 (IPv6) or 3 (IPv4).
 ## rdma_mtu
 - Type: integer
 - Default: 4096
 RDMA Path MTU to use. Must be 1024, 2048 or 4096. There is usually no
 reason to change it from the default 4096.
 ## rdma_max_sge
 - Type: integer
 - Default: 128
 Maximum number of scatter/gather entries to use for RDMA. OSDs negotiate
 the actual value when establishing connection anyway, so it's usually not
 required to change this parameter.
 ## rdma_max_msg
 - Type: integer
 - Default: 1048576
 Maximum size of a single RDMA send or receive operation in bytes.
 ## rdma_max_recv
 - Type: integer
 - Default: 8
 Maximum number of parallel RDMA receive operations. Note that this number
 of receive buffers `rdma_max_msg` in size are allocated for each client,
 so this setting actually affects memory usage. This is because RDMA receive
 operations are (sadly) still not zero-copy in Vitastor. It may be fixed in
 later versions.
 ## peer_connect_interval
 - Type: seconds
 - Default: 5
 - Minimum: 1
 Interval before attempting to reconnect to an unavailable OSD.
 ## peer_connect_timeout
 - Type: seconds
 - Default: 5
 - Minimum: 1
 Timeout for OSD connection attempts.
 ## osd_idle_timeout
 - Type: seconds
 - Default: 5
 - Minimum: 1
 OSD connection inactivity time after which clients and other OSDs send
 keepalive requests to check state of the connection.
 ## osd_ping_timeout
 - Type: seconds
 - Default: 5
 - Minimum: 1
 Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
 within this time, the connection to it is dropped and a reconnection attempt
 is scheduled.
 ## up_wait_retry_interval
 - Type: milliseconds
 - Default: 500
 - Minimum: 50
 OSDs respond to clients with a special error code when they receive I/O
 requests for a PG that's not synchronized and started. This parameter sets
 the time for the clients to wait before re-attempting such I/O requests.
 ## max_etcd_attempts
 - Type: integer
 - Default: 5
 Maximum number of attempts for etcd requests which can't be retried
 indefinitely.
 ## etcd_quick_timeout
 - Type: milliseconds
 - Default: 1000
 Timeout for etcd requests which should complete quickly, like lease refresh.
 ## etcd_slow_timeout
 - Type: milliseconds
 - Default: 5000
 Timeout for etcd requests which are allowed to wait for some time.
 ## etcd_keepalive_timeout
 - Type: seconds
 - Default: max(30, etcd_report_interval*2)
 Timeout for etcd connection HTTP Keep-Alive. Should be higher than
 etcd_report_interval to guarantee that keepalive actually works.
 ## etcd_ws_keepalive_timeout
 - Type: seconds
 - Default: 30
 etcd websocket ping interval required to keep the connection alive and
 detect disconnections quickly.
 ## client_dirty_limit
 - Type: integer
 - Default: 33554432
 Without immediate_commit=all this parameter sets the limit of "dirty"
 (not committed by fsync) data allowed by the client before forcing an
 additional fsync and committing the data. Also note that the client always
 holds a copy of uncommitted data in memory so this setting also affects
 RAM usage of clients.
 This parameter doesn't affect OSDs themselves.
--- a/docs/config/network.ru.md
+++ b/docs/config/network.ru.md
@ -9,11 +9,9 @@
 Данные параметры используются клиентами и OSD и влияют на логику сетевого
 взаимодействия между клиентами, OSD, а также etcd.
- [osd_network](#osd_network)
+- [tcp_header_buffer_size](#tcp_header_buffer_size)
- [osd_cluster_network](#osd_cluster_network)
+- [use_sync_send_recv](#use_sync_send_recv)
 - [use_rdma](#use_rdma)
 - [use_rdmacm](#use_rdmacm)
 - [disable_tcp](#disable_tcp)
 - [rdma_device](#rdma_device)
 - [rdma_port_num](#rdma_port_num)
 - [rdma_gid_index](#rdma_gid_index)
@ -21,291 +19,17 @@
 - [rdma_max_sge](#rdma_max_sge)
 - [rdma_max_msg](#rdma_max_msg)
 - [rdma_max_recv](#rdma_max_recv)
 - [rdma_max_send](#rdma_max_send)
 - [rdma_odp](#rdma_odp)
 - [peer_connect_interval](#peer_connect_interval)
 - [peer_connect_timeout](#peer_connect_timeout)
 - [osd_idle_timeout](#osd_idle_timeout)
 - [osd_ping_timeout](#osd_ping_timeout)
 - [up_wait_retry_interval](#up_wait_retry_interval)
 - [max_etcd_attempts](#max_etcd_attempts)
 - [etcd_quick_timeout](#etcd_quick_timeout)
 - [etcd_slow_timeout](#etcd_slow_timeout)
 - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
+- [etcd_ws_keepalive_timeout](#etcd_ws_keepalive_timeout)
- [etcd_min_reload_interval](#etcd_min_reload_interval)
+- [client_dirty_limit](#client_dirty_limit)
 - [tcp_header_buffer_size](#tcp_header_buffer_size)
 - [use_sync_send_recv](#use_sync_send_recv)
 ## osd_network
 - Тип: строка или массив строк
 Маски подсетей (IPv4 или IPv6) публичной сети или сетей OSD. Каждый OSD слушает
 один и тот же порт на всех адресах поднятых (UP + RUNNING) сетевых интерфейсов,
 соответствующих одной из указанных сетей. Порт выбирается автоматически, если
 только [bind_port](osd.ru.md#bind_port) не задан явно. Адреса для подключений можно
 также переопределить явно, задав [bind_address](osd.ru.md#bind_address). Если сети OSD
 не заданы вообще, OSD слушает все адреса (0.0.0.0).
 ## osd_cluster_network
 - Тип: строка или массив строк
 Маски подсетей (IPv4 или IPv6) отдельной кластерной сети или сетей OSD.
 То есть, OSD будут всегда стараться использовать эти сети для соединений
 с другими OSD, а клиенты будут стараться использовать сети из [osd_network](#osd_network).
 ## use_rdma
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 Попробовать использовать RDMA через libibverbs для связи при наличии
 доступных устройств. Отключите, если вы не хотите, чтобы Vitastor
 использовал RDMA. TCP-клиенты также могут работать с RDMA-кластером,
 так что отключать RDMA может быть нужно, только если у клиентов есть
 RDMA-устройства, но они не имеют соединения с кластером Vitastor.
 `use_rdma` работает с RoCEv1/RoCEv2 сетями, но не работает с iWARP и
 может не работать с частью конфигураций Infiniband, требующих RDMA-CM.
 Рассмотрите включение `use_rdmacm` для таких сетей.
 ## use_rdmacm
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 Использовать альтернативную реализацию RDMA на основе RDMA-CM (Connection
 Manager). Работает со всеми типами RDMA-сетей: Infiniband, iWARP и
 RoCEv1/RoCEv2, и даже позволяет полностью отключить TCP и работать
 только на RDMA. OSD используют случайные номера портов для ожидания
 соединений через RDMA-CM, отличающиеся от их TCP-портов. Также при
 включении `use_rdmacm` автоматически отключается опция `use_rdma`.
 ## disable_tcp
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 Полностью отключить TCP и использовать только RDMA-CM для соединений с OSD.
 ## rdma_device
 - Тип: строка
 Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
 Если не указано, Vitastor попробует найти RoCE-устройство, соответствующее
 [osd_network](#osd_network), предпочитая RoCEv2, или выбрать первое
 попавшееся RDMA-устройство, если RoCE-устройств нет или если сеть `osd_network`
 не задана. Также автовыбор не поддерживается со старыми версиями библиотеки
 libibverbs < v32, например в Debian 10 Buster или CentOS 7.
 Vitastor поддерживает все модели адаптеров, включая те, у которых
 нет поддержки ODP, то есть вы можете использовать RDMA с ConnectX-3 и
 картами производства не Mellanox. Версии Vitastor до 1.2.0 включительно
 требовали ODP, который есть только на Mellanox ConnectX 4 и более новых.
 См. также [rdma_odp](#rdma_odp).
 Запустите `ibv_devinfo -v` от имени суперпользователя, чтобы посмотреть
 список доступных RDMA-устройств, их параметры и возможности.
 Обратите внимание, что если вы используете RoCE/RoCEv2, вам также необходимо
 правильно настроить для него коммутаторы, иначе вы можете столкнуться с
 нестабильной производительностью. Подробную информацию о настройке
 коммутатора для RoCEv2 ищите в документации производителя. Обычно это
 подразумевает настройку сети без потерь на основе PFC (Priority Flow
 Control) и ECN (Explicit Congestion Notification).
 ## rdma_port_num
 - Тип: целое число
 Номер порта RDMA-устройства, который следует использовать. Имеет смысл
 только для устройств, у которых более 1 порта. Чтобы узнать, сколько портов
 у вашего адаптера, посмотрите `phys_port_cnt` в выводе команды
 `ibv_devinfo -v`.
 Опция неприменима к RDMA-CM (use_rdmacm).
 ## rdma_gid_index
 - Тип: целое число
 Номер глобального идентификатора адреса RDMA-устройства, который следует
 использовать. Разным gid_index могут соответствовать разные протоколы связи:
 RoCEv1, RoCEv2, iWARP. Чтобы понять, какой нужен вам - смотрите строчки со
 словом "GID" в выводе команды `ibv_devinfo -v`.
 Если не указан, Vitastor попробует автоматически выбрать сначала GID,
 соответствующий RoCEv2 IPv4, потом RoCEv2 IPv6, потом RoCEv1 IPv4, потом
 RoCEv1 IPv6, потом IB. Авто-выбор GID не поддерживается со старыми версиями
 libibverbs < v32.
 Правильный rdma_gid_index для RoCEv2, как правило, 1 (IPv6) или 3 (IPv4).
 Опция неприменима к RDMA-CM (use_rdmacm).
 ## rdma_mtu
 - Тип: целое число
 Максимальная единица передачи (Path MTU) для RDMA. Должно быть равно 1024,
 2048 или 4096. По умолчанию используется значение MTU RDMA-устройства.
 ## rdma_max_sge
 - Тип: целое число
 - Значение по умолчанию: 128
 Максимальное число записей разделения/сборки (scatter/gather) для RDMA.
 OSD в любом случае согласовывают реальное значение при установке соединения,
 так что менять этот параметр обычно не нужно.
 ## rdma_max_msg
 - Тип: целое число
 - Значение по умолчанию: 132096
 Максимальный размер одной RDMA-операции отправки или приёма.
 ## rdma_max_recv
 - Тип: целое число
 - Значение по умолчанию: 16
 Максимальное число буферов для RDMA-приёма данных на одно соединение
 (RDMA требует заранее выделенных буферов для приёма данных). Каждый буфер
 имеет размер `rdma_max_msg` байт. Таким образом, настройка прямо влияет на
 потребление памяти - один Vitastor-клиент с RDMA использует
 `rdma_max_recv * rdma_max_msg * ЧИСЛО_OSD` байт памяти, по умолчанию -
 примерно 2 МБ * число OSD.
 ## rdma_max_send
 - Тип: целое число
 - Значение по умолчанию: 8
 Максимальное число RDMA-операций отправки, отправляемых в очередь одного
 соединения. Желательно, чтобы оно было меньше `rdma_max_recv`, чтобы
 у принимающей стороны в процессе работы не заканчивались буферы на приём.
 Не влияет на потребление памяти - дополнительная память на операции отправки
 не выделяется.
 ## rdma_odp
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Использовать RDMA с On-Demand Paging. ODP - функция, доступная пока что
 исключительно на адаптерах Mellanox ConnectX-4 и более новых. ODP позволяет
 не регистрировать память для её использования RDMA-картой. Благодаря этому
 можно не копировать данные при отправке их в сеть и, казалось бы, это должно
 улучшать производительность - но **по факту** получается так, что
 производительность только ухудшается, причём сильно. Пример - на 3-узловом
 кластере с 8 NVMe в каждом узле и сетью 2*25 Гбит/с на чтение с RDMA без ODP
 удаётся снять 3950000 iops, а с ODP - всего 239000 iops...
 Это происходит из-за того, что реализация ODP у Mellanox неоптимальная и
 основана на повторной передаче сообщений, когда карте не известен буфер -
 вероятно, на стандартных "RNR retransmission" (RNR = receiver not ready).
 А данные повторные передачи в RDMA/RoCE - всегда очень медленная штука.
 Презентация на эту тему с конференции ISPASS-2021: https://tkygtr6.github.io/pub/ISPASS21_slides.pdf
 Возможность использования ODP сохранена в коде на случай, если вдруг в один
 прекрасный день появится хорошая реализация ODP.
 ## peer_connect_interval
 - Тип: секунды
 - Значение по умолчанию: 5
 - Минимальное значение: 1
 - Можно менять на лету: да
 Время ожидания перед повторной попыткой соединиться с недоступным OSD.
 ## peer_connect_timeout
 - Тип: секунды
 - Значение по умолчанию: 5
 - Минимальное значение: 1
 - Можно менять на лету: да
 Максимальное время ожидания попытки соединения с OSD.
 ## osd_idle_timeout
 - Тип: секунды
 - Значение по умолчанию: 5
 - Минимальное значение: 1
 - Можно менять на лету: да
 Время неактивности соединения с OSD, после которого клиенты или другие OSD
 посылают запрос проверки состояния соединения.
 ## osd_ping_timeout
 - Тип: секунды
 - Значение по умолчанию: 5
 - Минимальное значение: 1
 - Можно менять на лету: да
 Максимальное время ожидания ответа на запрос проверки состояния соединения.
 Если OSD не отвечает за это время, соединение отключается и производится
 повторная попытка соединения.
 ## max_etcd_attempts
 - Тип: целое число
 - Значение по умолчанию: 5
 - Можно менять на лету: да
 Максимальное число попыток выполнения запросов к etcd для тех запросов,
 которые нельзя повторять бесконечно.
 ## etcd_quick_timeout
 - Тип: миллисекунды
 - Значение по умолчанию: 1000
 - Можно менять на лету: да
 Максимальное время выполнения запросов к etcd, которые должны завершаться
 быстро, таких, как обновление резервации (lease).
 ## etcd_slow_timeout
 - Тип: миллисекунды
 - Значение по умолчанию: 5000
 - Можно менять на лету: да
 Максимальное время выполнения запросов к etcd, для которых не обязательно
 гарантировать быстрое выполнение.
 ## etcd_keepalive_timeout
 - Тип: секунды
 - Значение по умолчанию: max(30, etcd_report_interval*2)
 - Можно менять на лету: да
 Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
 etcd_report_interval, чтобы keepalive гарантированно работал.
 ## etcd_ws_keepalive_interval
 - Тип: секунды
 - Значение по умолчанию: 5
 - Можно менять на лету: да
 Интервал проверки живости вебсокет-подключений к etcd.
 ## etcd_min_reload_interval
 - Тип: миллисекунды
 - Значение по умолчанию: 1000
 - Можно менять на лету: да
 Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
 предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
 успевает рассылать потоки событий и отменяет их.
 ## tcp_header_buffer_size
@ -330,3 +54,185 @@ Vitastor содержат 128-байтные заголовки, за котор
 будут использоваться обычные синхронные системные вызовы send/recv. Для OSD
 это бессмысленно, так как OSD в любом случае нуждается в io_uring, но, в
 принципе, это может применяться для клиентов со старыми версиями ядра.
 ## use_rdma
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 Пытаться использовать RDMA для связи при наличии доступных устройств.
 Отключите, если вы не хотите, чтобы Vitastor использовал RDMA.
 TCP-клиенты также могут работать с RDMA-кластером, так что отключать
 RDMA может быть нужно, только если у клиентов есть RDMA-устройства,
 но они не имеют соединения с кластером Vitastor.
 ## rdma_device
 - Тип: строка
 Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
 Имейте в виду, что поддержка RDMA в Vitastor требует функций устройства
 Implicit On-Demand Paging (Implicit ODP) и Scatter/Gather (SG). Например,
 адаптеры Mellanox ConnectX-3 и более старые не поддерживают Implicit ODP и
 потому не поддерживаются в Vitastor. Запустите `ibv_devinfo -v` от имени
 суперпользователя, чтобы посмотреть список доступных RDMA-устройств, их
 параметры и возможности.
 ## rdma_port_num
 - Тип: целое число
 - Значение по умолчанию: 1
 Номер порта RDMA-устройства, который следует использовать. Имеет смысл
 только для устройств, у которых более 1 порта. Чтобы узнать, сколько портов
 у вашего адаптера, посмотрите `phys_port_cnt` в выводе команды
 `ibv_devinfo -v`.
 ## rdma_gid_index
 - Тип: целое число
 - Значение по умолчанию: 0
 Номер глобального идентификатора адреса RDMA-устройства, который следует
 использовать. Разным gid_index могут соответствовать разные протоколы связи:
 RoCEv1, RoCEv2, iWARP. Чтобы понять, какой нужен вам - смотрите строчки со
 словом "GID" в выводе команды `ibv_devinfo -v`.
 **ВАЖНО:** Если вы хотите использовать RoCEv2 (как мы и рекомендуем), то
 правильный rdma_gid_index, как правило, 1 (IPv6) или 3 (IPv4).
 ## rdma_mtu
 - Тип: целое число
 - Значение по умолчанию: 4096
 Максимальная единица передачи (Path MTU) для RDMA. Должно быть равно 1024,
 2048 или 4096. Обычно нет смысла менять значение по умолчанию, равное 4096.
 ## rdma_max_sge
 - Тип: целое число
 - Значение по умолчанию: 128
 Максимальное число записей разделения/сборки (scatter/gather) для RDMA.
 OSD в любом случае согласовывают реальное значение при установке соединения,
 так что менять этот параметр обычно не нужно.
 ## rdma_max_msg
 - Тип: целое число
 - Значение по умолчанию: 1048576
 Максимальный размер одной RDMA-операции отправки или приёма.
 ## rdma_max_recv
 - Тип: целое число
 - Значение по умолчанию: 8
 Максимальное число параллельных RDMA-операций получения данных. Следует
 иметь в виду, что данное число буферов размером `rdma_max_msg` выделяется
 для каждого подключённого клиентского соединения, так что данная настройка
 влияет на потребление памяти. Это так потому, что RDMA-приём данных в
 Vitastor, увы, всё равно не является zero-copy, т.е. всё равно 1 раз
 копирует данные в памяти. Данная особенность, возможно, будет исправлена в
 более новых версиях Vitastor.
 ## peer_connect_interval
 - Тип: секунды
 - Значение по умолчанию: 5
 - Минимальное значение: 1
 Время ожидания перед повторной попыткой соединиться с недоступным OSD.
 ## peer_connect_timeout
 - Тип: секунды
 - Значение по умолчанию: 5
 - Минимальное значение: 1
 Максимальное время ожидания попытки соединения с OSD.
 ## osd_idle_timeout
 - Тип: секунды
 - Значение по умолчанию: 5
 - Минимальное значение: 1
 Время неактивности соединения с OSD, после которого клиенты или другие OSD
 посылают запрос проверки состояния соединения.
 ## osd_ping_timeout
 - Тип: секунды
 - Значение по умолчанию: 5
 - Минимальное значение: 1
 Максимальное время ожидания ответа на запрос проверки состояния соединения.
 Если OSD не отвечает за это время, соединение отключается и производится
 повторная попытка соединения.
 ## up_wait_retry_interval
 - Тип: миллисекунды
 - Значение по умолчанию: 500
 - Минимальное значение: 50
 Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
 поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
 они отвечают клиентам специальным кодом ошибки, означающим, что клиент
 должен некоторое время подождать перед повторением запроса. Именно это время
 ожидания задаёт данный параметр.
 ## max_etcd_attempts
 - Тип: целое число
 - Значение по умолчанию: 5
 Максимальное число попыток выполнения запросов к etcd для тех запросов,
 которые нельзя повторять бесконечно.
 ## etcd_quick_timeout
 - Тип: миллисекунды
 - Значение по умолчанию: 1000
 Максимальное время выполнения запросов к etcd, которые должны завершаться
 быстро, таких, как обновление резервации (lease).
 ## etcd_slow_timeout
 - Тип: миллисекунды
 - Значение по умолчанию: 5000
 Максимальное время выполнения запросов к etcd, для которых не обязательно
 гарантировать быстрое выполнение.
 ## etcd_keepalive_timeout
 - Тип: секунды
 - Значение по умолчанию: max(30, etcd_report_interval*2)
 Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
 etcd_report_interval, чтобы keepalive гарантированно работал.
 ## etcd_ws_keepalive_timeout
 - Тип: секунды
 - Значение по умолчанию: 30
 Интервал проверки живости вебсокет-подключений к etcd.
 ## client_dirty_limit
 - Тип: целое число
 - Значение по умолчанию: 33554432
 При работе без immediate_commit=all - это лимит объёма "грязных" (не
 зафиксированных fsync-ом) данных, при достижении которого клиент будет
 принудительно вызывать fsync и фиксировать данные. Также стоит иметь в виду,
 что в этом случае до момента fsync клиент хранит копию незафиксированных
 данных в памяти, то есть, настройка влияет на потребление памяти клиентами.
 Параметр не влияет на сами OSD.
--- a/docs/config/osd.en.md
+++ b/docs/config/osd.en.md
@ -7,19 +7,16 @@
 # Runtime OSD Parameters
 These parameters only apply to OSDs, are not fixed at the moment of OSD drive
-initialization and can be changed - in /etc/vitastor/vitastor.conf or [vitastor-disk update-sb](../usage/disk.en.md#update-sb)
+initialization and can be changed with an OSD restart.
 with an OSD restart or, for some of them, even without restarting by updating configuration in etcd.
 - [etcd_report_interval](#etcd_report_interval)
 - [run_primary](#run_primary)
 - [osd_network](#osd_network)
 - [bind_address](#bind_address)
 - [bind_port](#bind_port)
 - [osd_iothread_count](#osd_iothread_count)
 - [etcd_report_interval](#etcd_report_interval)
 - [etcd_stats_interval](#etcd_stats_interval)
 - [run_primary](#run_primary)
 - [autosync_interval](#autosync_interval)
 - [autosync_writes](#autosync_writes)
 - [recovery_queue_depth](#recovery_queue_depth)
 - [recovery_sleep_us](#recovery_sleep_us)
 - [recovery_pg_switch](#recovery_pg_switch)
 - [recovery_sync_batch](#recovery_sync_batch)
 - [readonly](#readonly)
@ -33,9 +30,6 @@ with an OSD restart or, for some of them, even without restarting by updating co
 - [max_flusher_count](#max_flusher_count)
 - [inmemory_metadata](#inmemory_metadata)
 - [inmemory_journal](#inmemory_journal)
 - [data_io](#data_io)
 - [meta_io](#meta_io)
 - [journal_io](#journal_io)
 - [journal_sector_buffer_count](#journal_sector_buffer_count)
 - [journal_no_same_sector_overwrites](#journal_no_same_sector_overwrites)
 - [throttle_small_writes](#throttle_small_writes)
@ -44,75 +38,17 @@ with an OSD restart or, for some of them, even without restarting by updating co
 - [throttle_target_parallelism](#throttle_target_parallelism)
 - [throttle_threshold_us](#throttle_threshold_us)
 - [osd_memlock](#osd_memlock)
 - [auto_scrub](#auto_scrub)
 - [no_scrub](#no_scrub)
 - [scrub_interval](#scrub_interval)
 - [scrub_queue_depth](#scrub_queue_depth)
 - [scrub_sleep](#scrub_sleep)
 - [scrub_list_limit](#scrub_list_limit)
 - [scrub_find_best](#scrub_find_best)
 - [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
 - [recovery_tune_interval](#recovery_tune_interval)
 - [recovery_tune_util_low](#recovery_tune_util_low)
 - [recovery_tune_util_high](#recovery_tune_util_high)
 - [recovery_tune_client_util_low](#recovery_tune_client_util_low)
 - [recovery_tune_client_util_high](#recovery_tune_client_util_high)
 - [recovery_tune_agg_interval](#recovery_tune_agg_interval)
 - [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
 - [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
 - [discard_on_start](#discard_on_start)
 - [min_discard_size](#min_discard_size)
 - [allow_net_split](#allow_net_split)
 ## bind_address
 - Type: string or array of strings
 Instead of the network masks ([osd_network](network.en.md#osd_network) and
 [osd_cluster_network](network.en.md#osd_cluster_network)), you can also set
 OSD listen addresses explicitly using this parameter. May be useful if you
 want to start OSDs on interfaces that are not UP + RUNNING.
 ## bind_port
 - Type: integer
 By default, OSDs pick random ports to use for incoming connections
 automatically. With this option you can set a specific port for a specific
 OSD by hand.
 ## osd_iothread_count
 - Type: integer
 - Default: 0
 TCP network I/O thread count for OSD. When non-zero, a single OSD process
 may handle more TCP I/O, but at a cost of increased latency because thread
 switching overhead occurs. RDMA isn't affected by this option.
 Because of latency, instead of enabling OSD I/O threads it's recommended to
 just create multiple OSDs per disk, or use RDMA.
 ## etcd_report_interval
 - Type: seconds
 - Default: 5
-Interval at which OSDs report their liveness to etcd. Affects OSD lease time
+Interval at which OSDs report their state to etcd. Affects OSD lease time
 and thus the failover speed. Lease time is equal to this parameter value
 plus max_etcd_attempts * etcd_quick_timeout because it should be guaranteed
 that every OSD always refreshes its lease in time.
 ## etcd_stats_interval
 - Type: seconds
 - Default: 30
 Interval at which OSDs report their statistics to etcd. Highly affects the
 imposed load on etcd, because statistics include a key for every OSD and
 for every PG. At the same time, low statistic intervals make `vitastor-cli`
 statistics more responsive.
 ## run_primary
 - Type: boolean
@ -123,11 +59,38 @@ debugging purposes. It's possible to implement additional feature for the
 monitor which may allow separating primary and secondary OSDs, but it's
 unclear why anyone could need it, so it's not implemented.
 ## osd_network
 - Type: string or array of strings
 Network mask of the network (IPv4 or IPv6) to use for OSDs. Note that
 although it's possible to specify multiple networks here, this does not
 mean that OSDs will create multiple listening sockets - they'll only
 pick the first matching address of an UP + RUNNING interface. Separate
 networks for cluster and client connections are also not implemented, but
 they are mostly useless anyway, so it's not a big deal.
 ## bind_address
 - Type: string
 - Default: 0.0.0.0
 Instead of the network mask, you can also set OSD listen address explicitly
 using this parameter. May be useful if you want to start OSDs on interfaces
 that are not UP + RUNNING.
 ## bind_port
 - Type: integer
 By default, OSDs pick random ports to use for incoming connections
 automatically. With this option you can set a specific port for a specific
 OSD by hand.
 ## autosync_interval
 - Type: seconds
 - Default: 5
 - Can be changed online: yes
 Time interval at which automatic fsyncs/flushes are issued by each OSD when
 the immediate_commit mode is disabled. fsyncs are required because without
@ -140,7 +103,6 @@ issue fsyncs at all.
 - Type: integer
 - Default: 128
 - Can be changed online: yes
 Same as autosync_interval, but sets the maximum number of uncommitted write
 operations before issuing an fsync operation internally.
@ -148,30 +110,16 @@ operations before issuing an fsync operation internally.
 ## recovery_queue_depth
 - Type: integer
- Default: 1
+- Default: 4
 - Can be changed online: yes
-Maximum recovery and rebalance operations initiated by each OSD in parallel.
+Maximum recovery operations per one primary OSD at any given moment of time.
-Note that each OSD talks to a lot of other OSDs so actual number of parallel
+Currently it's the only parameter available to tune the speed of recovery
-recovery operations per each OSD is greater than just recovery_queue_depth.
+and rebalancing, but it's planned to implement more.
 Increasing this parameter can speed up recovery if [auto-tuning](#recovery_tune_interval)
 allows it or if it is disabled.
 ## recovery_sleep_us
 - Type: microseconds
 - Default: 0
 - Can be changed online: yes
 Delay for all recovery- and rebalance- related operations. If non-zero,
 such operations are artificially slowed down to reduce the impact on
 client I/O.
 ## recovery_pg_switch
 - Type: integer
 - Default: 128
 - Can be changed online: yes
 Number of recovery operations before switching to recovery of the next PG.
 The idea is to mix all PGs during recovery for more even space and load
@ -182,7 +130,6 @@ Degraded PGs are anyway scanned first.
 - Type: integer
 - Default: 16
 - Can be changed online: yes
 Maximum number of recovery operations before issuing an additional fsync.
@ -198,7 +145,6 @@ the underlying device. This may be useful for recovery purposes.
 - Type: boolean
 - Default: false
 - Can be changed online: yes
 Disable automatic background recovery of objects. Note that it doesn't
 affect implicit recovery of objects happening during writes - a write is
@ -208,7 +154,6 @@ always made to a full set of at least pg_minsize OSDs.
 - Type: boolean
 - Default: false
 - Can be changed online: yes
 Disable background movement of data between different OSDs. Disabling it
 means that PGs in the `has_misplaced` state will be left in it indefinitely.
@ -217,7 +162,6 @@ means that PGs in the `has_misplaced` state will be left in it indefinitely.
 - Type: seconds
 - Default: 3
 - Can be changed online: yes
 Time interval at which OSDs print simple human-readable operation
 statistics on stdout.
@ -226,7 +170,6 @@ statistics on stdout.
 - Type: seconds
 - Default: 10
 - Can be changed online: yes
 Time interval at which OSDs dump slow or stuck operations on stdout, if
 there are any. Also it's the time after which an operation is considered
@ -236,7 +179,6 @@ they're any. Also it's the time after which an operation is considered
 - Type: seconds
 - Default: 60
 - Can be changed online: yes
 Number of seconds after which a deleted inode is removed from OSD statistics.
@ -244,7 +186,6 @@ Number of seconds after which a deleted inode is removed from OSD statistics.
 - Type: integer
 - Default: 128
 - Can be changed online: yes
 Parallel client write operation limit per one OSD. Operations that exceed
 this limit are pushed to a temporary queue instead of being executed
@ -254,7 +195,6 @@ immediately.
 - Type: integer
 - Default: 1
 - Can be changed online: yes
 Flusher is a micro-thread that moves data from the journal to the data
 area of the device. Their number is auto-tuned between minimum and maximum.
@ -264,7 +204,6 @@ Minimum number is set by this parameter.
 - Type: integer
 - Default: 256
 - Can be changed online: yes
 Maximum number of journal flushers (see above min_flusher_count).
@ -294,60 +233,6 @@ is typically very small because it's sufficient to have 16-32 MB journal
 for SSD OSDs. However, in theory it's possible that you'll want to turn it
 off for hybrid (HDD+SSD) OSDs with large journals on quick devices.
 ## data_io
 - Type: string
 - Default: direct
 I/O mode for *data*. One of "direct", "cached" or "directsync". Corresponds
 to O_DIRECT, O_SYNC and O_DIRECT|O_SYNC, respectively.
 Choose "cached" to use Linux page cache. This may improve read performance
 for hot data and slower disks - HDDs and maybe SATA SSDs - but will slightly
 decrease write performance for fast disks because page cache is an overhead
 itself.
 Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
 (which requires disable_data_fsync) with drives having write-back cache
 which can't be turned off, for example, Intel Optane. Also note that *some*
 desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
 disable_data_fsync unsafe even with "directsync".
 ## meta_io
 - Type: string
 - Default: direct
 I/O mode for *metadata*. One of "direct", "cached" or "directsync".
 "cached" may improve read performance, but only under the following conditions:
 1. your drives are relatively slow (HDD, SATA SSD), and
 2. checksums are enabled, and
 3. [inmemory_metadata](#inmemory_metadata) is disabled.
 Under all these conditions, metadata blocks are read from disk on every
 read request to verify checksums and caching them may reduce this extra
 read load. Without (3) metadata is never read from the disk after starting,
 and without (2) metadata blocks are read from disk only during journal
 flushing.
 "directsync" is the same as above.
 If the same device is used for data and metadata, meta_io by default is set
 to the same value as [data_io](#data_io).
 ## journal_io
 - Type: string
 - Default: direct
 I/O mode for *journal*. One of "direct", "cached" or "directsync".
 Here, "cached" may only improve read performance for recent writes and
 only if [inmemory_journal](#inmemory_journal) is turned off.
 If the same device is used for metadata and journal, journal_io by default
 is set to the same value as [meta_io](#meta_io).
 ## journal_sector_buffer_count
 - Type: integer
@ -375,7 +260,6 @@ Most (99%) other SSDs don't need this option.
 - Type: boolean
 - Default: false
 - Can be changed online: yes
 Enable soft throttling of small journaled writes. Useful for hybrid OSDs
 with fast journal/metadata devices and slow data devices. The idea is that
@ -393,7 +277,6 @@ fills up.
 - Type: integer
 - Default: 100
 - Can be changed online: yes
 Target maximum number of throttled operations per second under the condition
 of full journal. Set it to approximate random write iops of your data devices
@ -403,7 +286,6 @@ of full journal. Set it to approximate random write iops of your data devices
 - Type: integer
 - Default: 100
 - Can be changed online: yes
 Target maximum bandwidth in MB/s of throttled operations under
 the condition of full journal. Set it to approximate linear write
@ -413,7 +295,6 @@ performance of your data devices (HDDs).
 - Type: integer
 - Default: 1
 - Can be changed online: yes
 Target maximum parallelism of throttled operations under the condition of
 full journal. Set it to approximate internal parallelism of your data
@ -423,7 +304,6 @@ devices (1 for HDDs, 4-8 for SSDs).
 - Type: microseconds
 - Default: 50
 - Can be changed online: yes
 Minimal computed delay to be applied to throttled operations. Usually
 doesn't need to be changed.
@ -433,217 +313,4 @@ doesn't need to be changed.
 - Type: boolean
 - Default: false
-Lock all OSD memory to prevent it from being unloaded into swap with
+Lock all OSD memory to prevent it from being unloaded into swap with mlockall(). Requires sufficient ulimit -l (max locked memory).
 mlockall(). Requires sufficient ulimit -l (max locked memory).
 ## auto_scrub
 - Type: boolean
 - Default: false
 - Can be changed online: yes
 Data scrubbing is the process of background verification of copies to find
 and repair corrupted blocks. It's not run automatically by default since
 it's a new feature. Set this parameter to true to enable automatic scrubs.
 This parameter makes OSDs automatically schedule data scrubbing of clean PGs
 every `scrub_interval` (see below). You can also start/schedule scrubbing
 manually by setting `next_scrub` JSON key to the desired UNIX time of the
 next scrub in `/pg/history/...` values in etcd.
 ## no_scrub
 - Type: boolean
 - Default: false
 - Can be changed online: yes
 Temporarily disable scrubbing and stop running scrubs.
 ## scrub_interval
 - Type: string
 - Default: 30d
 - Can be changed online: yes
 Default automatic scrubbing interval for all pools. Numbers without suffix
 are treated as seconds, possible unit suffixes include 's' (seconds),
 'm' (minutes), 'h' (hours), 'd' (days), 'M' (months) and 'y' (years).
 ## scrub_queue_depth
 - Type: integer
 - Default: 1
 - Can be changed online: yes
 Number of parallel scrubbing operations per one OSD.
 ## scrub_sleep
 - Type: milliseconds
 - Default: 0
 - Can be changed online: yes
 Additional interval between two consecutive scrubbing operations on one OSD.
 Can be used to slow down scrubbing if it affects user load too much.
 ## scrub_list_limit
 - Type: integer
 - Default: 1000
 - Can be changed online: yes
 Number of objects to list in one listing operation during scrub.
 ## scrub_find_best
 - Type: boolean
 - Default: true
 - Can be changed online: yes
 Find and automatically restore best versions of objects with unmatched
 copies. In replicated setups, the best version is the version with most
 matching replicas. In EC setups, the best version is the subset of data
 and parity chunks without mismatches.
 The hypothetical situation where you might want to disable it is when
 you have 3 replicas and you are paranoid that 2 HDDs out of 3 may silently
 corrupt an object in the same way (for example, zero it out) and only
 1 HDD will remain good. In this case disabling scrub_find_best may help
 you to recover the data! See also scrub_ec_max_bruteforce below.
 ## scrub_ec_max_bruteforce
 - Type: integer
 - Default: 100
 - Can be changed online: yes
 Vitastor can locate corrupted chunks in EC setups with more than 1 parity
 chunk by brute-forcing all possible error locations. This configuration
 value limits the maximum number of checked combinations. You can try to
 increase it if you have EC N+K setup with N and K large enough for
 combination count `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!` to be greater
 than the default 100.
 If there are too many possible combinations or if multiple combinations give
 correct results then objects are marked inconsistent and aren't recovered
 automatically.
 In replicated setups bruteforcing isn't needed, Vitastor just assumes that
 the variant with most available equal copies is correct. For example, if
 you have 3 replicas and 1 of them differs, this one is considered to be
 corrupted. But if there is no "best" version with more copies than all
 others have then the object is also marked as inconsistent.
 ## recovery_tune_interval
 - Type: seconds
 - Default: 1
 - Can be changed online: yes
 Interval at which OSD re-considers client and recovery load and automatically
 adjusts [recovery_sleep_us](#recovery_sleep_us). Recovery auto-tuning is
 disabled if recovery_tune_interval is set to 0.
 Auto-tuning targets utilization. Utilization is a measure of load and is
 equal to the product of iops and average latency (so it may be greater
 than 1). You set "low" and "high" client utilization thresholds and two
 corresponding target recovery utilization levels. OSD calculates desired
 recovery utilization from client utilization using linear interpolation
 and auto-tunes recovery operation delay to make actual recovery utilization
 match desired.
 This allows reducing recovery/rebalance impact on client operations. It is
 of course impossible to remove it completely, but it should become adequate.
 In some tests rebalance could earlier drop client write speed from 1.5 GB/s
 to 50-100 MB/s, with default auto-tuning settings it now only reduces
 to ~1 GB/s.
 ## recovery_tune_util_low
 - Type: number
 - Default: 0.1
 - Can be changed online: yes
 Desired recovery/rebalance utilization when client load is high, i.e. when
 it is at or above recovery_tune_client_util_high.
 ## recovery_tune_util_high
 - Type: number
 - Default: 1
 - Can be changed online: yes
 Desired recovery/rebalance utilization when client load is low, i.e. when
 it is at or below recovery_tune_client_util_low.
 ## recovery_tune_client_util_low
 - Type: number
 - Default: 0
 - Can be changed online: yes
 Client utilization considered "low".
 ## recovery_tune_client_util_high
 - Type: number
 - Default: 0.5
 - Can be changed online: yes
 Client utilization considered "high".
 ## recovery_tune_agg_interval
 - Type: integer
 - Default: 10
 - Can be changed online: yes
 The number of last auto-tuning iterations to use for calculating the
 delay as average. Lower values result in quicker response to client
 load change, higher values result in more stable delay. Default value of 10
 is usually fine.
 ## recovery_tune_sleep_min_us
 - Type: microseconds
 - Default: 10
 - Can be changed online: yes
 Minimum possible value for auto-tuned recovery_sleep_us. Lower values
 are changed to 0.
 ## recovery_tune_sleep_cutoff_us
 - Type: microseconds
 - Default: 10000000
 - Can be changed online: yes
 Maximum possible value for auto-tuned recovery_sleep_us. Higher values
 are treated as outliers and ignored in aggregation.
 ## discard_on_start
 - Type: boolean
 Discard (SSD TRIM) unused data device blocks on every OSD startup.
 ## min_discard_size
 - Type: integer
 - Default: 1048576
 Minimum size of a consecutive block for it to be discarded with TRIM.
 ## allow_net_split
 - Type: boolean
 - Default: false
 Allow "safe" cases of network splits/partitions - allow PGs to start without
 connections to some OSDs currently registered as alive in etcd, if the number
 of actually connected PG OSDs is at least pg_minsize. That is, allow some OSDs to lose
 connectivity with some other OSDs as long as it doesn't break pg_minsize guarantees.
 The downside is that it increases the probability of writing data into just pg_minsize
 OSDs during failover which can lead to PGs becoming incomplete after additional outages.
 The old behaviour in versions up to 2.0.0 was equal to enabled allow_net_split.
--- a/docs/config/osd.ru.md
+++ b/docs/config/osd.ru.md
@ -8,19 +8,16 @@
 Данные параметры используются только OSD, но, в отличие от дисковых параметров,
 не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
-момент с перезапуском OSD в /etc/vitastor/vitastor.conf или [vitastor-disk update-sb](../usage/disk.ru.md#update-sb),
+момент с перезапуском OSD.
 а некоторые и без перезапуска, с помощью изменения конфигурации в etcd.
 - [etcd_report_interval](#etcd_report_interval)
 - [run_primary](#run_primary)
 - [osd_network](#osd_network)
 - [bind_address](#bind_address)
 - [bind_port](#bind_port)
 - [osd_iothread_count](#osd_iothread_count)
 - [etcd_report_interval](#etcd_report_interval)
 - [etcd_stats_interval](#etcd_stats_interval)
 - [run_primary](#run_primary)
 - [autosync_interval](#autosync_interval)
 - [autosync_writes](#autosync_writes)
 - [recovery_queue_depth](#recovery_queue_depth)
 - [recovery_sleep_us](#recovery_sleep_us)
 - [recovery_pg_switch](#recovery_pg_switch)
 - [recovery_sync_batch](#recovery_sync_batch)
 - [readonly](#readonly)
@ -34,9 +31,6 @@
 - [max_flusher_count](#max_flusher_count)
 - [inmemory_metadata](#inmemory_metadata)
 - [inmemory_journal](#inmemory_journal)
 - [data_io](#data_io)
 - [meta_io](#meta_io)
 - [journal_io](#journal_io)
 - [journal_sector_buffer_count](#journal_sector_buffer_count)
 - [journal_no_same_sector_overwrites](#journal_no_same_sector_overwrites)
 - [throttle_small_writes](#throttle_small_writes)
@ -45,76 +39,17 @@
 - [throttle_target_parallelism](#throttle_target_parallelism)
 - [throttle_threshold_us](#throttle_threshold_us)
 - [osd_memlock](#osd_memlock)
 - [auto_scrub](#auto_scrub)
 - [no_scrub](#no_scrub)
 - [scrub_interval](#scrub_interval)
 - [scrub_queue_depth](#scrub_queue_depth)
 - [scrub_sleep](#scrub_sleep)
 - [scrub_list_limit](#scrub_list_limit)
 - [scrub_find_best](#scrub_find_best)
 - [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
 - [recovery_tune_interval](#recovery_tune_interval)
 - [recovery_tune_util_low](#recovery_tune_util_low)
 - [recovery_tune_util_high](#recovery_tune_util_high)
 - [recovery_tune_client_util_low](#recovery_tune_client_util_low)
 - [recovery_tune_client_util_high](#recovery_tune_client_util_high)
 - [recovery_tune_agg_interval](#recovery_tune_agg_interval)
 - [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
 - [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
 - [discard_on_start](#discard_on_start)
 - [min_discard_size](#min_discard_size)
 - [allow_net_split](#allow_net_split)
 ## bind_address
 - Тип: строка или массив строк
 Вместо использования масок подсети ([osd_network](network.ru.md#osd_network) и
 [osd_cluster_network](network.ru.md#osd_cluster_network)), вы также можете явно
 задать адрес(а), на которых будут ожидать соединений OSD, с помощью данного
 параметра. Это может быть полезно, например, чтобы запускать OSD на неподнятых
 интерфейсах (не UP + RUNNING).
 ## bind_port
 - Тип: целое число
 По умолчанию OSD сами выбирают случайные порты для входящих подключений.
 С помощью данной опции вы можете задать порт для отдельного OSD вручную.
 ## osd_iothread_count
 - Тип: целое число
 - Значение по умолчанию: 0
 Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
 стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
 по сети больше данных, но ухудшает задержку из-за накладных расходов
 переключения потоков. На работу RDMA опция не влияет.
 Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
 просто создавать по несколько OSD на каждом диске, или использовать RDMA.
 ## etcd_report_interval
 - Тип: секунды
 - Значение по умолчанию: 5
-Интервал, с которым OSD сообщает о том, что жив, в etcd. Значение параметра
+Интервал, с которым OSD обновляет своё состояние в etcd. Значение параметра
-влияет на время резервации (lease) OSD и поэтому - на скорость переключения
+влияет на время резервации (lease) OSD и поэтому на скорость переключения
 при падении OSD. Время lease равняется значению этого параметра плюс
 max_etcd_attempts * etcd_quick_timeout.
 ## etcd_stats_interval
 - Тип: секунды
 - Значение по умолчанию: 30
 Интервал, с которым OSD обновляет свою статистику в etcd. Сильно влияет на
 создаваемую нагрузку на etcd, потому что статистика содержит по ключу на
 каждый OSD и на каждую PG. В то же время низкий интервал делает
 статистику, печатаемую `vitastor-cli`, отзывчивей.
 ## run_primary
 - Тип: булево (да/нет)
@ -126,11 +61,38 @@ max_etcd_attempts * etcd_quick_timeout.
 первичные OSD от вторичных, но пока не понятно, зачем это может кому-то
 понадобиться, поэтому это не реализовано.
 ## osd_network
 - Тип: строка или массив строк
 Маска подсети (IPv4 или IPv6) для использования для соединений с OSD.
 Имейте в виду, что хотя сейчас и можно передать в этот параметр несколько
 подсетей, это не означает, что OSD будут создавать несколько слушающих
 сокетов - они лишь будут выбирать адрес первого поднятого (состояние UP +
 RUNNING), подходящий под заданную маску. Также не реализовано разделение
 кластерной и публичной сетей OSD. Правда, от него обычно всё равно довольно
 мало толку, так что особенной проблемы в этом нет.
 ## bind_address
 - Тип: строка
 - Значение по умолчанию: 0.0.0.0
 Этим параметром можно явным образом задать адрес, на котором будет ожидать
 соединений OSD (вместо использования маски подсети). Может быть полезно,
 например, чтобы запускать OSD на неподнятых интерфейсах (не UP + RUNNING).
 ## bind_port
 - Тип: целое число
 По умолчанию OSD сами выбирают случайные порты для входящих подключений.
 С помощью данной опции вы можете задать порт для отдельного OSD вручную.
 ## autosync_interval
 - Тип: секунды
 - Значение по умолчанию: 5
 - Можно менять на лету: да
 Временной интервал отправки автоматических fsync-ов (операций очистки кэша)
 каждым OSD для случая, когда режим immediate_commit отключён. fsync-и нужны
@ -143,7 +105,6 @@ OSD, чтобы успевать очищать журнал - без них OSD
 - Тип: целое число
 - Значение по умолчанию: 128
 - Можно менять на лету: да
 Аналогично autosync_interval, но задаёт не временной интервал, а
 максимальное количество незафиксированных операций записи перед
@ -152,31 +113,17 @@ OSD, чтобы успевать очищать журнал - без них OSD
 ## recovery_queue_depth
 - Тип: целое число
- Значение по умолчанию: 1
+- Значение по умолчанию: 4
 - Можно менять на лету: да
-Максимальное число параллельных операций восстановления, инициируемых одним
+Максимальное число операций восстановления на одном первичном OSD в любой
-OSD в любой момент времени. Имейте в виду, что каждый OSD обычно работает с
+момент времени. На данный момент единственный параметр, который можно менять
-многими другими OSD, так что на практике параллелизм восстановления больше,
+для ускорения или замедления восстановления и перебалансировки данных, но
-чем просто recovery_queue_depth. Увеличение значения этого параметра может
+в планах реализация других параметров.
 ускорить восстановление если [автотюнинг скорости](#recovery_tune_interval)
 разрешает это или если он отключён.
 ## recovery_sleep_us
 - Тип: микросекунды
 - Значение по умолчанию: 0
 - Можно менять на лету: да
 Задержка для всех операций, связанных с восстановлением и перебалансировкой.
 Если она не равна нулю, такие операции искусственно замедляются, чтобы
 снизить влияние на клиентский ввод-вывод.
 ## recovery_pg_switch
 - Тип: целое число
 - Значение по умолчанию: 128
 - Можно менять на лету: да
 Число операций восстановления перед переключением на восстановление другой PG.
 Идея заключается в том, чтобы восстанавливать все PG одновременно для более
@ -188,7 +135,6 @@ client I/O.
 - Тип: целое число
 - Значение по умолчанию: 16
 - Можно менять на лету: да
 Максимальное число операций восстановления перед дополнительным fsync.
@ -204,7 +150,6 @@ client I/O.
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 - Можно менять на лету: да
 Отключить автоматическое фоновое восстановление объектов. Обратите внимание,
 что эта опция не отключает восстановление объектов, происходящее при
@ -215,7 +160,6 @@ OSD.
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 - Можно менять на лету: да
 Отключить фоновое перемещение объектов между разными OSD. Отключение
 означает, что PG, находящиеся в состоянии `has_misplaced`, будут оставлены
@ -225,7 +169,6 @@ OSD.
 - Тип: секунды
 - Значение по умолчанию: 3
 - Можно менять на лету: да
 Временной интервал, с которым OSD печатают простую человекочитаемую
 статистику выполнения операций в стандартный вывод.
@ -234,7 +177,6 @@ OSD.
 - Тип: секунды
 - Значение по умолчанию: 10
 - Можно менять на лету: да
 Временной интервал, с которым OSD выводят в стандартный вывод список
 медленных или зависших операций, если таковые имеются. Также время, при
@ -244,7 +186,6 @@ OSD.
 - Тип: секунды
 - Значение по умолчанию: 60
 - Можно менять на лету: да
 Число секунд, через которое удалённый инод удаляется и из статистики OSD.
@ -252,7 +193,6 @@ OSD.
 - Тип: целое число
 - Значение по умолчанию: 128
 - Можно менять на лету: да
 Максимальное число одновременных клиентских операций записи на один OSD.
 Операции, превышающие этот лимит, не исполняются сразу, а сохраняются во
@ -262,7 +202,6 @@ OSD.
 - Тип: целое число
 - Значение по умолчанию: 1
 - Можно менять на лету: да
 Flusher - это микро-поток (корутина), которая копирует данные из журнала в
 основную область устройства данных. Их число настраивается динамически между
@ -272,7 +211,6 @@ Flusher - это микро-поток (корутина), которая коп
 - Тип: целое число
 - Значение по умолчанию: 256
 - Можно менять на лету: да
 Максимальное число микро-потоков очистки журнала (см. выше min_flusher_count).
@ -303,63 +241,6 @@ Flusher - это микро-поток (корутина), которая коп
 параметра может оказаться полезным для гибридных OSD (HDD+SSD) с большими
 журналами, расположенными на быстром по сравнению с HDD устройстве.
 ## data_io
 - Тип: строка
 - Значение по умолчанию: direct
 Режим ввода-вывода для *данных*. Одно из значений "direct", "cached" или
 "directsync", означающих O_DIRECT, O_SYNC и O_DIRECT|O_SYNC, соответственно.
 Выберите "cached", чтобы использовать системный кэш Linux (page cache) при
 чтении и записи. Это может улучшить скорость чтения горячих данных с
 относительно медленных дисков - HDD и, возможно, SATA SSD - но немного
 снижает производительность записи для быстрых дисков, так как кэш сам по
 себе тоже добавляет накладные расходы.
 Выберите "directsync", если хотите задействовать
 [immediate_commit](layout-cluster.ru.md#immediate_commit) (требующий
 включения disable_data_fsync) на дисках с неотключаемым кэшем. Пример таких
 дисков - Intel Optane. При этом также стоит иметь в виду, что *некоторые*
 настольные SSD (например, HP EX950) игнорируют флаг O_SYNC, делая отключение
 fsync небезопасным даже с режимом "directsync".
 ## meta_io
 - Тип: строка
 - Значение по умолчанию: direct
 Режим ввода-вывода для *метаданных*. Одно из значений "direct", "cached" или
 "directsync".
 "cached" может улучшить скорость чтения, если:
 1. у вас медленные диски (HDD, SATA SSD)
 2. контрольные суммы включены
 3. параметр [inmemory_metadata](#inmemory_metadata) отключён.
 При этих условиях блоки метаданных читаются с диска при каждом запросе чтения
 для проверки контрольных сумм и их кэширование может снизить дополнительную
 нагрузку на диск. Без (3) метаданные никогда не читаются с диска после
 запуска OSD, а без (2) блоки метаданных читаются только при сбросе журнала.
 Если одно и то же устройство используется для данных и метаданных, режим
 ввода-вывода метаданных по умолчанию устанавливается равным [data_io](#data_io).
 ## journal_io
 - Тип: строка
 - Значение по умолчанию: direct
 Режим ввода-вывода для *журнала*. Одно из значений "direct", "cached" или
 "directsync".
 Здесь "cached" может улучшить скорость чтения только недавно записанных
 данных и только если параметр [inmemory_journal](#inmemory_journal)
 отключён.
 Если одно и то же устройство используется для метаданных и журнала,
 режим ввода-вывода журнала по умолчанию устанавливается равным
 [meta_io](#meta_io).
 ## journal_sector_buffer_count
 - Тип: целое число
@ -389,7 +270,6 @@ fsync небезопасным даже с режимом "directsync".
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 - Можно менять на лету: да
 Разрешить мягкое ограничение скорости журналируемой записи. Полезно для
 гибридных OSD с быстрыми устройствами метаданных и медленными устройствами
@ -408,7 +288,6 @@ fsync небезопасным даже с режимом "directsync".
 - Тип: целое число
 - Значение по умолчанию: 100
 - Можно менять на лету: да
 Расчётное максимальное число ограничиваемых операций в секунду при условии
 отсутствия свободного места в журнале. Устанавливайте приблизительно равным
@ -419,7 +298,6 @@ fsync небезопасным даже с режимом "directsync".
 - Тип: целое число
 - Значение по умолчанию: 100
 - Можно менять на лету: да
 Расчётная максимальная скорость в МБ/с ограничиваемых операций при
 условии отсутствия свободного места в журнале. Устанавливайте приблизительно
@ -430,7 +308,6 @@ fsync небезопасным даже с режимом "directsync".
 - Тип: целое число
 - Значение по умолчанию: 1
 - Можно менять на лету: да
 Расчётный максимальный параллелизм ограничиваемых операций при
 условии отсутствия свободного места в журнале. Устанавливайте приблизительно
@ -441,7 +318,6 @@ fsync небезопасным даже с режимом "directsync".
 - Тип: микросекунды
 - Значение по умолчанию: 50
 - Можно менять на лету: да
 Минимальная применимая к ограничиваемым операциям задержка. Обычно не
 требует изменений.
@ -451,231 +327,4 @@ fsync небезопасным даже с режимом "directsync".
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
-Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку
+Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку в пространство подкачки. Требует достаточного значения ulimit -l (лимита заблокированной памяти).
 в пространство подкачки. Требует достаточного значения ulimit -l (лимита
 заблокированной памяти).
 ## auto_scrub
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 - Можно менять на лету: да
 Скраб - процесс фоновой проверки копий данных, предназначенный, чтобы
 находить и исправлять повреждённые блоки. По умолчанию эти проверки ещё не
 запускаются автоматически, так как являются новой функцией. Чтобы включить
 автоматическое планирование скрабов, установите данный параметр в true.
 Включённый параметр заставляет OSD автоматически планировать фоновую
 проверку чистых PG раз в `scrub_interval` (см. ниже). Вы также можете
 запустить или запланировать проверку вручную, установив значение ключа JSON
 `next_scrub` внутри ключей etcd `/pg/history/...` в UNIX-время следующей
 желаемой проверки.
 ## no_scrub
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 - Можно менять на лету: да
 Временно отключить и остановить запущенные скрабы.
 ## scrub_interval
 - Тип: строка
 - Значение по умолчанию: 30d
 - Можно менять на лету: да
 Интервал автоматической фоновой проверки по умолчанию для всех пулов.
 Значения без указанной единицы измерения считаются в секундах, допустимые
 символы единиц измерения в конце: 's' (секунды),
 'm' (минуты), 'h' (часы), 'd' (дни), 'M' (месяцы) или 'y' (годы).
 ## scrub_queue_depth
 - Тип: целое число
 - Значение по умолчанию: 1
 - Можно менять на лету: да
 Число параллельных операций фоновой проверки на один OSD.
 ## scrub_sleep
 - Тип: миллисекунды
 - Значение по умолчанию: 0
 - Можно менять на лету: да
 Дополнительный интервал ожидания после фоновой проверки каждого объекта на
 одном OSD. Может использоваться для замедления скраба, если он слишком
 сильно влияет на пользовательскую нагрузку.
 ## scrub_list_limit
 - Тип: целое число
 - Значение по умолчанию: 1000
 - Можно менять на лету: да
 Размер загружаемых за одну операцию списков объектов в процессе фоновой
 проверки.
 ## scrub_find_best
 - Тип: булево (да/нет)
 - Значение по умолчанию: true
 - Можно менять на лету: да
 Находить и автоматически восстанавливать "лучшие версии" объектов с
 несовпадающими копиями/частями. При использовании репликации "лучшая"
 версия - версия, доступная в большем числе экземпляров, чем другие. При
 использовании кодов коррекции ошибок "лучшая" версия - это подмножество
 частей данных и чётности, полностью соответствующих друг другу.
 Гипотетическая ситуация, в которой вы можете захотеть отключить этот
 поиск - это если у вас 3 реплики и вы боитесь, что 2 диска из 3 могут
 незаметно и одинаково повредить данные одного и того же объекта, например,
 занулив его, и только 1 диск останется неповреждённым. В этой ситуации
 отключение этого параметра поможет вам восстановить данные! Смотрите также
 описание следующего параметра - scrub_ec_max_bruteforce.
 ## scrub_ec_max_bruteforce
 - Тип: целое число
 - Значение по умолчанию: 100
 - Можно менять на лету: да
 Vitastor старается определить повреждённые части объектов при использовании
 EC (кодов коррекции ошибок) с более, чем 1 диском чётности, путём перебора
 всех возможных комбинаций ошибочных частей. Данное значение конфигурации
 ограничивает число перебираемых комбинаций. Вы можете попробовать поднять
 его, если используете схему кодирования EC N+K с N и K, достаточно большими
 для того, чтобы число сочетаний `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!`
 было больше, чем стандартное значение 100.
 Если возможных комбинаций слишком много или если корректная комбинация не
 определяется однозначно, объекты помечаются неконсистентными (inconsistent)
 и не восстанавливаются автоматически.
 При использовании репликации перебор не нужен, Vitastor просто предполагает,
 что вариант объекта с наибольшим количеством одинаковых копий корректен.
 Например, если вы используете 3 реплики и 1 из них отличается, эта 1 копия
 считается некорректной. Однако, если "лучшую" версию с числом доступных
 копий большим, чем у всех других версий, найти невозможно, то объект тоже
 маркируется неконсистентным.
 ## recovery_tune_interval
 - Тип: секунды
 - Значение по умолчанию: 1
 - Можно менять на лету: да
 Интервал, с которым OSD пересматривает клиентскую нагрузку и нагрузку
 восстановления и автоматически подстраивает [recovery_sleep_us](#recovery_sleep_us).
 Автотюнинг (автоподстройка) отключается, если recovery_tune_interval
 устанавливается в значение 0.
 Автотюнинг регулирует утилизацию. Утилизация является мерой нагрузки
 и равна произведению числа операций в секунду и средней задержки
 (то есть, она может быть выше 1). Вы задаёте два уровня клиентской
 утилизации - "низкий" и "высокий" (low и high) и два соответствующих
 целевых уровня утилизации операциями восстановления. OSD рассчитывает
 желаемый уровень утилизации восстановления линейной интерполяцией от
 клиентской утилизации и подстраивает задержку операций восстановления
 так, чтобы фактическая утилизация восстановления совпадала с желаемой.
 Это позволяет снизить влияние восстановления и ребаланса на клиентские
 операции. Конечно, невозможно исключить такое влияние полностью, но оно
 должно становиться адекватнее. В некоторых тестах перебалансировка могла
 снижать клиентскую скорость записи с 1.5 ГБ/с до 50-100 МБ/с, а теперь, с
 настройками автотюнинга по умолчанию, она снижается только до ~1 ГБ/с.
 ## recovery_tune_util_low
 - Тип: число
 - Значение по умолчанию: 0.1
 - Можно менять на лету: да
 Желаемая утилизация восстановления в моменты, когда клиентская нагрузка
 высокая, то есть, находится на уровне или выше recovery_tune_client_util_high.
 ## recovery_tune_util_high
 - Тип: число
 - Значение по умолчанию: 1
 - Можно менять на лету: да
 Желаемая утилизация восстановления в моменты, когда клиентская нагрузка
 низкая, то есть, находится на уровне или ниже recovery_tune_client_util_low.
 ## recovery_tune_client_util_low
 - Тип: число
 - Значение по умолчанию: 0
 - Можно менять на лету: да
 Клиентская утилизация, которая считается "низкой".
 ## recovery_tune_client_util_high
 - Тип: число
 - Значение по умолчанию: 0.5
 - Можно менять на лету: да
 Клиентская утилизация, которая считается "высокой".
 ## recovery_tune_agg_interval
 - Тип: целое число
 - Значение по умолчанию: 10
 - Можно менять на лету: да
 Число последних итераций автоподстройки для расчёта задержки как среднего
 значения. Меньшие значения параметра ускоряют отклик на изменение нагрузки,
 большие значения делают задержку стабильнее. Значение по умолчанию 10
 обычно нормальное и не требует изменений.
 ## recovery_tune_sleep_min_us
 - Тип: микросекунды
 - Значение по умолчанию: 10
 - Можно менять на лету: да
 Минимальное возможное значение авто-подстроенного recovery_sleep_us.
 Меньшие значения заменяются на 0.
 ## recovery_tune_sleep_cutoff_us
 - Тип: микросекунды
 - Значение по умолчанию: 10000000
 - Можно менять на лету: да
 Максимальное возможное значение авто-подстроенного recovery_sleep_us.
 Большие значения считаются случайными выбросами и игнорируются в
 усреднении.
 ## discard_on_start
 - Тип: булево (да/нет)
 Освобождать (SSD TRIM) неиспользуемые блоки диска данных при каждом запуске OSD.
 ## min_discard_size
 - Тип: целое число
 - Значение по умолчанию: 1048576
 Минимальный размер последовательного блока данных, чтобы освобождать его через TRIM.
 ## allow_net_split
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Разрешить "безопасные" случаи разделений сети - разрешить активировать PG без
 соединений к некоторым OSD, помеченным активными в etcd, если общее число активных
 OSD в PG составляет как минимум pg_minsize. То есть, разрешать некоторым OSD терять
 соединения с некоторыми другими OSD, если это не нарушает гарантий pg_minsize.
 Минус такого разрешения в том, что оно повышает вероятность записи данных ровно в
 pg_minsize OSD во время переключений, что может потом привести к тому, что PG станут
 неполными (incomplete), если упадут ещё какие-то OSD.
 Старое поведение в версиях до 2.0.0 было идентично включённому allow_net_split.
--- a/docs/config/pool.en.md
+++ b/docs/config/pool.en.md
@ -32,8 +32,6 @@ Parameters:
 - [pg_minsize](#pg_minsize)
 - [pg_count](#pg_count)
 - [failure_domain](#failure_domain)
 - [level_placement](#level_placement)
 - [raw_placement](#raw_placement)
 - [max_osd_combinations](#max_osd_combinations)
 - [block_size](#block_size)
 - [bitmap_granularity](#bitmap_granularity)
@ -42,8 +40,6 @@ Parameters:
 - [root_node](#root_node)
 - [osd_tags](#osd_tags)
 - [primary_affinity_tags](#primary_affinity_tags)
 - [scrub_interval](#scrub_interval)
 - [used_for_app](#used_for_app)
 Examples:
@ -55,7 +51,7 @@ Examples:
 OSD placement tree is set in a separate etcd key `/vitastor/config/node_placement`
 in the following JSON format:
-```
+`
 {
  "<node name or OSD number>": {
    "level": "<level>",
@ -63,7 +59,7 @@ in the following JSON format:
  },
  ...
 }
-```
+`
 Here, if a node name is a number then it is assumed to refer to an OSD.
 Level of the OSD is always "osd" and cannot be overridden. You may only
@ -86,11 +82,7 @@ Parent node reference is required for intermediate tree nodes.
 Separate OSD settings are set in etcd keys `/vitastor/config/osd/<number>`
 in JSON format `{"<key>":<value>}`.
-As of now, the following settings are supported:
+As of now, two settings are supported:
 - [reweight](#reweight)
 - [tags](#tags)
 - [noout](#noout)
 ## reweight
@ -113,14 +105,6 @@ subsets and then use a specific subset for pool instead of all OSDs.
 For example you can mark SSD OSDs with tag "ssd" and HDD OSDs with "hdd" and
 such tags will work as device classes.
 ## noout
 - Type: boolean
 - Default: false
 If set to true, [osd_out_time](monitor.en.md#osd_out_time) is ignored for this
 OSD and it's never removed from data distribution by the monitor.
 # Pool parameters
 ## name
@ -169,29 +153,6 @@ That is, if it becomes impossible to place PG data on at least (pg_minsize)
 OSDs, PG is deactivated for both read and write. So you know that a fresh
 write always goes to at least (pg_minsize) OSDs (disks).
 For example, the difference between pg_minsize 2 and 1 in a 3-way replicated
 pool (pg_size=3) is:
 - If 2 hosts go down with pg_minsize=2, the pool becomes inactive and remains
  inactive for [osd_out_time](monitor.en.md#osd_out_time) (10 minutes). After
  this timeout, the monitor selects replacement hosts/OSDs and the pool comes
  up and starts to heal. Therefore, if you don't have replacement OSDs, i.e.
  if you only have 3 hosts with OSDs and 2 of them are down, the pool remains
  inactive until you add or return at least 1 host (or change failure_domain
  to "osd").
 - If 2 hosts go down with pg_minsize=1, the pool only experiences a short
  I/O pause until the monitor notices that OSDs are down (5-10 seconds with
  the default [etcd_report_interval](osd.en.md#etcd_report_interval)). After
  this pause, I/O resumes, but new data is temporarily written in only 1 copy.
  Then, after osd_out_time, the monitor also selects replacement OSDs and the
  pool starts to heal.
 So, pg_minsize regulates the number of failures that a pool can tolerate
 without temporary downtime for [osd_out_time](monitor.en.md#osd_out_time),
 but at a cost of slightly reduced storage reliability.
 See also [allow_net_split](osd.en.md#allow_net_split) and
 [PG state descriptions](../usage/admin.en.md#pg-states).
 FIXME: pg_minsize behaviour may be changed in the future to only make PGs
 read-only instead of deactivating them.
@ -203,8 +164,8 @@ read-only instead of deactivating them.
 Number of PGs for this pool. The value should be big enough for the monitor /
 LP solver to be able to optimize data placement.
-"Enough" is usually around 10-100 PGs per OSD, i.e. you set pg_count for pool
+"Enough" is usually around 64-128 PGs per OSD, i.e. you set pg_count for pool
-to (total OSD count * 10 / pg_size). You can round it to the closest power of 2,
+to (total OSD count * 100 / pg_size). You can round it to the closest power of 2,
 because it makes it easier to reduce or increase PG count later by dividing or
 multiplying it by 2.
@ -226,69 +187,6 @@ never put on OSDs in the same failure domain (for example, on the same host).
 So failure domain specifies the unit which failure you are protecting yourself
 from.
 ## level_placement
 - Type: string
 Additional failure domain rules, applied in conjunction with failure_domain.
 Must be specified in the following form:
 `<placement level>=<sequence of characters>, <level2>=<sequence2>, ...`
 Sequence should be exactly [pg_size](#pg_size) characters long. Each character
 corresponds to an OSD in the PG of this pool. Equal characters mean that
 corresponding items of the PG should be placed into the same placement tree
 item at this level. Different characters mean that items should be placed into
 different items.
 For example, if you want an EC 4+2 pool and you want every 2 chunks to be stored
 in its own datacenter and you also want each chunk to be stored on a different
 host, you should set `level_placement` to `dc=112233 host=123456`.
 Or you can set `level_placement` to `dc=112233` and leave `failure_domain` empty,
 because `host` is the default `failure_domain` and it will be applied anyway.
 Without this rule, it may happen that 3 chunks will be stored on OSDs in the
 same datacenter, and the data will become inaccessible if that datacenter goes
 down in this case.
 Of course, you should group your hosts into datacenters before applying the rule
 by setting [placement_levels](monitor.en.md#placement_levels) to something like
 `{"dc":90,"host":100,"osd":110}` and add DCs to [node_placement](#placement-tree),
 like `{"dc1":{"level":"dc"},"host1":{"parent":"dc1"},...}`.
 ## raw_placement
 - Type: string
 Raw PG placement rules, specified in the form of a DSL (domain-specific language).
 Use only if you really know what you're doing :)
 DSL specification:
 ```
 dsl := item | item ("\n" | ",") items
 item := "any" | rules
 rules := rule | rule rules
 rule := level operator arg
 level := /\w+/
 operator := "!=" | "=" | ">" | "?="
 arg := value | "(" values ")"
 values := value | value "," values
 value := item_ref | constant_id
 item_ref := /\d+/
 constant_id := /"([^"]+)"/
 ```
 "?=" operator means "preferred". I.e. `dc ?= "meow"` means "prefer datacenter meow
 for this chunk, but put into another dc if it's unavailable".
 Examples:
 - Simple 3 replicas with failure_domain=host: `any, host!=1, host!=(1,2)`
 - EC 4+2 in 3 DC: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
 - 1 replica in fixed DC + 2 in random DCs: `dc?=meow, dc!=1, dc!=(1,2)`
 ## max_osd_combinations
 - Type: integer
@ -306,8 +204,9 @@ This parameter usually doesn't require to be changed.
 - Default: 131072
 Block size for this pool. The value from /vitastor/config/global is used when
-unspecified. Only OSDs with matching block_size are used for each pool. If you
+unspecified. If your cluster has OSDs with different block sizes then pool must
-want to further restrict OSDs for the pool, use [osd_tags](#osd_tags).
+be restricted by [osd_tags](#osd_tags) to only include OSDs with matching block
 size.
 Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#block_size).
@ -316,9 +215,10 @@ Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-c
 - Type: integer
 - Default: 4096
-"Sector" size of virtual disks in this pool. The value from /vitastor/config/global
+"Sector" size of virtual disks in this pool. The value from
-is used when unspecified. Similarly to block_size, only OSDs with matching
+/vitastor/config/global is used when unspecified. Similar to block_size, the
-bitmap_granularity are used for each pool.
+pool must be restricted by [osd_tags](#osd_tags) to only include OSDs with
 matching bitmap_granularity.
 Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#bitmap_granularity).
@ -328,11 +228,10 @@ Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-c
 - Default: none
 Immediate commit setting for this pool. The value from /vitastor/config/global
-is used when unspecified. Similarly to block_size, only OSDs with compatible
+is used when unspecified. Similar to block_size, the pool must be restricted by
-immediate_commit are used for each pool. "Compatible" means that a pool with
+[osd_tags](#osd_tags) to only include OSDs with compatible immediate_commit.
-non-immediate commit will use OSDs with immediate commit enabled, but not vice
+Compatible means that a pool with non-immediate commit will work with OSDs with
-versa. I.e., pools with "none" use all OSDs, pools with "small" only use OSDs
+immediate commit enabled, but not vice versa.
 with "all" or "small", and pools with "all" only use OSDs with "all".
 Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#immediate_commit).
@ -373,45 +272,6 @@ Specifies OSD tags to prefer putting primary OSDs in this pool to.
 Note that for EC/XOR pools Vitastor always prefers to put primary OSD on one
 of the OSDs containing a data chunk for a PG.
 ## scrub_interval
 - Type: time interval (number + unit s/m/h/d/M/y)
 Automatic scrubbing interval for this pool. Overrides
 [global scrub_interval setting](osd.en.md#scrub_interval).
 ## used_for_app
 - Type: string
 If non-empty, the pool is marked as used for a separate application, for example,
 VitastorFS or S3, which allocates Vitastor volume IDs by itself and does not use
 image/inode metadata in etcd.
 When a pool is marked as used for such app, regular block volume creation in it
 is disabled (vitastor-cli refuses to create images without --force) to protect
 the user from block volume and FS/S3 volume ID collisions and data loss.
 Also such pools do not calculate per-inode space usage statistics in etcd because
 using it for an external application implies that it may contain a very large
 number of volumes and their statistics may take too much space in etcd.
 Setting used_for_app to `fs:<name>` tells Vitastor that the pool is used for VitastorFS
 with VitastorKV metadata base stored in a block image (regular Vitastor volume) named
 `<name>`.
 [vitastor-nfs](../usage/nfs.en.md), in its turn, refuses to use pools not marked
 for the corresponding FS when starting. This also implies that you can use one
 pool only for one VitastorFS.
 If you plan to use the pool for S3, set its used_for_app to `s3:<name>`. `<name>` may
 be basically anything you want (for example, `s3:standard`) - it's not validated
 by Vitastor S3 components in any way.
 All other values except those prefixed with `fs:` or `s3:` may be used freely and don't
 mean anything special for Vitastor core components. For now, you can use them as
 you wish.
 # Examples
 ## Replicated pool
--- a/docs/config/pool.ru.md
+++ b/docs/config/pool.ru.md
@ -31,8 +31,6 @@
 - [pg_minsize](#pg_minsize)
 - [pg_count](#pg_count)
 - [failure_domain](#failure_domain)
 - [level_placement](#level_placement)
 - [raw_placement](#raw_placement)
 - [max_osd_combinations](#max_osd_combinations)
 - [block_size](#block_size)
 - [bitmap_granularity](#bitmap_granularity)
@ -41,8 +39,6 @@
 - [root_node](#root_node)
 - [osd_tags](#osd_tags)
 - [primary_affinity_tags](#primary_affinity_tags)
 - [scrub_interval](#scrub_interval)
 - [used_for_app](#used_for_app)
 Примеры:
@ -54,7 +50,7 @@
 Дерево размещения OSD задаётся в отдельном ключе etcd `/vitastor/config/node_placement`
 в следующем JSON-формате:
-```
+`
 {
  "<имя узла или номер OSD>": {
    "level": "<уровень>",
@ -62,7 +58,7 @@
  },
  ...
 }
-```
+`
 Здесь, если название узла - число, считается, что это OSD. Уровень OSD
 всегда равен "osd" и не может быть переопределён. Для OSD вы можете только
@ -85,11 +81,10 @@
 Настройки отдельных OSD задаются в ключах etcd `/vitastor/config/osd/<number>`
 в JSON-формате `{"<key>":<value>}`.
-На данный момент поддерживаются следующие настройки:
+На данный момент поддерживаются две настройки:
 - [reweight](#reweight)
 - [tags](#tags)
 - [noout](#noout)
 ## reweight
@ -113,14 +108,6 @@
 всех. Можно, например, пометить SSD OSD тегом "ssd", а HDD тегом "hdd", в
 этом смысле теги работают аналогично классам устройств.
 ## noout
 - Тип: булево (да/нет)
 - Значение по умолчанию: false
 Если установлено в true, то [osd_out_time](monitor.ru.md#osd_out_time) для этого
 OSD игнорируется и OSD не удаляется из распределения данных монитором.
 # Параметры
 ## name
@ -169,26 +156,6 @@ OSD игнорируется и OSD не удаляется из распред
 OSD, PG деактивируется на чтение и запись. Иными словами, всегда известно,
 что новые блоки данных всегда записываются как минимум на pg_minsize дисков.
 Для примера, разница между pg_minsize 2 и 1 в реплицированном пуле с 3 копиями
 данных (pg_size=3), проявляется следующим образом:
 - Если 2 сервера отключаются при pg_minsize=2, пул становится неактивным и
  остаётся неактивным в течение [osd_out_time](monitor.ru.md#osd_out_time)
  (10 минут), после чего монитор назначает другие OSD/серверы на замену, пул
  поднимается и начинает восстанавливать недостающие копии данных. Соответственно,
  если OSD на замену нет - то есть, если у вас всего 3 сервера с OSD и 2 из них
  недоступны - пул так и остаётся недоступным до тех пор, пока вы не вернёте
  или не добавите хотя бы 1 сервер (или не переключите failure_domain на "osd").
 - Если 2 сервера отключаются при pg_minsize=1, ввод-вывод лишь приостанавливается
  на короткое время, до тех пор, пока монитор не поймёт, что OSD отключены
  (что занимает 5-10 секунд при стандартном [etcd_report_interval](osd.ru.md#etcd_report_interval)).
  После этого ввод-вывод восстанавливается, но новые данные временно пишутся
  всего в 1 копии. Когда же проходит osd_out_time, монитор точно так же назначает
  другие OSD на замену выбывшим и пул начинает восстанавливать копии данных.
 То есть, pg_minsize регулирует число отказов, которые пул может пережить без
 временной остановки обслуживания на [osd_out_time](monitor.ru.md#osd_out_time),
 но ценой немного пониженных гарантий надёжности.
 FIXME: Поведение pg_minsize может быть изменено в будущем с полной деактивации
 PG на перевод их в режим только для чтения.
@ -200,8 +167,8 @@ PG на перевод их в режим только для чтения.
 Число PG для данного пула. Число должно быть достаточно большим, чтобы монитор
 мог равномерно распределить по ним данные.
-Обычно это означает примерно 10-100 PG на 1 OSD, т.е. pg_count можно устанавливать
+Обычно это означает примерно 64-128 PG на 1 OSD, т.е. pg_count можно устанавливать
-равным (общему числу OSD * 10 / pg_size). Значение можно округлить до ближайшей
+равным (общему числу OSD * 100 / pg_size). Значение можно округлить до ближайшей
 степени 2, чтобы потом было легче уменьшать или увеличивать число PG, умножая
 или деля его на 2.
@ -222,71 +189,6 @@ PG в Vitastor эферемерны, то есть вы можете менят
 Иными словами, домен отказа - это то, от отказа чего вы защищаете себя избыточным
 хранением.
 ## level_placement
 - Тип: строка
 Правила дополнительных доменов отказа, применяемые вместе с failure_domain.
 Должны задаваться в следующем виде:
 `<уровень>=<последовательность символов>, <уровень2>=<последовательность2>, ...`
 Каждая `<последовательность>` должна состоять ровно из [pg_size](#pg_size) символов.
 Каждый символ соответствует одному OSD (размещению одной части PG) этого пула.
 Одинаковые символы означают, что соответствующие части размещаются в один и тот же
 узел дерева OSD на заданном `<уровне>`. Разные символы означают, что части
 размещаются в разные узлы.
 Например, если вы хотите сделать пул EC 4+2 и хотите поместить каждые 2 части
 данных в свой датацентр, и также вы хотите, чтобы каждая часть размещалась на
 другом хосте, то вы должны задать `level_placement` равным `dc=112233 host=123456`.
 Либо вы просто можете задать `level_placement` равным `dc=112233` и оставить
 `failure_domain` пустым, т.к. `host` это его значение по умолчанию и оно также
 применится автоматически.
 Без этого правила может получиться так, что в одном из датацентров окажется
 3 части данных одной PG и данные окажутся недоступными при временном отключении
 этого датацентра.
 Естественно, перед установкой правила вам нужно сгруппировать ваши хосты в
 датацентры, установив [placement_levels](monitor.ru.md#placement_levels) во что-то
 типа `{"dc":90,"host":100,"osd":110}` и добавив датацентры в [node_placement](#дерево-размещения),
 примерно так: `{"dc1":{"level":"dc"},"host1":{"parent":"dc1"},...}`.
 ## raw_placement
 - Тип: строка
 Низкоуровневые правила генерации PG в форме DSL (доменно-специфичного языка).
 Используйте, только если действительно знаете, зачем вам это надо :)
 Спецификация DSL:
 ```
 dsl := item | item ("\n" | ",") items
 item := "any" | rules
 rules := rule | rule rules
 rule := level operator arg
 level := /\w+/
 operator := "!=" | "=" | ">" | "?="
 arg := value | "(" values ")"
 values := value | value "," values
 value := item_ref | constant_id
 item_ref := /\d+/
 constant_id := /"([^"]+)"/
 ```
 Оператор "?=" означает "предпочитаемый". Т.е. `dc ?= "meow"` означает "предпочитать
 датацентр meow для этой части данных, но разместить её в другом датацентре, если
 meow недоступен".
 Примеры:
 - Простые 3 реплики с failure_domain=host: `any, host!=1, host!=(1,2)`
 - EC 4+2 в 3 датацентрах: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
 - 1 копия в фиксированном ДЦ + 2 в других ДЦ: `dc?=meow, dc!=1, dc!=(1,2)`
 ## max_osd_combinations
 - Тип: целое число
@ -305,9 +207,8 @@ meow недоступен".
 Размер блока для данного пула. Если не задан, используется значение из
 /vitastor/config/global. Если в вашем кластере есть OSD с разными размерами
-блока, пул будет использовать только OSD с размером блока, равным размеру блока
+блока, пул должен быть ограничен только OSD, блок которых равен блоку пула,
-пула. Если вы хотите сильнее ограничить набор используемых для пула OSD -
+с помощью [osd_tags](#osd_tags).
 используйте [osd_tags](#osd_tags).
 О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#block_size).
@ -317,8 +218,9 @@ meow недоступен".
 - По умолчанию: 4096
 Размер "сектора" виртуальных дисков в данном пуле. Если не задан, используется
-значение из /vitastor/config/global. Аналогично block_size, каждый пул будет
+значение из /vitastor/config/global. Аналогично block_size, пул должен быть
-использовать только OSD с совпадающей с пулом настройкой bitmap_granularity.
+ограничен OSD со значением bitmap_granularity, равным значению пула, с помощью
 [osd_tags](#osd_tags).
 О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#bitmap_granularity).
@ -328,13 +230,11 @@ meow недоступен".
 - По умолчанию: none
 Настройка мгновенного коммита для данного пула. Если не задана, используется
-значение из /vitastor/config/global. Аналогично block_size, каждый пул будет
+значение из /vitastor/config/global. Аналогично block_size, пул должен быть
-использовать только OSD с *совместимыми* настройками immediate_commit.
+ограничен OSD со значением immediate_commit, совместимым со значением пула, с
-"Совместимыми" означает, что пул с отключенным мгновенным коммитом будет
+помощью [osd_tags](#osd_tags). Совместимость означает, что пул с отключенным
-использовать OSD с включённым мгновенным коммитом, но не наоборот. То есть,
+мгновенным коммитом может работать на OSD с включённым мгновенным коммитом, но
-пул со значением "none" будет использовать все OSD, пул со "small" будет
+не наоборот.
 использовать OSD с "all" или "small", а пул с "all" будет использовать только
 OSD с "all".
 О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#immediate_commit).
@ -376,50 +276,6 @@ OSD с "all".
 для PG этого пула. Имейте в виду, что для EC-пулов Vitastor также всегда
 предпочитает помещать первичный OSD на один из OSD с данными, а не с чётностью.
 ## scrub_interval
 - Тип: временной интервал (число + единица измерения s/m/h/d/M/y)
 Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
 Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).
 ## used_for_app
 - Тип: строка
 Если непусто, пул помечается как используемый для отдельного приложения, например,
 для VitastorFS или S3, которое распределяет ID образов в пуле само и не использует
 метаданные образов/инодов в etcd.
 Когда пул помечается используемым для такого приложения, создание обычных блочных
 образов в нём запрещается (vitastor-cli отказывается создавать образы без --force),
 чтобы защитить пользователя от коллизий ID блочных образов и томов ФС/S3, и,
 таким образом, от потери данных.
 Также для таких пулов отключается передача статистики в etcd по отдельным инодам,
 так как использование для внешнего приложения подразумевает, что пул может содержать
 очень много томов и их статистика может занять слишком много места в etcd.
 Установка used_for_app в значение `fs:<name>` сообщает о том, что пул используется
 для VitastorFS с базой метаданных VitastorKV, хранимой в блочном образе с именем
 `<name>`.
 [vitastor-nfs](../usage/nfs.ru.md), в свою очередь, при запуске отказывается
 использовать для ФС пулы, не помеченные, как используемые для неё. Это также
 означает, что один пул может использоваться только для одной VitastorFS.
 Если же вы планируете использовать пул для данных S3, установите его used_for_app
 в значение `s3:<name>`, где `<name>` - любое название по вашему усмотрению
 (например, `s3:standard`) - конкретное содержимое `<name>` пока никак не проверяется
 компонентами Vitastor S3.
 Смотрите также [allow_net_split](osd.ru.md#allow_net_split) и
 [документацию по состояниям PG](../usage/admin.ru.md#состояния-pg).
 Все остальные значения used_for_app, кроме начинающихся на `fs:` или `s3:`, не
 означают ничего особенного для основных компонентов Vitastor. Поэтому сейчас вы
 можете использовать их свободно любым желаемым способом.
 # Примеры
 ## Реплицированный пул
--- a/docs/config/src/client.en.md
+++ b/docs/config/src/client.en.md
@ -1,4 +0,0 @@
 # Client Parameters
 These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
 affect their interaction with the cluster.
--- a/docs/config/src/client.ru.md
+++ b/docs/config/src/client.ru.md
@ -1,4 +0,0 @@
 # Параметры клиентского кода
 Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
 затрагивают логику их работы с кластером.
--- a/docs/config/src/client.yml
+++ b/docs/config/src/client.yml
@ -1,273 +0,0 @@
 - name: client_iothread_count
  type: int
  default: 0
  online: false
  info: |
    Number of separate threads for handling TCP network I/O at client library
    side. Enabling 4 threads usually allows increasing peak performance of each
    client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
    100-150 to 400 thousand iops, but at the same time it increases latency.
    Latency increase depends on CPU: with CPU power saving disabled latency
    only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
    with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
    iops decrease from 2000 to 1000). RDMA isn't affected by this option.
    It's recommended to enable client I/O threads if you don't use RDMA and want
    to increase peak client performance.
  info_ru: |
    Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
    клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
    производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
    чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
    секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
    при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
    падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
    и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
    RDMA данная опция не влияет.
    Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
    RDMA и хотите повысить пиковую производительность клиентов.
 - name: client_retry_interval
  type: ms
  min: 10
  default: 50
  online: true
  info: |
    Retry time for I/O requests failed due to inactive PGs or network
    connectivity errors.
  info_ru: |
    Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
    ошибок сети.
 - name: client_eio_retry_interval
  type: ms
  default: 1000
  online: true
  info: |
    Retry time for I/O requests failed due to data corruption or unfinished
    EC object deletions (has_incomplete PG state). 0 disables such retries
    and clients are not blocked and just get EIO error code instead.
  info_ru: |
    Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
    или незавершённых удалений EC-объектов (состояния PG has_incomplete).
    0 отключает повторы таких запросов и клиенты не блокируются, а вместо
    этого просто получают код ошибки EIO.
 - name: client_retry_enospc
  type: bool
  default: true
  online: true
  info: |
    Retry writes on out of space errors to wait until some space is freed on
    OSDs.
  info_ru: |
    Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
    ожидать, пока на OSD не освободится место.
 - name: client_wait_up_timeout
  type: sec
  default: 16
  online: true
  info: |
    Wait for this number of seconds until PGs are up when doing operations
    which require all PGs to be up. Currently only used by object listings
    in delete and merge-based commands ([vitastor-cli rm](../usage/cli.en.md#rm), merge and so on).
    The default value is calculated as `1 + OSD lease timeout`, which is
    `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
  info_ru: |
    Время ожидания поднятия PG при операциях, требующих активности всех PG.
    В данный момент используется листингами объектов в командах, использующих
    удаление и слияние ([vitastor-cli rm](../usage/cli.ru.md#rm), merge и подобные).
    Значение по умолчанию вычисляется как `1 + время lease OSD`, равное
    `1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
 - name: client_max_dirty_bytes
  type: int
  default: 33554432
  online: true
  info: |
    Without [immediate_commit](layout-cluster.en.md#immediate_commit)=all this parameter sets the limit of "dirty"
    (not committed by fsync) data allowed by the client before forcing an
    additional fsync and committing the data. Also note that the client always
    holds a copy of uncommitted data in memory so this setting also affects
    RAM usage of clients.
  info_ru: |
    При работе без [immediate_commit](layout-cluster.ru.md#immediate_commit)=all - это лимит объёма "грязных" (не
    зафиксированных fsync-ом) данных, при достижении которого клиент будет
    принудительно вызывать fsync и фиксировать данные. Также стоит иметь в виду,
    что в этом случае до момента fsync клиент хранит копию незафиксированных
    данных в памяти, то есть, настройка влияет на потребление памяти клиентами.
 - name: client_max_dirty_ops
  type: int
  default: 1024
  online: true
  info: |
    Same as client_max_dirty_bytes, but instead of total size, limits the number
    of uncommitted write operations.
  info_ru: |
    Аналогично client_max_dirty_bytes, но ограничивает количество
    незафиксированных операций записи вместо их общего объёма.
 - name: client_enable_writeback
  type: bool
  default: false
  online: true
  info: |
    This parameter enables client-side write buffering. This means that write
    requests are accumulated in memory for a short time before being sent to
    a Vitastor cluster, which allows sending them in parallel and increasing
    performance of some applications. Writes are buffered until client forces
    a flush with fsync() or until the amount of buffered writes exceeds the
    limit.
    Write buffering significantly increases performance of some applications,
    for example, CrystalDiskMark under Windows (LOL :-D), but also any other
    applications if they do writes in one of two non-optimal ways: either if
    they do a lot of small (4 kb or so) sequential writes, or if they do a lot
    of small random writes, but without any parallelism or asynchrony, and also
    without calling fsync().
    With write buffering enabled, you can expect around 22000 T1Q1 random write
    iops in QEMU more or less regardless of the quality of your SSDs, and this
    number is in fact bound by QEMU itself rather than Vitastor (check it
    yourself by adding a "driver=null-co" disk in QEMU). Without write
    buffering, the current record is 9900 iops, but the number is usually
    even lower with non-ideal hardware, for example, it may be 5000 iops.
    Even when this parameter is enabled, write buffering isn't enabled until
    the client explicitly allows it, because enabling it without the client
    being aware of the fact that his writes may be buffered may lead to data
    loss. Because of this, older versions of clients don't support write
    buffering at all, newer versions of the QEMU driver allow write buffering
    only if it's enabled in disk settings with `-blockdev cache.direct=false`,
    and newer versions of FIO only allow write buffering if you don't specify
    `-direct=1`. NBD and NFS drivers allow write buffering by default.
    You can overcome this restriction too with the `client_writeback_allowed`
    parameter, but you shouldn't do that unless you **really** know what you
    are doing.
  info_ru: |
    Данный параметр разрешает включать буферизацию записи в памяти. Буферизация
    означает, что операции записи отправляются на кластер Vitastor не сразу, а
    могут небольшое время накапливаться в памяти и сбрасываться сразу пакетами,
    до тех пор, пока либо не будет превышен лимит неотправленных записей, либо
    пока клиент не вызовет fsync.
    Буферизация значительно повышает производительность некоторых приложений,
    например, CrystalDiskMark в Windows (ха-ха :-D), но также и любых других,
    которые пишут на диск неоптимально: либо последовательно, но мелкими блоками
    (например, по 4 кб), либо случайно, но без параллелизма и без fsync - то
    есть, например, отправляя 128 операций записи в разные места диска, но не
    все сразу с помощью асинхронного I/O, а по одной.
    В QEMU с буферизацией записи можно ожидать показателя примерно 22000
    операций случайной записи в секунду в 1 поток и с глубиной очереди 1 (T1Q1)
    без fsync, почти вне зависимости от того, насколько хороши ваши диски - эта
    цифра упирается в сам QEMU. Без буферизации рекорд пока что - 9900 операций
    в секунду, но на железе похуже может быть и поменьше, например, 5000 операций
    в секунду.
    При этом, даже если данный параметр включён, буферизация не включается, если
    явно не разрешена клиентом, т.к. если клиент не знает, что запросы записи
    буферизуются, это может приводить к потере данных. Поэтому в старых версиях
    клиентских драйверов буферизация записи не включается вообще, в новых
    версиях QEMU-драйвера включается, только если разрешена опцией диска
    `-blockdev cache.direct=false`, а в fio - только если нет опции `-direct=1`.
    В NBD и NFS драйверах буферизация записи разрешена по умолчанию.
    Можно обойти и это ограничение с помощью параметра `client_writeback_allowed`,
    но делать так не надо, если только вы не уверены в том, что делаете, на все
    100%. :-)
 - name: client_max_buffered_bytes
  type: int
  default: 33554432
  online: true
  info: |
    Maximum total size of buffered writes which triggers write-back when reached.
  info_ru: |
    Максимальный общий размер буферизованных записей, при достижении которого
    начинается процесс сброса данных на сервер.
 - name: client_max_buffered_ops
  type: int
  default: 1024
  online: true
  info: |
    Maximum number of buffered writes which triggers write-back when reached.
    Multiple consecutive modified data regions are counted as 1 write here.
  info_ru: |
    Максимальное количество буферизованных записей, при достижении которого
    начинается процесс сброса данных на сервер. При этом несколько
    последовательных изменённых областей здесь считаются 1 записью.
 - name: client_max_writeback_iodepth
  type: int
  default: 256
  online: true
  info: |
    Maximum number of parallel writes when flushing buffered data to the server.
  info_ru: |
    Максимальное число параллельных операций записи при сбросе буферов на сервер.
 - name: nbd_timeout
  type: sec
  default: 300
  online: false
  info: |
    Timeout for I/O operations for [NBD](../usage/nbd.en.md). If an operation
    executes for longer than this timeout, including when your cluster is just
    temporarily down for more than timeout, the NBD device will detach by itself
    (and possibly break the mounted file system).
    You can set timeout to 0 to never detach, but in that case you won't be
    able to remove the kernel device at all if the NBD process dies - you'll have
    to reboot the host.
  info_ru: |
    Таймаут для операций чтения/записи через [NBD](../usage/nbd.ru.md). Если
    операция выполняется дольше таймаута, включая временную недоступность
    кластера на время, большее таймаута, NBD-устройство отключится само собой
    (и, возможно, сломает примонтированную ФС).
    Вы можете установить таймаут в 0, чтобы никогда не отключать устройство по
    таймауту, но в этом случае вы вообще не сможете удалить устройство, если
    процесс NBD умрёт - вам придётся перезагружать сервер.
 - name: nbd_max_devices
  type: int
  default: 64
  online: false
  info: |
    Maximum number of NBD devices in the system. This value is passed as
    `nbds_max` parameter for the nbd kernel module when vitastor-nbd autoloads it.
  info_ru: |
    Максимальное число NBD-устройств в системе. Данное значение передаётся
    модулю ядра nbd как параметр `nbds_max`, когда его загружает vitastor-nbd.
 - name: nbd_max_part
  type: int
  default: 3
  online: false
  info: |
    Maximum number of partitions per NBD device. This value is passed as
    `max_part` parameter for the nbd kernel module when vitastor-nbd autoloads it.
    Note that (nbds_max)*(1+max_part) usually can't exceed 256.
  info_ru: |
    Максимальное число разделов на одном NBD-устройстве. Данное значение передаётся
    модулю ядра nbd как параметр `max_part`, когда его загружает vitastor-nbd.
    Имейте в виду, что (nbds_max)*(1+max_part) обычно не может превышать 256.
 - name: osd_nearfull_ratio
  type: float
  default: 0.95
  online: true
  info: |
    Ratio of used space on OSD to treat it as "almost full" in vitastor-cli status output.
    Remember that some client writes may hang or complete with an error if even
    just one OSD becomes 100 % full!
    However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
    unable to start at all), so you'll be able to recover from "out of space" errors
    without destroying and recreating OSDs.
  info_ru: |
    Доля занятого места на OSD, начиная с которой он считается "почти заполненным" в
    выводе vitastor-cli status.
    Помните, что часть клиентских запросов может зависнуть или завершиться с ошибкой,
    если на 100 % заполнится хотя бы 1 OSD!
    Однако, в отличие от Ceph, заполненные на 100 % OSD Vitastor не падают (в Ceph
    заполненные на 100% OSD вообще не могут стартовать), так что вы сможете
    восстановить работу кластера после ошибок отсутствия свободного места
    без уничтожения и пересоздания OSD.
--- a/docs/config/src/common.yml
+++ b/docs/config/src/common.yml
@ -11,21 +11,13 @@
 - name: etcd_address
  type: string or array of strings
  type_ru: строка или массив строк
  online: true
  info: |
    etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
    specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
    Note that https is not supported for etcd connections yet.
    etcd connection endpoints can be changed online by updating global
    configuration in etcd itself - this allows switching the cluster to new
    etcd addresses without downtime.
  info_ru: |
    Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
    или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
    Адреса подключения к etcd можно поменять на лету, обновив конфигурацию в
    самом etcd - это позволяет переключить кластер на новые etcd без остановки.
 - name: etcd_prefix
  type: string
  default: "/vitastor"
@ -39,6 +31,5 @@
 - name: log_level
  type: int
  default: 0
  online: true
  info: Log level. Raise if you want more verbose output.
  info_ru: Уровень логирования. Повысьте, если хотите более подробный вывод.
--- a/docs/config/src/include.js
+++ b/docs/config/src/include.js
@ -1,145 +0,0 @@
 #!/usr/bin/nodejs
 const fsp = require('fs').promises;
 run(process.argv).catch(console.error);
 // Markdown preprocessor entry point.
 // Reads the index file named by argv[2], replaces every
 // {{path[#section][|indent=N]}} token with the (optionally section-limited)
 // contents of the referenced file, rewrites links, and prints the merged
 // document to stdout. Prints a usage message and returns if argv[2] is missing.
 async function run(argv)
 {
    if (argv.length < 3)
    {
        console.log('Markdown preprocessor\nUSAGE: ./include.js file.md');
        return;
    }
    const index_file = await fsp.realpath(argv[2]);
    // Tokenizer. Because the whole pattern is one capturing group, split()
    // keeps the delimiters: inclusions ({{...}}), markdown links ([text](url))
    // and heading lines (#...) become separate array elements.
    const re = /(\{\{[\s\S]*?\}\}|\[[^\]]+\]\([^\)]+\)|(?:^|\n)#[^\n]+)/;
    let text = await fsp.readFile(index_file, { encoding: 'utf-8' });
    text = text.split(re);
    // Maps "absolute included path[#section]" => "#anchor" of its first heading
    let included = {};
    // Level and title of the most recent heading seen in the index file
    let heading = 0, heading_name = '', m;
    for (let i = 0; i < text.length; i++)
    {
        if (text[i].substr(0, 2) == '{{')
        {
            // Inclusion
            // Strip the surrounding {{ and }}
            let incfile = text[i].substr(2, text[i].length-4);
            let section = null;
            // By default included headings are nested under the current heading level
            let indent = heading;
            // Optional trailing "|indent=N" overrides the heading shift
            incfile = incfile.replace(/\s*\|\s*indent\s*=\s*(-?\d+)\s*$/, (m, m1) => { indent = parseInt(m1); return ''; });
            // Optional trailing "#Section name" limits the inclusion to one section
            incfile = incfile.replace(/\s*#\s*([^#]+)$/, (m, m1) => { section = m1; return ''; });
            let inc_heading = section;
            incfile = rel2abs(index_file, incfile);
            let inc = await fsp.readFile(incfile, { encoding: 'utf-8' });
            inc = inc.trim().replace(/^[\s\S]+?\n#/, '#'); // remove until the first header
            inc = inc.split(re);
            // String of `indent` '#' characters prepended to every included heading.
            // NOTE(review): the regex above accepts negative values; indent < -1
            // would make new Array() throw a RangeError - confirm this is never used.
            const indent_str = new Array(indent+1).join('#');
            // Token index range [section_start, section_end) of the requested section
            let section_start = -1, section_end = -1;
            for (let j = 0; j < inc.length; j++)
            {
                if ((m = /^(\n?)(#+\s*)([\s\S]+)$/.exec(inc[j])))
                {
                    // Heading token: m[1] = optional newline, m[2] = '#' marks, m[3] = title
                    if (!inc_heading)
                    {
                        // Without an explicit section, the first heading names the inclusion
                        inc_heading = m[3].trim();
                    }
                    if (section)
                    {
                        if (m[3].trim() == section)
                            section_start = j;
                        else if (section_start >= 0)
                        {
                            // Any heading after the section start ends the section,
                            // regardless of its level
                            section_end = j;
                            break;
                        }
                    }
                    inc[j] = m[1] + indent_str + m[2] + m[3];
                }
                else if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(inc[j])) && !/^https?:(\/\/)|^#/.exec(m[2]))
                {
                    // Relative link (not http(s):// and not a pure #anchor):
                    // resolve it against the included file's location
                    const abs_m2 = rel2abs(incfile, m[2]);
                    const rel_m = abs2rel(__filename, abs_m2);
                    if (rel_m.substr(0, 9) == '../../../') // outside docs
                        inc[j] = m[1] + 'https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/'+rel2abs('docs/config/src/include.js', rel_m) + m[3];
                    else
                        inc[j] = m[1] + abs_m2 + m[3];
                }
            }
            if (section)
            {
                // Keep only the requested section; include nothing if it was not found
                inc = section_start >= 0 ? inc.slice(section_start, section_end < 0 ? inc.length : section_end) : [];
            }
            if (inc.length)
            {
                if (!inc_heading)
                    inc_heading = heading_name||'';
                // Anchor slug: lowercase, every run of non-letter characters becomes '-',
                // then one leading and one trailing '-' are removed
                included[incfile+(section ? '#'+section : '')] = '#'+inc_heading.toLowerCase().replace(/\P{L}+/ug, '-').replace(/^-|-$/g, '');
                inc[0] = inc[0].replace(/^\s+/, '');
                inc[inc.length-1] = inc[inc.length-1].replace(/\s+$/, '');
            }
            // Replace the {{...}} token with the included tokens and move past them,
            // so included content is not scanned again by this loop
            text.splice(i, 1, ...inc);
            i = i + inc.length - 1;
        }
        else if ((m = /^\n?(#+)\s*([\s\S]+)$/.exec(text[i])))
        {
            // Heading
            heading = m[1].length;
            heading_name = m[2].trim();
        }
    }
    // Second pass: links that point to content which is now part of this
    // document are turned into in-document anchors
    for (let i = 0; i < text.length; i++)
    {
        if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(text[i])) && !/^https?:(\/\/)|^#/.exec(m[2]))
        {
            const p = m[2].indexOf('#');
            if (included[m[2]])
            {
                // Exact match of an included file (or file#section): use its anchor
                text[i] = m[1]+included[m[2]]+m[3];
            }
            else if (p >= 0 && included[m[2].substr(0, p)])
            {
                // The file part is included: keep only the link's own #anchor
                text[i] = m[1]+m[2].substr(p)+m[3];
            }
        }
    }
    console.log(text.join(''));
 }
 // Resolve path `rel` against the directory that contains file `ref`.
 // The last '/'-separated component of `ref` is dropped (if `ref` has no '/',
 // it is used whole), `rel` is appended, and '.' / '..' segments are
 // collapsed by killdots(). Returns the '/'-joined result.
 function rel2abs(ref, rel)
 {
    const base_dir = ref.replace(/^(.*)\/[^\/]+$/, '$1');
    const segments = base_dir.split(/\/+/).concat(rel.split(/\/+/));
    return killdots(segments).join('/');
 }
 // Express path `abs` relative to the directory that contains file `ref`.
 // Leading components shared by both paths are skipped (the last component of
 // `ref`, i.e. the file name, is never consumed), then one '..' is emitted for
 // every remaining directory level of `ref`. Returns the '/'-joined result.
 function abs2rel(ref, abs)
 {
    const ref_parts = ref.split(/\/+/);
    const abs_parts = abs.split(/\/+/);
    // Length of the common prefix, always leaving at least one component of ref
    let common = 0;
    while (ref_parts.length - common > 1 && ref_parts[common] == abs_parts[common])
    {
        common++;
    }
    const result = abs_parts.slice(common);
    // Everything left in ref except its final component is a directory to climb out of
    const levels_up = ref_parts.length - common - 1;
    for (let n = 0; n < levels_up; n++)
    {
        result.unshift('..');
    }
    return killdots(result).join('/');
 }
 // Collapse '.' and '..' segments of a path given as an array of components.
 // The array is modified in place and also returned. A '..' removes the
 // preceding component unless that component is itself '..' or there is no
 // preceding component, so leading '..' segments are preserved.
 function killdots(rel)
 {
    let pos = 0;
    while (pos < rel.length)
    {
        const seg = rel[pos];
        if (seg == '.')
        {
            // Drop the segment; the next one slides into the same position
            rel.splice(pos, 1);
            continue;
        }
        if (seg == '..' && pos >= 1 && rel[pos-1] != '..')
        {
            // Drop "<dir>/.." and re-examine whatever now sits at the parent's position
            rel.splice(pos-1, 2);
            pos--;
            continue;
        }
        pos++;
    }
    return rel;
 }
--- a/docs/config/src/included.en.md
+++ b/docs/config/src/included.en.md
@ -1,75 +0,0 @@
 # Vitastor
 {{../../../README.md#The Idea}}
 {{../../../README.md#Talks and presentations}}
 {{../../intro/features.en.md}}
 {{../../intro/quickstart.en.md}}
 {{../../intro/architecture.en.md}}
 ## Installation
 {{../../installation/packages.en.md}}
 {{../../installation/docker.en.md}}
 {{../../installation/proxmox.en.md}}
 {{../../installation/opennebula.en.md}}
 {{../../installation/openstack.en.md}}
 {{../../installation/kubernetes.en.md}}
 {{../../installation/source.en.md}}
 {{../../config.en.md|indent=1}}
 {{../../config/common.en.md|indent=2}}
 {{../../config/network.en.md|indent=2}}
 {{../../config/client.en.md|indent=2}}
 {{../../config/layout-cluster.en.md|indent=2}}
 {{../../config/layout-osd.en.md|indent=2}}
 {{../../config/osd.en.md|indent=2}}
 {{../../config/monitor.en.md|indent=2}}
 {{../../config/pool.en.md|indent=2}}
 {{../../config/inode.en.md|indent=2}}
 ## Usage
 {{../../usage/cli.en.md}}
 {{../../usage/disk.en.md}}
 {{../../usage/fio.en.md}}
 {{../../usage/nbd.en.md}}
 {{../../usage/qemu.en.md}}
 {{../../usage/nfs.en.md}}
 {{../../usage/admin.en.md}}
 ## Performance
 {{../../performance/understanding.en.md}}
 {{../../performance/theoretical.en.md}}
 {{../../performance/comparison1.en.md}}
 {{../../performance/bench2.en.md}}
 {{../../intro/author.en.md|indent=1}}
--- a/docs/config/src/included.ru.md
+++ b/docs/config/src/included.ru.md
@ -1,75 +0,0 @@
 # Vitastor
 {{../../../README-ru.md#Идея|indent=0}}
 {{../../../README-ru.md#Презентации и записи докладов|indent=0}}
 {{../../intro/features.ru.md}}
 {{../../intro/quickstart.ru.md}}
 {{../../intro/architecture.ru.md}}
 ## Установка
 {{../../installation/packages.ru.md}}
 {{../../installation/docker.ru.md}}
 {{../../installation/proxmox.ru.md}}
 {{../../installation/opennebula.ru.md}}
 {{../../installation/openstack.ru.md}}
 {{../../installation/kubernetes.ru.md}}
 {{../../installation/source.ru.md}}
 {{../../config.ru.md|indent=1}}
 {{../../config/common.ru.md|indent=2}}
 {{../../config/network.ru.md|indent=2}}
 {{../../config/client.ru.md|indent=2}}
 {{../../config/layout-cluster.ru.md|indent=2}}
 {{../../config/layout-osd.ru.md|indent=2}}
 {{../../config/osd.ru.md|indent=2}}
 {{../../config/monitor.ru.md|indent=2}}
 {{../../config/pool.ru.md|indent=2}}
 {{../../config/inode.ru.md|indent=2}}
 ## Использование
 {{../../usage/cli.ru.md}}
 {{../../usage/disk.ru.md}}
 {{../../usage/fio.ru.md}}
 {{../../usage/nbd.ru.md}}
 {{../../usage/qemu.ru.md}}
 {{../../usage/nfs.ru.md}}
 {{../../usage/admin.ru.md}}
 ## Производительность
 {{../../performance/understanding.ru.md}}
 {{../../performance/theoretical.ru.md}}
 {{../../performance/comparison1.ru.md}}
 {{../../performance/bench2.ru.md}}
 {{../../intro/author.ru.md|indent=1}}
--- a/docs/config/src/layout-cluster.yml
+++ b/docs/config/src/layout-cluster.yml
@ -7,27 +7,26 @@
    in Vitastor, affects memory usage, write amplification and I/O load
    distribution effectiveness.
-    Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
+    Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
-    it's possible to use 1 MB for SSD too - it will lower memory usage, but
+    it's possible to use 4 MB for SSD too - it will lower memory usage, but
    may increase average WA and reduce linear performance.
    OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
    544 MB per 1 TB of used disk space with the default 128 KB block size.
    With 1 MB it's 8 times lower.
  info_ru: |
    Размер объектов (блоков данных), на которые делятся физические и виртуальные
    диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент
    настроек, влияет на потребление памяти, объём избыточной записи (write
    amplification) и эффективность распределения нагрузки по OSD.
-    Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
+    Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
-    для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
+    мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
    это понизит использование памяти, но ухудшит распределение нагрузки и в
    среднем увеличит WA.
    Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
    т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
-    стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
+    стандартном 128 КБ блоке.
 - name: bitmap_granularity
  type: int
  default: 4096
@ -47,24 +46,14 @@
    Не может быть меньше размера сектора дисков данных OSD.
 - name: immediate_commit
  type: string
-  default: all
+  default: false
  info: |
-    One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
+    Another parameter which is really important for performance.
    This parameter is also really important for performance.
    TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
    power loss protection (nonvolatile write-through cache) and also for most HDDs.
    "none" or "small" should be only selected if you use desktop SSDs without
    capacitors or drives with slow write-back cache that can't be disabled. Check
    immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
    Detailed explanation:
    Desktop SSDs are very fast (100000+ iops) for simple random writes
    without cache flush. However, they are really slow (only around 1000 iops)
-    if you try to fsync() each write, that is, if you want to guarantee that
+    if you try to fsync() each write, that is, when you want to guarantee that
-    each change gets actually persisted to the physical media.
+    each change gets immediately persisted to the physical media.
    Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
    "Supercapacitor-based Power Loss Protection", on the other hand, are equally
@ -76,8 +65,8 @@
    efficiently utilize desktop SSDs by postponing fsync until the client calls
    it explicitly.
-    This is what this parameter regulates. When it's set to "all" Vitastor
+    This is what this parameter regulates. When it's set to "all" the whole
-    cluster commits each change to disks immediately and clients just
+    Vitastor cluster commits each change to disks immediately and clients just
    ignore fsyncs because they know for sure that they're unneeded. This reduces
    the amount of network roundtrips performed by clients and improves
    performance. So it's always better to use server grade SSDs with
@ -97,22 +86,16 @@
    it (they have internal SSD cache even though it's not stated in datasheets).
    Setting this parameter to "all" or "small" in OSD parameters requires enabling
-    [disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
+    disable_journal_fsync and disable_meta_fsync, setting it to "all" also requires
-    [disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
+    enabling disable_data_fsync.
-    "all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
+
-    vitastor-disk tried to do that by default, first checking/disabling drive cache.
+    TLDR: For optimal performance, set immediate_commit to "all" if you only use
-    If it can't disable drive cache, OSD get initialized with "none".
+    SSDs with supercapacitor-based power loss protection (nonvolatile
    write-through cache) for both data and journals in the whole Vitastor
    cluster. Set it to "small" if you only use such SSDs for journals. Leave
    empty if your drives have write-back cache.
  info_ru: |
-    Одно из значений "none", "small" или "all". Глобальное значение, может быть
+    Ещё один важный для производительности параметр.
    переопределено [на уровне пула](pool.ru.md#immediate_commit).
    Данный параметр тоже важен для производительности.
    Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
    суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
    устанавливать только при использовании SSD настольного класса без
    суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
    Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
    Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
    секунду) при простой случайной записи без сбросов кэша. Однако они очень
@ -133,7 +116,7 @@
    эффективно утилизировать настольные SSD.
    Данный параметр влияет как раз на это. Когда он установлен в значение "all",
-    кластер Vitastor мгновенно фиксирует каждое изменение на физические
+    весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
    носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
    знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
    по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
@ -156,6 +139,12 @@
    указано в спецификациях).
    Указание "all" или "small" в настройках / командной строке OSD требует
-    включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
+    включения disable_journal_fsync и disable_meta_fsync, значение "all" также
-    [disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
+    требует включения disable_data_fsync.
-    также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
+
    Итого, вкратце: для оптимальной производительности установите
    immediate_commit в значение "all", если вы используете в кластере только SSD
    с суперконденсаторами и для данных, и для журналов. Если вы используете
    такие SSD для всех журналов, но не для данных - можете установить параметр
    в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
    оставьте параметр пустым.
--- a/docs/config/src/layout-osd.yml
+++ b/docs/config/src/layout-osd.yml
@ -110,22 +110,20 @@
  type: bool
  default: false
  info: |
-    Do not issue fsyncs to the data device, i.e. do not force it to flush cache.
+    Do not issue fsyncs to the data device, i.e. do not flush its cache.
-    Safe ONLY if your data device has write-through cache or if write-back
+    Safe ONLY if your data device has write-through cache. If you disable
-    cache is disabled. If you disable drive cache manually with `hdparm` or
+    the cache yourself using `hdparm` or `scsi_disk/cache_type` then make sure
-    writing to `/sys/.../scsi_disk/cache_type` then make sure that you do it
+    that the cache disable command is run every time before starting Vitastor
-    every time before starting Vitastor OSD (vitastor-disk does it automatically).
+    OSD, for example, in the systemd unit. See also `immediate_commit` option
-    See also [immediate_commit](layout-cluster.en.md#immediate_commit)
+    for the instructions to disable cache and how to benefit from it.
    for information about how to benefit from disabled cache.
  info_ru: |
-    Не отправлять fsync-и устройству данных, т.е. не заставлять его сбрасывать кэш.
+    Не отправлять fsync-и устройству данных, т.е. не сбрасывать его кэш.
    Безопасно, ТОЛЬКО если ваше устройство данных имеет кэш со сквозной
-    записью (write-through) или если кэш с отложенной записью (write-back) отключён.
+    записью (write-through). Если вы отключаете кэш через `hdparm` или
-    Если вы отключаете кэш вручную через `hdparm` или запись в `/sys/.../scsi_disk/cache_type`,
+    `scsi_disk/cache_type`, то удостоверьтесь, что команда отключения кэша
-    то удостоверьтесь, что вы делаете это каждый раз перед запуском Vitastor OSD
+    выполняется перед каждым запуском Vitastor OSD, например, в systemd unit-е.
-    (vitastor-disk делает это автоматически). Смотрите также опцию
+    Смотрите также опцию `immediate_commit` для инструкций по отключению кэша
-    [immediate_commit](layout-cluster.ru.md#immediate_commit) для информации о том,
+    и о том, как из этого извлечь выгоду.
    как извлечь выгоду из отключённого кэша.
 - name: disable_meta_fsync
  type: bool
  default: false
@ -181,7 +179,8 @@
    Because of this it can actually be beneficial to use SSDs which work well
    with 512 byte sectors and use 512 byte disk_alignment, journal_block_size
-    and meta_block_size. But at the moment, no such SSDs are known...
+    and meta_block_size. But the only SSD that may fit into this category is
    Intel Optane (probably, not tested yet).
    Clients don't need to be aware of disk_alignment, so it's not required to
    put a modified value into etcd key /vitastor/config/global.
@ -199,78 +198,9 @@
    Поэтому, на самом деле, может быть выгодно найти SSD, хорошо работающие с
    меньшими, 512-байтными, блоками и использовать 512-байтные disk_alignment,
-    journal_block_size и meta_block_size. Однако на данный момент такие SSD
+    journal_block_size и meta_block_size. Однако единственные SSD, которые
-    не известны...
+    теоретически могут попасть в эту категорию - это Intel Optane (но и это
    пока не проверялось автором).
    Клиентам не обязательно знать про disk_alignment, так что помещать значение
    этого параметра в etcd в /vitastor/config/global не нужно.
 - name: data_csum_type
  type: string
  default: none
  info: |
    Data checksum type to use. May be "crc32c" or "none". Set to "crc32c" to
    enable data checksums.
  info_ru: |
    Тип используемых OSD контрольных сумм данных. Может быть "crc32c" или "none".
    Установите в "crc32c", чтобы включить расчёт и проверку контрольных сумм данных.
    Следует понимать, что контрольные суммы в зависимости от размера блока их
    расчёта либо увеличивают потребление памяти, либо снижают производительность.
    Подробнее смотрите в описании параметра [csum_block_size](#csum_block_size).
 - name: csum_block_size
  type: int
  default: 4096
  info: |
    Checksum calculation block size.
    Must be equal or a multiple of [bitmap_granularity](layout-cluster.en.md#bitmap_granularity)
    (which is usually 4 KB).
    Checksums increase metadata size by 4 bytes per each csum_block_size of data.
    Checksums are always a tradeoff:
    1. You either sacrifice +1 GB RAM per 1 TB of data
    2. Or you raise csum_block_size, for example, to 32k and sacrifice
       50% random write iops due to checksum read-modify-write
    3. Or you turn off [inmemory_metadata](osd.en.md#inmemory_metadata) and
       sacrifice 50% random read iops due to checksum reads
    All-flash clusters usually have enough RAM to use default csum_block_size,
    which uses 1 GB RAM per 1 TB of data. HDD clusters usually don't.
    Thus, recommended setups are:
    1. All-flash, 1 GB RAM per 1 TB data: default (csum_block_size=4k)
    2. All-flash, less RAM: csum_block_size=4k + inmemory_metadata=false
    3. Hybrid HDD+SSD: csum_block_size=4k + inmemory_metadata=false
    4. HDD-only, faster random read: csum_block_size=32k
    5. HDD-only, faster random write: csum_block_size=4k +
       inmemory_metadata=false + meta_io=cached
    See also [meta_io](osd.en.md#meta_io).
  info_ru: |
    Размер блока расчёта контрольных сумм.
    Должен быть равен или кратен [bitmap_granularity](layout-cluster.ru.md#bitmap_granularity)
    (который обычно равен 4 КБ).
    Контрольные суммы увеличивают размер метаданных на 4 байта на каждые
    csum_block_size данных.
    Контрольные суммы - это всегда компромисс:
    1. Вы либо жертвуете потреблением +1 ГБ памяти на 1 ТБ дискового пространства
    2. Либо вы повышаете csum_block_size до, скажем, 32k и жертвуете 50%
       скорости случайной записи из-за цикла чтения-изменения-записи для расчёта
       новых контрольных сумм
    3. Либо вы отключаете [inmemory_metadata](osd.ru.md#inmemory_metadata) и
       жертвуете 50% скорости случайного чтения из-за чтения контрольных сумм
       с диска
    Таким образом, рекомендуются следующие варианты настроек:
    1. All-flash, 1 ГБ памяти на 1 ТБ данных: по умолчанию (csum_block_size=4k)
    2. All-flash, меньше памяти: csum_block_size=4k + inmemory_metadata=false
    3. Гибридные HDD+SSD: csum_block_size=4k + inmemory_metadata=false
    4. Только HDD, быстрее случайное чтение: csum_block_size=32k
    5. Только HDD, быстрее случайная запись: csum_block_size=4k +
       inmemory_metadata=false + meta_io=cached
    Смотрите также [meta_io](osd.ru.md#meta_io).
--- a/docs/config/src/make.js
+++ b/docs/config/src/make.js
@ -14,7 +14,6 @@ const L = {
        toc_config: '[Configuration](../config.en.md)',
        toc_usage: 'Usage',
        toc_performance: 'Performance',
        online: 'Can be changed online: yes',
    },
    ru: {
        Documentation: 'Документация',
@ -29,7 +28,6 @@ const L = {
        toc_config: '[Конфигурация](../config.ru.md)',
        toc_usage: 'Использование',
        toc_performance: 'Производительность',
        online: 'Можно менять на лету: да',
    },
 };
 const types = {
@ -38,7 +36,6 @@ const types = {
        bool: 'boolean',
        int: 'integer',
        sec: 'seconds',
        float: 'number',
        ms: 'milliseconds',
        us: 'microseconds',
    },
@ -47,7 +44,6 @@ const types = {
        bool: 'булево (да/нет)',
        int: 'целое число',
        sec: 'секунды',
        float: 'число',
        ms: 'миллисекунды',
        us: 'микросекунды',
    },
@ -74,8 +70,6 @@ for (const file of params_files)
                out += `- ${L[lang]['Default'] || 'Default'}: ${c.default}\n`;
            if (c.min !== undefined)
                out += `- ${L[lang]['Minimum'] || 'Minimum'}: ${c.min}\n`;
            if (c.online)
                out += `- ${L[lang]['online'] || 'Can be changed online: yes'}\n`;
            out += `\n`+(c["info_"+lang] || c["info"]).replace(/\s+$/, '');
        }
        const head = fs.readFileSync(__dirname+'/'+file+'.'+lang+'.md', { encoding: 'utf-8' });
--- a/Show More
+++ b/Show More
		`@ -1 +1 @@`
			`Subproject commit 8de8b467acbca50cfd8835c20e0e379110f3b32b`				`Subproject commit 45e6d1f13196a0824e2089a586c53b9de0283f17`
		`@ -1,2 +0,0 @@`
			`deb http://vitastor.io/debian bookworm main`
			`deb http://http.debian.net/debian/ bookworm-backports main`
		`@ -1,3 +0,0 @@`
			`#!/bin/bash`

			`docker exec -it vitastor vitastor-cli "$@"`
		`@ -1,3 +0,0 @@`
			`#!/bin/bash`

			`docker exec -it vitastor vitastor-disk "$@"`
		`@ -1,3 +0,0 @@`
			`#!/bin/bash`

			`docker exec -it vitastor fio "$@"`
		`@ -1,3 +0,0 @@`
			`#!/bin/bash`

			`docker exec -it vitastor vitastor-nbd "$@"`
		`@ -1,3 +0,0 @@`
			`#!/bin/bash`

			`while :; do sleep infinity; done`